author      Tomáš Mózes <tomas.mozes@gmail.com>   2024-08-01 15:02:58 +0200
committer   Tomáš Mózes <tomas.mozes@gmail.com>   2024-08-01 15:02:58 +0200
commit      212febf72900c12405591dcc5902d4cfa11173bf (patch)
tree        7a093fae6f723d02b6c4a573669615024fe65e4d
parent      Xen 4.17.4-pre-patchset-1 (diff)
download    xen-upstream-patches-212febf72900c12405591dcc5902d4cfa11173bf.tar.gz
            xen-upstream-patches-212febf72900c12405591dcc5902d4cfa11173bf.tar.bz2
            xen-upstream-patches-212febf72900c12405591dcc5902d4cfa11173bf.zip
Xen 4.18.3-pre-patchset-0 (tag: 4.18.3-pre-patchset-0)
Signed-off-by: Tomáš Mózes <tomas.mozes@gmail.com>
-rw-r--r-- 0001-x86-entry-Fix-build-with-older-toolchains.patch | 32
-rw-r--r-- 0002-altcall-fix-__alt_call_maybe_initdata-so-it-s-safe-f.patch | 49
-rw-r--r-- 0002-pci-fail-device-assignment-if-phantom-functions-cann.patch | 91
-rw-r--r-- 0003-VT-d-Fix-else-vs-endif-misplacement.patch | 70
-rw-r--r-- 0003-x86-rtc-Avoid-UIP-flag-being-set-for-longer-than-exp.patch | 57
-rw-r--r-- 0004-x86-MTRR-correct-inadvertently-inverted-WC-check.patch | 36
-rw-r--r-- 0004-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch | 123
-rw-r--r-- 0005-CirrusCI-drop-FreeBSD-12.patch | 39
-rw-r--r-- 0005-x86-spec-fix-reporting-of-BHB-clearing-usage-from-gu.patch | 69
-rw-r--r-- 0006-x86-intel-ensure-Global-Performance-Counter-Control-.patch | 74
-rw-r--r-- 0006-x86-spec-adjust-logic-that-elides-lfence.patch | 75
-rw-r--r-- 0007-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch | 65
-rw-r--r-- 0007-xen-xsm-Wire-up-get_dom0_console.patch | 66
-rw-r--r-- 0008-x86-vmx-Disallow-the-use-of-inactivity-states.patch | 126
-rw-r--r-- 0008-xen-x86-Fix-Syntax-warning-in-gen-cpuid.py.patch | 41
-rw-r--r-- 0009-VT-d-correct-ATS-checking-for-root-complex-integrate.patch | 63
-rw-r--r-- 0009-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch | 70
-rw-r--r-- 0010-tools-libxs-Open-dev-xen-xenbus-fds-as-O_CLOEXEC.patch | 47
-rw-r--r-- 0010-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch | 36
-rw-r--r-- 0011-tools-xentop-fix-sorting-bug-for-some-columns.patch | 67
-rw-r--r-- 0011-x86-cpu-policy-Fix-migration-from-Ice-Lake-to-Cascad.patch | 92
-rw-r--r-- 0012-amd-vi-fix-IVMD-memory-type-checks.patch | 53
-rw-r--r-- 0012-x86-ucode-Distinguish-ucode-already-up-to-date.patch | 58
-rw-r--r-- 0013-libxl-fix-population-of-the-online-vCPU-bitmap-for-P.patch | 61
-rw-r--r-- 0013-x86-hvm-Fix-fast-singlestep-state-persistence.patch | 86
-rw-r--r-- 0014-libxl-Fix-handling-XenStore-errors-in-device-creatio.patch | 191
-rw-r--r-- 0014-x86-HVM-tidy-state-on-hvmemul_map_linear_addr-s-erro.patch | 63
-rw-r--r-- 0015-build-Replace-which-with-command-v.patch | 57
-rw-r--r-- 0015-xen-sched-set-all-sched_resource-data-inside-locked-.patch | 84
-rw-r--r-- 0016-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch | 50
-rw-r--r-- 0016-x86-respect-mapcache_domain_init-failing.patch | 38
-rw-r--r-- 0017-build-make-sure-build-fails-when-running-kconfig-fai.patch | 58
-rw-r--r-- 0017-tools-xentop-Fix-cpu-sort-order.patch | 76
-rw-r--r-- 0018-x86-mtrr-avoid-system-wide-rendezvous-when-setting-A.patch | 60
-rw-r--r-- 0018-x86emul-add-missing-EVEX.R-checks.patch | 50
-rw-r--r-- 0019-update-Xen-version-to-4.18.3-pre.patch (renamed from 0001-update-Xen-version-to-4.17.4-pre.patch) | 16
-rw-r--r-- 0019-xen-livepatch-fix-norevert-test-hook-setup-typo.patch | 36
-rw-r--r-- 0020-x86-ucode-Further-fixes-to-identify-ucode-already-up.patch | 92
-rw-r--r-- 0020-xen-cmdline-fix-printf-format-specifier-in-no_config.patch | 38
-rw-r--r-- 0021-x86-altcall-use-a-union-as-register-type-for-functio.patch | 141
-rw-r--r-- 0021-x86-msi-prevent-watchdog-triggering-when-dumping-MSI.patch | 44
-rw-r--r-- 0022-x86-irq-remove-offline-CPUs-from-old-CPU-mask-when-a.patch | 44
-rw-r--r-- 0022-x86-spec-fix-BRANCH_HARDEN-option-to-only-be-set-whe.patch | 57
-rw-r--r-- 0023-CI-Update-FreeBSD-to-13.3.patch | 33
-rw-r--r-- 0023-x86-account-for-shadow-stack-in-exception-from-stub-.patch | 212
-rw-r--r-- 0024-x86-smp-do-not-use-shorthand-IPI-destinations-in-CPU.patch | 98
-rw-r--r-- 0024-xen-arm-Fix-UBSAN-failure-in-start_xen.patch | 52
-rw-r--r-- 0025-x86-HVM-hide-SVM-VMX-when-their-enabling-is-prohibit.patch | 67
-rw-r--r-- 0025-x86-irq-limit-interrupt-movement-done-by-fixup_irqs.patch | 104
-rw-r--r-- 0026-x86-EPT-correct-special-page-checking-in-epte_get_en.patch | 46
-rw-r--r-- 0026-xen-sched-Fix-UB-shift-in-compat_set_timer_op.patch | 86
-rw-r--r-- 0027-x86-EPT-avoid-marking-non-present-entries-for-re-con.patch | 85
-rw-r--r-- 0027-x86-spec-print-the-built-in-SPECULATIVE_HARDEN_-opti.patch | 54
-rw-r--r-- 0028-x86-EPT-drop-questionable-mfn_valid-from-epte_get_en.patch | 47
-rw-r--r-- 0028-x86-spec-fix-INDIRECT_THUNK-option-to-only-be-set-wh.patch | 67
-rw-r--r-- 0029-x86-Intel-unlock-CPUID-earlier-for-the-BSP.patch | 105
-rw-r--r-- 0029-x86-spec-do-not-print-thunk-option-selection-if-not-.patch | 50
-rw-r--r-- 0030-x86-irq-deal-with-old_cpu_mask-for-interrupts-in-mov.patch | 84
-rw-r--r-- 0030-xen-livepatch-register-livepatch-regions-when-loaded.patch | 159
-rw-r--r-- 0031-x86-irq-handle-moving-interrupts-in-_assign_irq_vect.patch | 172
-rw-r--r-- 0031-xen-livepatch-search-for-symbols-in-all-loaded-paylo.patch | 149
-rw-r--r-- 0032-xen-livepatch-fix-norevert-test-attempt-to-open-code.patch | 186
-rw-r--r-- 0032-xen-ubsan-Fix-UB-in-type_descriptor-declaration.patch | 39
-rw-r--r-- 0033-x86-xstate-Fix-initialisation-of-XSS-cache.patch | 74
-rw-r--r-- 0033-xen-livepatch-properly-build-the-noapply-and-norever.patch | 43
-rw-r--r-- 0034-libxl-Fix-segfault-in-device_model_spawn_outcome.patch | 39
-rw-r--r-- 0034-x86-cpuid-Fix-handling-of-XSAVE-dynamic-leaves.patch | 72
-rw-r--r-- 0035-x86-altcall-always-use-a-temporary-parameter-stashin.patch | 197
-rw-r--r-- 0035-x86-irq-forward-pending-interrupts-to-new-destinatio.patch | 143
-rw-r--r-- 0036-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch | 102
-rw-r--r-- 0036-x86-re-run-exception-from-stub-recovery-selftests-wi.patch | 84
-rw-r--r-- 0037-hvmloader-PCI-skip-huge-BARs-in-certain-calculations.patch | 99
-rw-r--r-- 0037-tools-tests-don-t-let-test-xenstore-write-nodes-exce.patch | 41
-rw-r--r-- 0038-tools-tests-let-test-xenstore-exit-with-non-0-status.patch | 57
-rw-r--r-- 0038-x86-mm-fix-detection-of-last-L1-entry-in-modify_xen_.patch | 41
-rw-r--r-- 0039-LICENSES-Add-MIT-0-MIT-No-Attribution.patch | 58
-rw-r--r-- 0039-x86-entry-Introduce-EFRAME_-constants.patch | 314
-rw-r--r-- 0040-tools-Import-stand-alone-sd_notify-implementation-fr.patch | 130
-rw-r--r-- 0040-x86-Resync-intel-family.h-from-Linux.patch | 98
-rw-r--r-- 0041-tools-c-o-xenstored-Don-t-link-against-libsystemd.patch | 87
-rw-r--r-- 0041-x86-vmx-Perform-VERW-flushing-later-in-the-VMExit-pa.patch | 146
-rw-r--r-- 0042-tools-Drop-libsystemd-as-a-dependency.patch | 648
-rw-r--r-- 0042-x86-spec-ctrl-Perform-VERW-flushing-later-in-exit-pa.patch | 209
-rw-r--r-- 0043-x86-ioapic-Fix-signed-shifts-in-io_apic.c.patch | 46
-rw-r--r-- 0043-x86-spec-ctrl-Rename-VERW-related-options.patch | 248
-rw-r--r-- 0044-tools-xl-Open-xldevd.log-with-O_CLOEXEC.patch | 53
-rw-r--r-- 0044-x86-spec-ctrl-VERW-handling-adjustments.patch | 171
-rw-r--r-- 0045-pirq_cleanup_check-leaks.patch | 84
-rw-r--r-- 0045-x86-spec-ctrl-Mitigation-Register-File-Data-Sampling.patch | 320
-rw-r--r-- 0046-tools-dombuilder-Correct-the-length-calculation-in-x.patch | 44
-rw-r--r-- 0046-x86-paging-Delete-update_cr3-s-do_locking-parameter.patch | 161
-rw-r--r-- 0047-tools-libxs-Fix-CLOEXEC-handling-in-get_dev.patch | 95
-rw-r--r-- 0047-xen-Swap-order-of-actions-in-the-FREE-macros.patch | 58
-rw-r--r-- 0048-tools-libxs-Fix-CLOEXEC-handling-in-get_socket.patch | 60
-rw-r--r-- 0048-x86-spinlock-introduce-support-for-blocking-speculat.patch | 331
-rw-r--r-- 0049-rwlock-introduce-support-for-blocking-speculation-in.patch | 125
-rw-r--r-- 0049-tools-libxs-Fix-CLOEXEC-handling-in-xs_fileno.patch | 109
-rw-r--r-- 0050-cmdline-document-and-enforce-extra_guest_irqs-upper-.patch | 156
-rw-r--r-- 0050-percpu-rwlock-introduce-support-for-blocking-specula.patch | 87
-rw-r--r-- 0051-locking-attempt-to-ensure-lock-wrappers-are-always-i.patch | 405
-rw-r--r-- 0051-x86-entry-don-t-clear-DF-when-raising-UD-for-lack-of.patch | 58
-rw-r--r-- 0052-evtchn-build-fix-for-Arm.patch | 43
-rw-r--r-- 0052-x86-mm-add-speculation-barriers-to-open-coded-locks.patch | 73
-rw-r--r-- 0053-x86-IRQ-avoid-double-unlock-in-map_domain_pirq.patch | 53
-rw-r--r-- 0053-x86-protect-conditional-lock-taking-from-speculative.patch | 216
-rw-r--r-- 0054-tools-ipxe-update-for-fixing-build-with-GCC12.patch | 33
-rw-r--r-- 0054-x86-physdev-Return-pirq-that-irq-was-already-mapped-.patch | 38
-rw-r--r-- 0055-tools-libxs-Fix-fcntl-invocation-in-set_cloexec.patch | 57
-rw-r--r-- 0055-x86-mm-use-block_lock_speculation-in-_mm_write_lock.patch | 35
-rw-r--r-- 0056-x86-altcall-fix-clang-code-gen-when-using-altcall-in.patch | 85
-rw-r--r-- 0056-x86-boot-Fix-setup_apic_nmi_watchdog-to-fail-more-cl.patch | 120
-rw-r--r-- 0057-x86-PoD-tie-together-P2M-update-and-increment-of-ent.patch | 61
-rw-r--r-- 0058-tools-oxenstored-Use-Map-instead-of-Hashtbl-for-quot.patch | 143
-rw-r--r-- 0059-tools-oxenstored-Make-Quota.t-pure.patch | 121
-rw-r--r-- 0060-x86-cpu-policy-Hide-x2APIC-from-PV-guests.patch | 90
-rw-r--r-- 0061-x86-cpu-policy-Fix-visibility-of-HTT-CMP_LEGACY-in-m.patch | 85
-rw-r--r-- 0062-xen-virtual-region-Rename-the-start-end-fields.patch | 140
-rw-r--r-- 0063-xen-virtual-region-Include-rodata-pointers.patch | 71
-rw-r--r-- 0064-x86-livepatch-Relax-permissions-on-rodata-too.patch | 85
-rw-r--r-- 0065-x86-boot-Improve-the-boot-watchdog-determination-of-.patch | 106
-rw-r--r-- 0066-x86-boot-Support-the-watchdog-on-newer-AMD-systems.patch | 48
-rw-r--r-- 0067-tests-resource-Fix-HVM-guest-in-SHADOW-builds.patch | 110
-rw-r--r-- info.txt | 6
123 files changed, 4574 insertions, 7274 deletions
diff --git a/0001-x86-entry-Fix-build-with-older-toolchains.patch b/0001-x86-entry-Fix-build-with-older-toolchains.patch
new file mode 100644
index 0000000..ad6e76a
--- /dev/null
+++ b/0001-x86-entry-Fix-build-with-older-toolchains.patch
@@ -0,0 +1,32 @@
+From 2d38302c33b117aa9a417056db241aefc840c2f0 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 9 Apr 2024 21:39:51 +0100
+Subject: [PATCH 01/56] x86/entry: Fix build with older toolchains
+
+Binutils older than 2.29 doesn't know INCSSPD.
+
+Fixes: 8e186f98ce0e ("x86: Use indirect calls in reset-stack infrastructure")
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+(cherry picked from commit a9fa82500818a8d8ce5f2843f1577bd2c29d088e)
+---
+ xen/arch/x86/x86_64/entry.S | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index ad7dd3b23b..054fcb225f 100644
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -643,7 +643,9 @@ ENTRY(continue_pv_domain)
+ * JMPed to. Drop the return address.
+ */
+ add $8, %rsp
++#ifdef CONFIG_XEN_SHSTK
+ ALTERNATIVE "", "mov $2, %eax; incsspd %eax", X86_FEATURE_XEN_SHSTK
++#endif
+
+ call check_wakeup_from_wait
+ ret_from_intr:
+--
+2.45.2
+
diff --git a/0002-altcall-fix-__alt_call_maybe_initdata-so-it-s-safe-f.patch b/0002-altcall-fix-__alt_call_maybe_initdata-so-it-s-safe-f.patch
new file mode 100644
index 0000000..05ecd83
--- /dev/null
+++ b/0002-altcall-fix-__alt_call_maybe_initdata-so-it-s-safe-f.patch
@@ -0,0 +1,49 @@
+From 8bdcb0b98b53140102031ceca0611f22190227fd Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Mon, 29 Apr 2024 09:35:21 +0200
+Subject: [PATCH 02/56] altcall: fix __alt_call_maybe_initdata so it's safe for
+ livepatch
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Setting alternative call variables as __init is not safe for use with
+livepatch, as livepatches can rightfully introduce new alternative calls to
+structures marked as __alt_call_maybe_initdata (possibly just indirectly due to
+replacing existing functions that use those). Attempting to resolve those
+alternative calls then results in page faults as the variable that holds the
+function pointer address has been freed.
+
+When livepatch is supported use the __ro_after_init attribute instead of
+__initdata for __alt_call_maybe_initdata.
+
+Fixes: f26bb285949b ('xen: Implement xen/alternative-call.h for use in common code')
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: af4cd0a6a61cdb03bc1afca9478b05b0c9703599
+master date: 2024-04-11 18:51:36 +0100
+---
+ xen/include/xen/alternative-call.h | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/xen/include/xen/alternative-call.h b/xen/include/xen/alternative-call.h
+index 5c6b9a562b..10f7d7637e 100644
+--- a/xen/include/xen/alternative-call.h
++++ b/xen/include/xen/alternative-call.h
+@@ -50,7 +50,12 @@
+
+ #include <asm/alternative.h>
+
+-#define __alt_call_maybe_initdata __initdata
++#ifdef CONFIG_LIVEPATCH
++/* Must keep for livepatches to resolve alternative calls. */
++# define __alt_call_maybe_initdata __ro_after_init
++#else
++# define __alt_call_maybe_initdata __initdata
++#endif
+
+ #else
+
+--
+2.45.2
+
diff --git a/0002-pci-fail-device-assignment-if-phantom-functions-cann.patch b/0002-pci-fail-device-assignment-if-phantom-functions-cann.patch
deleted file mode 100644
index bafad55..0000000
--- a/0002-pci-fail-device-assignment-if-phantom-functions-cann.patch
+++ /dev/null
@@ -1,91 +0,0 @@
-From f9e1ed51bdba31017ea17e1819eb2ade6b5c8615 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 30 Jan 2024 14:37:39 +0100
-Subject: [PATCH 02/67] pci: fail device assignment if phantom functions cannot
- be assigned
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The current behavior is that no error is reported if (some) phantom functions
-fail to be assigned during device add or assignment, so the operation succeeds
-even if some phantom functions are not correctly setup.
-
-This can lead to devices possibly being successfully assigned to a domU while
-some of the device phantom functions are still assigned to dom0. Even when the
-device is assigned domIO before being assigned to a domU phantom functions
-might fail to be assigned to domIO, and also fail to be assigned to the domU,
-leaving them assigned to dom0.
-
-Since the device can generate requests using the IDs of those phantom
-functions, given the scenario above a device in such state would be in control
-of a domU, but still capable of generating transactions that use a context ID
-targeting dom0 owned memory.
-
-Modify device assign in order to attempt to deassign the device if phantom
-functions failed to be assigned.
-
-Note that device addition is not modified in the same way, as in that case the
-device is assigned to a trusted domain, and hence partial assign can lead to
-device malfunction but not a security issue.
-
-This is XSA-449 / CVE-2023-46839
-
-Fixes: 4e9950dc1bd2 ('IOMMU: add phantom function support')
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: cb4ecb3cc17b02c2814bc817efd05f3f3ba33d1e
-master date: 2024-01-30 14:28:01 +0100
----
- xen/drivers/passthrough/pci.c | 27 +++++++++++++++++++++------
- 1 file changed, 21 insertions(+), 6 deletions(-)
-
-diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
-index 07d1986d33..8c62b14d19 100644
---- a/xen/drivers/passthrough/pci.c
-+++ b/xen/drivers/passthrough/pci.c
-@@ -1444,11 +1444,10 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag)
-
- pdev->fault.count = 0;
-
-- if ( (rc = iommu_call(hd->platform_ops, assign_device, d, devfn,
-- pci_to_dev(pdev), flag)) )
-- goto done;
-+ rc = iommu_call(hd->platform_ops, assign_device, d, devfn, pci_to_dev(pdev),
-+ flag);
-
-- for ( ; pdev->phantom_stride; rc = 0 )
-+ while ( pdev->phantom_stride && !rc )
- {
- devfn += pdev->phantom_stride;
- if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
-@@ -1459,8 +1458,24 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag)
-
- done:
- if ( rc )
-- printk(XENLOG_G_WARNING "%pd: assign (%pp) failed (%d)\n",
-- d, &PCI_SBDF(seg, bus, devfn), rc);
-+ {
-+ printk(XENLOG_G_WARNING "%pd: assign %s(%pp) failed (%d)\n",
-+ d, devfn != pdev->devfn ? "phantom function " : "",
-+ &PCI_SBDF(seg, bus, devfn), rc);
-+
-+ if ( devfn != pdev->devfn && deassign_device(d, seg, bus, pdev->devfn) )
-+ {
-+ /*
-+ * Device with phantom functions that failed to both assign and
-+ * rollback. Mark the device as broken and crash the target domain,
-+ * as the state of the functions at this point is unknown and Xen
-+ * has no way to assert consistent context assignment among them.
-+ */
-+ pdev->broken = true;
-+ if ( !is_hardware_domain(d) && d != dom_io )
-+ domain_crash(d);
-+ }
-+ }
- /* The device is assigned to dom_io so mark it as quarantined */
- else if ( d == dom_io )
- pdev->quarantine = true;
---
-2.44.0
-
diff --git a/0003-VT-d-Fix-else-vs-endif-misplacement.patch b/0003-VT-d-Fix-else-vs-endif-misplacement.patch
deleted file mode 100644
index 622fa18..0000000
--- a/0003-VT-d-Fix-else-vs-endif-misplacement.patch
+++ /dev/null
@@ -1,70 +0,0 @@
-From 6b1864afc14d484cdbc9754ce3172ac3dc189846 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 30 Jan 2024 14:38:38 +0100
-Subject: [PATCH 03/67] VT-d: Fix "else" vs "#endif" misplacement
-
-In domain_pgd_maddr() the "#endif" is misplaced with respect to "else". This
-generates incorrect logic when CONFIG_HVM is compiled out, as the "else" body
-is executed unconditionally.
-
-Rework the logic to use IS_ENABLED() instead of explicit #ifdef-ary, as it's
-clearer to follow. This in turn involves adjusting p2m_get_pagetable() to
-compile when CONFIG_HVM is disabled.
-
-This is XSA-450 / CVE-2023-46840.
-
-Fixes: 033ff90aa9c1 ("x86/P2M: p2m_{alloc,free}_ptp() and p2m_alloc_table() are HVM-only")
-Reported-by: Teddy Astie <teddy.astie@vates.tech>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: cc6ba68edf6dcd18c3865e7d7c0f1ed822796426
-master date: 2024-01-30 14:29:15 +0100
----
- xen/arch/x86/include/asm/p2m.h | 9 ++++++++-
- xen/drivers/passthrough/vtd/iommu.c | 4 +---
- 2 files changed, 9 insertions(+), 4 deletions(-)
-
-diff --git a/xen/arch/x86/include/asm/p2m.h b/xen/arch/x86/include/asm/p2m.h
-index cd43d8621a..4f691533d5 100644
---- a/xen/arch/x86/include/asm/p2m.h
-+++ b/xen/arch/x86/include/asm/p2m.h
-@@ -447,7 +447,14 @@ static inline bool_t p2m_is_altp2m(const struct p2m_domain *p2m)
- return p2m->p2m_class == p2m_alternate;
- }
-
--#define p2m_get_pagetable(p2m) ((p2m)->phys_table)
-+#ifdef CONFIG_HVM
-+static inline pagetable_t p2m_get_pagetable(const struct p2m_domain *p2m)
-+{
-+ return p2m->phys_table;
-+}
-+#else
-+pagetable_t p2m_get_pagetable(const struct p2m_domain *p2m);
-+#endif
-
- /*
- * Ensure any deferred p2m TLB flush has been completed on all VCPUs.
-diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
-index b4c11a6b48..908b3ba6ee 100644
---- a/xen/drivers/passthrough/vtd/iommu.c
-+++ b/xen/drivers/passthrough/vtd/iommu.c
-@@ -441,15 +441,13 @@ static paddr_t domain_pgd_maddr(struct domain *d, paddr_t pgd_maddr,
-
- if ( pgd_maddr )
- /* nothing */;
--#ifdef CONFIG_HVM
-- else if ( iommu_use_hap_pt(d) )
-+ else if ( IS_ENABLED(CONFIG_HVM) && iommu_use_hap_pt(d) )
- {
- pagetable_t pgt = p2m_get_pagetable(p2m_get_hostp2m(d));
-
- pgd_maddr = pagetable_get_paddr(pgt);
- }
- else
--#endif
- {
- if ( !hd->arch.vtd.pgd_maddr )
- {
---
-2.44.0
-
diff --git a/0003-x86-rtc-Avoid-UIP-flag-being-set-for-longer-than-exp.patch b/0003-x86-rtc-Avoid-UIP-flag-being-set-for-longer-than-exp.patch
new file mode 100644
index 0000000..8307630
--- /dev/null
+++ b/0003-x86-rtc-Avoid-UIP-flag-being-set-for-longer-than-exp.patch
@@ -0,0 +1,57 @@
+From af0e9ba44a58c87d6d135d8ffbf468b4ceac0a41 Mon Sep 17 00:00:00 2001
+From: Ross Lagerwall <ross.lagerwall@citrix.com>
+Date: Mon, 29 Apr 2024 09:36:04 +0200
+Subject: [PATCH 03/56] x86/rtc: Avoid UIP flag being set for longer than
+ expected
+
+In a test, OVMF reported an error initializing the RTC without
+indicating the precise nature of the error. The only plausible
+explanation I can find is as follows:
+
+As part of the initialization, OVMF reads register C and then reads
+register A repatedly until the UIP flag is not set. If this takes longer
+than 100 ms, OVMF fails and reports an error. This may happen with the
+following sequence of events:
+
+At guest time=0s, rtc_init() calls check_update_timer() which schedules
+update_timer for t=(1 - 244us).
+
+At t=1s, the update_timer function happens to have been called >= 244us
+late. In the timer callback, it sets the UIP flag and schedules
+update_timer2 for t=1s.
+
+Before update_timer2 runs, the guest reads register C which calls
+check_update_timer(). check_update_timer() stops the scheduled
+update_timer2 and since the guest time is now outside of the update
+cycle, it schedules update_timer for t=(2 - 244us).
+
+The UIP flag will therefore be set for a whole second from t=1 to t=2
+while the guest repeatedly reads register A waiting for the UIP flag to
+clear. Fix it by clearing the UIP flag when scheduling update_timer.
+
+I was able to reproduce this issue with a synthetic test and this
+resolves the issue.
+
+Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 43a07069863b419433dee12c9b58c1f7ce70aa97
+master date: 2024-04-23 14:09:18 +0200
+---
+ xen/arch/x86/hvm/rtc.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/xen/arch/x86/hvm/rtc.c b/xen/arch/x86/hvm/rtc.c
+index 206b4296e9..4839374352 100644
+--- a/xen/arch/x86/hvm/rtc.c
++++ b/xen/arch/x86/hvm/rtc.c
+@@ -202,6 +202,7 @@ static void check_update_timer(RTCState *s)
+ }
+ else
+ {
++ s->hw.cmos_data[RTC_REG_A] &= ~RTC_UIP;
+ next_update_time = (USEC_PER_SEC - guest_usec - 244) * NS_PER_USEC;
+ expire_time = NOW() + next_update_time;
+ s->next_update_time = expire_time;
+--
+2.45.2
+
diff --git a/0004-x86-MTRR-correct-inadvertently-inverted-WC-check.patch b/0004-x86-MTRR-correct-inadvertently-inverted-WC-check.patch
new file mode 100644
index 0000000..ed7754d
--- /dev/null
+++ b/0004-x86-MTRR-correct-inadvertently-inverted-WC-check.patch
@@ -0,0 +1,36 @@
+From eb7059767c82d833ebecdf8106e96482b04f3c40 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Mon, 29 Apr 2024 09:36:37 +0200
+Subject: [PATCH 04/56] x86/MTRR: correct inadvertently inverted WC check
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The ! clearly got lost by mistake.
+
+Fixes: e9e0eb30d4d6 ("x86/MTRR: avoid several indirect calls")
+Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: 77e25f0e30ddd11e043e6fce84bf108ce7de5b6f
+master date: 2024-04-23 14:13:48 +0200
+---
+ xen/arch/x86/cpu/mtrr/main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/cpu/mtrr/main.c b/xen/arch/x86/cpu/mtrr/main.c
+index 55a4da54a7..90b235f57e 100644
+--- a/xen/arch/x86/cpu/mtrr/main.c
++++ b/xen/arch/x86/cpu/mtrr/main.c
+@@ -316,7 +316,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
+ }
+
+ /* If the type is WC, check that this processor supports it */
+- if ((type == X86_MT_WC) && mtrr_have_wrcomb()) {
++ if ((type == X86_MT_WC) && !mtrr_have_wrcomb()) {
+ printk(KERN_WARNING
+ "mtrr: your processor doesn't support write-combining\n");
+ return -EOPNOTSUPP;
+--
+2.45.2
+
diff --git a/0004-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch b/0004-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch
deleted file mode 100644
index fa90a46..0000000
--- a/0004-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch
+++ /dev/null
@@ -1,123 +0,0 @@
-From abcc32f0634627fe21117a48bd10e792bfbdd6dc Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Fri, 2 Feb 2024 08:01:09 +0100
-Subject: [PATCH 04/67] x86/amd: Extend CPU erratum #1474 fix to more affected
- models
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Erratum #1474 has now been extended to cover models from family 17h ranges
-00-2Fh, so the errata now covers all the models released under Family
-17h (Zen, Zen+ and Zen2).
-
-Additionally extend the workaround to Family 18h (Hygon), since it's based on
-the Zen architecture and very likely affected.
-
-Rename all the zen2 related symbols to fam17, since the errata doesn't
-exclusively affect Zen2 anymore.
-
-Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: 23db507a01a4ec5259ec0ab43d296a41b1c326ba
-master date: 2023-12-21 12:19:40 +0000
----
- xen/arch/x86/cpu/amd.c | 27 ++++++++++++++-------------
- 1 file changed, 14 insertions(+), 13 deletions(-)
-
-diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
-index 29ae97e7c0..3d85e9797d 100644
---- a/xen/arch/x86/cpu/amd.c
-+++ b/xen/arch/x86/cpu/amd.c
-@@ -54,7 +54,7 @@ bool __read_mostly amd_acpi_c1e_quirk;
- bool __ro_after_init amd_legacy_ssbd;
- bool __initdata amd_virt_spec_ctrl;
-
--static bool __read_mostly zen2_c6_disabled;
-+static bool __read_mostly fam17_c6_disabled;
-
- static inline int rdmsr_amd_safe(unsigned int msr, unsigned int *lo,
- unsigned int *hi)
-@@ -951,24 +951,24 @@ void amd_check_zenbleed(void)
- val & chickenbit ? "chickenbit" : "microcode");
- }
-
--static void cf_check zen2_disable_c6(void *arg)
-+static void cf_check fam17_disable_c6(void *arg)
- {
- /* Disable C6 by clearing the CCR{0,1,2}_CC6EN bits. */
- const uint64_t mask = ~((1ul << 6) | (1ul << 14) | (1ul << 22));
- uint64_t val;
-
-- if (!zen2_c6_disabled) {
-+ if (!fam17_c6_disabled) {
- printk(XENLOG_WARNING
- "Disabling C6 after 1000 days apparent uptime due to AMD errata 1474\n");
-- zen2_c6_disabled = true;
-+ fam17_c6_disabled = true;
- /*
- * Prevent CPU hotplug so that started CPUs will either see
-- * zen2_c6_disabled set, or will be handled by
-+ * zen_c6_disabled set, or will be handled by
- * smp_call_function().
- */
- while (!get_cpu_maps())
- process_pending_softirqs();
-- smp_call_function(zen2_disable_c6, NULL, 0);
-+ smp_call_function(fam17_disable_c6, NULL, 0);
- put_cpu_maps();
- }
-
-@@ -1273,8 +1273,8 @@ static void cf_check init_amd(struct cpuinfo_x86 *c)
- amd_check_zenbleed();
- amd_check_erratum_1485();
-
-- if (zen2_c6_disabled)
-- zen2_disable_c6(NULL);
-+ if (fam17_c6_disabled)
-+ fam17_disable_c6(NULL);
-
- check_syscfg_dram_mod_en();
-
-@@ -1286,7 +1286,7 @@ const struct cpu_dev amd_cpu_dev = {
- .c_init = init_amd,
- };
-
--static int __init cf_check zen2_c6_errata_check(void)
-+static int __init cf_check amd_check_erratum_1474(void)
- {
- /*
- * Errata #1474: A Core May Hang After About 1044 Days
-@@ -1294,7 +1294,8 @@ static int __init cf_check zen2_c6_errata_check(void)
- */
- s_time_t delta;
-
-- if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17 || !is_zen2_uarch())
-+ if (cpu_has_hypervisor ||
-+ (boot_cpu_data.x86 != 0x17 && boot_cpu_data.x86 != 0x18))
- return 0;
-
- /*
-@@ -1309,10 +1310,10 @@ static int __init cf_check zen2_c6_errata_check(void)
- if (delta > 0) {
- static struct timer errata_c6;
-
-- init_timer(&errata_c6, zen2_disable_c6, NULL, 0);
-+ init_timer(&errata_c6, fam17_disable_c6, NULL, 0);
- set_timer(&errata_c6, NOW() + delta);
- } else
-- zen2_disable_c6(NULL);
-+ fam17_disable_c6(NULL);
-
- return 0;
- }
-@@ -1320,4 +1321,4 @@ static int __init cf_check zen2_c6_errata_check(void)
- * Must be executed after early_time_init() for tsc_ticks2ns() to have been
- * calibrated. That prevents us doing the check in init_amd().
- */
--presmp_initcall(zen2_c6_errata_check);
-+presmp_initcall(amd_check_erratum_1474);
---
-2.44.0
-
diff --git a/0005-CirrusCI-drop-FreeBSD-12.patch b/0005-CirrusCI-drop-FreeBSD-12.patch
deleted file mode 100644
index dac712b..0000000
--- a/0005-CirrusCI-drop-FreeBSD-12.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From 0ef1fb43ddd61b3c4c953e833e012ac21ad5ca0f Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Fri, 2 Feb 2024 08:01:50 +0100
-Subject: [PATCH 05/67] CirrusCI: drop FreeBSD 12
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Went EOL by the end of December 2023, and the pkg repos have been shut down.
-
-Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: c2ce3466472e9c9eda79f5dc98eb701bc6fdba20
-master date: 2024-01-15 12:20:11 +0100
----
- .cirrus.yml | 6 ------
- 1 file changed, 6 deletions(-)
-
-diff --git a/.cirrus.yml b/.cirrus.yml
-index 7e0beb200d..63f3afb104 100644
---- a/.cirrus.yml
-+++ b/.cirrus.yml
-@@ -14,12 +14,6 @@ freebsd_template: &FREEBSD_TEMPLATE
- - ./configure --with-system-seabios=/usr/local/share/seabios/bios.bin
- - gmake -j`sysctl -n hw.ncpu` clang=y
-
--task:
-- name: 'FreeBSD 12'
-- freebsd_instance:
-- image_family: freebsd-12-4
-- << : *FREEBSD_TEMPLATE
--
- task:
- name: 'FreeBSD 13'
- freebsd_instance:
---
-2.44.0
-
diff --git a/0005-x86-spec-fix-reporting-of-BHB-clearing-usage-from-gu.patch b/0005-x86-spec-fix-reporting-of-BHB-clearing-usage-from-gu.patch
new file mode 100644
index 0000000..bad0428
--- /dev/null
+++ b/0005-x86-spec-fix-reporting-of-BHB-clearing-usage-from-gu.patch
@@ -0,0 +1,69 @@
+From 0b0c7dca70d64c35c86e5d503f67366ebe2b9138 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Mon, 29 Apr 2024 09:37:04 +0200
+Subject: [PATCH 05/56] x86/spec: fix reporting of BHB clearing usage from
+ guest entry points
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Reporting whether the BHB clearing on entry is done for the different domains
+types based on cpu_has_bhb_seq is unhelpful, as that variable signals whether
+there's a BHB clearing sequence selected, but that alone doesn't imply that
+such sequence is used from the PV and/or HVM entry points.
+
+Instead use opt_bhb_entry_{pv,hvm} which do signal whether BHB clearing is
+performed on entry from PV/HVM.
+
+Fixes: 689ad48ce9cf ('x86/spec-ctrl: Wire up the Native-BHI software sequences')
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 049ab0b2c9f1f5edb54b505fef0bc575787dafe9
+master date: 2024-04-25 16:35:56 +0200
+---
+ xen/arch/x86/spec_ctrl.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index ba4349a024..8c67d6256a 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -634,7 +634,7 @@ static void __init print_details(enum ind_thunk thunk)
+ (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ||
+ boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ||
+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ||
+- cpu_has_bhb_seq || amd_virt_spec_ctrl ||
++ opt_bhb_entry_hvm || amd_virt_spec_ctrl ||
+ opt_eager_fpu || opt_verw_hvm) ? "" : " None",
+ boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "",
+ (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ||
+@@ -643,7 +643,7 @@ static void __init print_details(enum ind_thunk thunk)
+ opt_eager_fpu ? " EAGER_FPU" : "",
+ opt_verw_hvm ? " VERW" : "",
+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ? " IBPB-entry" : "",
+- cpu_has_bhb_seq ? " BHB-entry" : "");
++ opt_bhb_entry_hvm ? " BHB-entry" : "");
+
+ #endif
+ #ifdef CONFIG_PV
+@@ -651,14 +651,14 @@ static void __init print_details(enum ind_thunk thunk)
+ (boot_cpu_has(X86_FEATURE_SC_MSR_PV) ||
+ boot_cpu_has(X86_FEATURE_SC_RSB_PV) ||
+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ||
+- cpu_has_bhb_seq ||
++ opt_bhb_entry_pv ||
+ opt_eager_fpu || opt_verw_pv) ? "" : " None",
+ boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "",
+ boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "",
+ opt_eager_fpu ? " EAGER_FPU" : "",
+ opt_verw_pv ? " VERW" : "",
+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ? " IBPB-entry" : "",
+- cpu_has_bhb_seq ? " BHB-entry" : "");
++ opt_bhb_entry_pv ? " BHB-entry" : "");
+
+ printk(" XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n",
+ opt_xpti_hwdom ? "enabled" : "disabled",
+--
+2.45.2
+
diff --git a/0006-x86-intel-ensure-Global-Performance-Counter-Control-.patch b/0006-x86-intel-ensure-Global-Performance-Counter-Control-.patch
deleted file mode 100644
index ce07803..0000000
--- a/0006-x86-intel-ensure-Global-Performance-Counter-Control-.patch
+++ /dev/null
@@ -1,74 +0,0 @@
-From d0ad2cc5eac1b5d3cfd14204d377ce2384f52607 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Fri, 2 Feb 2024 08:02:20 +0100
-Subject: [PATCH 06/67] x86/intel: ensure Global Performance Counter Control is
- setup correctly
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-When Architectural Performance Monitoring is available, the PERF_GLOBAL_CTRL
-MSR contains per-counter enable bits that is ANDed with the enable bit in the
-counter EVNTSEL MSR in order for a PMC counter to be enabled.
-
-So far the watchdog code seems to have relied on the PERF_GLOBAL_CTRL enable
-bits being set by default, but at least on some Intel Sapphire and Emerald
-Rapids this is no longer the case, and Xen reports:
-
-Testing NMI watchdog on all CPUs: 0 40 stuck
-
-The first CPU on each package is started with PERF_GLOBAL_CTRL zeroed, so PMC0
-doesn't start counting when the enable bit in EVNTSEL0 is set, due to the
-relevant enable bit in PERF_GLOBAL_CTRL not being set.
-
-Check and adjust PERF_GLOBAL_CTRL during CPU initialization so that all the
-general-purpose PMCs are enabled. Doing so brings the state of the package-BSP
-PERF_GLOBAL_CTRL in line with the rest of the CPUs on the system.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Acked-by: Jan Beulich <jbeulich@suse.com>
-master commit: 6bdb965178bbb3fc50cd4418d4770a7789956e2c
-master date: 2024-01-17 10:40:52 +0100
----
- xen/arch/x86/cpu/intel.c | 23 ++++++++++++++++++++++-
- 1 file changed, 22 insertions(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c
-index b40ac696e6..96723b5d44 100644
---- a/xen/arch/x86/cpu/intel.c
-+++ b/xen/arch/x86/cpu/intel.c
-@@ -528,9 +528,30 @@ static void cf_check init_intel(struct cpuinfo_x86 *c)
- init_intel_cacheinfo(c);
- if (c->cpuid_level > 9) {
- unsigned eax = cpuid_eax(10);
-+ unsigned int cnt = (eax >> 8) & 0xff;
-+
- /* Check for version and the number of counters */
-- if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
-+ if ((eax & 0xff) && (cnt > 1) && (cnt <= 32)) {
-+ uint64_t global_ctrl;
-+ unsigned int cnt_mask = (1UL << cnt) - 1;
-+
-+ /*
-+ * On (some?) Sapphire/Emerald Rapids platforms each
-+ * package-BSP starts with all the enable bits for the
-+ * general-purpose PMCs cleared. Adjust so counters
-+ * can be enabled from EVNTSEL.
-+ */
-+ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl);
-+ if ((global_ctrl & cnt_mask) != cnt_mask) {
-+ printk("CPU%u: invalid PERF_GLOBAL_CTRL: %#"
-+ PRIx64 " adjusting to %#" PRIx64 "\n",
-+ smp_processor_id(), global_ctrl,
-+ global_ctrl | cnt_mask);
-+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
-+ global_ctrl | cnt_mask);
-+ }
- __set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
-+ }
- }
-
- if ( !cpu_has(c, X86_FEATURE_XTOPOLOGY) )
---
-2.44.0
-
diff --git a/0006-x86-spec-adjust-logic-that-elides-lfence.patch b/0006-x86-spec-adjust-logic-that-elides-lfence.patch
new file mode 100644
index 0000000..6da96c4
--- /dev/null
+++ b/0006-x86-spec-adjust-logic-that-elides-lfence.patch
@@ -0,0 +1,75 @@
+From f0ff1d9cb96041a84a24857a6464628240deed4f Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Mon, 29 Apr 2024 09:37:29 +0200
+Subject: [PATCH 06/56] x86/spec: adjust logic that elides lfence
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+It's currently too restrictive by just checking whether there's a BHB clearing
+sequence selected. It should instead check whether BHB clearing is used on
+entry from PV or HVM specifically.
+
+Switch to use opt_bhb_entry_{pv,hvm} instead, and then remove cpu_has_bhb_seq
+since it no longer has any users.
+
+Reported-by: Jan Beulich <jbeulich@suse.com>
+Fixes: 954c983abcee ('x86/spec-ctrl: Software BHB-clearing sequences')
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 656ae8f1091bcefec9c46ec3ea3ac2118742d4f6
+master date: 2024-04-25 16:37:01 +0200
+---
+ xen/arch/x86/include/asm/cpufeature.h | 3 ---
+ xen/arch/x86/spec_ctrl.c | 6 +++---
+ 2 files changed, 3 insertions(+), 6 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h
+index 7a312c485e..3c57f55de0 100644
+--- a/xen/arch/x86/include/asm/cpufeature.h
++++ b/xen/arch/x86/include/asm/cpufeature.h
+@@ -228,9 +228,6 @@ static inline bool boot_cpu_has(unsigned int feat)
+ #define cpu_bug_fpu_ptrs boot_cpu_has(X86_BUG_FPU_PTRS)
+ #define cpu_bug_null_seg boot_cpu_has(X86_BUG_NULL_SEG)
+
+-#define cpu_has_bhb_seq (boot_cpu_has(X86_SPEC_BHB_TSX) || \
+- boot_cpu_has(X86_SPEC_BHB_LOOPS))
+-
+ enum _cache_type {
+ CACHE_TYPE_NULL = 0,
+ CACHE_TYPE_DATA = 1,
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 8c67d6256a..12c19b7eca 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -2328,7 +2328,7 @@ void __init init_speculation_mitigations(void)
+ * unconditional WRMSR. If we do have it, or we're not using any
+ * prior conditional block, then it's safe to drop the LFENCE.
+ */
+- if ( !cpu_has_bhb_seq &&
++ if ( !opt_bhb_entry_pv &&
+ (boot_cpu_has(X86_FEATURE_SC_MSR_PV) ||
+ !boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV)) )
+ setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_PV);
+@@ -2344,7 +2344,7 @@ void __init init_speculation_mitigations(void)
+ * active in the block that is skipped when interrupting guest
+ * context, then it's safe to drop the LFENCE.
+ */
+- if ( !cpu_has_bhb_seq &&
++ if ( !opt_bhb_entry_pv &&
+ (boot_cpu_has(X86_FEATURE_SC_MSR_PV) ||
+ (!boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) &&
+ !boot_cpu_has(X86_FEATURE_SC_RSB_PV))) )
+@@ -2356,7 +2356,7 @@ void __init init_speculation_mitigations(void)
+ * A BHB sequence, if used, is the only conditional action, so if we
+ * don't have it, we don't need the safety LFENCE.
+ */
+- if ( !cpu_has_bhb_seq )
++ if ( !opt_bhb_entry_hvm )
+ setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_VMX);
+ }
+
+--
+2.45.2
+
diff --git a/0007-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch b/0007-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch
deleted file mode 100644
index 2100acc..0000000
--- a/0007-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch
+++ /dev/null
@@ -1,65 +0,0 @@
-From eca5416f9b0e179de9553900de8de660ab09199d Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 2 Feb 2024 08:02:51 +0100
-Subject: [PATCH 07/67] x86/vmx: Fix IRQ handling for EXIT_REASON_INIT
-
-When receiving an INIT, a prior bugfix tried to ignore the INIT and continue
-onwards.
-
-Unfortunately it's not safe to return at that point in vmx_vmexit_handler().
-Just out of context in the first hunk is a local_irqs_enabled() which is
-depended-upon by the return-to-guest path, causing the following checklock
-failure in debug builds:
-
- (XEN) Error: INIT received - ignoring
- (XEN) CHECKLOCK FAILURE: prev irqsafe: 0, curr irqsafe 1
- (XEN) Xen BUG at common/spinlock.c:132
- (XEN) ----[ Xen-4.19-unstable x86_64 debug=y Tainted: H ]----
- ...
- (XEN) Xen call trace:
- (XEN) [<ffff82d040238e10>] R check_lock+0xcd/0xe1
- (XEN) [<ffff82d040238fe3>] F _spin_lock+0x1b/0x60
- (XEN) [<ffff82d0402ed6a8>] F pt_update_irq+0x32/0x3bb
- (XEN) [<ffff82d0402b9632>] F vmx_intr_assist+0x3b/0x51d
- (XEN) [<ffff82d040206447>] F vmx_asm_vmexit_handler+0xf7/0x210
-
-Luckily, this is benign in release builds. Accidentally having IRQs disabled
-when trying to take an IRQs-on lock isn't a deadlock-vulnerable pattern.
-
-Drop the problematic early return. In hindsight, it's wrong to skip other
-normal VMExit steps.
-
-Fixes: b1f11273d5a7 ("x86/vmx: Don't spuriously crash the domain when INIT is received")
-Reported-by: Reima ISHII <ishiir@g.ecc.u-tokyo.ac.jp>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: d1f8883aebe00f6a9632d77ab0cd5c6d02c9cbe4
-master date: 2024-01-18 20:59:06 +0000
----
- xen/arch/x86/hvm/vmx/vmx.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
-index 072288a5ef..31f4a861c6 100644
---- a/xen/arch/x86/hvm/vmx/vmx.c
-+++ b/xen/arch/x86/hvm/vmx/vmx.c
-@@ -4037,7 +4037,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
-
- case EXIT_REASON_INIT:
- printk(XENLOG_ERR "Error: INIT received - ignoring\n");
-- return; /* Renter the guest without further processing */
-+ break;
- }
-
- /* Now enable interrupts so it's safe to take locks. */
-@@ -4323,6 +4323,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
- break;
- }
- case EXIT_REASON_EXTERNAL_INTERRUPT:
-+ case EXIT_REASON_INIT:
- /* Already handled above. */
- break;
- case EXIT_REASON_TRIPLE_FAULT:
---
-2.44.0
-
diff --git a/0007-xen-xsm-Wire-up-get_dom0_console.patch b/0007-xen-xsm-Wire-up-get_dom0_console.patch
new file mode 100644
index 0000000..540541c
--- /dev/null
+++ b/0007-xen-xsm-Wire-up-get_dom0_console.patch
@@ -0,0 +1,66 @@
+From 026542c8577ab6af7c1dbc7446547bdc2bc705fd Mon Sep 17 00:00:00 2001
+From: Jason Andryuk <jason.andryuk@amd.com>
+Date: Tue, 21 May 2024 10:19:43 +0200
+Subject: [PATCH 07/56] xen/xsm: Wire up get_dom0_console
+
+An XSM hook for get_dom0_console is currently missing. Using XSM with
+a PVH dom0 shows:
+(XEN) FLASK: Denying unknown platform_op: 64.
+
+Wire up the hook, and allow it for dom0.
+
+Fixes: 4dd160583c ("x86/platform: introduce hypercall to get initial video console settings")
+Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>
+Acked-by: Daniel P. Smith <dpsmith@apertussolutions.com>
+master commit: 647f7e50ebeeb8152974cad6a12affe474c74513
+master date: 2024-04-30 08:33:41 +0200
+---
+ tools/flask/policy/modules/dom0.te | 2 +-
+ xen/xsm/flask/hooks.c | 4 ++++
+ xen/xsm/flask/policy/access_vectors | 2 ++
+ 3 files changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/tools/flask/policy/modules/dom0.te b/tools/flask/policy/modules/dom0.te
+index f1dcff48e2..16b8c9646d 100644
+--- a/tools/flask/policy/modules/dom0.te
++++ b/tools/flask/policy/modules/dom0.te
+@@ -16,7 +16,7 @@ allow dom0_t xen_t:xen {
+ allow dom0_t xen_t:xen2 {
+ resource_op psr_cmt_op psr_alloc pmu_ctrl get_symbol
+ get_cpu_levelling_caps get_cpu_featureset livepatch_op
+- coverage_op
++ coverage_op get_dom0_console
+ };
+
+ # Allow dom0 to use all XENVER_ subops that have checks.
+diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
+index 78225f68c1..5e88c71b8e 100644
+--- a/xen/xsm/flask/hooks.c
++++ b/xen/xsm/flask/hooks.c
+@@ -1558,6 +1558,10 @@ static int cf_check flask_platform_op(uint32_t op)
+ return avc_has_perm(domain_sid(current->domain), SECINITSID_XEN,
+ SECCLASS_XEN2, XEN2__GET_SYMBOL, NULL);
+
++ case XENPF_get_dom0_console:
++ return avc_has_perm(domain_sid(current->domain), SECINITSID_XEN,
++ SECCLASS_XEN2, XEN2__GET_DOM0_CONSOLE, NULL);
++
+ default:
+ return avc_unknown_permission("platform_op", op);
+ }
+diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors
+index 4e6710a63e..a35e3d4c51 100644
+--- a/xen/xsm/flask/policy/access_vectors
++++ b/xen/xsm/flask/policy/access_vectors
+@@ -99,6 +99,8 @@ class xen2
+ livepatch_op
+ # XEN_SYSCTL_coverage_op
+ coverage_op
++# XENPF_get_dom0_console
++ get_dom0_console
+ }
+
+ # Classes domain and domain2 consist of operations that a domain performs on
+--
+2.45.2
+
diff --git a/0008-x86-vmx-Disallow-the-use-of-inactivity-states.patch b/0008-x86-vmx-Disallow-the-use-of-inactivity-states.patch
deleted file mode 100644
index 3af45e8..0000000
--- a/0008-x86-vmx-Disallow-the-use-of-inactivity-states.patch
+++ /dev/null
@@ -1,126 +0,0 @@
-From 7bd612727df792671e44152a8205f0cf821ad984 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 2 Feb 2024 08:03:26 +0100
-Subject: [PATCH 08/67] x86/vmx: Disallow the use of inactivity states
-
-Right now, vvmx will blindly copy L12's ACTIVITY_STATE into the L02 VMCS and
-enter the vCPU. Luckily for us, nested-virt is explicitly unsupported for
-security bugs.
-
-The inactivity states are HLT, SHUTDOWN and WAIT-FOR-SIPI, and as noted by the
-SDM in Vol3 27.7 "Special Features of VM Entry":
-
- If VM entry ends with the logical processor in an inactive activity state,
- the VM entry generates any special bus cycle that is normally generated when
- that activity state is entered from the active state.
-
-Also,
-
- Some activity states unconditionally block certain events.
-
-I.e. A VMEntry with ACTIVITY=SHUTDOWN will initiate a platform reset, while a
-VMEntry with ACTIVITY=WAIT-FOR-SIPI will really block everything other than
-SIPIs.
-
-Both of these activity states are for the TXT ACM to use, not for regular
-hypervisors, and Xen doesn't support dropping the HLT intercept either.
-
-There are two paths in Xen which operate on ACTIVITY_STATE.
-
-1) The vmx_{get,set}_nonreg_state() helpers for VM-Fork.
-
- As regular VMs can't use any inactivity states, this is just duplicating
- the 0 from construct_vmcs(). Retain the ability to query activity_state,
- but crash the domain on any attempt to set an inactivity state.
-
-2) Nested virt, because of ACTIVITY_STATE in vmcs_gstate_field[].
-
- Explicitly hide the inactivity states in the guest's view of MSR_VMX_MISC,
- and remove ACTIVITY_STATE from vmcs_gstate_field[].
-
- In virtual_vmentry(), we should trigger a VMEntry failure for the use of
- any inactivity states, but there's no support for that in the code at all
- so leave a TODO for when we finally start working on nested-virt in
- earnest.
-
-Reported-by: Reima Ishii <ishiir@g.ecc.u-tokyo.ac.jp>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Tamas K Lengyel <tamas@tklengyel.com>
-master commit: 3643bb53a05b7c8fbac072c63bef1538f2a6d0d2
-master date: 2024-01-18 20:59:06 +0000
----
- xen/arch/x86/hvm/vmx/vmx.c | 8 +++++++-
- xen/arch/x86/hvm/vmx/vvmx.c | 9 +++++++--
- xen/arch/x86/include/asm/hvm/vmx/vmcs.h | 1 +
- 3 files changed, 15 insertions(+), 3 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
-index 31f4a861c6..35d391d8e5 100644
---- a/xen/arch/x86/hvm/vmx/vmx.c
-+++ b/xen/arch/x86/hvm/vmx/vmx.c
-@@ -1499,7 +1499,13 @@ static void cf_check vmx_set_nonreg_state(struct vcpu *v,
- {
- vmx_vmcs_enter(v);
-
-- __vmwrite(GUEST_ACTIVITY_STATE, nrs->vmx.activity_state);
-+ if ( nrs->vmx.activity_state )
-+ {
-+ printk("Attempt to set %pv activity_state %#lx\n",
-+ v, nrs->vmx.activity_state);
-+ domain_crash(v->domain);
-+ }
-+
- __vmwrite(GUEST_INTERRUPTIBILITY_INFO, nrs->vmx.interruptibility_info);
- __vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, nrs->vmx.pending_dbg);
-
-diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
-index f8fe8d0c14..515cb5ae77 100644
---- a/xen/arch/x86/hvm/vmx/vvmx.c
-+++ b/xen/arch/x86/hvm/vmx/vvmx.c
-@@ -910,7 +910,10 @@ static const u16 vmcs_gstate_field[] = {
- GUEST_LDTR_AR_BYTES,
- GUEST_TR_AR_BYTES,
- GUEST_INTERRUPTIBILITY_INFO,
-+ /*
-+ * ACTIVITY_STATE is handled specially.
- GUEST_ACTIVITY_STATE,
-+ */
- GUEST_SYSENTER_CS,
- GUEST_PREEMPTION_TIMER,
- /* natural */
-@@ -1211,6 +1214,8 @@ static void virtual_vmentry(struct cpu_user_regs *regs)
- nvcpu->nv_vmentry_pending = 0;
- nvcpu->nv_vmswitch_in_progress = 1;
-
-+ /* TODO: Fail VMentry for GUEST_ACTIVITY_STATE != 0 */
-+
- /*
- * EFER handling:
- * hvm_set_efer won't work if CR0.PG = 1, so we change the value
-@@ -2327,8 +2332,8 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
- data = hvm_cr4_guest_valid_bits(d);
- break;
- case MSR_IA32_VMX_MISC:
-- /* Do not support CR3-target feature now */
-- data = host_data & ~VMX_MISC_CR3_TARGET;
-+ /* Do not support CR3-targets or activity states. */
-+ data = host_data & ~(VMX_MISC_CR3_TARGET | VMX_MISC_ACTIVITY_MASK);
- break;
- case MSR_IA32_VMX_EPT_VPID_CAP:
- data = nept_get_ept_vpid_cap();
-diff --git a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
-index 78404e42b3..0af021d5f5 100644
---- a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
-+++ b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
-@@ -288,6 +288,7 @@ extern u32 vmx_secondary_exec_control;
- #define VMX_VPID_INVVPID_SINGLE_CONTEXT_RETAINING_GLOBAL 0x80000000000ULL
- extern u64 vmx_ept_vpid_cap;
-
-+#define VMX_MISC_ACTIVITY_MASK 0x000001c0
- #define VMX_MISC_PROC_TRACE 0x00004000
- #define VMX_MISC_CR3_TARGET 0x01ff0000
- #define VMX_MISC_VMWRITE_ALL 0x20000000
---
-2.44.0
-
diff --git a/0008-xen-x86-Fix-Syntax-warning-in-gen-cpuid.py.patch b/0008-xen-x86-Fix-Syntax-warning-in-gen-cpuid.py.patch
new file mode 100644
index 0000000..7c04f23
--- /dev/null
+++ b/0008-xen-x86-Fix-Syntax-warning-in-gen-cpuid.py.patch
@@ -0,0 +1,41 @@
+From 47cf06c09a2fa1ee92ea3e7718c8f8e0f1450d88 Mon Sep 17 00:00:00 2001
+From: Jason Andryuk <jason.andryuk@amd.com>
+Date: Tue, 21 May 2024 10:20:06 +0200
+Subject: [PATCH 08/56] xen/x86: Fix Syntax warning in gen-cpuid.py
+
+Python 3.12.2 warns:
+
+xen/tools/gen-cpuid.py:50: SyntaxWarning: invalid escape sequence '\s'
+ "\s+([\s\d]+\*[\s\d]+\+[\s\d]+)\)"
+xen/tools/gen-cpuid.py:51: SyntaxWarning: invalid escape sequence '\s'
+ "\s+/\*([\w!]*) .*$")
+
+Specify the strings as raw strings so '\s' is read as literal '\' + 's'.
+This avoids escaping all the '\'s in the strings.
+
+Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 08e79bba73d74a85d3ce6ff0f91c5205f1e05eda
+master date: 2024-04-30 08:34:37 +0200
+---
+ xen/tools/gen-cpuid.py | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py
+index 02dd45a5ed..415d644db5 100755
+--- a/xen/tools/gen-cpuid.py
++++ b/xen/tools/gen-cpuid.py
+@@ -47,8 +47,8 @@ def parse_definitions(state):
+ """
+ feat_regex = re.compile(
+ r"^XEN_CPUFEATURE\(([A-Z0-9_]+),"
+- "\s+([\s\d]+\*[\s\d]+\+[\s\d]+)\)"
+- "\s+/\*([\w!]*) .*$")
++ r"\s+([\s\d]+\*[\s\d]+\+[\s\d]+)\)"
++ r"\s+/\*([\w!]*) .*$")
+
+ word_regex = re.compile(
+ r"^/\* .* word (\d*) \*/$")
+--
+2.45.2
+
diff --git a/0009-VT-d-correct-ATS-checking-for-root-complex-integrate.patch b/0009-VT-d-correct-ATS-checking-for-root-complex-integrate.patch
new file mode 100644
index 0000000..2d2dc91
--- /dev/null
+++ b/0009-VT-d-correct-ATS-checking-for-root-complex-integrate.patch
@@ -0,0 +1,63 @@
+From a4c5bbb9db07b27e66f7c47676b1c888e1bece20 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 21 May 2024 10:20:58 +0200
+Subject: [PATCH 09/56] VT-d: correct ATS checking for root complex integrated
+ devices
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Spec version 4.1 says
+
+"The ATSR structures identifies PCI Express Root-Ports supporting
+ Address Translation Services (ATS) transactions. Software must enable
+ ATS on endpoint devices behind a Root Port only if the Root Port is
+ reported as supporting ATS transactions."
+
+Clearly root complex integrated devices aren't "behind root ports",
+matching my observation on a SapphireRapids system having an ATS-
+capable root complex integrated device. Hence for such devices we
+shouldn't try to locate a corresponding ATSR.
+
+Since both pci_find_ext_capability() and pci_find_cap_offset() return
+"unsigned int", change "pos" to that type at the same time.
+
+Fixes: 903b93211f56 ("[VTD] laying the ground work for ATS")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: 04e31583bab97e5042a44a1d00fce2760272635f
+master date: 2024-05-06 09:22:45 +0200
+---
+ xen/drivers/passthrough/vtd/x86/ats.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/xen/drivers/passthrough/vtd/x86/ats.c b/xen/drivers/passthrough/vtd/x86/ats.c
+index 1f5913bed9..61052ef580 100644
+--- a/xen/drivers/passthrough/vtd/x86/ats.c
++++ b/xen/drivers/passthrough/vtd/x86/ats.c
+@@ -44,7 +44,7 @@ struct acpi_drhd_unit *find_ats_dev_drhd(struct vtd_iommu *iommu)
+ int ats_device(const struct pci_dev *pdev, const struct acpi_drhd_unit *drhd)
+ {
+ struct acpi_drhd_unit *ats_drhd;
+- int pos;
++ unsigned int pos, expfl = 0;
+
+ if ( !ats_enabled || !iommu_qinval )
+ return 0;
+@@ -53,7 +53,12 @@ int ats_device(const struct pci_dev *pdev, const struct acpi_drhd_unit *drhd)
+ !ecap_dev_iotlb(drhd->iommu->ecap) )
+ return 0;
+
+- if ( !acpi_find_matched_atsr_unit(pdev) )
++ pos = pci_find_cap_offset(pdev->sbdf, PCI_CAP_ID_EXP);
++ if ( pos )
++ expfl = pci_conf_read16(pdev->sbdf, pos + PCI_EXP_FLAGS);
++
++ if ( MASK_EXTR(expfl, PCI_EXP_FLAGS_TYPE) != PCI_EXP_TYPE_RC_END &&
++ !acpi_find_matched_atsr_unit(pdev) )
+ return 0;
+
+ ats_drhd = find_ats_dev_drhd(drhd->iommu);
+--
+2.45.2
+
diff --git a/0009-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch b/0009-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch
deleted file mode 100644
index f33d27d..0000000
--- a/0009-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch
+++ /dev/null
@@ -1,70 +0,0 @@
-From afb85cf1e8f165abf88de9d8a6df625692a753b1 Mon Sep 17 00:00:00 2001
-From: Michal Orzel <michal.orzel@amd.com>
-Date: Fri, 2 Feb 2024 08:04:07 +0100
-Subject: [PATCH 09/67] lib{fdt,elf}: move lib{fdt,elf}-temp.o and their deps
- to $(targets)
-
-At the moment, trying to run xencov read/reset (calling SYSCTL_coverage_op
-under the hood) results in a crash. This is due to a profiler trying to
-access data in the .init.* sections (libfdt for Arm and libelf for x86)
-that are stripped after boot. Normally, the build system compiles any
-*.init.o file without COV_FLAGS. However, these two libraries are
-handled differently as sections will be renamed to init after linking.
-
-To override COV_FLAGS to empty for these libraries, lib{fdt,elf}.o were
-added to nocov-y. This worked until e321576f4047 ("xen/build: start using
-if_changed") that added lib{fdt,elf}-temp.o and their deps to extra-y.
-This way, even though these objects appear as prerequisites of
-lib{fdt,elf}.o and the settings should propagate to them, make can also
-build them as a prerequisite of __build, in which case COV_FLAGS would
-still have the unwanted flags. Fix it by switching to $(targets) instead.
-
-Also, for libfdt, append libfdt.o to nocov-y only if CONFIG_OVERLAY_DTB
-is not set. Otherwise, there is no section renaming and we should be able
-to run the coverage.
-
-Fixes: e321576f4047 ("xen/build: start using if_changed")
-Signed-off-by: Michal Orzel <michal.orzel@amd.com>
-Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
-Acked-by: Jan Beulich <jbeulich@suse.com>
-master commit: 79519fcfa0605bbf19d8c02b979af3a2c8afed68
-master date: 2024-01-23 12:02:44 +0100
----
- xen/common/libelf/Makefile | 2 +-
- xen/common/libfdt/Makefile | 4 ++--
- 2 files changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/xen/common/libelf/Makefile b/xen/common/libelf/Makefile
-index 8a4522e4e1..917d12b006 100644
---- a/xen/common/libelf/Makefile
-+++ b/xen/common/libelf/Makefile
-@@ -13,4 +13,4 @@ $(obj)/libelf.o: $(obj)/libelf-temp.o FORCE
- $(obj)/libelf-temp.o: $(addprefix $(obj)/,$(libelf-objs)) FORCE
- $(call if_changed,ld)
-
--extra-y += libelf-temp.o $(libelf-objs)
-+targets += libelf-temp.o $(libelf-objs)
-diff --git a/xen/common/libfdt/Makefile b/xen/common/libfdt/Makefile
-index 75aaefa2e3..4d14fd61ba 100644
---- a/xen/common/libfdt/Makefile
-+++ b/xen/common/libfdt/Makefile
-@@ -2,9 +2,9 @@ include $(src)/Makefile.libfdt
-
- SECTIONS := text data $(SPECIAL_DATA_SECTIONS)
- OBJCOPYFLAGS := $(foreach s,$(SECTIONS),--rename-section .$(s)=.init.$(s))
-+nocov-y += libfdt.o
-
- obj-y += libfdt.o
--nocov-y += libfdt.o
-
- CFLAGS-y += -I$(srctree)/include/xen/libfdt/
-
-@@ -14,4 +14,4 @@ $(obj)/libfdt.o: $(obj)/libfdt-temp.o FORCE
- $(obj)/libfdt-temp.o: $(addprefix $(obj)/,$(LIBFDT_OBJS)) FORCE
- $(call if_changed,ld)
-
--extra-y += libfdt-temp.o $(LIBFDT_OBJS)
-+targets += libfdt-temp.o $(LIBFDT_OBJS)
---
-2.44.0
-
diff --git a/0010-tools-libxs-Open-dev-xen-xenbus-fds-as-O_CLOEXEC.patch b/0010-tools-libxs-Open-dev-xen-xenbus-fds-as-O_CLOEXEC.patch
new file mode 100644
index 0000000..9f9cdd7
--- /dev/null
+++ b/0010-tools-libxs-Open-dev-xen-xenbus-fds-as-O_CLOEXEC.patch
@@ -0,0 +1,47 @@
+From 2bc52041cacb33a301ebf939d69a021597941186 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 21 May 2024 10:21:47 +0200
+Subject: [PATCH 10/56] tools/libxs: Open /dev/xen/xenbus fds as O_CLOEXEC
+
+The header description for xs_open() goes as far as to suggest that the fd is
+O_CLOEXEC, but it isn't actually.
+
+`xl devd` has been observed leaking /dev/xen/xenbus into children.
+
+Link: https://github.com/QubesOS/qubes-issues/issues/8292
+Reported-by: Demi Marie Obenour <demi@invisiblethingslab.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+master commit: f4f2f3402b2f4985d69ffc0d46f845d05fd0b60f
+master date: 2024-05-07 15:18:36 +0100
+---
+ tools/libs/store/xs.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/tools/libs/store/xs.c b/tools/libs/store/xs.c
+index 140b9a2839..1498515073 100644
+--- a/tools/libs/store/xs.c
++++ b/tools/libs/store/xs.c
+@@ -54,6 +54,10 @@ struct xs_stored_msg {
+ #include <dlfcn.h>
+ #endif
+
++#ifndef O_CLOEXEC
++#define O_CLOEXEC 0
++#endif
++
+ struct xs_handle {
+ /* Communications channel to xenstore daemon. */
+ int fd;
+@@ -227,7 +231,7 @@ error:
+ static int get_dev(const char *connect_to)
+ {
+ /* We cannot open read-only because requests are writes */
+- return open(connect_to, O_RDWR);
++ return open(connect_to, O_RDWR | O_CLOEXEC);
+ }
+
+ static int all_restrict_cb(Xentoolcore__Active_Handle *ah, domid_t domid) {
+--
+2.45.2
+
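The effect of the change is easy to verify from a throwaway program: open a
descriptor with O_RDWR | O_CLOEXEC and confirm via fcntl(F_GETFD) that the
close-on-exec flag is set. The sketch below uses /dev/null instead of the
xenbus device so it runs anywhere, and carries the same O_CLOEXEC fallback as
the patch:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    #ifndef O_CLOEXEC
    #define O_CLOEXEC 0   /* degrade gracefully on very old systems */
    #endif

    int main(void)
    {
        int fd = open("/dev/null", O_RDWR | O_CLOEXEC);
        int flags;

        if ( fd < 0 )
        {
            perror("open");
            return 1;
        }

        flags = fcntl(fd, F_GETFD);
        printf("FD_CLOEXEC is %s\n",
               (flags != -1 && (flags & FD_CLOEXEC)) ? "set" : "not set");
        close(fd);

        return 0;
    }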
diff --git a/0010-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch b/0010-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch
deleted file mode 100644
index 9b3b9a0..0000000
--- a/0010-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch
+++ /dev/null
@@ -1,36 +0,0 @@
-From 091466ba55d1e2e75738f751818ace2e3ed08ccf Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Fri, 2 Feb 2024 08:04:33 +0100
-Subject: [PATCH 10/67] x86/p2m-pt: fix off by one in entry check assert
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The MMIO RO rangeset overlap check is bogus: the rangeset is inclusive so the
-passed end mfn should be the last mfn to be mapped (not last + 1).
-
-Fixes: 6fa1755644d0 ('amd/npt/shadow: replace assert that prevents creating 2M/1G MMIO entries')
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: George Dunlap <george.dunlap@cloud.com>
-master commit: 610775d0dd61c1bd2f4720c755986098e6a5bafd
-master date: 2024-01-25 16:09:04 +0100
----
- xen/arch/x86/mm/p2m-pt.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c
-index eaba2b0fb4..f02ebae372 100644
---- a/xen/arch/x86/mm/p2m-pt.c
-+++ b/xen/arch/x86/mm/p2m-pt.c
-@@ -564,7 +564,7 @@ static void check_entry(mfn_t mfn, p2m_type_t new, p2m_type_t old,
- if ( new == p2m_mmio_direct )
- ASSERT(!mfn_eq(mfn, INVALID_MFN) &&
- !rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn),
-- mfn_x(mfn) + (1ul << order)));
-+ mfn_x(mfn) + (1UL << order) - 1));
- else if ( p2m_allows_invalid_mfn(new) || new == p2m_invalid ||
- new == p2m_mmio_dm )
- ASSERT(mfn_valid(mfn) || mfn_eq(mfn, INVALID_MFN));
---
-2.44.0
-
diff --git a/0011-tools-xentop-fix-sorting-bug-for-some-columns.patch b/0011-tools-xentop-fix-sorting-bug-for-some-columns.patch
deleted file mode 100644
index 6bf11d9..0000000
--- a/0011-tools-xentop-fix-sorting-bug-for-some-columns.patch
+++ /dev/null
@@ -1,67 +0,0 @@
-From 61da71968ea44964fd1dd2e449b053c77eb83139 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Cyril=20R=C3=A9bert=20=28zithro=29?= <slack@rabbit.lu>
-Date: Tue, 27 Feb 2024 14:06:53 +0100
-Subject: [PATCH 11/67] tools/xentop: fix sorting bug for some columns
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Sort doesn't work on columns VBD_OO, VBD_RD, VBD_WR and VBD_RSECT.
-Fix by adjusting variables names in compare functions.
-Bug fix only. No functional change.
-
-Fixes: 91c3e3dc91d6 ("tools/xentop: Display '-' when stats are not available.")
-Signed-off-by: Cyril Rébert (zithro) <slack@rabbit.lu>
-Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
-master commit: 29f17d837421f13c0e0010802de1b2d51d2ded4a
-master date: 2024-02-05 17:58:23 +0000
----
- tools/xentop/xentop.c | 10 +++++-----
- 1 file changed, 5 insertions(+), 5 deletions(-)
-
-diff --git a/tools/xentop/xentop.c b/tools/xentop/xentop.c
-index 950e8935c4..545bd5e96d 100644
---- a/tools/xentop/xentop.c
-+++ b/tools/xentop/xentop.c
-@@ -684,7 +684,7 @@ static int compare_vbd_oo(xenstat_domain *domain1, xenstat_domain *domain2)
- unsigned long long dom1_vbd_oo = 0, dom2_vbd_oo = 0;
-
- tot_vbd_reqs(domain1, FIELD_VBD_OO, &dom1_vbd_oo);
-- tot_vbd_reqs(domain1, FIELD_VBD_OO, &dom2_vbd_oo);
-+ tot_vbd_reqs(domain2, FIELD_VBD_OO, &dom2_vbd_oo);
-
- return -compare(dom1_vbd_oo, dom2_vbd_oo);
- }
-@@ -711,9 +711,9 @@ static int compare_vbd_rd(xenstat_domain *domain1, xenstat_domain *domain2)
- unsigned long long dom1_vbd_rd = 0, dom2_vbd_rd = 0;
-
- tot_vbd_reqs(domain1, FIELD_VBD_RD, &dom1_vbd_rd);
-- tot_vbd_reqs(domain1, FIELD_VBD_RD, &dom2_vbd_rd);
-+ tot_vbd_reqs(domain2, FIELD_VBD_RD, &dom2_vbd_rd);
-
-- return -compare(dom1_vbd_rd, dom1_vbd_rd);
-+ return -compare(dom1_vbd_rd, dom2_vbd_rd);
- }
-
- /* Prints number of total VBD READ requests statistic */
-@@ -738,7 +738,7 @@ static int compare_vbd_wr(xenstat_domain *domain1, xenstat_domain *domain2)
- unsigned long long dom1_vbd_wr = 0, dom2_vbd_wr = 0;
-
- tot_vbd_reqs(domain1, FIELD_VBD_WR, &dom1_vbd_wr);
-- tot_vbd_reqs(domain1, FIELD_VBD_WR, &dom2_vbd_wr);
-+ tot_vbd_reqs(domain2, FIELD_VBD_WR, &dom2_vbd_wr);
-
- return -compare(dom1_vbd_wr, dom2_vbd_wr);
- }
-@@ -765,7 +765,7 @@ static int compare_vbd_rsect(xenstat_domain *domain1, xenstat_domain *domain2)
- unsigned long long dom1_vbd_rsect = 0, dom2_vbd_rsect = 0;
-
- tot_vbd_reqs(domain1, FIELD_VBD_RSECT, &dom1_vbd_rsect);
-- tot_vbd_reqs(domain1, FIELD_VBD_RSECT, &dom2_vbd_rsect);
-+ tot_vbd_reqs(domain2, FIELD_VBD_RSECT, &dom2_vbd_rsect);
-
- return -compare(dom1_vbd_rsect, dom2_vbd_rsect);
- }
---
-2.44.0
-
diff --git a/0011-x86-cpu-policy-Fix-migration-from-Ice-Lake-to-Cascad.patch b/0011-x86-cpu-policy-Fix-migration-from-Ice-Lake-to-Cascad.patch
new file mode 100644
index 0000000..26eb3ec
--- /dev/null
+++ b/0011-x86-cpu-policy-Fix-migration-from-Ice-Lake-to-Cascad.patch
@@ -0,0 +1,92 @@
+From 0673eae8e53de5007dba35149527579819428323 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 21 May 2024 10:22:08 +0200
+Subject: [PATCH 11/56] x86/cpu-policy: Fix migration from Ice Lake to Cascade
+ Lake
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Ever since Xen 4.14, there has been a latent bug with migration.
+
+While some toolstacks can level the features properly, they don't shrink
+feat.max_subleaf when all features have been dropped. This is because
+we *still* have not completed the toolstack side work for full CPU Policy
+objects.
+
+As a consequence, even when properly feature levelled, VMs can't migrate
+"backwards" across hardware which reduces feat.max_subleaf. One such example
+is Ice Lake (max_subleaf=2 for INTEL_PSFD) to Cascade Lake (max_subleaf=0).
+
+Extend the max policies' feat.max_subleaf to the highest number Xen knows
+about, but leave the default policies matching the host. This will allow VMs
+with a higher feat.max_subleaf than strictly necessary to migrate in.
+
+Eventually we'll manage to teach the toolstack how to avoid creating such VMs
+in the first place, but there's still more work to do there.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: a2330b51df267e20e66bbba6c5bf08f0570ed58b
+master date: 2024-05-07 16:56:46 +0100
+---
+ xen/arch/x86/cpu-policy.c | 22 ++++++++++++++++++++++
+ 1 file changed, 22 insertions(+)
+
+diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
+index a822800f52..1aba6ed4ca 100644
+--- a/xen/arch/x86/cpu-policy.c
++++ b/xen/arch/x86/cpu-policy.c
+@@ -603,6 +603,13 @@ static void __init calculate_pv_max_policy(void)
+ unsigned int i;
+
+ *p = host_cpu_policy;
++
++ /*
++ * Some VMs may have a larger-than-necessary feat max_subleaf. Allow them
++ * to migrate in.
++ */
++ p->feat.max_subleaf = ARRAY_SIZE(p->feat.raw) - 1;
++
+ x86_cpu_policy_to_featureset(p, fs);
+
+ for ( i = 0; i < ARRAY_SIZE(fs); ++i )
+@@ -643,6 +650,10 @@ static void __init calculate_pv_def_policy(void)
+ unsigned int i;
+
+ *p = pv_max_cpu_policy;
++
++ /* Default to the same max_subleaf as the host. */
++ p->feat.max_subleaf = host_cpu_policy.feat.max_subleaf;
++
+ x86_cpu_policy_to_featureset(p, fs);
+
+ for ( i = 0; i < ARRAY_SIZE(fs); ++i )
+@@ -679,6 +690,13 @@ static void __init calculate_hvm_max_policy(void)
+ const uint32_t *mask;
+
+ *p = host_cpu_policy;
++
++ /*
++ * Some VMs may have a larger-than-necessary feat max_subleaf. Allow them
++ * to migrate in.
++ */
++ p->feat.max_subleaf = ARRAY_SIZE(p->feat.raw) - 1;
++
+ x86_cpu_policy_to_featureset(p, fs);
+
+ mask = hvm_hap_supported() ?
+@@ -780,6 +798,10 @@ static void __init calculate_hvm_def_policy(void)
+ const uint32_t *mask;
+
+ *p = hvm_max_cpu_policy;
++
++ /* Default to the same max_subleaf as the host. */
++ p->feat.max_subleaf = host_cpu_policy.feat.max_subleaf;
++
+ x86_cpu_policy_to_featureset(p, fs);
+
+ mask = hvm_hap_supported() ?
+--
+2.45.2
+
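The max-versus-default split is easier to see in isolation. The toy model
below is not Xen code: the structure, the subleaf count and the host value are
invented purely to show that the max policy advertises every subleaf Xen knows
about while the default policy keeps the host's value:

    #include <stdio.h>

    #define KNOWN_SUBLEAVES 3              /* hypothetical */

    struct policy { unsigned int max_subleaf; };

    int main(void)
    {
        struct policy host = { .max_subleaf = 0 };   /* e.g. Cascade Lake */
        struct policy max = host, def = host;

        /* Max policy: allow guests with a larger-than-necessary max_subleaf
         * to migrate in. */
        max.max_subleaf = KNOWN_SUBLEAVES - 1;

        /* Default policy: match the host. */
        def.max_subleaf = host.max_subleaf;

        printf("host=%u max=%u default=%u\n",
               host.max_subleaf, max.max_subleaf, def.max_subleaf);

        return 0;
    }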
diff --git a/0012-amd-vi-fix-IVMD-memory-type-checks.patch b/0012-amd-vi-fix-IVMD-memory-type-checks.patch
deleted file mode 100644
index f38e39e..0000000
--- a/0012-amd-vi-fix-IVMD-memory-type-checks.patch
+++ /dev/null
@@ -1,53 +0,0 @@
-From 463aaf3fbf62d24e898ae0c2ba53d85ca0f94d3f Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 27 Feb 2024 14:07:12 +0100
-Subject: [PATCH 12/67] amd-vi: fix IVMD memory type checks
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The current code that parses the IVMD blocks is relaxed with regard to the
-restriction that such unity regions should always fall into memory ranges
-marked as reserved in the memory map.
-
-However the type checks for the IVMD addresses are inverted, and as a result
-IVMD ranges falling into RAM areas are accepted. Note that having such ranges
-in the first place is a firmware bug, as IVMD should always fall into reserved
-ranges.
-
-Fixes: ed6c77ebf0c1 ('AMD/IOMMU: check / convert IVMD ranges for being / to be reserved')
-Reported-by: Ox <oxjo@proton.me>
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Tested-by: oxjo <oxjo@proton.me>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 83afa313583019d9f159c122cecf867735d27ec5
-master date: 2024-02-06 11:56:13 +0100
----
- xen/drivers/passthrough/amd/iommu_acpi.c | 11 ++++++++---
- 1 file changed, 8 insertions(+), 3 deletions(-)
-
-diff --git a/xen/drivers/passthrough/amd/iommu_acpi.c b/xen/drivers/passthrough/amd/iommu_acpi.c
-index 3b577c9b39..3a7045c39b 100644
---- a/xen/drivers/passthrough/amd/iommu_acpi.c
-+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
-@@ -426,9 +426,14 @@ static int __init parse_ivmd_block(const struct acpi_ivrs_memory *ivmd_block)
- return -EIO;
- }
-
-- /* Types which won't be handed out are considered good enough. */
-- if ( !(type & (RAM_TYPE_RESERVED | RAM_TYPE_ACPI |
-- RAM_TYPE_UNUSABLE)) )
-+ /*
-+ * Types which aren't RAM are considered good enough.
-+ * Note that a page being partially RESERVED, ACPI or UNUSABLE will
-+ * force Xen into assuming the whole page as having that type in
-+ * practice.
-+ */
-+ if ( type & (RAM_TYPE_RESERVED | RAM_TYPE_ACPI |
-+ RAM_TYPE_UNUSABLE) )
- continue;
-
- AMD_IOMMU_ERROR("IVMD: page at %lx can't be converted\n", addr);
---
-2.44.0
-
diff --git a/0012-x86-ucode-Distinguish-ucode-already-up-to-date.patch b/0012-x86-ucode-Distinguish-ucode-already-up-to-date.patch
new file mode 100644
index 0000000..dd2f91a
--- /dev/null
+++ b/0012-x86-ucode-Distinguish-ucode-already-up-to-date.patch
@@ -0,0 +1,58 @@
+From a42c83b202cc034c43c723cf363dbbabac61b1af Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 21 May 2024 10:22:52 +0200
+Subject: [PATCH 12/56] x86/ucode: Distinguish "ucode already up to date"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Right now, Xen returns -ENOENT for both "the provided blob isn't correct for
+this CPU", and "the blob isn't newer than what's loaded".
+
+This in turn causes xen-ucode to exit with an error, when "nothing to do" is
+more commonly a success condition.
+
+Handle EEXIST specially and exit cleanly.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: 648db37a155aca6f66d4cf3bb118417a728c3579
+master date: 2024-05-09 18:19:49 +0100
+---
+ tools/misc/xen-ucode.c | 5 ++++-
+ xen/arch/x86/cpu/microcode/core.c | 2 +-
+ 2 files changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/tools/misc/xen-ucode.c b/tools/misc/xen-ucode.c
+index c6ae6498d6..390969db3d 100644
+--- a/tools/misc/xen-ucode.c
++++ b/tools/misc/xen-ucode.c
+@@ -125,8 +125,11 @@ int main(int argc, char *argv[])
+ exit(1);
+ }
+
++ errno = 0;
+ ret = xc_microcode_update(xch, buf, len);
+- if ( ret )
++ if ( ret == -1 && errno == EEXIST )
++ printf("Microcode already up to date\n");
++ else if ( ret )
+ {
+ fprintf(stderr, "Failed to update microcode. (err: %s)\n",
+ strerror(errno));
+diff --git a/xen/arch/x86/cpu/microcode/core.c b/xen/arch/x86/cpu/microcode/core.c
+index 4e011cdc41..d5338ad345 100644
+--- a/xen/arch/x86/cpu/microcode/core.c
++++ b/xen/arch/x86/cpu/microcode/core.c
+@@ -640,7 +640,7 @@ static long cf_check microcode_update_helper(void *data)
+ "microcode: couldn't find any newer%s revision in the provided blob!\n",
+ opt_ucode_allow_same ? " (or the same)" : "");
+ microcode_free_patch(patch);
+- ret = -ENOENT;
++ ret = -EEXIST;
+
+ goto put;
+ }
+--
+2.45.2
+
diff --git a/0013-libxl-fix-population-of-the-online-vCPU-bitmap-for-P.patch b/0013-libxl-fix-population-of-the-online-vCPU-bitmap-for-P.patch
new file mode 100644
index 0000000..e5fb285
--- /dev/null
+++ b/0013-libxl-fix-population-of-the-online-vCPU-bitmap-for-P.patch
@@ -0,0 +1,61 @@
+From 9966e5413133157a630f7462518005fb898e582a Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 21 May 2024 10:23:27 +0200
+Subject: [PATCH 13/56] libxl: fix population of the online vCPU bitmap for PVH
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+libxl passes some information to libacpi to create the ACPI table for a PVH
+guest, and among that information it's a bitmap of which vCPUs are online
+which can be less than the maximum number of vCPUs assigned to the domain.
+
+While the population of the bitmap is done correctly for HVM based on the
+number of online vCPUs, for PVH the population of the bitmap is done based on
+the number of maximum vCPUs allowed. This leads to all local APIC entries in
+the MADT being set as enabled, which contradicts the data in xenstore if the
+number of online vCPUs differs from the maximum.
+
+Fix by copying the internal libxl bitmap that's populated based on the vCPUs
+parameter.
+
+Reported-by: Arthur Borsboom <arthurborsboom@gmail.com>
+Link: https://gitlab.com/libvirt/libvirt/-/issues/399
+Reported-by: Leigh Brown <leigh@solinno.co.uk>
+Fixes: 14c0d328da2b ('libxl/acpi: Build ACPI tables for HVMlite guests')
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Tested-by: Leigh Brown <leigh@solinno.co.uk>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 5cc7347b04b2d0a3133754c7a9b936f614ec656a
+master date: 2024-05-11 00:13:43 +0100
+---
+ tools/libs/light/libxl_x86_acpi.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/tools/libs/light/libxl_x86_acpi.c b/tools/libs/light/libxl_x86_acpi.c
+index 620f3c700c..5cf261bd67 100644
+--- a/tools/libs/light/libxl_x86_acpi.c
++++ b/tools/libs/light/libxl_x86_acpi.c
+@@ -89,7 +89,7 @@ static int init_acpi_config(libxl__gc *gc,
+ uint32_t domid = dom->guest_domid;
+ xc_domaininfo_t info;
+ struct hvm_info_table *hvminfo;
+- int i, r, rc;
++ int r, rc;
+
+ config->dsdt_anycpu = config->dsdt_15cpu = dsdt_pvh;
+ config->dsdt_anycpu_len = config->dsdt_15cpu_len = dsdt_pvh_len;
+@@ -138,8 +138,8 @@ static int init_acpi_config(libxl__gc *gc,
+ hvminfo->nr_vcpus = info.max_vcpu_id + 1;
+ }
+
+- for (i = 0; i < hvminfo->nr_vcpus; i++)
+- hvminfo->vcpu_online[i / 8] |= 1 << (i & 7);
++ memcpy(hvminfo->vcpu_online, b_info->avail_vcpus.map,
++ b_info->avail_vcpus.size);
+
+ config->hvminfo = hvminfo;
+
+--
+2.45.2
+
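The difference between the old and the fixed bitmap population can be shown
with a small standalone toy; the sizes and the availability mask are made up
and do not correspond to libxl's types:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define MAX_VCPUS 8

    int main(void)
    {
        uint8_t avail[MAX_VCPUS / 8] = { 0x05 };     /* vCPUs 0 and 2 online */
        uint8_t vcpu_online[MAX_VCPUS / 8];
        int i;

        /* Old behaviour for PVH: mark every vCPU up to the maximum online. */
        memset(vcpu_online, 0, sizeof(vcpu_online));
        for ( i = 0; i < MAX_VCPUS; i++ )
            vcpu_online[i / 8] |= 1 << (i & 7);
        printf("max-based:   %#x\n", vcpu_online[0]);   /* 0xff */

        /* Fixed behaviour: copy the availability bitmap verbatim. */
        memcpy(vcpu_online, avail, sizeof(avail));
        printf("avail-based: %#x\n", vcpu_online[0]);   /* 0x5 */

        return 0;
    }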
diff --git a/0013-x86-hvm-Fix-fast-singlestep-state-persistence.patch b/0013-x86-hvm-Fix-fast-singlestep-state-persistence.patch
deleted file mode 100644
index 2a14354..0000000
--- a/0013-x86-hvm-Fix-fast-singlestep-state-persistence.patch
+++ /dev/null
@@ -1,86 +0,0 @@
-From 415f770d23f9fcbc02436560fa6583dcd8e1343f Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Petr=20Bene=C5=A1?= <w1benny@gmail.com>
-Date: Tue, 27 Feb 2024 14:07:45 +0100
-Subject: [PATCH 13/67] x86/hvm: Fix fast singlestep state persistence
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-This patch addresses an issue where the fast singlestep setting would persist
-despite xc_domain_debug_control being called with XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF.
-Specifically, if fast singlestep was enabled in a VMI session and that session
-stopped before the MTF trap occurred, the fast singlestep setting remained
-active even though MTF itself was disabled. This led to a situation where, upon
-starting a new VMI session, the first event to trigger an EPT violation would
-cause the corresponding EPT event callback to be skipped due to the lingering
-fast singlestep setting.
-
-The fix ensures that the fast singlestep setting is properly reset when
-disabling single step debugging operations.
-
-Signed-off-by: Petr Beneš <w1benny@gmail.com>
-Reviewed-by: Tamas K Lengyel <tamas@tklengyel.com>
-master commit: 897def94b56175ce569673a05909d2f223e1e749
-master date: 2024-02-12 09:37:58 +0100
----
- xen/arch/x86/hvm/hvm.c | 34 ++++++++++++++++++++++++----------
- 1 file changed, 24 insertions(+), 10 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
-index d6c6ab8897..558dc3eddc 100644
---- a/xen/arch/x86/hvm/hvm.c
-+++ b/xen/arch/x86/hvm/hvm.c
-@@ -5153,26 +5153,40 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg)
-
- int hvm_debug_op(struct vcpu *v, int32_t op)
- {
-- int rc;
-+ int rc = 0;
-
- switch ( op )
- {
- case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON:
- case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF:
-- rc = -EOPNOTSUPP;
- if ( !cpu_has_monitor_trap_flag )
-- break;
-- rc = 0;
-- vcpu_pause(v);
-- v->arch.hvm.single_step =
-- (op == XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON);
-- vcpu_unpause(v); /* guest will latch new state */
-+ return -EOPNOTSUPP;
- break;
- default:
-- rc = -ENOSYS;
-- break;
-+ return -ENOSYS;
-+ }
-+
-+ vcpu_pause(v);
-+
-+ switch ( op )
-+ {
-+ case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON:
-+ v->arch.hvm.single_step = true;
-+ break;
-+
-+ case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF:
-+ v->arch.hvm.single_step = false;
-+ v->arch.hvm.fast_single_step.enabled = false;
-+ v->arch.hvm.fast_single_step.p2midx = 0;
-+ break;
-+
-+ default: /* Excluded above */
-+ ASSERT_UNREACHABLE();
-+ return -ENOSYS;
- }
-
-+ vcpu_unpause(v); /* guest will latch new state */
-+
- return rc;
- }
-
---
-2.44.0
-
diff --git a/0014-libxl-Fix-handling-XenStore-errors-in-device-creatio.patch b/0014-libxl-Fix-handling-XenStore-errors-in-device-creatio.patch
new file mode 100644
index 0000000..ac28521
--- /dev/null
+++ b/0014-libxl-Fix-handling-XenStore-errors-in-device-creatio.patch
@@ -0,0 +1,191 @@
+From 8271f0e8f23b63199caf0edcfe85ebc1c1412d1b Mon Sep 17 00:00:00 2001
+From: Demi Marie Obenour <demi@invisiblethingslab.com>
+Date: Tue, 21 May 2024 10:23:52 +0200
+Subject: [PATCH 14/56] libxl: Fix handling XenStore errors in device creation
+
+If xenstored runs out of memory it is possible for it to fail operations
+that should succeed. libxl wasn't robust against this, and could fail
+to ensure that the TTY path of a non-initial console was created and
+read-only for guests. This doesn't qualify for an XSA because guests
+should not be able to run xenstored out of memory, but it still needs to
+be fixed.
+
+Add the missing error checks to ensure that all errors are properly
+handled and that at no point can a guest make the TTY path of its
+frontend directory writable.
+
+Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+master commit: 531d3bea5e9357357eaf6d40f5784a1b4c29b910
+master date: 2024-05-11 00:13:43 +0100
+---
+ tools/libs/light/libxl_console.c | 11 ++---
+ tools/libs/light/libxl_device.c | 72 ++++++++++++++++++++------------
+ tools/libs/light/libxl_xshelp.c | 13 ++++--
+ 3 files changed, 60 insertions(+), 36 deletions(-)
+
+diff --git a/tools/libs/light/libxl_console.c b/tools/libs/light/libxl_console.c
+index cd7412a327..a563c9d3c7 100644
+--- a/tools/libs/light/libxl_console.c
++++ b/tools/libs/light/libxl_console.c
+@@ -351,11 +351,10 @@ int libxl__device_console_add(libxl__gc *gc, uint32_t domid,
+ flexarray_append(front, "protocol");
+ flexarray_append(front, LIBXL_XENCONSOLE_PROTOCOL);
+ }
+- libxl__device_generic_add(gc, XBT_NULL, device,
+- libxl__xs_kvs_of_flexarray(gc, back),
+- libxl__xs_kvs_of_flexarray(gc, front),
+- libxl__xs_kvs_of_flexarray(gc, ro_front));
+- rc = 0;
++ rc = libxl__device_generic_add(gc, XBT_NULL, device,
++ libxl__xs_kvs_of_flexarray(gc, back),
++ libxl__xs_kvs_of_flexarray(gc, front),
++ libxl__xs_kvs_of_flexarray(gc, ro_front));
+ out:
+ return rc;
+ }
+@@ -665,6 +664,8 @@ int libxl_device_channel_getinfo(libxl_ctx *ctx, uint32_t domid,
+ */
+ if (!val) val = "/NO-SUCH-PATH";
+ channelinfo->u.pty.path = strdup(val);
++ if (channelinfo->u.pty.path == NULL)
++ abort();
+ break;
+ default:
+ break;
+diff --git a/tools/libs/light/libxl_device.c b/tools/libs/light/libxl_device.c
+index 13da6e0573..3035501f2c 100644
+--- a/tools/libs/light/libxl_device.c
++++ b/tools/libs/light/libxl_device.c
+@@ -177,8 +177,13 @@ int libxl__device_generic_add(libxl__gc *gc, xs_transaction_t t,
+ ro_frontend_perms[1].perms = backend_perms[1].perms = XS_PERM_READ;
+
+ retry_transaction:
+- if (create_transaction)
++ if (create_transaction) {
+ t = xs_transaction_start(ctx->xsh);
++ if (t == XBT_NULL) {
++ LOGED(ERROR, device->domid, "xs_transaction_start failed");
++ return ERROR_FAIL;
++ }
++ }
+
+ /* FIXME: read frontend_path and check state before removing stuff */
+
+@@ -195,42 +200,55 @@ retry_transaction:
+ if (rc) goto out;
+ }
+
+- /* xxx much of this function lacks error checks! */
+-
+ if (fents || ro_fents) {
+- xs_rm(ctx->xsh, t, frontend_path);
+- xs_mkdir(ctx->xsh, t, frontend_path);
++ if (!xs_rm(ctx->xsh, t, frontend_path) && errno != ENOENT)
++ goto out;
++ if (!xs_mkdir(ctx->xsh, t, frontend_path))
++ goto out;
+ /* Console 0 is a special case. It doesn't use the regular PV
+ * state machine but also the frontend directory has
+ * historically contained other information, such as the
+ * vnc-port, which we don't want the guest fiddling with.
+ */
+ if ((device->kind == LIBXL__DEVICE_KIND_CONSOLE && device->devid == 0) ||
+- (device->kind == LIBXL__DEVICE_KIND_VUART))
+- xs_set_permissions(ctx->xsh, t, frontend_path,
+- ro_frontend_perms, ARRAY_SIZE(ro_frontend_perms));
+- else
+- xs_set_permissions(ctx->xsh, t, frontend_path,
+- frontend_perms, ARRAY_SIZE(frontend_perms));
+- xs_write(ctx->xsh, t, GCSPRINTF("%s/backend", frontend_path),
+- backend_path, strlen(backend_path));
+- if (fents)
+- libxl__xs_writev_perms(gc, t, frontend_path, fents,
+- frontend_perms, ARRAY_SIZE(frontend_perms));
+- if (ro_fents)
+- libxl__xs_writev_perms(gc, t, frontend_path, ro_fents,
+- ro_frontend_perms, ARRAY_SIZE(ro_frontend_perms));
++ (device->kind == LIBXL__DEVICE_KIND_VUART)) {
++ if (!xs_set_permissions(ctx->xsh, t, frontend_path,
++ ro_frontend_perms, ARRAY_SIZE(ro_frontend_perms)))
++ goto out;
++ } else {
++ if (!xs_set_permissions(ctx->xsh, t, frontend_path,
++ frontend_perms, ARRAY_SIZE(frontend_perms)))
++ goto out;
++ }
++ if (!xs_write(ctx->xsh, t, GCSPRINTF("%s/backend", frontend_path),
++ backend_path, strlen(backend_path)))
++ goto out;
++ if (fents) {
++ rc = libxl__xs_writev_perms(gc, t, frontend_path, fents,
++ frontend_perms, ARRAY_SIZE(frontend_perms));
++ if (rc) goto out;
++ }
++ if (ro_fents) {
++ rc = libxl__xs_writev_perms(gc, t, frontend_path, ro_fents,
++ ro_frontend_perms, ARRAY_SIZE(ro_frontend_perms));
++ if (rc) goto out;
++ }
+ }
+
+ if (bents) {
+ if (!libxl_only) {
+- xs_rm(ctx->xsh, t, backend_path);
+- xs_mkdir(ctx->xsh, t, backend_path);
+- xs_set_permissions(ctx->xsh, t, backend_path, backend_perms,
+- ARRAY_SIZE(backend_perms));
+- xs_write(ctx->xsh, t, GCSPRINTF("%s/frontend", backend_path),
+- frontend_path, strlen(frontend_path));
+- libxl__xs_writev(gc, t, backend_path, bents);
++ if (!xs_rm(ctx->xsh, t, backend_path) && errno != ENOENT)
++ goto out;
++ if (!xs_mkdir(ctx->xsh, t, backend_path))
++ goto out;
++ if (!xs_set_permissions(ctx->xsh, t, backend_path, backend_perms,
++ ARRAY_SIZE(backend_perms)))
++ goto out;
++ if (!xs_write(ctx->xsh, t, GCSPRINTF("%s/frontend", backend_path),
++ frontend_path, strlen(frontend_path)))
++ goto out;
++ rc = libxl__xs_writev(gc, t, backend_path, bents);
++ if (rc) goto out;
+ }
+
+ /*
+@@ -276,7 +294,7 @@ retry_transaction:
+ out:
+ if (create_transaction && t)
+ libxl__xs_transaction_abort(gc, &t);
+- return rc;
++ return rc != 0 ? rc : ERROR_FAIL;
+ }
+
+ typedef struct {
+diff --git a/tools/libs/light/libxl_xshelp.c b/tools/libs/light/libxl_xshelp.c
+index 751cd942d9..a6e34ab10f 100644
+--- a/tools/libs/light/libxl_xshelp.c
++++ b/tools/libs/light/libxl_xshelp.c
+@@ -60,10 +60,15 @@ int libxl__xs_writev_perms(libxl__gc *gc, xs_transaction_t t,
+ for (i = 0; kvs[i] != NULL; i += 2) {
+ path = GCSPRINTF("%s/%s", dir, kvs[i]);
+ if (path && kvs[i + 1]) {
+- int length = strlen(kvs[i + 1]);
+- xs_write(ctx->xsh, t, path, kvs[i + 1], length);
+- if (perms)
+- xs_set_permissions(ctx->xsh, t, path, perms, num_perms);
++ size_t length = strlen(kvs[i + 1]);
++ if (length > UINT_MAX)
++ return ERROR_FAIL;
++ if (!xs_write(ctx->xsh, t, path, kvs[i + 1], length))
++ return ERROR_FAIL;
++ if (perms) {
++ if (!xs_set_permissions(ctx->xsh, t, path, perms, num_perms))
++ return ERROR_FAIL;
++ }
+ }
+ }
+ return 0;
+--
+2.45.2
+
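Outside libxl's helpers, the same discipline (check every XenStore call, only
retry the transaction on EAGAIN) looks roughly like the sketch below against
plain libxenstore. The path and value are placeholders, not anything libxl
actually writes:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>
    #include <xenstore.h>

    static bool write_checked(struct xs_handle *xsh)
    {
        const char *path = "/local/domain/0/example";
        const char *val = "1";

        for ( ;; )
        {
            xs_transaction_t t = xs_transaction_start(xsh);

            if ( t == XBT_NULL )
                return false;                      /* e.g. xenstored OOM */

            if ( !xs_write(xsh, t, path, val, strlen(val)) )
            {
                xs_transaction_end(xsh, t, true);  /* abort */
                return false;
            }

            if ( xs_transaction_end(xsh, t, false) )
                return true;
            if ( errno != EAGAIN )                 /* only retry on conflict */
                return false;
        }
    }

    int main(void)
    {
        struct xs_handle *xsh = xs_open(0);
        bool ok;

        if ( !xsh )
            return 1;
        ok = write_checked(xsh);
        xs_close(xsh);

        return ok ? 0 : 1;
    }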
diff --git a/0014-x86-HVM-tidy-state-on-hvmemul_map_linear_addr-s-erro.patch b/0014-x86-HVM-tidy-state-on-hvmemul_map_linear_addr-s-erro.patch
deleted file mode 100644
index 6536674..0000000
--- a/0014-x86-HVM-tidy-state-on-hvmemul_map_linear_addr-s-erro.patch
+++ /dev/null
@@ -1,63 +0,0 @@
-From b3ae0e6201495216b12157bd8b2382b28fdd7dae Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 27 Feb 2024 14:08:20 +0100
-Subject: [PATCH 14/67] x86/HVM: tidy state on hvmemul_map_linear_addr()'s
- error path
-
-While in the vast majority of cases failure of the function will not
-be followed by re-invocation with the same emulation context, a few
-very specific insns - involving multiple independent writes, e.g. ENTER
-and PUSHA - exist where this can happen. Since failure of the function
-only signals to the caller that it ought to try an MMIO write instead,
-such failure also cannot be assumed to result in wholesale failure of
-emulation of the current insn. Instead we have to maintain internal
-state such that another invocation of the function with the same
-emulation context remains possible. To achieve that we need to reset MFN
-slots after putting page references on the error path.
-
-Note that all of this affects debugging code only, in causing an
-assertion to trigger (higher up in the function). There's otherwise no
-misbehavior - such a "leftover" slot would simply be overwritten by new
-contents in a release build.
-
-Also extend the related unmap() assertion, to further check for MFN 0.
-
-Fixes: 8cbd4fb0b7ea ("x86/hvm: implement hvmemul_write() using real mappings")
-Reported-by: Manuel Andreas <manuel.andreas@tum.de>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Paul Durrant <paul@xen.org>
-master commit: e72f951df407bc3be82faac64d8733a270036ba1
-master date: 2024-02-13 09:36:14 +0100
----
- xen/arch/x86/hvm/emulate.c | 7 ++++++-
- 1 file changed, 6 insertions(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
-index 275451dd36..27928dc3f3 100644
---- a/xen/arch/x86/hvm/emulate.c
-+++ b/xen/arch/x86/hvm/emulate.c
-@@ -697,7 +697,12 @@ static void *hvmemul_map_linear_addr(
- out:
- /* Drop all held references. */
- while ( mfn-- > hvmemul_ctxt->mfn )
-+ {
- put_page(mfn_to_page(*mfn));
-+#ifndef NDEBUG /* Clean slot for a subsequent map()'s error checking. */
-+ *mfn = _mfn(0);
-+#endif
-+ }
-
- return err;
- }
-@@ -719,7 +724,7 @@ static void hvmemul_unmap_linear_addr(
-
- for ( i = 0; i < nr_frames; i++ )
- {
-- ASSERT(mfn_valid(*mfn));
-+ ASSERT(mfn_x(*mfn) && mfn_valid(*mfn));
- paging_mark_dirty(currd, *mfn);
- put_page(mfn_to_page(*mfn));
-
---
-2.44.0
-
diff --git a/0015-build-Replace-which-with-command-v.patch b/0015-build-Replace-which-with-command-v.patch
deleted file mode 100644
index 57f21d4..0000000
--- a/0015-build-Replace-which-with-command-v.patch
+++ /dev/null
@@ -1,57 +0,0 @@
-From 1330a5fe44ca91f98857b53fe8bbe06522d9db27 Mon Sep 17 00:00:00 2001
-From: Anthony PERARD <anthony.perard@citrix.com>
-Date: Tue, 27 Feb 2024 14:08:50 +0100
-Subject: [PATCH 15/67] build: Replace `which` with `command -v`
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The `which` command is not standard, may not exist on the build host,
-or may not behave as expected by the build system. It is recommended
-to use `command -v` to find out if a command exist and have its path,
-and it's part of a POSIX shell standard (at least, it seems to be
-mandatory since IEEE Std 1003.1-2008, but was optional before).
-
-Fixes: c8a8645f1efe ("xen/build: Automatically locate a suitable python interpreter")
-Fixes: 3b47bcdb6d38 ("xen/build: Use a distro version of figlet")
-Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
-Tested-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: f93629b18b528a5ab1b1092949c5420069c7226c
-master date: 2024-02-19 12:45:48 +0100
----
- xen/Makefile | 4 ++--
- xen/build.mk | 2 +-
- 2 files changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/xen/Makefile b/xen/Makefile
-index dd0b004e1c..7ea13a6791 100644
---- a/xen/Makefile
-+++ b/xen/Makefile
-@@ -25,8 +25,8 @@ export XEN_BUILD_HOST := $(shell hostname)
- endif
-
- # Best effort attempt to find a python interpreter, defaulting to Python 3 if
--# available. Fall back to just `python` if `which` is nowhere to be found.
--PYTHON_INTERPRETER := $(word 1,$(shell which python3 python python2 2>/dev/null) python)
-+# available. Fall back to just `python`.
-+PYTHON_INTERPRETER := $(word 1,$(shell command -v python3 || command -v python || command -v python2) python)
- export PYTHON ?= $(PYTHON_INTERPRETER)
-
- export CHECKPOLICY ?= checkpolicy
-diff --git a/xen/build.mk b/xen/build.mk
-index 9ecb104f1e..b489f77b7c 100644
---- a/xen/build.mk
-+++ b/xen/build.mk
-@@ -1,6 +1,6 @@
- quiet_cmd_banner = BANNER $@
- define cmd_banner
-- if which figlet >/dev/null 2>&1 ; then \
-+ if command -v figlet >/dev/null 2>&1 ; then \
- echo " Xen $(XEN_FULLVERSION)" | figlet -f $< > $@.tmp; \
- else \
- echo " Xen $(XEN_FULLVERSION)" > $@.tmp; \
---
-2.44.0
-
diff --git a/0015-xen-sched-set-all-sched_resource-data-inside-locked-.patch b/0015-xen-sched-set-all-sched_resource-data-inside-locked-.patch
new file mode 100644
index 0000000..a8090d4
--- /dev/null
+++ b/0015-xen-sched-set-all-sched_resource-data-inside-locked-.patch
@@ -0,0 +1,84 @@
+From 3999b675cad5b717274d6493899b0eea8896f4d7 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 21 May 2024 10:24:26 +0200
+Subject: [PATCH 15/56] xen/sched: set all sched_resource data inside locked
+ region for new cpu
+
+When adding a cpu to a scheduler, set all data items of struct
+sched_resource inside the locked region, as otherwise a race might
+happen (e.g. when trying to access the cpupool of the cpu):
+
+ (XEN) ----[ Xen-4.19.0-1-d x86_64 debug=y Tainted: H ]----
+ (XEN) CPU: 45
+ (XEN) RIP: e008:[<ffff82d040244cbf>] common/sched/credit.c#csched_load_balance+0x41/0x877
+ (XEN) RFLAGS: 0000000000010092 CONTEXT: hypervisor
+ (XEN) rax: ffff82d040981618 rbx: ffff82d040981618 rcx: 0000000000000000
+ (XEN) rdx: 0000003ff68cd000 rsi: 000000000000002d rdi: ffff83103723d450
+ (XEN) rbp: ffff83207caa7d48 rsp: ffff83207caa7b98 r8: 0000000000000000
+ (XEN) r9: ffff831037253cf0 r10: ffff83103767c3f0 r11: 0000000000000009
+ (XEN) r12: ffff831037237990 r13: ffff831037237990 r14: ffff831037253720
+ (XEN) r15: 0000000000000000 cr0: 000000008005003b cr4: 0000000000f526e0
+ (XEN) cr3: 000000005bc2f000 cr2: 0000000000000010
+ (XEN) fsb: 0000000000000000 gsb: 0000000000000000 gss: 0000000000000000
+ (XEN) ds: 0000 es: 0000 fs: 0000 gs: 0000 ss: 0000 cs: e008
+ (XEN) Xen code around <ffff82d040244cbf> (common/sched/credit.c#csched_load_balance+0x41/0x877):
+ (XEN) 48 8b 0c 10 48 8b 49 08 <48> 8b 79 10 48 89 bd b8 fe ff ff 49 8b 4e 28 48
+ <snip>
+ (XEN) Xen call trace:
+ (XEN) [<ffff82d040244cbf>] R common/sched/credit.c#csched_load_balance+0x41/0x877
+ (XEN) [<ffff82d040245a18>] F common/sched/credit.c#csched_schedule+0x36a/0x69f
+ (XEN) [<ffff82d040252644>] F common/sched/core.c#do_schedule+0xe8/0x433
+ (XEN) [<ffff82d0402572dd>] F common/sched/core.c#schedule+0x2e5/0x2f9
+ (XEN) [<ffff82d040232f35>] F common/softirq.c#__do_softirq+0x94/0xbe
+ (XEN) [<ffff82d040232fc8>] F do_softirq+0x13/0x15
+ (XEN) [<ffff82d0403075ef>] F arch/x86/domain.c#idle_loop+0x92/0xe6
+ (XEN)
+ (XEN) Pagetable walk from 0000000000000010:
+ (XEN) L4[0x000] = 000000103ff61063 ffffffffffffffff
+ (XEN) L3[0x000] = 000000103ff60063 ffffffffffffffff
+ (XEN) L2[0x000] = 0000001033dff063 ffffffffffffffff
+ (XEN) L1[0x000] = 0000000000000000 ffffffffffffffff
+ (XEN)
+ (XEN) ****************************************
+ (XEN) Panic on CPU 45:
+ (XEN) FATAL PAGE FAULT
+ (XEN) [error_code=0000]
+ (XEN) Faulting linear address: 0000000000000010
+ (XEN) ****************************************
+
+Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Fixes: a8c6c623192e ("sched: clarify use cases of schedule_cpu_switch()")
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: d104a07524ffc92ae7a70dfe192c291de2a563cc
+master date: 2024-05-15 19:59:52 +0100
+---
+ xen/common/sched/core.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
+index 34ad39b9ad..3c2403ebcf 100644
+--- a/xen/common/sched/core.c
++++ b/xen/common/sched/core.c
+@@ -3179,6 +3179,8 @@ int schedule_cpu_add(unsigned int cpu, struct cpupool *c)
+
+ sr->scheduler = new_ops;
+ sr->sched_priv = ppriv;
++ sr->granularity = cpupool_get_granularity(c);
++ sr->cpupool = c;
+
+ /*
+ * Reroute the lock to the per pCPU lock as /last/ thing. In fact,
+@@ -3191,8 +3193,6 @@ int schedule_cpu_add(unsigned int cpu, struct cpupool *c)
+ /* _Not_ pcpu_schedule_unlock(): schedule_lock has changed! */
+ spin_unlock_irqrestore(old_lock, flags);
+
+- sr->granularity = cpupool_get_granularity(c);
+- sr->cpupool = c;
+ /* The cpu is added to a pool, trigger it to go pick up some work */
+ cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+
+--
+2.45.2
+
diff --git a/0016-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch b/0016-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch
deleted file mode 100644
index f75e07c..0000000
--- a/0016-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From b9745280736ee526374873aa3c4142596e2ba10b Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
- <marmarek@invisiblethingslab.com>
-Date: Tue, 27 Feb 2024 14:09:19 +0100
-Subject: [PATCH 16/67] libxl: Disable relocating memory for qemu-xen in
- stubdomain too
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-According to comments (and experiments) qemu-xen cannot handle memory
-relocation done by hvmloader. The code was already disabled when running
-qemu-xen in dom0 (see libxl__spawn_local_dm()), but it was missed when
-adding qemu-xen support to stubdomain. Adjust libxl__spawn_stub_dm() to
-be consistent in this regard.
-
-Reported-by: Neowutran <xen@neowutran.ovh>
-Signed-off-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
-Reviewed-by: Jason Andryuk <jandryuk@gmail.com>
-Acked-by: Anthony PERARD <anthony.perard@citrix.com>
-master commit: 97883aa269f6745a6ded232be3a855abb1297e0d
-master date: 2024-02-22 11:48:22 +0100
----
- tools/libs/light/libxl_dm.c | 10 ++++++++++
- 1 file changed, 10 insertions(+)
-
-diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c
-index 14b593110f..ed620a9d8e 100644
---- a/tools/libs/light/libxl_dm.c
-+++ b/tools/libs/light/libxl_dm.c
-@@ -2432,6 +2432,16 @@ void libxl__spawn_stub_dm(libxl__egc *egc, libxl__stub_dm_spawn_state *sdss)
- "%s",
- libxl_bios_type_to_string(guest_config->b_info.u.hvm.bios));
- }
-+ /* Disable relocating memory to make the MMIO hole larger
-+ * unless we're running qemu-traditional and vNUMA is not
-+ * configured. */
-+ libxl__xs_printf(gc, XBT_NULL,
-+ libxl__sprintf(gc, "%s/hvmloader/allow-memory-relocate",
-+ libxl__xs_get_dompath(gc, guest_domid)),
-+ "%d",
-+ guest_config->b_info.device_model_version
-+ == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL &&
-+ !libxl__vnuma_configured(&guest_config->b_info));
- ret = xc_domain_set_target(ctx->xch, dm_domid, guest_domid);
- if (ret<0) {
- LOGED(ERROR, guest_domid, "setting target domain %d -> %d",
---
-2.44.0
-
diff --git a/0016-x86-respect-mapcache_domain_init-failing.patch b/0016-x86-respect-mapcache_domain_init-failing.patch
new file mode 100644
index 0000000..db7ddfe
--- /dev/null
+++ b/0016-x86-respect-mapcache_domain_init-failing.patch
@@ -0,0 +1,38 @@
+From dfabab2cd9461ef9d21a708461f35d2ae4b55220 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 21 May 2024 10:25:08 +0200
+Subject: [PATCH 16/56] x86: respect mapcache_domain_init() failing
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The function itself properly handles and hands onwards failure from
+create_perdomain_mapping(). Therefore its caller should respect possible
+failure, too.
+
+Fixes: 4b28bf6ae90b ("x86: re-introduce map_domain_page() et al")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: 7270fdc7a0028d4b7b26fd1b36c6b9e97abcf3da
+master date: 2024-05-15 19:59:52 +0100
+---
+ xen/arch/x86/domain.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
+index 307446273a..5feb0d0679 100644
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -850,7 +850,8 @@ int arch_domain_create(struct domain *d,
+ }
+ else if ( is_pv_domain(d) )
+ {
+- mapcache_domain_init(d);
++ if ( (rc = mapcache_domain_init(d)) != 0 )
++ goto fail;
+
+ if ( (rc = pv_domain_initialise(d)) != 0 )
+ goto fail;
+--
+2.45.2
+
diff --git a/0017-build-make-sure-build-fails-when-running-kconfig-fai.patch b/0017-build-make-sure-build-fails-when-running-kconfig-fai.patch
deleted file mode 100644
index 1bb3aa8..0000000
--- a/0017-build-make-sure-build-fails-when-running-kconfig-fai.patch
+++ /dev/null
@@ -1,58 +0,0 @@
-From ea869977271f93945451908be9b6117ffd1fb02d Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 27 Feb 2024 14:09:37 +0100
-Subject: [PATCH 17/67] build: make sure build fails when running kconfig fails
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Because of using "-include", failure to (re)build auto.conf (with
-auto.conf.cmd produced as a secondary target) won't stop make from
-continuing the build. Arrange for it being possible to drop the - from
-Rules.mk, requiring that the include be skipped for tools-only targets.
-Note that relying on the inclusion in those cases wouldn't be correct
-anyway, as it might be a stale file (yet to be rebuilt) which would be
-included, while during initial build, the file would be absent
-altogether.
-
-Fixes: 8d4c17a90b0a ("xen/build: silence make warnings about missing auto.conf*")
-Reported-by: Roger Pau Monné <roger.pau@citrix.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
-master commit: d34e5fa2e8db19f23081f46a3e710bb122130691
-master date: 2024-02-22 11:52:47 +0100
----
- xen/Makefile | 1 +
- xen/Rules.mk | 4 +++-
- 2 files changed, 4 insertions(+), 1 deletion(-)
-
-diff --git a/xen/Makefile b/xen/Makefile
-index 7ea13a6791..bac3684a36 100644
---- a/xen/Makefile
-+++ b/xen/Makefile
-@@ -374,6 +374,7 @@ $(KCONFIG_CONFIG): tools_fixdep
- # This exploits the 'multi-target pattern rule' trick.
- # The syncconfig should be executed only once to make all the targets.
- include/config/%.conf include/config/%.conf.cmd: $(KCONFIG_CONFIG)
-+ $(Q)rm -f include/config/auto.conf
- $(Q)$(MAKE) $(build)=tools/kconfig syncconfig
-
- ifeq ($(CONFIG_DEBUG),y)
-diff --git a/xen/Rules.mk b/xen/Rules.mk
-index 8af3dd7277..d759cccee3 100644
---- a/xen/Rules.mk
-+++ b/xen/Rules.mk
-@@ -15,7 +15,9 @@ srcdir := $(srctree)/$(src)
- PHONY := __build
- __build:
-
---include $(objtree)/include/config/auto.conf
-+ifneq ($(firstword $(subst /, ,$(obj))),tools)
-+include $(objtree)/include/config/auto.conf
-+endif
-
- include $(XEN_ROOT)/Config.mk
- include $(srctree)/scripts/Kbuild.include
---
-2.44.0
-
diff --git a/0017-tools-xentop-Fix-cpu-sort-order.patch b/0017-tools-xentop-Fix-cpu-sort-order.patch
new file mode 100644
index 0000000..de19ddc
--- /dev/null
+++ b/0017-tools-xentop-Fix-cpu-sort-order.patch
@@ -0,0 +1,76 @@
+From f3d20dd31770a70971f4f85521eec1e741d38695 Mon Sep 17 00:00:00 2001
+From: Leigh Brown <leigh@solinno.co.uk>
+Date: Tue, 21 May 2024 10:25:30 +0200
+Subject: [PATCH 17/56] tools/xentop: Fix cpu% sort order
+
+In compare_cpu_pct(), there is a double -> unsigned long long conversion when
+calling compare(). In C, this discards the fractional part, resulting in an
+out-of-order sorting such as:
+
+ NAME STATE CPU(sec) CPU(%)
+ xendd --b--- 4020 5.7
+ icecream --b--- 2600 3.8
+ Domain-0 -----r 1060 1.5
+ neon --b--- 827 1.1
+ cheese --b--- 225 0.7
+ pizza --b--- 359 0.5
+ cassini --b--- 490 0.4
+ fusilli --b--- 159 0.2
+ bob --b--- 502 0.2
+ blender --b--- 121 0.2
+ bread --b--- 69 0.1
+ chickpea --b--- 67 0.1
+ lentil --b--- 67 0.1
+
+Introduce compare_dbl() function and update compare_cpu_pct() to call it.
+
+Fixes: 49839b535b78 ("Add xenstat framework.")
+Signed-off-by: Leigh Brown <leigh@solinno.co.uk>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: e27fc7d15eab79e604e8b8728778594accc23cf1
+master date: 2024-05-15 19:59:52 +0100
+---
+ tools/xentop/xentop.c | 13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+diff --git a/tools/xentop/xentop.c b/tools/xentop/xentop.c
+index 545bd5e96d..c2a311befe 100644
+--- a/tools/xentop/xentop.c
++++ b/tools/xentop/xentop.c
+@@ -85,6 +85,7 @@ static void set_delay(const char *value);
+ static void set_prompt(const char *new_prompt, void (*func)(const char *));
+ static int handle_key(int);
+ static int compare(unsigned long long, unsigned long long);
++static int compare_dbl(double, double);
+ static int compare_domains(xenstat_domain **, xenstat_domain **);
+ static unsigned long long tot_net_bytes( xenstat_domain *, int);
+ static bool tot_vbd_reqs(xenstat_domain *, int, unsigned long long *);
+@@ -422,6 +423,16 @@ static int compare(unsigned long long i1, unsigned long long i2)
+ return 0;
+ }
+
++/* Compares two double precision numbers, returning -1,0,1 for <,=,> */
++static int compare_dbl(double d1, double d2)
++{
++ if (d1 < d2)
++ return -1;
++ if (d1 > d2)
++ return 1;
++ return 0;
++}
++
+ /* Comparison function for use with qsort. Compares two domains using the
+ * current sort field. */
+ static int compare_domains(xenstat_domain **domain1, xenstat_domain **domain2)
+@@ -523,7 +534,7 @@ static double get_cpu_pct(xenstat_domain *domain)
+
+ static int compare_cpu_pct(xenstat_domain *domain1, xenstat_domain *domain2)
+ {
+- return -compare(get_cpu_pct(domain1), get_cpu_pct(domain2));
++ return -compare_dbl(get_cpu_pct(domain1), get_cpu_pct(domain2));
+ }
+
+ /* Prints cpu percentage statistic */
+--
+2.45.2
+
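The failure mode is easy to reproduce outside xentop: truncating the doubles
to integers before comparing makes any two loads that differ by less than one
percentage point compare as equal. A self-contained qsort example using the
fixed comparator:

    #include <stdio.h>
    #include <stdlib.h>

    static int compare_dbl(double d1, double d2)
    {
        if (d1 < d2)
            return -1;
        if (d1 > d2)
            return 1;
        return 0;
    }

    /* Descending order, mirroring xentop's -compare_dbl() usage. */
    static int cmp_desc(const void *a, const void *b)
    {
        return -compare_dbl(*(const double *)a, *(const double *)b);
    }

    int main(void)
    {
        double cpu_pct[] = { 0.7, 5.7, 0.1, 1.5, 0.2 };
        size_t i, n = sizeof(cpu_pct) / sizeof(cpu_pct[0]);

        qsort(cpu_pct, n, sizeof(cpu_pct[0]), cmp_desc);

        for ( i = 0; i < n; i++ )
            printf("%.1f\n", cpu_pct[i]);   /* 5.7 1.5 0.7 0.2 0.1 */

        return 0;
    }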
diff --git a/0018-x86-mtrr-avoid-system-wide-rendezvous-when-setting-A.patch b/0018-x86-mtrr-avoid-system-wide-rendezvous-when-setting-A.patch
new file mode 100644
index 0000000..a57775d
--- /dev/null
+++ b/0018-x86-mtrr-avoid-system-wide-rendezvous-when-setting-A.patch
@@ -0,0 +1,60 @@
+From 7cdb1fa2ab0b5e11f66cada0370770404153c824 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 21 May 2024 10:25:39 +0200
+Subject: [PATCH 18/56] x86/mtrr: avoid system wide rendezvous when setting AP
+ MTRRs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+There's no point in forcing a system wide update of the MTRRs on all processors
+when there are no changes to be propagated. On AP startup it's only the AP
+that needs to write the system wide MTRR values in order to match the rest of
+the already online CPUs.
+
+We have occasionally seen the watchdog trigger during `xen-hptool cpu-online`
+in one Intel Cascade Lake box with 448 CPUs due to the re-setting of the MTRRs
+on all the CPUs in the system.
+
+While there adjust the comment to clarify why the system-wide resetting of the
+MTRR registers is not needed for the purposes of mtrr_ap_init().
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Release-acked-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: abd00b037da5ffa4e8c4508a5df0cd6eabb805a4
+master date: 2024-05-15 19:59:52 +0100
+---
+ xen/arch/x86/cpu/mtrr/main.c | 15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+diff --git a/xen/arch/x86/cpu/mtrr/main.c b/xen/arch/x86/cpu/mtrr/main.c
+index 90b235f57e..0a44ebbcb0 100644
+--- a/xen/arch/x86/cpu/mtrr/main.c
++++ b/xen/arch/x86/cpu/mtrr/main.c
+@@ -573,14 +573,15 @@ void mtrr_ap_init(void)
+ if (!mtrr_if || hold_mtrr_updates_on_aps)
+ return;
+ /*
+- * Ideally we should hold mtrr_mutex here to avoid mtrr entries changed,
+- * but this routine will be called in cpu boot time, holding the lock
+- * breaks it. This routine is called in two cases: 1.very earily time
+- * of software resume, when there absolutely isn't mtrr entry changes;
+- * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to
+- * prevent mtrr entry changes
++ * hold_mtrr_updates_on_aps takes care of preventing unnecessary MTRR
++ * updates when batch starting the CPUs (see
++ * mtrr_aps_sync_{begin,end}()).
++ *
++ * Otherwise just apply the current system wide MTRR values to this AP.
++ * Note this doesn't require synchronization with the other CPUs, as
++ * there are strictly no modifications of the current MTRR values.
+ */
+- set_mtrr(~0U, 0, 0, 0);
++ mtrr_set_all();
+ }
+
+ /**
+--
+2.45.2
+
diff --git a/0018-x86emul-add-missing-EVEX.R-checks.patch b/0018-x86emul-add-missing-EVEX.R-checks.patch
deleted file mode 100644
index 12e7702..0000000
--- a/0018-x86emul-add-missing-EVEX.R-checks.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From 16f2e47eb1207d866f95cf694a60a7ceb8f96a36 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 27 Feb 2024 14:09:55 +0100
-Subject: [PATCH 18/67] x86emul: add missing EVEX.R' checks
-
-EVEX.R' is not ignored in 64-bit code when encoding a GPR or mask
-register. While for mask registers suitable checks are in place (there
-also covering EVEX.R), they were missing for the few cases where in
-EVEX-encoded instructions ModR/M.reg encodes a GPR. While for VPEXTRW
-the bit is replaced before an emulation stub is invoked, for
-VCVT{,T}{S,D,H}2{,U}SI this actually would have led to #UD from inside
-an emulation stub, in turn raising #UD to the guest, but accompanied by
-log messages indicating something's wrong in Xen nevertheless.
-
-Fixes: 001bd91ad864 ("x86emul: support AVX512{F,BW,DQ} extract insns")
-Fixes: baf4a376f550 ("x86emul: support AVX512F legacy-equivalent scalar int/FP conversion insns")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: cb319824bfa8d3c9ea0410cc71daaedc3e11aa2a
-master date: 2024-02-22 11:54:07 +0100
----
- xen/arch/x86/x86_emulate/x86_emulate.c | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
-index 0c0336f737..995670cbc8 100644
---- a/xen/arch/x86/x86_emulate/x86_emulate.c
-+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
-@@ -6829,7 +6829,8 @@ x86_emulate(
- CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2d): /* vcvts{s,d}2si xmm/mem,reg */
- CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x78): /* vcvtts{s,d}2usi xmm/mem,reg */
- CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x79): /* vcvts{s,d}2usi xmm/mem,reg */
-- generate_exception_if((evex.reg != 0xf || !evex.RX || evex.opmsk ||
-+ generate_exception_if((evex.reg != 0xf || !evex.RX || !evex.R ||
-+ evex.opmsk ||
- (ea.type != OP_REG && evex.brs)),
- EXC_UD);
- host_and_vcpu_must_have(avx512f);
-@@ -10705,7 +10706,7 @@ x86_emulate(
- goto pextr;
-
- case X86EMUL_OPC_EVEX_66(0x0f, 0xc5): /* vpextrw $imm8,xmm,reg */
-- generate_exception_if(ea.type != OP_REG, EXC_UD);
-+ generate_exception_if(ea.type != OP_REG || !evex.R, EXC_UD);
- /* Convert to alternative encoding: We want to use a memory operand. */
- evex.opcx = ext_0f3a;
- b = 0x15;
---
-2.44.0
-
diff --git a/0001-update-Xen-version-to-4.17.4-pre.patch b/0019-update-Xen-version-to-4.18.3-pre.patch
index e1070c9..34f2b33 100644
--- a/0001-update-Xen-version-to-4.17.4-pre.patch
+++ b/0019-update-Xen-version-to-4.18.3-pre.patch
@@ -1,25 +1,25 @@
-From 4f6e9d4327eb5252f1e8cac97a095d8b8485dadb Mon Sep 17 00:00:00 2001
+From 01f7a3c792241d348a4e454a30afdf6c0d6cd71c Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 30 Jan 2024 14:36:44 +0100
-Subject: [PATCH 01/67] update Xen version to 4.17.4-pre
+Date: Tue, 21 May 2024 11:52:11 +0200
+Subject: [PATCH 19/56] update Xen version to 4.18.3-pre
---
xen/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/xen/Makefile b/xen/Makefile
-index a46e6330db..dd0b004e1c 100644
+index 657f6fa4e3..786ab61600 100644
--- a/xen/Makefile
+++ b/xen/Makefile
@@ -6,7 +6,7 @@ this-makefile := $(call lastword,$(MAKEFILE_LIST))
# All other places this is stored (eg. compile.h) should be autogenerated.
export XEN_VERSION = 4
- export XEN_SUBVERSION = 17
--export XEN_EXTRAVERSION ?= .3$(XEN_VENDORVERSION)
-+export XEN_EXTRAVERSION ?= .4-pre$(XEN_VENDORVERSION)
+ export XEN_SUBVERSION = 18
+-export XEN_EXTRAVERSION ?= .2$(XEN_VENDORVERSION)
++export XEN_EXTRAVERSION ?= .3-pre$(XEN_VENDORVERSION)
export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION)
-include xen-version
--
-2.44.0
+2.45.2
diff --git a/0019-xen-livepatch-fix-norevert-test-hook-setup-typo.patch b/0019-xen-livepatch-fix-norevert-test-hook-setup-typo.patch
deleted file mode 100644
index 1676f7a..0000000
--- a/0019-xen-livepatch-fix-norevert-test-hook-setup-typo.patch
+++ /dev/null
@@ -1,36 +0,0 @@
-From f6b12792542e372f36a71ea4c2563e6dd6e4fa57 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 27 Feb 2024 14:10:24 +0100
-Subject: [PATCH 19/67] xen/livepatch: fix norevert test hook setup typo
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The test code has a typo in using LIVEPATCH_APPLY_HOOK() instead of
-LIVEPATCH_REVERT_HOOK().
-
-Fixes: 6047104c3ccc ('livepatch: Add per-function applied/reverted state tracking marker')
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com>
-master commit: f0622dd4fd6ae6ddb523a45d89ed9b8f3a9a8f36
-master date: 2024-02-26 10:13:46 +0100
----
- xen/test/livepatch/xen_action_hooks_norevert.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/xen/test/livepatch/xen_action_hooks_norevert.c b/xen/test/livepatch/xen_action_hooks_norevert.c
-index 3e21ade6ab..c173855192 100644
---- a/xen/test/livepatch/xen_action_hooks_norevert.c
-+++ b/xen/test/livepatch/xen_action_hooks_norevert.c
-@@ -120,7 +120,7 @@ static void post_revert_hook(livepatch_payload_t *payload)
- printk(KERN_DEBUG "%s: Hook done.\n", __func__);
- }
-
--LIVEPATCH_APPLY_HOOK(revert_hook);
-+LIVEPATCH_REVERT_HOOK(revert_hook);
-
- LIVEPATCH_PREAPPLY_HOOK(pre_apply_hook);
- LIVEPATCH_POSTAPPLY_HOOK(post_apply_hook);
---
-2.44.0
-
diff --git a/0020-x86-ucode-Further-fixes-to-identify-ucode-already-up.patch b/0020-x86-ucode-Further-fixes-to-identify-ucode-already-up.patch
new file mode 100644
index 0000000..c00dce2
--- /dev/null
+++ b/0020-x86-ucode-Further-fixes-to-identify-ucode-already-up.patch
@@ -0,0 +1,92 @@
+From cd873f00bedca2f1afeaf13a78f70e719c5b1398 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Wed, 26 Jun 2024 13:36:13 +0200
+Subject: [PATCH 20/56] x86/ucode: Further fixes to identify "ucode already up
+ to date"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+When the revision in hardware is newer than anything Xen has to hand,
+'microcode_cache' isn't set up. Then, `xen-ucode` initiates the update
+because it doesn't know whether the revisions across the system are symmetric
+or not. This involves the patch getting all the way into the
+apply_microcode() hooks before being found to be too old.
+
+This is all a giant mess and needs an overhaul, but in the short term simply
+adjust the apply_microcode() hooks to return -EEXIST.
+
+Also, unconditionally print the preexisting microcode revision on boot. It's
+relevant information which is otherwise unavailable if Xen doesn't find new
+microcode to use.
+
+Fixes: 648db37a155a ("x86/ucode: Distinguish "ucode already up to date"")
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: 977d98e67c2e929c62aa1f495fc4c6341c45abb5
+master date: 2024-05-16 13:59:11 +0100
+---
+ xen/arch/x86/cpu/microcode/amd.c | 7 +++++--
+ xen/arch/x86/cpu/microcode/core.c | 2 ++
+ xen/arch/x86/cpu/microcode/intel.c | 7 +++++--
+ 3 files changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/xen/arch/x86/cpu/microcode/amd.c b/xen/arch/x86/cpu/microcode/amd.c
+index 75fc84e445..d8f7646e88 100644
+--- a/xen/arch/x86/cpu/microcode/amd.c
++++ b/xen/arch/x86/cpu/microcode/amd.c
+@@ -222,12 +222,15 @@ static int cf_check apply_microcode(const struct microcode_patch *patch)
+ uint32_t rev, old_rev = sig->rev;
+ enum microcode_match_result result = microcode_fits(patch);
+
++ if ( result == MIS_UCODE )
++ return -EINVAL;
++
+ /*
+ * Allow application of the same revision to pick up SMT-specific changes
+ * even if the revision of the other SMT thread is already up-to-date.
+ */
+- if ( result != NEW_UCODE && result != SAME_UCODE )
+- return -EINVAL;
++ if ( result == OLD_UCODE )
++ return -EEXIST;
+
+ if ( check_final_patch_levels(sig) )
+ {
+diff --git a/xen/arch/x86/cpu/microcode/core.c b/xen/arch/x86/cpu/microcode/core.c
+index d5338ad345..8a47f4471f 100644
+--- a/xen/arch/x86/cpu/microcode/core.c
++++ b/xen/arch/x86/cpu/microcode/core.c
+@@ -887,6 +887,8 @@ int __init early_microcode_init(unsigned long *module_map,
+
+ ucode_ops.collect_cpu_info();
+
++ printk(XENLOG_INFO "BSP microcode revision: 0x%08x\n", this_cpu(cpu_sig).rev);
++
+ /*
+ * Some hypervisors deliberately report a microcode revision of -1 to
+ * mean that they will not accept microcode updates.
+diff --git a/xen/arch/x86/cpu/microcode/intel.c b/xen/arch/x86/cpu/microcode/intel.c
+index 060c529a6e..a2d88e3ac0 100644
+--- a/xen/arch/x86/cpu/microcode/intel.c
++++ b/xen/arch/x86/cpu/microcode/intel.c
+@@ -294,10 +294,13 @@ static int cf_check apply_microcode(const struct microcode_patch *patch)
+
+ result = microcode_update_match(patch);
+
+- if ( result != NEW_UCODE &&
+- !(opt_ucode_allow_same && result == SAME_UCODE) )
++ if ( result == MIS_UCODE )
+ return -EINVAL;
+
++ if ( result == OLD_UCODE ||
++ (result == SAME_UCODE && !opt_ucode_allow_same) )
++ return -EEXIST;
++
+ wbinvd();
+
+ wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)patch->data);
+--
+2.45.2
+
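A stand-alone sketch of the error-code split described above (illustrative C only, not Xen code; the enum and function names merely mirror the patch): a mismatched blob remains a hard error (-EINVAL), while an older or same-revision blob is reported as -EEXIST so a caller such as xen-ucode can treat it as "nothing to do" rather than as a failure.

    #include <errno.h>
    #include <stdio.h>

    /* Roughly mirrors Xen's microcode_match_result values. */
    enum match_result { MIS_UCODE, OLD_UCODE, SAME_UCODE, NEW_UCODE };

    /* Hypothetical stand-in for an apply_microcode() hook. */
    static int apply(enum match_result r, int allow_same)
    {
        if ( r == MIS_UCODE )
            return -EINVAL;   /* wrong signature/platform: hard error */
        if ( r == OLD_UCODE || (r == SAME_UCODE && !allow_same) )
            return -EEXIST;   /* nothing newer to load: not a failure */
        return 0;             /* NEW_UCODE, or SAME_UCODE when allowed */
    }

    int main(void)
    {
        int rc = apply(OLD_UCODE, 0);

        if ( rc == -EEXIST )
            printf("microcode already up to date\n");
        else if ( rc )
            printf("update failed: %d\n", rc);
        else
            printf("microcode updated\n");

        return 0;
    }
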
diff --git a/0020-xen-cmdline-fix-printf-format-specifier-in-no_config.patch b/0020-xen-cmdline-fix-printf-format-specifier-in-no_config.patch
deleted file mode 100644
index b47d9ee..0000000
--- a/0020-xen-cmdline-fix-printf-format-specifier-in-no_config.patch
+++ /dev/null
@@ -1,38 +0,0 @@
-From 229e8a72ee4cde5698aaf42cc59ae57446dce60f Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 27 Feb 2024 14:10:39 +0100
-Subject: [PATCH 20/67] xen/cmdline: fix printf format specifier in
- no_config_param()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-'*' sets the width field, which is the minimum number of characters to output,
-but what we want in no_config_param() is the precision instead, which is '.*'
-as it imposes a maximum limit on the output.
-
-Fixes: 68d757df8dd2 ('x86/pv: Options to disable and/or compile out 32bit PV support')
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: ef101f525173cf51dc70f4c77862f6f10a8ddccf
-master date: 2024-02-26 10:17:40 +0100
----
- xen/include/xen/param.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/xen/include/xen/param.h b/xen/include/xen/param.h
-index 93c3fe7cb7..e02e49635c 100644
---- a/xen/include/xen/param.h
-+++ b/xen/include/xen/param.h
-@@ -191,7 +191,7 @@ static inline void no_config_param(const char *cfg, const char *param,
- {
- int len = e ? ({ ASSERT(e >= s); e - s; }) : strlen(s);
-
-- printk(XENLOG_INFO "CONFIG_%s disabled - ignoring '%s=%*s' setting\n",
-+ printk(XENLOG_INFO "CONFIG_%s disabled - ignoring '%s=%.*s' setting\n",
- cfg, param, len, s);
- }
-
---
-2.44.0
-
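The width/precision distinction described above can be seen in a small stand-alone C snippet (illustrative only; the string contents and length are made up):

    #include <stdio.h>

    int main(void)
    {
        const char *s = "pv=no,hvm=yes";   /* only the first 5 chars are wanted */
        int len = 5;

        /*
         * '%*s' consumes the int argument as a minimum field width: the whole
         * string is still printed (padded if shorter), nothing is truncated.
         */
        printf("width:     '%*s'\n", len, s);

        /*
         * '%.*s' consumes it as a precision: at most 'len' characters are
         * printed, which is the behaviour no_config_param() needs.
         */
        printf("precision: '%.*s'\n", len, s);

        return 0;
    }
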
diff --git a/0021-x86-altcall-use-a-union-as-register-type-for-functio.patch b/0021-x86-altcall-use-a-union-as-register-type-for-functio.patch
deleted file mode 100644
index ab050dd..0000000
--- a/0021-x86-altcall-use-a-union-as-register-type-for-functio.patch
+++ /dev/null
@@ -1,141 +0,0 @@
-From 1aafe054e7d1efbf8e8482a9cdd4be5753b79e2f Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 27 Feb 2024 14:11:04 +0100
-Subject: [PATCH 21/67] x86/altcall: use a union as register type for function
- parameters on clang
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The current code for alternative calls uses the caller parameter types as the
-types for the register variables that serve as function parameters:
-
-uint8_t foo;
-[...]
-alternative_call(myfunc, foo);
-
-Would expand roughly into:
-
-register unint8_t a1_ asm("rdi") = foo;
-register unsigned long a2_ asm("rsi");
-[...]
-asm volatile ("call *%c[addr](%%rip)"...);
-
-However with -O2 clang will generate incorrect code, given the following
-example:
-
-unsigned int func(uint8_t t)
-{
- return t;
-}
-
-static void bar(uint8_t b)
-{
- int ret_;
- register uint8_t di asm("rdi") = b;
- register unsigned long si asm("rsi");
- register unsigned long dx asm("rdx");
- register unsigned long cx asm("rcx");
- register unsigned long r8 asm("r8");
- register unsigned long r9 asm("r9");
- register unsigned long r10 asm("r10");
- register unsigned long r11 asm("r11");
-
- asm volatile ( "call %c[addr]"
- : "+r" (di), "=r" (si), "=r" (dx),
- "=r" (cx), "=r" (r8), "=r" (r9),
- "=r" (r10), "=r" (r11), "=a" (ret_)
- : [addr] "i" (&(func)), "g" (func)
- : "memory" );
-}
-
-void foo(unsigned int a)
-{
- bar(a);
-}
-
-Clang generates the following assembly code:
-
-func: # @func
- movl %edi, %eax
- retq
-foo: # @foo
- callq func
- retq
-
-Note the truncation of the unsigned int parameter 'a' of foo() to uint8_t when
-passed into bar() is lost. clang doesn't zero extend the parameters in the
-callee when required, as the psABI mandates.
-
-The above can be worked around by using a union when defining the register
-variables, so that `di` becomes:
-
-register union {
- uint8_t e;
- unsigned long r;
-} di asm("rdi") = { .e = b };
-
-Which results in following code generated for `foo()`:
-
-foo: # @foo
- movzbl %dil, %edi
- callq func
- retq
-
-So the truncation is no longer lost. Apply such a workaround only when built
-with clang.
-
-Reported-by: Matthew Grooms <mgrooms@shrew.net>
-Link: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=277200
-Link: https://github.com/llvm/llvm-project/issues/12579
-Link: https://github.com/llvm/llvm-project/issues/82598
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Acked-by: Jan Beulich <jbeulich@suse.com>
-master commit: 2ce562b2a413cbdb2e1128989ed1722290a27c4e
-master date: 2024-02-26 10:18:01 +0100
----
- xen/arch/x86/include/asm/alternative.h | 25 +++++++++++++++++++++++++
- 1 file changed, 25 insertions(+)
-
-diff --git a/xen/arch/x86/include/asm/alternative.h b/xen/arch/x86/include/asm/alternative.h
-index a7a82c2c03..bcb1dc94f4 100644
---- a/xen/arch/x86/include/asm/alternative.h
-+++ b/xen/arch/x86/include/asm/alternative.h
-@@ -167,9 +167,34 @@ extern void alternative_branches(void);
- #define ALT_CALL_arg5 "r8"
- #define ALT_CALL_arg6 "r9"
-
-+#ifdef CONFIG_CC_IS_CLANG
-+/*
-+ * Use a union with an unsigned long in order to prevent clang from
-+ * skipping a possible truncation of the value. By using the union any
-+ * truncation is carried before the call instruction, in turn covering
-+ * for ABI-non-compliance in that the necessary clipping / extension of
-+ * the value is supposed to be carried out in the callee.
-+ *
-+ * Note this behavior is not mandated by the standard, and hence could
-+ * stop being a viable workaround, or worse, could cause a different set
-+ * of code-generation issues in future clang versions.
-+ *
-+ * This has been reported upstream:
-+ * https://github.com/llvm/llvm-project/issues/12579
-+ * https://github.com/llvm/llvm-project/issues/82598
-+ */
-+#define ALT_CALL_ARG(arg, n) \
-+ register union { \
-+ typeof(arg) e; \
-+ unsigned long r; \
-+ } a ## n ## _ asm ( ALT_CALL_arg ## n ) = { \
-+ .e = ({ BUILD_BUG_ON(sizeof(arg) > sizeof(void *)); (arg); }) \
-+ }
-+#else
- #define ALT_CALL_ARG(arg, n) \
- register typeof(arg) a ## n ## _ asm ( ALT_CALL_arg ## n ) = \
- ({ BUILD_BUG_ON(sizeof(arg) > sizeof(void *)); (arg); })
-+#endif
- #define ALT_CALL_NO_ARG(n) \
- register unsigned long a ## n ## _ asm ( ALT_CALL_arg ## n )
-
---
-2.44.0
-
diff --git a/0021-x86-msi-prevent-watchdog-triggering-when-dumping-MSI.patch b/0021-x86-msi-prevent-watchdog-triggering-when-dumping-MSI.patch
new file mode 100644
index 0000000..8bcc63f
--- /dev/null
+++ b/0021-x86-msi-prevent-watchdog-triggering-when-dumping-MSI.patch
@@ -0,0 +1,44 @@
+From 1ffb29d132600e6a7965c2885505615a6fd6c647 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Wed, 26 Jun 2024 13:36:52 +0200
+Subject: [PATCH 21/56] x86/msi: prevent watchdog triggering when dumping MSI
+ state
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Use the same check that's used in dump_irqs().
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 594b22ca5be681ec1b42c34f321cc2600d582210
+master date: 2024-05-20 14:29:44 +0100
+---
+ xen/arch/x86/msi.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
+index a78367d7cf..3eaeffd1e0 100644
+--- a/xen/arch/x86/msi.c
++++ b/xen/arch/x86/msi.c
+@@ -17,6 +17,7 @@
+ #include <xen/param.h>
+ #include <xen/pci.h>
+ #include <xen/pci_regs.h>
++#include <xen/softirq.h>
+ #include <xen/iocap.h>
+ #include <xen/keyhandler.h>
+ #include <xen/pfn.h>
+@@ -1405,6 +1406,9 @@ static void cf_check dump_msi(unsigned char key)
+ unsigned long flags;
+ const char *type = "???";
+
++ if ( !(irq & 0x1f) )
++ process_pending_softirqs();
++
+ if ( !irq_desc_initialized(desc) )
+ continue;
+
+--
+2.45.2
+
diff --git a/0022-x86-irq-remove-offline-CPUs-from-old-CPU-mask-when-a.patch b/0022-x86-irq-remove-offline-CPUs-from-old-CPU-mask-when-a.patch
new file mode 100644
index 0000000..28fec3e
--- /dev/null
+++ b/0022-x86-irq-remove-offline-CPUs-from-old-CPU-mask-when-a.patch
@@ -0,0 +1,44 @@
+From 52e16bf065cb42b79d14ac74d701d1f9d8506430 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Wed, 26 Jun 2024 13:37:20 +0200
+Subject: [PATCH 22/56] x86/irq: remove offline CPUs from old CPU mask when
+ adjusting move_cleanup_count
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+When adjusting move_cleanup_count to account for CPUs that are offline also
+adjust old_cpu_mask, otherwise further calls to fixup_irqs() could subtract
+those again and create an imbalance in move_cleanup_count.
+
+Fixes: 472e0b74c5c4 ('x86/IRQ: deal with move cleanup count state in fixup_irqs()')
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: e63209d3ba2fd1b2f232babd14c9c679ffa7b09a
+master date: 2024-06-10 10:33:22 +0200
+---
+ xen/arch/x86/irq.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
+index e07006391a..db14df93db 100644
+--- a/xen/arch/x86/irq.c
++++ b/xen/arch/x86/irq.c
+@@ -2576,6 +2576,14 @@ void fixup_irqs(const cpumask_t *mask, bool verbose)
+ desc->arch.move_cleanup_count -= cpumask_weight(affinity);
+ if ( !desc->arch.move_cleanup_count )
+ release_old_vec(desc);
++ else
++ /*
++ * Adjust old_cpu_mask to account for the offline CPUs,
++ * otherwise further calls to fixup_irqs() could subtract those
++ * again and possibly underflow the counter.
++ */
++ cpumask_andnot(desc->arch.old_cpu_mask, desc->arch.old_cpu_mask,
++ affinity);
+ }
+
+ if ( !desc->action || cpumask_subset(desc->affinity, mask) )
+--
+2.45.2
+
diff --git a/0022-x86-spec-fix-BRANCH_HARDEN-option-to-only-be-set-whe.patch b/0022-x86-spec-fix-BRANCH_HARDEN-option-to-only-be-set-whe.patch
deleted file mode 100644
index ce01c1a..0000000
--- a/0022-x86-spec-fix-BRANCH_HARDEN-option-to-only-be-set-whe.patch
+++ /dev/null
@@ -1,57 +0,0 @@
-From 91650010815f3da0834bc9781c4359350d1162a5 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 27 Feb 2024 14:11:40 +0100
-Subject: [PATCH 22/67] x86/spec: fix BRANCH_HARDEN option to only be set when
- build-enabled
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The current logic to handle the BRANCH_HARDEN option will report it as enabled
-even when build-time disabled. Fix this by only allowing the option to be set
-when support for it is built into Xen.
-
-Fixes: 2d6f36daa086 ('x86/nospec: Introduce CONFIG_SPECULATIVE_HARDEN_BRANCH')
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 60e00f77a5cc671d30c5ef3318f5b8e9b74e4aa3
-master date: 2024-02-26 16:06:42 +0100
----
- xen/arch/x86/spec_ctrl.c | 14 ++++++++++++--
- 1 file changed, 12 insertions(+), 2 deletions(-)
-
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 56e07d7536..661716d695 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -62,7 +62,8 @@ int8_t __initdata opt_psfd = -1;
- int8_t __ro_after_init opt_ibpb_ctxt_switch = -1;
- int8_t __read_mostly opt_eager_fpu = -1;
- int8_t __read_mostly opt_l1d_flush = -1;
--static bool __initdata opt_branch_harden = true;
-+static bool __initdata opt_branch_harden =
-+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH);
-
- bool __initdata bsp_delay_spec_ctrl;
- uint8_t __read_mostly default_xen_spec_ctrl;
-@@ -280,7 +281,16 @@ static int __init cf_check parse_spec_ctrl(const char *s)
- else if ( (val = parse_boolean("l1d-flush", s, ss)) >= 0 )
- opt_l1d_flush = val;
- else if ( (val = parse_boolean("branch-harden", s, ss)) >= 0 )
-- opt_branch_harden = val;
-+ {
-+ if ( IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) )
-+ opt_branch_harden = val;
-+ else
-+ {
-+ no_config_param("SPECULATIVE_HARDEN_BRANCH", "spec-ctrl", s,
-+ ss);
-+ rc = -EINVAL;
-+ }
-+ }
- else if ( (val = parse_boolean("srb-lock", s, ss)) >= 0 )
- opt_srb_lock = val;
- else if ( (val = parse_boolean("unpriv-mmio", s, ss)) >= 0 )
---
-2.44.0
-
diff --git a/0023-CI-Update-FreeBSD-to-13.3.patch b/0023-CI-Update-FreeBSD-to-13.3.patch
new file mode 100644
index 0000000..6a6e7ae
--- /dev/null
+++ b/0023-CI-Update-FreeBSD-to-13.3.patch
@@ -0,0 +1,33 @@
+From 80f2d2c2a515a6b9a4ea1b128267c6e1b5085002 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Wed, 26 Jun 2024 13:37:58 +0200
+Subject: [PATCH 23/56] CI: Update FreeBSD to 13.3
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Roger Pau Monné <roger.pau@citrix.com>
+Acked-by: Stefano Stabellini <sstabellini@kernel.org>
+master commit: 5ea7f2c9d7a1334b3b2bd5f67fab4d447b60613d
+master date: 2024-06-11 17:00:10 +0100
+---
+ .cirrus.yml | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/.cirrus.yml b/.cirrus.yml
+index 63f3afb104..e961877881 100644
+--- a/.cirrus.yml
++++ b/.cirrus.yml
+@@ -17,7 +17,7 @@ freebsd_template: &FREEBSD_TEMPLATE
+ task:
+ name: 'FreeBSD 13'
+ freebsd_instance:
+- image_family: freebsd-13-2
++ image_family: freebsd-13-3
+ << : *FREEBSD_TEMPLATE
+
+ task:
+--
+2.45.2
+
diff --git a/0023-x86-account-for-shadow-stack-in-exception-from-stub-.patch b/0023-x86-account-for-shadow-stack-in-exception-from-stub-.patch
deleted file mode 100644
index e23a764..0000000
--- a/0023-x86-account-for-shadow-stack-in-exception-from-stub-.patch
+++ /dev/null
@@ -1,212 +0,0 @@
-From 49f77602373b58b7bbdb40cea2b49d2f88d4003d Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 27 Feb 2024 14:12:11 +0100
-Subject: [PATCH 23/67] x86: account for shadow stack in exception-from-stub
- recovery
-
-Dealing with exceptions raised from within emulation stubs involves
-discarding return address (replaced by exception related information).
-Such discarding of course also requires removing the corresponding entry
-from the shadow stack.
-
-Also amend the comment in fixup_exception_return(), to further clarify
-why use of ptr[1] can't be an out-of-bounds access.
-
-This is CVE-2023-46841 / XSA-451.
-
-Fixes: 209fb9919b50 ("x86/extable: Adjust extable handling to be shadow stack compatible")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: 91f5f7a9154919a765c3933521760acffeddbf28
-master date: 2024-02-27 13:49:22 +0100
----
- xen/arch/x86/extable.c | 20 ++++++----
- xen/arch/x86/include/asm/uaccess.h | 3 +-
- xen/arch/x86/traps.c | 63 +++++++++++++++++++++++++++---
- 3 files changed, 71 insertions(+), 15 deletions(-)
-
-diff --git a/xen/arch/x86/extable.c b/xen/arch/x86/extable.c
-index 6758ba1dca..dd9583f2a5 100644
---- a/xen/arch/x86/extable.c
-+++ b/xen/arch/x86/extable.c
-@@ -86,26 +86,29 @@ search_one_extable(const struct exception_table_entry *first,
- }
-
- unsigned long
--search_exception_table(const struct cpu_user_regs *regs)
-+search_exception_table(const struct cpu_user_regs *regs, unsigned long *stub_ra)
- {
- const struct virtual_region *region = find_text_region(regs->rip);
- unsigned long stub = this_cpu(stubs.addr);
-
- if ( region && region->ex )
-+ {
-+ *stub_ra = 0;
- return search_one_extable(region->ex, region->ex_end, regs->rip);
-+ }
-
- if ( regs->rip >= stub + STUB_BUF_SIZE / 2 &&
- regs->rip < stub + STUB_BUF_SIZE &&
- regs->rsp > (unsigned long)regs &&
- regs->rsp < (unsigned long)get_cpu_info() )
- {
-- unsigned long retptr = *(unsigned long *)regs->rsp;
-+ unsigned long retaddr = *(unsigned long *)regs->rsp, fixup;
-
-- region = find_text_region(retptr);
-- retptr = region && region->ex
-- ? search_one_extable(region->ex, region->ex_end, retptr)
-- : 0;
-- if ( retptr )
-+ region = find_text_region(retaddr);
-+ fixup = region && region->ex
-+ ? search_one_extable(region->ex, region->ex_end, retaddr)
-+ : 0;
-+ if ( fixup )
- {
- /*
- * Put trap number and error code on the stack (in place of the
-@@ -117,7 +120,8 @@ search_exception_table(const struct cpu_user_regs *regs)
- };
-
- *(unsigned long *)regs->rsp = token.raw;
-- return retptr;
-+ *stub_ra = retaddr;
-+ return fixup;
- }
- }
-
-diff --git a/xen/arch/x86/include/asm/uaccess.h b/xen/arch/x86/include/asm/uaccess.h
-index 684fccd95c..74bb222c03 100644
---- a/xen/arch/x86/include/asm/uaccess.h
-+++ b/xen/arch/x86/include/asm/uaccess.h
-@@ -421,7 +421,8 @@ union stub_exception_token {
- unsigned long raw;
- };
-
--extern unsigned long search_exception_table(const struct cpu_user_regs *regs);
-+extern unsigned long search_exception_table(const struct cpu_user_regs *regs,
-+ unsigned long *stub_ra);
- extern void sort_exception_tables(void);
- extern void sort_exception_table(struct exception_table_entry *start,
- const struct exception_table_entry *stop);
-diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
-index 06c4f3868b..7599bee361 100644
---- a/xen/arch/x86/traps.c
-+++ b/xen/arch/x86/traps.c
-@@ -856,7 +856,7 @@ void do_unhandled_trap(struct cpu_user_regs *regs)
- }
-
- static void fixup_exception_return(struct cpu_user_regs *regs,
-- unsigned long fixup)
-+ unsigned long fixup, unsigned long stub_ra)
- {
- if ( IS_ENABLED(CONFIG_XEN_SHSTK) )
- {
-@@ -873,7 +873,8 @@ static void fixup_exception_return(struct cpu_user_regs *regs,
- /*
- * Search for %rip. The shstk currently looks like this:
- *
-- * ... [Likely pointed to by SSP]
-+ * tok [Supervisor token, == &tok | BUSY, only with FRED inactive]
-+ * ... [Pointed to by SSP for most exceptions, empty in IST cases]
- * %cs [== regs->cs]
- * %rip [== regs->rip]
- * SSP [Likely points to 3 slots higher, above %cs]
-@@ -891,7 +892,56 @@ static void fixup_exception_return(struct cpu_user_regs *regs,
- */
- if ( ptr[0] == regs->rip && ptr[1] == regs->cs )
- {
-+ unsigned long primary_shstk =
-+ (ssp & ~(STACK_SIZE - 1)) +
-+ (PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8;
-+
- wrss(fixup, ptr);
-+
-+ if ( !stub_ra )
-+ goto shstk_done;
-+
-+ /*
-+ * Stub recovery ought to happen only when the outer context
-+ * was on the main shadow stack. We need to also "pop" the
-+ * stub's return address from the interrupted context's shadow
-+ * stack. That is,
-+ * - if we're still on the main stack, we need to move the
-+ * entire stack (up to and including the exception frame)
-+ * up by one slot, incrementing the original SSP in the
-+ * exception frame,
-+ * - if we're on an IST stack, we need to increment the
-+ * original SSP.
-+ */
-+ BUG_ON((ptr[-1] ^ primary_shstk) >> PAGE_SHIFT);
-+
-+ if ( (ssp ^ primary_shstk) >> PAGE_SHIFT )
-+ {
-+ /*
-+ * We're on an IST stack. First make sure the two return
-+ * addresses actually match. Then increment the interrupted
-+ * context's SSP.
-+ */
-+ BUG_ON(stub_ra != *(unsigned long*)ptr[-1]);
-+ wrss(ptr[-1] + 8, &ptr[-1]);
-+ goto shstk_done;
-+ }
-+
-+ /* Make sure the two return addresses actually match. */
-+ BUG_ON(stub_ra != ptr[2]);
-+
-+ /* Move exception frame, updating SSP there. */
-+ wrss(ptr[1], &ptr[2]); /* %cs */
-+ wrss(ptr[0], &ptr[1]); /* %rip */
-+ wrss(ptr[-1] + 8, &ptr[0]); /* SSP */
-+
-+ /* Move all newer entries. */
-+ while ( --ptr != _p(ssp) )
-+ wrss(ptr[-1], &ptr[0]);
-+
-+ /* Finally account for our own stack having shifted up. */
-+ asm volatile ( "incsspd %0" :: "r" (2) );
-+
- goto shstk_done;
- }
- }
-@@ -912,7 +962,8 @@ static void fixup_exception_return(struct cpu_user_regs *regs,
-
- static bool extable_fixup(struct cpu_user_regs *regs, bool print)
- {
-- unsigned long fixup = search_exception_table(regs);
-+ unsigned long stub_ra = 0;
-+ unsigned long fixup = search_exception_table(regs, &stub_ra);
-
- if ( unlikely(fixup == 0) )
- return false;
-@@ -926,7 +977,7 @@ static bool extable_fixup(struct cpu_user_regs *regs, bool print)
- vector_name(regs->entry_vector), regs->error_code,
- _p(regs->rip), _p(regs->rip), _p(fixup));
-
-- fixup_exception_return(regs, fixup);
-+ fixup_exception_return(regs, fixup, stub_ra);
- this_cpu(last_extable_addr) = regs->rip;
-
- return true;
-@@ -1214,7 +1265,7 @@ void do_invalid_op(struct cpu_user_regs *regs)
- void (*fn)(struct cpu_user_regs *) = bug_ptr(bug);
-
- fn(regs);
-- fixup_exception_return(regs, (unsigned long)eip);
-+ fixup_exception_return(regs, (unsigned long)eip, 0);
- return;
- }
-
-@@ -1235,7 +1286,7 @@ void do_invalid_op(struct cpu_user_regs *regs)
- case BUGFRAME_warn:
- printk("Xen WARN at %s%s:%d\n", prefix, filename, lineno);
- show_execution_state(regs);
-- fixup_exception_return(regs, (unsigned long)eip);
-+ fixup_exception_return(regs, (unsigned long)eip, 0);
- return;
-
- case BUGFRAME_bug:
---
-2.44.0
-
diff --git a/0024-x86-smp-do-not-use-shorthand-IPI-destinations-in-CPU.patch b/0024-x86-smp-do-not-use-shorthand-IPI-destinations-in-CPU.patch
new file mode 100644
index 0000000..b69c88c
--- /dev/null
+++ b/0024-x86-smp-do-not-use-shorthand-IPI-destinations-in-CPU.patch
@@ -0,0 +1,98 @@
+From 98238d49ecb149a5ac07cb8032817904c404ac2b Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Wed, 26 Jun 2024 13:38:36 +0200
+Subject: [PATCH 24/56] x86/smp: do not use shorthand IPI destinations in CPU
+ hot{,un}plug contexts
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Due to the current rwlock logic, if the CPU calling get_cpu_maps() does
+so from a cpu_hotplug_{begin,done}() region the function will still
+return success, because a CPU taking the rwlock in read mode after
+having taken it in write mode is allowed. Such corner case makes using
+get_cpu_maps() alone not enough to prevent using the shorthand in CPU
+hotplug regions.
+
+Introduce a new helper to detect whether the current caller is between a
+cpu_hotplug_{begin,done}() region and use it in send_IPI_mask() to restrict
+shorthand usage.
+
+Fixes: 5500d265a2a8 ('x86/smp: use APIC ALLBUT destination shorthand when possible')
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 171c52fba5d94e050d704770480dcb983490d0ad
+master date: 2024-06-12 14:29:31 +0200
+---
+ xen/arch/x86/smp.c | 2 +-
+ xen/common/cpu.c | 5 +++++
+ xen/include/xen/cpu.h | 10 ++++++++++
+ xen/include/xen/rwlock.h | 2 ++
+ 4 files changed, 18 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c
+index 3a331cbdbc..340fcafb46 100644
+--- a/xen/arch/x86/smp.c
++++ b/xen/arch/x86/smp.c
+@@ -88,7 +88,7 @@ void send_IPI_mask(const cpumask_t *mask, int vector)
+ * the system have been accounted for.
+ */
+ if ( system_state > SYS_STATE_smp_boot &&
+- !unaccounted_cpus && !disabled_cpus &&
++ !unaccounted_cpus && !disabled_cpus && !cpu_in_hotplug_context() &&
+ /* NB: get_cpu_maps lock requires enabled interrupts. */
+ local_irq_is_enabled() && (cpus_locked = get_cpu_maps()) &&
+ (park_offline_cpus ||
+diff --git a/xen/common/cpu.c b/xen/common/cpu.c
+index 8709db4d29..6e35b114c0 100644
+--- a/xen/common/cpu.c
++++ b/xen/common/cpu.c
+@@ -68,6 +68,11 @@ void cpu_hotplug_done(void)
+ write_unlock(&cpu_add_remove_lock);
+ }
+
++bool cpu_in_hotplug_context(void)
++{
++ return rw_is_write_locked_by_me(&cpu_add_remove_lock);
++}
++
+ static NOTIFIER_HEAD(cpu_chain);
+
+ void __init register_cpu_notifier(struct notifier_block *nb)
+diff --git a/xen/include/xen/cpu.h b/xen/include/xen/cpu.h
+index e1d4eb5967..6bf5786750 100644
+--- a/xen/include/xen/cpu.h
++++ b/xen/include/xen/cpu.h
+@@ -13,6 +13,16 @@ void put_cpu_maps(void);
+ void cpu_hotplug_begin(void);
+ void cpu_hotplug_done(void);
+
++/*
++ * Returns true when the caller CPU is between a cpu_hotplug_{begin,done}()
++ * region.
++ *
++ * This is required to safely identify hotplug contexts, as get_cpu_maps()
++ * would otherwise succeed because a caller holding the lock in write mode is
++ * allowed to acquire the same lock in read mode.
++ */
++bool cpu_in_hotplug_context(void);
++
+ /* Receive notification of CPU hotplug events. */
+ void register_cpu_notifier(struct notifier_block *nb);
+
+diff --git a/xen/include/xen/rwlock.h b/xen/include/xen/rwlock.h
+index 9e35ee2edf..dc74d1c057 100644
+--- a/xen/include/xen/rwlock.h
++++ b/xen/include/xen/rwlock.h
+@@ -309,6 +309,8 @@ static always_inline void write_lock_irq(rwlock_t *l)
+
+ #define rw_is_locked(l) _rw_is_locked(l)
+ #define rw_is_write_locked(l) _rw_is_write_locked(l)
++#define rw_is_write_locked_by_me(l) \
++ lock_evaluate_nospec(_is_write_locked_by_me(atomic_read(&(l)->cnts)))
+
+
+ typedef struct percpu_rwlock percpu_rwlock_t;
+--
+2.45.2
+
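A condensed model of the corner case fixed above (plain C, not Xen code; the function names only echo the real ones): the recursive rwlock lets the CPU that did cpu_hotplug_begin() (write lock) also succeed at get_cpu_maps() (read trylock), so that test alone cannot rule out a hotplug context, and the new cpu_in_hotplug_context() check is what actually gates the IPI shorthand.

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy recursive rwlock state: remembers which CPU holds it for writing. */
    static int write_owner = -1;                 /* -1: not write-locked */

    static void hotplug_begin(int cpu) { write_owner = cpu; }
    static void hotplug_done(void)     { write_owner = -1; }

    /* Read-trylock succeeds if unlocked *or* if the caller is the writer. */
    static bool get_cpu_maps(int cpu)
    {
        return write_owner == -1 || write_owner == cpu;
    }

    static bool cpu_in_hotplug_context(int cpu)
    {
        return write_owner == cpu;
    }

    static bool may_use_shorthand(int cpu)
    {
        /* get_cpu_maps() alone is not enough: it also succeeds while the
         * caller itself sits inside a cpu_hotplug_{begin,done}() region. */
        return get_cpu_maps(cpu) && !cpu_in_hotplug_context(cpu);
    }

    int main(void)
    {
        hotplug_begin(0);
        printf("inside hotplug:  maps=%d shorthand=%d\n",
               get_cpu_maps(0), may_use_shorthand(0));
        hotplug_done();
        printf("outside hotplug: maps=%d shorthand=%d\n",
               get_cpu_maps(0), may_use_shorthand(0));

        return 0;
    }
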
diff --git a/0024-xen-arm-Fix-UBSAN-failure-in-start_xen.patch b/0024-xen-arm-Fix-UBSAN-failure-in-start_xen.patch
deleted file mode 100644
index 7bdd651..0000000
--- a/0024-xen-arm-Fix-UBSAN-failure-in-start_xen.patch
+++ /dev/null
@@ -1,52 +0,0 @@
-From 6cbccc4071ef49a8c591ecaddfdcb1cc26d28411 Mon Sep 17 00:00:00 2001
-From: Michal Orzel <michal.orzel@amd.com>
-Date: Thu, 8 Feb 2024 11:43:39 +0100
-Subject: [PATCH 24/67] xen/arm: Fix UBSAN failure in start_xen()
-
-When running Xen on arm32, in scenario where Xen is loaded at an address
-such as boot_phys_offset >= 2GB, UBSAN reports the following:
-
-(XEN) UBSAN: Undefined behaviour in arch/arm/setup.c:739:58
-(XEN) pointer operation underflowed 00200000 to 86800000
-(XEN) Xen WARN at common/ubsan/ubsan.c:172
-(XEN) ----[ Xen-4.19-unstable arm32 debug=y ubsan=y Not tainted ]----
-...
-(XEN) Xen call trace:
-(XEN) [<0031b4c0>] ubsan.c#ubsan_epilogue+0x18/0xf0 (PC)
-(XEN) [<0031d134>] __ubsan_handle_pointer_overflow+0xb8/0xd4 (LR)
-(XEN) [<0031d134>] __ubsan_handle_pointer_overflow+0xb8/0xd4
-(XEN) [<004d15a8>] start_xen+0xe0/0xbe0
-(XEN) [<0020007c>] head.o#primary_switched+0x4/0x30
-
-The failure is reported for the following line:
-(paddr_t)(uintptr_t)(_start + boot_phys_offset)
-
-This occurs because the compiler treats (ptr + size) with size bigger than
-PTRDIFF_MAX as undefined behavior. To address this, switch to macro
-virt_to_maddr(), given the future plans to eliminate boot_phys_offset.
-
-Signed-off-by: Michal Orzel <michal.orzel@amd.com>
-Reviewed-by: Luca Fancellu <luca.fancellu@arm.com>
-Tested-by: Luca Fancellu <luca.fancellu@arm.com>
-Acked-by: Julien Grall <jgrall@amazon.com>
-(cherry picked from commit e11f5766503c0ff074b4e0f888bbfc931518a169)
----
- xen/arch/arm/setup.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
-index 4395640019..9ee19c2bc1 100644
---- a/xen/arch/arm/setup.c
-+++ b/xen/arch/arm/setup.c
-@@ -1025,7 +1025,7 @@ void __init start_xen(unsigned long boot_phys_offset,
-
- /* Register Xen's load address as a boot module. */
- xen_bootmodule = add_boot_module(BOOTMOD_XEN,
-- (paddr_t)(uintptr_t)(_start + boot_phys_offset),
-+ virt_to_maddr(_start),
- (paddr_t)(uintptr_t)(_end - _start), false);
- BUG_ON(!xen_bootmodule);
-
---
-2.44.0
-
diff --git a/0025-x86-HVM-hide-SVM-VMX-when-their-enabling-is-prohibit.patch b/0025-x86-HVM-hide-SVM-VMX-when-their-enabling-is-prohibit.patch
deleted file mode 100644
index 28e489b..0000000
--- a/0025-x86-HVM-hide-SVM-VMX-when-their-enabling-is-prohibit.patch
+++ /dev/null
@@ -1,67 +0,0 @@
-From 9c0d518eb8dc69430e6a8d767bd101dad19b846a Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 5 Mar 2024 11:56:31 +0100
-Subject: [PATCH 25/67] x86/HVM: hide SVM/VMX when their enabling is prohibited
- by firmware
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-... or we fail to enable the functionality on the BSP for other reasons.
-The only place where hardware announcing the feature is recorded is the
-raw CPU policy/featureset.
-
-Inspired by https://lore.kernel.org/all/20230921114940.957141-1-pbonzini@redhat.com/.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Roger Pau Monné <roger.pau@citrix.com>
-master commit: 0b5f149338e35a795bf609ce584640b0977f9e6c
-master date: 2024-01-09 14:06:34 +0100
----
- xen/arch/x86/hvm/svm/svm.c | 1 +
- xen/arch/x86/hvm/vmx/vmcs.c | 17 +++++++++++++++++
- 2 files changed, 18 insertions(+)
-
-diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
-index fd32600ae3..3c17464550 100644
---- a/xen/arch/x86/hvm/svm/svm.c
-+++ b/xen/arch/x86/hvm/svm/svm.c
-@@ -1669,6 +1669,7 @@ const struct hvm_function_table * __init start_svm(void)
-
- if ( _svm_cpu_up(true) )
- {
-+ setup_clear_cpu_cap(X86_FEATURE_SVM);
- printk("SVM: failed to initialise.\n");
- return NULL;
- }
-diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
-index bcbecc6945..b5ecc51b43 100644
---- a/xen/arch/x86/hvm/vmx/vmcs.c
-+++ b/xen/arch/x86/hvm/vmx/vmcs.c
-@@ -2163,6 +2163,23 @@ int __init vmx_vmcs_init(void)
-
- if ( !ret )
- register_keyhandler('v', vmcs_dump, "dump VT-x VMCSs", 1);
-+ else
-+ {
-+ setup_clear_cpu_cap(X86_FEATURE_VMX);
-+
-+ /*
-+ * _vmx_vcpu_up() may have made it past feature identification.
-+ * Make sure all dependent features are off as well.
-+ */
-+ vmx_basic_msr = 0;
-+ vmx_pin_based_exec_control = 0;
-+ vmx_cpu_based_exec_control = 0;
-+ vmx_secondary_exec_control = 0;
-+ vmx_vmexit_control = 0;
-+ vmx_vmentry_control = 0;
-+ vmx_ept_vpid_cap = 0;
-+ vmx_vmfunc = 0;
-+ }
-
- return ret;
- }
---
-2.44.0
-
diff --git a/0025-x86-irq-limit-interrupt-movement-done-by-fixup_irqs.patch b/0025-x86-irq-limit-interrupt-movement-done-by-fixup_irqs.patch
new file mode 100644
index 0000000..7c40bba
--- /dev/null
+++ b/0025-x86-irq-limit-interrupt-movement-done-by-fixup_irqs.patch
@@ -0,0 +1,104 @@
+From ce0a0cb0a74a909abf988f242aa228acdd2917fe Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Wed, 26 Jun 2024 13:39:11 +0200
+Subject: [PATCH 25/56] x86/irq: limit interrupt movement done by fixup_irqs()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The current check used in fixup_irqs() to decide whether to move around
+interrupts is based on the affinity mask, but such mask can have all bits set,
+and hence is unlikely to be a subset of the input mask. For example if an
+interrupt has an affinity mask of all 1s, any input to fixup_irqs() that's not
+an all set CPU mask would cause that interrupt to be shuffled around
+unconditionally.
+
+What fixup_irqs() cares about is evacuating interrupts from CPUs not set on the
+input CPU mask, and for that purpose it should check whether the interrupt is
+assigned to a CPU not present in the input mask. Assume that ->arch.cpu_mask
+is a subset of the ->affinity mask, and keep the current logic that resets the
+->affinity mask if the interrupt has to be shuffled around.
+
+Doing the affinity movement based on ->arch.cpu_mask requires removing the
+special handling to ->arch.cpu_mask done for high priority vectors, otherwise
+the adjustment done to cpu_mask makes them always skip the CPU interrupt
+movement.
+
+While there also adjust the comment as to the purpose of fixup_irqs().
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: c7564d7366d865cc407e3d64bca816d07edee174
+master date: 2024-06-12 14:30:40 +0200
+---
+ xen/arch/x86/include/asm/irq.h | 2 +-
+ xen/arch/x86/irq.c | 21 +++++++++++----------
+ 2 files changed, 12 insertions(+), 11 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/irq.h b/xen/arch/x86/include/asm/irq.h
+index d7fb8ec7e8..71d4a8fc56 100644
+--- a/xen/arch/x86/include/asm/irq.h
++++ b/xen/arch/x86/include/asm/irq.h
+@@ -132,7 +132,7 @@ void free_domain_pirqs(struct domain *d);
+ int map_domain_emuirq_pirq(struct domain *d, int pirq, int emuirq);
+ int unmap_domain_pirq_emuirq(struct domain *d, int pirq);
+
+-/* Reset irq affinities to match the given CPU mask. */
++/* Evacuate interrupts assigned to CPUs not present in the input CPU mask. */
+ void fixup_irqs(const cpumask_t *mask, bool verbose);
+ void fixup_eoi(void);
+
+diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
+index db14df93db..566331bec1 100644
+--- a/xen/arch/x86/irq.c
++++ b/xen/arch/x86/irq.c
+@@ -2529,7 +2529,7 @@ static int __init cf_check setup_dump_irqs(void)
+ }
+ __initcall(setup_dump_irqs);
+
+-/* Reset irq affinities to match the given CPU mask. */
++/* Evacuate interrupts assigned to CPUs not present in the input CPU mask. */
+ void fixup_irqs(const cpumask_t *mask, bool verbose)
+ {
+ unsigned int irq;
+@@ -2553,19 +2553,15 @@ void fixup_irqs(const cpumask_t *mask, bool verbose)
+
+ vector = irq_to_vector(irq);
+ if ( vector >= FIRST_HIPRIORITY_VECTOR &&
+- vector <= LAST_HIPRIORITY_VECTOR )
++ vector <= LAST_HIPRIORITY_VECTOR &&
++ desc->handler == &no_irq_type )
+ {
+- cpumask_and(desc->arch.cpu_mask, desc->arch.cpu_mask, mask);
+-
+ /*
+ * This can in particular happen when parking secondary threads
+ * during boot and when the serial console wants to use a PCI IRQ.
+ */
+- if ( desc->handler == &no_irq_type )
+- {
+- spin_unlock(&desc->lock);
+- continue;
+- }
++ spin_unlock(&desc->lock);
++ continue;
+ }
+
+ if ( desc->arch.move_cleanup_count )
+@@ -2586,7 +2582,12 @@ void fixup_irqs(const cpumask_t *mask, bool verbose)
+ affinity);
+ }
+
+- if ( !desc->action || cpumask_subset(desc->affinity, mask) )
++ /*
++ * Avoid shuffling the interrupt around as long as current target CPUs
++ * are a subset of the input mask. What fixup_irqs() cares about is
++ * evacuating interrupts from CPUs not in the input mask.
++ */
++ if ( !desc->action || cpumask_subset(desc->arch.cpu_mask, mask) )
+ {
+ spin_unlock(&desc->lock);
+ continue;
+--
+2.45.2
+
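A small numeric illustration (stand-alone C, made-up masks) of why the subset test above had to move from ->affinity to ->arch.cpu_mask: an all-ones affinity is never a subset of a partial input mask, so the old check shuffled interrupts even when their actual target CPUs all stayed online.

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy 8-CPU cpumask as a bitmap; subset(a, b): every bit of a is in b. */
    typedef unsigned char cpumask8;

    static bool subset(cpumask8 a, cpumask8 b) { return (a & ~b) == 0; }

    int main(void)
    {
        cpumask8 input    = 0x7f; /* CPUs 0-6 stay online, CPU 7 goes away */
        cpumask8 affinity = 0xff; /* ->affinity left at "all CPUs"         */
        cpumask8 cpu_mask = 0x01; /* interrupt actually targets CPU 0 only */

        /* Old check: false, so the interrupt would be moved needlessly. */
        printf("affinity subset of input: %d\n", subset(affinity, input));

        /* New check: true, so the interrupt is left where it is. */
        printf("cpu_mask subset of input: %d\n", subset(cpu_mask, input));

        return 0;
    }
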
diff --git a/0026-x86-EPT-correct-special-page-checking-in-epte_get_en.patch b/0026-x86-EPT-correct-special-page-checking-in-epte_get_en.patch
new file mode 100644
index 0000000..c94728a
--- /dev/null
+++ b/0026-x86-EPT-correct-special-page-checking-in-epte_get_en.patch
@@ -0,0 +1,46 @@
+From 6e647efaf2b02ce92bcf80bec47c18cca5084f8a Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 26 Jun 2024 13:39:44 +0200
+Subject: [PATCH 26/56] x86/EPT: correct special page checking in
+ epte_get_entry_emt()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+mfn_valid() granularity is (currently) 256Mb. Therefore the start of a
+1Gb page passing the test doesn't necessarily mean all parts of such a
+range would also pass. Yet using the result of mfn_to_page() on an MFN
+which doesn't pass mfn_valid() checking is liable to result in a crash
+(the invocation of mfn_to_page() alone is presumably "just" UB in such a
+case).
+
+Fixes: ca24b2ffdbd9 ("x86/hvm: set 'ipat' in EPT for special pages")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: 5540b94e8191059eb9cbbe98ac316232a42208f6
+master date: 2024-06-13 16:53:34 +0200
+---
+ xen/arch/x86/mm/p2m-ept.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
+index 85c4e8e54f..1aa6bbc771 100644
+--- a/xen/arch/x86/mm/p2m-ept.c
++++ b/xen/arch/x86/mm/p2m-ept.c
+@@ -518,8 +518,12 @@ int epte_get_entry_emt(struct domain *d, gfn_t gfn, mfn_t mfn,
+ }
+
+ for ( special_pgs = i = 0; i < (1ul << order); i++ )
+- if ( is_special_page(mfn_to_page(mfn_add(mfn, i))) )
++ {
++ mfn_t cur = mfn_add(mfn, i);
++
++ if ( mfn_valid(cur) && is_special_page(mfn_to_page(cur)) )
+ special_pgs++;
++ }
+
+ if ( special_pgs )
+ {
+--
+2.45.2
+
diff --git a/0026-xen-sched-Fix-UB-shift-in-compat_set_timer_op.patch b/0026-xen-sched-Fix-UB-shift-in-compat_set_timer_op.patch
deleted file mode 100644
index 4b051ea..0000000
--- a/0026-xen-sched-Fix-UB-shift-in-compat_set_timer_op.patch
+++ /dev/null
@@ -1,86 +0,0 @@
-From b75bee183210318150e678e14b35224d7c73edb6 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 5 Mar 2024 11:57:02 +0100
-Subject: [PATCH 26/67] xen/sched: Fix UB shift in compat_set_timer_op()
-
-Tamas reported this UBSAN failure from fuzzing:
-
- (XEN) ================================================================================
- (XEN) UBSAN: Undefined behaviour in common/sched/compat.c:48:37
- (XEN) left shift of negative value -2147425536
- (XEN) ----[ Xen-4.19-unstable x86_64 debug=y ubsan=y Not tainted ]----
- ...
- (XEN) Xen call trace:
- (XEN) [<ffff82d040307c1c>] R ubsan.c#ubsan_epilogue+0xa/0xd9
- (XEN) [<ffff82d040308afb>] F __ubsan_handle_shift_out_of_bounds+0x11a/0x1c5
- (XEN) [<ffff82d040307758>] F compat_set_timer_op+0x41/0x43
- (XEN) [<ffff82d04040e4cc>] F hvm_do_multicall_call+0x77f/0xa75
- (XEN) [<ffff82d040519462>] F arch_do_multicall_call+0xec/0xf1
- (XEN) [<ffff82d040261567>] F do_multicall+0x1dc/0xde3
- (XEN) [<ffff82d04040d2b3>] F hvm_hypercall+0xa00/0x149a
- (XEN) [<ffff82d0403cd072>] F vmx_vmexit_handler+0x1596/0x279c
- (XEN) [<ffff82d0403d909b>] F vmx_asm_vmexit_handler+0xdb/0x200
-
-Left-shifting any negative value is strictly undefined behaviour in C, and
-the two parameters here come straight from the guest.
-
-The fuzzer happened to choose lo 0xf, hi 0x8000e300.
-
-Switch everything to be unsigned values, making the shift well defined.
-
-As GCC documents:
-
- As an extension to the C language, GCC does not use the latitude given in
- C99 and C11 only to treat certain aspects of signed '<<' as undefined.
- However, -fsanitize=shift (and -fsanitize=undefined) will diagnose such
- cases.
-
-this was deemed not to need an XSA.
-
-Note: The unsigned -> signed conversion for do_set_timer_op()'s s_time_t
-parameter is also well defined. C makes it implementation defined, and GCC
-defines it as reduction modulo 2^N to be within range of the new type.
-
-Fixes: 2942f45e09fb ("Enable compatibility mode operation for HYPERVISOR_sched_op and HYPERVISOR_set_timer_op.")
-Reported-by: Tamas K Lengyel <tamas@tklengyel.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: ae6d4fd876765e6d623eec67d14f5d0464be09cb
-master date: 2024-02-01 19:52:44 +0000
----
- xen/common/sched/compat.c | 4 ++--
- xen/include/hypercall-defs.c | 2 +-
- 2 files changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/xen/common/sched/compat.c b/xen/common/sched/compat.c
-index 040b4caca2..b827fdecb8 100644
---- a/xen/common/sched/compat.c
-+++ b/xen/common/sched/compat.c
-@@ -39,9 +39,9 @@ static int compat_poll(struct compat_sched_poll *compat)
-
- #include "core.c"
-
--int compat_set_timer_op(u32 lo, s32 hi)
-+int compat_set_timer_op(uint32_t lo, uint32_t hi)
- {
-- return do_set_timer_op(((s64)hi << 32) | lo);
-+ return do_set_timer_op(((uint64_t)hi << 32) | lo);
- }
-
- /*
-diff --git a/xen/include/hypercall-defs.c b/xen/include/hypercall-defs.c
-index 1896121074..c442dee284 100644
---- a/xen/include/hypercall-defs.c
-+++ b/xen/include/hypercall-defs.c
-@@ -127,7 +127,7 @@ xenoprof_op(int op, void *arg)
-
- #ifdef CONFIG_COMPAT
- prefix: compat
--set_timer_op(uint32_t lo, int32_t hi)
-+set_timer_op(uint32_t lo, uint32_t hi)
- multicall(multicall_entry_compat_t *call_list, uint32_t nr_calls)
- memory_op(unsigned int cmd, void *arg)
- #ifdef CONFIG_IOREQ_SERVER
---
-2.44.0
-
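The undefined behaviour described in the patch above is easy to reproduce in isolation; the values below are the ones from the fuzzer report, and the fixed expression mirrors the final form of compat_set_timer_op():

    #include <stdint.h>
    #include <stdio.h>

    /* Guest-supplied halves from the report: hi is negative as an int32_t. */
    static const uint32_t lo = 0xf;
    static const uint32_t hi = 0x8000e300;

    int main(void)
    {
        /*
         * Old form (kept as a comment): left-shifting a negative value is
         * undefined behaviour in C, which is what UBSAN flagged.
         *
         *   int64_t bad = ((int64_t)(int32_t)hi << 32) | lo;
         */

        /*
         * Fixed form: the shift happens on an unsigned type, which is fully
         * defined; the conversion back to a signed 64-bit value (s_time_t in
         * Xen) is implementation-defined, not undefined, and GCC documents it
         * as reduction modulo 2^N.
         */
        int64_t timeout = (int64_t)(((uint64_t)hi << 32) | lo);

        printf("timeout = %#llx\n", (unsigned long long)timeout);

        return 0;
    }
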
diff --git a/0027-x86-EPT-avoid-marking-non-present-entries-for-re-con.patch b/0027-x86-EPT-avoid-marking-non-present-entries-for-re-con.patch
new file mode 100644
index 0000000..23e8946
--- /dev/null
+++ b/0027-x86-EPT-avoid-marking-non-present-entries-for-re-con.patch
@@ -0,0 +1,85 @@
+From d31385be5c8e8bc5efb6f8848057bd0c69e8274a Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 26 Jun 2024 13:40:11 +0200
+Subject: [PATCH 27/56] x86/EPT: avoid marking non-present entries for
+ re-configuring
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+For non-present entries EMT, like most other fields, is meaningless to
+hardware. Make the logic in ept_set_entry() setting the field (and iPAT)
+conditional upon dealing with a present entry, leaving the value at 0
+otherwise. This has two effects for epte_get_entry_emt() which we'll
+want to leverage subsequently:
+1) The call moved here now won't be issued with INVALID_MFN anymore (a
+ respective BUG_ON() is being added).
+2) Neither of the other two calls could now be issued with a truncated
+ form of INVALID_MFN anymore (as long as there's no bug anywhere
+ marking an entry present when that was populated using INVALID_MFN).
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: 777c71d31325bc55ba1cc3f317d4155fe519ab0b
+master date: 2024-06-13 16:54:17 +0200
+---
+ xen/arch/x86/mm/p2m-ept.c | 29 ++++++++++++++++++-----------
+ 1 file changed, 18 insertions(+), 11 deletions(-)
+
+diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
+index 1aa6bbc771..641d61b350 100644
+--- a/xen/arch/x86/mm/p2m-ept.c
++++ b/xen/arch/x86/mm/p2m-ept.c
+@@ -649,6 +649,8 @@ static int cf_check resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
+ if ( e.emt != MTRR_NUM_TYPES )
+ break;
+
++ ASSERT(is_epte_present(&e));
++
+ if ( level == 0 )
+ {
+ for ( gfn -= i, i = 0; i < EPT_PAGETABLE_ENTRIES; ++i )
+@@ -914,17 +916,6 @@ ept_set_entry(struct p2m_domain *p2m, gfn_t gfn_, mfn_t mfn,
+
+ if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) )
+ {
+- bool ipat;
+- int emt = epte_get_entry_emt(p2m->domain, _gfn(gfn), mfn,
+- i * EPT_TABLE_ORDER, &ipat,
+- p2mt);
+-
+- if ( emt >= 0 )
+- new_entry.emt = emt;
+- else /* ept_handle_misconfig() will need to take care of this. */
+- new_entry.emt = MTRR_NUM_TYPES;
+-
+- new_entry.ipat = ipat;
+ new_entry.sp = !!i;
+ new_entry.sa_p2mt = p2mt;
+ new_entry.access = p2ma;
+@@ -940,6 +931,22 @@ ept_set_entry(struct p2m_domain *p2m, gfn_t gfn_, mfn_t mfn,
+ need_modify_vtd_table = 0;
+
+ ept_p2m_type_to_flags(p2m, &new_entry);
++
++ if ( is_epte_present(&new_entry) )
++ {
++ bool ipat;
++ int emt = epte_get_entry_emt(p2m->domain, _gfn(gfn), mfn,
++ i * EPT_TABLE_ORDER, &ipat,
++ p2mt);
++
++ BUG_ON(mfn_eq(mfn, INVALID_MFN));
++
++ if ( emt >= 0 )
++ new_entry.emt = emt;
++ else /* ept_handle_misconfig() will need to take care of this. */
++ new_entry.emt = MTRR_NUM_TYPES;
++ new_entry.ipat = ipat;
++ }
+ }
+
+ if ( sve != -1 )
+--
+2.45.2
+
diff --git a/0027-x86-spec-print-the-built-in-SPECULATIVE_HARDEN_-opti.patch b/0027-x86-spec-print-the-built-in-SPECULATIVE_HARDEN_-opti.patch
deleted file mode 100644
index 845247a..0000000
--- a/0027-x86-spec-print-the-built-in-SPECULATIVE_HARDEN_-opti.patch
+++ /dev/null
@@ -1,54 +0,0 @@
-From 76ea2aab3652cc34e474de0905f0a9cd4df7d087 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 5 Mar 2024 11:57:41 +0100
-Subject: [PATCH 27/67] x86/spec: print the built-in SPECULATIVE_HARDEN_*
- options
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Just like it's done for INDIRECT_THUNK and SHADOW_PAGING.
-
-Reported-by: Jan Beulich <jbeulich@suse.com>
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 6e9507f7d51fe49df8bc70f83e49ce06c92e4e54
-master date: 2024-02-27 14:57:52 +0100
----
- xen/arch/x86/spec_ctrl.c | 14 +++++++++++++-
- 1 file changed, 13 insertions(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 661716d695..93f1cf3bb5 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -488,13 +488,25 @@ static void __init print_details(enum ind_thunk thunk)
- (e21a & cpufeat_mask(X86_FEATURE_SBPB)) ? " SBPB" : "");
-
- /* Compiled-in support which pertains to mitigations. */
-- if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) )
-+ if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) ||
-+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_ARRAY) ||
-+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) ||
-+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS) )
- printk(" Compiled-in support:"
- #ifdef CONFIG_INDIRECT_THUNK
- " INDIRECT_THUNK"
- #endif
- #ifdef CONFIG_SHADOW_PAGING
- " SHADOW_PAGING"
-+#endif
-+#ifdef CONFIG_SPECULATIVE_HARDEN_ARRAY
-+ " HARDEN_ARRAY"
-+#endif
-+#ifdef CONFIG_SPECULATIVE_HARDEN_BRANCH
-+ " HARDEN_BRANCH"
-+#endif
-+#ifdef CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS
-+ " HARDEN_GUEST_ACCESS"
- #endif
- "\n");
-
---
-2.44.0
-
diff --git a/0028-x86-EPT-drop-questionable-mfn_valid-from-epte_get_en.patch b/0028-x86-EPT-drop-questionable-mfn_valid-from-epte_get_en.patch
new file mode 100644
index 0000000..ee495d4
--- /dev/null
+++ b/0028-x86-EPT-drop-questionable-mfn_valid-from-epte_get_en.patch
@@ -0,0 +1,47 @@
+From 3b777c2ce4ea8cf67b79a5496e51201145606798 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 26 Jun 2024 13:40:35 +0200
+Subject: [PATCH 28/56] x86/EPT: drop questionable mfn_valid() from
+ epte_get_entry_emt()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+mfn_valid() is RAM-focused; it will often return false for MMIO. Yet
+access to actual MMIO space should not generally be restricted to UC
+only; especially video frame buffer accesses are unduly affected by such
+a restriction.
+
+Since, as of 777c71d31325 ("x86/EPT: avoid marking non-present entries
+for re-configuring"), the function won't be called with INVALID_MFN or,
+worse, truncated forms thereof anymore, we can fully drop that check.
+
+Fixes: 81fd0d3ca4b2 ("x86/hvm: simplify 'mmio_direct' check in epte_get_entry_emt()")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: 4fdd8d75566fdad06667a79ec0ce6f43cc466c54
+master date: 2024-06-13 16:55:22 +0200
+---
+ xen/arch/x86/mm/p2m-ept.c | 6 ------
+ 1 file changed, 6 deletions(-)
+
+diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
+index 641d61b350..d325424e97 100644
+--- a/xen/arch/x86/mm/p2m-ept.c
++++ b/xen/arch/x86/mm/p2m-ept.c
+@@ -500,12 +500,6 @@ int epte_get_entry_emt(struct domain *d, gfn_t gfn, mfn_t mfn,
+ return -1;
+ }
+
+- if ( !mfn_valid(mfn) )
+- {
+- *ipat = true;
+- return X86_MT_UC;
+- }
+-
+ /*
+ * Conditional must be kept in sync with the code in
+ * {iomem,ioports}_{permit,deny}_access().
+--
+2.45.2
+
diff --git a/0028-x86-spec-fix-INDIRECT_THUNK-option-to-only-be-set-wh.patch b/0028-x86-spec-fix-INDIRECT_THUNK-option-to-only-be-set-wh.patch
deleted file mode 100644
index dfbf516..0000000
--- a/0028-x86-spec-fix-INDIRECT_THUNK-option-to-only-be-set-wh.patch
+++ /dev/null
@@ -1,67 +0,0 @@
-From 693455c3c370e535eb6cd065800ff91e147815fa Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 5 Mar 2024 11:58:04 +0100
-Subject: [PATCH 28/67] x86/spec: fix INDIRECT_THUNK option to only be set when
- build-enabled
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Attempt to provide a more helpful error message when the user attempts to set
-spec-ctrl=bti-thunk option but the support is build-time disabled.
-
-While there also adjust the command line documentation to mention
-CONFIG_INDIRECT_THUNK instead of INDIRECT_THUNK.
-
-Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 8441fa806a3b778867867cd0159fa1722e90397e
-master date: 2024-02-27 14:58:20 +0100
----
- docs/misc/xen-command-line.pandoc | 10 +++++-----
- xen/arch/x86/spec_ctrl.c | 7 ++++++-
- 2 files changed, 11 insertions(+), 6 deletions(-)
-
-diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
-index 05f613c71c..2006697226 100644
---- a/docs/misc/xen-command-line.pandoc
-+++ b/docs/misc/xen-command-line.pandoc
-@@ -2378,11 +2378,11 @@ guests to use.
- performance reasons dom0 is unprotected by default. If it is necessary to
- protect dom0 too, boot with `spec-ctrl=ibpb-entry`.
-
--If Xen was compiled with INDIRECT_THUNK support, `bti-thunk=` can be used to
--select which of the thunks gets patched into the `__x86_indirect_thunk_%reg`
--locations. The default thunk is `retpoline` (generally preferred), with the
--alternatives being `jmp` (a `jmp *%reg` gadget, minimal overhead), and
--`lfence` (an `lfence; jmp *%reg` gadget).
-+If Xen was compiled with `CONFIG_INDIRECT_THUNK` support, `bti-thunk=` can be
-+used to select which of the thunks gets patched into the
-+`__x86_indirect_thunk_%reg` locations. The default thunk is `retpoline`
-+(generally preferred), with the alternatives being `jmp` (a `jmp *%reg` gadget,
-+minimal overhead), and `lfence` (an `lfence; jmp *%reg` gadget).
-
- On hardware supporting IBRS (Indirect Branch Restricted Speculation), the
- `ibrs=` option can be used to force or prevent Xen using the feature itself.
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 93f1cf3bb5..098fa3184d 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -253,7 +253,12 @@ static int __init cf_check parse_spec_ctrl(const char *s)
- {
- s += 10;
-
-- if ( !cmdline_strcmp(s, "retpoline") )
-+ if ( !IS_ENABLED(CONFIG_INDIRECT_THUNK) )
-+ {
-+ no_config_param("INDIRECT_THUNK", "spec-ctrl", s - 10, ss);
-+ rc = -EINVAL;
-+ }
-+ else if ( !cmdline_strcmp(s, "retpoline") )
- opt_thunk = THUNK_RETPOLINE;
- else if ( !cmdline_strcmp(s, "lfence") )
- opt_thunk = THUNK_LFENCE;
---
-2.44.0
-
diff --git a/0029-x86-Intel-unlock-CPUID-earlier-for-the-BSP.patch b/0029-x86-Intel-unlock-CPUID-earlier-for-the-BSP.patch
new file mode 100644
index 0000000..6722508
--- /dev/null
+++ b/0029-x86-Intel-unlock-CPUID-earlier-for-the-BSP.patch
@@ -0,0 +1,105 @@
+From c4b284912695a5802433512b913e968eda01544f Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 26 Jun 2024 13:41:05 +0200
+Subject: [PATCH 29/56] x86/Intel: unlock CPUID earlier for the BSP
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Intel CPUs have a MSR bit to limit CPUID enumeration to leaf two. If
+this bit is set by the BIOS then CPUID evaluation does not work when
+data from any leaf greater than two is needed; early_cpu_init() in
+particular wants to collect leaf 7 data.
+
+Cure this by unlocking CPUID right before evaluating anything which
+depends on the maximum CPUID leaf being greater than two.
+
+Inspired by (and description cloned from) Linux commit 0c2f6d04619e
+("x86/topology/intel: Unlock CPUID before evaluating anything").
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: fa4d026737a47cd1d66ffb797a29150b4453aa9f
+master date: 2024-06-18 15:12:44 +0200
+---
+ xen/arch/x86/cpu/common.c | 3 ++-
+ xen/arch/x86/cpu/cpu.h | 2 ++
+ xen/arch/x86/cpu/intel.c | 29 +++++++++++++++++------------
+ 3 files changed, 21 insertions(+), 13 deletions(-)
+
+diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
+index 26eed2ade1..edec0a2546 100644
+--- a/xen/arch/x86/cpu/common.c
++++ b/xen/arch/x86/cpu/common.c
+@@ -336,7 +336,8 @@ void __init early_cpu_init(bool verbose)
+
+ c->x86_vendor = x86_cpuid_lookup_vendor(ebx, ecx, edx);
+ switch (c->x86_vendor) {
+- case X86_VENDOR_INTEL: actual_cpu = intel_cpu_dev; break;
++ case X86_VENDOR_INTEL: intel_unlock_cpuid_leaves(c);
++ actual_cpu = intel_cpu_dev; break;
+ case X86_VENDOR_AMD: actual_cpu = amd_cpu_dev; break;
+ case X86_VENDOR_CENTAUR: actual_cpu = centaur_cpu_dev; break;
+ case X86_VENDOR_SHANGHAI: actual_cpu = shanghai_cpu_dev; break;
+diff --git a/xen/arch/x86/cpu/cpu.h b/xen/arch/x86/cpu/cpu.h
+index e3d06278b3..8be65e975a 100644
+--- a/xen/arch/x86/cpu/cpu.h
++++ b/xen/arch/x86/cpu/cpu.h
+@@ -24,3 +24,5 @@ void amd_init_lfence(struct cpuinfo_x86 *c);
+ void amd_init_ssbd(const struct cpuinfo_x86 *c);
+ void amd_init_spectral_chicken(void);
+ void detect_zen2_null_seg_behaviour(void);
++
++void intel_unlock_cpuid_leaves(struct cpuinfo_x86 *c);
+diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c
+index deb7b70464..0dc7c27601 100644
+--- a/xen/arch/x86/cpu/intel.c
++++ b/xen/arch/x86/cpu/intel.c
+@@ -303,10 +303,24 @@ static void __init noinline intel_init_levelling(void)
+ ctxt_switch_masking = intel_ctxt_switch_masking;
+ }
+
+-static void cf_check early_init_intel(struct cpuinfo_x86 *c)
++/* Unmask CPUID levels if masked. */
++void intel_unlock_cpuid_leaves(struct cpuinfo_x86 *c)
+ {
+- u64 misc_enable, disable;
++ uint64_t misc_enable, disable;
++
++ rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
++
++ disable = misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID;
++ if (disable) {
++ wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & ~disable);
++ bootsym(trampoline_misc_enable_off) |= disable;
++ c->cpuid_level = cpuid_eax(0);
++ printk(KERN_INFO "revised cpuid level: %u\n", c->cpuid_level);
++ }
++}
+
++static void cf_check early_init_intel(struct cpuinfo_x86 *c)
++{
+ /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
+ if (c->x86 == 15 && c->x86_cache_alignment == 64)
+ c->x86_cache_alignment = 128;
+@@ -315,16 +329,7 @@ static void cf_check early_init_intel(struct cpuinfo_x86 *c)
+ bootsym(trampoline_misc_enable_off) & MSR_IA32_MISC_ENABLE_XD_DISABLE)
+ printk(KERN_INFO "re-enabled NX (Execute Disable) protection\n");
+
+- /* Unmask CPUID levels and NX if masked: */
+- rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
+-
+- disable = misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID;
+- if (disable) {
+- wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & ~disable);
+- bootsym(trampoline_misc_enable_off) |= disable;
+- printk(KERN_INFO "revised cpuid level: %d\n",
+- cpuid_eax(0));
+- }
++ intel_unlock_cpuid_leaves(c);
+
+ /* CPUID workaround for Intel 0F33/0F34 CPU */
+ if (boot_cpu_data.x86 == 0xF && boot_cpu_data.x86_model == 3 &&
+--
+2.45.2
+
diff --git a/0029-x86-spec-do-not-print-thunk-option-selection-if-not-.patch b/0029-x86-spec-do-not-print-thunk-option-selection-if-not-.patch
deleted file mode 100644
index 71e6633..0000000
--- a/0029-x86-spec-do-not-print-thunk-option-selection-if-not-.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From 0ce25b46ab2fb53a1b58f7682ca14971453f4f2c Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 5 Mar 2024 11:58:36 +0100
-Subject: [PATCH 29/67] x86/spec: do not print thunk option selection if not
- built-in
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Since the thunk built-in enable is printed as part of the "Compiled-in
-support:" line, avoid printing anything in "Xen settings:" if the thunk is
-disabled at build time.
-
-Note the BTI-Thunk option printing is also adjusted to print a colon in the
-same way the other options on the line do.
-
-Requested-by: Jan Beulich <jbeulich@suse.com>
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 576528a2a742069af203e90c613c5c93e23c9755
-master date: 2024-02-27 14:58:40 +0100
----
- xen/arch/x86/spec_ctrl.c | 11 ++++++-----
- 1 file changed, 6 insertions(+), 5 deletions(-)
-
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 098fa3184d..25a18ac598 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -516,11 +516,12 @@ static void __init print_details(enum ind_thunk thunk)
- "\n");
-
- /* Settings for Xen's protection, irrespective of guests. */
-- printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n",
-- thunk == THUNK_NONE ? "N/A" :
-- thunk == THUNK_RETPOLINE ? "RETPOLINE" :
-- thunk == THUNK_LFENCE ? "LFENCE" :
-- thunk == THUNK_JMP ? "JMP" : "?",
-+ printk(" Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n",
-+ thunk != THUNK_NONE ? "BTI-Thunk: " : "",
-+ thunk == THUNK_NONE ? "" :
-+ thunk == THUNK_RETPOLINE ? "RETPOLINE, " :
-+ thunk == THUNK_LFENCE ? "LFENCE, " :
-+ thunk == THUNK_JMP ? "JMP, " : "?, ",
- (!boot_cpu_has(X86_FEATURE_IBRSB) &&
- !boot_cpu_has(X86_FEATURE_IBRS)) ? "No" :
- (default_xen_spec_ctrl & SPEC_CTRL_IBRS) ? "IBRS+" : "IBRS-",
---
-2.44.0
-
diff --git a/0030-x86-irq-deal-with-old_cpu_mask-for-interrupts-in-mov.patch b/0030-x86-irq-deal-with-old_cpu_mask-for-interrupts-in-mov.patch
new file mode 100644
index 0000000..785df10
--- /dev/null
+++ b/0030-x86-irq-deal-with-old_cpu_mask-for-interrupts-in-mov.patch
@@ -0,0 +1,84 @@
+From 39a6170c15bf369a2b26c855ea7621387ed4070b Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Wed, 26 Jun 2024 13:41:35 +0200
+Subject: [PATCH 30/56] x86/irq: deal with old_cpu_mask for interrupts in
+ movement in fixup_irqs()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Given the current logic it's possible for ->arch.old_cpu_mask to get out of
+sync: if a CPU set in old_cpu_mask is offlined and then onlined again without
+old_cpu_mask having been updated, the data in the mask will no longer be
+accurate, as when brought back online the CPU will no longer have old_vector
+configured to handle the old interrupt source.
+
+If there's an interrupt movement in progress, and the to-be-offlined CPU (which
+is the call context) is in old_cpu_mask, clear it and update the mask, so that
+it doesn't contain stale data.
+
+Note that when the system is going down fixup_irqs() will be called by
+smp_send_stop() from CPU 0 with a mask containing only CPU 0, effectively
+asking to move all interrupts to the current caller (CPU 0), which is the only
+CPU to remain online. In that case we don't care to migrate interrupts that
+are in the process of being moved, as it's likely we won't be able to move all
+interrupts to CPU 0 due to vector shortage anyway.
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 817d1cd627be668c358d038f0fadbf7d24d417d3
+master date: 2024-06-18 15:14:49 +0200
+---
+ xen/arch/x86/irq.c | 29 ++++++++++++++++++++++++++++-
+ 1 file changed, 28 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
+index 566331bec1..f877327975 100644
+--- a/xen/arch/x86/irq.c
++++ b/xen/arch/x86/irq.c
+@@ -2539,7 +2539,7 @@ void fixup_irqs(const cpumask_t *mask, bool verbose)
+ for ( irq = 0; irq < nr_irqs; irq++ )
+ {
+ bool break_affinity = false, set_affinity = true;
+- unsigned int vector;
++ unsigned int vector, cpu = smp_processor_id();
+ cpumask_t *affinity = this_cpu(scratch_cpumask);
+
+ if ( irq == 2 )
+@@ -2582,6 +2582,33 @@ void fixup_irqs(const cpumask_t *mask, bool verbose)
+ affinity);
+ }
+
++ if ( desc->arch.move_in_progress &&
++ /*
++ * Only attempt to adjust the mask if the current CPU is going
++ * offline, otherwise the whole system is going down and leaving
++ * stale data in the masks is fine.
++ */
++ !cpu_online(cpu) &&
++ cpumask_test_cpu(cpu, desc->arch.old_cpu_mask) )
++ {
++ /*
++ * This CPU is going offline, remove it from ->arch.old_cpu_mask
++ * and possibly release the old vector if the old mask becomes
++ * empty.
++ *
++ * Note cleaning ->arch.old_cpu_mask is required if the CPU is
++ * brought offline and then online again, as when re-onlined the
++ * per-cpu vector table will no longer have ->arch.old_vector
++ * setup, and hence ->arch.old_cpu_mask would be stale.
++ */
++ cpumask_clear_cpu(cpu, desc->arch.old_cpu_mask);
++ if ( cpumask_empty(desc->arch.old_cpu_mask) )
++ {
++ desc->arch.move_in_progress = 0;
++ release_old_vec(desc);
++ }
++ }
++
+ /*
+ * Avoid shuffling the interrupt around as long as current target CPUs
+ * are a subset of the input mask. What fixup_irqs() cares about is
+--
+2.45.2
+
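For illustration, a minimal self-contained C sketch of the ->arch.old_cpu_mask cleanup introduced by the patch above: cpumasks are modelled as 64-bit bitmaps, the vector release is just a printf, and names such as fake_irq_desc and evacuate_old_mask are stand-ins, not Xen code.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct fake_irq_desc {
        uint64_t old_cpu_mask;     /* CPUs still pointing at the old vector */
        bool     move_in_progress;
        int      old_vector;
    };

    static void release_old_vec(struct fake_irq_desc *d)
    {
        printf("releasing old vector %#x\n", d->old_vector);
        d->old_vector = -1;
    }

    /* Called on the CPU going offline; mirrors the hunk added to fixup_irqs(). */
    static void evacuate_old_mask(struct fake_irq_desc *d, unsigned int cpu)
    {
        if ( !d->move_in_progress || !(d->old_cpu_mask & (1ULL << cpu)) )
            return;

        d->old_cpu_mask &= ~(1ULL << cpu);
        if ( !d->old_cpu_mask )
        {
            d->move_in_progress = false;
            release_old_vec(d);
        }
    }

    int main(void)
    {
        struct fake_irq_desc d = { .old_cpu_mask = 1ULL << 3,
                                   .move_in_progress = true, .old_vector = 0x40 };
        evacuate_old_mask(&d, 3);  /* CPU3 goes offline: mask empties, vector freed */
        return 0;
    }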
diff --git a/0030-xen-livepatch-register-livepatch-regions-when-loaded.patch b/0030-xen-livepatch-register-livepatch-regions-when-loaded.patch
deleted file mode 100644
index f521ecc..0000000
--- a/0030-xen-livepatch-register-livepatch-regions-when-loaded.patch
+++ /dev/null
@@ -1,159 +0,0 @@
-From b11917de0cd261a878beaf50c18a689bde0b2f50 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 5 Mar 2024 11:59:26 +0100
-Subject: [PATCH 30/67] xen/livepatch: register livepatch regions when loaded
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Currently livepatch regions are registered as virtual regions only after the
-livepatch has been applied.
-
-This can lead to issues when using the pre-apply or post-revert hooks, as at
-that point the livepatch is not in the virtual regions list. If a livepatch
-pre-apply hook contains a WARN() it would trigger an hypervisor crash, as the
-code to handle the bug frame won't be able to find the instruction pointer that
-triggered the #UD in any of the registered virtual regions, and hence crash.
-
-Fix this by adding the livepatch payloads as virtual regions as soon as loaded,
-and only remove them once the payload is unloaded. This requires some changes
-to the virtual regions code, as the removal of the virtual regions is no longer
-done in stop machine context, and hence an RCU barrier is added in order to
-make sure there are no users of the virtual region after it's been removed from
-the list.
-
-Fixes: 8313c864fa95 ('livepatch: Implement pre-|post- apply|revert hooks')
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com>
-master commit: a57b4074ab39bee78b6c116277f0a9963bd8e687
-master date: 2024-02-28 16:57:25 +0000
----
- xen/common/livepatch.c | 4 ++--
- xen/common/virtual_region.c | 44 ++++++++++++++-----------------------
- 2 files changed, 19 insertions(+), 29 deletions(-)
-
-diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
-index c2ae84d18b..537e9f33e4 100644
---- a/xen/common/livepatch.c
-+++ b/xen/common/livepatch.c
-@@ -1015,6 +1015,7 @@ static int build_symbol_table(struct payload *payload,
- static void free_payload(struct payload *data)
- {
- ASSERT(spin_is_locked(&payload_lock));
-+ unregister_virtual_region(&data->region);
- list_del(&data->list);
- payload_cnt--;
- payload_version++;
-@@ -1114,6 +1115,7 @@ static int livepatch_upload(struct xen_sysctl_livepatch_upload *upload)
- INIT_LIST_HEAD(&data->list);
- INIT_LIST_HEAD(&data->applied_list);
-
-+ register_virtual_region(&data->region);
- list_add_tail(&data->list, &payload_list);
- payload_cnt++;
- payload_version++;
-@@ -1330,7 +1332,6 @@ static inline void apply_payload_tail(struct payload *data)
- * The applied_list is iterated by the trap code.
- */
- list_add_tail_rcu(&data->applied_list, &applied_list);
-- register_virtual_region(&data->region);
-
- data->state = LIVEPATCH_STATE_APPLIED;
- }
-@@ -1376,7 +1377,6 @@ static inline void revert_payload_tail(struct payload *data)
- * The applied_list is iterated by the trap code.
- */
- list_del_rcu(&data->applied_list);
-- unregister_virtual_region(&data->region);
-
- data->reverted = true;
- data->state = LIVEPATCH_STATE_CHECKED;
-diff --git a/xen/common/virtual_region.c b/xen/common/virtual_region.c
-index 5f89703f51..9f12c30efe 100644
---- a/xen/common/virtual_region.c
-+++ b/xen/common/virtual_region.c
-@@ -23,14 +23,8 @@ static struct virtual_region core_init __initdata = {
- };
-
- /*
-- * RCU locking. Additions are done either at startup (when there is only
-- * one CPU) or when all CPUs are running without IRQs.
-- *
-- * Deletions are bit tricky. We do it when Live Patch (all CPUs running
-- * without IRQs) or during bootup (when clearing the init).
-- *
-- * Hence we use list_del_rcu (which sports an memory fence) and a spinlock
-- * on deletion.
-+ * RCU locking. Modifications to the list must be done in exclusive mode, and
-+ * hence need to hold the spinlock.
- *
- * All readers of virtual_region_list MUST use list_for_each_entry_rcu.
- */
-@@ -58,41 +52,36 @@ const struct virtual_region *find_text_region(unsigned long addr)
-
- void register_virtual_region(struct virtual_region *r)
- {
-- ASSERT(!local_irq_is_enabled());
-+ unsigned long flags;
-
-+ spin_lock_irqsave(&virtual_region_lock, flags);
- list_add_tail_rcu(&r->list, &virtual_region_list);
-+ spin_unlock_irqrestore(&virtual_region_lock, flags);
- }
-
--static void remove_virtual_region(struct virtual_region *r)
-+/*
-+ * Suggest inline so when !CONFIG_LIVEPATCH the function is not left
-+ * unreachable after init code is removed.
-+ */
-+static void inline remove_virtual_region(struct virtual_region *r)
- {
- unsigned long flags;
-
- spin_lock_irqsave(&virtual_region_lock, flags);
- list_del_rcu(&r->list);
- spin_unlock_irqrestore(&virtual_region_lock, flags);
-- /*
-- * We do not need to invoke call_rcu.
-- *
-- * This is due to the fact that on the deletion we have made sure
-- * to use spinlocks (to guard against somebody else calling
-- * unregister_virtual_region) and list_deletion spiced with
-- * memory barrier.
-- *
-- * That protects us from corrupting the list as the readers all
-- * use list_for_each_entry_rcu which is safe against concurrent
-- * deletions.
-- */
- }
-
-+#ifdef CONFIG_LIVEPATCH
- void unregister_virtual_region(struct virtual_region *r)
- {
-- /* Expected to be called from Live Patch - which has IRQs disabled. */
-- ASSERT(!local_irq_is_enabled());
--
- remove_virtual_region(r);
-+
-+ /* Assert that no CPU might be using the removed region. */
-+ rcu_barrier();
- }
-
--#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_X86)
-+#ifdef CONFIG_X86
- void relax_virtual_region_perms(void)
- {
- const struct virtual_region *region;
-@@ -116,7 +105,8 @@ void tighten_virtual_region_perms(void)
- PAGE_HYPERVISOR_RX);
- rcu_read_unlock(&rcu_virtual_region_lock);
- }
--#endif
-+#endif /* CONFIG_X86 */
-+#endif /* CONFIG_LIVEPATCH */
-
- void __init unregister_init_virtual_region(void)
- {
---
-2.44.0
-
diff --git a/0031-x86-irq-handle-moving-interrupts-in-_assign_irq_vect.patch b/0031-x86-irq-handle-moving-interrupts-in-_assign_irq_vect.patch
new file mode 100644
index 0000000..96e87cd
--- /dev/null
+++ b/0031-x86-irq-handle-moving-interrupts-in-_assign_irq_vect.patch
@@ -0,0 +1,172 @@
+From 3a8f4ec75d8ed8da6370deac95c341cbada96802 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Wed, 26 Jun 2024 13:42:05 +0200
+Subject: [PATCH 31/56] x86/irq: handle moving interrupts in
+ _assign_irq_vector()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Currently there's logic in fixup_irqs() that attempts to prevent
+_assign_irq_vector() from failing, as fixup_irqs() is required to evacuate all
+interrupts from the CPUs not present in the input mask. The current logic in
+fixup_irqs() is incomplete, as it doesn't deal with interrupts that have
+move_cleanup_count > 0 and a non-empty ->arch.old_cpu_mask field.
+
+Instead of attempting to fixup the interrupt descriptor in fixup_irqs() so that
+_assign_irq_vector() cannot fail, introduce logic in _assign_irq_vector()
+to deal with interrupts that have either move_{in_progress,cleanup_count} set
+and no remaining online CPUs in ->arch.cpu_mask.
+
+If _assign_irq_vector() is requested to move an interrupt in the state
+described above, first attempt to see if ->arch.old_cpu_mask contains any valid
+CPUs that could be used as a fallback, and if that's the case move the
+interrupt back to the previous destination. Note this is easier because the
+vector hasn't been released yet, so there's no need to allocate and set up a
+new vector on the destination.
+
+Due to the logic in fixup_irqs() that clears offline CPUs from
+->arch.old_cpu_mask (and releases the old vector if the mask becomes empty),
+it shouldn't be possible to get into _assign_irq_vector() with
+->arch.move_{in_progress,cleanup_count} set but no online CPUs in
+->arch.old_cpu_mask.
+
+However, if ->arch.move_{in_progress,cleanup_count} is set and the interrupt
+has also changed affinity, it's possible the members of ->arch.old_cpu_mask are
+no longer part of the affinity set; in that case move the interrupt to a
+different CPU within the provided mask and keep the current
+->arch.old_{cpu_mask,vector} for the pending interrupt movement to be
+completed.
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 369558924a642bbb0cb731e9a3375958867cb17b
+master date: 2024-06-18 15:15:10 +0200
+---
+ xen/arch/x86/irq.c | 97 ++++++++++++++++++++++++++++++++--------------
+ 1 file changed, 68 insertions(+), 29 deletions(-)
+
+diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
+index f877327975..13ef61a5b7 100644
+--- a/xen/arch/x86/irq.c
++++ b/xen/arch/x86/irq.c
+@@ -553,7 +553,58 @@ static int _assign_irq_vector(struct irq_desc *desc, const cpumask_t *mask)
+ }
+
+ if ( desc->arch.move_in_progress || desc->arch.move_cleanup_count )
+- return -EAGAIN;
++ {
++ /*
++ * If the current destination is online refuse to shuffle. Retry after
++ * the in-progress movement has finished.
++ */
++ if ( cpumask_intersects(desc->arch.cpu_mask, &cpu_online_map) )
++ return -EAGAIN;
++
++ /*
++ * Due to the logic in fixup_irqs() that clears offlined CPUs from
++ * ->arch.old_cpu_mask it shouldn't be possible to get here with
++ * ->arch.move_{in_progress,cleanup_count} set and no online CPUs in
++ * ->arch.old_cpu_mask.
++ */
++ ASSERT(valid_irq_vector(desc->arch.old_vector));
++ ASSERT(cpumask_intersects(desc->arch.old_cpu_mask, &cpu_online_map));
++
++ if ( cpumask_intersects(desc->arch.old_cpu_mask, mask) )
++ {
++ /*
++ * Fallback to the old destination if moving is in progress and the
++ * current destination is to be offlined. This is only possible if
++ * the CPUs in old_cpu_mask intersect with the affinity mask passed
++ * in the 'mask' parameter.
++ */
++ desc->arch.vector = desc->arch.old_vector;
++ cpumask_and(desc->arch.cpu_mask, desc->arch.old_cpu_mask, mask);
++
++ /* Undo any possibly done cleanup. */
++ for_each_cpu(cpu, desc->arch.cpu_mask)
++ per_cpu(vector_irq, cpu)[desc->arch.vector] = irq;
++
++ /* Cancel the pending move and release the current vector. */
++ desc->arch.old_vector = IRQ_VECTOR_UNASSIGNED;
++ cpumask_clear(desc->arch.old_cpu_mask);
++ desc->arch.move_in_progress = 0;
++ desc->arch.move_cleanup_count = 0;
++ if ( desc->arch.used_vectors )
++ {
++ ASSERT(test_bit(old_vector, desc->arch.used_vectors));
++ clear_bit(old_vector, desc->arch.used_vectors);
++ }
++
++ return 0;
++ }
++
++ /*
++ * There's an interrupt movement in progress but the destination(s) in
++ * ->arch.old_cpu_mask are not suitable given the 'mask' parameter, go
++ * through the full logic to find a new vector in a suitable CPU.
++ */
++ }
+
+ err = -ENOSPC;
+
+@@ -609,7 +660,22 @@ next:
+ current_vector = vector;
+ current_offset = offset;
+
+- if ( valid_irq_vector(old_vector) )
++ if ( desc->arch.move_in_progress || desc->arch.move_cleanup_count )
++ {
++ ASSERT(!cpumask_intersects(desc->arch.cpu_mask, &cpu_online_map));
++ /*
++ * Special case when evacuating an interrupt from a CPU to be
++ * offlined and the interrupt was already in the process of being
++ * moved. Leave ->arch.old_{vector,cpu_mask} as-is and just
++ * replace ->arch.{cpu_mask,vector} with the new destination.
++ * Cleanup will be done normally for the old fields, just release
++ * the current vector here.
++ */
++ if ( desc->arch.used_vectors &&
++ !test_and_clear_bit(old_vector, desc->arch.used_vectors) )
++ ASSERT_UNREACHABLE();
++ }
++ else if ( valid_irq_vector(old_vector) )
+ {
+ cpumask_and(desc->arch.old_cpu_mask, desc->arch.cpu_mask,
+ &cpu_online_map);
+@@ -2620,33 +2686,6 @@ void fixup_irqs(const cpumask_t *mask, bool verbose)
+ continue;
+ }
+
+- /*
+- * In order for the affinity adjustment below to be successful, we
+- * need _assign_irq_vector() to succeed. This in particular means
+- * clearing desc->arch.move_in_progress if this would otherwise
+- * prevent the function from succeeding. Since there's no way for the
+- * flag to get cleared anymore when there's no possible destination
+- * left (the only possibility then would be the IRQs enabled window
+- * after this loop), there's then also no race with us doing it here.
+- *
+- * Therefore the logic here and there need to remain in sync.
+- */
+- if ( desc->arch.move_in_progress &&
+- !cpumask_intersects(mask, desc->arch.cpu_mask) )
+- {
+- unsigned int cpu;
+-
+- cpumask_and(affinity, desc->arch.old_cpu_mask, &cpu_online_map);
+-
+- spin_lock(&vector_lock);
+- for_each_cpu(cpu, affinity)
+- per_cpu(vector_irq, cpu)[desc->arch.old_vector] = ~irq;
+- spin_unlock(&vector_lock);
+-
+- release_old_vec(desc);
+- desc->arch.move_in_progress = 0;
+- }
+-
+ if ( !cpumask_intersects(mask, desc->affinity) )
+ {
+ break_affinity = true;
+--
+2.45.2
+
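For illustration, a hedged sketch of the new decision taken by _assign_irq_vector() in the patch above, reduced to a pure function over bitmap cpumasks: retry later if the current destination is still online, reuse the old destination if it intersects the requested affinity mask, otherwise fall through to the normal vector scan. All names here are illustrative, not Xen code.

    #include <stdint.h>
    #include <stdio.h>

    enum action { RETRY_LATER, REUSE_OLD_DESTINATION, ALLOCATE_NEW_VECTOR };

    static enum action classify(uint64_t cpu_mask, uint64_t old_cpu_mask,
                                uint64_t online, uint64_t requested)
    {
        if ( cpu_mask & online )
            return RETRY_LATER;            /* current target still alive: -EAGAIN */
        if ( old_cpu_mask & requested )
            return REUSE_OLD_DESTINATION;  /* old vector is still valid there */
        return ALLOCATE_NEW_VECTOR;        /* fall through to the normal scan */
    }

    int main(void)
    {
        /* CPU1 (old target) online and in the requested mask, CPU2 (current) offline. */
        printf("%d\n", classify(1ULL << 2, 1ULL << 1, 1ULL << 1, 1ULL << 1));
        return 0;
    }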
diff --git a/0031-xen-livepatch-search-for-symbols-in-all-loaded-paylo.patch b/0031-xen-livepatch-search-for-symbols-in-all-loaded-paylo.patch
deleted file mode 100644
index c778639..0000000
--- a/0031-xen-livepatch-search-for-symbols-in-all-loaded-paylo.patch
+++ /dev/null
@@ -1,149 +0,0 @@
-From c54cf903b06fb1933fad053cc547580c92c856ea Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 5 Mar 2024 11:59:35 +0100
-Subject: [PATCH 31/67] xen/livepatch: search for symbols in all loaded
- payloads
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-When checking if an address belongs to a patch, or when resolving a symbol,
-take into account all loaded livepatch payloads, even if not applied.
-
-This is required in order for the pre-apply and post-revert hooks to work
-properly, or else Xen won't detect the instruction pointer belonging to those
-hooks as being part of the currently active text.
-
-Move the RCU handling to be used for payload_list instead of applied_list, as
-now the calls from trap code will iterate over the payload_list.
-
-Fixes: 8313c864fa95 ('livepatch: Implement pre-|post- apply|revert hooks')
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com>
-master commit: d2daa40fb3ddb8f83e238e57854bd878924cde90
-master date: 2024-02-28 16:57:25 +0000
----
- xen/common/livepatch.c | 49 +++++++++++++++---------------------------
- 1 file changed, 17 insertions(+), 32 deletions(-)
-
-diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
-index 537e9f33e4..a129ab9973 100644
---- a/xen/common/livepatch.c
-+++ b/xen/common/livepatch.c
-@@ -36,13 +36,14 @@
- * caller in schedule_work.
- */
- static DEFINE_SPINLOCK(payload_lock);
--static LIST_HEAD(payload_list);
--
- /*
-- * Patches which have been applied. Need RCU in case we crash (and then
-- * traps code would iterate via applied_list) when adding entries on the list.
-+ * Need RCU in case we crash (and then traps code would iterate via
-+ * payload_list) when adding entries on the list.
- */
--static DEFINE_RCU_READ_LOCK(rcu_applied_lock);
-+static DEFINE_RCU_READ_LOCK(rcu_payload_lock);
-+static LIST_HEAD(payload_list);
-+
-+/* Patches which have been applied. Only modified from stop machine context. */
- static LIST_HEAD(applied_list);
-
- static unsigned int payload_cnt;
-@@ -111,12 +112,8 @@ bool_t is_patch(const void *ptr)
- const struct payload *data;
- bool_t r = 0;
-
-- /*
-- * Only RCU locking since this list is only ever changed during apply
-- * or revert context. And in case it dies there we need an safe list.
-- */
-- rcu_read_lock(&rcu_applied_lock);
-- list_for_each_entry_rcu ( data, &applied_list, applied_list )
-+ rcu_read_lock(&rcu_payload_lock);
-+ list_for_each_entry_rcu ( data, &payload_list, list )
- {
- if ( (ptr >= data->rw_addr &&
- ptr < (data->rw_addr + data->rw_size)) ||
-@@ -130,7 +127,7 @@ bool_t is_patch(const void *ptr)
- }
-
- }
-- rcu_read_unlock(&rcu_applied_lock);
-+ rcu_read_unlock(&rcu_payload_lock);
-
- return r;
- }
-@@ -166,12 +163,8 @@ static const char *cf_check livepatch_symbols_lookup(
- const void *va = (const void *)addr;
- const char *n = NULL;
-
-- /*
-- * Only RCU locking since this list is only ever changed during apply
-- * or revert context. And in case it dies there we need an safe list.
-- */
-- rcu_read_lock(&rcu_applied_lock);
-- list_for_each_entry_rcu ( data, &applied_list, applied_list )
-+ rcu_read_lock(&rcu_payload_lock);
-+ list_for_each_entry_rcu ( data, &payload_list, list )
- {
- if ( va < data->text_addr ||
- va >= (data->text_addr + data->text_size) )
-@@ -200,7 +193,7 @@ static const char *cf_check livepatch_symbols_lookup(
- n = data->symtab[best].name;
- break;
- }
-- rcu_read_unlock(&rcu_applied_lock);
-+ rcu_read_unlock(&rcu_payload_lock);
-
- return n;
- }
-@@ -1016,7 +1009,8 @@ static void free_payload(struct payload *data)
- {
- ASSERT(spin_is_locked(&payload_lock));
- unregister_virtual_region(&data->region);
-- list_del(&data->list);
-+ list_del_rcu(&data->list);
-+ rcu_barrier();
- payload_cnt--;
- payload_version++;
- free_payload_data(data);
-@@ -1116,7 +1110,7 @@ static int livepatch_upload(struct xen_sysctl_livepatch_upload *upload)
- INIT_LIST_HEAD(&data->applied_list);
-
- register_virtual_region(&data->region);
-- list_add_tail(&data->list, &payload_list);
-+ list_add_tail_rcu(&data->list, &payload_list);
- payload_cnt++;
- payload_version++;
- }
-@@ -1327,11 +1321,7 @@ static int apply_payload(struct payload *data)
-
- static inline void apply_payload_tail(struct payload *data)
- {
-- /*
-- * We need RCU variant (which has barriers) in case we crash here.
-- * The applied_list is iterated by the trap code.
-- */
-- list_add_tail_rcu(&data->applied_list, &applied_list);
-+ list_add_tail(&data->applied_list, &applied_list);
-
- data->state = LIVEPATCH_STATE_APPLIED;
- }
-@@ -1371,12 +1361,7 @@ static int revert_payload(struct payload *data)
-
- static inline void revert_payload_tail(struct payload *data)
- {
--
-- /*
-- * We need RCU variant (which has barriers) in case we crash here.
-- * The applied_list is iterated by the trap code.
-- */
-- list_del_rcu(&data->applied_list);
-+ list_del(&data->applied_list);
-
- data->reverted = true;
- data->state = LIVEPATCH_STATE_CHECKED;
---
-2.44.0
-
diff --git a/0032-xen-livepatch-fix-norevert-test-attempt-to-open-code.patch b/0032-xen-livepatch-fix-norevert-test-attempt-to-open-code.patch
deleted file mode 100644
index 76af9ef..0000000
--- a/0032-xen-livepatch-fix-norevert-test-attempt-to-open-code.patch
+++ /dev/null
@@ -1,186 +0,0 @@
-From 5564323f643715f9d364df88e0eb9c7d6fd2c22b Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 5 Mar 2024 11:59:43 +0100
-Subject: [PATCH 32/67] xen/livepatch: fix norevert test attempt to open-code
- revert
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The purpose of the norevert test is to install a dummy handler that replaces
-the internal Xen revert code, and then perform the revert in the post-revert
-hook. For that purpose the usage of the previous common_livepatch_revert() is
-not enough, as that just reverts specific functions, but not the whole state of
-the payload.
-
-Remove both common_livepatch_{apply,revert}() and instead expose
-revert_payload{,_tail}() in order to perform the patch revert from the
-post-revert hook.
-
-Fixes: 6047104c3ccc ('livepatch: Add per-function applied/reverted state tracking marker')
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com>
-master commit: cdae267ce10d04d71d1687b5701ff2911a96b6dc
-master date: 2024-02-28 16:57:25 +0000
----
- xen/common/livepatch.c | 41 +++++++++++++++++--
- xen/include/xen/livepatch.h | 32 ++-------------
- .../livepatch/xen_action_hooks_norevert.c | 22 +++-------
- 3 files changed, 46 insertions(+), 49 deletions(-)
-
-diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
-index a129ab9973..a5068a2217 100644
---- a/xen/common/livepatch.c
-+++ b/xen/common/livepatch.c
-@@ -1310,7 +1310,22 @@ static int apply_payload(struct payload *data)
- ASSERT(!local_irq_is_enabled());
-
- for ( i = 0; i < data->nfuncs; i++ )
-- common_livepatch_apply(&data->funcs[i], &data->fstate[i]);
-+ {
-+ const struct livepatch_func *func = &data->funcs[i];
-+ struct livepatch_fstate *state = &data->fstate[i];
-+
-+ /* If the action has been already executed on this function, do nothing. */
-+ if ( state->applied == LIVEPATCH_FUNC_APPLIED )
-+ {
-+ printk(XENLOG_WARNING LIVEPATCH
-+ "%s: %s has been already applied before\n",
-+ __func__, func->name);
-+ continue;
-+ }
-+
-+ arch_livepatch_apply(func, state);
-+ state->applied = LIVEPATCH_FUNC_APPLIED;
-+ }
-
- arch_livepatch_revive();
-
-@@ -1326,7 +1341,7 @@ static inline void apply_payload_tail(struct payload *data)
- data->state = LIVEPATCH_STATE_APPLIED;
- }
-
--static int revert_payload(struct payload *data)
-+int revert_payload(struct payload *data)
- {
- unsigned int i;
- int rc;
-@@ -1341,7 +1356,25 @@ static int revert_payload(struct payload *data)
- }
-
- for ( i = 0; i < data->nfuncs; i++ )
-- common_livepatch_revert(&data->funcs[i], &data->fstate[i]);
-+ {
-+ const struct livepatch_func *func = &data->funcs[i];
-+ struct livepatch_fstate *state = &data->fstate[i];
-+
-+ /*
-+ * If the apply action hasn't been executed on this function, do
-+ * nothing.
-+ */
-+ if ( !func->old_addr || state->applied == LIVEPATCH_FUNC_NOT_APPLIED )
-+ {
-+ printk(XENLOG_WARNING LIVEPATCH
-+ "%s: %s has not been applied before\n",
-+ __func__, func->name);
-+ continue;
-+ }
-+
-+ arch_livepatch_revert(func, state);
-+ state->applied = LIVEPATCH_FUNC_NOT_APPLIED;
-+ }
-
- /*
- * Since we are running with IRQs disabled and the hooks may call common
-@@ -1359,7 +1392,7 @@ static int revert_payload(struct payload *data)
- return 0;
- }
-
--static inline void revert_payload_tail(struct payload *data)
-+void revert_payload_tail(struct payload *data)
- {
- list_del(&data->applied_list);
-
-diff --git a/xen/include/xen/livepatch.h b/xen/include/xen/livepatch.h
-index 537d3d58b6..c9ee58fd37 100644
---- a/xen/include/xen/livepatch.h
-+++ b/xen/include/xen/livepatch.h
-@@ -136,35 +136,11 @@ void arch_livepatch_post_action(void);
- void arch_livepatch_mask(void);
- void arch_livepatch_unmask(void);
-
--static inline void common_livepatch_apply(const struct livepatch_func *func,
-- struct livepatch_fstate *state)
--{
-- /* If the action has been already executed on this function, do nothing. */
-- if ( state->applied == LIVEPATCH_FUNC_APPLIED )
-- {
-- printk(XENLOG_WARNING LIVEPATCH "%s: %s has been already applied before\n",
-- __func__, func->name);
-- return;
-- }
--
-- arch_livepatch_apply(func, state);
-- state->applied = LIVEPATCH_FUNC_APPLIED;
--}
-+/* Only for testing purposes. */
-+struct payload;
-+int revert_payload(struct payload *data);
-+void revert_payload_tail(struct payload *data);
-
--static inline void common_livepatch_revert(const struct livepatch_func *func,
-- struct livepatch_fstate *state)
--{
-- /* If the apply action hasn't been executed on this function, do nothing. */
-- if ( !func->old_addr || state->applied == LIVEPATCH_FUNC_NOT_APPLIED )
-- {
-- printk(XENLOG_WARNING LIVEPATCH "%s: %s has not been applied before\n",
-- __func__, func->name);
-- return;
-- }
--
-- arch_livepatch_revert(func, state);
-- state->applied = LIVEPATCH_FUNC_NOT_APPLIED;
--}
- #else
-
- /*
-diff --git a/xen/test/livepatch/xen_action_hooks_norevert.c b/xen/test/livepatch/xen_action_hooks_norevert.c
-index c173855192..c5fbab1746 100644
---- a/xen/test/livepatch/xen_action_hooks_norevert.c
-+++ b/xen/test/livepatch/xen_action_hooks_norevert.c
-@@ -96,26 +96,14 @@ static int revert_hook(livepatch_payload_t *payload)
-
- static void post_revert_hook(livepatch_payload_t *payload)
- {
-- int i;
-+ unsigned long flags;
-
- printk(KERN_DEBUG "%s: Hook starting.\n", __func__);
-
-- for (i = 0; i < payload->nfuncs; i++)
-- {
-- const struct livepatch_func *func = &payload->funcs[i];
-- struct livepatch_fstate *fstate = &payload->fstate[i];
--
-- BUG_ON(revert_cnt != 1);
-- BUG_ON(fstate->applied != LIVEPATCH_FUNC_APPLIED);
--
-- /* Outside of quiesce zone: MAY TRIGGER HOST CRASH/UNDEFINED BEHAVIOR */
-- arch_livepatch_quiesce();
-- common_livepatch_revert(payload);
-- arch_livepatch_revive();
-- BUG_ON(fstate->applied == LIVEPATCH_FUNC_APPLIED);
--
-- printk(KERN_DEBUG "%s: post reverted: %s\n", __func__, func->name);
-- }
-+ local_irq_save(flags);
-+ BUG_ON(revert_payload(payload));
-+ revert_payload_tail(payload);
-+ local_irq_restore(flags);
-
- printk(KERN_DEBUG "%s: Hook done.\n", __func__);
- }
---
-2.44.0
-
diff --git a/0032-xen-ubsan-Fix-UB-in-type_descriptor-declaration.patch b/0032-xen-ubsan-Fix-UB-in-type_descriptor-declaration.patch
new file mode 100644
index 0000000..c7c0968
--- /dev/null
+++ b/0032-xen-ubsan-Fix-UB-in-type_descriptor-declaration.patch
@@ -0,0 +1,39 @@
+From 5397ab9995f7354e7f8122a8a91c810256afa3d1 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Wed, 26 Jun 2024 13:42:30 +0200
+Subject: [PATCH 32/56] xen/ubsan: Fix UB in type_descriptor declaration
+
+struct type_descriptor is arranged with a NUL-terminated string following the
+kind/info fields.
+
+The only reason this doesn't trip UBSAN detection itself (on more modern
+compilers at least) is because struct type_descriptor is only referenced in
+suppressed regions.
+
+Switch the declaration to be a real flexible array member. No functional change.
+
+Fixes: 00fcf4dd8eb4 ("xen/ubsan: Import ubsan implementation from Linux 4.13")
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: bd59af99700f075d06a6d47a16f777c9519928e0
+master date: 2024-06-18 14:55:04 +0100
+---
+ xen/common/ubsan/ubsan.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/common/ubsan/ubsan.h b/xen/common/ubsan/ubsan.h
+index a3159040fe..3db42e75b1 100644
+--- a/xen/common/ubsan/ubsan.h
++++ b/xen/common/ubsan/ubsan.h
+@@ -10,7 +10,7 @@ enum {
+ struct type_descriptor {
+ u16 type_kind;
+ u16 type_info;
+- char type_name[1];
++ char type_name[];
+ };
+
+ struct source_location {
+--
+2.45.2
+
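For illustration, a stand-alone C program showing why the change above matters: with a trailing char name[1] member, writing past the first byte is undefined behaviour, whereas a C99 flexible array member makes the usual over-allocation pattern well defined. The struct below mirrors the patched declaration; everything around it is illustrative, not the Xen header.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct type_descriptor {
        unsigned short type_kind;
        unsigned short type_info;
        char type_name[];          /* flexible array member, as in the patch */
    };

    int main(void)
    {
        const char *name = "'int'";
        /* Allocate the header plus room for the string, as UBSAN runtimes do. */
        struct type_descriptor *d = malloc(sizeof(*d) + strlen(name) + 1);

        if ( !d )
            return 1;
        strcpy(d->type_name, name);  /* fine: storage was allocated for it */
        printf("%s\n", d->type_name);
        free(d);
        return 0;
    }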
diff --git a/0033-x86-xstate-Fix-initialisation-of-XSS-cache.patch b/0033-x86-xstate-Fix-initialisation-of-XSS-cache.patch
new file mode 100644
index 0000000..1a8c724
--- /dev/null
+++ b/0033-x86-xstate-Fix-initialisation-of-XSS-cache.patch
@@ -0,0 +1,74 @@
+From 4ee1df89d9c92609e5fff3c9b261ce4b1bb88e42 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Wed, 26 Jun 2024 13:43:19 +0200
+Subject: [PATCH 33/56] x86/xstate: Fix initialisation of XSS cache
+
+The clobbering of this_cpu(xcr0) and this_cpu(xss) to architecturally invalid
+values is to force the subsequent set_xcr0() and set_msr_xss() to reload the
+hardware register.
+
+While XCR0 is reloaded in xstate_init(), MSR_XSS isn't. This causes
+get_msr_xss() to return the invalid value, and logic of the form:
+
+ old = get_msr_xss();
+ set_msr_xss(new);
+ ...
+ set_msr_xss(old);
+
+to try and restore said invalid value.
+
+The architecturally invalid value must be purged from the cache, meaning the
+hardware register must be written at least once. This in turn highlights that
+the invalid value must only be used in the case that the hardware register is
+available.
+
+Fixes: f7f4a523927f ("x86/xstate: reset cached register values on resume")
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 9e6dbbe8bf400aacb99009ddffa91d2a0c312b39
+master date: 2024-06-19 13:00:06 +0100
+---
+ xen/arch/x86/xstate.c | 18 +++++++++++-------
+ 1 file changed, 11 insertions(+), 7 deletions(-)
+
+diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
+index f442610fc5..ca76f98fe2 100644
+--- a/xen/arch/x86/xstate.c
++++ b/xen/arch/x86/xstate.c
+@@ -641,13 +641,6 @@ void xstate_init(struct cpuinfo_x86 *c)
+ return;
+ }
+
+- /*
+- * Zap the cached values to make set_xcr0() and set_msr_xss() really
+- * write it.
+- */
+- this_cpu(xcr0) = 0;
+- this_cpu(xss) = ~0;
+-
+ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+ feature_mask = (((u64)edx << 32) | eax) & XCNTXT_MASK;
+ BUG_ON(!valid_xcr0(feature_mask));
+@@ -657,8 +650,19 @@ void xstate_init(struct cpuinfo_x86 *c)
+ * Set CR4_OSXSAVE and run "cpuid" to get xsave_cntxt_size.
+ */
+ set_in_cr4(X86_CR4_OSXSAVE);
++
++ /*
++ * Zap the cached values to make set_xcr0() and set_msr_xss() really write
++ * the hardware register.
++ */
++ this_cpu(xcr0) = 0;
+ if ( !set_xcr0(feature_mask) )
+ BUG();
++ if ( cpu_has_xsaves )
++ {
++ this_cpu(xss) = ~0;
++ set_msr_xss(0);
++ }
+
+ if ( bsp )
+ {
+--
+2.45.2
+
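For illustration, a minimal model of the caching bug fixed above (not Xen code): a cached shadow of a register may only be poisoned with an impossible value if a real write follows to flush it through, and only when the register actually exists. The "hardware" here is a plain variable and all names are stand-ins.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t hw_xss;             /* pretend MSR_XSS */
    static uint64_t cached_xss = ~0ULL; /* architecturally impossible value */

    static void set_msr_xss(uint64_t val)
    {
        if ( cached_xss != val )        /* the cache avoids redundant MSR writes */
        {
            hw_xss = val;
            cached_xss = val;
        }
    }

    static void xstate_init_sketch(bool cpu_has_xsaves)
    {
        if ( cpu_has_xsaves )
        {
            cached_xss = ~0ULL;         /* force the next write through */
            set_msr_xss(0);             /* cache now holds a real value again */
        }
        /* Without the write above, a later get/set pair would "restore" ~0. */
    }

    int main(void)
    {
        xstate_init_sketch(true);
        printf("cached %#llx hw %#llx\n",
               (unsigned long long)cached_xss, (unsigned long long)hw_xss);
        return 0;
    }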
diff --git a/0033-xen-livepatch-properly-build-the-noapply-and-norever.patch b/0033-xen-livepatch-properly-build-the-noapply-and-norever.patch
deleted file mode 100644
index 76803c6..0000000
--- a/0033-xen-livepatch-properly-build-the-noapply-and-norever.patch
+++ /dev/null
@@ -1,43 +0,0 @@
-From a59106b27609b6ae2873bd6755949b1258290872 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 5 Mar 2024 11:59:51 +0100
-Subject: [PATCH 33/67] xen/livepatch: properly build the noapply and norevert
- tests
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-It seems the build variables for those tests where copy-pasted from
-xen_action_hooks_marker-objs and not adjusted to use the correct source files.
-
-Fixes: 6047104c3ccc ('livepatch: Add per-function applied/reverted state tracking marker')
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com>
-master commit: e579677095782c7dec792597ba8b037b7d716b32
-master date: 2024-02-28 16:57:25 +0000
----
- xen/test/livepatch/Makefile | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/xen/test/livepatch/Makefile b/xen/test/livepatch/Makefile
-index c258ab0b59..d987a8367f 100644
---- a/xen/test/livepatch/Makefile
-+++ b/xen/test/livepatch/Makefile
-@@ -118,12 +118,12 @@ xen_action_hooks_marker-objs := xen_action_hooks_marker.o xen_hello_world_func.o
- $(obj)/xen_action_hooks_noapply.o: $(obj)/config.h
-
- extra-y += xen_action_hooks_noapply.livepatch
--xen_action_hooks_noapply-objs := xen_action_hooks_marker.o xen_hello_world_func.o note.o xen_note.o
-+xen_action_hooks_noapply-objs := xen_action_hooks_noapply.o xen_hello_world_func.o note.o xen_note.o
-
- $(obj)/xen_action_hooks_norevert.o: $(obj)/config.h
-
- extra-y += xen_action_hooks_norevert.livepatch
--xen_action_hooks_norevert-objs := xen_action_hooks_marker.o xen_hello_world_func.o note.o xen_note.o
-+xen_action_hooks_norevert-objs := xen_action_hooks_norevert.o xen_hello_world_func.o note.o xen_note.o
-
- EXPECT_BYTES_COUNT := 8
- CODE_GET_EXPECT=$(shell $(OBJDUMP) -d --insn-width=1 $(1) | sed -n -e '/<'$(2)'>:$$/,/^$$/ p' | tail -n +2 | head -n $(EXPECT_BYTES_COUNT) | awk '{$$0=$$2; printf "%s", substr($$0,length-1)}' | sed 's/.\{2\}/0x&,/g' | sed 's/^/{/;s/,$$/}/g')
---
-2.44.0
-
diff --git a/0034-libxl-Fix-segfault-in-device_model_spawn_outcome.patch b/0034-libxl-Fix-segfault-in-device_model_spawn_outcome.patch
deleted file mode 100644
index 7f23a73..0000000
--- a/0034-libxl-Fix-segfault-in-device_model_spawn_outcome.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From c4ee68eda9937743527fff41f4ede0f6a3228080 Mon Sep 17 00:00:00 2001
-From: Jason Andryuk <jandryuk@gmail.com>
-Date: Tue, 5 Mar 2024 12:00:30 +0100
-Subject: [PATCH 34/67] libxl: Fix segfault in device_model_spawn_outcome
-
-libxl__spawn_qdisk_backend() explicitly sets guest_config to NULL when
-starting QEMU (the usual launch through libxl__spawn_local_dm() has a
-guest_config though).
-
-Bail early on a NULL guest_config/d_config. This skips the QMP queries
-for chardevs and VNC, but this xenpv QEMU instance isn't expected to
-provide those - only qdisk (or 9pfs backends after an upcoming change).
-
-Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
-Acked-by: Anthony PERARD <anthony.perard@citrix.com>
-master commit: d4f3d35f043f6ef29393166b0dd131c8102cf255
-master date: 2024-02-29 08:18:38 +0100
----
- tools/libs/light/libxl_dm.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c
-index ed620a9d8e..29b43ed20a 100644
---- a/tools/libs/light/libxl_dm.c
-+++ b/tools/libs/light/libxl_dm.c
-@@ -3172,8 +3172,8 @@ static void device_model_spawn_outcome(libxl__egc *egc,
-
- /* Check if spawn failed */
- if (rc) goto out;
--
-- if (d_config->b_info.device_model_version
-+ /* d_config is NULL for xl devd/libxl__spawn_qemu_xenpv_backend(). */
-+ if (d_config && d_config->b_info.device_model_version
- == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) {
- rc = libxl__ev_time_register_rel(ao, &dmss->timeout,
- devise_model_postconfig_timeout,
---
-2.44.0
-
diff --git a/0034-x86-cpuid-Fix-handling-of-XSAVE-dynamic-leaves.patch b/0034-x86-cpuid-Fix-handling-of-XSAVE-dynamic-leaves.patch
new file mode 100644
index 0000000..1905728
--- /dev/null
+++ b/0034-x86-cpuid-Fix-handling-of-XSAVE-dynamic-leaves.patch
@@ -0,0 +1,72 @@
+From 9b43092d54b5f9e9d39d9f20393671e303b19e81 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Wed, 26 Jun 2024 13:43:44 +0200
+Subject: [PATCH 34/56] x86/cpuid: Fix handling of XSAVE dynamic leaves
+
+[ This is a minimal backport of commit 71cacfb035f4 ("x86/cpuid: Fix handling
+ of XSAVE dynamic leaves") to fix the bugs without depending on the large
+ rework of XSTATE handling in Xen 4.19 ]
+
+First, if XSAVE is available in hardware but not visible to the guest, the
+dynamic leaves shouldn't be filled in.
+
+Second, the comment concerning XSS state is wrong. VT-x doesn't manage
+host/guest state automatically, but there is provision for "host only" bits to
+be set, so the implications are still accurate.
+
+In Xen 4.18, no XSS states are supported, so it's safe to keep deferring to
+real hardware.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 71cacfb035f4a78ee10970dc38a3baa04d387451
+master date: 2024-06-19 13:00:06 +0100
+---
+ xen/arch/x86/cpuid.c | 30 +++++++++++++-----------------
+ 1 file changed, 13 insertions(+), 17 deletions(-)
+
+diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
+index 455a09b2dd..f6fd6cc6b3 100644
+--- a/xen/arch/x86/cpuid.c
++++ b/xen/arch/x86/cpuid.c
+@@ -330,24 +330,20 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf,
+ case XSTATE_CPUID:
+ switch ( subleaf )
+ {
+- case 1:
+- if ( p->xstate.xsavec || p->xstate.xsaves )
+- {
+- /*
+- * TODO: Figure out what to do for XSS state. VT-x manages
+- * host vs guest MSR_XSS automatically, so as soon as we start
+- * supporting any XSS states, the wrong XSS will be in
+- * context.
+- */
+- BUILD_BUG_ON(XSTATE_XSAVES_ONLY != 0);
+-
+- /*
+- * Read CPUID[0xD,0/1].EBX from hardware. They vary with
+- * enabled XSTATE, and appropraite XCR0|XSS are in context.
+- */
++ /*
++ * Read CPUID[0xd,0/1].EBX from hardware. They vary with enabled
++ * XSTATE, and the appropriate XCR0 is in context.
++ */
+ case 0:
+- res->b = cpuid_count_ebx(leaf, subleaf);
+- }
++ if ( p->basic.xsave )
++ res->b = cpuid_count_ebx(0xd, 0);
++ break;
++
++ case 1:
++ /* This only works because Xen doesn't support XSS states yet. */
++ BUILD_BUG_ON(XSTATE_XSAVES_ONLY != 0);
++ if ( p->xstate.xsavec )
++ res->b = cpuid_count_ebx(0xd, 1);
+ break;
+ }
+ break;
+--
+2.45.2
+
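For illustration, a simplified sketch of the leaf-0xD handling after the patch above: the dynamic EBX values are read from (simulated) hardware only when the guest policy exposes the corresponding feature, returning 0 otherwise, which slightly simplifies what the real code does. All names and values are illustrative, not Xen code.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct policy { bool xsave; bool xsavec; };

    static uint32_t hw_ebx(unsigned int subleaf)
    {
        return subleaf ? 0x340 : 0x440;    /* made-up "current size" values */
    }

    static uint32_t xstate_ebx(const struct policy *p, unsigned int subleaf)
    {
        switch ( subleaf )
        {
        case 0:
            return p->xsave ? hw_ebx(0) : 0;   /* hide hardware size without XSAVE */
        case 1:
            return p->xsavec ? hw_ebx(1) : 0;  /* likewise for the compacted size */
        default:
            return 0;
        }
    }

    int main(void)
    {
        struct policy guest = { .xsave = false, .xsavec = false };
        printf("EBX[0]=%#x EBX[1]=%#x\n",
               xstate_ebx(&guest, 0), xstate_ebx(&guest, 1));
        return 0;
    }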
diff --git a/0035-x86-altcall-always-use-a-temporary-parameter-stashin.patch b/0035-x86-altcall-always-use-a-temporary-parameter-stashin.patch
deleted file mode 100644
index 177c73b..0000000
--- a/0035-x86-altcall-always-use-a-temporary-parameter-stashin.patch
+++ /dev/null
@@ -1,197 +0,0 @@
-From 2f49d9f89c14519d4cb1e06ab8370cf4ba50fab7 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 5 Mar 2024 12:00:47 +0100
-Subject: [PATCH 35/67] x86/altcall: always use a temporary parameter stashing
- variable
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The usage in ALT_CALL_ARG() on clang of:
-
-register union {
- typeof(arg) e;
- const unsigned long r;
-} ...
-
-When `arg` is the first argument to alternative_{,v}call() and
-const_vlapic_vcpu() is used results in clang 3.5.0 complaining with:
-
-arch/x86/hvm/vlapic.c:141:47: error: non-const static data member must be initialized out of line
- alternative_call(hvm_funcs.test_pir, const_vlapic_vcpu(vlapic), vec) )
-
-Workaround this by pulling `arg1` into a local variable, like it's done for
-further arguments (arg2, arg3...)
-
-Originally arg1 wasn't pulled into a variable because for the a1_ register
-local variable the possible clobbering as a result of operators on other
-variables don't matter:
-
-https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables
-
-Note clang version 3.8.1 seems to already be fixed and don't require the
-workaround, but since it's harmless do it uniformly everywhere.
-
-Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Fixes: 2ce562b2a413 ('x86/altcall: use a union as register type for function parameters on clang')
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Acked-by: Jan Beulich <jbeulich@suse.com>
-master commit: c20850540ad6a32f4fc17bde9b01c92b0df18bf0
-master date: 2024-02-29 08:21:49 +0100
----
- xen/arch/x86/include/asm/alternative.h | 36 +++++++++++++++++---------
- 1 file changed, 24 insertions(+), 12 deletions(-)
-
-diff --git a/xen/arch/x86/include/asm/alternative.h b/xen/arch/x86/include/asm/alternative.h
-index bcb1dc94f4..fa04481316 100644
---- a/xen/arch/x86/include/asm/alternative.h
-+++ b/xen/arch/x86/include/asm/alternative.h
-@@ -253,21 +253,24 @@ extern void alternative_branches(void);
- })
-
- #define alternative_vcall1(func, arg) ({ \
-- ALT_CALL_ARG(arg, 1); \
-+ typeof(arg) v1_ = (arg); \
-+ ALT_CALL_ARG(v1_, 1); \
- ALT_CALL_NO_ARG2; \
- (void)sizeof(func(arg)); \
- (void)alternative_callN(1, int, func); \
- })
-
- #define alternative_call1(func, arg) ({ \
-- ALT_CALL_ARG(arg, 1); \
-+ typeof(arg) v1_ = (arg); \
-+ ALT_CALL_ARG(v1_, 1); \
- ALT_CALL_NO_ARG2; \
- alternative_callN(1, typeof(func(arg)), func); \
- })
-
- #define alternative_vcall2(func, arg1, arg2) ({ \
-+ typeof(arg1) v1_ = (arg1); \
- typeof(arg2) v2_ = (arg2); \
-- ALT_CALL_ARG(arg1, 1); \
-+ ALT_CALL_ARG(v1_, 1); \
- ALT_CALL_ARG(v2_, 2); \
- ALT_CALL_NO_ARG3; \
- (void)sizeof(func(arg1, arg2)); \
-@@ -275,17 +278,19 @@ extern void alternative_branches(void);
- })
-
- #define alternative_call2(func, arg1, arg2) ({ \
-+ typeof(arg1) v1_ = (arg1); \
- typeof(arg2) v2_ = (arg2); \
-- ALT_CALL_ARG(arg1, 1); \
-+ ALT_CALL_ARG(v1_, 1); \
- ALT_CALL_ARG(v2_, 2); \
- ALT_CALL_NO_ARG3; \
- alternative_callN(2, typeof(func(arg1, arg2)), func); \
- })
-
- #define alternative_vcall3(func, arg1, arg2, arg3) ({ \
-+ typeof(arg1) v1_ = (arg1); \
- typeof(arg2) v2_ = (arg2); \
- typeof(arg3) v3_ = (arg3); \
-- ALT_CALL_ARG(arg1, 1); \
-+ ALT_CALL_ARG(v1_, 1); \
- ALT_CALL_ARG(v2_, 2); \
- ALT_CALL_ARG(v3_, 3); \
- ALT_CALL_NO_ARG4; \
-@@ -294,9 +299,10 @@ extern void alternative_branches(void);
- })
-
- #define alternative_call3(func, arg1, arg2, arg3) ({ \
-+ typeof(arg1) v1_ = (arg1); \
- typeof(arg2) v2_ = (arg2); \
- typeof(arg3) v3_ = (arg3); \
-- ALT_CALL_ARG(arg1, 1); \
-+ ALT_CALL_ARG(v1_, 1); \
- ALT_CALL_ARG(v2_, 2); \
- ALT_CALL_ARG(v3_, 3); \
- ALT_CALL_NO_ARG4; \
-@@ -305,10 +311,11 @@ extern void alternative_branches(void);
- })
-
- #define alternative_vcall4(func, arg1, arg2, arg3, arg4) ({ \
-+ typeof(arg1) v1_ = (arg1); \
- typeof(arg2) v2_ = (arg2); \
- typeof(arg3) v3_ = (arg3); \
- typeof(arg4) v4_ = (arg4); \
-- ALT_CALL_ARG(arg1, 1); \
-+ ALT_CALL_ARG(v1_, 1); \
- ALT_CALL_ARG(v2_, 2); \
- ALT_CALL_ARG(v3_, 3); \
- ALT_CALL_ARG(v4_, 4); \
-@@ -318,10 +325,11 @@ extern void alternative_branches(void);
- })
-
- #define alternative_call4(func, arg1, arg2, arg3, arg4) ({ \
-+ typeof(arg1) v1_ = (arg1); \
- typeof(arg2) v2_ = (arg2); \
- typeof(arg3) v3_ = (arg3); \
- typeof(arg4) v4_ = (arg4); \
-- ALT_CALL_ARG(arg1, 1); \
-+ ALT_CALL_ARG(v1_, 1); \
- ALT_CALL_ARG(v2_, 2); \
- ALT_CALL_ARG(v3_, 3); \
- ALT_CALL_ARG(v4_, 4); \
-@@ -332,11 +340,12 @@ extern void alternative_branches(void);
- })
-
- #define alternative_vcall5(func, arg1, arg2, arg3, arg4, arg5) ({ \
-+ typeof(arg1) v1_ = (arg1); \
- typeof(arg2) v2_ = (arg2); \
- typeof(arg3) v3_ = (arg3); \
- typeof(arg4) v4_ = (arg4); \
- typeof(arg5) v5_ = (arg5); \
-- ALT_CALL_ARG(arg1, 1); \
-+ ALT_CALL_ARG(v1_, 1); \
- ALT_CALL_ARG(v2_, 2); \
- ALT_CALL_ARG(v3_, 3); \
- ALT_CALL_ARG(v4_, 4); \
-@@ -347,11 +356,12 @@ extern void alternative_branches(void);
- })
-
- #define alternative_call5(func, arg1, arg2, arg3, arg4, arg5) ({ \
-+ typeof(arg1) v1_ = (arg1); \
- typeof(arg2) v2_ = (arg2); \
- typeof(arg3) v3_ = (arg3); \
- typeof(arg4) v4_ = (arg4); \
- typeof(arg5) v5_ = (arg5); \
-- ALT_CALL_ARG(arg1, 1); \
-+ ALT_CALL_ARG(v1_, 1); \
- ALT_CALL_ARG(v2_, 2); \
- ALT_CALL_ARG(v3_, 3); \
- ALT_CALL_ARG(v4_, 4); \
-@@ -363,12 +373,13 @@ extern void alternative_branches(void);
- })
-
- #define alternative_vcall6(func, arg1, arg2, arg3, arg4, arg5, arg6) ({ \
-+ typeof(arg1) v1_ = (arg1); \
- typeof(arg2) v2_ = (arg2); \
- typeof(arg3) v3_ = (arg3); \
- typeof(arg4) v4_ = (arg4); \
- typeof(arg5) v5_ = (arg5); \
- typeof(arg6) v6_ = (arg6); \
-- ALT_CALL_ARG(arg1, 1); \
-+ ALT_CALL_ARG(v1_, 1); \
- ALT_CALL_ARG(v2_, 2); \
- ALT_CALL_ARG(v3_, 3); \
- ALT_CALL_ARG(v4_, 4); \
-@@ -379,12 +390,13 @@ extern void alternative_branches(void);
- })
-
- #define alternative_call6(func, arg1, arg2, arg3, arg4, arg5, arg6) ({ \
-+ typeof(arg1) v1_ = (arg1); \
- typeof(arg2) v2_ = (arg2); \
- typeof(arg3) v3_ = (arg3); \
- typeof(arg4) v4_ = (arg4); \
- typeof(arg5) v5_ = (arg5); \
- typeof(arg6) v6_ = (arg6); \
-- ALT_CALL_ARG(arg1, 1); \
-+ ALT_CALL_ARG(v1_, 1); \
- ALT_CALL_ARG(v2_, 2); \
- ALT_CALL_ARG(v3_, 3); \
- ALT_CALL_ARG(v4_, 4); \
---
-2.44.0
-
diff --git a/0035-x86-irq-forward-pending-interrupts-to-new-destinatio.patch b/0035-x86-irq-forward-pending-interrupts-to-new-destinatio.patch
new file mode 100644
index 0000000..f05b09e
--- /dev/null
+++ b/0035-x86-irq-forward-pending-interrupts-to-new-destinatio.patch
@@ -0,0 +1,143 @@
+From e95d30f9e5eed0c5d9dbf72d4cc3ae373152ab10 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Wed, 26 Jun 2024 13:44:08 +0200
+Subject: [PATCH 35/56] x86/irq: forward pending interrupts to new destination
+ in fixup_irqs()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+fixup_irqs() is used to evacuate interrupts from CPUs that are about to be
+offlined. Given the CPU is to become offline, the normal migration logic used
+by Xen, where the vector in the previous target(s) is left configured until
+the interrupt is received on the new destination, is not suitable.
+
+Instead attempt to do as much as possible in order to prevent losing
+interrupts. If fixup_irqs() is called from the CPU to be offlined (as is
+currently the case for CPU hot unplug), attempt to forward pending vectors when
+interrupts that target the current CPU are migrated to a different destination.
+
+Additionally, for interrupts that have already been moved from the current CPU
+prior to the call to fixup_irqs() but that haven't been delivered to the new
+destination (iow: interrupts with move_in_progress set and the current CPU set
+in ->arch.old_cpu_mask) also check whether the previous vector is pending and
+forward it to the new destination.
+
+This allows us to remove the window with interrupts enabled at the bottom of
+fixup_irqs(). Such a window wasn't safe anyway: references to the CPU to become
+offline are removed from interrupt masks, but the per-CPU vector_irq[] array
+is not updated to reflect those changes (as the CPU is going offline anyway).
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: e2bb28d621584fce15c907002ddc7c6772644b64
+master date: 2024-06-20 12:09:32 +0200
+---
+ xen/arch/x86/include/asm/apic.h | 5 ++++
+ xen/arch/x86/irq.c | 46 ++++++++++++++++++++++++++++-----
+ 2 files changed, 45 insertions(+), 6 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/apic.h b/xen/arch/x86/include/asm/apic.h
+index 7625c0ecd6..ad8d7cc054 100644
+--- a/xen/arch/x86/include/asm/apic.h
++++ b/xen/arch/x86/include/asm/apic.h
+@@ -145,6 +145,11 @@ static __inline bool_t apic_isr_read(u8 vector)
+ (vector & 0x1f)) & 1;
+ }
+
++static inline bool apic_irr_read(unsigned int vector)
++{
++ return apic_read(APIC_IRR + (vector / 32 * 0x10)) & (1U << (vector % 32));
++}
++
+ static __inline u32 get_apic_id(void) /* Get the physical APIC id */
+ {
+ u32 id = apic_read(APIC_ID);
+diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
+index 13ef61a5b7..290f8d26e7 100644
+--- a/xen/arch/x86/irq.c
++++ b/xen/arch/x86/irq.c
+@@ -2604,7 +2604,7 @@ void fixup_irqs(const cpumask_t *mask, bool verbose)
+
+ for ( irq = 0; irq < nr_irqs; irq++ )
+ {
+- bool break_affinity = false, set_affinity = true;
++ bool break_affinity = false, set_affinity = true, check_irr = false;
+ unsigned int vector, cpu = smp_processor_id();
+ cpumask_t *affinity = this_cpu(scratch_cpumask);
+
+@@ -2657,6 +2657,25 @@ void fixup_irqs(const cpumask_t *mask, bool verbose)
+ !cpu_online(cpu) &&
+ cpumask_test_cpu(cpu, desc->arch.old_cpu_mask) )
+ {
++ /*
++ * This to be offlined CPU was the target of an interrupt that's
++ * been moved, and the new destination target hasn't yet
++ * acknowledged any interrupt from it.
++ *
++ * We know the interrupt is configured to target the new CPU at
++ * this point, so we can check IRR for any pending vectors and
++ * forward them to the new destination.
++ *
++ * Note that for the other case of an interrupt movement being in
++ * progress (move_cleanup_count being non-zero) we know the new
++ * destination has already acked at least one interrupt from this
++ * source, and hence there's no need to forward any stale
++ * interrupts.
++ */
++ if ( apic_irr_read(desc->arch.old_vector) )
++ send_IPI_mask(cpumask_of(cpumask_any(desc->arch.cpu_mask)),
++ desc->arch.vector);
++
+ /*
+ * This CPU is going offline, remove it from ->arch.old_cpu_mask
+ * and possibly release the old vector if the old mask becomes
+@@ -2697,6 +2716,14 @@ void fixup_irqs(const cpumask_t *mask, bool verbose)
+ if ( desc->handler->disable )
+ desc->handler->disable(desc);
+
++ /*
++ * If the current CPU is going offline and is (one of) the target(s) of
++ * the interrupt, signal to check whether there are any pending vectors
++ * to be handled in the local APIC after the interrupt has been moved.
++ */
++ if ( !cpu_online(cpu) && cpumask_test_cpu(cpu, desc->arch.cpu_mask) )
++ check_irr = true;
++
+ if ( desc->handler->set_affinity )
+ desc->handler->set_affinity(desc, affinity);
+ else if ( !(warned++) )
+@@ -2707,6 +2734,18 @@ void fixup_irqs(const cpumask_t *mask, bool verbose)
+
+ cpumask_copy(affinity, desc->affinity);
+
++ if ( check_irr && apic_irr_read(vector) )
++ /*
++ * Forward pending interrupt to the new destination, this CPU is
++ * going offline and otherwise the interrupt would be lost.
++ *
++ * Do the IRR check as late as possible before releasing the irq
++ * desc in order for any in-flight interrupts to be delivered to
++ * the lapic.
++ */
++ send_IPI_mask(cpumask_of(cpumask_any(desc->arch.cpu_mask)),
++ desc->arch.vector);
++
+ spin_unlock(&desc->lock);
+
+ if ( !verbose )
+@@ -2718,11 +2757,6 @@ void fixup_irqs(const cpumask_t *mask, bool verbose)
+ printk("Broke affinity for IRQ%u, new: %*pb\n",
+ irq, CPUMASK_PR(affinity));
+ }
+-
+- /* That doesn't seem sufficient. Give it 1ms. */
+- local_irq_enable();
+- mdelay(1);
+- local_irq_disable();
+ }
+
+ void fixup_eoi(void)
+--
+2.45.2
+
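For illustration, a self-contained model of the vector-to-IRR lookup behind the new apic_irr_read() helper above: the 256 vectors map onto eight 32-bit registers spaced 0x10 bytes apart, so a vector lands at register offset (vector / 32) * 0x10, bit (vector % 32). The register file is simulated with an array; all names are illustrative, not Xen code.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t fake_irr[8];   /* stands in for APIC_IRR .. APIC_IRR+0x70 */

    static uint32_t apic_read_sim(unsigned int byte_off)
    {
        return fake_irr[byte_off / 0x10];
    }

    static bool irr_read(unsigned int vector)
    {
        return apic_read_sim(vector / 32 * 0x10) & (1U << (vector % 32));
    }

    int main(void)
    {
        fake_irr[0x40 / 32] = 1U << (0x40 % 32);   /* mark vector 0x40 pending */
        printf("vector 0x40 pending: %d\n", irr_read(0x40));
        printf("vector 0x41 pending: %d\n", irr_read(0x41));
        return 0;
    }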
diff --git a/0036-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch b/0036-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch
deleted file mode 100644
index b91ff52..0000000
--- a/0036-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch
+++ /dev/null
@@ -1,102 +0,0 @@
-From 54dacb5c02cba4676879ed077765734326b78e39 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 5 Mar 2024 12:01:22 +0100
-Subject: [PATCH 36/67] x86/cpu-policy: Allow for levelling of VERW side
- effects
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-MD_CLEAR and FB_CLEAR need OR-ing across a migrate pool. Allow this, by
-having them unconditinally set in max, with the host values reflected in
-default. Annotate the bits as having special properies.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
-master commit: de17162cafd27f2865a3102a2ec0f386a02ed03d
-master date: 2024-03-01 20:14:19 +0000
----
- xen/arch/x86/cpu-policy.c | 24 +++++++++++++++++++++
- xen/arch/x86/include/asm/cpufeature.h | 1 +
- xen/include/public/arch-x86/cpufeatureset.h | 4 ++--
- 3 files changed, 27 insertions(+), 2 deletions(-)
-
-diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
-index f0f2c8a1c0..7b875a7221 100644
---- a/xen/arch/x86/cpu-policy.c
-+++ b/xen/arch/x86/cpu-policy.c
-@@ -435,6 +435,16 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs)
- __set_bit(X86_FEATURE_RSBA, fs);
- __set_bit(X86_FEATURE_RRSBA, fs);
-
-+ /*
-+ * These bits indicate that the VERW instruction may have gained
-+ * scrubbing side effects. With pooling, they mean "you might migrate
-+ * somewhere where scrubbing is necessary", and may need exposing on
-+ * unaffected hardware. This is fine, because the VERW instruction
-+ * has been around since the 286.
-+ */
-+ __set_bit(X86_FEATURE_MD_CLEAR, fs);
-+ __set_bit(X86_FEATURE_FB_CLEAR, fs);
-+
- /*
- * The Gather Data Sampling microcode mitigation (August 2023) has an
- * adverse performance impact on the CLWB instruction on SKX/CLX/CPX.
-@@ -469,6 +479,20 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs)
- cpu_has_rdrand && !is_forced_cpu_cap(X86_FEATURE_RDRAND) )
- __clear_bit(X86_FEATURE_RDRAND, fs);
-
-+ /*
-+ * These bits indicate that the VERW instruction may have gained
-+ * scrubbing side effects. The max policy has them set for migration
-+ * reasons, so reset the default policy back to the host values in
-+ * case we're unaffected.
-+ */
-+ __clear_bit(X86_FEATURE_MD_CLEAR, fs);
-+ if ( cpu_has_md_clear )
-+ __set_bit(X86_FEATURE_MD_CLEAR, fs);
-+
-+ __clear_bit(X86_FEATURE_FB_CLEAR, fs);
-+ if ( cpu_has_fb_clear )
-+ __set_bit(X86_FEATURE_FB_CLEAR, fs);
-+
- /*
- * The Gather Data Sampling microcode mitigation (August 2023) has an
- * adverse performance impact on the CLWB instruction on SKX/CLX/CPX.
-diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h
-index 9ef7756593..ec824e8954 100644
---- a/xen/arch/x86/include/asm/cpufeature.h
-+++ b/xen/arch/x86/include/asm/cpufeature.h
-@@ -136,6 +136,7 @@
- #define cpu_has_avx512_4fmaps boot_cpu_has(X86_FEATURE_AVX512_4FMAPS)
- #define cpu_has_avx512_vp2intersect boot_cpu_has(X86_FEATURE_AVX512_VP2INTERSECT)
- #define cpu_has_srbds_ctrl boot_cpu_has(X86_FEATURE_SRBDS_CTRL)
-+#define cpu_has_md_clear boot_cpu_has(X86_FEATURE_MD_CLEAR)
- #define cpu_has_rtm_always_abort boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT)
- #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)
- #define cpu_has_serialize boot_cpu_has(X86_FEATURE_SERIALIZE)
-diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
-index 94d211df2f..aec1407613 100644
---- a/xen/include/public/arch-x86/cpufeatureset.h
-+++ b/xen/include/public/arch-x86/cpufeatureset.h
-@@ -260,7 +260,7 @@ XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single
- XEN_CPUFEATURE(FSRM, 9*32+ 4) /*A Fast Short REP MOVS */
- XEN_CPUFEATURE(AVX512_VP2INTERSECT, 9*32+8) /*a VP2INTERSECT{D,Q} insns */
- XEN_CPUFEATURE(SRBDS_CTRL, 9*32+ 9) /* MSR_MCU_OPT_CTRL and RNGDS_MITG_DIS. */
--XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*A VERW clears microarchitectural buffers */
-+XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*!A VERW clears microarchitectural buffers */
- XEN_CPUFEATURE(RTM_ALWAYS_ABORT, 9*32+11) /*! June 2021 TSX defeaturing in microcode. */
- XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */
- XEN_CPUFEATURE(SERIALIZE, 9*32+14) /*A SERIALIZE insn */
-@@ -321,7 +321,7 @@ XEN_CPUFEATURE(DOITM, 16*32+12) /* Data Operand Invariant Timing
- XEN_CPUFEATURE(SBDR_SSDP_NO, 16*32+13) /*A No Shared Buffer Data Read or Sideband Stale Data Propagation */
- XEN_CPUFEATURE(FBSDP_NO, 16*32+14) /*A No Fill Buffer Stale Data Propagation */
- XEN_CPUFEATURE(PSDP_NO, 16*32+15) /*A No Primary Stale Data Propagation */
--XEN_CPUFEATURE(FB_CLEAR, 16*32+17) /*A Fill Buffers cleared by VERW */
-+XEN_CPUFEATURE(FB_CLEAR, 16*32+17) /*!A Fill Buffers cleared by VERW */
- XEN_CPUFEATURE(FB_CLEAR_CTRL, 16*32+18) /* MSR_OPT_CPU_CTRL.FB_CLEAR_DIS */
- XEN_CPUFEATURE(RRSBA, 16*32+19) /*! Restricted RSB Alternative */
- XEN_CPUFEATURE(BHI_NO, 16*32+20) /*A No Branch History Injection */
---
-2.44.0
-
diff --git a/0036-x86-re-run-exception-from-stub-recovery-selftests-wi.patch b/0036-x86-re-run-exception-from-stub-recovery-selftests-wi.patch
new file mode 100644
index 0000000..a552e9c
--- /dev/null
+++ b/0036-x86-re-run-exception-from-stub-recovery-selftests-wi.patch
@@ -0,0 +1,84 @@
+From 5ac3cbbf83e1f955aeaf5d0f503099f5249b5c25 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Thu, 4 Jul 2024 14:06:19 +0200
+Subject: [PATCH 36/56] x86: re-run exception-from-stub recovery selftests with
+ CET-SS enabled
+
+On the BSP, shadow stacks are enabled only relatively late in the
+booting process. They in particular aren't active yet when initcalls are
+run. Keep the testing there, but invoke that testing a 2nd time when
+shadow stacks are active, to make sure we won't regress that case after
+addressing XSA-451.
+
+While touching this code, switch the guard from NDEBUG to CONFIG_DEBUG,
+such that IS_ENABLED() can validly be used at the new call site.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: cfe3ad67127b86e1b1c06993b86422673a51b050
+master date: 2024-02-27 13:49:52 +0100
+---
+ xen/arch/x86/extable.c | 8 +++++---
+ xen/arch/x86/include/asm/setup.h | 2 ++
+ xen/arch/x86/setup.c | 4 ++++
+ 3 files changed, 11 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/extable.c b/xen/arch/x86/extable.c
+index 8ffcd346d7..12cc9935d8 100644
+--- a/xen/arch/x86/extable.c
++++ b/xen/arch/x86/extable.c
+@@ -128,10 +128,11 @@ search_exception_table(const struct cpu_user_regs *regs, unsigned long *stub_ra)
+ return 0;
+ }
+
+-#ifndef NDEBUG
++#ifdef CONFIG_DEBUG
++#include <asm/setup.h>
+ #include <asm/traps.h>
+
+-static int __init cf_check stub_selftest(void)
++int __init cf_check stub_selftest(void)
+ {
+ static const struct {
+ uint8_t opc[8];
+@@ -155,7 +156,8 @@ static int __init cf_check stub_selftest(void)
+ unsigned int i;
+ bool fail = false;
+
+- printk("Running stub recovery selftests...\n");
++ printk("%s stub recovery selftests...\n",
++ system_state < SYS_STATE_active ? "Running" : "Re-running");
+
+ for ( i = 0; i < ARRAY_SIZE(tests); ++i )
+ {
+diff --git a/xen/arch/x86/include/asm/setup.h b/xen/arch/x86/include/asm/setup.h
+index 9a460e4db8..14d15048eb 100644
+--- a/xen/arch/x86/include/asm/setup.h
++++ b/xen/arch/x86/include/asm/setup.h
+@@ -38,6 +38,8 @@ void *bootstrap_map(const module_t *mod);
+
+ int xen_in_range(unsigned long mfn);
+
++int cf_check stub_selftest(void);
++
+ extern uint8_t kbd_shift_flags;
+
+ #ifdef NDEBUG
+diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
+index 25017b5d96..f2592c3dc9 100644
+--- a/xen/arch/x86/setup.c
++++ b/xen/arch/x86/setup.c
+@@ -738,6 +738,10 @@ static void noreturn init_done(void)
+
+ system_state = SYS_STATE_active;
+
++ /* Re-run stub recovery self-tests with CET-SS active. */
++ if ( IS_ENABLED(CONFIG_DEBUG) && cpu_has_xen_shstk )
++ stub_selftest();
++
+ domain_unpause_by_systemcontroller(dom0);
+
+ /* MUST be done prior to removing .init data. */
+--
+2.45.2
+
diff --git a/0037-hvmloader-PCI-skip-huge-BARs-in-certain-calculations.patch b/0037-hvmloader-PCI-skip-huge-BARs-in-certain-calculations.patch
deleted file mode 100644
index a46f913..0000000
--- a/0037-hvmloader-PCI-skip-huge-BARs-in-certain-calculations.patch
+++ /dev/null
@@ -1,99 +0,0 @@
-From 1e9808227c10717228969e924cab49cad4af6265 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 12 Mar 2024 12:08:48 +0100
-Subject: [PATCH 37/67] hvmloader/PCI: skip huge BARs in certain calculations
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-BARs of size 2Gb and up can't possibly fit below 4Gb: Both the bottom of
-the lower 2Gb range and the top of the higher 2Gb range have special
-purpose. Don't even have them influence whether to (perhaps) relocate
-low RAM.
-
-Reported-by: Neowutran <xen@neowutran.ovh>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Roger Pau Monné <roger.pau@citrix.com>
-master commit: 57acad12a09ffa490e870ebe17596aad858f0191
-master date: 2024-03-06 10:19:29 +0100
----
- tools/firmware/hvmloader/pci.c | 28 ++++++++++++++++++++--------
- 1 file changed, 20 insertions(+), 8 deletions(-)
-
-diff --git a/tools/firmware/hvmloader/pci.c b/tools/firmware/hvmloader/pci.c
-index 257a6feb61..c3c61ca060 100644
---- a/tools/firmware/hvmloader/pci.c
-+++ b/tools/firmware/hvmloader/pci.c
-@@ -33,6 +33,13 @@ uint32_t pci_mem_start = HVM_BELOW_4G_MMIO_START;
- const uint32_t pci_mem_end = RESERVED_MEMBASE;
- uint64_t pci_hi_mem_start = 0, pci_hi_mem_end = 0;
-
-+/*
-+ * BARs larger than this value are put in 64-bit space unconditionally. That
-+ * is, such BARs also don't play into the determination of how big the lowmem
-+ * MMIO hole needs to be.
-+ */
-+#define BAR_RELOC_THRESH GB(1)
-+
- enum virtual_vga virtual_vga = VGA_none;
- unsigned long igd_opregion_pgbase = 0;
-
-@@ -286,9 +293,11 @@ void pci_setup(void)
- bars[i].bar_reg = bar_reg;
- bars[i].bar_sz = bar_sz;
-
-- if ( ((bar_data & PCI_BASE_ADDRESS_SPACE) ==
-- PCI_BASE_ADDRESS_SPACE_MEMORY) ||
-- (bar_reg == PCI_ROM_ADDRESS) )
-+ if ( is_64bar && bar_sz > BAR_RELOC_THRESH )
-+ bar64_relocate = 1;
-+ else if ( ((bar_data & PCI_BASE_ADDRESS_SPACE) ==
-+ PCI_BASE_ADDRESS_SPACE_MEMORY) ||
-+ (bar_reg == PCI_ROM_ADDRESS) )
- mmio_total += bar_sz;
-
- nr_bars++;
-@@ -367,7 +376,7 @@ void pci_setup(void)
- pci_mem_start = hvm_info->low_mem_pgend << PAGE_SHIFT;
- }
-
-- if ( mmio_total > (pci_mem_end - pci_mem_start) )
-+ if ( mmio_total > (pci_mem_end - pci_mem_start) || bar64_relocate )
- {
- printf("Low MMIO hole not large enough for all devices,"
- " relocating some BARs to 64-bit\n");
-@@ -430,7 +439,8 @@ void pci_setup(void)
-
- /*
- * Relocate to high memory if the total amount of MMIO needed
-- * is more than the low MMIO available. Because devices are
-+ * is more than the low MMIO available or BARs bigger than
-+ * BAR_RELOC_THRESH are present. Because devices are
- * processed in order of bar_sz, this will preferentially
- * relocate larger devices to high memory first.
- *
-@@ -446,8 +456,9 @@ void pci_setup(void)
- * the code here assumes it to be.)
- * Should either of those two conditions change, this code will break.
- */
-- using_64bar = bars[i].is_64bar && bar64_relocate
-- && (mmio_total > (mem_resource.max - mem_resource.base));
-+ using_64bar = bars[i].is_64bar && bar64_relocate &&
-+ (mmio_total > (mem_resource.max - mem_resource.base) ||
-+ bar_sz > BAR_RELOC_THRESH);
- bar_data = pci_readl(devfn, bar_reg);
-
- if ( (bar_data & PCI_BASE_ADDRESS_SPACE) ==
-@@ -467,7 +478,8 @@ void pci_setup(void)
- resource = &mem_resource;
- bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
- }
-- mmio_total -= bar_sz;
-+ if ( bar_sz <= BAR_RELOC_THRESH )
-+ mmio_total -= bar_sz;
- }
- else
- {
---
-2.44.0
-
diff --git a/0037-tools-tests-don-t-let-test-xenstore-write-nodes-exce.patch b/0037-tools-tests-don-t-let-test-xenstore-write-nodes-exce.patch
new file mode 100644
index 0000000..cc7e47d
--- /dev/null
+++ b/0037-tools-tests-don-t-let-test-xenstore-write-nodes-exce.patch
@@ -0,0 +1,41 @@
+From 0ebfa35965257343ba3d8377be91ad8512a9c749 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 4 Jul 2024 14:06:54 +0200
+Subject: [PATCH 37/56] tools/tests: don't let test-xenstore write nodes
+ exceeding default size
+
+Today test-xenstore will write nodes with 3000 bytes of node data. This
+size exceeds the default quota for the allowed node size. While this
+works in dom0 with C-xenstored, OCAML-xenstored does not like it.
+
+Use a size of 2000 instead, which is lower than the allowed default
+node size of 2048.
+
+Fixes: 3afc5e4a5b75 ("tools/tests: add xenstore testing framework")
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 642005e310483c490b0725fab4672f2b77fdf2ba
+master date: 2024-05-02 18:15:31 +0100
+---
+ tools/tests/xenstore/test-xenstore.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/tools/tests/xenstore/test-xenstore.c b/tools/tests/xenstore/test-xenstore.c
+index d491dac53b..73a7011d21 100644
+--- a/tools/tests/xenstore/test-xenstore.c
++++ b/tools/tests/xenstore/test-xenstore.c
+@@ -408,9 +408,9 @@ static int test_ta3_deinit(uintptr_t par)
+ #define TEST(s, f, p, l) { s, f ## _init, f, f ## _deinit, (uintptr_t)(p), l }
+ struct test tests[] = {
+ TEST("read 1", test_read, 1, "Read node with 1 byte data"),
+-TEST("read 3000", test_read, 3000, "Read node with 3000 bytes data"),
++TEST("read 2000", test_read, 2000, "Read node with 2000 bytes data"),
+ TEST("write 1", test_write, 1, "Write node with 1 byte data"),
+-TEST("write 3000", test_write, 3000, "Write node with 3000 bytes data"),
++TEST("write 2000", test_write, 2000, "Write node with 2000 bytes data"),
+ TEST("dir", test_dir, 0, "List directory"),
+ TEST("rm node", test_rm, 0, "Remove single node"),
+ TEST("rm dir", test_rm, WRITE_BUFFERS_N, "Remove node with sub-nodes"),
+--
+2.45.2
+
diff --git a/0038-tools-tests-let-test-xenstore-exit-with-non-0-status.patch b/0038-tools-tests-let-test-xenstore-exit-with-non-0-status.patch
new file mode 100644
index 0000000..ee0a497
--- /dev/null
+++ b/0038-tools-tests-let-test-xenstore-exit-with-non-0-status.patch
@@ -0,0 +1,57 @@
+From 22f623622cc60571be9cccc323a1d17749683667 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 4 Jul 2024 14:07:12 +0200
+Subject: [PATCH 38/56] tools/tests: let test-xenstore exit with non-0 status
+ in case of error
+
+In case a test is failing in test-xenstore, let the tool exit with an
+exit status other than 0.
+
+Fix a typo in an error message.
+
+Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Fixes: 3afc5e4a5b75 ("tools/tests: add xenstore testing framework")
+Signed-off-by: Juergen Gross <jgross@suse.com>
+master commit: 2d4ba205591ba64f31149ae31051678159ee9e11
+master date: 2024-05-02 18:15:46 +0100
+---
+ tools/tests/xenstore/test-xenstore.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/tools/tests/xenstore/test-xenstore.c b/tools/tests/xenstore/test-xenstore.c
+index 73a7011d21..7a9bd9afb3 100644
+--- a/tools/tests/xenstore/test-xenstore.c
++++ b/tools/tests/xenstore/test-xenstore.c
+@@ -506,14 +506,14 @@ int main(int argc, char *argv[])
+ stop = time(NULL) + randtime;
+ srandom((unsigned int)stop);
+
+- while ( time(NULL) < stop )
++ while ( time(NULL) < stop && !ret )
+ {
+ t = random() % ARRAY_SIZE(tests);
+ ret = call_test(tests + t, iters, true);
+ }
+ }
+ else
+- for ( t = 0; t < ARRAY_SIZE(tests); t++ )
++ for ( t = 0; t < ARRAY_SIZE(tests) && !ret; t++ )
+ {
+ if ( !test || !strcmp(test, tests[t].name) )
+ ret = call_test(tests + t, iters, false);
+@@ -525,10 +525,10 @@ int main(int argc, char *argv[])
+ xs_close(xsh);
+
+ if ( ta_loops )
+- printf("Exhaustive transaction retries (%d) occurrred %d times.\n",
++ printf("Exhaustive transaction retries (%d) occurred %d times.\n",
+ MAX_TA_LOOPS, ta_loops);
+
+- return 0;
++ return ret ? 3 : 0;
+ }
+
+ /*
+--
+2.45.2
+
diff --git a/0038-x86-mm-fix-detection-of-last-L1-entry-in-modify_xen_.patch b/0038-x86-mm-fix-detection-of-last-L1-entry-in-modify_xen_.patch
deleted file mode 100644
index 66b4db3..0000000
--- a/0038-x86-mm-fix-detection-of-last-L1-entry-in-modify_xen_.patch
+++ /dev/null
@@ -1,41 +0,0 @@
-From 1f94117bec55a7b934fed3dfd3529db624eb441f Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 12 Mar 2024 12:08:59 +0100
-Subject: [PATCH 38/67] x86/mm: fix detection of last L1 entry in
- modify_xen_mappings_lite()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The current logic to detect when to switch to the next L1 table is incorrectly
-using l2_table_offset() in order to notice when the last entry on the current
-L1 table has been reached.
-
-It should instead use l1_table_offset() to check whether the index has wrapped
-to point to the first entry, and so the next L1 table should be used.
-
-Fixes: 8676092a0f16 ('x86/livepatch: Fix livepatch application when CET is active')
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: 7c81558208de7858251b62f168a449be84305595
-master date: 2024-03-11 11:09:42 +0000
----
- xen/arch/x86/mm.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index e884a6fdbd..330c4abcd1 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -5963,7 +5963,7 @@ void init_or_livepatch modify_xen_mappings_lite(
-
- v += 1UL << L1_PAGETABLE_SHIFT;
-
-- if ( l2_table_offset(v) == 0 )
-+ if ( l1_table_offset(v) == 0 )
- break;
- }
-
---
-2.44.0
-
diff --git a/0039-LICENSES-Add-MIT-0-MIT-No-Attribution.patch b/0039-LICENSES-Add-MIT-0-MIT-No-Attribution.patch
new file mode 100644
index 0000000..8b2c4ec
--- /dev/null
+++ b/0039-LICENSES-Add-MIT-0-MIT-No-Attribution.patch
@@ -0,0 +1,58 @@
+From 75b4f9474a1aa33a6f9e0986b51c390f9b38ae5a Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Thu, 4 Jul 2024 14:08:11 +0200
+Subject: [PATCH 39/56] LICENSES: Add MIT-0 (MIT No Attribution)
+
+We are about to import code licensed under MIT-0. It's compatible for us to
+use, so identify it as a permitted license.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+Acked-by: Christian Lindig <christian.lindig@cloud.com>
+master commit: 219cdff3fb7b4a03ab14869584f111e0f623b330
+master date: 2024-05-23 15:04:40 +0100
+---
+ LICENSES/MIT-0 | 31 +++++++++++++++++++++++++++++++
+ 1 file changed, 31 insertions(+)
+ create mode 100644 LICENSES/MIT-0
+
+diff --git a/LICENSES/MIT-0 b/LICENSES/MIT-0
+new file mode 100644
+index 0000000000..70fb90ee34
+--- /dev/null
++++ b/LICENSES/MIT-0
+@@ -0,0 +1,31 @@
++Valid-License-Identifier: MIT-0
++
++SPDX-URL: https://spdx.org/licenses/MIT-0.html
++
++Usage-Guide:
++
++ To use the MIT-0 License put the following SPDX tag/value pair into a
++ comment according to the placement guidelines in the licensing rules
++ documentation:
++ SPDX-License-Identifier: MIT-0
++
++License-Text:
++
++MIT No Attribution
++
++Copyright <year> <copyright holder>
++
++Permission is hereby granted, free of charge, to any person obtaining a copy
++of this software and associated documentation files (the "Software"), to deal
++in the Software without restriction, including without limitation the rights
++to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
++copies of the Software, and to permit persons to whom the Software is
++furnished to do so.
++
++THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++SOFTWARE.
+--
+2.45.2
+
diff --git a/0039-x86-entry-Introduce-EFRAME_-constants.patch b/0039-x86-entry-Introduce-EFRAME_-constants.patch
deleted file mode 100644
index c280286..0000000
--- a/0039-x86-entry-Introduce-EFRAME_-constants.patch
+++ /dev/null
@@ -1,314 +0,0 @@
-From e691f99f17198906f813b85dcabafe5addb9a57a Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Sat, 27 Jan 2024 17:52:09 +0000
-Subject: [PATCH 39/67] x86/entry: Introduce EFRAME_* constants
-
-restore_all_guest() does a lot of manipulation of the stack after popping the
-GPRs, and uses raw %rsp displacements to do so. Also, almost all entrypaths
-use raw %rsp displacements prior to pushing GPRs.
-
-Provide better mnemonics, to aid readability and reduce the chance of errors
-when editing.
-
-No functional change. The resulting binary is identical.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 37541208f119a9c552c6c6c3246ea61be0d44035)
----
- xen/arch/x86/x86_64/asm-offsets.c | 17 ++++++++
- xen/arch/x86/x86_64/compat/entry.S | 2 +-
- xen/arch/x86/x86_64/entry.S | 70 +++++++++++++++---------------
- 3 files changed, 53 insertions(+), 36 deletions(-)
-
-diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
-index 287dac101a..31fa63b77f 100644
---- a/xen/arch/x86/x86_64/asm-offsets.c
-+++ b/xen/arch/x86/x86_64/asm-offsets.c
-@@ -51,6 +51,23 @@ void __dummy__(void)
- OFFSET(UREGS_kernel_sizeof, struct cpu_user_regs, es);
- BLANK();
-
-+ /*
-+ * EFRAME_* is for the entry/exit logic where %rsp is pointing at
-+ * UREGS_error_code and GPRs are still/already guest values.
-+ */
-+#define OFFSET_EF(sym, mem) \
-+ DEFINE(sym, offsetof(struct cpu_user_regs, mem) - \
-+ offsetof(struct cpu_user_regs, error_code))
-+
-+ OFFSET_EF(EFRAME_entry_vector, entry_vector);
-+ OFFSET_EF(EFRAME_rip, rip);
-+ OFFSET_EF(EFRAME_cs, cs);
-+ OFFSET_EF(EFRAME_eflags, eflags);
-+ OFFSET_EF(EFRAME_rsp, rsp);
-+ BLANK();
-+
-+#undef OFFSET_EF
-+
- OFFSET(VCPU_processor, struct vcpu, processor);
- OFFSET(VCPU_domain, struct vcpu, domain);
- OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info);
-diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
-index 253bb1688c..7c211314d8 100644
---- a/xen/arch/x86/x86_64/compat/entry.S
-+++ b/xen/arch/x86/x86_64/compat/entry.S
-@@ -15,7 +15,7 @@ ENTRY(entry_int82)
- ENDBR64
- ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
- pushq $0
-- movl $HYPERCALL_VECTOR, 4(%rsp)
-+ movl $HYPERCALL_VECTOR, EFRAME_entry_vector(%rsp)
- SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. */
-
- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */
-diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
-index 585b0c9551..412cbeb3ec 100644
---- a/xen/arch/x86/x86_64/entry.S
-+++ b/xen/arch/x86/x86_64/entry.S
-@@ -190,15 +190,15 @@ restore_all_guest:
- SPEC_CTRL_EXIT_TO_PV /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
-
- RESTORE_ALL
-- testw $TRAP_syscall,4(%rsp)
-+ testw $TRAP_syscall, EFRAME_entry_vector(%rsp)
- jz iret_exit_to_guest
-
-- movq 24(%rsp),%r11 # RFLAGS
-+ mov EFRAME_eflags(%rsp), %r11
- andq $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), %r11
- orq $X86_EFLAGS_IF,%r11
-
- /* Don't use SYSRET path if the return address is not canonical. */
-- movq 8(%rsp),%rcx
-+ mov EFRAME_rip(%rsp), %rcx
- sarq $47,%rcx
- incl %ecx
- cmpl $1,%ecx
-@@ -213,20 +213,20 @@ restore_all_guest:
- ALTERNATIVE "", rag_clrssbsy, X86_FEATURE_XEN_SHSTK
- #endif
-
-- movq 8(%rsp), %rcx # RIP
-- cmpw $FLAT_USER_CS32,16(%rsp)# CS
-- movq 32(%rsp),%rsp # RSP
-+ mov EFRAME_rip(%rsp), %rcx
-+ cmpw $FLAT_USER_CS32, EFRAME_cs(%rsp)
-+ mov EFRAME_rsp(%rsp), %rsp
- je 1f
- sysretq
- 1: sysretl
-
- ALIGN
- .Lrestore_rcx_iret_exit_to_guest:
-- movq 8(%rsp), %rcx # RIP
-+ mov EFRAME_rip(%rsp), %rcx
- /* No special register assumptions. */
- iret_exit_to_guest:
-- andl $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), 24(%rsp)
-- orl $X86_EFLAGS_IF,24(%rsp)
-+ andl $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), EFRAME_eflags(%rsp)
-+ orl $X86_EFLAGS_IF, EFRAME_eflags(%rsp)
- addq $8,%rsp
- .Lft0: iretq
- _ASM_PRE_EXTABLE(.Lft0, handle_exception)
-@@ -257,7 +257,7 @@ ENTRY(lstar_enter)
- pushq $FLAT_KERNEL_CS64
- pushq %rcx
- pushq $0
-- movl $TRAP_syscall, 4(%rsp)
-+ movl $TRAP_syscall, EFRAME_entry_vector(%rsp)
- SAVE_ALL
-
- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */
-@@ -294,7 +294,7 @@ ENTRY(cstar_enter)
- pushq $FLAT_USER_CS32
- pushq %rcx
- pushq $0
-- movl $TRAP_syscall, 4(%rsp)
-+ movl $TRAP_syscall, EFRAME_entry_vector(%rsp)
- SAVE_ALL
-
- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */
-@@ -335,7 +335,7 @@ GLOBAL(sysenter_eflags_saved)
- pushq $3 /* ring 3 null cs */
- pushq $0 /* null rip */
- pushq $0
-- movl $TRAP_syscall, 4(%rsp)
-+ movl $TRAP_syscall, EFRAME_entry_vector(%rsp)
- SAVE_ALL
-
- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */
-@@ -389,7 +389,7 @@ ENTRY(int80_direct_trap)
- ENDBR64
- ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
- pushq $0
-- movl $0x80, 4(%rsp)
-+ movl $0x80, EFRAME_entry_vector(%rsp)
- SAVE_ALL
-
- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */
-@@ -649,7 +649,7 @@ ret_from_intr:
- .section .init.text, "ax", @progbits
- ENTRY(early_page_fault)
- ENDBR64
-- movl $TRAP_page_fault, 4(%rsp)
-+ movl $TRAP_page_fault, EFRAME_entry_vector(%rsp)
- SAVE_ALL
- movq %rsp, %rdi
- call do_early_page_fault
-@@ -716,7 +716,7 @@ ENTRY(common_interrupt)
-
- ENTRY(page_fault)
- ENDBR64
-- movl $TRAP_page_fault,4(%rsp)
-+ movl $TRAP_page_fault, EFRAME_entry_vector(%rsp)
- /* No special register assumptions. */
- GLOBAL(handle_exception)
- ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
-@@ -892,90 +892,90 @@ FATAL_exception_with_ints_disabled:
- ENTRY(divide_error)
- ENDBR64
- pushq $0
-- movl $TRAP_divide_error,4(%rsp)
-+ movl $TRAP_divide_error, EFRAME_entry_vector(%rsp)
- jmp handle_exception
-
- ENTRY(coprocessor_error)
- ENDBR64
- pushq $0
-- movl $TRAP_copro_error,4(%rsp)
-+ movl $TRAP_copro_error, EFRAME_entry_vector(%rsp)
- jmp handle_exception
-
- ENTRY(simd_coprocessor_error)
- ENDBR64
- pushq $0
-- movl $TRAP_simd_error,4(%rsp)
-+ movl $TRAP_simd_error, EFRAME_entry_vector(%rsp)
- jmp handle_exception
-
- ENTRY(device_not_available)
- ENDBR64
- pushq $0
-- movl $TRAP_no_device,4(%rsp)
-+ movl $TRAP_no_device, EFRAME_entry_vector(%rsp)
- jmp handle_exception
-
- ENTRY(debug)
- ENDBR64
- pushq $0
-- movl $TRAP_debug,4(%rsp)
-+ movl $TRAP_debug, EFRAME_entry_vector(%rsp)
- jmp handle_ist_exception
-
- ENTRY(int3)
- ENDBR64
- pushq $0
-- movl $TRAP_int3,4(%rsp)
-+ movl $TRAP_int3, EFRAME_entry_vector(%rsp)
- jmp handle_exception
-
- ENTRY(overflow)
- ENDBR64
- pushq $0
-- movl $TRAP_overflow,4(%rsp)
-+ movl $TRAP_overflow, EFRAME_entry_vector(%rsp)
- jmp handle_exception
-
- ENTRY(bounds)
- ENDBR64
- pushq $0
-- movl $TRAP_bounds,4(%rsp)
-+ movl $TRAP_bounds, EFRAME_entry_vector(%rsp)
- jmp handle_exception
-
- ENTRY(invalid_op)
- ENDBR64
- pushq $0
-- movl $TRAP_invalid_op,4(%rsp)
-+ movl $TRAP_invalid_op, EFRAME_entry_vector(%rsp)
- jmp handle_exception
-
- ENTRY(invalid_TSS)
- ENDBR64
-- movl $TRAP_invalid_tss,4(%rsp)
-+ movl $TRAP_invalid_tss, EFRAME_entry_vector(%rsp)
- jmp handle_exception
-
- ENTRY(segment_not_present)
- ENDBR64
-- movl $TRAP_no_segment,4(%rsp)
-+ movl $TRAP_no_segment, EFRAME_entry_vector(%rsp)
- jmp handle_exception
-
- ENTRY(stack_segment)
- ENDBR64
-- movl $TRAP_stack_error,4(%rsp)
-+ movl $TRAP_stack_error, EFRAME_entry_vector(%rsp)
- jmp handle_exception
-
- ENTRY(general_protection)
- ENDBR64
-- movl $TRAP_gp_fault,4(%rsp)
-+ movl $TRAP_gp_fault, EFRAME_entry_vector(%rsp)
- jmp handle_exception
-
- ENTRY(alignment_check)
- ENDBR64
-- movl $TRAP_alignment_check,4(%rsp)
-+ movl $TRAP_alignment_check, EFRAME_entry_vector(%rsp)
- jmp handle_exception
-
- ENTRY(entry_CP)
- ENDBR64
-- movl $X86_EXC_CP, 4(%rsp)
-+ movl $X86_EXC_CP, EFRAME_entry_vector(%rsp)
- jmp handle_exception
-
- ENTRY(double_fault)
- ENDBR64
-- movl $TRAP_double_fault,4(%rsp)
-+ movl $TRAP_double_fault, EFRAME_entry_vector(%rsp)
- /* Set AC to reduce chance of further SMAP faults */
- ALTERNATIVE "", stac, X86_FEATURE_XEN_SMAP
- SAVE_ALL
-@@ -1001,7 +1001,7 @@ ENTRY(double_fault)
- ENTRY(nmi)
- ENDBR64
- pushq $0
-- movl $TRAP_nmi,4(%rsp)
-+ movl $TRAP_nmi, EFRAME_entry_vector(%rsp)
- handle_ist_exception:
- ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
- SAVE_ALL
-@@ -1134,7 +1134,7 @@ handle_ist_exception:
- ENTRY(machine_check)
- ENDBR64
- pushq $0
-- movl $TRAP_machine_check,4(%rsp)
-+ movl $TRAP_machine_check, EFRAME_entry_vector(%rsp)
- jmp handle_ist_exception
-
- /* No op trap handler. Required for kexec crash path. */
-@@ -1171,7 +1171,7 @@ autogen_stubs: /* Automatically generated stubs. */
- 1:
- ENDBR64
- pushq $0
-- movb $vec,4(%rsp)
-+ movb $vec, EFRAME_entry_vector(%rsp)
- jmp common_interrupt
-
- entrypoint 1b
-@@ -1185,7 +1185,7 @@ autogen_stubs: /* Automatically generated stubs. */
- test $8,%spl /* 64bit exception frames are 16 byte aligned, but the word */
- jz 2f /* size is 8 bytes. Check whether the processor gave us an */
- pushq $0 /* error code, and insert an empty one if not. */
--2: movb $vec,4(%rsp)
-+2: movb $vec, EFRAME_entry_vector(%rsp)
- jmp handle_exception
-
- entrypoint 1b
---
-2.44.0
-
diff --git a/0040-tools-Import-stand-alone-sd_notify-implementation-fr.patch b/0040-tools-Import-stand-alone-sd_notify-implementation-fr.patch
new file mode 100644
index 0000000..990158d
--- /dev/null
+++ b/0040-tools-Import-stand-alone-sd_notify-implementation-fr.patch
@@ -0,0 +1,130 @@
+From 1743102a92479834c8e17b20697129e05b7c8313 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Thu, 4 Jul 2024 14:10:10 +0200
+Subject: [PATCH 40/56] tools: Import stand-alone sd_notify() implementation
+ from systemd
+
+... in order to avoid linking against the whole of libsystemd.
+
+Only minimal changes to the upstream copy, to function as a drop-in
+replacement for sd_notify() and as a header-only library.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Acked-by: Christian Lindig <christian.lindig@cloud.com>
+master commit: 78510f3a1522f2856330ffa429e0e35f8aab4277
+master date: 2024-05-23 15:04:40 +0100
+master commit: 78510f3a1522f2856330ffa429e0e35f8aab4277
+master date: 2024-05-23 15:04:40 +0100
+---
+ tools/include/xen-sd-notify.h | 98 +++++++++++++++++++++++++++++++++++
+ 1 file changed, 98 insertions(+)
+ create mode 100644 tools/include/xen-sd-notify.h
+
+diff --git a/tools/include/xen-sd-notify.h b/tools/include/xen-sd-notify.h
+new file mode 100644
+index 0000000000..28c9b20f15
+--- /dev/null
++++ b/tools/include/xen-sd-notify.h
+@@ -0,0 +1,98 @@
++/* SPDX-License-Identifier: MIT-0 */
++
++/*
++ * Implement the systemd notify protocol without external dependencies.
++ * Supports both readiness notification on startup and on reloading,
++ * according to the protocol defined at:
++ * https://www.freedesktop.org/software/systemd/man/latest/sd_notify.html
++ * This protocol is guaranteed to be stable as per:
++ * https://systemd.io/PORTABILITY_AND_STABILITY/
++ *
++ * Differences from the upstream copy:
++ * - Rename/rework as a drop-in replacement for systemd/sd-daemon.h
++ * - Only take the subset Xen cares about
++ * - Respect -Wdeclaration-after-statement
++ */
++
++#ifndef XEN_SD_NOTIFY
++#define XEN_SD_NOTIFY
++
++#include <errno.h>
++#include <stddef.h>
++#include <stdlib.h>
++#include <sys/socket.h>
++#include <sys/un.h>
++#include <unistd.h>
++
++static inline void xen_sd_closep(int *fd) {
++ if (!fd || *fd < 0)
++ return;
++
++ close(*fd);
++ *fd = -1;
++}
++
++static inline int xen_sd_notify(const char *message) {
++ union sockaddr_union {
++ struct sockaddr sa;
++ struct sockaddr_un sun;
++ } socket_addr = {
++ .sun.sun_family = AF_UNIX,
++ };
++ size_t path_length, message_length;
++ ssize_t written;
++ const char *socket_path;
++ int __attribute__((cleanup(xen_sd_closep))) fd = -1;
++
++ /* Verify the argument first */
++ if (!message)
++ return -EINVAL;
++
++ message_length = strlen(message);
++ if (message_length == 0)
++ return -EINVAL;
++
++ /* If the variable is not set, the protocol is a noop */
++ socket_path = getenv("NOTIFY_SOCKET");
++ if (!socket_path)
++ return 0; /* Not set? Nothing to do */
++
++ /* Only AF_UNIX is supported, with path or abstract sockets */
++ if (socket_path[0] != '/' && socket_path[0] != '@')
++ return -EAFNOSUPPORT;
++
++ path_length = strlen(socket_path);
++ /* Ensure there is room for NUL byte */
++ if (path_length >= sizeof(socket_addr.sun.sun_path))
++ return -E2BIG;
++
++ memcpy(socket_addr.sun.sun_path, socket_path, path_length);
++
++ /* Support for abstract socket */
++ if (socket_addr.sun.sun_path[0] == '@')
++ socket_addr.sun.sun_path[0] = 0;
++
++ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0);
++ if (fd < 0)
++ return -errno;
++
++ if (connect(fd, &socket_addr.sa, offsetof(struct sockaddr_un, sun_path) + path_length) != 0)
++ return -errno;
++
++ written = write(fd, message, message_length);
++ if (written != (ssize_t) message_length)
++ return written < 0 ? -errno : -EPROTO;
++
++ return 1; /* Notified! */
++}
++
++static inline int sd_notify(int unset_environment, const char *message) {
++ int r = xen_sd_notify(message);
++
++ if (unset_environment)
++ unsetenv("NOTIFY_SOCKET");
++
++ return r;
++}
++
++#endif /* XEN_SD_NOTIFY */
+--
+2.45.2
+
diff --git a/0040-x86-Resync-intel-family.h-from-Linux.patch b/0040-x86-Resync-intel-family.h-from-Linux.patch
deleted file mode 100644
index 84e0304..0000000
--- a/0040-x86-Resync-intel-family.h-from-Linux.patch
+++ /dev/null
@@ -1,98 +0,0 @@
-From abc43cf5a6579f1aa0decf0a2349cdd2d2473117 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 27 Feb 2024 16:07:39 +0000
-Subject: [PATCH 40/67] x86: Resync intel-family.h from Linux
-
-From v6.8-rc6
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 195e75371b13c4f7ecdf7b5c50aed0d02f2d7ce8)
----
- xen/arch/x86/include/asm/intel-family.h | 38 ++++++++++++++++++++++---
- 1 file changed, 34 insertions(+), 4 deletions(-)
-
-diff --git a/xen/arch/x86/include/asm/intel-family.h b/xen/arch/x86/include/asm/intel-family.h
-index ffc49151be..b65e9c46b9 100644
---- a/xen/arch/x86/include/asm/intel-family.h
-+++ b/xen/arch/x86/include/asm/intel-family.h
-@@ -26,6 +26,9 @@
- * _G - parts with extra graphics on
- * _X - regular server parts
- * _D - micro server parts
-+ * _N,_P - other mobile parts
-+ * _H - premium mobile parts
-+ * _S - other client parts
- *
- * Historical OPTDIFFs:
- *
-@@ -37,6 +40,9 @@
- * their own names :-(
- */
-
-+/* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */
-+#define INTEL_FAM6_ANY X86_MODEL_ANY
-+
- #define INTEL_FAM6_CORE_YONAH 0x0E
-
- #define INTEL_FAM6_CORE2_MEROM 0x0F
-@@ -93,8 +99,6 @@
- #define INTEL_FAM6_ICELAKE_L 0x7E /* Sunny Cove */
- #define INTEL_FAM6_ICELAKE_NNPI 0x9D /* Sunny Cove */
-
--#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */
--
- #define INTEL_FAM6_ROCKETLAKE 0xA7 /* Cypress Cove */
-
- #define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */
-@@ -102,12 +106,31 @@
-
- #define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */
-
-+#define INTEL_FAM6_EMERALDRAPIDS_X 0xCF
-+
-+#define INTEL_FAM6_GRANITERAPIDS_X 0xAD
-+#define INTEL_FAM6_GRANITERAPIDS_D 0xAE
-+
-+/* "Hybrid" Processors (P-Core/E-Core) */
-+
-+#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */
-+
- #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */
- #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */
-
--#define INTEL_FAM6_RAPTORLAKE 0xB7
-+#define INTEL_FAM6_RAPTORLAKE 0xB7 /* Raptor Cove / Enhanced Gracemont */
-+#define INTEL_FAM6_RAPTORLAKE_P 0xBA
-+#define INTEL_FAM6_RAPTORLAKE_S 0xBF
-+
-+#define INTEL_FAM6_METEORLAKE 0xAC
-+#define INTEL_FAM6_METEORLAKE_L 0xAA
-+
-+#define INTEL_FAM6_ARROWLAKE_H 0xC5
-+#define INTEL_FAM6_ARROWLAKE 0xC6
-+
-+#define INTEL_FAM6_LUNARLAKE_M 0xBD
-
--/* "Small Core" Processors (Atom) */
-+/* "Small Core" Processors (Atom/E-Core) */
-
- #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
- #define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */
-@@ -134,6 +157,13 @@
- #define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */
- #define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */
-
-+#define INTEL_FAM6_ATOM_GRACEMONT 0xBE /* Alderlake N */
-+
-+#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */
-+#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */
-+
-+#define INTEL_FAM6_ATOM_DARKMONT_X 0xDD /* Clearwater Forest */
-+
- /* Xeon Phi */
-
- #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
---
-2.44.0
-
diff --git a/0041-tools-c-o-xenstored-Don-t-link-against-libsystemd.patch b/0041-tools-c-o-xenstored-Don-t-link-against-libsystemd.patch
new file mode 100644
index 0000000..5bf3f98
--- /dev/null
+++ b/0041-tools-c-o-xenstored-Don-t-link-against-libsystemd.patch
@@ -0,0 +1,87 @@
+From 77cf215157d267a7776f3c4ec32e89064dcd84cd Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Thu, 4 Jul 2024 14:10:29 +0200
+Subject: [PATCH 41/56] tools/{c,o}xenstored: Don't link against libsystemd
+
+Use the local freestanding wrapper instead.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Acked-by: Christian Lindig <christian.lindig@cloud.com>
+master commit: caf864482689a5dd6a945759b6372bb260d49665
+master date: 2024-05-23 15:04:40 +0100
+---
+ tools/ocaml/xenstored/Makefile | 3 +--
+ tools/ocaml/xenstored/systemd_stubs.c | 2 +-
+ tools/xenstored/Makefile | 5 -----
+ tools/xenstored/core.c | 4 ++--
+ 4 files changed, 4 insertions(+), 10 deletions(-)
+
+diff --git a/tools/ocaml/xenstored/Makefile b/tools/ocaml/xenstored/Makefile
+index e8aaecf2e6..fa45305d8c 100644
+--- a/tools/ocaml/xenstored/Makefile
++++ b/tools/ocaml/xenstored/Makefile
+@@ -4,8 +4,7 @@ include $(OCAML_TOPLEVEL)/common.make
+
+ # Include configure output (config.h)
+ CFLAGS += -include $(XEN_ROOT)/tools/config.h
+-CFLAGS-$(CONFIG_SYSTEMD) += $(SYSTEMD_CFLAGS)
+-LDFLAGS-$(CONFIG_SYSTEMD) += $(SYSTEMD_LIBS)
++CFLAGS-$(CONFIG_SYSTEMD) += $(CFLAGS_xeninclude)
+
+ CFLAGS += $(CFLAGS-y)
+ CFLAGS += $(APPEND_CFLAGS)
+diff --git a/tools/ocaml/xenstored/systemd_stubs.c b/tools/ocaml/xenstored/systemd_stubs.c
+index f4c875075a..7dbbdd35bf 100644
+--- a/tools/ocaml/xenstored/systemd_stubs.c
++++ b/tools/ocaml/xenstored/systemd_stubs.c
+@@ -25,7 +25,7 @@
+
+ #if defined(HAVE_SYSTEMD)
+
+-#include <systemd/sd-daemon.h>
++#include <xen-sd-notify.h>
+
+ CAMLprim value ocaml_sd_notify_ready(value ignore)
+ {
+diff --git a/tools/xenstored/Makefile b/tools/xenstored/Makefile
+index e0897ed1ba..09adfe1d50 100644
+--- a/tools/xenstored/Makefile
++++ b/tools/xenstored/Makefile
+@@ -9,11 +9,6 @@ xenstored: LDLIBS += $(LDLIBS_libxenctrl)
+ xenstored: LDLIBS += -lrt
+ xenstored: LDLIBS += $(SOCKET_LIBS)
+
+-ifeq ($(CONFIG_SYSTEMD),y)
+-$(XENSTORED_OBJS-y): CFLAGS += $(SYSTEMD_CFLAGS)
+-xenstored: LDLIBS += $(SYSTEMD_LIBS)
+-endif
+-
+ TARGETS := xenstored
+
+ .PHONY: all
+diff --git a/tools/xenstored/core.c b/tools/xenstored/core.c
+index edd07711db..dfe98e7bfc 100644
+--- a/tools/xenstored/core.c
++++ b/tools/xenstored/core.c
+@@ -61,7 +61,7 @@
+ #endif
+
+ #if defined(XEN_SYSTEMD_ENABLED)
+-#include <systemd/sd-daemon.h>
++#include <xen-sd-notify.h>
+ #endif
+
+ extern xenevtchn_handle *xce_handle; /* in domain.c */
+@@ -3000,7 +3000,7 @@ int main(int argc, char *argv[])
+ #if defined(XEN_SYSTEMD_ENABLED)
+ if (!live_update) {
+ sd_notify(1, "READY=1");
+- fprintf(stderr, SD_NOTICE "xenstored is ready\n");
++ fprintf(stderr, "xenstored is ready\n");
+ }
+ #endif
+
+--
+2.45.2
+
diff --git a/0041-x86-vmx-Perform-VERW-flushing-later-in-the-VMExit-pa.patch b/0041-x86-vmx-Perform-VERW-flushing-later-in-the-VMExit-pa.patch
deleted file mode 100644
index 871f10f..0000000
--- a/0041-x86-vmx-Perform-VERW-flushing-later-in-the-VMExit-pa.patch
+++ /dev/null
@@ -1,146 +0,0 @@
-From 77f2bec134049aba29b9b459f955022722d10847 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 23 Jun 2023 11:32:00 +0100
-Subject: [PATCH 41/67] x86/vmx: Perform VERW flushing later in the VMExit path
-
-Broken out of the following patch because this change is subtle enough on its
-own.  See it for the rationale for moving VERW.
-
-As for how, extend the trick already used to hold one condition in
-flags (RESUME vs LAUNCH) through the POPing of GPRs.
-
-Move the MOV CR earlier. Intel specify flags to be undefined across it.
-
-Encode the two conditions we want using SF and PF. See the code comment for
-exactly how.
-
-Leave a comment to explain the lack of any content around
-SPEC_CTRL_EXIT_TO_VMX, but leave the block in place.  Sod's law says if we
-delete it, we'll need to reintroduce it.
-
-This is part of XSA-452 / CVE-2023-28746.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 475fa20b7384464210f42bad7195f87bd6f1c63f)
----
- xen/arch/x86/hvm/vmx/entry.S | 36 +++++++++++++++++++++---
- xen/arch/x86/include/asm/asm_defns.h | 8 ++++++
- xen/arch/x86/include/asm/spec_ctrl_asm.h | 7 +++++
- xen/arch/x86/x86_64/asm-offsets.c | 1 +
- 4 files changed, 48 insertions(+), 4 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S
-index 5f5de45a13..cdde76e138 100644
---- a/xen/arch/x86/hvm/vmx/entry.S
-+++ b/xen/arch/x86/hvm/vmx/entry.S
-@@ -87,17 +87,39 @@ UNLIKELY_END(realmode)
-
- /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
- /* SPEC_CTRL_EXIT_TO_VMX Req: %rsp=regs/cpuinfo Clob: */
-- DO_SPEC_CTRL_COND_VERW
-+ /*
-+ * All speculation safety work happens to be elsewhere. VERW is after
-+ * popping the GPRs, while restoring the guest MSR_SPEC_CTRL is left
-+ * to the MSR load list.
-+ */
-
- mov VCPU_hvm_guest_cr2(%rbx),%rax
-+ mov %rax, %cr2
-+
-+ /*
-+ * We need to perform two conditional actions (VERW, and Resume vs
-+ * Launch) after popping GPRs. With some cunning, we can encode both
-+ * of these in eflags together.
-+ *
-+ * Parity is only calculated over the bottom byte of the answer, while
-+ * Sign is simply the top bit.
-+ *
-+ * Therefore, the final OR instruction ends up producing:
-+ * SF = VCPU_vmx_launched
-+ * PF = !SCF_verw
-+ */
-+ BUILD_BUG_ON(SCF_verw & ~0xff)
-+ movzbl VCPU_vmx_launched(%rbx), %ecx
-+ shl $31, %ecx
-+ movzbl CPUINFO_spec_ctrl_flags(%rsp), %eax
-+ and $SCF_verw, %eax
-+ or %eax, %ecx
-
- pop %r15
- pop %r14
- pop %r13
- pop %r12
- pop %rbp
-- mov %rax,%cr2
-- cmpb $0,VCPU_vmx_launched(%rbx)
- pop %rbx
- pop %r11
- pop %r10
-@@ -108,7 +130,13 @@ UNLIKELY_END(realmode)
- pop %rdx
- pop %rsi
- pop %rdi
-- je .Lvmx_launch
-+
-+ jpe .L_skip_verw
-+ /* VERW clobbers ZF, but preserves all others, including SF. */
-+ verw STK_REL(CPUINFO_verw_sel, CPUINFO_error_code)(%rsp)
-+.L_skip_verw:
-+
-+ jns .Lvmx_launch
-
- /*.Lvmx_resume:*/
- VMRESUME
-diff --git a/xen/arch/x86/include/asm/asm_defns.h b/xen/arch/x86/include/asm/asm_defns.h
-index d9431180cf..abc6822b08 100644
---- a/xen/arch/x86/include/asm/asm_defns.h
-+++ b/xen/arch/x86/include/asm/asm_defns.h
-@@ -81,6 +81,14 @@ register unsigned long current_stack_pointer asm("rsp");
-
- #ifdef __ASSEMBLY__
-
-+.macro BUILD_BUG_ON condstr, cond:vararg
-+ .if \cond
-+ .error "Condition \"\condstr\" not satisfied"
-+ .endif
-+.endm
-+/* preprocessor macro to make error message more user friendly */
-+#define BUILD_BUG_ON(cond) BUILD_BUG_ON #cond, cond
-+
- #ifdef HAVE_AS_QUOTED_SYM
- #define SUBSECTION_LBL(tag) \
- .ifndef .L.tag; \
-diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-index f4b8b9d956..ca9cb0f5dd 100644
---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
-+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-@@ -164,6 +164,13 @@
- #endif
- .endm
-
-+/*
-+ * Helper to improve the readability of stack displacements with %rsp in
-+ * unusual positions. Both @field and @top_of_stack should be constants from
-+ * the same object. @top_of_stack should be where %rsp is currently pointing.
-+ */
-+#define STK_REL(field, top_of_stk) ((field) - (top_of_stk))
-+
- .macro DO_SPEC_CTRL_COND_VERW
- /*
- * Requires %rsp=cpuinfo
-diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
-index 31fa63b77f..a4e94d6930 100644
---- a/xen/arch/x86/x86_64/asm-offsets.c
-+++ b/xen/arch/x86/x86_64/asm-offsets.c
-@@ -135,6 +135,7 @@ void __dummy__(void)
- #endif
-
- OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs);
-+ OFFSET(CPUINFO_error_code, struct cpu_info, guest_cpu_user_regs.error_code);
- OFFSET(CPUINFO_verw_sel, struct cpu_info, verw_sel);
- OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu);
- OFFSET(CPUINFO_per_cpu_offset, struct cpu_info, per_cpu_offset);
---
-2.44.0
-
diff --git a/0042-tools-Drop-libsystemd-as-a-dependency.patch b/0042-tools-Drop-libsystemd-as-a-dependency.patch
new file mode 100644
index 0000000..168680e
--- /dev/null
+++ b/0042-tools-Drop-libsystemd-as-a-dependency.patch
@@ -0,0 +1,648 @@
+From 7967bd358e93ed83e01813a8d0dfd68aa67f5780 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Thu, 4 Jul 2024 14:10:40 +0200
+Subject: [PATCH 42/56] tools: Drop libsystemd as a dependency
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+There are no more users, and we want to dissuade people from introducing new
+users just for sd_notify() and friends. Drop the dependency.
+
+We still want the overall --with{,out}-systemd to gate the generation of the
+service/unit/mount/etc files.
+
+Rerun autogen.sh, and mark the dependency as removed in the build containers.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Acked-by: Christian Lindig <christian.lindig@cloud.com>
+
+tools: (Actually) drop libsystemd as a dependency
+
+When reinstating some of systemd.m4 between v1 and v2, I reintroduced a little
+too much. While {c,o}xenstored are indeed no longer linked against
+libsystemd, ./configure still looks for it.
+
+Drop this too.
+
+Fixes: ae26101f6bfc ("tools: Drop libsystemd as a dependency")
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: ae26101f6bfc8185adcdb9165d469bdc467780db
+master date: 2024-05-23 15:04:40 +0100
+master commit: 6ef4fa1e7fe78c1dae07b451292b07facfce4902
+master date: 2024-05-30 12:15:25 +0100
+---
+ CHANGELOG.md | 7 +-
+ config/Tools.mk.in | 2 -
+ m4/systemd.m4 | 17 --
+ tools/configure | 485 +--------------------------------------------
+ 4 files changed, 7 insertions(+), 504 deletions(-)
+
+diff --git a/CHANGELOG.md b/CHANGELOG.md
+index fa54d59df1..ceca12eb5f 100644
+--- a/CHANGELOG.md
++++ b/CHANGELOG.md
+@@ -4,7 +4,12 @@ Notable changes to Xen will be documented in this file.
+
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
+
+-## [4.18.2](https://xenbits.xen.org/gitweb/?p=xen.git;a=shortlog;h=RELEASE-4.18.2)
++## [4.18.3](https://xenbits.xen.org/gitweb/?p=xen.git;a=shortlog;h=RELEASE-4.18.3)
++
++### Changed
++ - When building with Systemd support (./configure --enable-systemd), remove
++ libsystemd as a build dependency. Systemd Notify support is retained, now
++ using a standalone library implementation.
+
+ ## [4.18.1](https://xenbits.xen.org/gitweb/?p=xen.git;a=shortlog;h=RELEASE-4.18.1)
+
+diff --git a/config/Tools.mk.in b/config/Tools.mk.in
+index b54ab21f96..50fbef841f 100644
+--- a/config/Tools.mk.in
++++ b/config/Tools.mk.in
+@@ -52,8 +52,6 @@ CONFIG_PYGRUB := @pygrub@
+ CONFIG_LIBFSIMAGE := @libfsimage@
+
+ CONFIG_SYSTEMD := @systemd@
+-SYSTEMD_CFLAGS := @SYSTEMD_CFLAGS@
+-SYSTEMD_LIBS := @SYSTEMD_LIBS@
+ XEN_SYSTEMD_DIR := @SYSTEMD_DIR@
+ XEN_SYSTEMD_MODULES_LOAD := @SYSTEMD_MODULES_LOAD@
+ CONFIG_9PFS := @ninepfs@
+diff --git a/m4/systemd.m4 b/m4/systemd.m4
+index 112dc11b5e..ab12ea313d 100644
+--- a/m4/systemd.m4
++++ b/m4/systemd.m4
+@@ -41,15 +41,6 @@ AC_DEFUN([AX_ALLOW_SYSTEMD_OPTS], [
+ ])
+
+ AC_DEFUN([AX_CHECK_SYSTEMD_LIBS], [
+- PKG_CHECK_MODULES([SYSTEMD], [libsystemd-daemon],,
+- [PKG_CHECK_MODULES([SYSTEMD], [libsystemd >= 209])]
+- )
+- dnl pkg-config older than 0.24 does not set these for
+- dnl PKG_CHECK_MODULES() worth also noting is that as of version 208
+- dnl of systemd pkg-config --cflags currently yields no extra flags yet.
+- AC_SUBST([SYSTEMD_CFLAGS])
+- AC_SUBST([SYSTEMD_LIBS])
+-
+ AS_IF([test "x$SYSTEMD_DIR" = x], [
+ dnl In order to use the line below we need to fix upstream systemd
+ dnl to properly ${prefix} for child variables in
+@@ -95,13 +86,6 @@ AC_DEFUN([AX_CHECK_SYSTEMD], [
+ ],[systemd=n])
+ ])
+
+-AC_DEFUN([AX_CHECK_SYSTEMD_ENABLE_AVAILABLE], [
+- PKG_CHECK_MODULES([SYSTEMD], [libsystemd-daemon], [systemd="y"],[
+- PKG_CHECK_MODULES([SYSTEMD], [libsystemd >= 209],
+- [systemd="y"],[systemd="n"])
+- ])
+-])
+-
+ dnl Enables systemd by default and requires a --disable-systemd option flag
+ dnl to configure if you want to disable.
+ AC_DEFUN([AX_ENABLE_SYSTEMD], [
+@@ -121,6 +105,5 @@ dnl to have systemd build libraries it will be enabled. You can always force
+ dnl disable with --disable-systemd
+ AC_DEFUN([AX_AVAILABLE_SYSTEMD], [
+ AX_ALLOW_SYSTEMD_OPTS()
+- AX_CHECK_SYSTEMD_ENABLE_AVAILABLE()
+ AX_CHECK_SYSTEMD()
+ ])
+diff --git a/tools/configure b/tools/configure
+index 38c0808d3a..7bb935d23b 100755
+--- a/tools/configure
++++ b/tools/configure
+@@ -626,8 +626,6 @@ ac_subst_vars='LTLIBOBJS
+ LIBOBJS
+ pvshim
+ ninepfs
+-SYSTEMD_LIBS
+-SYSTEMD_CFLAGS
+ SYSTEMD_MODULES_LOAD
+ SYSTEMD_DIR
+ systemd
+@@ -864,9 +862,7 @@ pixman_LIBS
+ libzstd_CFLAGS
+ libzstd_LIBS
+ LIBNL3_CFLAGS
+-LIBNL3_LIBS
+-SYSTEMD_CFLAGS
+-SYSTEMD_LIBS'
++LIBNL3_LIBS'
+
+
+ # Initialize some variables set by options.
+@@ -1621,10 +1617,6 @@ Some influential environment variables:
+ LIBNL3_CFLAGS
+ C compiler flags for LIBNL3, overriding pkg-config
+ LIBNL3_LIBS linker flags for LIBNL3, overriding pkg-config
+- SYSTEMD_CFLAGS
+- C compiler flags for SYSTEMD, overriding pkg-config
+- SYSTEMD_LIBS
+- linker flags for SYSTEMD, overriding pkg-config
+
+ Use these variables to override the choices made by `configure' or to help
+ it to find libraries and programs with nonstandard names/locations.
+@@ -3889,8 +3881,6 @@ esac
+
+
+
+-
+-
+
+
+
+@@ -9540,223 +9530,6 @@ fi
+
+
+
+-
+-pkg_failed=no
+-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SYSTEMD" >&5
+-$as_echo_n "checking for SYSTEMD... " >&6; }
+-
+-if test -n "$SYSTEMD_CFLAGS"; then
+- pkg_cv_SYSTEMD_CFLAGS="$SYSTEMD_CFLAGS"
+- elif test -n "$PKG_CONFIG"; then
+- if test -n "$PKG_CONFIG" && \
+- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd-daemon\""; } >&5
+- ($PKG_CONFIG --exists --print-errors "libsystemd-daemon") 2>&5
+- ac_status=$?
+- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+- test $ac_status = 0; }; then
+- pkg_cv_SYSTEMD_CFLAGS=`$PKG_CONFIG --cflags "libsystemd-daemon" 2>/dev/null`
+- test "x$?" != "x0" && pkg_failed=yes
+-else
+- pkg_failed=yes
+-fi
+- else
+- pkg_failed=untried
+-fi
+-if test -n "$SYSTEMD_LIBS"; then
+- pkg_cv_SYSTEMD_LIBS="$SYSTEMD_LIBS"
+- elif test -n "$PKG_CONFIG"; then
+- if test -n "$PKG_CONFIG" && \
+- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd-daemon\""; } >&5
+- ($PKG_CONFIG --exists --print-errors "libsystemd-daemon") 2>&5
+- ac_status=$?
+- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+- test $ac_status = 0; }; then
+- pkg_cv_SYSTEMD_LIBS=`$PKG_CONFIG --libs "libsystemd-daemon" 2>/dev/null`
+- test "x$?" != "x0" && pkg_failed=yes
+-else
+- pkg_failed=yes
+-fi
+- else
+- pkg_failed=untried
+-fi
+-
+-
+-
+-if test $pkg_failed = yes; then
+- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+-$as_echo "no" >&6; }
+-
+-if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+- _pkg_short_errors_supported=yes
+-else
+- _pkg_short_errors_supported=no
+-fi
+- if test $_pkg_short_errors_supported = yes; then
+- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libsystemd-daemon" 2>&1`
+- else
+- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libsystemd-daemon" 2>&1`
+- fi
+- # Put the nasty error message in config.log where it belongs
+- echo "$SYSTEMD_PKG_ERRORS" >&5
+-
+-
+-
+-pkg_failed=no
+-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SYSTEMD" >&5
+-$as_echo_n "checking for SYSTEMD... " >&6; }
+-
+-if test -n "$SYSTEMD_CFLAGS"; then
+- pkg_cv_SYSTEMD_CFLAGS="$SYSTEMD_CFLAGS"
+- elif test -n "$PKG_CONFIG"; then
+- if test -n "$PKG_CONFIG" && \
+- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5
+- ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5
+- ac_status=$?
+- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+- test $ac_status = 0; }; then
+- pkg_cv_SYSTEMD_CFLAGS=`$PKG_CONFIG --cflags "libsystemd >= 209" 2>/dev/null`
+- test "x$?" != "x0" && pkg_failed=yes
+-else
+- pkg_failed=yes
+-fi
+- else
+- pkg_failed=untried
+-fi
+-if test -n "$SYSTEMD_LIBS"; then
+- pkg_cv_SYSTEMD_LIBS="$SYSTEMD_LIBS"
+- elif test -n "$PKG_CONFIG"; then
+- if test -n "$PKG_CONFIG" && \
+- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5
+- ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5
+- ac_status=$?
+- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+- test $ac_status = 0; }; then
+- pkg_cv_SYSTEMD_LIBS=`$PKG_CONFIG --libs "libsystemd >= 209" 2>/dev/null`
+- test "x$?" != "x0" && pkg_failed=yes
+-else
+- pkg_failed=yes
+-fi
+- else
+- pkg_failed=untried
+-fi
+-
+-
+-
+-if test $pkg_failed = yes; then
+- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+-$as_echo "no" >&6; }
+-
+-if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+- _pkg_short_errors_supported=yes
+-else
+- _pkg_short_errors_supported=no
+-fi
+- if test $_pkg_short_errors_supported = yes; then
+- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libsystemd >= 209" 2>&1`
+- else
+- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libsystemd >= 209" 2>&1`
+- fi
+- # Put the nasty error message in config.log where it belongs
+- echo "$SYSTEMD_PKG_ERRORS" >&5
+-
+- systemd="n"
+-elif test $pkg_failed = untried; then
+- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+-$as_echo "no" >&6; }
+- systemd="n"
+-else
+- SYSTEMD_CFLAGS=$pkg_cv_SYSTEMD_CFLAGS
+- SYSTEMD_LIBS=$pkg_cv_SYSTEMD_LIBS
+- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+-$as_echo "yes" >&6; }
+- systemd="y"
+-fi
+-
+-elif test $pkg_failed = untried; then
+- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+-$as_echo "no" >&6; }
+-
+-
+-pkg_failed=no
+-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SYSTEMD" >&5
+-$as_echo_n "checking for SYSTEMD... " >&6; }
+-
+-if test -n "$SYSTEMD_CFLAGS"; then
+- pkg_cv_SYSTEMD_CFLAGS="$SYSTEMD_CFLAGS"
+- elif test -n "$PKG_CONFIG"; then
+- if test -n "$PKG_CONFIG" && \
+- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5
+- ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5
+- ac_status=$?
+- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+- test $ac_status = 0; }; then
+- pkg_cv_SYSTEMD_CFLAGS=`$PKG_CONFIG --cflags "libsystemd >= 209" 2>/dev/null`
+- test "x$?" != "x0" && pkg_failed=yes
+-else
+- pkg_failed=yes
+-fi
+- else
+- pkg_failed=untried
+-fi
+-if test -n "$SYSTEMD_LIBS"; then
+- pkg_cv_SYSTEMD_LIBS="$SYSTEMD_LIBS"
+- elif test -n "$PKG_CONFIG"; then
+- if test -n "$PKG_CONFIG" && \
+- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5
+- ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5
+- ac_status=$?
+- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+- test $ac_status = 0; }; then
+- pkg_cv_SYSTEMD_LIBS=`$PKG_CONFIG --libs "libsystemd >= 209" 2>/dev/null`
+- test "x$?" != "x0" && pkg_failed=yes
+-else
+- pkg_failed=yes
+-fi
+- else
+- pkg_failed=untried
+-fi
+-
+-
+-
+-if test $pkg_failed = yes; then
+- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+-$as_echo "no" >&6; }
+-
+-if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+- _pkg_short_errors_supported=yes
+-else
+- _pkg_short_errors_supported=no
+-fi
+- if test $_pkg_short_errors_supported = yes; then
+- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libsystemd >= 209" 2>&1`
+- else
+- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libsystemd >= 209" 2>&1`
+- fi
+- # Put the nasty error message in config.log where it belongs
+- echo "$SYSTEMD_PKG_ERRORS" >&5
+-
+- systemd="n"
+-elif test $pkg_failed = untried; then
+- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+-$as_echo "no" >&6; }
+- systemd="n"
+-else
+- SYSTEMD_CFLAGS=$pkg_cv_SYSTEMD_CFLAGS
+- SYSTEMD_LIBS=$pkg_cv_SYSTEMD_LIBS
+- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+-$as_echo "yes" >&6; }
+- systemd="y"
+-fi
+-
+-else
+- SYSTEMD_CFLAGS=$pkg_cv_SYSTEMD_CFLAGS
+- SYSTEMD_LIBS=$pkg_cv_SYSTEMD_LIBS
+- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+-$as_echo "yes" >&6; }
+- systemd="y"
+-fi
+-
+-
+ if test "x$enable_systemd" != "xno"; then :
+
+ if test "x$systemd" = "xy" ; then :
+@@ -9766,262 +9539,6 @@ $as_echo "#define HAVE_SYSTEMD 1" >>confdefs.h
+
+ systemd=y
+
+-
+-pkg_failed=no
+-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SYSTEMD" >&5
+-$as_echo_n "checking for SYSTEMD... " >&6; }
+-
+-if test -n "$SYSTEMD_CFLAGS"; then
+- pkg_cv_SYSTEMD_CFLAGS="$SYSTEMD_CFLAGS"
+- elif test -n "$PKG_CONFIG"; then
+- if test -n "$PKG_CONFIG" && \
+- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd-daemon\""; } >&5
+- ($PKG_CONFIG --exists --print-errors "libsystemd-daemon") 2>&5
+- ac_status=$?
+- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+- test $ac_status = 0; }; then
+- pkg_cv_SYSTEMD_CFLAGS=`$PKG_CONFIG --cflags "libsystemd-daemon" 2>/dev/null`
+- test "x$?" != "x0" && pkg_failed=yes
+-else
+- pkg_failed=yes
+-fi
+- else
+- pkg_failed=untried
+-fi
+-if test -n "$SYSTEMD_LIBS"; then
+- pkg_cv_SYSTEMD_LIBS="$SYSTEMD_LIBS"
+- elif test -n "$PKG_CONFIG"; then
+- if test -n "$PKG_CONFIG" && \
+- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd-daemon\""; } >&5
+- ($PKG_CONFIG --exists --print-errors "libsystemd-daemon") 2>&5
+- ac_status=$?
+- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+- test $ac_status = 0; }; then
+- pkg_cv_SYSTEMD_LIBS=`$PKG_CONFIG --libs "libsystemd-daemon" 2>/dev/null`
+- test "x$?" != "x0" && pkg_failed=yes
+-else
+- pkg_failed=yes
+-fi
+- else
+- pkg_failed=untried
+-fi
+-
+-
+-
+-if test $pkg_failed = yes; then
+- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+-$as_echo "no" >&6; }
+-
+-if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+- _pkg_short_errors_supported=yes
+-else
+- _pkg_short_errors_supported=no
+-fi
+- if test $_pkg_short_errors_supported = yes; then
+- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libsystemd-daemon" 2>&1`
+- else
+- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libsystemd-daemon" 2>&1`
+- fi
+- # Put the nasty error message in config.log where it belongs
+- echo "$SYSTEMD_PKG_ERRORS" >&5
+-
+-
+-pkg_failed=no
+-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SYSTEMD" >&5
+-$as_echo_n "checking for SYSTEMD... " >&6; }
+-
+-if test -n "$SYSTEMD_CFLAGS"; then
+- pkg_cv_SYSTEMD_CFLAGS="$SYSTEMD_CFLAGS"
+- elif test -n "$PKG_CONFIG"; then
+- if test -n "$PKG_CONFIG" && \
+- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5
+- ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5
+- ac_status=$?
+- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+- test $ac_status = 0; }; then
+- pkg_cv_SYSTEMD_CFLAGS=`$PKG_CONFIG --cflags "libsystemd >= 209" 2>/dev/null`
+- test "x$?" != "x0" && pkg_failed=yes
+-else
+- pkg_failed=yes
+-fi
+- else
+- pkg_failed=untried
+-fi
+-if test -n "$SYSTEMD_LIBS"; then
+- pkg_cv_SYSTEMD_LIBS="$SYSTEMD_LIBS"
+- elif test -n "$PKG_CONFIG"; then
+- if test -n "$PKG_CONFIG" && \
+- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5
+- ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5
+- ac_status=$?
+- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+- test $ac_status = 0; }; then
+- pkg_cv_SYSTEMD_LIBS=`$PKG_CONFIG --libs "libsystemd >= 209" 2>/dev/null`
+- test "x$?" != "x0" && pkg_failed=yes
+-else
+- pkg_failed=yes
+-fi
+- else
+- pkg_failed=untried
+-fi
+-
+-
+-
+-if test $pkg_failed = yes; then
+- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+-$as_echo "no" >&6; }
+-
+-if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+- _pkg_short_errors_supported=yes
+-else
+- _pkg_short_errors_supported=no
+-fi
+- if test $_pkg_short_errors_supported = yes; then
+- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libsystemd >= 209" 2>&1`
+- else
+- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libsystemd >= 209" 2>&1`
+- fi
+- # Put the nasty error message in config.log where it belongs
+- echo "$SYSTEMD_PKG_ERRORS" >&5
+-
+- as_fn_error $? "Package requirements (libsystemd >= 209) were not met:
+-
+-$SYSTEMD_PKG_ERRORS
+-
+-Consider adjusting the PKG_CONFIG_PATH environment variable if you
+-installed software in a non-standard prefix.
+-
+-Alternatively, you may set the environment variables SYSTEMD_CFLAGS
+-and SYSTEMD_LIBS to avoid the need to call pkg-config.
+-See the pkg-config man page for more details." "$LINENO" 5
+-elif test $pkg_failed = untried; then
+- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+-$as_echo "no" >&6; }
+- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+-as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it
+-is in your PATH or set the PKG_CONFIG environment variable to the full
+-path to pkg-config.
+-
+-Alternatively, you may set the environment variables SYSTEMD_CFLAGS
+-and SYSTEMD_LIBS to avoid the need to call pkg-config.
+-See the pkg-config man page for more details.
+-
+-To get pkg-config, see <http://pkg-config.freedesktop.org/>.
+-See \`config.log' for more details" "$LINENO" 5; }
+-else
+- SYSTEMD_CFLAGS=$pkg_cv_SYSTEMD_CFLAGS
+- SYSTEMD_LIBS=$pkg_cv_SYSTEMD_LIBS
+- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+-$as_echo "yes" >&6; }
+-
+-fi
+-
+-elif test $pkg_failed = untried; then
+- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+-$as_echo "no" >&6; }
+-
+-pkg_failed=no
+-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SYSTEMD" >&5
+-$as_echo_n "checking for SYSTEMD... " >&6; }
+-
+-if test -n "$SYSTEMD_CFLAGS"; then
+- pkg_cv_SYSTEMD_CFLAGS="$SYSTEMD_CFLAGS"
+- elif test -n "$PKG_CONFIG"; then
+- if test -n "$PKG_CONFIG" && \
+- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5
+- ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5
+- ac_status=$?
+- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+- test $ac_status = 0; }; then
+- pkg_cv_SYSTEMD_CFLAGS=`$PKG_CONFIG --cflags "libsystemd >= 209" 2>/dev/null`
+- test "x$?" != "x0" && pkg_failed=yes
+-else
+- pkg_failed=yes
+-fi
+- else
+- pkg_failed=untried
+-fi
+-if test -n "$SYSTEMD_LIBS"; then
+- pkg_cv_SYSTEMD_LIBS="$SYSTEMD_LIBS"
+- elif test -n "$PKG_CONFIG"; then
+- if test -n "$PKG_CONFIG" && \
+- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5
+- ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5
+- ac_status=$?
+- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+- test $ac_status = 0; }; then
+- pkg_cv_SYSTEMD_LIBS=`$PKG_CONFIG --libs "libsystemd >= 209" 2>/dev/null`
+- test "x$?" != "x0" && pkg_failed=yes
+-else
+- pkg_failed=yes
+-fi
+- else
+- pkg_failed=untried
+-fi
+-
+-
+-
+-if test $pkg_failed = yes; then
+- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+-$as_echo "no" >&6; }
+-
+-if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+- _pkg_short_errors_supported=yes
+-else
+- _pkg_short_errors_supported=no
+-fi
+- if test $_pkg_short_errors_supported = yes; then
+- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libsystemd >= 209" 2>&1`
+- else
+- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libsystemd >= 209" 2>&1`
+- fi
+- # Put the nasty error message in config.log where it belongs
+- echo "$SYSTEMD_PKG_ERRORS" >&5
+-
+- as_fn_error $? "Package requirements (libsystemd >= 209) were not met:
+-
+-$SYSTEMD_PKG_ERRORS
+-
+-Consider adjusting the PKG_CONFIG_PATH environment variable if you
+-installed software in a non-standard prefix.
+-
+-Alternatively, you may set the environment variables SYSTEMD_CFLAGS
+-and SYSTEMD_LIBS to avoid the need to call pkg-config.
+-See the pkg-config man page for more details." "$LINENO" 5
+-elif test $pkg_failed = untried; then
+- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+-$as_echo "no" >&6; }
+- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+-as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it
+-is in your PATH or set the PKG_CONFIG environment variable to the full
+-path to pkg-config.
+-
+-Alternatively, you may set the environment variables SYSTEMD_CFLAGS
+-and SYSTEMD_LIBS to avoid the need to call pkg-config.
+-See the pkg-config man page for more details.
+-
+-To get pkg-config, see <http://pkg-config.freedesktop.org/>.
+-See \`config.log' for more details" "$LINENO" 5; }
+-else
+- SYSTEMD_CFLAGS=$pkg_cv_SYSTEMD_CFLAGS
+- SYSTEMD_LIBS=$pkg_cv_SYSTEMD_LIBS
+- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+-$as_echo "yes" >&6; }
+-
+-fi
+-
+-else
+- SYSTEMD_CFLAGS=$pkg_cv_SYSTEMD_CFLAGS
+- SYSTEMD_LIBS=$pkg_cv_SYSTEMD_LIBS
+- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+-$as_echo "yes" >&6; }
+-
+-fi
+-
+-
+-
+ if test "x$SYSTEMD_DIR" = x; then :
+
+ SYSTEMD_DIR="\$(prefix)/lib/systemd/system/"
+--
+2.45.2
+
diff --git a/0042-x86-spec-ctrl-Perform-VERW-flushing-later-in-exit-pa.patch b/0042-x86-spec-ctrl-Perform-VERW-flushing-later-in-exit-pa.patch
deleted file mode 100644
index ac78acd..0000000
--- a/0042-x86-spec-ctrl-Perform-VERW-flushing-later-in-exit-pa.patch
+++ /dev/null
@@ -1,209 +0,0 @@
-From 76af773de5d3e68b7140cc9c5343be6746c9101c Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Sat, 27 Jan 2024 18:20:56 +0000
-Subject: [PATCH 42/67] x86/spec-ctrl: Perform VERW flushing later in exit
- paths
-
-On parts vulnerable to RFDS, VERW's side effects are extended to scrub all
-non-architectural entries in various Physical Register Files. To remove all
-of Xen's values, the VERW must be after popping the GPRs.
-
-Rework SPEC_CTRL_COND_VERW to default to a CPUINFO_error_code %rsp position,
-but with overrides for other contexts. Identify that it clobbers eflags; this
-is particularly relevant for the SYSRET path.
-
-For the IST exit return to Xen, have the main SPEC_CTRL_EXIT_TO_XEN put a
-shadow copy of spec_ctrl_flags, as GPRs can't be used at the point we want to
-issue the VERW.
-
-This is part of XSA-452 / CVE-2023-28746.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 0a666cf2cd99df6faf3eebc81a1fc286e4eca4c7)
----
- xen/arch/x86/include/asm/spec_ctrl_asm.h | 36 ++++++++++++++++--------
- xen/arch/x86/x86_64/asm-offsets.c | 13 +++++++--
- xen/arch/x86/x86_64/compat/entry.S | 6 ++++
- xen/arch/x86/x86_64/entry.S | 21 +++++++++++++-
- 4 files changed, 61 insertions(+), 15 deletions(-)
-
-diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-index ca9cb0f5dd..97a97b2b82 100644
---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
-+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-@@ -171,16 +171,23 @@
- */
- #define STK_REL(field, top_of_stk) ((field) - (top_of_stk))
-
--.macro DO_SPEC_CTRL_COND_VERW
-+.macro SPEC_CTRL_COND_VERW \
-+ scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_error_code), \
-+ sel=STK_REL(CPUINFO_verw_sel, CPUINFO_error_code)
- /*
-- * Requires %rsp=cpuinfo
-+ * Requires \scf and \sel as %rsp-relative expressions
-+ * Clobbers eflags
-+ *
-+ * VERW needs to run after guest GPRs have been restored, where only %rsp is
-+ * good to use. Default to expecting %rsp pointing at CPUINFO_error_code.
-+ * Contexts where this is not true must provide an alternative \scf and \sel.
- *
- * Issue a VERW for its flushing side effect, if indicated. This is a Spectre
- * v1 gadget, but the IRET/VMEntry is serialising.
- */
-- testb $SCF_verw, CPUINFO_spec_ctrl_flags(%rsp)
-+ testb $SCF_verw, \scf(%rsp)
- jz .L\@_verw_skip
-- verw CPUINFO_verw_sel(%rsp)
-+ verw \sel(%rsp)
- .L\@_verw_skip:
- .endm
-
-@@ -298,8 +305,6 @@
- */
- ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV
-
-- DO_SPEC_CTRL_COND_VERW
--
- ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV
- .endm
-
-@@ -379,7 +384,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
- */
- .macro SPEC_CTRL_EXIT_TO_XEN
- /*
-- * Requires %r12=ist_exit, %r14=stack_end
-+ * Requires %r12=ist_exit, %r14=stack_end, %rsp=regs
- * Clobbers %rax, %rbx, %rcx, %rdx
- */
- movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx
-@@ -407,11 +412,18 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
- test %r12, %r12
- jz .L\@_skip_ist_exit
-
-- /* Logically DO_SPEC_CTRL_COND_VERW but without the %rsp=cpuinfo dependency */
-- testb $SCF_verw, %bl
-- jz .L\@_skip_verw
-- verw STACK_CPUINFO_FIELD(verw_sel)(%r14)
--.L\@_skip_verw:
-+ /*
-+ * Stash SCF and verw_sel above eflags in the case of an IST_exit. The
-+ * VERW logic needs to run after guest GPRs have been restored; i.e. where
-+ * we cannot use %r12 or %r14 for the purposes they have here.
-+ *
-+ * When the CPU pushed this exception frame, it zero-extended eflags.
-+ * Therefore it is safe for the VERW logic to look at the stashed SCF
-+ * outside of the ist_exit condition. Also, this stashing won't influence
-+ * any other restore_all_guest() paths.
-+ */
-+ or $(__HYPERVISOR_DS32 << 16), %ebx
-+ mov %ebx, UREGS_eflags + 4(%rsp) /* EFRAME_shadow_scf/sel */
-
- ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV
-
-diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
-index a4e94d6930..4cd5938d7b 100644
---- a/xen/arch/x86/x86_64/asm-offsets.c
-+++ b/xen/arch/x86/x86_64/asm-offsets.c
-@@ -55,14 +55,22 @@ void __dummy__(void)
- * EFRAME_* is for the entry/exit logic where %rsp is pointing at
- * UREGS_error_code and GPRs are still/already guest values.
- */
--#define OFFSET_EF(sym, mem) \
-+#define OFFSET_EF(sym, mem, ...) \
- DEFINE(sym, offsetof(struct cpu_user_regs, mem) - \
-- offsetof(struct cpu_user_regs, error_code))
-+ offsetof(struct cpu_user_regs, error_code) __VA_ARGS__)
-
- OFFSET_EF(EFRAME_entry_vector, entry_vector);
- OFFSET_EF(EFRAME_rip, rip);
- OFFSET_EF(EFRAME_cs, cs);
- OFFSET_EF(EFRAME_eflags, eflags);
-+
-+ /*
-+ * These aren't real fields. They're spare space, used by the IST
-+ * exit-to-xen path.
-+ */
-+ OFFSET_EF(EFRAME_shadow_scf, eflags, +4);
-+ OFFSET_EF(EFRAME_shadow_sel, eflags, +6);
-+
- OFFSET_EF(EFRAME_rsp, rsp);
- BLANK();
-
-@@ -136,6 +144,7 @@ void __dummy__(void)
-
- OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs);
- OFFSET(CPUINFO_error_code, struct cpu_info, guest_cpu_user_regs.error_code);
-+ OFFSET(CPUINFO_rip, struct cpu_info, guest_cpu_user_regs.rip);
- OFFSET(CPUINFO_verw_sel, struct cpu_info, verw_sel);
- OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu);
- OFFSET(CPUINFO_per_cpu_offset, struct cpu_info, per_cpu_offset);
-diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
-index 7c211314d8..3b2fbcd873 100644
---- a/xen/arch/x86/x86_64/compat/entry.S
-+++ b/xen/arch/x86/x86_64/compat/entry.S
-@@ -161,6 +161,12 @@ ENTRY(compat_restore_all_guest)
- SPEC_CTRL_EXIT_TO_PV /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
-
- RESTORE_ALL adj=8 compat=1
-+
-+ /* Account for ev/ec having already been popped off the stack. */
-+ SPEC_CTRL_COND_VERW \
-+ scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_rip), \
-+ sel=STK_REL(CPUINFO_verw_sel, CPUINFO_rip)
-+
- .Lft0: iretq
- _ASM_PRE_EXTABLE(.Lft0, handle_exception)
-
-diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
-index 412cbeb3ec..ef517e2945 100644
---- a/xen/arch/x86/x86_64/entry.S
-+++ b/xen/arch/x86/x86_64/entry.S
-@@ -214,6 +214,9 @@ restore_all_guest:
- #endif
-
- mov EFRAME_rip(%rsp), %rcx
-+
-+ SPEC_CTRL_COND_VERW /* Req: %rsp=eframe Clob: efl */
-+
- cmpw $FLAT_USER_CS32, EFRAME_cs(%rsp)
- mov EFRAME_rsp(%rsp), %rsp
- je 1f
-@@ -227,6 +230,9 @@ restore_all_guest:
- iret_exit_to_guest:
- andl $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), EFRAME_eflags(%rsp)
- orl $X86_EFLAGS_IF, EFRAME_eflags(%rsp)
-+
-+ SPEC_CTRL_COND_VERW /* Req: %rsp=eframe Clob: efl */
-+
- addq $8,%rsp
- .Lft0: iretq
- _ASM_PRE_EXTABLE(.Lft0, handle_exception)
-@@ -679,9 +685,22 @@ UNLIKELY_START(ne, exit_cr3)
- UNLIKELY_END(exit_cr3)
-
- /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
-- SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end, Clob: abcd */
-+ SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end %rsp=regs, Clob: abcd */
-
- RESTORE_ALL adj=8
-+
-+ /*
-+ * When the CPU pushed this exception frame, it zero-extended eflags.
-+ * For an IST exit, SPEC_CTRL_EXIT_TO_XEN stashed shadow copies of
-+ * spec_ctrl_flags and verw_sel above eflags, as we can't use any GPRs,
-+ * and we're at a random place on the stack, not in a CPUFINFO block.
-+ *
-+ * Account for ev/ec having already been popped off the stack.
-+ */
-+ SPEC_CTRL_COND_VERW \
-+ scf=STK_REL(EFRAME_shadow_scf, EFRAME_rip), \
-+ sel=STK_REL(EFRAME_shadow_sel, EFRAME_rip)
-+
- iretq
-
- ENTRY(common_interrupt)
---
-2.44.0
-
diff --git a/0043-x86-ioapic-Fix-signed-shifts-in-io_apic.c.patch b/0043-x86-ioapic-Fix-signed-shifts-in-io_apic.c.patch
new file mode 100644
index 0000000..c368c1d
--- /dev/null
+++ b/0043-x86-ioapic-Fix-signed-shifts-in-io_apic.c.patch
@@ -0,0 +1,46 @@
+From 0dc5fbee17cd2bcb1aa6a1cf420dd80381587de8 Mon Sep 17 00:00:00 2001
+From: Matthew Barnes <matthew.barnes@cloud.com>
+Date: Thu, 4 Jul 2024 14:11:03 +0200
+Subject: [PATCH 43/56] x86/ioapic: Fix signed shifts in io_apic.c
+
+There exist bitshifts in the IOAPIC code where signed integers are
+shifted to the left by up to 31 bits, which is undefined behaviour.
+
+This patch fixes this by changing the integers from signed to unsigned.
+
+Signed-off-by: Matthew Barnes <matthew.barnes@cloud.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: c5746b021e573184fb92b601a0e93a295485054e
+master date: 2024-06-21 15:09:26 +0100
+---
+ xen/arch/x86/io_apic.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c
+index 0ef61fb2f1..c5342789e8 100644
+--- a/xen/arch/x86/io_apic.c
++++ b/xen/arch/x86/io_apic.c
+@@ -1692,7 +1692,8 @@ static void cf_check mask_and_ack_level_ioapic_irq(struct irq_desc *desc)
+ !io_apic_level_ack_pending(desc->irq))
+ move_masked_irq(desc);
+
+- if ( !(v & (1 << (i & 0x1f))) ) {
++ if ( !(v & (1U << (i & 0x1f))) )
++ {
+ spin_lock(&ioapic_lock);
+ __edge_IO_APIC_irq(desc->irq);
+ __level_IO_APIC_irq(desc->irq);
+@@ -1756,7 +1757,8 @@ static void cf_check end_level_ioapic_irq_new(struct irq_desc *desc, u8 vector)
+ !io_apic_level_ack_pending(desc->irq) )
+ move_native_irq(desc);
+
+- if (!(v & (1 << (i & 0x1f)))) {
++ if ( !(v & (1U << (i & 0x1f))) )
++ {
+ spin_lock(&ioapic_lock);
+ __mask_IO_APIC_irq(desc->irq);
+ __edge_IO_APIC_irq(desc->irq);
+--
+2.45.2
+
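For background on the class of bug fixed by the patch above: left-shifting a signed 32-bit int into the sign bit (for example 1 << 31) is undefined behaviour in C, while the same shift on an unsigned operand is well defined. A minimal standalone sketch of the pattern (illustrative only, not Xen code):

    #include <stdint.h>
    #include <stdio.h>

    /* Returns non-zero if bit 'i' (0..31) of 'v' is set. */
    int bit_is_set(uint32_t v, unsigned int i)
    {
        /*
         * (1 << 31) overflows a signed int and is undefined behaviour;
         * (1U << 31) is well defined, which is the shape of the fix above.
         */
        return (v & (1U << (i & 0x1f))) != 0;
    }

    int main(void)
    {
        printf("%d\n", bit_is_set(0x80000000u, 31));   /* prints 1 */
        return 0;
    }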
diff --git a/0043-x86-spec-ctrl-Rename-VERW-related-options.patch b/0043-x86-spec-ctrl-Rename-VERW-related-options.patch
deleted file mode 100644
index 38edc15..0000000
--- a/0043-x86-spec-ctrl-Rename-VERW-related-options.patch
+++ /dev/null
@@ -1,248 +0,0 @@
-From d55d52961d13d4fcd1441fcfca98f690e687b941 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Mon, 12 Feb 2024 17:50:43 +0000
-Subject: [PATCH 43/67] x86/spec-ctrl: Rename VERW related options
-
-VERW is going to be used for a 3rd purpose, and the existing nomenclature
-didn't survive the Stale MMIO issues terribly well.
-
-Rename the command line option from `md-clear=` to `verw=`. This is more
-consistent with other options which tend to be named based on what they're
-doing, not which feature enumeration they use behind the scenes. Retain
-`md-clear=` as a deprecated alias.
-
-Rename opt_md_clear_{pv,hvm} and opt_fb_clear_mmio to opt_verw_{pv,hvm,mmio},
-which has a side effect of making spec_ctrl_init_domain() rather clearer to
-follow.
-
-No functional change.
-
-This is part of XSA-452 / CVE-2023-28746.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit f7603ca252e4226739eb3129a5290ee3da3f8ea4)
----
- docs/misc/xen-command-line.pandoc | 15 ++++----
- xen/arch/x86/spec_ctrl.c | 62 ++++++++++++++++---------------
- 2 files changed, 40 insertions(+), 37 deletions(-)
-
-diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
-index 2006697226..d909ec94fe 100644
---- a/docs/misc/xen-command-line.pandoc
-+++ b/docs/misc/xen-command-line.pandoc
-@@ -2324,7 +2324,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
-
- ### spec-ctrl (x86)
- > `= List of [ <bool>, xen=<bool>, {pv,hvm}=<bool>,
--> {msr-sc,rsb,md-clear,ibpb-entry}=<bool>|{pv,hvm}=<bool>,
-+> {msr-sc,rsb,verw,ibpb-entry}=<bool>|{pv,hvm}=<bool>,
- > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd,
- > eager-fpu,l1d-flush,branch-harden,srb-lock,
- > unpriv-mmio,gds-mit,div-scrub}=<bool> ]`
-@@ -2349,7 +2349,7 @@ in place for guests to use.
-
- Use of a positive boolean value for either of these options is invalid.
-
--The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `md-clear=` and `ibpb-entry=` options
-+The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `verw=` and `ibpb-entry=` options
- offer fine grained control over the primitives by Xen. These impact Xen's
- ability to protect itself, and/or Xen's ability to virtualise support for
- guests to use.
-@@ -2366,11 +2366,12 @@ guests to use.
- guests and if disabled, guests will be unable to use IBRS/STIBP/SSBD/etc.
- * `rsb=` offers control over whether to overwrite the Return Stack Buffer /
- Return Address Stack on entry to Xen and on idle.
--* `md-clear=` offers control over whether to use VERW to flush
-- microarchitectural buffers on idle and exit from Xen. *Note: For
-- compatibility with development versions of this fix, `mds=` is also accepted
-- on Xen 4.12 and earlier as an alias. Consult vendor documentation in
-- preference to here.*
-+* `verw=` offers control over whether to use VERW for its scrubbing side
-+ effects at appropriate privilege transitions. The exact side effects are
-+ microarchitecture and microcode specific. *Note: `md-clear=` is accepted as
-+ a deprecated alias. For compatibility with development versions of XSA-297,
-+ `mds=` is also accepted on Xen 4.12 and earlier as an alias. Consult vendor
-+ documentation in preference to here.*
- * `ibpb-entry=` offers control over whether IBPB (Indirect Branch Prediction
- Barrier) is used on entry to Xen. This is used by default on hardware
- vulnerable to Branch Type Confusion, and hardware vulnerable to Speculative
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 25a18ac598..e12ec9930c 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -37,8 +37,8 @@ static bool __initdata opt_msr_sc_pv = true;
- static bool __initdata opt_msr_sc_hvm = true;
- static int8_t __initdata opt_rsb_pv = -1;
- static bool __initdata opt_rsb_hvm = true;
--static int8_t __ro_after_init opt_md_clear_pv = -1;
--static int8_t __ro_after_init opt_md_clear_hvm = -1;
-+static int8_t __ro_after_init opt_verw_pv = -1;
-+static int8_t __ro_after_init opt_verw_hvm = -1;
-
- static int8_t __ro_after_init opt_ibpb_entry_pv = -1;
- static int8_t __ro_after_init opt_ibpb_entry_hvm = -1;
-@@ -78,7 +78,7 @@ static bool __initdata cpu_has_bug_mds; /* Any other M{LP,SB,FB}DS combination.
-
- static int8_t __initdata opt_srb_lock = -1;
- static bool __initdata opt_unpriv_mmio;
--static bool __ro_after_init opt_fb_clear_mmio;
-+static bool __ro_after_init opt_verw_mmio;
- static int8_t __initdata opt_gds_mit = -1;
- static int8_t __initdata opt_div_scrub = -1;
-
-@@ -120,8 +120,8 @@ static int __init cf_check parse_spec_ctrl(const char *s)
- disable_common:
- opt_rsb_pv = false;
- opt_rsb_hvm = false;
-- opt_md_clear_pv = 0;
-- opt_md_clear_hvm = 0;
-+ opt_verw_pv = 0;
-+ opt_verw_hvm = 0;
- opt_ibpb_entry_pv = 0;
- opt_ibpb_entry_hvm = 0;
- opt_ibpb_entry_dom0 = false;
-@@ -152,14 +152,14 @@ static int __init cf_check parse_spec_ctrl(const char *s)
- {
- opt_msr_sc_pv = val;
- opt_rsb_pv = val;
-- opt_md_clear_pv = val;
-+ opt_verw_pv = val;
- opt_ibpb_entry_pv = val;
- }
- else if ( (val = parse_boolean("hvm", s, ss)) >= 0 )
- {
- opt_msr_sc_hvm = val;
- opt_rsb_hvm = val;
-- opt_md_clear_hvm = val;
-+ opt_verw_hvm = val;
- opt_ibpb_entry_hvm = val;
- }
- else if ( (val = parse_boolean("msr-sc", s, ss)) != -1 )
-@@ -204,21 +204,22 @@ static int __init cf_check parse_spec_ctrl(const char *s)
- break;
- }
- }
-- else if ( (val = parse_boolean("md-clear", s, ss)) != -1 )
-+ else if ( (val = parse_boolean("verw", s, ss)) != -1 ||
-+ (val = parse_boolean("md-clear", s, ss)) != -1 )
- {
- switch ( val )
- {
- case 0:
- case 1:
-- opt_md_clear_pv = opt_md_clear_hvm = val;
-+ opt_verw_pv = opt_verw_hvm = val;
- break;
-
- case -2:
-- s += strlen("md-clear=");
-+ s += (*s == 'v') ? strlen("verw=") : strlen("md-clear=");
- if ( (val = parse_boolean("pv", s, ss)) >= 0 )
-- opt_md_clear_pv = val;
-+ opt_verw_pv = val;
- else if ( (val = parse_boolean("hvm", s, ss)) >= 0 )
-- opt_md_clear_hvm = val;
-+ opt_verw_hvm = val;
- else
- default:
- rc = -EINVAL;
-@@ -540,8 +541,8 @@ static void __init print_details(enum ind_thunk thunk)
- opt_srb_lock ? " SRB_LOCK+" : " SRB_LOCK-",
- opt_ibpb_ctxt_switch ? " IBPB-ctxt" : "",
- opt_l1d_flush ? " L1D_FLUSH" : "",
-- opt_md_clear_pv || opt_md_clear_hvm ||
-- opt_fb_clear_mmio ? " VERW" : "",
-+ opt_verw_pv || opt_verw_hvm ||
-+ opt_verw_mmio ? " VERW" : "",
- opt_div_scrub ? " DIV" : "",
- opt_branch_harden ? " BRANCH_HARDEN" : "");
-
-@@ -562,13 +563,13 @@ static void __init print_details(enum ind_thunk thunk)
- boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ||
- boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ||
- amd_virt_spec_ctrl ||
-- opt_eager_fpu || opt_md_clear_hvm) ? "" : " None",
-+ opt_eager_fpu || opt_verw_hvm) ? "" : " None",
- boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "",
- (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ||
- amd_virt_spec_ctrl) ? " MSR_VIRT_SPEC_CTRL" : "",
- boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ? " RSB" : "",
- opt_eager_fpu ? " EAGER_FPU" : "",
-- opt_md_clear_hvm ? " MD_CLEAR" : "",
-+ opt_verw_hvm ? " VERW" : "",
- boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ? " IBPB-entry" : "");
-
- #endif
-@@ -577,11 +578,11 @@ static void __init print_details(enum ind_thunk thunk)
- (boot_cpu_has(X86_FEATURE_SC_MSR_PV) ||
- boot_cpu_has(X86_FEATURE_SC_RSB_PV) ||
- boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ||
-- opt_eager_fpu || opt_md_clear_pv) ? "" : " None",
-+ opt_eager_fpu || opt_verw_pv) ? "" : " None",
- boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "",
- boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "",
- opt_eager_fpu ? " EAGER_FPU" : "",
-- opt_md_clear_pv ? " MD_CLEAR" : "",
-+ opt_verw_pv ? " VERW" : "",
- boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ? " IBPB-entry" : "");
-
- printk(" XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n",
-@@ -1514,8 +1515,8 @@ void spec_ctrl_init_domain(struct domain *d)
- {
- bool pv = is_pv_domain(d);
-
-- bool verw = ((pv ? opt_md_clear_pv : opt_md_clear_hvm) ||
-- (opt_fb_clear_mmio && is_iommu_enabled(d)));
-+ bool verw = ((pv ? opt_verw_pv : opt_verw_hvm) ||
-+ (opt_verw_mmio && is_iommu_enabled(d)));
-
- bool ibpb = ((pv ? opt_ibpb_entry_pv : opt_ibpb_entry_hvm) &&
- (d->domain_id != 0 || opt_ibpb_entry_dom0));
-@@ -1878,19 +1879,20 @@ void __init init_speculation_mitigations(void)
- * the return-to-guest path.
- */
- if ( opt_unpriv_mmio )
-- opt_fb_clear_mmio = cpu_has_fb_clear;
-+ opt_verw_mmio = cpu_has_fb_clear;
-
- /*
- * By default, enable PV and HVM mitigations on MDS-vulnerable hardware.
- * This will only be a token effort for MLPDS/MFBDS when HT is enabled,
- * but it is somewhat better than nothing.
- */
-- if ( opt_md_clear_pv == -1 )
-- opt_md_clear_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
-- boot_cpu_has(X86_FEATURE_MD_CLEAR));
-- if ( opt_md_clear_hvm == -1 )
-- opt_md_clear_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
-- boot_cpu_has(X86_FEATURE_MD_CLEAR));
-+ if ( opt_verw_pv == -1 )
-+ opt_verw_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
-+ cpu_has_md_clear);
-+
-+ if ( opt_verw_hvm == -1 )
-+ opt_verw_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
-+ cpu_has_md_clear);
-
- /*
- * Enable MDS/MMIO defences as applicable. The Idle blocks need using if
-@@ -1903,12 +1905,12 @@ void __init init_speculation_mitigations(void)
- * MDS mitigations. L1D_FLUSH is not safe for MMIO mitigations.)
- *
- * After calculating the appropriate idle setting, simplify
-- * opt_md_clear_hvm to mean just "should we VERW on the way into HVM
-+ * opt_verw_hvm to mean just "should we VERW on the way into HVM
- * guests", so spec_ctrl_init_domain() can calculate suitable settings.
- */
-- if ( opt_md_clear_pv || opt_md_clear_hvm || opt_fb_clear_mmio )
-+ if ( opt_verw_pv || opt_verw_hvm || opt_verw_mmio )
- setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE);
-- opt_md_clear_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush;
-+ opt_verw_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush;
-
- /*
- * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT
---
-2.44.0
-
diff --git a/0044-tools-xl-Open-xldevd.log-with-O_CLOEXEC.patch b/0044-tools-xl-Open-xldevd.log-with-O_CLOEXEC.patch
new file mode 100644
index 0000000..39dc3eb
--- /dev/null
+++ b/0044-tools-xl-Open-xldevd.log-with-O_CLOEXEC.patch
@@ -0,0 +1,53 @@
+From 2b3bf02c4f5e44d7d7bd3636530c9ebc837dea87 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Thu, 4 Jul 2024 14:11:36 +0200
+Subject: [PATCH 44/56] tools/xl: Open xldevd.log with O_CLOEXEC
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+`xl devd` has been observed leaking /var/log/xldevd.log into children.
+
+Note this is specifically safe; dup2() leaves O_CLOEXEC disabled on newfd, so
+after setting up stdout/stderr, it's only the logfile fd which will close on
+exec().
+
+Link: https://github.com/QubesOS/qubes-issues/issues/8292
+Reported-by: Demi Marie Obenour <demi@invisiblethingslab.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
+Reviewed-by: Demi Marie Obenour <demi@invisiblethingslab.com>
+Acked-by: Anthony PERARD <anthony.perard@vates.tech>
+master commit: ba52b3b624e4a1a976908552364eba924ca45430
+master date: 2024-06-24 16:22:59 +0100
+---
+ tools/xl/xl_utils.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/tools/xl/xl_utils.c b/tools/xl/xl_utils.c
+index 17489d1829..b0d23b2cdb 100644
+--- a/tools/xl/xl_utils.c
++++ b/tools/xl/xl_utils.c
+@@ -27,6 +27,10 @@
+ #include "xl.h"
+ #include "xl_utils.h"
+
++#ifndef O_CLOEXEC
++#define O_CLOEXEC 0
++#endif
++
+ void dolog(const char *file, int line, const char *func, const char *fmt, ...)
+ {
+ va_list ap;
+@@ -270,7 +274,7 @@ int do_daemonize(const char *name, const char *pidfile)
+ exit(-1);
+ }
+
+- CHK_SYSCALL(logfile = open(fullname, O_WRONLY|O_CREAT|O_APPEND, 0644));
++ CHK_SYSCALL(logfile = open(fullname, O_WRONLY | O_CREAT | O_APPEND | O_CLOEXEC, 0644));
+ free(fullname);
+ assert(logfile >= 3);
+
+--
+2.45.2
+
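As background for the xl change above: opening the daemon's log file with O_CLOEXEC and then dup2()ing it onto stdout/stderr keeps the copies on fds 1 and 2 across exec() (dup2() does not propagate the close-on-exec flag to newfd), while the original log fd is closed in children. A rough sketch of that pattern, independent of the xl code and with a made-up helper name:

    #include <fcntl.h>
    #include <unistd.h>

    #ifndef O_CLOEXEC
    #define O_CLOEXEC 0   /* graceful fallback on very old systems */
    #endif

    /* Redirect stdout/stderr to 'path'; only the returned fd is close-on-exec. */
    int setup_daemon_log(const char *path)
    {
        int logfd = open(path, O_WRONLY | O_CREAT | O_APPEND | O_CLOEXEC, 0644);

        if ( logfd < 0 )
            return -1;

        /* dup2() clears O_CLOEXEC on the new fds, so 1 and 2 survive exec(). */
        dup2(logfd, STDOUT_FILENO);
        dup2(logfd, STDERR_FILENO);

        return logfd;
    }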
diff --git a/0044-x86-spec-ctrl-VERW-handling-adjustments.patch b/0044-x86-spec-ctrl-VERW-handling-adjustments.patch
deleted file mode 100644
index e2458c9..0000000
--- a/0044-x86-spec-ctrl-VERW-handling-adjustments.patch
+++ /dev/null
@@ -1,171 +0,0 @@
-From 6663430b442fdf9698bd8e03f701a4547309ad71 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 5 Mar 2024 19:33:37 +0000
-Subject: [PATCH 44/67] x86/spec-ctrl: VERW-handling adjustments
-
-... before we add yet more complexity to this logic. Mostly expanded
-comments, but with three minor changes.
-
-1) Introduce cpu_has_useful_md_clear to simplify later logic in this patch and
- future ones.
-
-2) We only ever need SC_VERW_IDLE when SMT is active. If SMT isn't active,
- then there's no re-partition of pipeline resources based on thread-idleness
- to worry about.
-
-3) The logic to adjust HVM VERW based on L1D_FLUSH is unmaintainable and, as
- it turns out, wrong. SKIP_L1DFL is just a hint bit, whereas opt_l1d_flush
- is the relevant decision of whether to use L1D_FLUSH based on
- susceptibility and user preference.
-
- Rewrite the logic so it can be followed, and incorporate the fact that when
- FB_CLEAR is visible, L1D_FLUSH isn't a safe substitution.
-
-This is part of XSA-452 / CVE-2023-28746.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 1eb91a8a06230b4b64228c9a380194f8cfe6c5e2)
----
- xen/arch/x86/spec_ctrl.c | 99 +++++++++++++++++++++++++++++-----------
- 1 file changed, 73 insertions(+), 26 deletions(-)
-
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index e12ec9930c..adb6bc74e8 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -1531,7 +1531,7 @@ void __init init_speculation_mitigations(void)
- {
- enum ind_thunk thunk = THUNK_DEFAULT;
- bool has_spec_ctrl, ibrs = false, hw_smt_enabled;
-- bool cpu_has_bug_taa, retpoline_safe;
-+ bool cpu_has_bug_taa, cpu_has_useful_md_clear, retpoline_safe;
-
- hw_smt_enabled = check_smt_enabled();
-
-@@ -1867,50 +1867,97 @@ void __init init_speculation_mitigations(void)
- "enabled. Please assess your configuration and choose an\n"
- "explicit 'smt=<bool>' setting. See XSA-273.\n");
-
-+ /*
-+ * A brief summary of VERW-related changes.
-+ *
-+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/intel-analysis-microarchitectural-data-sampling.html
-+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/processor-mmio-stale-data-vulnerabilities.html
-+ *
-+ * Relevant ucodes:
-+ *
-+ * - May 2019, for MDS. Introduces the MD_CLEAR CPUID bit and VERW side
-+ * effects to scrub Store/Load/Fill buffers as applicable. MD_CLEAR
-+ * exists architecturally, even when the side effects have been removed.
-+ *
-+ * Use VERW to scrub on return-to-guest. Parts with L1D_FLUSH to
-+ * mitigate L1TF have the same side effect, so no need to do both.
-+ *
-+ * Various Atoms suffer from Store-buffer sampling only. Store buffers
-+ * are statically partitioned between non-idle threads, so scrubbing is
-+ * wanted when going idle too.
-+ *
-+ * Load ports and Fill buffers are competitively shared between threads.
-+ * SMT must be disabled for VERW scrubbing to be fully effective.
-+ *
-+ * - November 2019, for TAA. Extended VERW side effects to TSX-enabled
-+ * MDS_NO parts.
-+ *
-+ * - February 2022, for Client TSX de-feature. Removed VERW side effects
-+ * from Client CPUs only.
-+ *
-+ * - May 2022, for MMIO Stale Data. (Re)introduced Fill Buffer scrubbing
-+ * on all MMIO-affected parts which didn't already have it for MDS
-+ * reasons, enumerating FB_CLEAR on those parts only.
-+ *
-+ * If FB_CLEAR is enumerated, L1D_FLUSH does not have the same scrubbing
-+ * side effects as VERW and cannot be used in its place.
-+ */
- mds_calculations();
-
- /*
-- * Parts which enumerate FB_CLEAR are those which are post-MDS_NO and have
-- * reintroduced the VERW fill buffer flushing side effect because of a
-- * susceptibility to FBSDP.
-+ * Parts which enumerate FB_CLEAR are those with now-updated microcode
-+ * which weren't susceptible to the original MFBDS (and therefore didn't
-+ * have Fill Buffer scrubbing side effects to begin with, or were Client
-+ * MDS_NO non-TAA_NO parts where the scrubbing was removed), but have had
-+ * the scrubbing reintroduced because of a susceptibility to FBSDP.
- *
- * If unprivileged guests have (or will have) MMIO mappings, we can
- * mitigate cross-domain leakage of fill buffer data by issuing VERW on
-- * the return-to-guest path.
-+ * the return-to-guest path. This is only a token effort if SMT is
-+ * active.
- */
- if ( opt_unpriv_mmio )
- opt_verw_mmio = cpu_has_fb_clear;
-
- /*
-- * By default, enable PV and HVM mitigations on MDS-vulnerable hardware.
-- * This will only be a token effort for MLPDS/MFBDS when HT is enabled,
-- * but it is somewhat better than nothing.
-+ * MD_CLEAR is enumerated architecturally forevermore, even after the
-+ * scrubbing side effects have been removed.  Create ourselves a version
-+ * which expresses whether we think MD_CLEAR is having any useful side
-+ * effect.
-+ */
-+ cpu_has_useful_md_clear = (cpu_has_md_clear &&
-+ (cpu_has_bug_mds || cpu_has_bug_msbds_only));
-+
-+ /*
-+ * By default, use VERW scrubbing on applicable hardware, if we think it's
-+ * going to have an effect. This will only be a token effort for
-+ * MLPDS/MFBDS when SMT is enabled.
- */
- if ( opt_verw_pv == -1 )
-- opt_verw_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
-- cpu_has_md_clear);
-+ opt_verw_pv = cpu_has_useful_md_clear;
-
- if ( opt_verw_hvm == -1 )
-- opt_verw_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
-- cpu_has_md_clear);
-+ opt_verw_hvm = cpu_has_useful_md_clear;
-
- /*
-- * Enable MDS/MMIO defences as applicable. The Idle blocks need using if
-- * either the PV or HVM MDS defences are used, or if we may give MMIO
-- * access to untrusted guests.
-- *
-- * HVM is more complicated. The MD_CLEAR microcode extends L1D_FLUSH with
-- * equivalent semantics to avoid needing to perform both flushes on the
-- * HVM path. Therefore, we don't need VERW in addition to L1D_FLUSH (for
-- * MDS mitigations. L1D_FLUSH is not safe for MMIO mitigations.)
-- *
-- * After calculating the appropriate idle setting, simplify
-- * opt_verw_hvm to mean just "should we VERW on the way into HVM
-- * guests", so spec_ctrl_init_domain() can calculate suitable settings.
-+ * If SMT is active, and we're protecting against MDS or MMIO stale data,
-+ * we need to scrub before going idle as well as on return to guest.
-+ * Various pipeline resources are repartitioned amongst non-idle threads.
- */
-- if ( opt_verw_pv || opt_verw_hvm || opt_verw_mmio )
-+ if ( ((cpu_has_useful_md_clear && (opt_verw_pv || opt_verw_hvm)) ||
-+ opt_verw_mmio) && hw_smt_enabled )
- setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE);
-- opt_verw_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush;
-+
-+ /*
-+ * After calculating the appropriate idle setting, simplify opt_verw_hvm
-+ * to mean just "should we VERW on the way into HVM guests", so
-+ * spec_ctrl_init_domain() can calculate suitable settings.
-+ *
-+ * It is only safe to use L1D_FLUSH in place of VERW when MD_CLEAR is the
-+ * only *_CLEAR we can see.
-+ */
-+ if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear )
-+ opt_verw_hvm = false;
-
- /*
- * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT
---
-2.44.0
-
diff --git a/0045-pirq_cleanup_check-leaks.patch b/0045-pirq_cleanup_check-leaks.patch
new file mode 100644
index 0000000..dcf96c7
--- /dev/null
+++ b/0045-pirq_cleanup_check-leaks.patch
@@ -0,0 +1,84 @@
+From c9f50d2c5f29b630603e2b95f29e5b6e416a6187 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Thu, 4 Jul 2024 14:11:57 +0200
+Subject: [PATCH 45/56] pirq_cleanup_check() leaks
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Its original introduction had two issues: For one the "common" part of
+the checks (carried out in the macro) was inverted. And then after
+removal from the radix tree the structure wasn't scheduled for freeing.
+(All structures still left in the radix tree would be freed upon domain
+destruction, though.)
+
+For the freeing to be safe even if it didn't use RCU (i.e. to avoid use-
+after-free), re-arrange checks/operations in evtchn_close(), such that
+the pointer wouldn't be used anymore after calling pirq_cleanup_check()
+(noting that unmap_domain_pirq_emuirq() itself calls the function in the
+success case).
+
+Fixes: c24536b636f2 ("replace d->nr_pirqs sized arrays with radix tree")
+Fixes: 79858fee307c ("xen: fix hvm_domain_use_pirq's behavior")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: daa90dfea9175c07f13d1a2d901857b2dd14d080
+master date: 2024-07-02 08:35:56 +0200
+---
+ xen/arch/x86/irq.c | 1 +
+ xen/common/event_channel.c | 11 ++++++++---
+ xen/include/xen/irq.h | 2 +-
+ 3 files changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
+index 290f8d26e7..00be3b88e8 100644
+--- a/xen/arch/x86/irq.c
++++ b/xen/arch/x86/irq.c
+@@ -1413,6 +1413,7 @@ void (pirq_cleanup_check)(struct pirq *pirq, struct domain *d)
+
+ if ( radix_tree_delete(&d->pirq_tree, pirq->pirq) != pirq )
+ BUG();
++ free_pirq_struct(pirq);
+ }
+
+ /* Flush all ready EOIs from the top of this CPU's pending-EOI stack. */
+diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
+index 66f924a7b0..b1a6215c37 100644
+--- a/xen/common/event_channel.c
++++ b/xen/common/event_channel.c
+@@ -705,11 +705,16 @@ int evtchn_close(struct domain *d1, int port1, bool guest)
+ if ( !is_hvm_domain(d1) )
+ pirq_guest_unbind(d1, pirq);
+ pirq->evtchn = 0;
+- pirq_cleanup_check(pirq, d1);
+ #ifdef CONFIG_X86
+- if ( is_hvm_domain(d1) && domain_pirq_to_irq(d1, pirq->pirq) > 0 )
+- unmap_domain_pirq_emuirq(d1, pirq->pirq);
++ if ( !is_hvm_domain(d1) ||
++ domain_pirq_to_irq(d1, pirq->pirq) <= 0 ||
++ unmap_domain_pirq_emuirq(d1, pirq->pirq) < 0 )
++ /*
++ * The successful path of unmap_domain_pirq_emuirq() will have
++ * called pirq_cleanup_check() already.
++ */
+ #endif
++ pirq_cleanup_check(pirq, d1);
+ }
+ unlink_pirq_port(chn1, d1->vcpu[chn1->notify_vcpu_id]);
+ break;
+diff --git a/xen/include/xen/irq.h b/xen/include/xen/irq.h
+index 65083135e1..5dcd2d8f0c 100644
+--- a/xen/include/xen/irq.h
++++ b/xen/include/xen/irq.h
+@@ -180,7 +180,7 @@ extern struct pirq *pirq_get_info(struct domain *d, int pirq);
+ void pirq_cleanup_check(struct pirq *pirq, struct domain *d);
+
+ #define pirq_cleanup_check(pirq, d) \
+- ((pirq)->evtchn ? pirq_cleanup_check(pirq, d) : (void)0)
++ (!(pirq)->evtchn ? pirq_cleanup_check(pirq, d) : (void)0)
+
+ extern void pirq_guest_eoi(struct pirq *pirq);
+ extern void desc_guest_eoi(struct irq_desc *desc, struct pirq *pirq);
+--
+2.45.2
+
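The one-character change to the pirq_cleanup_check() wrapper above is easy to misread, so here is the bug in isolation: the wrapper is meant to invoke the cleanup only once the pirq is no longer bound to an event channel, but the inverted condition did the opposite. A stripped-down, runnable sketch with hypothetical wrapper names rather than the real Xen structures:

    #include <stdio.h>

    struct pirq { int evtchn; };

    void pirq_cleanup_check(struct pirq *pirq)      /* the real work */
    {
        printf("freeing pirq\n");
    }

    /* Buggy wrapper: runs cleanup while the pirq is still bound, and never
     * once ->evtchn has finally been cleared, so the structure leaks. */
    #define pirq_cleanup_check_buggy(p) \
        ((p)->evtchn ? pirq_cleanup_check(p) : (void)0)

    /* Fixed wrapper: only clean up once the event channel binding is gone. */
    #define pirq_cleanup_check_fixed(p) \
        (!(p)->evtchn ? pirq_cleanup_check(p) : (void)0)

    int main(void)
    {
        struct pirq p = { .evtchn = 0 };            /* already unbound */

        pirq_cleanup_check_buggy(&p);               /* prints nothing: leak */
        pirq_cleanup_check_fixed(&p);               /* prints "freeing pirq" */
        return 0;
    }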
diff --git a/0045-x86-spec-ctrl-Mitigation-Register-File-Data-Sampling.patch b/0045-x86-spec-ctrl-Mitigation-Register-File-Data-Sampling.patch
deleted file mode 100644
index 4a10524..0000000
--- a/0045-x86-spec-ctrl-Mitigation-Register-File-Data-Sampling.patch
+++ /dev/null
@@ -1,320 +0,0 @@
-From d85481135d87abbbf1feab18b749288fa08b65f2 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Thu, 22 Jun 2023 23:32:19 +0100
-Subject: [PATCH 45/67] x86/spec-ctrl: Mitigation Register File Data Sampling
-
-RFDS affects Atom cores, also branded E-cores, between the Goldmont and
-Gracemont microarchitectures. This includes Alder Lake and Raptor Lake hybrid
-clien systems which have a mix of Gracemont and other types of cores.
-
-Two new bits have been defined; RFDS_CLEAR to indicate VERW has more side
-effets, and RFDS_NO to incidate that the system is unaffected. Plenty of
-unaffected CPUs won't be getting RFDS_NO retrofitted in microcode, so we
-synthesise it. Alder Lake and Raptor Lake Xeon-E's are unaffected due to
-their platform configuration, and we must use the Hybrid CPUID bit to
-distinguish them from their non-Xeon counterparts.
-
-Like MD_CLEAR and FB_CLEAR, RFDS_CLEAR needs OR-ing across a resource pool, so
-set it in the max policies and reflect the host setting in default.
-
-This is part of XSA-452 / CVE-2023-28746.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit fb5b6f6744713410c74cfc12b7176c108e3c9a31)
----
- tools/misc/xen-cpuid.c | 5 +-
- xen/arch/x86/cpu-policy.c | 5 +
- xen/arch/x86/include/asm/cpufeature.h | 3 +
- xen/arch/x86/include/asm/msr-index.h | 2 +
- xen/arch/x86/spec_ctrl.c | 100 +++++++++++++++++++-
- xen/include/public/arch-x86/cpufeatureset.h | 3 +
- 6 files changed, 111 insertions(+), 7 deletions(-)
-
-diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c
-index aefc140d66..5ceea8be07 100644
---- a/tools/misc/xen-cpuid.c
-+++ b/tools/misc/xen-cpuid.c
-@@ -172,7 +172,7 @@ static const char *const str_7d0[32] =
- [ 8] = "avx512-vp2intersect", [ 9] = "srbds-ctrl",
- [10] = "md-clear", [11] = "rtm-always-abort",
- /* 12 */ [13] = "tsx-force-abort",
-- [14] = "serialize",
-+ [14] = "serialize", [15] = "hybrid",
- [16] = "tsxldtrk",
- [18] = "pconfig",
- [20] = "cet-ibt",
-@@ -237,7 +237,8 @@ static const char *const str_m10Al[32] =
- [20] = "bhi-no", [21] = "xapic-status",
- /* 22 */ [23] = "ovrclk-status",
- [24] = "pbrsb-no", [25] = "gds-ctrl",
-- [26] = "gds-no",
-+ [26] = "gds-no", [27] = "rfds-no",
-+ [28] = "rfds-clear",
- };
-
- static const char *const str_m10Ah[32] =
-diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
-index 7b875a7221..96c2cee1a8 100644
---- a/xen/arch/x86/cpu-policy.c
-+++ b/xen/arch/x86/cpu-policy.c
-@@ -444,6 +444,7 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs)
- */
- __set_bit(X86_FEATURE_MD_CLEAR, fs);
- __set_bit(X86_FEATURE_FB_CLEAR, fs);
-+ __set_bit(X86_FEATURE_RFDS_CLEAR, fs);
-
- /*
- * The Gather Data Sampling microcode mitigation (August 2023) has an
-@@ -493,6 +494,10 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs)
- if ( cpu_has_fb_clear )
- __set_bit(X86_FEATURE_FB_CLEAR, fs);
-
-+ __clear_bit(X86_FEATURE_RFDS_CLEAR, fs);
-+ if ( cpu_has_rfds_clear )
-+ __set_bit(X86_FEATURE_RFDS_CLEAR, fs);
-+
- /*
- * The Gather Data Sampling microcode mitigation (August 2023) has an
- * adverse performance impact on the CLWB instruction on SKX/CLX/CPX.
-diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h
-index ec824e8954..a6b8af1296 100644
---- a/xen/arch/x86/include/asm/cpufeature.h
-+++ b/xen/arch/x86/include/asm/cpufeature.h
-@@ -140,6 +140,7 @@
- #define cpu_has_rtm_always_abort boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT)
- #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)
- #define cpu_has_serialize boot_cpu_has(X86_FEATURE_SERIALIZE)
-+#define cpu_has_hybrid boot_cpu_has(X86_FEATURE_HYBRID)
- #define cpu_has_avx512_fp16 boot_cpu_has(X86_FEATURE_AVX512_FP16)
- #define cpu_has_arch_caps boot_cpu_has(X86_FEATURE_ARCH_CAPS)
-
-@@ -161,6 +162,8 @@
- #define cpu_has_rrsba boot_cpu_has(X86_FEATURE_RRSBA)
- #define cpu_has_gds_ctrl boot_cpu_has(X86_FEATURE_GDS_CTRL)
- #define cpu_has_gds_no boot_cpu_has(X86_FEATURE_GDS_NO)
-+#define cpu_has_rfds_no boot_cpu_has(X86_FEATURE_RFDS_NO)
-+#define cpu_has_rfds_clear boot_cpu_has(X86_FEATURE_RFDS_CLEAR)
-
- /* Synthesized. */
- #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
-diff --git a/xen/arch/x86/include/asm/msr-index.h b/xen/arch/x86/include/asm/msr-index.h
-index 6abf7bc34a..9b5f67711f 100644
---- a/xen/arch/x86/include/asm/msr-index.h
-+++ b/xen/arch/x86/include/asm/msr-index.h
-@@ -88,6 +88,8 @@
- #define ARCH_CAPS_PBRSB_NO (_AC(1, ULL) << 24)
- #define ARCH_CAPS_GDS_CTRL (_AC(1, ULL) << 25)
- #define ARCH_CAPS_GDS_NO (_AC(1, ULL) << 26)
-+#define ARCH_CAPS_RFDS_NO (_AC(1, ULL) << 27)
-+#define ARCH_CAPS_RFDS_CLEAR (_AC(1, ULL) << 28)
-
- #define MSR_FLUSH_CMD 0x0000010b
- #define FLUSH_CMD_L1D (_AC(1, ULL) << 0)
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index adb6bc74e8..1ee81e2dfe 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -24,6 +24,7 @@
-
- #include <asm/amd.h>
- #include <asm/hvm/svm/svm.h>
-+#include <asm/intel-family.h>
- #include <asm/microcode.h>
- #include <asm/msr.h>
- #include <asm/pv/domain.h>
-@@ -447,7 +448,7 @@ static void __init print_details(enum ind_thunk thunk)
- * Hardware read-only information, stating immunity to certain issues, or
- * suggestions of which mitigation to use.
- */
-- printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
-+ printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
- (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "",
- (caps & ARCH_CAPS_EIBRS) ? " EIBRS" : "",
- (caps & ARCH_CAPS_RSBA) ? " RSBA" : "",
-@@ -463,6 +464,7 @@ static void __init print_details(enum ind_thunk thunk)
- (caps & ARCH_CAPS_FB_CLEAR) ? " FB_CLEAR" : "",
- (caps & ARCH_CAPS_PBRSB_NO) ? " PBRSB_NO" : "",
- (caps & ARCH_CAPS_GDS_NO) ? " GDS_NO" : "",
-+ (caps & ARCH_CAPS_RFDS_NO) ? " RFDS_NO" : "",
- (e8b & cpufeat_mask(X86_FEATURE_IBRS_ALWAYS)) ? " IBRS_ALWAYS" : "",
- (e8b & cpufeat_mask(X86_FEATURE_STIBP_ALWAYS)) ? " STIBP_ALWAYS" : "",
- (e8b & cpufeat_mask(X86_FEATURE_IBRS_FAST)) ? " IBRS_FAST" : "",
-@@ -473,7 +475,7 @@ static void __init print_details(enum ind_thunk thunk)
- (e21a & cpufeat_mask(X86_FEATURE_SRSO_NO)) ? " SRSO_NO" : "");
-
- /* Hardware features which need driving to mitigate issues. */
-- printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
-+ printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
- (e8b & cpufeat_mask(X86_FEATURE_IBPB)) ||
- (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBPB" : "",
- (e8b & cpufeat_mask(X86_FEATURE_IBRS)) ||
-@@ -491,6 +493,7 @@ static void __init print_details(enum ind_thunk thunk)
- (caps & ARCH_CAPS_TSX_CTRL) ? " TSX_CTRL" : "",
- (caps & ARCH_CAPS_FB_CLEAR_CTRL) ? " FB_CLEAR_CTRL" : "",
- (caps & ARCH_CAPS_GDS_CTRL) ? " GDS_CTRL" : "",
-+ (caps & ARCH_CAPS_RFDS_CLEAR) ? " RFDS_CLEAR" : "",
- (e21a & cpufeat_mask(X86_FEATURE_SBPB)) ? " SBPB" : "");
-
- /* Compiled-in support which pertains to mitigations. */
-@@ -1359,6 +1362,83 @@ static __init void mds_calculations(void)
- }
- }
-
-+/*
-+ * Register File Data Sampling affects Atom cores from the Goldmont to
-+ * Gracemont microarchitectures. The March 2024 microcode adds RFDS_NO to
-+ * some but not all unaffected parts, and RFDS_CLEAR to affected parts still
-+ * in support.
-+ *
-+ * Alder Lake and Raptor Lake client CPUs have a mix of P cores
-+ * (Golden/Raptor Cove, not vulnerable) and E cores (Gracemont,
-+ * vulnerable), and both enumerate RFDS_CLEAR.
-+ *
-+ * Both exist in a Xeon SKU, which has the E cores (Gracemont) disabled by
-+ * platform configuration, and enumerate RFDS_NO.
-+ *
-+ * With older parts, or with out-of-date microcode, synthesise RFDS_NO when
-+ * safe to do so.
-+ *
-+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/register-file-data-sampling.html
-+ */
-+static void __init rfds_calculations(void)
-+{
-+ /* RFDS is only known to affect Intel Family 6 processors at this time. */
-+ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-+ boot_cpu_data.x86 != 6 )
-+ return;
-+
-+ /*
-+ * If RFDS_NO or RFDS_CLEAR are visible, we've either got suitable
-+ * microcode, or an RFDS-aware hypervisor is levelling us in a pool.
-+ */
-+ if ( cpu_has_rfds_no || cpu_has_rfds_clear )
-+ return;
-+
-+ /* If we're virtualised, don't attempt to synthesise RFDS_NO. */
-+ if ( cpu_has_hypervisor )
-+ return;
-+
-+ /*
-+ * Not all CPUs are expected to get a microcode update enumerating one of
-+ * RFDS_{NO,CLEAR}, or we might have out-of-date microcode.
-+ */
-+ switch ( boot_cpu_data.x86_model )
-+ {
-+ case INTEL_FAM6_ALDERLAKE:
-+ case INTEL_FAM6_RAPTORLAKE:
-+ /*
-+ * Alder Lake and Raptor Lake might be a client SKU (with the
-+ * Gracemont cores active, and therefore vulnerable) or might be a
-+ * server SKU (with the Gracemont cores disabled, and therefore not
-+ * vulnerable).
-+ *
-+ * See if the CPU identifies as hybrid to distinguish the two cases.
-+ */
-+ if ( !cpu_has_hybrid )
-+ break;
-+ fallthrough;
-+ case INTEL_FAM6_ALDERLAKE_L:
-+ case INTEL_FAM6_RAPTORLAKE_P:
-+ case INTEL_FAM6_RAPTORLAKE_S:
-+
-+ case INTEL_FAM6_ATOM_GOLDMONT: /* Apollo Lake */
-+ case INTEL_FAM6_ATOM_GOLDMONT_D: /* Denverton */
-+ case INTEL_FAM6_ATOM_GOLDMONT_PLUS: /* Gemini Lake */
-+ case INTEL_FAM6_ATOM_TREMONT_D: /* Snow Ridge / Parker Ridge */
-+ case INTEL_FAM6_ATOM_TREMONT: /* Elkhart Lake */
-+ case INTEL_FAM6_ATOM_TREMONT_L: /* Jasper Lake */
-+ case INTEL_FAM6_ATOM_GRACEMONT: /* Alder Lake N */
-+ return;
-+ }
-+
-+ /*
-+ * We appear to be on an unaffected CPU which didn't enumerate RFDS_NO,
-+ * perhaps because of its age or because of out-of-date microcode.
-+ * Synthesise it.
-+ */
-+ setup_force_cpu_cap(X86_FEATURE_RFDS_NO);
-+}
-+
- static bool __init cpu_has_gds(void)
- {
- /*
-@@ -1872,6 +1952,7 @@ void __init init_speculation_mitigations(void)
- *
- * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/intel-analysis-microarchitectural-data-sampling.html
- * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/processor-mmio-stale-data-vulnerabilities.html
-+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/register-file-data-sampling.html
- *
- * Relevant ucodes:
- *
-@@ -1901,8 +1982,12 @@ void __init init_speculation_mitigations(void)
- *
- * If FB_CLEAR is enumerated, L1D_FLUSH does not have the same scrubbing
- * side effects as VERW and cannot be used in its place.
-+ *
-+ * - March 2024, for RFDS.  Enumerate RFDS_CLEAR to mean that VERW now
-+ * scrubs non-architectural entries from certain register files.
- */
- mds_calculations();
-+ rfds_calculations();
-
- /*
- * Parts which enumerate FB_CLEAR are those with now-updated microcode
-@@ -1934,15 +2019,19 @@ void __init init_speculation_mitigations(void)
- * MLPDS/MFBDS when SMT is enabled.
- */
- if ( opt_verw_pv == -1 )
-- opt_verw_pv = cpu_has_useful_md_clear;
-+ opt_verw_pv = cpu_has_useful_md_clear || cpu_has_rfds_clear;
-
- if ( opt_verw_hvm == -1 )
-- opt_verw_hvm = cpu_has_useful_md_clear;
-+ opt_verw_hvm = cpu_has_useful_md_clear || cpu_has_rfds_clear;
-
- /*
- * If SMT is active, and we're protecting against MDS or MMIO stale data,
- * we need to scrub before going idle as well as on return to guest.
- * Various pipeline resources are repartitioned amongst non-idle threads.
-+ *
-+ * We don't need to scrub on idle for RFDS. There are no affected cores
-+ * which support SMT, despite there being affected cores in hybrid systems
-+ * which have SMT elsewhere in the platform.
- */
- if ( ((cpu_has_useful_md_clear && (opt_verw_pv || opt_verw_hvm)) ||
- opt_verw_mmio) && hw_smt_enabled )
-@@ -1956,7 +2045,8 @@ void __init init_speculation_mitigations(void)
- * It is only safe to use L1D_FLUSH in place of VERW when MD_CLEAR is the
- * only *_CLEAR we can see.
- */
-- if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear )
-+ if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear &&
-+ !cpu_has_rfds_clear )
- opt_verw_hvm = false;
-
- /*
-diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
-index aec1407613..113e6cadc1 100644
---- a/xen/include/public/arch-x86/cpufeatureset.h
-+++ b/xen/include/public/arch-x86/cpufeatureset.h
-@@ -264,6 +264,7 @@ XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*!A VERW clears microarchitectural buffe
- XEN_CPUFEATURE(RTM_ALWAYS_ABORT, 9*32+11) /*! June 2021 TSX defeaturing in microcode. */
- XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */
- XEN_CPUFEATURE(SERIALIZE, 9*32+14) /*A SERIALIZE insn */
-+XEN_CPUFEATURE(HYBRID, 9*32+15) /* Heterogeneous platform */
- XEN_CPUFEATURE(TSXLDTRK, 9*32+16) /*a TSX load tracking suspend/resume insns */
- XEN_CPUFEATURE(CET_IBT, 9*32+20) /* CET - Indirect Branch Tracking */
- XEN_CPUFEATURE(AVX512_FP16, 9*32+23) /* AVX512 FP16 instructions */
-@@ -330,6 +331,8 @@ XEN_CPUFEATURE(OVRCLK_STATUS, 16*32+23) /* MSR_OVERCLOCKING_STATUS */
- XEN_CPUFEATURE(PBRSB_NO, 16*32+24) /*A No Post-Barrier RSB predictions */
- XEN_CPUFEATURE(GDS_CTRL, 16*32+25) /* MCU_OPT_CTRL.GDS_MIT_{DIS,LOCK} */
- XEN_CPUFEATURE(GDS_NO, 16*32+26) /*A No Gather Data Sampling */
-+XEN_CPUFEATURE(RFDS_NO, 16*32+27) /*A No Register File Data Sampling */
-+XEN_CPUFEATURE(RFDS_CLEAR, 16*32+28) /*!A Register File(s) cleared by VERW */
-
- /* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.edx, word 17 */
-
---
-2.44.0
-
diff --git a/0046-tools-dombuilder-Correct-the-length-calculation-in-x.patch b/0046-tools-dombuilder-Correct-the-length-calculation-in-x.patch
new file mode 100644
index 0000000..b25f15d
--- /dev/null
+++ b/0046-tools-dombuilder-Correct-the-length-calculation-in-x.patch
@@ -0,0 +1,44 @@
+From 8e51c8f1d45fad242a315fa17ba3582c02e66840 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Thu, 4 Jul 2024 14:12:31 +0200
+Subject: [PATCH 46/56] tools/dombuilder: Correct the length calculation in
+ xc_dom_alloc_segment()
+
+xc_dom_alloc_segment() is passed a size in bytes, calculates a size in pages
+from it, then fills in the new segment information with a bytes value
+re-calculated from the number of pages.
+
+This causes the module information given to the guest (MB, or PVH) to have
+incorrect sizes; specifically, sizes rounded up to the next page.
+
+This in turn is problematic for Xen. When Xen finds a gzipped module, it
+peeks at the end metadata to judge the decompressed size, which is a -4
+backreference from the reported end of the module.
+
+Fill in seg->vend using the correct number of bytes.
+
+Fixes: ea7c8a3d0e82 ("libxc: reorganize domain builder guest memory allocator")
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Anthony PERARD <anthony.perard@vates.tech>
+master commit: 4c3a618b0adaa0cd59e0fa0898bb60978b8b3a5f
+master date: 2024-07-02 10:50:18 +0100
+---
+ tools/libs/guest/xg_dom_core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/libs/guest/xg_dom_core.c b/tools/libs/guest/xg_dom_core.c
+index c4f4e7f3e2..f5521d528b 100644
+--- a/tools/libs/guest/xg_dom_core.c
++++ b/tools/libs/guest/xg_dom_core.c
+@@ -601,7 +601,7 @@ int xc_dom_alloc_segment(struct xc_dom_image *dom,
+ memset(ptr, 0, pages * page_size);
+
+ seg->vstart = start;
+- seg->vend = dom->virt_alloc_end;
++ seg->vend = start + size;
+
+ DOMPRINTF("%-20s: %-12s : 0x%" PRIx64 " -> 0x%" PRIx64
+ " (pfn 0x%" PRIpfn " + 0x%" PRIpfn " pages)",
+--
+2.45.2
+
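To make the rounding issue fixed above concrete: the segment end must be derived from the requested byte size, not re-derived from the page-rounded allocation, otherwise any module whose size is not a multiple of the page size is reported larger than it really is. A small illustrative calculation, not taken from the dombuilder itself:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096u

    int main(void)
    {
        uint64_t start = 0x100000, size = 10000;           /* 10000-byte module */
        uint64_t pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;

        uint64_t vend_wrong = start + pages * PAGE_SIZE;   /* rounded up to 12288 bytes */
        uint64_t vend_right = start + size;                /* exact 10000 bytes */

        printf("wrong end 0x%llx, right end 0x%llx\n",
               (unsigned long long)vend_wrong, (unsigned long long)vend_right);
        return 0;
    }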
diff --git a/0046-x86-paging-Delete-update_cr3-s-do_locking-parameter.patch b/0046-x86-paging-Delete-update_cr3-s-do_locking-parameter.patch
deleted file mode 100644
index ce397a1..0000000
--- a/0046-x86-paging-Delete-update_cr3-s-do_locking-parameter.patch
+++ /dev/null
@@ -1,161 +0,0 @@
-From bf70ce8b3449c49eb828d5b1f4934a49b00fef35 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Wed, 20 Sep 2023 20:06:53 +0100
-Subject: [PATCH 46/67] x86/paging: Delete update_cr3()'s do_locking parameter
-
-Nicola reports that the XSA-438 fix introduced new MISRA violations because of
-some incidental tidying it tried to do. The parameter is useless, so resolve
-the MISRA regression by removing it.
-
-hap_update_cr3() discards the parameter entirely, while sh_update_cr3() uses
-it to distinguish internal and external callers and therefore whether the
-paging lock should be taken.
-
-However, we have paging_lock_recursive() for this purpose, which also prevents
-the shadow-internal callers from accidentally failing to hold the lock.
-
-Fixes: fb0ff49fe9f7 ("x86/shadow: defer releasing of PV's top-level shadow reference")
-Reported-by: Nicola Vetrini <nicola.vetrini@bugseng.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Release-acked-by: Henry Wang <Henry.Wang@arm.com>
-(cherry picked from commit e71157d1ac2a7fbf413130663cf0a93ff9fbcf7e)
----
- xen/arch/x86/include/asm/paging.h | 5 ++---
- xen/arch/x86/mm/hap/hap.c | 5 ++---
- xen/arch/x86/mm/shadow/common.c | 2 +-
- xen/arch/x86/mm/shadow/multi.c | 17 ++++++++---------
- xen/arch/x86/mm/shadow/none.c | 3 +--
- 5 files changed, 14 insertions(+), 18 deletions(-)
-
-diff --git a/xen/arch/x86/include/asm/paging.h b/xen/arch/x86/include/asm/paging.h
-index 94c590f31a..809ff35d9a 100644
---- a/xen/arch/x86/include/asm/paging.h
-+++ b/xen/arch/x86/include/asm/paging.h
-@@ -138,8 +138,7 @@ struct paging_mode {
- paddr_t ga, uint32_t *pfec,
- unsigned int *page_order);
- #endif
-- pagetable_t (*update_cr3 )(struct vcpu *v, bool do_locking,
-- bool noflush);
-+ pagetable_t (*update_cr3 )(struct vcpu *v, bool noflush);
- void (*update_paging_modes )(struct vcpu *v);
- bool (*flush_tlb )(const unsigned long *vcpu_bitmap);
-
-@@ -312,7 +311,7 @@ static inline unsigned long paging_ga_to_gfn_cr3(struct vcpu *v,
- * as the value to load into the host CR3 to schedule this vcpu */
- static inline pagetable_t paging_update_cr3(struct vcpu *v, bool noflush)
- {
-- return paging_get_hostmode(v)->update_cr3(v, 1, noflush);
-+ return paging_get_hostmode(v)->update_cr3(v, noflush);
- }
-
- /* Update all the things that are derived from the guest's CR0/CR3/CR4.
-diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
-index 57a19c3d59..3ad39a7dd7 100644
---- a/xen/arch/x86/mm/hap/hap.c
-+++ b/xen/arch/x86/mm/hap/hap.c
-@@ -739,8 +739,7 @@ static bool cf_check hap_invlpg(struct vcpu *v, unsigned long linear)
- return 1;
- }
-
--static pagetable_t cf_check hap_update_cr3(
-- struct vcpu *v, bool do_locking, bool noflush)
-+static pagetable_t cf_check hap_update_cr3(struct vcpu *v, bool noflush)
- {
- v->arch.hvm.hw_cr[3] = v->arch.hvm.guest_cr[3];
- hvm_update_guest_cr3(v, noflush);
-@@ -826,7 +825,7 @@ static void cf_check hap_update_paging_modes(struct vcpu *v)
- }
-
- /* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */
-- hap_update_cr3(v, 0, false);
-+ hap_update_cr3(v, false);
-
- unlock:
- paging_unlock(d);
-diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
-index c0940f939e..18714dbd02 100644
---- a/xen/arch/x86/mm/shadow/common.c
-+++ b/xen/arch/x86/mm/shadow/common.c
-@@ -2579,7 +2579,7 @@ static void sh_update_paging_modes(struct vcpu *v)
- }
- #endif /* OOS */
-
-- v->arch.paging.mode->update_cr3(v, 0, false);
-+ v->arch.paging.mode->update_cr3(v, false);
- }
-
- void cf_check shadow_update_paging_modes(struct vcpu *v)
-diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
-index c92b354a78..e54a507b54 100644
---- a/xen/arch/x86/mm/shadow/multi.c
-+++ b/xen/arch/x86/mm/shadow/multi.c
-@@ -2506,7 +2506,7 @@ static int cf_check sh_page_fault(
- * In any case, in the PAE case, the ASSERT is not true; it can
- * happen because of actions the guest is taking. */
- #if GUEST_PAGING_LEVELS == 3
-- v->arch.paging.mode->update_cr3(v, 0, false);
-+ v->arch.paging.mode->update_cr3(v, false);
- #else
- ASSERT(d->is_shutting_down);
- #endif
-@@ -3224,17 +3224,13 @@ static void cf_check sh_detach_old_tables(struct vcpu *v)
- }
- }
-
--static pagetable_t cf_check sh_update_cr3(struct vcpu *v, bool do_locking,
-- bool noflush)
-+static pagetable_t cf_check sh_update_cr3(struct vcpu *v, bool noflush)
- /* Updates vcpu->arch.cr3 after the guest has changed CR3.
- * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
- * if appropriate).
- * HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works;
- * this function will call hvm_update_guest_cr(v, 3) to tell them where the
- * shadow tables are.
-- * If do_locking != 0, assume we are being called from outside the
-- * shadow code, and must take and release the paging lock; otherwise
-- * that is the caller's responsibility.
- */
- {
- struct domain *d = v->domain;
-@@ -3252,7 +3248,11 @@ static pagetable_t cf_check sh_update_cr3(struct vcpu *v, bool do_locking,
- return old_entry;
- }
-
-- if ( do_locking ) paging_lock(v->domain);
-+ /*
-+ * This is used externally (with the paging lock not taken) and internally
-+ * by the shadow code (with the lock already taken).
-+ */
-+ paging_lock_recursive(v->domain);
-
- #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
- /* Need to resync all the shadow entries on a TLB flush. Resync
-@@ -3480,8 +3480,7 @@ static pagetable_t cf_check sh_update_cr3(struct vcpu *v, bool do_locking,
- shadow_sync_other_vcpus(v);
- #endif
-
-- /* Release the lock, if we took it (otherwise it's the caller's problem) */
-- if ( do_locking ) paging_unlock(v->domain);
-+ paging_unlock(v->domain);
-
- return old_entry;
- }
-diff --git a/xen/arch/x86/mm/shadow/none.c b/xen/arch/x86/mm/shadow/none.c
-index 743c0ffb85..7e4e386cd0 100644
---- a/xen/arch/x86/mm/shadow/none.c
-+++ b/xen/arch/x86/mm/shadow/none.c
-@@ -52,8 +52,7 @@ static unsigned long cf_check _gva_to_gfn(
- }
- #endif
-
--static pagetable_t cf_check _update_cr3(struct vcpu *v, bool do_locking,
-- bool noflush)
-+static pagetable_t cf_check _update_cr3(struct vcpu *v, bool noflush)
- {
- ASSERT_UNREACHABLE();
- return pagetable_null();
---
-2.44.0
-
diff --git a/0047-tools-libxs-Fix-CLOEXEC-handling-in-get_dev.patch b/0047-tools-libxs-Fix-CLOEXEC-handling-in-get_dev.patch
new file mode 100644
index 0000000..aabae58
--- /dev/null
+++ b/0047-tools-libxs-Fix-CLOEXEC-handling-in-get_dev.patch
@@ -0,0 +1,95 @@
+From d1b3bbb46402af77089906a97c413c14ed1740d2 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Thu, 4 Jul 2024 14:13:10 +0200
+Subject: [PATCH 47/56] tools/libxs: Fix CLOEXEC handling in get_dev()
+
+Move the O_CLOEXEC compatibility outside of an #ifdef USE_PTHREAD block.
+
+Introduce set_cloexec() to wrap fcntl() setting FD_CLOEXEC. It will be reused
+for other CLOEXEC fixes too.
+
+Use set_cloexec() when O_CLOEXEC isn't available as a best-effort fallback.
+
+Fixes: f4f2f3402b2f ("tools/libxs: Open /dev/xen/xenbus fds as O_CLOEXEC")
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Acked-by: Anthony PERARD <anthony.perard@vates.tech>
+master commit: bf7c1464706adfa903f1e7d59383d042c3a88e39
+master date: 2024-07-02 10:51:06 +0100
+---
+ tools/libs/store/xs.c | 38 ++++++++++++++++++++++++++++++++------
+ 1 file changed, 32 insertions(+), 6 deletions(-)
+
+diff --git a/tools/libs/store/xs.c b/tools/libs/store/xs.c
+index 1498515073..037e79d98b 100644
+--- a/tools/libs/store/xs.c
++++ b/tools/libs/store/xs.c
+@@ -40,6 +40,10 @@
+ #include <xentoolcore_internal.h>
+ #include <xen_list.h>
+
++#ifndef O_CLOEXEC
++#define O_CLOEXEC 0
++#endif
++
+ struct xs_stored_msg {
+ XEN_TAILQ_ENTRY(struct xs_stored_msg) list;
+ struct xsd_sockmsg hdr;
+@@ -54,10 +58,6 @@ struct xs_stored_msg {
+ #include <dlfcn.h>
+ #endif
+
+-#ifndef O_CLOEXEC
+-#define O_CLOEXEC 0
+-#endif
+-
+ struct xs_handle {
+ /* Communications channel to xenstore daemon. */
+ int fd;
+@@ -176,6 +176,16 @@ static bool setnonblock(int fd, int nonblock) {
+ return true;
+ }
+
++static bool set_cloexec(int fd)
++{
++ int flags = fcntl(fd, F_GETFL);
++
++ if (flags < 0)
++ return false;
++
++ return fcntl(fd, flags | FD_CLOEXEC) >= 0;
++}
++
+ int xs_fileno(struct xs_handle *h)
+ {
+ char c = 0;
+@@ -230,8 +240,24 @@ error:
+
+ static int get_dev(const char *connect_to)
+ {
+- /* We cannot open read-only because requests are writes */
+- return open(connect_to, O_RDWR | O_CLOEXEC);
++ int fd, saved_errno;
++
++ fd = open(connect_to, O_RDWR | O_CLOEXEC);
++ if (fd < 0)
++ return -1;
++
++ /* Compat for non-O_CLOEXEC environments. Racy. */
++ if (!O_CLOEXEC && !set_cloexec(fd))
++ goto error;
++
++ return fd;
++
++error:
++ saved_errno = errno;
++ close(fd);
++ errno = saved_errno;
++
++ return -1;
+ }
+
+ static int all_restrict_cb(Xentoolcore__Active_Handle *ah, domid_t domid) {
+--
+2.45.2
+
diff --git a/0047-xen-Swap-order-of-actions-in-the-FREE-macros.patch b/0047-xen-Swap-order-of-actions-in-the-FREE-macros.patch
deleted file mode 100644
index 3e58906..0000000
--- a/0047-xen-Swap-order-of-actions-in-the-FREE-macros.patch
+++ /dev/null
@@ -1,58 +0,0 @@
-From 0a53565f1886201cc8a8afe9b2619ee297c20955 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 2 Feb 2024 00:39:42 +0000
-Subject: [PATCH 47/67] xen: Swap order of actions in the FREE*() macros
-
-Wherever possible, it is a good idea to NULL out the visible reference to an
-object prior to freeing it. The FREE*() macros already collect together both
-parts, making it easy to adjust.
-
-This has a marginal code generation improvement, as some of the calls to the
-free() function can be tailcall optimised.
-
-No functional change.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit c4f427ec879e7c0df6d44d02561e8bee838a293e)
----
- xen/include/xen/mm.h | 3 ++-
- xen/include/xen/xmalloc.h | 7 ++++---
- 2 files changed, 6 insertions(+), 4 deletions(-)
-
-diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
-index 3dc61bcc3c..211685a5d2 100644
---- a/xen/include/xen/mm.h
-+++ b/xen/include/xen/mm.h
-@@ -80,8 +80,9 @@ bool scrub_free_pages(void);
-
- /* Free an allocation, and zero the pointer to it. */
- #define FREE_XENHEAP_PAGES(p, o) do { \
-- free_xenheap_pages(p, o); \
-+ void *_ptr_ = (p); \
- (p) = NULL; \
-+ free_xenheap_pages(_ptr_, o); \
- } while ( false )
- #define FREE_XENHEAP_PAGE(p) FREE_XENHEAP_PAGES(p, 0)
-
-diff --git a/xen/include/xen/xmalloc.h b/xen/include/xen/xmalloc.h
-index 16979a117c..d857298011 100644
---- a/xen/include/xen/xmalloc.h
-+++ b/xen/include/xen/xmalloc.h
-@@ -66,9 +66,10 @@
- extern void xfree(void *);
-
- /* Free an allocation, and zero the pointer to it. */
--#define XFREE(p) do { \
-- xfree(p); \
-- (p) = NULL; \
-+#define XFREE(p) do { \
-+ void *_ptr_ = (p); \
-+ (p) = NULL; \
-+ xfree(_ptr_); \
- } while ( false )
-
- /* Underlying functions */
---
-2.44.0
-
diff --git a/0048-tools-libxs-Fix-CLOEXEC-handling-in-get_socket.patch b/0048-tools-libxs-Fix-CLOEXEC-handling-in-get_socket.patch
new file mode 100644
index 0000000..e01a6b4
--- /dev/null
+++ b/0048-tools-libxs-Fix-CLOEXEC-handling-in-get_socket.patch
@@ -0,0 +1,60 @@
+From d689bb4d2cd3ccdb0067b0ca953cccbc5ab375ae Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Thu, 4 Jul 2024 14:13:18 +0200
+Subject: [PATCH 48/56] tools/libxs: Fix CLOEXEC handling in get_socket()
+
+get_socket() opens a socket, then uses fcntl() to set CLOEXEC. This is racy
+with exec().
+
+Open the socket with SOCK_CLOEXEC. Use the same compatibility strategy as
+O_CLOEXEC on ancient versions of Linux.
+
+Reported-by: Frediano Ziglio <frediano.ziglio@cloud.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Acked-by: Anthony PERARD <anthony.perard@vates.tech>
+master commit: 1957dd6aff931877fc22699d8f2d4be8728014ba
+master date: 2024-07-02 10:51:11 +0100
+---
+ tools/libs/store/xs.c | 14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+diff --git a/tools/libs/store/xs.c b/tools/libs/store/xs.c
+index 037e79d98b..11a766c508 100644
+--- a/tools/libs/store/xs.c
++++ b/tools/libs/store/xs.c
+@@ -44,6 +44,10 @@
+ #define O_CLOEXEC 0
+ #endif
+
++#ifndef SOCK_CLOEXEC
++#define SOCK_CLOEXEC 0
++#endif
++
+ struct xs_stored_msg {
+ XEN_TAILQ_ENTRY(struct xs_stored_msg) list;
+ struct xsd_sockmsg hdr;
+@@ -207,16 +211,14 @@ int xs_fileno(struct xs_handle *h)
+ static int get_socket(const char *connect_to)
+ {
+ struct sockaddr_un addr;
+- int sock, saved_errno, flags;
++ int sock, saved_errno;
+
+- sock = socket(PF_UNIX, SOCK_STREAM, 0);
++ sock = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ if (sock < 0)
+ return -1;
+
+- if ((flags = fcntl(sock, F_GETFD)) < 0)
+- goto error;
+- flags |= FD_CLOEXEC;
+- if (fcntl(sock, F_SETFD, flags) < 0)
++ /* Compat for non-SOCK_CLOEXEC environments. Racy. */
++ if (!SOCK_CLOEXEC && !set_cloexec(sock))
+ goto error;
+
+ addr.sun_family = AF_UNIX;
+--
+2.45.2
+
diff --git a/0048-x86-spinlock-introduce-support-for-blocking-speculat.patch b/0048-x86-spinlock-introduce-support-for-blocking-speculat.patch
deleted file mode 100644
index ecf0830..0000000
--- a/0048-x86-spinlock-introduce-support-for-blocking-speculat.patch
+++ /dev/null
@@ -1,331 +0,0 @@
-From 9d2f136328aab5537b7180a1b23e171893ebe455 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 13 Feb 2024 13:08:05 +0100
-Subject: [PATCH 48/67] x86/spinlock: introduce support for blocking
- speculation into critical regions
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Introduce a new Kconfig option to block speculation into lock protected
-critical regions. The Kconfig option is enabled by default, but the mitigation
-won't be engaged unless it's explicitly enabled in the command line using
-`spec-ctrl=lock-harden`.
-
-Convert the spinlock acquire macros into always-inline functions, and introduce
-a speculation barrier after the lock has been taken. Note the speculation
-barrier is not placed inside the implementation of the spin lock functions, so
-as to prevent speculation from falling through the call to the lock functions,
-which would result in the barrier also being skipped.
-
-trylock variants are protected using a construct akin to the existing
-evaluate_nospec().
-
-This patch only implements the speculation barrier for x86.
-
-Note spin locks are the only locking primitive taken care of in this change;
-further locking primitives will be adjusted by separate changes.
-
-This is part of XSA-453 / CVE-2024-2193
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 7ef0084418e188d05f338c3e028fbbe8b6924afa)
----
- docs/misc/xen-command-line.pandoc | 7 ++++-
- xen/arch/x86/include/asm/cpufeatures.h | 2 +-
- xen/arch/x86/include/asm/nospec.h | 26 ++++++++++++++++++
- xen/arch/x86/spec_ctrl.c | 26 +++++++++++++++---
- xen/common/Kconfig | 17 ++++++++++++
- xen/include/xen/nospec.h | 15 +++++++++++
- xen/include/xen/spinlock.h | 37 +++++++++++++++++++++-----
- 7 files changed, 119 insertions(+), 11 deletions(-)
-
-diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
-index d909ec94fe..e1d56407dd 100644
---- a/docs/misc/xen-command-line.pandoc
-+++ b/docs/misc/xen-command-line.pandoc
-@@ -2327,7 +2327,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
- > {msr-sc,rsb,verw,ibpb-entry}=<bool>|{pv,hvm}=<bool>,
- > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd,
- > eager-fpu,l1d-flush,branch-harden,srb-lock,
--> unpriv-mmio,gds-mit,div-scrub}=<bool> ]`
-+> unpriv-mmio,gds-mit,div-scrub,lock-harden}=<bool> ]`
-
- Controls for speculative execution sidechannel mitigations. By default, Xen
- will pick the most appropriate mitigations based on compiled in support,
-@@ -2454,6 +2454,11 @@ On all hardware, the `div-scrub=` option can be used to force or prevent Xen
- from mitigating the DIV-leakage vulnerability. By default, Xen will mitigate
- DIV-leakage on hardware believed to be vulnerable.
-
-+If Xen is compiled with `CONFIG_SPECULATIVE_HARDEN_LOCK`, the `lock-harden=`
-+boolean can be used to force or prevent Xen from using speculation barriers to
-+protect lock critical regions. This mitigation won't be engaged by default,
-+and needs to be explicitly enabled on the command line.
-+
- ### sync_console
- > `= <boolean>`
-
-diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h
-index c3aad21c3b..7e8221fd85 100644
---- a/xen/arch/x86/include/asm/cpufeatures.h
-+++ b/xen/arch/x86/include/asm/cpufeatures.h
-@@ -24,7 +24,7 @@ XEN_CPUFEATURE(APERFMPERF, X86_SYNTH( 8)) /* APERFMPERF */
- XEN_CPUFEATURE(MFENCE_RDTSC, X86_SYNTH( 9)) /* MFENCE synchronizes RDTSC */
- XEN_CPUFEATURE(XEN_SMEP, X86_SYNTH(10)) /* SMEP gets used by Xen itself */
- XEN_CPUFEATURE(XEN_SMAP, X86_SYNTH(11)) /* SMAP gets used by Xen itself */
--/* Bit 12 unused. */
-+XEN_CPUFEATURE(SC_NO_LOCK_HARDEN, X86_SYNTH(12)) /* (Disable) Lock critical region hardening */
- XEN_CPUFEATURE(IND_THUNK_LFENCE, X86_SYNTH(13)) /* Use IND_THUNK_LFENCE */
- XEN_CPUFEATURE(IND_THUNK_JMP, X86_SYNTH(14)) /* Use IND_THUNK_JMP */
- XEN_CPUFEATURE(SC_NO_BRANCH_HARDEN, X86_SYNTH(15)) /* (Disable) Conditional branch hardening */
-diff --git a/xen/arch/x86/include/asm/nospec.h b/xen/arch/x86/include/asm/nospec.h
-index 7150e76b87..0725839e19 100644
---- a/xen/arch/x86/include/asm/nospec.h
-+++ b/xen/arch/x86/include/asm/nospec.h
-@@ -38,6 +38,32 @@ static always_inline void block_speculation(void)
- barrier_nospec_true();
- }
-
-+static always_inline void arch_block_lock_speculation(void)
-+{
-+ alternative("lfence", "", X86_FEATURE_SC_NO_LOCK_HARDEN);
-+}
-+
-+/* Allow to insert a read memory barrier into conditionals */
-+static always_inline bool barrier_lock_true(void)
-+{
-+ alternative("lfence #nospec-true", "", X86_FEATURE_SC_NO_LOCK_HARDEN);
-+ return true;
-+}
-+
-+static always_inline bool barrier_lock_false(void)
-+{
-+ alternative("lfence #nospec-false", "", X86_FEATURE_SC_NO_LOCK_HARDEN);
-+ return false;
-+}
-+
-+static always_inline bool arch_lock_evaluate_nospec(bool condition)
-+{
-+ if ( condition )
-+ return barrier_lock_true();
-+ else
-+ return barrier_lock_false();
-+}
-+
- #endif /* _ASM_X86_NOSPEC_H */
-
- /*
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 1ee81e2dfe..ac21af2c5c 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -65,6 +65,7 @@ int8_t __read_mostly opt_eager_fpu = -1;
- int8_t __read_mostly opt_l1d_flush = -1;
- static bool __initdata opt_branch_harden =
- IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH);
-+static bool __initdata opt_lock_harden;
-
- bool __initdata bsp_delay_spec_ctrl;
- uint8_t __read_mostly default_xen_spec_ctrl;
-@@ -133,6 +134,7 @@ static int __init cf_check parse_spec_ctrl(const char *s)
- opt_ssbd = false;
- opt_l1d_flush = 0;
- opt_branch_harden = false;
-+ opt_lock_harden = false;
- opt_srb_lock = 0;
- opt_unpriv_mmio = false;
- opt_gds_mit = 0;
-@@ -298,6 +300,16 @@ static int __init cf_check parse_spec_ctrl(const char *s)
- rc = -EINVAL;
- }
- }
-+ else if ( (val = parse_boolean("lock-harden", s, ss)) >= 0 )
-+ {
-+ if ( IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_LOCK) )
-+ opt_lock_harden = val;
-+ else
-+ {
-+ no_config_param("SPECULATIVE_HARDEN_LOCK", "spec-ctrl", s, ss);
-+ rc = -EINVAL;
-+ }
-+ }
- else if ( (val = parse_boolean("srb-lock", s, ss)) >= 0 )
- opt_srb_lock = val;
- else if ( (val = parse_boolean("unpriv-mmio", s, ss)) >= 0 )
-@@ -500,7 +512,8 @@ static void __init print_details(enum ind_thunk thunk)
- if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) ||
- IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_ARRAY) ||
- IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) ||
-- IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS) )
-+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS) ||
-+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_LOCK) )
- printk(" Compiled-in support:"
- #ifdef CONFIG_INDIRECT_THUNK
- " INDIRECT_THUNK"
-@@ -516,11 +529,14 @@ static void __init print_details(enum ind_thunk thunk)
- #endif
- #ifdef CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS
- " HARDEN_GUEST_ACCESS"
-+#endif
-+#ifdef CONFIG_SPECULATIVE_HARDEN_LOCK
-+ " HARDEN_LOCK"
- #endif
- "\n");
-
- /* Settings for Xen's protection, irrespective of guests. */
-- printk(" Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n",
-+ printk(" Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s%s\n",
- thunk != THUNK_NONE ? "BTI-Thunk: " : "",
- thunk == THUNK_NONE ? "" :
- thunk == THUNK_RETPOLINE ? "RETPOLINE, " :
-@@ -547,7 +563,8 @@ static void __init print_details(enum ind_thunk thunk)
- opt_verw_pv || opt_verw_hvm ||
- opt_verw_mmio ? " VERW" : "",
- opt_div_scrub ? " DIV" : "",
-- opt_branch_harden ? " BRANCH_HARDEN" : "");
-+ opt_branch_harden ? " BRANCH_HARDEN" : "",
-+ opt_lock_harden ? " LOCK_HARDEN" : "");
-
- /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */
- if ( cpu_has_bug_l1tf || opt_pv_l1tf_hwdom || opt_pv_l1tf_domu )
-@@ -1930,6 +1947,9 @@ void __init init_speculation_mitigations(void)
- if ( !opt_branch_harden )
- setup_force_cpu_cap(X86_FEATURE_SC_NO_BRANCH_HARDEN);
-
-+ if ( !opt_lock_harden )
-+ setup_force_cpu_cap(X86_FEATURE_SC_NO_LOCK_HARDEN);
-+
- /*
- * We do not disable HT by default on affected hardware.
- *
-diff --git a/xen/common/Kconfig b/xen/common/Kconfig
-index e7794cb7f6..cd73851538 100644
---- a/xen/common/Kconfig
-+++ b/xen/common/Kconfig
-@@ -173,6 +173,23 @@ config SPECULATIVE_HARDEN_GUEST_ACCESS
-
- If unsure, say Y.
-
-+config SPECULATIVE_HARDEN_LOCK
-+ bool "Speculative lock context hardening"
-+ default y
-+ depends on X86
-+ help
-+ Contemporary processors may use speculative execution as a
-+ performance optimisation, but this can potentially be abused by an
-+ attacker to leak data via speculative sidechannels.
-+
-+ One source of data leakage is via speculative accesses to lock
-+ critical regions.
-+
-+ This option is disabled by default at run time, and needs to be
-+ enabled on the command line.
-+
-+ If unsure, say Y.
-+
- endmenu
-
- config DIT_DEFAULT
-diff --git a/xen/include/xen/nospec.h b/xen/include/xen/nospec.h
-index 76255bc46e..4552846403 100644
---- a/xen/include/xen/nospec.h
-+++ b/xen/include/xen/nospec.h
-@@ -70,6 +70,21 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
- #define array_access_nospec(array, index) \
- (array)[array_index_nospec(index, ARRAY_SIZE(array))]
-
-+static always_inline void block_lock_speculation(void)
-+{
-+#ifdef CONFIG_SPECULATIVE_HARDEN_LOCK
-+ arch_block_lock_speculation();
-+#endif
-+}
-+
-+static always_inline bool lock_evaluate_nospec(bool condition)
-+{
-+#ifdef CONFIG_SPECULATIVE_HARDEN_LOCK
-+ return arch_lock_evaluate_nospec(condition);
-+#endif
-+ return condition;
-+}
-+
- #endif /* XEN_NOSPEC_H */
-
- /*
-diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
-index 961891bea4..daf48fdea7 100644
---- a/xen/include/xen/spinlock.h
-+++ b/xen/include/xen/spinlock.h
-@@ -1,6 +1,7 @@
- #ifndef __SPINLOCK_H__
- #define __SPINLOCK_H__
-
-+#include <xen/nospec.h>
- #include <xen/time.h>
- #include <asm/system.h>
- #include <asm/spinlock.h>
-@@ -189,13 +190,30 @@ int _spin_trylock_recursive(spinlock_t *lock);
- void _spin_lock_recursive(spinlock_t *lock);
- void _spin_unlock_recursive(spinlock_t *lock);
-
--#define spin_lock(l) _spin_lock(l)
--#define spin_lock_cb(l, c, d) _spin_lock_cb(l, c, d)
--#define spin_lock_irq(l) _spin_lock_irq(l)
-+static always_inline void spin_lock(spinlock_t *l)
-+{
-+ _spin_lock(l);
-+ block_lock_speculation();
-+}
-+
-+static always_inline void spin_lock_cb(spinlock_t *l, void (*c)(void *data),
-+ void *d)
-+{
-+ _spin_lock_cb(l, c, d);
-+ block_lock_speculation();
-+}
-+
-+static always_inline void spin_lock_irq(spinlock_t *l)
-+{
-+ _spin_lock_irq(l);
-+ block_lock_speculation();
-+}
-+
- #define spin_lock_irqsave(l, f) \
- ({ \
- BUILD_BUG_ON(sizeof(f) != sizeof(unsigned long)); \
- ((f) = _spin_lock_irqsave(l)); \
-+ block_lock_speculation(); \
- })
-
- #define spin_unlock(l) _spin_unlock(l)
-@@ -203,7 +221,7 @@ void _spin_unlock_recursive(spinlock_t *lock);
- #define spin_unlock_irqrestore(l, f) _spin_unlock_irqrestore(l, f)
-
- #define spin_is_locked(l) _spin_is_locked(l)
--#define spin_trylock(l) _spin_trylock(l)
-+#define spin_trylock(l) lock_evaluate_nospec(_spin_trylock(l))
-
- #define spin_trylock_irqsave(lock, flags) \
- ({ \
-@@ -224,8 +242,15 @@ void _spin_unlock_recursive(spinlock_t *lock);
- * are any critical regions that cannot form part of such a set, they can use
- * standard spin_[un]lock().
- */
--#define spin_trylock_recursive(l) _spin_trylock_recursive(l)
--#define spin_lock_recursive(l) _spin_lock_recursive(l)
-+#define spin_trylock_recursive(l) \
-+ lock_evaluate_nospec(_spin_trylock_recursive(l))
-+
-+static always_inline void spin_lock_recursive(spinlock_t *l)
-+{
-+ _spin_lock_recursive(l);
-+ block_lock_speculation();
-+}
-+
- #define spin_unlock_recursive(l) _spin_unlock_recursive(l)
-
- #endif /* __SPINLOCK_H__ */
---
-2.44.0
-
diff --git a/0049-rwlock-introduce-support-for-blocking-speculation-in.patch b/0049-rwlock-introduce-support-for-blocking-speculation-in.patch
deleted file mode 100644
index 593b588..0000000
--- a/0049-rwlock-introduce-support-for-blocking-speculation-in.patch
+++ /dev/null
@@ -1,125 +0,0 @@
-From 7454dad6ee15f9fa6d84fc285d366b86f3d47494 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 13 Feb 2024 16:08:52 +0100
-Subject: [PATCH 49/67] rwlock: introduce support for blocking speculation into
- critical regions
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Introduce inline wrappers as required and add direct calls to
-block_lock_speculation() in order to prevent speculation into the rwlock
-protected critical regions.
-
-Note the rwlock primitives are adjusted to use the non speculation safe variants
-of the spinlock handlers, as a speculation barrier is added in the rwlock
-calling wrappers.
-
-trylock variants are protected by using lock_evaluate_nospec().
-
-This is part of XSA-453 / CVE-2024-2193
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit a1fb15f61692b1fa9945fc51f55471ace49cdd59)
----
- xen/common/rwlock.c | 14 +++++++++++---
- xen/include/xen/rwlock.h | 34 ++++++++++++++++++++++++++++------
- 2 files changed, 39 insertions(+), 9 deletions(-)
-
-diff --git a/xen/common/rwlock.c b/xen/common/rwlock.c
-index aa15529bbe..cda06b9d6e 100644
---- a/xen/common/rwlock.c
-+++ b/xen/common/rwlock.c
-@@ -34,8 +34,11 @@ void queue_read_lock_slowpath(rwlock_t *lock)
-
- /*
- * Put the reader into the wait queue.
-+ *
-+ * Use the speculation unsafe helper, as it's the caller responsibility to
-+ * issue a speculation barrier if required.
- */
-- spin_lock(&lock->lock);
-+ _spin_lock(&lock->lock);
-
- /*
- * At the head of the wait queue now, wait until the writer state
-@@ -64,8 +67,13 @@ void queue_write_lock_slowpath(rwlock_t *lock)
- {
- u32 cnts;
-
-- /* Put the writer into the wait queue. */
-- spin_lock(&lock->lock);
-+ /*
-+ * Put the writer into the wait queue.
-+ *
-+ * Use the speculation unsafe helper, as it's the caller responsibility to
-+ * issue a speculation barrier if required.
-+ */
-+ _spin_lock(&lock->lock);
-
- /* Try to acquire the lock directly if no reader is present. */
- if ( !atomic_read(&lock->cnts) &&
-diff --git a/xen/include/xen/rwlock.h b/xen/include/xen/rwlock.h
-index 0cc9167715..fd0458be94 100644
---- a/xen/include/xen/rwlock.h
-+++ b/xen/include/xen/rwlock.h
-@@ -247,27 +247,49 @@ static inline int _rw_is_write_locked(rwlock_t *lock)
- return (atomic_read(&lock->cnts) & _QW_WMASK) == _QW_LOCKED;
- }
-
--#define read_lock(l) _read_lock(l)
--#define read_lock_irq(l) _read_lock_irq(l)
-+static always_inline void read_lock(rwlock_t *l)
-+{
-+ _read_lock(l);
-+ block_lock_speculation();
-+}
-+
-+static always_inline void read_lock_irq(rwlock_t *l)
-+{
-+ _read_lock_irq(l);
-+ block_lock_speculation();
-+}
-+
- #define read_lock_irqsave(l, f) \
- ({ \
- BUILD_BUG_ON(sizeof(f) != sizeof(unsigned long)); \
- ((f) = _read_lock_irqsave(l)); \
-+ block_lock_speculation(); \
- })
-
- #define read_unlock(l) _read_unlock(l)
- #define read_unlock_irq(l) _read_unlock_irq(l)
- #define read_unlock_irqrestore(l, f) _read_unlock_irqrestore(l, f)
--#define read_trylock(l) _read_trylock(l)
-+#define read_trylock(l) lock_evaluate_nospec(_read_trylock(l))
-+
-+static always_inline void write_lock(rwlock_t *l)
-+{
-+ _write_lock(l);
-+ block_lock_speculation();
-+}
-+
-+static always_inline void write_lock_irq(rwlock_t *l)
-+{
-+ _write_lock_irq(l);
-+ block_lock_speculation();
-+}
-
--#define write_lock(l) _write_lock(l)
--#define write_lock_irq(l) _write_lock_irq(l)
- #define write_lock_irqsave(l, f) \
- ({ \
- BUILD_BUG_ON(sizeof(f) != sizeof(unsigned long)); \
- ((f) = _write_lock_irqsave(l)); \
-+ block_lock_speculation(); \
- })
--#define write_trylock(l) _write_trylock(l)
-+#define write_trylock(l) lock_evaluate_nospec(_write_trylock(l))
-
- #define write_unlock(l) _write_unlock(l)
- #define write_unlock_irq(l) _write_unlock_irq(l)
---
-2.44.0
-
diff --git a/0049-tools-libxs-Fix-CLOEXEC-handling-in-xs_fileno.patch b/0049-tools-libxs-Fix-CLOEXEC-handling-in-xs_fileno.patch
new file mode 100644
index 0000000..564cece
--- /dev/null
+++ b/0049-tools-libxs-Fix-CLOEXEC-handling-in-xs_fileno.patch
@@ -0,0 +1,109 @@
+From 26b8ff1861a870e01456b31bf999f25df5538ebf Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Thu, 4 Jul 2024 14:13:30 +0200
+Subject: [PATCH 49/56] tools/libxs: Fix CLOEXEC handling in xs_fileno()
+
+xs_fileno() opens a pipe on first use to communicate between the watch thread
+and the main thread. Nothing ever sets CLOEXEC on the file descriptors.
+
+Check for the availability of the pipe2() function with configure. Despite
+starting life as Linux-only, it is now also available on FreeBSD and NetBSD.
+
+When pipe2() isn't available, try our best with pipe() and set_cloexec().
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Acked-by: Anthony PERARD <anthony.perard@vates.tech>
+master commit: a2ff677852f0ce05fa335e8e5682bf2ae0c916ee
+master date: 2024-07-02 10:52:59 +0100
+---
+ tools/config.h.in | 3 +++
+ tools/configure | 12 ++++++++++++
+ tools/configure.ac | 2 ++
+ tools/libs/store/xs.c | 16 +++++++++++++++-
+ 4 files changed, 32 insertions(+), 1 deletion(-)
+
+diff --git a/tools/config.h.in b/tools/config.h.in
+index 0bb2fe08a1..50ad60fcb0 100644
+--- a/tools/config.h.in
++++ b/tools/config.h.in
+@@ -39,6 +39,9 @@
+ /* Define to 1 if you have the <memory.h> header file. */
+ #undef HAVE_MEMORY_H
+
++/* Define to 1 if you have the `pipe2' function. */
++#undef HAVE_PIPE2
++
+ /* pygrub enabled */
+ #undef HAVE_PYGRUB
+
+diff --git a/tools/configure b/tools/configure
+index 7bb935d23b..e35112b5c5 100755
+--- a/tools/configure
++++ b/tools/configure
+@@ -9751,6 +9751,18 @@ if test "$ax_found" = "0"; then :
+ fi
+
+
++for ac_func in pipe2
++do :
++ ac_fn_c_check_func "$LINENO" "pipe2" "ac_cv_func_pipe2"
++if test "x$ac_cv_func_pipe2" = xyes; then :
++ cat >>confdefs.h <<_ACEOF
++#define HAVE_PIPE2 1
++_ACEOF
++
++fi
++done
++
++
+ cat >confcache <<\_ACEOF
+ # This file is a shell script that caches the results of configure
+ # tests run on this system so they can be shared between configure
+diff --git a/tools/configure.ac b/tools/configure.ac
+index 618ef8c63f..53ac20af1e 100644
+--- a/tools/configure.ac
++++ b/tools/configure.ac
+@@ -543,4 +543,6 @@ AS_IF([test "x$pvshim" = "xy"], [
+
+ AX_FIND_HEADER([INCLUDE_ENDIAN_H], [endian.h sys/endian.h])
+
++AC_CHECK_FUNCS([pipe2])
++
+ AC_OUTPUT()
+diff --git a/tools/libs/store/xs.c b/tools/libs/store/xs.c
+index 11a766c508..c8845b69e2 100644
+--- a/tools/libs/store/xs.c
++++ b/tools/libs/store/xs.c
+@@ -190,13 +190,27 @@ static bool set_cloexec(int fd)
+ return fcntl(fd, flags | FD_CLOEXEC) >= 0;
+ }
+
++static int pipe_cloexec(int fds[2])
++{
++#if HAVE_PIPE2
++ return pipe2(fds, O_CLOEXEC);
++#else
++ if (pipe(fds) < 0)
++ return -1;
++ /* Best effort to set CLOEXEC. Racy. */
++ set_cloexec(fds[0]);
++ set_cloexec(fds[1]);
++ return 0;
++#endif
++}
++
+ int xs_fileno(struct xs_handle *h)
+ {
+ char c = 0;
+
+ mutex_lock(&h->watch_mutex);
+
+- if ((h->watch_pipe[0] == -1) && (pipe(h->watch_pipe) != -1)) {
++ if ((h->watch_pipe[0] == -1) && (pipe_cloexec(h->watch_pipe) != -1)) {
+ /* Kick things off if the watch list is already non-empty. */
+ if (!XEN_TAILQ_EMPTY(&h->watch_list))
+ while (write(h->watch_pipe[1], &c, 1) != 1)
+--
+2.45.2
+
diff --git a/0050-cmdline-document-and-enforce-extra_guest_irqs-upper-.patch b/0050-cmdline-document-and-enforce-extra_guest_irqs-upper-.patch
new file mode 100644
index 0000000..f7f61e8
--- /dev/null
+++ b/0050-cmdline-document-and-enforce-extra_guest_irqs-upper-.patch
@@ -0,0 +1,156 @@
+From 30c695ddaf067cbe7a98037474e7910109238807 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Thu, 4 Jul 2024 14:14:16 +0200
+Subject: [PATCH 50/56] cmdline: document and enforce "extra_guest_irqs" upper
+ bounds
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+PHYSDEVOP_pirq_eoi_gmfn_v<N> accepting just a single GFN implies that no
+more than 32k pIRQ-s can be used by a domain on x86. Document this upper
+bound.
+
+To also enforce the limit, (ab)use both arch_hwdom_irqs() (changing its
+parameter type) and setup_system_domains(). This is primarily to avoid
+exposing the two static variables or introducing yet further arch hooks.
+
+While touching arch_hwdom_irqs() also mark it hwdom-init.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Roger Pau Monné <roger.pau@citrix.com>
+
+amend 'cmdline: document and enforce "extra_guest_irqs" upper bounds'
+
+Address late review comments for what is now commit 17f6d398f765:
+- bound max_irqs right away against nr_irqs
+- introduce a #define for a constant used twice
+
+Requested-by: Roger Pau Monné <roger.pau@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: 17f6d398f76597f8009ec0530842fb8705ece7ba
+master date: 2024-07-02 12:00:27 +0200
+master commit: 1f56accba33ffea0abf7d1c6384710823d10cbd6
+master date: 2024-07-03 14:03:27 +0200
+---
+ docs/misc/xen-command-line.pandoc | 3 ++-
+ xen/arch/x86/io_apic.c | 17 ++++++++++-------
+ xen/common/domain.c | 24 ++++++++++++++++++++++--
+ xen/include/xen/irq.h | 3 ++-
+ 4 files changed, 36 insertions(+), 11 deletions(-)
+
+diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
+index 10a09bbf23..d857bd0f89 100644
+--- a/docs/misc/xen-command-line.pandoc
++++ b/docs/misc/xen-command-line.pandoc
+@@ -1175,7 +1175,8 @@ common for all domUs, while the optional second number (preceded by a comma)
+ is for dom0. Changing the setting for domU has no impact on dom0 and vice
+ versa. For example to change dom0 without changing domU, use
+ `extra_guest_irqs=,512`. The default value for Dom0 and an eventual separate
+-hardware domain is architecture dependent.
++hardware domain is architecture dependent. The upper limit for both values on
++x86 is such that the resulting total number of IRQs can't be higher than 32768.
+ Note that specifying zero as domU value means zero, while for dom0 it means
+ to use the default.
+
+diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c
+index c5342789e8..f7591fd091 100644
+--- a/xen/arch/x86/io_apic.c
++++ b/xen/arch/x86/io_apic.c
+@@ -2664,18 +2664,21 @@ void __init ioapic_init(void)
+ nr_irqs_gsi, nr_irqs - nr_irqs_gsi);
+ }
+
+-unsigned int arch_hwdom_irqs(domid_t domid)
++unsigned int __hwdom_init arch_hwdom_irqs(const struct domain *d)
+ {
+ unsigned int n = fls(num_present_cpus());
++ /* Bounding by the domain pirq EOI bitmap capacity. */
++ const unsigned int max_irqs = min_t(unsigned int, nr_irqs,
++ PAGE_SIZE * BITS_PER_BYTE);
+
+- if ( !domid )
+- n = min(n, dom0_max_vcpus());
+- n = min(nr_irqs_gsi + n * NR_DYNAMIC_VECTORS, nr_irqs);
++ if ( is_system_domain(d) )
++ return max_irqs;
+
+- /* Bounded by the domain pirq eoi bitmap gfn. */
+- n = min_t(unsigned int, n, PAGE_SIZE * BITS_PER_BYTE);
++ if ( !d->domain_id )
++ n = min(n, dom0_max_vcpus());
++ n = min(nr_irqs_gsi + n * NR_DYNAMIC_VECTORS, max_irqs);
+
+- printk("Dom%d has maximum %u PIRQs\n", domid, n);
++ printk("%pd has maximum %u PIRQs\n", d, n);
+
+ return n;
+ }
+diff --git a/xen/common/domain.c b/xen/common/domain.c
+index 003f4ab125..62832a5860 100644
+--- a/xen/common/domain.c
++++ b/xen/common/domain.c
+@@ -351,7 +351,8 @@ static int late_hwdom_init(struct domain *d)
+ }
+
+ static unsigned int __read_mostly extra_hwdom_irqs;
+-static unsigned int __read_mostly extra_domU_irqs = 32;
++#define DEFAULT_EXTRA_DOMU_IRQS 32U
++static unsigned int __read_mostly extra_domU_irqs = DEFAULT_EXTRA_DOMU_IRQS;
+
+ static int __init cf_check parse_extra_guest_irqs(const char *s)
+ {
+@@ -688,7 +689,7 @@ struct domain *domain_create(domid_t domid,
+ d->nr_pirqs = nr_static_irqs + extra_domU_irqs;
+ else
+ d->nr_pirqs = extra_hwdom_irqs ? nr_static_irqs + extra_hwdom_irqs
+- : arch_hwdom_irqs(domid);
++ : arch_hwdom_irqs(d);
+ d->nr_pirqs = min(d->nr_pirqs, nr_irqs);
+
+ radix_tree_init(&d->pirq_tree);
+@@ -812,6 +813,25 @@ void __init setup_system_domains(void)
+ if ( IS_ERR(dom_xen) )
+ panic("Failed to create d[XEN]: %ld\n", PTR_ERR(dom_xen));
+
++#ifdef CONFIG_HAS_PIRQ
++ /* Bound-check values passed via "extra_guest_irqs=". */
++ {
++ unsigned int n = max(arch_hwdom_irqs(dom_xen), nr_static_irqs);
++
++ if ( extra_hwdom_irqs > n - nr_static_irqs )
++ {
++ extra_hwdom_irqs = n - nr_static_irqs;
++ printk(XENLOG_WARNING "hwdom IRQs bounded to %u\n", n);
++ }
++ if ( extra_domU_irqs >
++ max(DEFAULT_EXTRA_DOMU_IRQS, n - nr_static_irqs) )
++ {
++ extra_domU_irqs = n - nr_static_irqs;
++ printk(XENLOG_WARNING "domU IRQs bounded to %u\n", n);
++ }
++ }
++#endif
++
+ /*
+ * Initialise our DOMID_IO domain.
+ * This domain owns I/O pages that are within the range of the page_info
+diff --git a/xen/include/xen/irq.h b/xen/include/xen/irq.h
+index 5dcd2d8f0c..bef170bcb6 100644
+--- a/xen/include/xen/irq.h
++++ b/xen/include/xen/irq.h
+@@ -196,8 +196,9 @@ extern struct irq_desc *pirq_spin_lock_irq_desc(
+
+ unsigned int set_desc_affinity(struct irq_desc *desc, const cpumask_t *mask);
+
++/* When passed a system domain, this returns the maximum permissible value. */
+ #ifndef arch_hwdom_irqs
+-unsigned int arch_hwdom_irqs(domid_t domid);
++unsigned int arch_hwdom_irqs(const struct domain *d);
+ #endif
+
+ #ifndef arch_evtchn_bind_pirq
+--
+2.45.2
+
diff --git a/0050-percpu-rwlock-introduce-support-for-blocking-specula.patch b/0050-percpu-rwlock-introduce-support-for-blocking-specula.patch
deleted file mode 100644
index 1da2128..0000000
--- a/0050-percpu-rwlock-introduce-support-for-blocking-specula.patch
+++ /dev/null
@@ -1,87 +0,0 @@
-From 468a368b2e5a38fc0be8e9e5f475820f7e4a6b4f Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 13 Feb 2024 17:57:38 +0100
-Subject: [PATCH 50/67] percpu-rwlock: introduce support for blocking
- speculation into critical regions
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Add direct calls to block_lock_speculation() where required in order to prevent
-speculation into the lock protected critical regions. Also convert
-_percpu_read_lock() from inline to always_inline.
-
-Note that _percpu_write_lock() has been modified to use the non speculation
-safe variant of the locking primitives, as a speculation barrier is added
-unconditionally by the calling wrapper.
-
-This is part of XSA-453 / CVE-2024-2193
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit f218daf6d3a3b847736d37c6a6b76031a0d08441)
----
- xen/common/rwlock.c | 6 +++++-
- xen/include/xen/rwlock.h | 14 ++++++++++----
- 2 files changed, 15 insertions(+), 5 deletions(-)
-
-diff --git a/xen/common/rwlock.c b/xen/common/rwlock.c
-index cda06b9d6e..4da0ed8fad 100644
---- a/xen/common/rwlock.c
-+++ b/xen/common/rwlock.c
-@@ -125,8 +125,12 @@ void _percpu_write_lock(percpu_rwlock_t **per_cpudata,
- /*
- * First take the write lock to protect against other writers or slow
- * path readers.
-+ *
-+ * Note we use the speculation unsafe variant of write_lock(), as the
-+ * calling wrapper already adds a speculation barrier after the lock has
-+ * been taken.
- */
-- write_lock(&percpu_rwlock->rwlock);
-+ _write_lock(&percpu_rwlock->rwlock);
-
- /* Now set the global variable so that readers start using read_lock. */
- percpu_rwlock->writer_activating = 1;
-diff --git a/xen/include/xen/rwlock.h b/xen/include/xen/rwlock.h
-index fd0458be94..abe0804bf7 100644
---- a/xen/include/xen/rwlock.h
-+++ b/xen/include/xen/rwlock.h
-@@ -326,8 +326,8 @@ static inline void _percpu_rwlock_owner_check(percpu_rwlock_t **per_cpudata,
- #define percpu_rwlock_resource_init(l, owner) \
- (*(l) = (percpu_rwlock_t)PERCPU_RW_LOCK_UNLOCKED(&get_per_cpu_var(owner)))
-
--static inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata,
-- percpu_rwlock_t *percpu_rwlock)
-+static always_inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata,
-+ percpu_rwlock_t *percpu_rwlock)
- {
- /* Validate the correct per_cpudata variable has been provided. */
- _percpu_rwlock_owner_check(per_cpudata, percpu_rwlock);
-@@ -362,6 +362,8 @@ static inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata,
- }
- else
- {
-+ /* Other branch already has a speculation barrier in read_lock(). */
-+ block_lock_speculation();
- /* All other paths have implicit check_lock() calls via read_lock(). */
- check_lock(&percpu_rwlock->rwlock.lock.debug, false);
- }
-@@ -410,8 +412,12 @@ static inline void _percpu_write_unlock(percpu_rwlock_t **per_cpudata,
- _percpu_read_lock(&get_per_cpu_var(percpu), lock)
- #define percpu_read_unlock(percpu, lock) \
- _percpu_read_unlock(&get_per_cpu_var(percpu), lock)
--#define percpu_write_lock(percpu, lock) \
-- _percpu_write_lock(&get_per_cpu_var(percpu), lock)
-+
-+#define percpu_write_lock(percpu, lock) \
-+({ \
-+ _percpu_write_lock(&get_per_cpu_var(percpu), lock); \
-+ block_lock_speculation(); \
-+})
- #define percpu_write_unlock(percpu, lock) \
- _percpu_write_unlock(&get_per_cpu_var(percpu), lock)
-
---
-2.44.0
-
diff --git a/0051-locking-attempt-to-ensure-lock-wrappers-are-always-i.patch b/0051-locking-attempt-to-ensure-lock-wrappers-are-always-i.patch
deleted file mode 100644
index 822836d..0000000
--- a/0051-locking-attempt-to-ensure-lock-wrappers-are-always-i.patch
+++ /dev/null
@@ -1,405 +0,0 @@
-From 2cc5e57be680a516aa5cdef4281856d09b9d0ea6 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Mon, 4 Mar 2024 14:29:36 +0100
-Subject: [PATCH 51/67] locking: attempt to ensure lock wrappers are always
- inline
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-In order to prevent the locking speculation barriers from being inside of
-`call`ed functions that could be speculatively bypassed.
-
-While there also add an extra locking barrier to _mm_write_lock() in the branch
-taken when the lock is already held.
-
-Note some functions are switched to use the unsafe variants (without speculation
-barrier) of the locking primitives, but a speculation barrier is always added
-to the exposed public lock wrapping helper. That's the case with
-sched_spin_lock_double() or pcidevs_lock() for example.
-
-This is part of XSA-453 / CVE-2024-2193
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 197ecd838a2aaf959a469df3696d4559c4f8b762)
----
- xen/arch/x86/hvm/vpt.c | 10 +++++++---
- xen/arch/x86/include/asm/irq.h | 1 +
- xen/arch/x86/mm/mm-locks.h | 28 +++++++++++++++-------------
- xen/arch/x86/mm/p2m-pod.c | 2 +-
- xen/common/event_channel.c | 5 +++--
- xen/common/grant_table.c | 6 +++---
- xen/common/sched/core.c | 19 ++++++++++++-------
- xen/common/sched/private.h | 26 ++++++++++++++++++++++++--
- xen/common/timer.c | 8 +++++---
- xen/drivers/passthrough/pci.c | 5 +++--
- xen/include/xen/event.h | 4 ++--
- xen/include/xen/pci.h | 8 ++++++--
- 12 files changed, 82 insertions(+), 40 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c
-index cb1d81bf9e..66f1095245 100644
---- a/xen/arch/x86/hvm/vpt.c
-+++ b/xen/arch/x86/hvm/vpt.c
-@@ -161,7 +161,7 @@ static int pt_irq_masked(struct periodic_time *pt)
- * pt->vcpu field, because another thread holding the pt_migrate lock
- * may already be spinning waiting for your vcpu lock.
- */
--static void pt_vcpu_lock(struct vcpu *v)
-+static always_inline void pt_vcpu_lock(struct vcpu *v)
- {
- spin_lock(&v->arch.hvm.tm_lock);
- }
-@@ -180,9 +180,13 @@ static void pt_vcpu_unlock(struct vcpu *v)
- * need to take an additional lock that protects against pt->vcpu
- * changing.
- */
--static void pt_lock(struct periodic_time *pt)
-+static always_inline void pt_lock(struct periodic_time *pt)
- {
-- read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate);
-+ /*
-+ * Use the speculation unsafe variant for the first lock, as the following
-+ * lock taking helper already includes a speculation barrier.
-+ */
-+ _read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate);
- spin_lock(&pt->vcpu->arch.hvm.tm_lock);
- }
-
-diff --git a/xen/arch/x86/include/asm/irq.h b/xen/arch/x86/include/asm/irq.h
-index f6a0207a80..823d627fd0 100644
---- a/xen/arch/x86/include/asm/irq.h
-+++ b/xen/arch/x86/include/asm/irq.h
-@@ -178,6 +178,7 @@ void cf_check irq_complete_move(struct irq_desc *);
-
- extern struct irq_desc *irq_desc;
-
-+/* Not speculation safe, only used for AP bringup. */
- void lock_vector_lock(void);
- void unlock_vector_lock(void);
-
-diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
-index c1523aeccf..265239c49f 100644
---- a/xen/arch/x86/mm/mm-locks.h
-+++ b/xen/arch/x86/mm/mm-locks.h
-@@ -86,8 +86,8 @@ static inline void _set_lock_level(int l)
- this_cpu(mm_lock_level) = l;
- }
-
--static inline void _mm_lock(const struct domain *d, mm_lock_t *l,
-- const char *func, int level, int rec)
-+static always_inline void _mm_lock(const struct domain *d, mm_lock_t *l,
-+ const char *func, int level, int rec)
- {
- if ( !((mm_locked_by_me(l)) && rec) )
- _check_lock_level(d, level);
-@@ -137,8 +137,8 @@ static inline int mm_write_locked_by_me(mm_rwlock_t *l)
- return (l->locker == get_processor_id());
- }
-
--static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
-- const char *func, int level)
-+static always_inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
-+ const char *func, int level)
- {
- if ( !mm_write_locked_by_me(l) )
- {
-@@ -149,6 +149,8 @@ static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
- l->unlock_level = _get_lock_level();
- _set_lock_level(_lock_level(d, level));
- }
-+ else
-+ block_speculation();
- l->recurse_count++;
- }
-
-@@ -162,8 +164,8 @@ static inline void mm_write_unlock(mm_rwlock_t *l)
- percpu_write_unlock(p2m_percpu_rwlock, &l->lock);
- }
-
--static inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l,
-- int level)
-+static always_inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l,
-+ int level)
- {
- _check_lock_level(d, level);
- percpu_read_lock(p2m_percpu_rwlock, &l->lock);
-@@ -178,15 +180,15 @@ static inline void mm_read_unlock(mm_rwlock_t *l)
-
- /* This wrapper uses the line number to express the locking order below */
- #define declare_mm_lock(name) \
-- static inline void mm_lock_##name(const struct domain *d, mm_lock_t *l, \
-- const char *func, int rec) \
-+ static always_inline void mm_lock_##name( \
-+ const struct domain *d, mm_lock_t *l, const char *func, int rec) \
- { _mm_lock(d, l, func, MM_LOCK_ORDER_##name, rec); }
- #define declare_mm_rwlock(name) \
-- static inline void mm_write_lock_##name(const struct domain *d, \
-- mm_rwlock_t *l, const char *func) \
-+ static always_inline void mm_write_lock_##name( \
-+ const struct domain *d, mm_rwlock_t *l, const char *func) \
- { _mm_write_lock(d, l, func, MM_LOCK_ORDER_##name); } \
-- static inline void mm_read_lock_##name(const struct domain *d, \
-- mm_rwlock_t *l) \
-+ static always_inline void mm_read_lock_##name(const struct domain *d, \
-+ mm_rwlock_t *l) \
- { _mm_read_lock(d, l, MM_LOCK_ORDER_##name); }
- /* These capture the name of the calling function */
- #define mm_lock(name, d, l) mm_lock_##name(d, l, __func__, 0)
-@@ -321,7 +323,7 @@ declare_mm_lock(altp2mlist)
- #define MM_LOCK_ORDER_altp2m 40
- declare_mm_rwlock(altp2m);
-
--static inline void p2m_lock(struct p2m_domain *p)
-+static always_inline void p2m_lock(struct p2m_domain *p)
- {
- if ( p2m_is_altp2m(p) )
- mm_write_lock(altp2m, p->domain, &p->lock);
-diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
-index fc110506dc..99dbcb3101 100644
---- a/xen/arch/x86/mm/p2m-pod.c
-+++ b/xen/arch/x86/mm/p2m-pod.c
-@@ -36,7 +36,7 @@
- #define superpage_aligned(_x) (((_x)&(SUPERPAGE_PAGES-1))==0)
-
- /* Enforce lock ordering when grabbing the "external" page_alloc lock */
--static inline void lock_page_alloc(struct p2m_domain *p2m)
-+static always_inline void lock_page_alloc(struct p2m_domain *p2m)
- {
- page_alloc_mm_pre_lock(p2m->domain);
- spin_lock(&(p2m->domain->page_alloc_lock));
-diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
-index f5e0b12d15..dada9f15f5 100644
---- a/xen/common/event_channel.c
-+++ b/xen/common/event_channel.c
-@@ -62,7 +62,7 @@
- * just assume the event channel is free or unbound at the moment when the
- * evtchn_read_trylock() returns false.
- */
--static inline void evtchn_write_lock(struct evtchn *evtchn)
-+static always_inline void evtchn_write_lock(struct evtchn *evtchn)
- {
- write_lock(&evtchn->lock);
-
-@@ -364,7 +364,8 @@ int evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc, evtchn_port_t port)
- return rc;
- }
-
--static void double_evtchn_lock(struct evtchn *lchn, struct evtchn *rchn)
-+static always_inline void double_evtchn_lock(struct evtchn *lchn,
-+ struct evtchn *rchn)
- {
- ASSERT(lchn != rchn);
-
-diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
-index ee7cc496b8..62a8685cd5 100644
---- a/xen/common/grant_table.c
-+++ b/xen/common/grant_table.c
-@@ -410,7 +410,7 @@ static inline void act_set_gfn(struct active_grant_entry *act, gfn_t gfn)
-
- static DEFINE_PERCPU_RWLOCK_GLOBAL(grant_rwlock);
-
--static inline void grant_read_lock(struct grant_table *gt)
-+static always_inline void grant_read_lock(struct grant_table *gt)
- {
- percpu_read_lock(grant_rwlock, &gt->lock);
- }
-@@ -420,7 +420,7 @@ static inline void grant_read_unlock(struct grant_table *gt)
- percpu_read_unlock(grant_rwlock, &gt->lock);
- }
-
--static inline void grant_write_lock(struct grant_table *gt)
-+static always_inline void grant_write_lock(struct grant_table *gt)
- {
- percpu_write_lock(grant_rwlock, &gt->lock);
- }
-@@ -457,7 +457,7 @@ nr_active_grant_frames(struct grant_table *gt)
- return num_act_frames_from_sha_frames(nr_grant_frames(gt));
- }
-
--static inline struct active_grant_entry *
-+static always_inline struct active_grant_entry *
- active_entry_acquire(struct grant_table *t, grant_ref_t e)
- {
- struct active_grant_entry *act;
-diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
-index 078beb1adb..29bbab5ac6 100644
---- a/xen/common/sched/core.c
-+++ b/xen/common/sched/core.c
-@@ -348,23 +348,28 @@ uint64_t get_cpu_idle_time(unsigned int cpu)
- * This avoids dead- or live-locks when this code is running on both
- * cpus at the same time.
- */
--static void sched_spin_lock_double(spinlock_t *lock1, spinlock_t *lock2,
-- unsigned long *flags)
-+static always_inline void sched_spin_lock_double(
-+ spinlock_t *lock1, spinlock_t *lock2, unsigned long *flags)
- {
-+ /*
-+ * In order to avoid extra overhead, use the locking primitives without the
-+ * speculation barrier, and introduce a single barrier here.
-+ */
- if ( lock1 == lock2 )
- {
-- spin_lock_irqsave(lock1, *flags);
-+ *flags = _spin_lock_irqsave(lock1);
- }
- else if ( lock1 < lock2 )
- {
-- spin_lock_irqsave(lock1, *flags);
-- spin_lock(lock2);
-+ *flags = _spin_lock_irqsave(lock1);
-+ _spin_lock(lock2);
- }
- else
- {
-- spin_lock_irqsave(lock2, *flags);
-- spin_lock(lock1);
-+ *flags = _spin_lock_irqsave(lock2);
-+ _spin_lock(lock1);
- }
-+ block_lock_speculation();
- }
-
- static void sched_spin_unlock_double(spinlock_t *lock1, spinlock_t *lock2,
-diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h
-index 0527a8c70d..24a93dd0c1 100644
---- a/xen/common/sched/private.h
-+++ b/xen/common/sched/private.h
-@@ -207,8 +207,24 @@ DECLARE_PER_CPU(cpumask_t, cpumask_scratch);
- #define cpumask_scratch (&this_cpu(cpumask_scratch))
- #define cpumask_scratch_cpu(c) (&per_cpu(cpumask_scratch, c))
-
-+/*
-+ * Deal with _spin_lock_irqsave() returning the flags value instead of storing
-+ * it in a passed parameter.
-+ */
-+#define _sched_spinlock0(lock, irq) _spin_lock##irq(lock)
-+#define _sched_spinlock1(lock, irq, arg) ({ \
-+ BUILD_BUG_ON(sizeof(arg) != sizeof(unsigned long)); \
-+ (arg) = _spin_lock##irq(lock); \
-+})
-+
-+#define _sched_spinlock__(nr) _sched_spinlock ## nr
-+#define _sched_spinlock_(nr) _sched_spinlock__(nr)
-+#define _sched_spinlock(lock, irq, args...) \
-+ _sched_spinlock_(count_args(args))(lock, irq, ## args)
-+
- #define sched_lock(kind, param, cpu, irq, arg...) \
--static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
-+static always_inline spinlock_t \
-+*kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
- { \
- for ( ; ; ) \
- { \
-@@ -220,10 +236,16 @@ static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
- * \
- * It may also be the case that v->processor may change but the \
- * lock may be the same; this will succeed in that case. \
-+ * \
-+ * Use the speculation unsafe locking helper, there's a speculation \
-+ * barrier before returning to the caller. \
- */ \
-- spin_lock##irq(lock, ## arg); \
-+ _sched_spinlock(lock, irq, ## arg); \
- if ( likely(lock == get_sched_res(cpu)->schedule_lock) ) \
-+ { \
-+ block_lock_speculation(); \
- return lock; \
-+ } \
- spin_unlock##irq(lock, ## arg); \
- } \
- }
-diff --git a/xen/common/timer.c b/xen/common/timer.c
-index 9b5016d5ed..459668d417 100644
---- a/xen/common/timer.c
-+++ b/xen/common/timer.c
-@@ -240,7 +240,7 @@ static inline void deactivate_timer(struct timer *timer)
- list_add(&timer->inactive, &per_cpu(timers, timer->cpu).inactive);
- }
-
--static inline bool_t timer_lock(struct timer *timer)
-+static inline bool_t timer_lock_unsafe(struct timer *timer)
- {
- unsigned int cpu;
-
-@@ -254,7 +254,8 @@ static inline bool_t timer_lock(struct timer *timer)
- rcu_read_unlock(&timer_cpu_read_lock);
- return 0;
- }
-- spin_lock(&per_cpu(timers, cpu).lock);
-+ /* Use the speculation unsafe variant, the wrapper has the barrier. */
-+ _spin_lock(&per_cpu(timers, cpu).lock);
- if ( likely(timer->cpu == cpu) )
- break;
- spin_unlock(&per_cpu(timers, cpu).lock);
-@@ -267,8 +268,9 @@ static inline bool_t timer_lock(struct timer *timer)
- #define timer_lock_irqsave(t, flags) ({ \
- bool_t __x; \
- local_irq_save(flags); \
-- if ( !(__x = timer_lock(t)) ) \
-+ if ( !(__x = timer_lock_unsafe(t)) ) \
- local_irq_restore(flags); \
-+ block_lock_speculation(); \
- __x; \
- })
-
-diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
-index 8c62b14d19..1b3d285166 100644
---- a/xen/drivers/passthrough/pci.c
-+++ b/xen/drivers/passthrough/pci.c
-@@ -52,9 +52,10 @@ struct pci_seg {
-
- static spinlock_t _pcidevs_lock = SPIN_LOCK_UNLOCKED;
-
--void pcidevs_lock(void)
-+/* Do not use, as it has no speculation barrier, use pcidevs_lock() instead. */
-+void pcidevs_lock_unsafe(void)
- {
-- spin_lock_recursive(&_pcidevs_lock);
-+ _spin_lock_recursive(&_pcidevs_lock);
- }
-
- void pcidevs_unlock(void)
-diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
-index 8eae9984a9..dd96e84c69 100644
---- a/xen/include/xen/event.h
-+++ b/xen/include/xen/event.h
-@@ -114,12 +114,12 @@ void notify_via_xen_event_channel(struct domain *ld, int lport);
- #define bucket_from_port(d, p) \
- ((group_from_port(d, p))[((p) % EVTCHNS_PER_GROUP) / EVTCHNS_PER_BUCKET])
-
--static inline void evtchn_read_lock(struct evtchn *evtchn)
-+static always_inline void evtchn_read_lock(struct evtchn *evtchn)
- {
- read_lock(&evtchn->lock);
- }
-
--static inline bool evtchn_read_trylock(struct evtchn *evtchn)
-+static always_inline bool evtchn_read_trylock(struct evtchn *evtchn)
- {
- return read_trylock(&evtchn->lock);
- }
-diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
-index 5975ca2f30..b373f139d1 100644
---- a/xen/include/xen/pci.h
-+++ b/xen/include/xen/pci.h
-@@ -155,8 +155,12 @@ struct pci_dev {
- * devices, it also sync the access to the msi capability that is not
- * interrupt handling related (the mask bit register).
- */
--
--void pcidevs_lock(void);
-+void pcidevs_lock_unsafe(void);
-+static always_inline void pcidevs_lock(void)
-+{
-+ pcidevs_lock_unsafe();
-+ block_lock_speculation();
-+}
- void pcidevs_unlock(void);
- bool_t __must_check pcidevs_locked(void);
-
---
-2.44.0
-
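The removed hunk above dispatches a macro by its argument count so that _spin_lock_irqsave()'s returned flags can be assigned when, and only when, a flags argument was passed. The following is a minimal, self-contained sketch of that technique; count_args(), toy_lock() and toy_lock_irqsave() are illustrative stand-ins rather than Xen's real helpers, and GNU C variadic-macro extensions (named varargs, ", ##" comma deletion) are assumed, as they are in the original.

    #include <stdio.h>

    static void toy_lock(int *lock) { (void)lock; }
    static unsigned long toy_lock_irqsave(int *lock) { (void)lock; return 0x246; }

    /* Count 0 or 1 trailing arguments (GNU ", ##" comma deletion). */
    #define count_args_(a0, a1, nr, ...) nr
    #define count_args(args...)          count_args_(0, ##args, 1, 0)

    /* Variant selected by argument count, in the shape of _sched_spinlock(). */
    #define toy_dispatch0(lock)          toy_lock(lock)
    #define toy_dispatch1(lock, arg)     ((arg) = toy_lock_irqsave(lock))

    #define toy_dispatch__(nr)           toy_dispatch##nr
    #define toy_dispatch_(nr)            toy_dispatch__(nr)
    #define toy_dispatch(lock, args...) \
        toy_dispatch_(count_args(args))(lock, ##args)

    int main(void)
    {
        int lock = 0;
        unsigned long flags = 0;

        toy_dispatch(&lock);          /* expands to toy_lock(&lock) */
        toy_dispatch(&lock, flags);   /* expands to flags = toy_lock_irqsave(&lock) */
        printf("flags = %#lx\n", flags);
        return 0;
    }

The double indirection through toy_dispatch_() forces count_args() to expand before token pasting, which is the same trick the original _sched_spinlock_() wrapper relies on.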
diff --git a/0051-x86-entry-don-t-clear-DF-when-raising-UD-for-lack-of.patch b/0051-x86-entry-don-t-clear-DF-when-raising-UD-for-lack-of.patch
new file mode 100644
index 0000000..acefc8e
--- /dev/null
+++ b/0051-x86-entry-don-t-clear-DF-when-raising-UD-for-lack-of.patch
@@ -0,0 +1,58 @@
+From 7e636b8a16412d4f0d94b2b24d7ebcd2c749afff Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Thu, 4 Jul 2024 14:14:49 +0200
+Subject: [PATCH 51/56] x86/entry: don't clear DF when raising #UD for lack of
+ syscall handler
+
+While doing so is intentional when invoking the actual callback, to
+mimic a hard-coded SYSCALL_MASK / FMASK MSR, the same should not be done
+when no handler is available and hence #UD is raised.
+
+Fixes: ca6fcf4321b3 ("x86/pv: Inject #UD for missing SYSCALL callbacks")
+Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: d2fe9ab3048d503869ec81bc49db07e55a4a2386
+master date: 2024-07-02 12:01:21 +0200
+---
+ xen/arch/x86/x86_64/entry.S | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index 054fcb225f..d3def49ea3 100644
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -38,6 +38,14 @@ switch_to_kernel:
+ setc %cl
+ leal (,%rcx,TBF_INTERRUPT),%ecx
+
++ /*
++ * The PV ABI hardcodes the (guest-inaccessible and virtual)
++ * SYSCALL_MASK MSR such that DF (and nothing else) would be cleared.
++ * Note that the equivalent of IF (VGCF_syscall_disables_events) is
++ * dealt with separately above.
++ */
++ mov $~X86_EFLAGS_DF, %esi
++
+ test %rax, %rax
+ UNLIKELY_START(z, syscall_no_callback) /* TB_eip == 0 => #UD */
+ mov VCPU_trap_ctxt(%rbx), %rdi
+@@ -47,12 +55,14 @@ UNLIKELY_START(z, syscall_no_callback) /* TB_eip == 0 => #UD */
+ testb $4, X86_EXC_UD * TRAPINFO_sizeof + TRAPINFO_flags(%rdi)
+ setnz %cl
+ lea TBF_EXCEPTION(, %rcx, TBF_INTERRUPT), %ecx
++ or $~0, %esi /* Don't clear DF */
+ UNLIKELY_END(syscall_no_callback)
+
+ movq %rax,TRAPBOUNCE_eip(%rdx)
+ movb %cl,TRAPBOUNCE_flags(%rdx)
+ call create_bounce_frame
+- andl $~X86_EFLAGS_DF,UREGS_eflags(%rsp)
++ /* Conditionally clear DF */
++ and %esi, UREGS_eflags(%rsp)
+ /* %rbx: struct vcpu */
+ test_all_events:
+ ASSERT_NOT_IN_ATOMIC
+--
+2.45.2
+
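The fix above is pure assembly, but the idea is compact: compute the EFLAGS mask for the bounce frame up front, then widen it to "clear nothing" on the no-callback (#UD) path. A rough C rendering of that logic follows; it is illustrative only, the real code manipulates the saved frame in entry.S and keeps the mask in %esi.

    #include <stdint.h>
    #include <stdbool.h>

    #define X86_EFLAGS_DF (1u << 10)

    /*
     * DF is cleared only when a SYSCALL callback is actually invoked (to mimic
     * a hard-coded FMASK); when #UD is raised instead, the guest's EFLAGS are
     * left untouched.
     */
    static inline uint32_t bounce_frame_eflags(uint32_t eflags, bool have_callback)
    {
        uint32_t mask = have_callback ? ~X86_EFLAGS_DF : ~0u; /* "or $~0, %esi" */

        return eflags & mask;           /* "and %esi, UREGS_eflags(%rsp)" */
    }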
diff --git a/0052-evtchn-build-fix-for-Arm.patch b/0052-evtchn-build-fix-for-Arm.patch
new file mode 100644
index 0000000..6cbeb10
--- /dev/null
+++ b/0052-evtchn-build-fix-for-Arm.patch
@@ -0,0 +1,43 @@
+From 45c5333935628e7c80de0bd5a9d9eff50b305b16 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Thu, 4 Jul 2024 16:57:29 +0200
+Subject: [PATCH 52/56] evtchn: build fix for Arm
+
+When backporting daa90dfea917 ("pirq_cleanup_check() leaks") I neglected
+to pay attention to it depending on 13a7b0f9f747 ("restrict concept of
+pIRQ to x86"). That one doesn't want backporting imo, so use / adjust
+custom #ifdef-ary to address the immediate issue of pirq_cleanup_check()
+not being available on Arm.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+---
+ xen/common/event_channel.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
+index b1a6215c37..e6ec556603 100644
+--- a/xen/common/event_channel.c
++++ b/xen/common/event_channel.c
+@@ -643,7 +643,9 @@ static int evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
+ if ( rc != 0 )
+ {
+ info->evtchn = 0;
++#ifdef CONFIG_X86
+ pirq_cleanup_check(info, d);
++#endif
+ goto out;
+ }
+
+@@ -713,8 +715,8 @@ int evtchn_close(struct domain *d1, int port1, bool guest)
+ * The successful path of unmap_domain_pirq_emuirq() will have
+ * called pirq_cleanup_check() already.
+ */
+-#endif
+ pirq_cleanup_check(pirq, d1);
++#endif
+ }
+ unlink_pirq_port(chn1, d1->vcpu[chn1->notify_vcpu_id]);
+ break;
+--
+2.45.2
+
diff --git a/0052-x86-mm-add-speculation-barriers-to-open-coded-locks.patch b/0052-x86-mm-add-speculation-barriers-to-open-coded-locks.patch
deleted file mode 100644
index 9e20f78..0000000
--- a/0052-x86-mm-add-speculation-barriers-to-open-coded-locks.patch
+++ /dev/null
@@ -1,73 +0,0 @@
-From 074b4c8987db235a0b86798810c045f68e4775b6 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Mon, 4 Mar 2024 18:08:48 +0100
-Subject: [PATCH 52/67] x86/mm: add speculation barriers to open coded locks
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Add a speculation barrier to the clearly identified open-coded lock taking
-functions.
-
-Note that the memory sharing page_lock() replacement (_page_lock()) is left
-as-is, as the code is experimental and not security supported.
-
-This is part of XSA-453 / CVE-2024-2193
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 42a572a38e22a97d86a4b648a22597628d5b42e4)
----
- xen/arch/x86/include/asm/mm.h | 4 +++-
- xen/arch/x86/mm.c | 6 ++++--
- 2 files changed, 7 insertions(+), 3 deletions(-)
-
-diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h
-index a5d7fdd32e..5845b729c3 100644
---- a/xen/arch/x86/include/asm/mm.h
-+++ b/xen/arch/x86/include/asm/mm.h
-@@ -393,7 +393,9 @@ const struct platform_bad_page *get_platform_badpages(unsigned int *array_size);
- * The use of PGT_locked in mem_sharing does not collide, since mem_sharing is
- * only supported for hvm guests, which do not have PV PTEs updated.
- */
--int page_lock(struct page_info *page);
-+int page_lock_unsafe(struct page_info *page);
-+#define page_lock(pg) lock_evaluate_nospec(page_lock_unsafe(pg))
-+
- void page_unlock(struct page_info *page);
-
- void put_page_type(struct page_info *page);
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index 330c4abcd1..8d19d719bd 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -2033,7 +2033,7 @@ static inline bool current_locked_page_ne_check(struct page_info *page) {
- #define current_locked_page_ne_check(x) true
- #endif
-
--int page_lock(struct page_info *page)
-+int page_lock_unsafe(struct page_info *page)
- {
- unsigned long x, nx;
-
-@@ -2094,7 +2094,7 @@ void page_unlock(struct page_info *page)
- * l3t_lock(), so to avoid deadlock we must avoid grabbing them in
- * reverse order.
- */
--static void l3t_lock(struct page_info *page)
-+static always_inline void l3t_lock(struct page_info *page)
- {
- unsigned long x, nx;
-
-@@ -2103,6 +2103,8 @@ static void l3t_lock(struct page_info *page)
- cpu_relax();
- nx = x | PGT_locked;
- } while ( cmpxchg(&page->u.inuse.type_info, x, nx) != x );
-+
-+ block_lock_speculation();
- }
-
- static void l3t_unlock(struct page_info *page)
---
-2.44.0
-
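For reference, the open-coded locks hardened by the deleted patch follow the cmpxchg-loop shape below; the point of the change is the barrier issued once the "locked" bit is known to be owned. This is a stand-alone sketch: block_lock_speculation() is stubbed out (in Xen it is an lfence-based barrier) and toy_l3t_lock() only mimics l3t_lock()'s structure.

    #include <stdatomic.h>

    #define PGT_locked 0x1u

    /* Stand-in for Xen's speculation barrier; a compiler barrier only here. */
    static inline void block_lock_speculation(void)
    {
        __asm__ volatile ( "" ::: "memory" );
    }

    struct toy_page { _Atomic unsigned int type_info; };

    static void toy_l3t_lock(struct toy_page *page)
    {
        unsigned int x, nx;

        do {
            while ( (x = atomic_load(&page->type_info)) & PGT_locked )
                ;                               /* cpu_relax() in the original */
            nx = x | PGT_locked;
        } while ( !atomic_compare_exchange_strong(&page->type_info, &x, nx) );

        /* Only now may protected data be touched without a speculation hazard. */
        block_lock_speculation();
    }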
diff --git a/0053-x86-IRQ-avoid-double-unlock-in-map_domain_pirq.patch b/0053-x86-IRQ-avoid-double-unlock-in-map_domain_pirq.patch
new file mode 100644
index 0000000..686e142
--- /dev/null
+++ b/0053-x86-IRQ-avoid-double-unlock-in-map_domain_pirq.patch
@@ -0,0 +1,53 @@
+From d46a1ce3175dc45e97a8c9b89b0d0ff46145ae64 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 16 Jul 2024 14:14:43 +0200
+Subject: [PATCH 53/56] x86/IRQ: avoid double unlock in map_domain_pirq()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Forever since its introduction the main loop in the function dealing
+with multi-vector MSI had error exit points ("break") with different
+properties: In one case no IRQ descriptor lock is being held.
+Nevertheless the subsequent error cleanup path assumed such a lock would
+uniformly need releasing. Identify the case by setting "desc" to NULL,
+thus allowing the unlock to be skipped as necessary.
+
+This is CVE-2024-31143 / XSA-458.
+
+Coverity ID: 1605298
+Fixes: d1b6d0a02489 ("x86: enable multi-vector MSI")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: 57338346f29cea7b183403561bdc5f407163b846
+master date: 2024-07-16 14:09:14 +0200
+---
+ xen/arch/x86/irq.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
+index 00be3b88e8..5dae8bd1b9 100644
+--- a/xen/arch/x86/irq.c
++++ b/xen/arch/x86/irq.c
+@@ -2287,6 +2287,7 @@ int map_domain_pirq(
+
+ set_domain_irq_pirq(d, irq, info);
+ spin_unlock_irqrestore(&desc->lock, flags);
++ desc = NULL;
+
+ info = NULL;
+ irq = create_irq(NUMA_NO_NODE, true);
+@@ -2322,7 +2323,9 @@ int map_domain_pirq(
+
+ if ( ret )
+ {
+- spin_unlock_irqrestore(&desc->lock, flags);
++ if ( desc )
++ spin_unlock_irqrestore(&desc->lock, flags);
++
+ pci_disable_msi(msi_desc);
+ if ( nr )
+ {
+--
+2.45.2
+
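The defensive shape used above, dropping the lock on the main path and then forgetting the pointer so a shared error exit can tell whether an unlock is still owed, condenses to the sketch below. The names and the pthread mutex are stand-ins; in the real function a later loop iteration may re-acquire a descriptor lock, which is why the error path must stay conditional.

    #include <stddef.h>
    #include <pthread.h>

    struct toy_desc { pthread_mutex_t lock; };

    static int toy_map(struct toy_desc *first, int (*setup)(struct toy_desc **))
    {
        struct toy_desc *desc = first;
        int ret;

        pthread_mutex_lock(&desc->lock);
        /* ... program the first descriptor ... */
        pthread_mutex_unlock(&desc->lock);
        desc = NULL;                     /* nothing left to unlock right now */

        ret = setup(&desc);              /* may fail with or without a lock held */

        if ( ret )
        {
            if ( desc )                  /* unlock only if this exit owns a lock */
                pthread_mutex_unlock(&desc->lock);
            /* ... common error cleanup ... */
        }

        return ret;
    }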
diff --git a/0053-x86-protect-conditional-lock-taking-from-speculative.patch b/0053-x86-protect-conditional-lock-taking-from-speculative.patch
deleted file mode 100644
index f0caa24..0000000
--- a/0053-x86-protect-conditional-lock-taking-from-speculative.patch
+++ /dev/null
@@ -1,216 +0,0 @@
-From 0ebd2e49bcd0f566ba6b9158555942aab8e41332 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Mon, 4 Mar 2024 16:24:21 +0100
-Subject: [PATCH 53/67] x86: protect conditional lock taking from speculative
- execution
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Conditionally taken locks that use the pattern:
-
-if ( lock )
- spin_lock(...);
-
-Need an else branch in order to issue an speculation barrier in the else case,
-just like it's done in case the lock needs to be acquired.
-
-eval_nospec() could be used on the condition itself, but that would result in a
-double barrier on the branch where the lock is taken.
-
-Introduce a new pair of helpers, {gfn,spin}_lock_if() that can be used to
-conditionally take a lock in a speculation safe way.
-
-This is part of XSA-453 / CVE-2024-2193
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 03cf7ca23e0e876075954c558485b267b7d02406)
----
- xen/arch/x86/mm.c | 35 +++++++++++++----------------------
- xen/arch/x86/mm/mm-locks.h | 9 +++++++++
- xen/arch/x86/mm/p2m.c | 5 ++---
- xen/include/xen/spinlock.h | 8 ++++++++
- 4 files changed, 32 insertions(+), 25 deletions(-)
-
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index 8d19d719bd..d31b8d56ff 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -5023,8 +5023,7 @@ static l3_pgentry_t *virt_to_xen_l3e(unsigned long v)
- if ( !l3t )
- return NULL;
- UNMAP_DOMAIN_PAGE(l3t);
-- if ( locking )
-- spin_lock(&map_pgdir_lock);
-+ spin_lock_if(locking, &map_pgdir_lock);
- if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
- {
- l4_pgentry_t l4e = l4e_from_mfn(l3mfn, __PAGE_HYPERVISOR);
-@@ -5061,8 +5060,7 @@ static l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
- return NULL;
- }
- UNMAP_DOMAIN_PAGE(l2t);
-- if ( locking )
-- spin_lock(&map_pgdir_lock);
-+ spin_lock_if(locking, &map_pgdir_lock);
- if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
- {
- l3e_write(pl3e, l3e_from_mfn(l2mfn, __PAGE_HYPERVISOR));
-@@ -5100,8 +5098,7 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v)
- return NULL;
- }
- UNMAP_DOMAIN_PAGE(l1t);
-- if ( locking )
-- spin_lock(&map_pgdir_lock);
-+ spin_lock_if(locking, &map_pgdir_lock);
- if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
- {
- l2e_write(pl2e, l2e_from_mfn(l1mfn, __PAGE_HYPERVISOR));
-@@ -5132,6 +5129,8 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v)
- do { \
- if ( locking ) \
- l3t_lock(page); \
-+ else \
-+ block_lock_speculation(); \
- } while ( false )
-
- #define L3T_UNLOCK(page) \
-@@ -5347,8 +5346,7 @@ int map_pages_to_xen(
- if ( l3e_get_flags(ol3e) & _PAGE_GLOBAL )
- flush_flags |= FLUSH_TLB_GLOBAL;
-
-- if ( locking )
-- spin_lock(&map_pgdir_lock);
-+ spin_lock_if(locking, &map_pgdir_lock);
- if ( (l3e_get_flags(*pl3e) & _PAGE_PRESENT) &&
- (l3e_get_flags(*pl3e) & _PAGE_PSE) )
- {
-@@ -5452,8 +5450,7 @@ int map_pages_to_xen(
- if ( l2e_get_flags(*pl2e) & _PAGE_GLOBAL )
- flush_flags |= FLUSH_TLB_GLOBAL;
-
-- if ( locking )
-- spin_lock(&map_pgdir_lock);
-+ spin_lock_if(locking, &map_pgdir_lock);
- if ( (l2e_get_flags(*pl2e) & _PAGE_PRESENT) &&
- (l2e_get_flags(*pl2e) & _PAGE_PSE) )
- {
-@@ -5494,8 +5491,7 @@ int map_pages_to_xen(
- unsigned long base_mfn;
- const l1_pgentry_t *l1t;
-
-- if ( locking )
-- spin_lock(&map_pgdir_lock);
-+ spin_lock_if(locking, &map_pgdir_lock);
-
- ol2e = *pl2e;
- /*
-@@ -5549,8 +5545,7 @@ int map_pages_to_xen(
- unsigned long base_mfn;
- const l2_pgentry_t *l2t;
-
-- if ( locking )
-- spin_lock(&map_pgdir_lock);
-+ spin_lock_if(locking, &map_pgdir_lock);
-
- ol3e = *pl3e;
- /*
-@@ -5694,8 +5689,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
- l3e_get_flags(*pl3e)));
- UNMAP_DOMAIN_PAGE(l2t);
-
-- if ( locking )
-- spin_lock(&map_pgdir_lock);
-+ spin_lock_if(locking, &map_pgdir_lock);
- if ( (l3e_get_flags(*pl3e) & _PAGE_PRESENT) &&
- (l3e_get_flags(*pl3e) & _PAGE_PSE) )
- {
-@@ -5754,8 +5748,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
- l2e_get_flags(*pl2e) & ~_PAGE_PSE));
- UNMAP_DOMAIN_PAGE(l1t);
-
-- if ( locking )
-- spin_lock(&map_pgdir_lock);
-+ spin_lock_if(locking, &map_pgdir_lock);
- if ( (l2e_get_flags(*pl2e) & _PAGE_PRESENT) &&
- (l2e_get_flags(*pl2e) & _PAGE_PSE) )
- {
-@@ -5799,8 +5792,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
- */
- if ( (nf & _PAGE_PRESENT) || ((v != e) && (l1_table_offset(v) != 0)) )
- continue;
-- if ( locking )
-- spin_lock(&map_pgdir_lock);
-+ spin_lock_if(locking, &map_pgdir_lock);
-
- /*
- * L2E may be already cleared, or set to a superpage, by
-@@ -5847,8 +5839,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
- if ( (nf & _PAGE_PRESENT) ||
- ((v != e) && (l2_table_offset(v) + l1_table_offset(v) != 0)) )
- continue;
-- if ( locking )
-- spin_lock(&map_pgdir_lock);
-+ spin_lock_if(locking, &map_pgdir_lock);
-
- /*
- * L3E may be already cleared, or set to a superpage, by
-diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
-index 265239c49f..3ea2d8eb03 100644
---- a/xen/arch/x86/mm/mm-locks.h
-+++ b/xen/arch/x86/mm/mm-locks.h
-@@ -347,6 +347,15 @@ static inline void p2m_unlock(struct p2m_domain *p)
- #define p2m_locked_by_me(p) mm_write_locked_by_me(&(p)->lock)
- #define gfn_locked_by_me(p,g) p2m_locked_by_me(p)
-
-+static always_inline void gfn_lock_if(bool condition, struct p2m_domain *p2m,
-+ gfn_t gfn, unsigned int order)
-+{
-+ if ( condition )
-+ gfn_lock(p2m, gfn, order);
-+ else
-+ block_lock_speculation();
-+}
-+
- /* PoD lock (per-p2m-table)
- *
- * Protects private PoD data structs: entry and cache
-diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
-index b28c899b5e..1fa9e01012 100644
---- a/xen/arch/x86/mm/p2m.c
-+++ b/xen/arch/x86/mm/p2m.c
-@@ -292,9 +292,8 @@ mfn_t p2m_get_gfn_type_access(struct p2m_domain *p2m, gfn_t gfn,
- if ( q & P2M_UNSHARE )
- q |= P2M_ALLOC;
-
-- if ( locked )
-- /* Grab the lock here, don't release until put_gfn */
-- gfn_lock(p2m, gfn, 0);
-+ /* Grab the lock here, don't release until put_gfn */
-+ gfn_lock_if(locked, p2m, gfn, 0);
-
- mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
-
-diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
-index daf48fdea7..7e75d0e2e7 100644
---- a/xen/include/xen/spinlock.h
-+++ b/xen/include/xen/spinlock.h
-@@ -216,6 +216,14 @@ static always_inline void spin_lock_irq(spinlock_t *l)
- block_lock_speculation(); \
- })
-
-+/* Conditionally take a spinlock in a speculation safe way. */
-+static always_inline void spin_lock_if(bool condition, spinlock_t *l)
-+{
-+ if ( condition )
-+ _spin_lock(l);
-+ block_lock_speculation();
-+}
-+
- #define spin_unlock(l) _spin_unlock(l)
- #define spin_unlock_irq(l) _spin_unlock_irq(l)
- #define spin_unlock_irqrestore(l, f) _spin_unlock_irqrestore(l, f)
---
-2.44.0
-
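A standalone sketch of the conditional-lock helper the deleted patch introduced, showing why it exists: both branches end in exactly one speculation barrier, whereas a bare "if ( locking ) spin_lock(...)" leaves the else path unprotected. toy_spin_lock() and the stubbed barrier are illustrative only.

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Stand-in for Xen's lfence-based speculation barrier. */
    static inline void block_lock_speculation(void)
    {
        __asm__ volatile ( "" ::: "memory" );
    }

    typedef struct { atomic_flag busy; } toy_spinlock_t;

    static inline void toy_spin_lock(toy_spinlock_t *l)
    {
        while ( atomic_flag_test_and_set_explicit(&l->busy, memory_order_acquire) )
            ;
    }

    /*
     * Conditionally take a lock in a speculation-safe way: exactly one barrier
     * is issued whether or not the lock is taken.
     */
    static inline void toy_spin_lock_if(bool condition, toy_spinlock_t *l)
    {
        if ( condition )
            toy_spin_lock(l);
        block_lock_speculation();
    }

Note that the deleted hunks only touch the lock side of each call site; the matching conditional unlocks are left as plain "if ( locking )" statements.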
diff --git a/0054-tools-ipxe-update-for-fixing-build-with-GCC12.patch b/0054-tools-ipxe-update-for-fixing-build-with-GCC12.patch
deleted file mode 100644
index 90efaf8..0000000
--- a/0054-tools-ipxe-update-for-fixing-build-with-GCC12.patch
+++ /dev/null
@@ -1,33 +0,0 @@
-From a01c0b0f9691a8350e74938329892f949669119e Mon Sep 17 00:00:00 2001
-From: Olaf Hering <olaf@aepfle.de>
-Date: Wed, 27 Mar 2024 12:27:03 +0100
-Subject: [PATCH 54/67] tools: ipxe: update for fixing build with GCC12
-
-Use a snapshot which includes commit
-b0ded89e917b48b73097d3b8b88dfa3afb264ed0 ("[build] Disable dangling
-pointer checking for GCC"), which fixes build with gcc12.
-
-Signed-off-by: Olaf Hering <olaf@aepfle.de>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: 18a36b4a9b088875486cfe33a2d4a8ae7eb4ab47
-master date: 2023-04-25 23:47:45 +0100
----
- tools/firmware/etherboot/Makefile | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/tools/firmware/etherboot/Makefile b/tools/firmware/etherboot/Makefile
-index 4bc3633ba3..7a56fe8014 100644
---- a/tools/firmware/etherboot/Makefile
-+++ b/tools/firmware/etherboot/Makefile
-@@ -11,7 +11,7 @@ IPXE_GIT_URL ?= git://git.ipxe.org/ipxe.git
- endif
-
- # put an updated tar.gz on xenbits after changes to this variable
--IPXE_GIT_TAG := 3c040ad387099483102708bb1839110bc788cefb
-+IPXE_GIT_TAG := 1d1cf74a5e58811822bee4b3da3cff7282fcdfca
-
- IPXE_TARBALL_URL ?= $(XEN_EXTFILES_URL)/ipxe-git-$(IPXE_GIT_TAG).tar.gz
-
---
-2.44.0
-
diff --git a/0054-x86-physdev-Return-pirq-that-irq-was-already-mapped-.patch b/0054-x86-physdev-Return-pirq-that-irq-was-already-mapped-.patch
new file mode 100644
index 0000000..5e245f9
--- /dev/null
+++ b/0054-x86-physdev-Return-pirq-that-irq-was-already-mapped-.patch
@@ -0,0 +1,38 @@
+From f9f3062f11e144438fac9e9da6aa4cb41a6009b1 Mon Sep 17 00:00:00 2001
+From: Jiqian Chen <Jiqian.Chen@amd.com>
+Date: Thu, 25 Jul 2024 16:20:17 +0200
+Subject: [PATCH 54/56] x86/physdev: Return pirq that irq was already mapped to
+
+Fix bug introduced by 0762e2502f1f ("x86/physdev: factor out the code to allocate and
+map a pirq"). After that re-factoring, when pirq<0 and current_pirq>0, it means
+caller want to allocate a free pirq for irq but irq already has a mapped pirq, then
+it returns the negative pirq, so it fails. However, the logic before that
+re-factoring is different, it should return the current_pirq that irq was already
+mapped to and make the call success.
+
+Fixes: 0762e2502f1f ("x86/physdev: factor out the code to allocate and map a pirq")
+Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
+Signed-off-by: Huang Rui <ray.huang@amd.com>
+Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 0d2b87b5adfc19e87e9027d996db204c66a47f30
+master date: 2024-07-08 14:46:12 +0100
+---
+ xen/arch/x86/irq.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
+index 5dae8bd1b9..6b1f338eae 100644
+--- a/xen/arch/x86/irq.c
++++ b/xen/arch/x86/irq.c
+@@ -2914,6 +2914,7 @@ static int allocate_pirq(struct domain *d, int index, int pirq, int irq,
+ d->domain_id, index, pirq, current_pirq);
+ if ( current_pirq < 0 )
+ return -EBUSY;
++ pirq = current_pirq;
+ }
+ else if ( type == MAP_PIRQ_TYPE_MULTI_MSI )
+ {
+--
+2.45.2
+
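Condensed, the fixed control flow in allocate_pirq() reads as below. This is only a sketch with hypothetical values, in particular the literal standing in for get_free_pirq(); it is meant to show where the added assignment sits.

    #include <errno.h>

    /* pirq < 0 means "allocate any"; current_pirq is the existing mapping, if any. */
    static int toy_allocate_pirq(int pirq, int current_pirq)
    {
        if ( pirq < 0 )
        {
            if ( current_pirq )
            {
                if ( current_pirq < 0 )
                    return -EBUSY;
                pirq = current_pirq;   /* the one-line fix: reuse the mapping */
            }
            else
                pirq = 42;             /* stand-in for get_free_pirq() */
        }

        return pirq;
    }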
diff --git a/0055-tools-libxs-Fix-fcntl-invocation-in-set_cloexec.patch b/0055-tools-libxs-Fix-fcntl-invocation-in-set_cloexec.patch
new file mode 100644
index 0000000..e4cc09e
--- /dev/null
+++ b/0055-tools-libxs-Fix-fcntl-invocation-in-set_cloexec.patch
@@ -0,0 +1,57 @@
+From 81f1e807fadb8111d71b78191e01ca688d74eac7 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Thu, 25 Jul 2024 16:20:53 +0200
+Subject: [PATCH 55/56] tools/libxs: Fix fcntl() invocation in set_cloexec()
+
+set_cloexec() had a bit too much copy&paste from setnonblock(), and
+insufficient testing on ancient versions of Linux...
+
+As written (emulating ancient linux by undef'ing O_CLOEXEC), strace shows:
+
+ open("/dev/xen/xenbus", O_RDWR) = 3
+ fcntl(3, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
+ fcntl(3, 0x8003 /* F_??? */, 0x7ffe4a771d90) = -1 EINVAL (Invalid argument)
+ close(3) = 0
+
+which is obviously nonsense.
+
+Switch F_GETFL -> F_GETFD, and fix the second invocation to use F_SETFD. With
+this, strace is rather happer:
+
+ open("/dev/xen/xenbus", O_RDWR) = 3
+ fcntl(3, F_GETFD) = 0
+ fcntl(3, F_SETFD, FD_CLOEXEC) = 0
+
+Fixes: bf7c1464706a ("tools/libxs: Fix CLOEXEC handling in get_dev()")
+Reported-by: Ross Lagerwall <ross.lagerwall@citrix.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+master commit: 37810b52d003f8a04af41d7b1f85eff24af9f804
+master date: 2024-07-09 15:32:18 +0100
+---
+ tools/libs/store/xs.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/tools/libs/store/xs.c b/tools/libs/store/xs.c
+index c8845b69e2..38a6ce3cf2 100644
+--- a/tools/libs/store/xs.c
++++ b/tools/libs/store/xs.c
+@@ -182,12 +182,12 @@ static bool setnonblock(int fd, int nonblock) {
+
+ static bool set_cloexec(int fd)
+ {
+- int flags = fcntl(fd, F_GETFL);
++ int flags = fcntl(fd, F_GETFD);
+
+ if (flags < 0)
+ return false;
+
+- return fcntl(fd, flags | FD_CLOEXEC) >= 0;
++ return fcntl(fd, F_SETFD, flags | FD_CLOEXEC) >= 0;
+ }
+
+ static int pipe_cloexec(int fds[2])
+--
+2.45.2
+
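For context, the corrected helper is only reached on the fallback path where open() lacks O_CLOEXEC, the situation the commit message reproduces by undefining it. A self-contained sketch of that usage follows; open_xenbus() is an illustrative wrapper, not the library's real get_dev().

    #include <fcntl.h>
    #include <stdbool.h>
    #include <unistd.h>

    static bool set_cloexec(int fd)
    {
        int flags = fcntl(fd, F_GETFD);           /* FD flags, not F_GETFL */

        return flags >= 0 && fcntl(fd, F_SETFD, flags | FD_CLOEXEC) >= 0;
    }

    static int open_xenbus(const char *path)
    {
    #ifdef O_CLOEXEC
        return open(path, O_RDWR | O_CLOEXEC);    /* preferred: atomic */
    #else
        int fd = open(path, O_RDWR);

        if ( fd >= 0 && !set_cloexec(fd) )
        {
            close(fd);
            fd = -1;
        }

        return fd;
    #endif
    }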
diff --git a/0055-x86-mm-use-block_lock_speculation-in-_mm_write_lock.patch b/0055-x86-mm-use-block_lock_speculation-in-_mm_write_lock.patch
deleted file mode 100644
index 719234c..0000000
--- a/0055-x86-mm-use-block_lock_speculation-in-_mm_write_lock.patch
+++ /dev/null
@@ -1,35 +0,0 @@
-From a153b8b42e9027ba3057bc7c8bf55e4d71e86ec3 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Wed, 27 Mar 2024 12:28:24 +0100
-Subject: [PATCH 55/67] x86/mm: use block_lock_speculation() in
- _mm_write_lock()
-
-I can only guess that using block_speculation() there was a leftover
-from, earlier on, SPECULATIVE_HARDEN_LOCK depending on
-SPECULATIVE_HARDEN_BRANCH.
-
-Fixes: 197ecd838a2a ("locking: attempt to ensure lock wrappers are always inline")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: 62018f08708a5ff6ef8fc8ff2aaaac46e5a60430
-master date: 2024-03-18 13:53:37 +0100
----
- xen/arch/x86/mm/mm-locks.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
-index 3ea2d8eb03..7d6e4d2a7c 100644
---- a/xen/arch/x86/mm/mm-locks.h
-+++ b/xen/arch/x86/mm/mm-locks.h
-@@ -150,7 +150,7 @@ static always_inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
- _set_lock_level(_lock_level(d, level));
- }
- else
-- block_speculation();
-+ block_lock_speculation();
- l->recurse_count++;
- }
-
---
-2.44.0
-
diff --git a/0056-x86-altcall-fix-clang-code-gen-when-using-altcall-in.patch b/0056-x86-altcall-fix-clang-code-gen-when-using-altcall-in.patch
new file mode 100644
index 0000000..c94c516
--- /dev/null
+++ b/0056-x86-altcall-fix-clang-code-gen-when-using-altcall-in.patch
@@ -0,0 +1,85 @@
+From d078d0aa86e9e3b937f673dc89306b3afd09d560 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Thu, 25 Jul 2024 16:21:17 +0200
+Subject: [PATCH 56/56] x86/altcall: fix clang code-gen when using altcall in
+ loop constructs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Yet another clang code generation issue when using altcalls.
+
+The issue this time is with using loop constructs around alternative_{,v}call
+instances using parameter types smaller than the register size.
+
+Given the following example code:
+
+static void bar(bool b)
+{
+ unsigned int i;
+
+ for ( i = 0; i < 10; i++ )
+ {
+ int ret_;
+ register union {
+ bool e;
+ unsigned long r;
+ } di asm("rdi") = { .e = b };
+ register unsigned long si asm("rsi");
+ register unsigned long dx asm("rdx");
+ register unsigned long cx asm("rcx");
+ register unsigned long r8 asm("r8");
+ register unsigned long r9 asm("r9");
+ register unsigned long r10 asm("r10");
+ register unsigned long r11 asm("r11");
+
+ asm volatile ( "call %c[addr]"
+ : "+r" (di), "=r" (si), "=r" (dx),
+ "=r" (cx), "=r" (r8), "=r" (r9),
+ "=r" (r10), "=r" (r11), "=a" (ret_)
+ : [addr] "i" (&(func)), "g" (func)
+ : "memory" );
+ }
+}
+
+See: https://godbolt.org/z/qvxMGd84q
+
+Clang will generate machine code that only resets the low 8 bits of %rdi
+between loop calls, leaving the rest of the register possibly containing
+garbage from the use of %rdi inside the called function. Note also that clang
+doesn't truncate the input parameters at the callee, thus breaking the psABI.
+
+Fix this by turning the `e` element in the anonymous union into an array that
+consumes the same space as an unsigned long, as this forces clang to reset the
+whole %rdi register instead of just the low 8 bits.
+
+Fixes: 2ce562b2a413 ('x86/altcall: use a union as register type for function parameters on clang')
+Suggested-by: Jan Beulich <jbeulich@suse.com>
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: d51b2f5ea1915fe058f730b0ec542cf84254fca0
+master date: 2024-07-23 13:59:30 +0200
+---
+ xen/arch/x86/include/asm/alternative.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/alternative.h b/xen/arch/x86/include/asm/alternative.h
+index 0d3697f1de..e63b459276 100644
+--- a/xen/arch/x86/include/asm/alternative.h
++++ b/xen/arch/x86/include/asm/alternative.h
+@@ -185,10 +185,10 @@ extern void alternative_branches(void);
+ */
+ #define ALT_CALL_ARG(arg, n) \
+ register union { \
+- typeof(arg) e; \
++ typeof(arg) e[sizeof(long) / sizeof(arg)]; \
+ unsigned long r; \
+ } a ## n ## _ asm ( ALT_CALL_arg ## n ) = { \
+- .e = ({ BUILD_BUG_ON(sizeof(arg) > sizeof(void *)); (arg); }) \
++ .e[0] = ({ BUILD_BUG_ON(sizeof(arg) > sizeof(void *)); (arg); })\
+ }
+ #else
+ #define ALT_CALL_ARG(arg, n) \
+--
+2.45.2
+
diff --git a/0056-x86-boot-Fix-setup_apic_nmi_watchdog-to-fail-more-cl.patch b/0056-x86-boot-Fix-setup_apic_nmi_watchdog-to-fail-more-cl.patch
deleted file mode 100644
index 5d549c1..0000000
--- a/0056-x86-boot-Fix-setup_apic_nmi_watchdog-to-fail-more-cl.patch
+++ /dev/null
@@ -1,120 +0,0 @@
-From 471b53c6a092940f3629990d9ca946aa22bd8535 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Wed, 27 Mar 2024 12:29:11 +0100
-Subject: [PATCH 56/67] x86/boot: Fix setup_apic_nmi_watchdog() to fail more
- cleanly
-
-Right now, if the user requests the watchdog on the command line,
-setup_apic_nmi_watchdog() will blindly assume that setting up the watchdog
-worked. Reuse nmi_perfctr_msr to identify when the watchdog has been
-configured.
-
-Rearrange setup_p6_watchdog() to not set nmi_perfctr_msr until the sanity
-checks are complete. Turn setup_p4_watchdog() into a void function, matching
-the others.
-
-If the watchdog isn't set up, inform the user and override to NMI_NONE, which
-will prevent check_nmi_watchdog() from claiming that all CPUs are stuck.
-
-e.g.:
-
- (XEN) alt table ffff82d040697c38 -> ffff82d0406a97f0
- (XEN) Failed to configure NMI watchdog
- (XEN) Brought up 512 CPUs
- (XEN) Scheduling granularity: cpu, 1 CPU per sched-resource
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: f658321374687c7339235e1ac643e0427acff717
-master date: 2024-03-19 18:29:37 +0000
----
- xen/arch/x86/nmi.c | 25 ++++++++++++-------------
- 1 file changed, 12 insertions(+), 13 deletions(-)
-
-diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c
-index 7656023748..7c9591b65e 100644
---- a/xen/arch/x86/nmi.c
-+++ b/xen/arch/x86/nmi.c
-@@ -323,8 +323,6 @@ static void setup_p6_watchdog(unsigned counter)
- {
- unsigned int evntsel;
-
-- nmi_perfctr_msr = MSR_P6_PERFCTR(0);
--
- if ( !nmi_p6_event_width && current_cpu_data.cpuid_level >= 0xa )
- nmi_p6_event_width = MASK_EXTR(cpuid_eax(0xa), P6_EVENT_WIDTH_MASK);
- if ( !nmi_p6_event_width )
-@@ -334,6 +332,8 @@ static void setup_p6_watchdog(unsigned counter)
- nmi_p6_event_width > BITS_PER_LONG )
- return;
-
-+ nmi_perfctr_msr = MSR_P6_PERFCTR(0);
-+
- clear_msr_range(MSR_P6_EVNTSEL(0), 2);
- clear_msr_range(MSR_P6_PERFCTR(0), 2);
-
-@@ -349,13 +349,13 @@ static void setup_p6_watchdog(unsigned counter)
- wrmsr(MSR_P6_EVNTSEL(0), evntsel, 0);
- }
-
--static int setup_p4_watchdog(void)
-+static void setup_p4_watchdog(void)
- {
- uint64_t misc_enable;
-
- rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
- if (!(misc_enable & MSR_IA32_MISC_ENABLE_PERF_AVAIL))
-- return 0;
-+ return;
-
- nmi_perfctr_msr = MSR_P4_IQ_PERFCTR0;
- nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
-@@ -378,13 +378,12 @@ static int setup_p4_watchdog(void)
- clear_msr_range(0x3E0, 2);
- clear_msr_range(MSR_P4_BPU_CCCR0, 18);
- clear_msr_range(MSR_P4_BPU_PERFCTR0, 18);
--
-+
- wrmsrl(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0);
- wrmsrl(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE);
- write_watchdog_counter("P4_IQ_COUNTER0");
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- wrmsrl(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val);
-- return 1;
- }
-
- void setup_apic_nmi_watchdog(void)
-@@ -399,8 +398,6 @@ void setup_apic_nmi_watchdog(void)
- case 0xf ... 0x19:
- setup_k7_watchdog();
- break;
-- default:
-- return;
- }
- break;
- case X86_VENDOR_INTEL:
-@@ -411,14 +408,16 @@ void setup_apic_nmi_watchdog(void)
- : CORE_EVENT_CPU_CLOCKS_NOT_HALTED);
- break;
- case 15:
-- if (!setup_p4_watchdog())
-- return;
-+ setup_p4_watchdog();
- break;
-- default:
-- return;
- }
- break;
-- default:
-+ }
-+
-+ if ( nmi_perfctr_msr == 0 )
-+ {
-+ printk(XENLOG_WARNING "Failed to configure NMI watchdog\n");
-+ nmi_watchdog = NMI_NONE;
- return;
- }
-
---
-2.44.0
-
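The structural change in the deleted patch is to treat nmi_perfctr_msr as a "configuration succeeded" sentinel and to fall back to NMI_NONE otherwise. A compressed sketch of that shape is below; the MSR value, the hw_ok flag and printf() stand in for the real per-vendor probing and printk().

    #include <stdio.h>
    #include <stdbool.h>

    enum nmi_mode { NMI_NONE, NMI_LOCAL_APIC };

    static enum nmi_mode nmi_watchdog = NMI_LOCAL_APIC;  /* user asked for it */
    static unsigned int nmi_perfctr_msr;                 /* 0 == not configured */

    static void toy_setup_vendor_watchdog(bool hw_ok)
    {
        if ( !hw_ok )
            return;                  /* bail before claiming the MSR */

        nmi_perfctr_msr = 0xc1;      /* illustrative MSR index */
        /* ... program the performance counter ... */
    }

    static void toy_setup_apic_nmi_watchdog(bool hw_ok)
    {
        toy_setup_vendor_watchdog(hw_ok);

        if ( nmi_perfctr_msr == 0 )
        {
            printf("Failed to configure NMI watchdog\n");
            nmi_watchdog = NMI_NONE; /* keep check_nmi_watchdog() from misfiring */
        }
    }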
diff --git a/0057-x86-PoD-tie-together-P2M-update-and-increment-of-ent.patch b/0057-x86-PoD-tie-together-P2M-update-and-increment-of-ent.patch
deleted file mode 100644
index dedc1c2..0000000
--- a/0057-x86-PoD-tie-together-P2M-update-and-increment-of-ent.patch
+++ /dev/null
@@ -1,61 +0,0 @@
-From bfb69205376d94ff91b09a337c47fb665ee12da3 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Wed, 27 Mar 2024 12:29:33 +0100
-Subject: [PATCH 57/67] x86/PoD: tie together P2M update and increment of entry
- count
-
-When not holding the PoD lock across the entire region covering P2M
-update and stats update, the entry count - if to be incorrect at all -
-should indicate too large a value in preference to a too small one, to
-avoid functions bailing early when they find the count is zero. However,
-instead of moving the increment ahead (and adjust back upon failure),
-extend the PoD-locked region.
-
-Fixes: 99af3cd40b6e ("x86/mm: Rework locking in the PoD layer")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: George Dunlap <george.dunlap@cloud.com>
-master commit: cc950c49ae6a6690f7fc3041a1f43122c250d250
-master date: 2024-03-21 09:48:10 +0100
----
- xen/arch/x86/mm/p2m-pod.c | 15 ++++++++++++---
- 1 file changed, 12 insertions(+), 3 deletions(-)
-
-diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
-index 99dbcb3101..e903db9d93 100644
---- a/xen/arch/x86/mm/p2m-pod.c
-+++ b/xen/arch/x86/mm/p2m-pod.c
-@@ -1370,19 +1370,28 @@ mark_populate_on_demand(struct domain *d, unsigned long gfn_l,
- }
- }
-
-+ /*
-+ * P2M update and stats increment need to collectively be under PoD lock,
-+ * to prevent code elsewhere observing PoD entry count being zero despite
-+ * there actually still being PoD entries (created by the p2m_set_entry()
-+ * invocation below).
-+ */
-+ pod_lock(p2m);
-+
- /* Now, actually do the two-way mapping */
- rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order,
- p2m_populate_on_demand, p2m->default_access);
- if ( rc == 0 )
- {
-- pod_lock(p2m);
- p2m->pod.entry_count += 1UL << order;
- p2m->pod.entry_count -= pod_count;
- BUG_ON(p2m->pod.entry_count < 0);
-- pod_unlock(p2m);
-+ }
-+
-+ pod_unlock(p2m);
-
-+ if ( rc == 0 )
- ioreq_request_mapcache_invalidate(d);
-- }
- else if ( order )
- {
- /*
---
-2.44.0
-
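The before/after difference in the deleted patch is purely about the extent of the locked region; in outline it is the shape below, with a pthread mutex and toy_set_entry() standing in for pod_lock() and p2m_set_entry().

    #include <pthread.h>

    static pthread_mutex_t pod_lock = PTHREAD_MUTEX_INITIALIZER;
    static long entry_count;

    static int toy_set_entry(unsigned int order)
    {
        (void)order;
        return 0;                    /* the real p2m_set_entry() may fail */
    }

    static int toy_mark_populate_on_demand(unsigned int order, long pod_count)
    {
        int rc;

        /* P2M update and stats update share one critical section, so nothing
         * can observe entry_count == 0 while PoD entries actually exist. */
        pthread_mutex_lock(&pod_lock);

        rc = toy_set_entry(order);
        if ( rc == 0 )
        {
            entry_count += 1L << order;
            entry_count -= pod_count;
        }

        pthread_mutex_unlock(&pod_lock);

        return rc;
    }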
diff --git a/0058-tools-oxenstored-Use-Map-instead-of-Hashtbl-for-quot.patch b/0058-tools-oxenstored-Use-Map-instead-of-Hashtbl-for-quot.patch
deleted file mode 100644
index dfc7f5a..0000000
--- a/0058-tools-oxenstored-Use-Map-instead-of-Hashtbl-for-quot.patch
+++ /dev/null
@@ -1,143 +0,0 @@
-From 7abd305607938b846da1a37dd1bda7bf7d47dba5 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edwin.torok@cloud.com>
-Date: Wed, 31 Jan 2024 10:52:55 +0000
-Subject: [PATCH 58/67] tools/oxenstored: Use Map instead of Hashtbl for quotas
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-On a stress test running 1000 VMs flamegraphs have shown that
-`oxenstored` spends a large amount of time in `Hashtbl.copy` and the GC.
-
-Hashtable complexity:
- * read/write: O(1) average
- * copy: O(domains) -- copying the entire table
-
-Map complexity:
- * read/write: O(log n) worst case
- * copy: O(1) -- a word copy
-
-We always perform at least one 'copy' when processing each xenstore
-packet (regardless whether it is a readonly operation or inside a
-transaction or not), so the actual complexity per packet is:
- * Hashtbl: O(domains)
- * Map: O(log domains)
-
-Maps are the clear winner, and a better fit for the immutable xenstore
-tree.
-
-Signed-off-by: Edwin Török <edwin.torok@cloud.com>
-Acked-by: Christian Lindig <christian.lindig@cloud.com>
-(cherry picked from commit b6cf604207fd0a04451a48f2ce6d05fb66c612ab)
----
- tools/ocaml/xenstored/quota.ml | 65 ++++++++++++++++++----------------
- 1 file changed, 34 insertions(+), 31 deletions(-)
-
-diff --git a/tools/ocaml/xenstored/quota.ml b/tools/ocaml/xenstored/quota.ml
-index 6e3d6401ae..ee8dd22581 100644
---- a/tools/ocaml/xenstored/quota.ml
-+++ b/tools/ocaml/xenstored/quota.ml
-@@ -23,66 +23,69 @@ let activate = ref true
- let maxent = ref (1000)
- let maxsize = ref (2048)
-
-+module Domid = struct
-+ type t = Xenctrl.domid
-+ let compare (a:t) (b:t) = compare a b
-+end
-+
-+module DomidMap = Map.Make(Domid)
-+
- type t = {
- maxent: int; (* max entities per domU *)
- maxsize: int; (* max size of data store in one node *)
-- cur: (Xenctrl.domid, int) Hashtbl.t; (* current domains quota *)
-+ mutable cur: int DomidMap.t; (* current domains quota *)
- }
-
- let to_string quota domid =
-- if Hashtbl.mem quota.cur domid
-- then Printf.sprintf "dom%i quota: %i/%i" domid (Hashtbl.find quota.cur domid) quota.maxent
-- else Printf.sprintf "dom%i quota: not set" domid
-+ try
-+ Printf.sprintf "dom%i quota: %i/%i" domid (DomidMap.find domid quota.cur) quota.maxent
-+ with Not_found ->
-+ Printf.sprintf "dom%i quota: not set" domid
-
- let create () =
-- { maxent = !maxent; maxsize = !maxsize; cur = Hashtbl.create 100; }
-+ { maxent = !maxent; maxsize = !maxsize; cur = DomidMap.empty; }
-
--let copy quota = { quota with cur = (Hashtbl.copy quota.cur) }
-+let copy quota = { quota with cur = quota.cur }
-
--let del quota id = Hashtbl.remove quota.cur id
-+let del quota id = { quota with cur = DomidMap.remove id quota.cur }
-
- let _check quota id size =
- if size > quota.maxsize then (
- warn "domain %u err create entry: data too big %d" id size;
- raise Data_too_big
- );
-- if id > 0 && Hashtbl.mem quota.cur id then
-- let entry = Hashtbl.find quota.cur id in
-+ if id > 0 then
-+ try
-+ let entry = DomidMap.find id quota.cur in
- if entry >= quota.maxent then (
- warn "domain %u cannot create entry: quota reached" id;
- raise Limit_reached
- )
-+ with Not_found -> ()
-
- let check quota id size =
- if !activate then
- _check quota id size
-
--let get_entry quota id = Hashtbl.find quota.cur id
-+let find_or_zero quota_cur id =
-+ try DomidMap.find id quota_cur with Not_found -> 0
-
--let set_entry quota id nb =
-- if nb = 0
-- then Hashtbl.remove quota.cur id
-- else begin
-- if Hashtbl.mem quota.cur id then
-- Hashtbl.replace quota.cur id nb
-- else
-- Hashtbl.add quota.cur id nb
-- end
-+let update_entry quota_cur id diff =
-+ let nb = diff + find_or_zero quota_cur id in
-+ if nb = 0 then DomidMap.remove id quota_cur
-+ else DomidMap.add id nb quota_cur
-
- let del_entry quota id =
-- try
-- let nb = get_entry quota id in
-- set_entry quota id (nb - 1)
-- with Not_found -> ()
-+ quota.cur <- update_entry quota.cur id (-1)
-
- let add_entry quota id =
-- let nb = try get_entry quota id with Not_found -> 0 in
-- set_entry quota id (nb + 1)
--
--let add quota diff =
-- Hashtbl.iter (fun id nb -> set_entry quota id (get_entry quota id + nb)) diff.cur
-+ quota.cur <- update_entry quota.cur id (+1)
-
- let merge orig_quota mod_quota dest_quota =
-- Hashtbl.iter (fun id nb -> let diff = nb - (try get_entry orig_quota id with Not_found -> 0) in
-- if diff <> 0 then
-- set_entry dest_quota id ((try get_entry dest_quota id with Not_found -> 0) + diff)) mod_quota.cur
-+ let fold_merge id nb dest =
-+ match nb - find_or_zero orig_quota.cur id with
-+ | 0 -> dest (* not modified *)
-+ | diff -> update_entry dest id diff (* update with [x=x+diff] *)
-+ in
-+ dest_quota.cur <- DomidMap.fold fold_merge mod_quota.cur dest_quota.cur
-+ (* dest_quota = dest_quota + (mod_quota - orig_quota) *)
---
-2.44.0
-
diff --git a/0059-tools-oxenstored-Make-Quota.t-pure.patch b/0059-tools-oxenstored-Make-Quota.t-pure.patch
deleted file mode 100644
index 7616b90..0000000
--- a/0059-tools-oxenstored-Make-Quota.t-pure.patch
+++ /dev/null
@@ -1,121 +0,0 @@
-From f38a815a54000ca51ff5165b2863d60b6bbea49c Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edwin.torok@cloud.com>
-Date: Wed, 31 Jan 2024 10:52:56 +0000
-Subject: [PATCH 59/67] tools/oxenstored: Make Quota.t pure
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Now that we no longer have a hashtable inside we can make Quota.t pure, and
-push the mutable update to its callers. Store.t already had a mutable Quota.t
-field.
-
-No functional change.
-
-Signed-off-by: Edwin Török <edwin.torok@cloud.com>
-Acked-by: Christian Lindig <christian.lindig@cloud.com>
-(cherry picked from commit 098d868e52ac0165b7f36e22b767ea70cef70054)
----
- tools/ocaml/xenstored/quota.ml | 8 ++++----
- tools/ocaml/xenstored/store.ml | 17 ++++++++++-------
- 2 files changed, 14 insertions(+), 11 deletions(-)
-
-diff --git a/tools/ocaml/xenstored/quota.ml b/tools/ocaml/xenstored/quota.ml
-index ee8dd22581..b3ab678c72 100644
---- a/tools/ocaml/xenstored/quota.ml
-+++ b/tools/ocaml/xenstored/quota.ml
-@@ -33,7 +33,7 @@ module DomidMap = Map.Make(Domid)
- type t = {
- maxent: int; (* max entities per domU *)
- maxsize: int; (* max size of data store in one node *)
-- mutable cur: int DomidMap.t; (* current domains quota *)
-+ cur: int DomidMap.t; (* current domains quota *)
- }
-
- let to_string quota domid =
-@@ -76,10 +76,10 @@ let update_entry quota_cur id diff =
- else DomidMap.add id nb quota_cur
-
- let del_entry quota id =
-- quota.cur <- update_entry quota.cur id (-1)
-+ {quota with cur = update_entry quota.cur id (-1)}
-
- let add_entry quota id =
-- quota.cur <- update_entry quota.cur id (+1)
-+ {quota with cur = update_entry quota.cur id (+1)}
-
- let merge orig_quota mod_quota dest_quota =
- let fold_merge id nb dest =
-@@ -87,5 +87,5 @@ let merge orig_quota mod_quota dest_quota =
- | 0 -> dest (* not modified *)
- | diff -> update_entry dest id diff (* update with [x=x+diff] *)
- in
-- dest_quota.cur <- DomidMap.fold fold_merge mod_quota.cur dest_quota.cur
-+ {dest_quota with cur = DomidMap.fold fold_merge mod_quota.cur dest_quota.cur}
- (* dest_quota = dest_quota + (mod_quota - orig_quota) *)
-diff --git a/tools/ocaml/xenstored/store.ml b/tools/ocaml/xenstored/store.ml
-index c94dbf3a62..5dd965db15 100644
---- a/tools/ocaml/xenstored/store.ml
-+++ b/tools/ocaml/xenstored/store.ml
-@@ -85,7 +85,9 @@ let check_owner node connection =
- raise Define.Permission_denied;
- end
-
--let rec recurse fct node = fct node; SymbolMap.iter (fun _ -> recurse fct) node.children
-+let rec recurse fct node acc =
-+ let acc = fct node acc in
-+ SymbolMap.fold (fun _ -> recurse fct) node.children acc
-
- (** [recurse_filter_map f tree] applies [f] on each node in the tree recursively,
- possibly removing some nodes.
-@@ -408,7 +410,7 @@ let dump_buffer store = dump_store_buf store.root
- let set_node store path node orig_quota mod_quota =
- let root = Path.set_node store.root path node in
- store.root <- root;
-- Quota.merge orig_quota mod_quota store.quota
-+ store.quota <- Quota.merge orig_quota mod_quota store.quota
-
- let write store perm path value =
- let node, existing = get_deepest_existing_node store path in
-@@ -422,7 +424,7 @@ let write store perm path value =
- let root, node_created = path_write store perm path value in
- store.root <- root;
- if node_created
-- then Quota.add_entry store.quota owner
-+ then store.quota <- Quota.add_entry store.quota owner
-
- let mkdir store perm path =
- let node, existing = get_deepest_existing_node store path in
-@@ -431,7 +433,7 @@ let mkdir store perm path =
- if not (existing || (Perms.Connection.is_dom0 perm)) then Quota.check store.quota owner 0;
- store.root <- path_mkdir store perm path;
- if not existing then
-- Quota.add_entry store.quota owner
-+ store.quota <- Quota.add_entry store.quota owner
-
- let rm store perm path =
- let rmed_node = Path.get_node store.root path in
-@@ -439,7 +441,7 @@ let rm store perm path =
- | None -> raise Define.Doesnt_exist
- | Some rmed_node ->
- store.root <- path_rm store perm path;
-- Node.recurse (fun node -> Quota.del_entry store.quota (Node.get_owner node)) rmed_node
-+ store.quota <- Node.recurse (fun node quota -> Quota.del_entry quota (Node.get_owner node)) rmed_node store.quota
-
- let setperms store perm path nperms =
- match Path.get_node store.root path with
-@@ -450,8 +452,9 @@ let setperms store perm path nperms =
- if not ((old_owner = new_owner) || (Perms.Connection.is_dom0 perm)) then
- raise Define.Permission_denied;
- store.root <- path_setperms store perm path nperms;
-- Quota.del_entry store.quota old_owner;
-- Quota.add_entry store.quota new_owner
-+ store.quota <-
-+ let quota = Quota.del_entry store.quota old_owner in
-+ Quota.add_entry quota new_owner
-
- let reset_permissions store domid =
- Logging.info "store|node" "Cleaning up xenstore ACLs for domid %d" domid;
---
-2.44.0
-
diff --git a/0060-x86-cpu-policy-Hide-x2APIC-from-PV-guests.patch b/0060-x86-cpu-policy-Hide-x2APIC-from-PV-guests.patch
deleted file mode 100644
index ce2b89d..0000000
--- a/0060-x86-cpu-policy-Hide-x2APIC-from-PV-guests.patch
+++ /dev/null
@@ -1,90 +0,0 @@
-From bb27e11c56963e170d1f6d2fbddbc956f7164121 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 2 Apr 2024 16:17:25 +0200
-Subject: [PATCH 60/67] x86/cpu-policy: Hide x2APIC from PV guests
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-PV guests can't write to MSR_APIC_BASE (in order to set EXTD), nor can they
-access any of the x2APIC MSR range. Therefore they mustn't see the x2APIC
-CPUID bit saying that they can.
-
-Right now, the host x2APIC flag filters into PV guests, meaning that PV guests
-generally see x2APIC except on Zen1-and-older AMD systems.
-
-Linux works around this by explicitly hiding the bit itself, and filtering
-EXTD out of MSR_APIC_BASE reads. NetBSD behaves more in the spirit of PV
-guests, and entirely ignores the APIC when built as a PV guest.
-
-Change the annotation from !A to !S. This has a consequence of stripping it
-out of both PV featuremasks. However, as existing guests may have seen the
-bit, set it back into the PV Max policy; a VM which saw the bit and is alive
-enough to migrate will have ignored it one way or another.
-
-Hiding x2APIC does change the contents of leaf 0xb, but as the information is
-nonsense to begin with, this is likely an improvement on the status quo.
-
-Xen's blind assumption that APIC_ID = vCPU_ID * 2 isn't interlinked with the
-host's topology structure, where a PV guest may see real host values, and the
-APIC_IDs are useless without an MADT to start with. Dom0 is the only PV VM to
-get an MADT but it's the host one, meaning the two sets of APIC_IDs are from
-different address spaces.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
-master commit: 5420aa165dfa5fe95dd84bb71cb96c15459935b1
-master date: 2024-03-01 20:14:19 +0000
----
- xen/arch/x86/cpu-policy.c | 11 +++++++++--
- xen/include/public/arch-x86/cpufeatureset.h | 2 +-
- 2 files changed, 10 insertions(+), 3 deletions(-)
-
-diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
-index 96c2cee1a8..ed64d56294 100644
---- a/xen/arch/x86/cpu-policy.c
-+++ b/xen/arch/x86/cpu-policy.c
-@@ -559,6 +559,14 @@ static void __init calculate_pv_max_policy(void)
- for ( i = 0; i < ARRAY_SIZE(fs); ++i )
- fs[i] &= pv_max_featuremask[i];
-
-+ /*
-+ * Xen at the time of writing (Feb 2024, 4.19 dev cycle) used to leak the
-+ * host x2APIC capability into PV guests, but never supported the guest
-+ * trying to turn x2APIC mode on. Tolerate an incoming VM which saw the
-+ * x2APIC CPUID bit and is alive enough to migrate.
-+ */
-+ __set_bit(X86_FEATURE_X2APIC, fs);
-+
- /*
- * If Xen isn't virtualising MSR_SPEC_CTRL for PV guests (functional
- * availability, or admin choice), hide the feature.
-@@ -837,11 +845,10 @@ void recalculate_cpuid_policy(struct domain *d)
- }
-
- /*
-- * Allow the toolstack to set HTT, X2APIC and CMP_LEGACY. These bits
-+ * Allow the toolstack to set HTT and CMP_LEGACY. These bits
- * affect how to interpret topology information in other cpuid leaves.
- */
- __set_bit(X86_FEATURE_HTT, max_fs);
-- __set_bit(X86_FEATURE_X2APIC, max_fs);
- __set_bit(X86_FEATURE_CMP_LEGACY, max_fs);
-
- /*
-diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
-index 113e6cadc1..bc971f3c6f 100644
---- a/xen/include/public/arch-x86/cpufeatureset.h
-+++ b/xen/include/public/arch-x86/cpufeatureset.h
-@@ -123,7 +123,7 @@ XEN_CPUFEATURE(PCID, 1*32+17) /*H Process Context ID */
- XEN_CPUFEATURE(DCA, 1*32+18) /* Direct Cache Access */
- XEN_CPUFEATURE(SSE4_1, 1*32+19) /*A Streaming SIMD Extensions 4.1 */
- XEN_CPUFEATURE(SSE4_2, 1*32+20) /*A Streaming SIMD Extensions 4.2 */
--XEN_CPUFEATURE(X2APIC, 1*32+21) /*!A Extended xAPIC */
-+XEN_CPUFEATURE(X2APIC, 1*32+21) /*!S Extended xAPIC */
- XEN_CPUFEATURE(MOVBE, 1*32+22) /*A movbe instruction */
- XEN_CPUFEATURE(POPCNT, 1*32+23) /*A POPCNT instruction */
- XEN_CPUFEATURE(TSC_DEADLINE, 1*32+24) /*S TSC Deadline Timer */
---
-2.44.0
-
diff --git a/0061-x86-cpu-policy-Fix-visibility-of-HTT-CMP_LEGACY-in-m.patch b/0061-x86-cpu-policy-Fix-visibility-of-HTT-CMP_LEGACY-in-m.patch
deleted file mode 100644
index d1b8786..0000000
--- a/0061-x86-cpu-policy-Fix-visibility-of-HTT-CMP_LEGACY-in-m.patch
+++ /dev/null
@@ -1,85 +0,0 @@
-From 70ad9c5fdeac4814050080c87e06d44292ecf868 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 2 Apr 2024 16:18:05 +0200
-Subject: [PATCH 61/67] x86/cpu-policy: Fix visibility of HTT/CMP_LEGACY in max
- policies
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The block in recalculate_cpuid_policy() predates the proper split between
-default and max policies, and was a "slightly max for a toolstack which knows
-about it" capability. It didn't get transformed properly in Xen 4.14.
-
-Because Xen will accept a VM with HTT/CMP_LEGACY seen, they should be visible
-in the max polices. Keep the default policy matching host settings.
-
-This manifested as an incorrectly-rejected migration across XenServer's Xen
-4.13 -> 4.17 upgrade, as Xapi is slowly growing the logic to check a VM
-against the target max policy.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
-master commit: e2d8a652251660c3252d92b442e1a9c5d6e6a1e9
-master date: 2024-03-01 20:14:19 +0000
----
- xen/arch/x86/cpu-policy.c | 29 ++++++++++++++++++++++-------
- 1 file changed, 22 insertions(+), 7 deletions(-)
-
-diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
-index ed64d56294..24acd12ce2 100644
---- a/xen/arch/x86/cpu-policy.c
-+++ b/xen/arch/x86/cpu-policy.c
-@@ -458,6 +458,16 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs)
- raw_cpu_policy.feat.clwb )
- __set_bit(X86_FEATURE_CLWB, fs);
- }
-+
-+ /*
-+ * Topology information inside the guest is entirely at the toolstack's
-+ * discretion, and bears no relationship to the host we're running on.
-+ *
-+ * HTT identifies p->basic.lppp as valid
-+ * CMP_LEGACY identifies p->extd.nc as valid
-+ */
-+ __set_bit(X86_FEATURE_HTT, fs);
-+ __set_bit(X86_FEATURE_CMP_LEGACY, fs);
- }
-
- static void __init guest_common_default_feature_adjustments(uint32_t *fs)
-@@ -512,6 +522,18 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs)
- __clear_bit(X86_FEATURE_CLWB, fs);
- }
-
-+ /*
-+ * Topology information is at the toolstack's discretion so these are
-+ * unconditionally set in max, but pick a default which matches the host.
-+ */
-+ __clear_bit(X86_FEATURE_HTT, fs);
-+ if ( cpu_has_htt )
-+ __set_bit(X86_FEATURE_HTT, fs);
-+
-+ __clear_bit(X86_FEATURE_CMP_LEGACY, fs);
-+ if ( cpu_has_cmp_legacy )
-+ __set_bit(X86_FEATURE_CMP_LEGACY, fs);
-+
- /*
- * On certain hardware, speculative or errata workarounds can result in
- * TSX being placed in "force-abort" mode, where it doesn't actually
-@@ -844,13 +866,6 @@ void recalculate_cpuid_policy(struct domain *d)
- }
- }
-
-- /*
-- * Allow the toolstack to set HTT and CMP_LEGACY. These bits
-- * affect how to interpret topology information in other cpuid leaves.
-- */
-- __set_bit(X86_FEATURE_HTT, max_fs);
-- __set_bit(X86_FEATURE_CMP_LEGACY, max_fs);
--
- /*
- * 32bit PV domains can't use any Long Mode features, and cannot use
- * SYSCALL on non-AMD hardware.
---
-2.44.0
-
diff --git a/0062-xen-virtual-region-Rename-the-start-end-fields.patch b/0062-xen-virtual-region-Rename-the-start-end-fields.patch
deleted file mode 100644
index 9dbd5c9..0000000
--- a/0062-xen-virtual-region-Rename-the-start-end-fields.patch
+++ /dev/null
@@ -1,140 +0,0 @@
-From 2392e958ec6fd2e48e011781344cf94dee6d6142 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 2 Apr 2024 16:18:51 +0200
-Subject: [PATCH 62/67] xen/virtual-region: Rename the start/end fields
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-... to text_{start,end}. We're about to introduce another start/end pair.
-
-Despite its name, struct virtual_region has always been a module-ish
-description. Call this out specifically.
-
-As minor cleanup, replace ROUNDUP(x, PAGE_SIZE) with the more concise
-PAGE_ALIGN() ahead of duplicating the example.
-
-No functional change.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com>
-master commit: 989556c6f8ca080f5f202417af97d1188b9ba52a
-master date: 2024-03-07 14:24:42 +0000
----
- xen/common/livepatch.c | 9 +++++----
- xen/common/virtual_region.c | 19 ++++++++++---------
- xen/include/xen/virtual_region.h | 11 +++++++++--
- 3 files changed, 24 insertions(+), 15 deletions(-)
-
-diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
-index a5068a2217..29395f286f 100644
---- a/xen/common/livepatch.c
-+++ b/xen/common/livepatch.c
-@@ -785,8 +785,8 @@ static int prepare_payload(struct payload *payload,
- region = &payload->region;
-
- region->symbols_lookup = livepatch_symbols_lookup;
-- region->start = payload->text_addr;
-- region->end = payload->text_addr + payload->text_size;
-+ region->text_start = payload->text_addr;
-+ region->text_end = payload->text_addr + payload->text_size;
-
- /* Optional sections. */
- for ( i = 0; i < BUGFRAME_NR; i++ )
-@@ -823,8 +823,9 @@ static int prepare_payload(struct payload *payload,
- const void *instr = ALT_ORIG_PTR(a);
- const void *replacement = ALT_REPL_PTR(a);
-
-- if ( (instr < region->start && instr >= region->end) ||
-- (replacement < region->start && replacement >= region->end) )
-+ if ( (instr < region->text_start && instr >= region->text_end) ||
-+ (replacement < region->text_start &&
-+ replacement >= region->text_end) )
- {
- printk(XENLOG_ERR LIVEPATCH "%s Alt patching outside payload: %p\n",
- elf->name, instr);
-diff --git a/xen/common/virtual_region.c b/xen/common/virtual_region.c
-index 9f12c30efe..b22ffb75c4 100644
---- a/xen/common/virtual_region.c
-+++ b/xen/common/virtual_region.c
-@@ -11,15 +11,15 @@
-
- static struct virtual_region core = {
- .list = LIST_HEAD_INIT(core.list),
-- .start = _stext,
-- .end = _etext,
-+ .text_start = _stext,
-+ .text_end = _etext,
- };
-
- /* Becomes irrelevant when __init sections are cleared. */
- static struct virtual_region core_init __initdata = {
- .list = LIST_HEAD_INIT(core_init.list),
-- .start = _sinittext,
-- .end = _einittext,
-+ .text_start = _sinittext,
-+ .text_end = _einittext,
- };
-
- /*
-@@ -39,7 +39,8 @@ const struct virtual_region *find_text_region(unsigned long addr)
- rcu_read_lock(&rcu_virtual_region_lock);
- list_for_each_entry_rcu( region, &virtual_region_list, list )
- {
-- if ( (void *)addr >= region->start && (void *)addr < region->end )
-+ if ( (void *)addr >= region->text_start &&
-+ (void *)addr < region->text_end )
- {
- rcu_read_unlock(&rcu_virtual_region_lock);
- return region;
-@@ -88,8 +89,8 @@ void relax_virtual_region_perms(void)
-
- rcu_read_lock(&rcu_virtual_region_lock);
- list_for_each_entry_rcu( region, &virtual_region_list, list )
-- modify_xen_mappings_lite((unsigned long)region->start,
-- ROUNDUP((unsigned long)region->end, PAGE_SIZE),
-+ modify_xen_mappings_lite((unsigned long)region->text_start,
-+ PAGE_ALIGN((unsigned long)region->text_end),
- PAGE_HYPERVISOR_RWX);
- rcu_read_unlock(&rcu_virtual_region_lock);
- }
-@@ -100,8 +101,8 @@ void tighten_virtual_region_perms(void)
-
- rcu_read_lock(&rcu_virtual_region_lock);
- list_for_each_entry_rcu( region, &virtual_region_list, list )
-- modify_xen_mappings_lite((unsigned long)region->start,
-- ROUNDUP((unsigned long)region->end, PAGE_SIZE),
-+ modify_xen_mappings_lite((unsigned long)region->text_start,
-+ PAGE_ALIGN((unsigned long)region->text_end),
- PAGE_HYPERVISOR_RX);
- rcu_read_unlock(&rcu_virtual_region_lock);
- }
-diff --git a/xen/include/xen/virtual_region.h b/xen/include/xen/virtual_region.h
-index d053620711..442a45bf1f 100644
---- a/xen/include/xen/virtual_region.h
-+++ b/xen/include/xen/virtual_region.h
-@@ -9,11 +9,18 @@
- #include <xen/list.h>
- #include <xen/symbols.h>
-
-+/*
-+ * Despite it's name, this is a module(ish) description.
-+ *
-+ * There's one region for the runtime .text/etc, one region for .init during
-+ * boot only, and one region per livepatch.
-+ */
- struct virtual_region
- {
- struct list_head list;
-- const void *start; /* Virtual address start. */
-- const void *end; /* Virtual address end. */
-+
-+ const void *text_start; /* .text virtual address start. */
-+ const void *text_end; /* .text virtual address end. */
-
- /* If this is NULL the default lookup mechanism is used. */
- symbols_lookup_t *symbols_lookup;
---
-2.44.0
-
diff --git a/0063-xen-virtual-region-Include-rodata-pointers.patch b/0063-xen-virtual-region-Include-rodata-pointers.patch
deleted file mode 100644
index 9f51d4d..0000000
--- a/0063-xen-virtual-region-Include-rodata-pointers.patch
+++ /dev/null
@@ -1,71 +0,0 @@
-From 335cbb55567b20df8e8bd2d1b340609e272ddab6 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 2 Apr 2024 16:19:11 +0200
-Subject: [PATCH 63/67] xen/virtual-region: Include rodata pointers
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-These are optional. .init doesn't distinguish types of data like this, and
-livepatches don't necessarily have any .rodata either.
-
-No functional change.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com>
-master commit: ef969144a425e39f5b214a875b5713d0ea8575fb
-master date: 2024-03-07 14:24:42 +0000
----
- xen/common/livepatch.c | 6 ++++++
- xen/common/virtual_region.c | 2 ++
- xen/include/xen/virtual_region.h | 3 +++
- 3 files changed, 11 insertions(+)
-
-diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
-index 29395f286f..28c09ddf58 100644
---- a/xen/common/livepatch.c
-+++ b/xen/common/livepatch.c
-@@ -788,6 +788,12 @@ static int prepare_payload(struct payload *payload,
- region->text_start = payload->text_addr;
- region->text_end = payload->text_addr + payload->text_size;
-
-+ if ( payload->ro_size )
-+ {
-+ region->rodata_start = payload->ro_addr;
-+ region->rodata_end = payload->ro_addr + payload->ro_size;
-+ }
-+
- /* Optional sections. */
- for ( i = 0; i < BUGFRAME_NR; i++ )
- {
-diff --git a/xen/common/virtual_region.c b/xen/common/virtual_region.c
-index b22ffb75c4..9c566f8ec9 100644
---- a/xen/common/virtual_region.c
-+++ b/xen/common/virtual_region.c
-@@ -13,6 +13,8 @@ static struct virtual_region core = {
- .list = LIST_HEAD_INIT(core.list),
- .text_start = _stext,
- .text_end = _etext,
-+ .rodata_start = _srodata,
-+ .rodata_end = _erodata,
- };
-
- /* Becomes irrelevant when __init sections are cleared. */
-diff --git a/xen/include/xen/virtual_region.h b/xen/include/xen/virtual_region.h
-index 442a45bf1f..dcdc95ba49 100644
---- a/xen/include/xen/virtual_region.h
-+++ b/xen/include/xen/virtual_region.h
-@@ -22,6 +22,9 @@ struct virtual_region
- const void *text_start; /* .text virtual address start. */
- const void *text_end; /* .text virtual address end. */
-
-+ const void *rodata_start; /* .rodata virtual address start (optional). */
-+ const void *rodata_end; /* .rodata virtual address end. */
-+
- /* If this is NULL the default lookup mechanism is used. */
- symbols_lookup_t *symbols_lookup;
-
---
-2.44.0
-
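A quick illustration of why the new pointers can be "optional": any region that never fills them in is left NULL, which later code can test. A hedged standalone sketch; the struct is reduced to the fields named in the deleted patch, and everything else is invented for the example.

    #include <stdio.h>

    struct virtual_region {                     /* reduced to the fields above */
        const void *text_start, *text_end;
        const void *rodata_start, *rodata_end;  /* optional: NULL when absent */
    };

    int main(void)
    {
        static const char text[64], ro[64];

        /* Fields not named in a designated initializer are zeroed, so a region
         * that never sets rodata_start reliably reads back as NULL, matching
         * the NULL-means-absent convention prepare_payload() follows when
         * ro_size is zero. */
        struct virtual_region with_ro = {
            .text_start = text, .text_end = text + 64,
            .rodata_start = ro, .rodata_end = ro + 64,
        };
        struct virtual_region without_ro = {
            .text_start = text, .text_end = text + 64,
        };

        printf("with_ro has .rodata:    %s\n", with_ro.rodata_start ? "yes" : "no");
        printf("without_ro has .rodata: %s\n", without_ro.rodata_start ? "yes" : "no");
        return 0;
    }
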
diff --git a/0064-x86-livepatch-Relax-permissions-on-rodata-too.patch b/0064-x86-livepatch-Relax-permissions-on-rodata-too.patch
deleted file mode 100644
index bc80769..0000000
--- a/0064-x86-livepatch-Relax-permissions-on-rodata-too.patch
+++ /dev/null
@@ -1,85 +0,0 @@
-From c3ff11b11c21777a9b1c616607705f3a7340b391 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 2 Apr 2024 16:19:36 +0200
-Subject: [PATCH 64/67] x86/livepatch: Relax permissions on rodata too
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-This reinstates the capability to patch .rodata in load/unload hooks, which
-was lost when we stopped using CR0.WP=0 to patch.
-
-This turns out to be rather less of a large TODO than I thought at the time.
-
-Fixes: 8676092a0f16 ("x86/livepatch: Fix livepatch application when CET is active")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com>
-master commit: b083b1c393dc8961acf0959b1d2e0ad459985ae3
-master date: 2024-03-07 14:24:42 +0000
----
- xen/arch/x86/livepatch.c | 4 ++--
- xen/common/virtual_region.c | 12 ++++++++++++
- 2 files changed, 14 insertions(+), 2 deletions(-)
-
-diff --git a/xen/arch/x86/livepatch.c b/xen/arch/x86/livepatch.c
-index ee539f001b..4f76127e1f 100644
---- a/xen/arch/x86/livepatch.c
-+++ b/xen/arch/x86/livepatch.c
-@@ -62,7 +62,7 @@ int arch_livepatch_safety_check(void)
- int noinline arch_livepatch_quiesce(void)
- {
- /*
-- * Relax perms on .text to be RWX, so we can modify them.
-+ * Relax perms on .text/.rodata, so we can modify them.
- *
- * This relaxes perms globally, but all other CPUs are waiting on us.
- */
-@@ -75,7 +75,7 @@ int noinline arch_livepatch_quiesce(void)
- void noinline arch_livepatch_revive(void)
- {
- /*
-- * Reinstate perms on .text to be RX. This also cleans out the dirty
-+ * Reinstate perms on .text/.rodata. This also cleans out the dirty
- * bits, which matters when CET Shstk is active.
- *
- * The other CPUs waiting for us could in principle have re-walked while
-diff --git a/xen/common/virtual_region.c b/xen/common/virtual_region.c
-index 9c566f8ec9..aefc08e75f 100644
---- a/xen/common/virtual_region.c
-+++ b/xen/common/virtual_region.c
-@@ -91,9 +91,15 @@ void relax_virtual_region_perms(void)
-
- rcu_read_lock(&rcu_virtual_region_lock);
- list_for_each_entry_rcu( region, &virtual_region_list, list )
-+ {
- modify_xen_mappings_lite((unsigned long)region->text_start,
- PAGE_ALIGN((unsigned long)region->text_end),
- PAGE_HYPERVISOR_RWX);
-+ if ( region->rodata_start )
-+ modify_xen_mappings_lite((unsigned long)region->rodata_start,
-+ PAGE_ALIGN((unsigned long)region->rodata_end),
-+ PAGE_HYPERVISOR_RW);
-+ }
- rcu_read_unlock(&rcu_virtual_region_lock);
- }
-
-@@ -103,9 +109,15 @@ void tighten_virtual_region_perms(void)
-
- rcu_read_lock(&rcu_virtual_region_lock);
- list_for_each_entry_rcu( region, &virtual_region_list, list )
-+ {
- modify_xen_mappings_lite((unsigned long)region->text_start,
- PAGE_ALIGN((unsigned long)region->text_end),
- PAGE_HYPERVISOR_RX);
-+ if ( region->rodata_start )
-+ modify_xen_mappings_lite((unsigned long)region->rodata_start,
-+ PAGE_ALIGN((unsigned long)region->rodata_end),
-+ PAGE_HYPERVISOR_RO);
-+ }
- rcu_read_unlock(&rcu_virtual_region_lock);
- }
- #endif /* CONFIG_X86 */
---
-2.44.0
-
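To make the permission flow easier to follow, here is a hedged standalone model of the relax/tighten pairing after this patch: .text toggles RX and RWX, .rodata (when present) toggles RO and RW, and regions without .rodata are skipped via the NULL check. apply_perms() and the printed strings are inventions for the example; only the field names and the conditional come from the patch.

    #include <stdio.h>
    #include <stddef.h>

    struct virtual_region {
        const void *text_start, *text_end;
        const void *rodata_start, *rodata_end;  /* NULL when the region has none */
    };

    /* Model of relax_virtual_region_perms() / tighten_virtual_region_perms(). */
    static void apply_perms(const struct virtual_region *r,
                            const char *text_perm, const char *rodata_perm)
    {
        printf(".text   %p..%p -> %s\n", r->text_start, r->text_end, text_perm);
        if (r->rodata_start)                    /* optional section: skip if absent */
            printf(".rodata %p..%p -> %s\n",
                   r->rodata_start, r->rodata_end, rodata_perm);
    }

    int main(void)
    {
        static const char text[64], ro[64];
        struct virtual_region with_ro = { text, text + 64, ro, ro + 64 };
        struct virtual_region no_ro   = { text, text + 64, NULL, NULL };

        apply_perms(&with_ro, "RWX", "RW");     /* quiesce: allow patching */
        apply_perms(&with_ro, "RX",  "RO");     /* revive: restore protections */
        apply_perms(&no_ro,   "RWX", "RW");     /* livepatch without .rodata */
        return 0;
    }
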
diff --git a/0065-x86-boot-Improve-the-boot-watchdog-determination-of-.patch b/0065-x86-boot-Improve-the-boot-watchdog-determination-of-.patch
deleted file mode 100644
index 4a46326..0000000
--- a/0065-x86-boot-Improve-the-boot-watchdog-determination-of-.patch
+++ /dev/null
@@ -1,106 +0,0 @@
-From 846fb984b506135917c2862d2e4607005d6afdeb Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 2 Apr 2024 16:20:09 +0200
-Subject: [PATCH 65/67] x86/boot: Improve the boot watchdog determination of
- stuck cpus
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Right now, check_nmi_watchdog() has two processing loops over all online CPUs
-using prev_nmi_count as storage.
-
-Use a cpumask_t instead (1/32th as much initdata) and have wait_for_nmis()
-make the determination of whether it is stuck, rather than having both
-functions needing to agree on how many ticks mean stuck.
-
-More importantly though, it means we can use the standard cpumask
-infrastructure, including turning this:
-
- (XEN) Brought up 512 CPUs
- (XEN) Testing NMI watchdog on all CPUs: {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,499,500,501,502,503,504,505,506,507,508,509,510,511} stuck
-
-into the rather more manageable:
-
- (XEN) Brought up 512 CPUs
- (XEN) Testing NMI watchdog on all CPUs: {0-511} stuck
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
-master commit: 9e18f339830c828798aef465556d4029d83476a0
-master date: 2024-03-19 18:29:37 +0000
----
- xen/arch/x86/nmi.c | 33 ++++++++++++++-------------------
- 1 file changed, 14 insertions(+), 19 deletions(-)
-
-diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c
-index 7c9591b65e..dd31034ac8 100644
---- a/xen/arch/x86/nmi.c
-+++ b/xen/arch/x86/nmi.c
-@@ -150,6 +150,8 @@ int nmi_active;
-
- static void __init cf_check wait_for_nmis(void *p)
- {
-+ cpumask_t *stuck_cpus = p;
-+ unsigned int cpu = smp_processor_id();
- unsigned int start_count = this_cpu(nmi_count);
- unsigned long ticks = 10 * 1000 * cpu_khz / nmi_hz;
- unsigned long s, e;
-@@ -158,42 +160,35 @@ static void __init cf_check wait_for_nmis(void *p)
- do {
- cpu_relax();
- if ( this_cpu(nmi_count) >= start_count + 2 )
-- break;
-+ return;
-+
- e = rdtsc();
-- } while( e - s < ticks );
-+ } while ( e - s < ticks );
-+
-+ /* Timeout. Mark ourselves as stuck. */
-+ cpumask_set_cpu(cpu, stuck_cpus);
- }
-
- void __init check_nmi_watchdog(void)
- {
-- static unsigned int __initdata prev_nmi_count[NR_CPUS];
-- int cpu;
-- bool ok = true;
-+ static cpumask_t __initdata stuck_cpus;
-
- if ( nmi_watchdog == NMI_NONE )
- return;
-
- printk("Testing NMI watchdog on all CPUs:");
-
-- for_each_online_cpu ( cpu )
-- prev_nmi_count[cpu] = per_cpu(nmi_count, cpu);
--
- /*
- * Wait at most 10 ticks for 2 watchdog NMIs on each CPU.
- * Busy-wait on all CPUs: the LAPIC counter that the NMI watchdog
- * uses only runs while the core's not halted
- */
-- on_selected_cpus(&cpu_online_map, wait_for_nmis, NULL, 1);
--
-- for_each_online_cpu ( cpu )
-- {
-- if ( per_cpu(nmi_count, cpu) - prev_nmi_count[cpu] < 2 )
-- {
-- printk(" %d", cpu);
-- ok = false;
-- }
-- }
-+ on_selected_cpus(&cpu_online_map, wait_for_nmis, &stuck_cpus, 1);
-
-- printk(" %s\n", ok ? "ok" : "stuck");
-+ if ( cpumask_empty(&stuck_cpus) )
-+ printk("ok\n");
-+ else
-+ printk("{%*pbl} stuck\n", CPUMASK_PR(&stuck_cpus));
-
- /*
- * Now that we know it works we can reduce NMI frequency to
---
-2.44.0
-
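The shape of the new reporting is easy to model outside the hypervisor. A hedged sketch, using a plain bitmask in place of cpumask_t: each CPU records only its own verdict, and the caller merely tests whether the mask stayed empty. The helper names echo the deleted patch; the NMI counts are made-up inputs.

    #include <stdio.h>
    #include <stdint.h>

    #define NR_CPUS 8

    /* Toy stand-ins for the cpumask helpers used in the deleted patch. */
    typedef uint64_t cpumask_t;
    static void cpumask_set_cpu(unsigned int cpu, cpumask_t *m) { *m |= 1ULL << cpu; }
    static int  cpumask_empty(const cpumask_t *m)               { return *m == 0; }

    /* Each CPU decides for itself: fewer than two NMIs in the window => stuck. */
    static void wait_for_nmis(unsigned int cpu, unsigned int nmis_seen,
                              cpumask_t *stuck_cpus)
    {
        if (nmis_seen < 2)
            cpumask_set_cpu(cpu, stuck_cpus);
    }

    int main(void)
    {
        cpumask_t stuck = 0;
        unsigned int nmis[NR_CPUS] = { 3, 2, 0, 4, 2, 1, 5, 2 };  /* fake counts */

        for (unsigned int cpu = 0; cpu < NR_CPUS; cpu++)
            wait_for_nmis(cpu, nmis[cpu], &stuck);

        if (cpumask_empty(&stuck))
            printf("Testing NMI watchdog on all CPUs: ok\n");
        else
            printf("Testing NMI watchdog on all CPUs: {%#llx} stuck\n",
                   (unsigned long long)stuck);
        return 0;
    }
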
diff --git a/0066-x86-boot-Support-the-watchdog-on-newer-AMD-systems.patch b/0066-x86-boot-Support-the-watchdog-on-newer-AMD-systems.patch
deleted file mode 100644
index e501861..0000000
--- a/0066-x86-boot-Support-the-watchdog-on-newer-AMD-systems.patch
+++ /dev/null
@@ -1,48 +0,0 @@
-From 2777b499f1f6d5cea68f9479f82d055542b822ad Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 2 Apr 2024 16:20:30 +0200
-Subject: [PATCH 66/67] x86/boot: Support the watchdog on newer AMD systems
-
-The MSRs used by setup_k7_watchdog() are architectural in 64bit. The Unit
-Select (0x76, cycles not in halt state) isn't, but it hasn't changed in 25
-years, making this a trend likely to continue.
-
-Drop the family check. If the Unit Select does happen to change meaning in
-the future, check_nmi_watchdog() will still notice the watchdog not operating
-as expected.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 131892e0dcc1265b621c2b7d844cb9e7c3a4404f
-master date: 2024-03-19 18:29:37 +0000
----
- xen/arch/x86/nmi.c | 11 ++++-------
- 1 file changed, 4 insertions(+), 7 deletions(-)
-
-diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c
-index dd31034ac8..c7c51614a6 100644
---- a/xen/arch/x86/nmi.c
-+++ b/xen/arch/x86/nmi.c
-@@ -386,15 +386,12 @@ void setup_apic_nmi_watchdog(void)
- if ( nmi_watchdog == NMI_NONE )
- return;
-
-- switch (boot_cpu_data.x86_vendor) {
-+ switch ( boot_cpu_data.x86_vendor )
-+ {
- case X86_VENDOR_AMD:
-- switch (boot_cpu_data.x86) {
-- case 6:
-- case 0xf ... 0x19:
-- setup_k7_watchdog();
-- break;
-- }
-+ setup_k7_watchdog();
- break;
-+
- case X86_VENDOR_INTEL:
- switch (boot_cpu_data.x86) {
- case 6:
---
-2.44.0
-
diff --git a/0067-tests-resource-Fix-HVM-guest-in-SHADOW-builds.patch b/0067-tests-resource-Fix-HVM-guest-in-SHADOW-builds.patch
deleted file mode 100644
index 5ce4e17..0000000
--- a/0067-tests-resource-Fix-HVM-guest-in-SHADOW-builds.patch
+++ /dev/null
@@ -1,110 +0,0 @@
-From 9bc40dbcf9eafccc1923b2555286bf6a2af03b7a Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 2 Apr 2024 16:24:07 +0200
-Subject: [PATCH 67/67] tests/resource: Fix HVM guest in !SHADOW builds
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Right now, test-resource always creates HVM Shadow guests. But if Xen has
-SHADOW compiled out, running the test yields:
-
- $./test-resource
- XENMEM_acquire_resource tests
- Test x86 PV
- Created d1
- Test grant table
- Test x86 PVH
- Skip: 95 - Operation not supported
-
-and doesn't really test HVM guests, but doesn't fail either.
-
-There's nothing paging-mode-specific about this test, so default to HAP if
-possible and provide a more specific message if neither HAP nor Shadow is
-available.
-
-As we've got physinfo to hand, also provide a more specific message about the
-absence of PV or HVM support.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Roger Pau Monné <roger.pau@citrix.com>
-master commit: 0263dc9069ddb66335c72a159e09050b1600e56a
-master date: 2024-03-01 20:14:19 +0000
----
- tools/tests/resource/test-resource.c | 39 ++++++++++++++++++++++++++++
- 1 file changed, 39 insertions(+)
-
-diff --git a/tools/tests/resource/test-resource.c b/tools/tests/resource/test-resource.c
-index 0a950072f9..e2c4ba3478 100644
---- a/tools/tests/resource/test-resource.c
-+++ b/tools/tests/resource/test-resource.c
-@@ -20,6 +20,8 @@ static xc_interface *xch;
- static xenforeignmemory_handle *fh;
- static xengnttab_handle *gh;
-
-+static xc_physinfo_t physinfo;
-+
- static void test_gnttab(uint32_t domid, unsigned int nr_frames,
- unsigned long gfn)
- {
-@@ -172,6 +174,37 @@ static void test_domain_configurations(void)
-
- printf("Test %s\n", t->name);
-
-+#if defined(__x86_64__) || defined(__i386__)
-+ if ( t->create.flags & XEN_DOMCTL_CDF_hvm )
-+ {
-+ if ( !(physinfo.capabilities & XEN_SYSCTL_PHYSCAP_hvm) )
-+ {
-+ printf(" Skip: HVM not available\n");
-+ continue;
-+ }
-+
-+ /*
-+ * On x86, use HAP guests if possible, but skip if neither HAP nor
-+ * SHADOW is available.
-+ */
-+ if ( physinfo.capabilities & XEN_SYSCTL_PHYSCAP_hap )
-+ t->create.flags |= XEN_DOMCTL_CDF_hap;
-+ else if ( !(physinfo.capabilities & XEN_SYSCTL_PHYSCAP_shadow) )
-+ {
-+ printf(" Skip: Neither HAP or SHADOW available\n");
-+ continue;
-+ }
-+ }
-+ else
-+ {
-+ if ( !(physinfo.capabilities & XEN_SYSCTL_PHYSCAP_pv) )
-+ {
-+ printf(" Skip: PV not available\n");
-+ continue;
-+ }
-+ }
-+#endif
-+
- rc = xc_domain_create(xch, &domid, &t->create);
- if ( rc )
- {
-@@ -214,6 +247,8 @@ static void test_domain_configurations(void)
-
- int main(int argc, char **argv)
- {
-+ int rc;
-+
- printf("XENMEM_acquire_resource tests\n");
-
- xch = xc_interface_open(NULL, NULL, 0);
-@@ -227,6 +262,10 @@ int main(int argc, char **argv)
- if ( !gh )
- err(1, "xengnttab_open");
-
-+ rc = xc_physinfo(xch, &physinfo);
-+ if ( rc )
-+ err(1, "Failed to obtain physinfo");
-+
- test_domain_configurations();
-
- return !!nr_failures;
---
-2.44.0
-
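The capability gating added by this patch boils down to a small decision function. A hedged standalone sketch: the bit names mimic the XEN_SYSCTL_PHYSCAP_* flags used above, but the values and pick_mode() itself are invented for illustration.

    #include <stdio.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Stand-in capability bits; the real ones come from Xen's sysctl interface. */
    #define CAP_PV     (1u << 0)
    #define CAP_HVM    (1u << 1)
    #define CAP_HAP    (1u << 2)
    #define CAP_SHADOW (1u << 3)

    /* Prefer HAP for HVM tests, fall back to shadow, otherwise skip;
     * PV tests only need the PV capability. */
    static const char *pick_mode(uint32_t caps, bool want_hvm)
    {
        if (!want_hvm)
            return (caps & CAP_PV) ? "pv" : "skip: PV not available";
        if (!(caps & CAP_HVM))
            return "skip: HVM not available";
        if (caps & CAP_HAP)
            return "hvm+hap";
        if (caps & CAP_SHADOW)
            return "hvm+shadow";
        return "skip: neither HAP nor SHADOW available";
    }

    int main(void)
    {
        printf("%s\n", pick_mode(CAP_PV | CAP_HVM | CAP_HAP, true));   /* hvm+hap */
        printf("%s\n", pick_mode(CAP_PV | CAP_HVM, true));             /* skip: no HAP/shadow */
        printf("%s\n", pick_mode(CAP_HVM | CAP_SHADOW, false));        /* skip: no PV */
        return 0;
    }
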
diff --git a/info.txt b/info.txt
index fa9f510..ccc4d4e 100644
--- a/info.txt
+++ b/info.txt
@@ -1,6 +1,6 @@
-Xen upstream patchset #1 for 4.17.4-pre
+Xen upstream patchset #0 for 4.18.3-pre
Containing patches from
-RELEASE-4.17.3 (07f413d7ffb06eab36045bd19f53555de1cacf62)
+RELEASE-4.18.2 (844f9931c6c207588a70f897262c628cd542f75a)
to
-staging-4.17 (9bc40dbcf9eafccc1923b2555286bf6a2af03b7a)
+staging-4.18 (d078d0aa86e9e3b937f673dc89306b3afd09d560)