author	Florian Schmaus <flow@gentoo.org>	2022-10-19 10:38:35 +0200
committer	Florian Schmaus <flow@gentoo.org>	2022-10-19 10:38:35 +0200
commit	4a9cd4d8a4efd8f2e52483117b4009122393c6a6 (patch)
tree	8a467d1618431f5353fa676ca58b32bb18ba21aa
parent	Xen 4.15.4-pre-patchset-0.1 (diff)
Xen 4.15.4-pre-patchset-1
Signed-off-by: Florian Schmaus <flow@gentoo.org>
68 files changed, 4858 insertions(+), 44 deletions(-)
diff --git a/0001-build-fix-exported-variable-name-CFLAGS_stack_bounda.patch b/0001-build-fix-exported-variable-name-CFLAGS_stack_bounda.patch
index 96eb282..32ff417 100644
--- a/0001-build-fix-exported-variable-name-CFLAGS_stack_bounda.patch
+++ b/0001-build-fix-exported-variable-name-CFLAGS_stack_bounda.patch
@@ -1,7 +1,7 @@
 From f6e26ce7d9317abc41130ead6dc2443a7e2dde00 Mon Sep 17 00:00:00 2001
 From: Anthony PERARD <anthony.perard@citrix.com>
 Date: Tue, 12 Jul 2022 11:20:46 +0200
-Subject: [PATCH 01/21] build: fix exported variable name CFLAGS_stack_boundary
+Subject: [PATCH 01/67] build: fix exported variable name CFLAGS_stack_boundary
 
 Exporting a variable with a dash doesn't work reliably, they may be
 striped from the environment when calling a sub-make or sub-shell.
@@ -63,5 +63,5 @@ index e857c0f2cc2c..a5b2041f9b96 100644
 obj-y := stub.o
 obj-$(XEN_BUILD_EFI) := $(filter-out %.init.o,$(EFIOBJ))
 --
-2.35.1
+2.37.3
diff --git a/0002-IOMMU-x86-work-around-bogus-gcc12-warning-in-hvm_gsi.patch b/0002-IOMMU-x86-work-around-bogus-gcc12-warning-in-hvm_gsi.patch
index 45e4cfd..9f2f8e4 100644
--- a/0002-IOMMU-x86-work-around-bogus-gcc12-warning-in-hvm_gsi.patch
+++ b/0002-IOMMU-x86-work-around-bogus-gcc12-warning-in-hvm_gsi.patch
@@ -1,7 +1,7 @@
 From b89b932cfe86556c5de4ad56702aed83142e22a3 Mon Sep 17 00:00:00 2001
 From: Jan Beulich <jbeulich@suse.com>
 Date: Tue, 12 Jul 2022 11:21:14 +0200
-Subject: [PATCH 02/21] IOMMU/x86: work around bogus gcc12 warning in
+Subject: [PATCH 02/67] IOMMU/x86: work around bogus gcc12 warning in
  hvm_gsi_eoi()
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
@@ -48,5 +48,5 @@ index 9544f3234e65..50865eec2c04 100644
 
 /*
 --
-2.35.1
+2.37.3
diff --git a/0003-ehci-dbgp-fix-selecting-n-th-ehci-controller.patch b/0003-ehci-dbgp-fix-selecting-n-th-ehci-controller.patch
index b79f4b3..777ef8a 100644
--- a/0003-ehci-dbgp-fix-selecting-n-th-ehci-controller.patch
+++ b/0003-ehci-dbgp-fix-selecting-n-th-ehci-controller.patch
@@ -2,7 +2,7 @@ From b53df5b4341fa97614ad064a7c8e781c88b6ed71 Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
  <marmarek@invisiblethingslab.com>
 Date: Tue, 12 Jul 2022 11:22:09 +0200
-Subject: [PATCH 03/21] ehci-dbgp: fix selecting n-th ehci controller
+Subject: [PATCH 03/67] ehci-dbgp: fix selecting n-th ehci controller
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit
@@ -32,5 +32,5 @@ index c893d246defa..66b4811af24a 100644
 dbgp->cap = find_dbgp(dbgp, num);
 if ( !dbgp->cap )
 --
-2.35.1
+2.37.3
diff --git a/0004-tools-xenstored-Harden-corrupt.patch b/0004-tools-xenstored-Harden-corrupt.patch
index 8b30166..62b7ec9 100644
--- a/0004-tools-xenstored-Harden-corrupt.patch
+++ b/0004-tools-xenstored-Harden-corrupt.patch
@@ -1,7 +1,7 @@
 From 7fe638c28fa693d8bb8f9419de1220d4359a1b2d Mon Sep 17 00:00:00 2001
 From: Julien Grall <jgrall@amazon.com>
 Date: Tue, 12 Jul 2022 11:23:01 +0200
-Subject: [PATCH 04/21] tools/xenstored: Harden corrupt()
+Subject: [PATCH 04/67] tools/xenstored: Harden corrupt()
 
 At the moment, corrupt() is neither checking for allocation failure
 nor freeing the allocated memory.
@@ -40,5 +40,5 @@ index 8033c1e0eb28..9172dd767140 100644
 check_store();
 }
 --
-2.35.1
+2.37.3
diff --git a/0005-x86-spec-ctrl-Only-adjust-MSR_SPEC_CTRL-for-idle-wit.patch b/0005-x86-spec-ctrl-Only-adjust-MSR_SPEC_CTRL-for-idle-wit.patch
index 158e2b0..7d79c2e 100644
--- a/0005-x86-spec-ctrl-Only-adjust-MSR_SPEC_CTRL-for-idle-wit.patch
+++ b/0005-x86-spec-ctrl-Only-adjust-MSR_SPEC_CTRL-for-idle-wit.patch
@@ -1,7 +1,7 @@
 From 799a8d49237a62ea0d33c3756a6a7f665b8389b2 Mon Sep 17 00:00:00 2001
 From: Andrew Cooper <andrew.cooper3@citrix.com>
 Date: Tue, 12 Jul 2022 11:23:32 +0200
-Subject: [PATCH 05/21] x86/spec-ctrl: Only adjust MSR_SPEC_CTRL for idle with
+Subject: [PATCH 05/67] x86/spec-ctrl: Only adjust MSR_SPEC_CTRL for idle with
  legacy IBRS
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
@@ -89,5 +89,5 @@ index 68f6c46c470c..12283573cdd5 100644
 * Disable shadowing before updating the MSR. There are no SMP issues
 * here; only local processor ordering concerns.
 --
-2.35.1
+2.37.3
diff --git a/0006-x86-spec-ctrl-Knobs-for-STIBP-and-PSFD-and-follow-ha.patch b/0006-x86-spec-ctrl-Knobs-for-STIBP-and-PSFD-and-follow-ha.patch
index 65670fd..965c965 100644
--- a/0006-x86-spec-ctrl-Knobs-for-STIBP-and-PSFD-and-follow-ha.patch
+++ b/0006-x86-spec-ctrl-Knobs-for-STIBP-and-PSFD-and-follow-ha.patch
@@ -1,7 +1,7 @@
 From cd5081e8c31651e623d86532306b4c56bbcb6e6d Mon Sep 17 00:00:00 2001
 From: Andrew Cooper <andrew.cooper3@citrix.com>
 Date: Tue, 12 Jul 2022 11:24:11 +0200
-Subject: [PATCH 06/21] x86/spec-ctrl: Knobs for STIBP and PSFD, and follow
+Subject: [PATCH 06/67] x86/spec-ctrl: Knobs for STIBP and PSFD, and follow
  hardware STIBP hint
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
@@ -230,5 +230,5 @@ index eb7fb70e86f9..8212227ee02a 100644
 /*
 * PV guests can poison the RSB to any virtual address from which
 --
-2.35.1
+2.37.3
diff --git a/0007-libxc-fix-compilation-error-with-gcc13.patch b/0007-libxc-fix-compilation-error-with-gcc13.patch
index b46552f..9a1ca92 100644
--- a/0007-libxc-fix-compilation-error-with-gcc13.patch
+++ b/0007-libxc-fix-compilation-error-with-gcc13.patch
@@ -1,7 +1,7 @@
 From 77deab4233b5d9ec5cf214fdc1652424fd4fc9d6 Mon Sep 17 00:00:00 2001
 From: Charles Arnold <carnold@suse.com>
 Date: Tue, 12 Jul 2022 11:24:39 +0200
-Subject: [PATCH 07/21] libxc: fix compilation error with gcc13
+Subject: [PATCH 07/67] libxc: fix compilation error with gcc13
 
 xc_psr.c:161:5: error: conflicting types for 'xc_psr_cmt_get_data'
 due to enum/integer mismatch;
@@ -29,5 +29,5 @@ index 318920166c5e..2013200b9eff 100644
 
 int xc_psr_cmt_enabled(xc_interface *xch);
 --
-2.35.1
+2.37.3
diff --git a/0008-x86-spec-ctrl-Honour-spec-ctrl-0-for-unpriv-mmio-sub.patch b/0008-x86-spec-ctrl-Honour-spec-ctrl-0-for-unpriv-mmio-sub.patch
index 94f729b..22a1ebe 100644
--- a/0008-x86-spec-ctrl-Honour-spec-ctrl-0-for-unpriv-mmio-sub.patch
+++ b/0008-x86-spec-ctrl-Honour-spec-ctrl-0-for-unpriv-mmio-sub.patch
@@ -1,7 +1,7 @@
 From 5be1f46f435f8b05608b1eae029cb17d8bd3a560 Mon Sep 17 00:00:00 2001
 From: Andrew Cooper <andrew.cooper3@citrix.com>
 Date: Tue, 12 Jul 2022 11:25:05 +0200
-Subject: [PATCH 08/21] x86/spec-ctrl: Honour spec-ctrl=0 for unpriv-mmio
+Subject: [PATCH 08/67] x86/spec-ctrl: Honour spec-ctrl=0 for unpriv-mmio
  sub-option
 
 This was an oversight from when unpriv-mmio was introduced.
@@ -28,5 +28,5 @@ index 8212227ee02a..06790897e496 100644
 else if ( val > 0 )
 rc = -EINVAL;
 --
-2.35.1
+2.37.3
diff --git a/0009-xen-cmdline-Extend-parse_boolean-to-signal-a-name-ma.patch b/0009-xen-cmdline-Extend-parse_boolean-to-signal-a-name-ma.patch
index 1b8787f..53a8b70 100644
--- a/0009-xen-cmdline-Extend-parse_boolean-to-signal-a-name-ma.patch
+++ b/0009-xen-cmdline-Extend-parse_boolean-to-signal-a-name-ma.patch
@@ -1,7 +1,7 @@
 From ae417706870333bb52ebcf33c527809cdd2d7265 Mon Sep 17 00:00:00 2001
 From: Andrew Cooper <andrew.cooper3@citrix.com>
 Date: Tue, 12 Jul 2022 11:25:40 +0200
-Subject: [PATCH 09/21] xen/cmdline: Extend parse_boolean() to signal a name
+Subject: [PATCH 09/67] xen/cmdline: Extend parse_boolean() to signal a name
  match
 
 This will help parsing a sub-option which has boolean and non-boolean options
@@ -83,5 +83,5 @@ index 1198c7c0b207..be7498135170 100644
 
 int parse_boolean(const char *name, const char *s, const char *e);
 --
-2.35.1
+2.37.3
diff --git a/0010-x86-spec-ctrl-Add-fine-grained-cmdline-suboptions-fo.patch b/0010-x86-spec-ctrl-Add-fine-grained-cmdline-suboptions-fo.patch
index a808523..36577d6 100644
--- a/0010-x86-spec-ctrl-Add-fine-grained-cmdline-suboptions-fo.patch
+++ b/0010-x86-spec-ctrl-Add-fine-grained-cmdline-suboptions-fo.patch
@@ -1,7 +1,7 @@
 From 08bfd4d01185e94fda1be9dd79a981d890a9085e Mon Sep 17 00:00:00 2001
 From: Andrew Cooper <andrew.cooper3@citrix.com>
 Date: Tue, 12 Jul 2022 11:26:14 +0200
-Subject: [PATCH 10/21] x86/spec-ctrl: Add fine-grained cmdline suboptions for
+Subject: [PATCH 10/67] x86/spec-ctrl: Add fine-grained cmdline suboptions for
  primitives
 
 Support controling the PV/HVM suboption of msr-sc/rsb/md-clear, which
@@ -133,5 +133,5 @@ index 06790897e496..225fe08259b3 100644
 
 /* Xen's speculative sidechannel mitigation settings. */
 --
-2.35.1
+2.37.3
diff --git a/0011-tools-helpers-fix-build-of-xen-init-dom0-with-Werror.patch b/0011-tools-helpers-fix-build-of-xen-init-dom0-with-Werror.patch
index b597673..dc468c8 100644
--- a/0011-tools-helpers-fix-build-of-xen-init-dom0-with-Werror.patch
+++ b/0011-tools-helpers-fix-build-of-xen-init-dom0-with-Werror.patch
@@ -1,7 +1,7 @@
 From f241cc48dabeef6cb0b381db62f2562b0a3970eb Mon Sep 17 00:00:00 2001
 From: Anthony PERARD <anthony.perard@citrix.com>
 Date: Tue, 12 Jul 2022 11:26:47 +0200
-Subject: [PATCH 11/21] tools/helpers: fix build of xen-init-dom0 with -Werror
+Subject: [PATCH 11/67] tools/helpers: fix build of xen-init-dom0 with -Werror
 
 Missing prototype of asprintf() without _GNU_SOURCE.
 
@@ -24,5 +24,5 @@ index c99224a4b607..b4861c9e8041 100644
 #include <stdint.h>
 #include <string.h>
 --
-2.35.1
+2.37.3
diff --git a/0012-libxl-check-return-value-of-libxl__xs_directory-in-n.patch b/0012-libxl-check-return-value-of-libxl__xs_directory-in-n.patch
index 898889b..74fee03 100644
--- a/0012-libxl-check-return-value-of-libxl__xs_directory-in-n.patch
+++ b/0012-libxl-check-return-value-of-libxl__xs_directory-in-n.patch
@@ -1,7 +1,7 @@
 From d470a54087e0fbd813dae4d773ad0b830eeec4a1 Mon Sep 17 00:00:00 2001
 From: Anthony PERARD <anthony.perard@citrix.com>
 Date: Tue, 12 Jul 2022 11:26:58 +0200
-Subject: [PATCH 12/21] libxl: check return value of libxl__xs_directory in
+Subject: [PATCH 12/67] libxl: check return value of libxl__xs_directory in
  name2bdf
 
 libxl__xs_directory() can potentially return NULL without setting `n`.
@@ -34,5 +34,5 @@ index 92bf86b2bebd..a5f5cdf62b80 100644
 
 for (i = 0; i < n; i++) {
 --
-2.35.1
+2.37.3
diff --git a/0013-update-Xen-version-to-4.15.4-pre.patch b/0013-update-Xen-version-to-4.15.4-pre.patch
index 664e9df..8626fdd 100644
--- a/0013-update-Xen-version-to-4.15.4-pre.patch
+++ b/0013-update-Xen-version-to-4.15.4-pre.patch
@@ -1,7 +1,7 @@
 From 505771bb1dffdf6f763fad18ee49a913b98abfea Mon Sep 17 00:00:00 2001
 From: Jan Beulich <jbeulich@suse.com>
 Date: Tue, 12 Jul 2022 11:28:33 +0200
-Subject: [PATCH 13/21] update Xen version to 4.15.4-pre
+Subject: [PATCH 13/67] update Xen version to 4.15.4-pre
 
 ---
 xen/Makefile | 2 +-
@@ -21,5 +21,5 @@ index e9a88325c467..cd66bb3b1c84 100644
 
 -include xen-version
 --
-2.35.1
+2.37.3
diff --git a/0014-x86-spec-ctrl-Rework-spec_ctrl_flags-context-switchi.patch b/0014-x86-spec-ctrl-Rework-spec_ctrl_flags-context-switchi.patch
index 681282e..a21b4d8 100644
--- a/0014-x86-spec-ctrl-Rework-spec_ctrl_flags-context-switchi.patch
+++ b/0014-x86-spec-ctrl-Rework-spec_ctrl_flags-context-switchi.patch
@@ -1,7 +1,7 @@
 From 156ab775769d39b2dfb048ccd34dee7e86ba83a2 Mon Sep 17 00:00:00 2001
 From: Andrew Cooper <andrew.cooper3@citrix.com>
 Date: Fri, 1 Jul 2022 15:59:40 +0100
-Subject: [PATCH 14/21] x86/spec-ctrl: Rework spec_ctrl_flags context switching
+Subject: [PATCH 14/67] x86/spec-ctrl: Rework spec_ctrl_flags context switching
 
 We are shortly going to need to context switch new bits in both the vcpu
 and S3 paths. Introduce SCF_IST_MASK and SCF_DOM_MASK, and rework d->arch.verw
@@ -163,5 +163,5 @@ index 5a590bac44aa..66b00d511fc6 100644
 .macro SPEC_CTRL_ENTRY_FROM_INTR_IST
 /*
 --
-2.35.1
+2.37.3
diff --git a/0015-x86-spec-ctrl-Rename-SCF_ist_wrmsr-to-SCF_ist_sc_msr.patch b/0015-x86-spec-ctrl-Rename-SCF_ist_wrmsr-to-SCF_ist_sc_msr.patch
index 553dbd2..49351ae 100644
--- a/0015-x86-spec-ctrl-Rename-SCF_ist_wrmsr-to-SCF_ist_sc_msr.patch
+++ b/0015-x86-spec-ctrl-Rename-SCF_ist_wrmsr-to-SCF_ist_sc_msr.patch
@@ -1,7 +1,7 @@
 From 2cfbca32b9dc3a8d6520549ff468a7f550daf1b1 Mon Sep 17 00:00:00 2001
 From: Andrew Cooper <andrew.cooper3@citrix.com>
 Date: Tue, 28 Jun 2022 14:36:56 +0100
-Subject: [PATCH 15/21] x86/spec-ctrl: Rename SCF_ist_wrmsr to SCF_ist_sc_msr
+Subject: [PATCH 15/67] x86/spec-ctrl: Rename SCF_ist_wrmsr to SCF_ist_sc_msr
 
 We are about to introduce SCF_ist_ibpb, at which point SCF_ist_wrmsr becomes
 ambiguous.
@@ -106,5 +106,5 @@ index 66b00d511fc6..0ff1b118f882 100644
 
 DO_SPEC_CTRL_EXIT_TO_XEN
 --
-2.35.1
+2.37.3
diff --git a/0016-x86-spec-ctrl-Rename-opt_ibpb-to-opt_ibpb_ctxt_switc.patch b/0016-x86-spec-ctrl-Rename-opt_ibpb-to-opt_ibpb_ctxt_switc.patch
index 9ed0093..f114f6d 100644
--- a/0016-x86-spec-ctrl-Rename-opt_ibpb-to-opt_ibpb_ctxt_switc.patch
+++ b/0016-x86-spec-ctrl-Rename-opt_ibpb-to-opt_ibpb_ctxt_switc.patch
@@ -1,7 +1,7 @@
 From c707015bf118df2c43e3a48b3774916322fca50a Mon Sep 17 00:00:00 2001
 From: Andrew Cooper <andrew.cooper3@citrix.com>
 Date: Mon, 4 Jul 2022 21:32:17 +0100
-Subject: [PATCH 16/21] x86/spec-ctrl: Rename opt_ibpb to opt_ibpb_ctxt_switch
+Subject: [PATCH 16/67] x86/spec-ctrl: Rename opt_ibpb to opt_ibpb_ctxt_switch
 
 We are about to introduce the use of IBPB at different points in Xen, making
 opt_ibpb ambiguous. Rename it to opt_ibpb_ctxt_switch.
@@ -93,5 +93,5 @@ index 6f8b0e09348e..fd8162ca9ab9 100644
 extern int8_t opt_eager_fpu;
 extern int8_t opt_l1d_flush;
 --
-2.35.1
+2.37.3
diff --git a/0017-x86-spec-ctrl-Rework-SPEC_CTRL_ENTRY_FROM_INTR_IST.patch b/0017-x86-spec-ctrl-Rework-SPEC_CTRL_ENTRY_FROM_INTR_IST.patch
index bae2818..e162148 100644
--- a/0017-x86-spec-ctrl-Rework-SPEC_CTRL_ENTRY_FROM_INTR_IST.patch
+++ b/0017-x86-spec-ctrl-Rework-SPEC_CTRL_ENTRY_FROM_INTR_IST.patch
@@ -1,7 +1,7 @@
 From d7f5fb1e2abd0d56cada9bfcf96ab530d214d9aa Mon Sep 17 00:00:00 2001
 From: Andrew Cooper <andrew.cooper3@citrix.com>
 Date: Fri, 1 Jul 2022 15:59:40 +0100
-Subject: [PATCH 17/21] x86/spec-ctrl: Rework SPEC_CTRL_ENTRY_FROM_INTR_IST
+Subject: [PATCH 17/67] x86/spec-ctrl: Rework SPEC_CTRL_ENTRY_FROM_INTR_IST
 
 We are shortly going to add a conditional IBPB in this path.
 
@@ -102,5 +102,5 @@ index 0ff1b118f882..15e24cde00d1 100644
 
 /* Opencoded UNLIKELY_START() with no condition. */
 --
-2.35.1
+2.37.3
diff --git a/0018-x86-spec-ctrl-Support-IBPB-on-entry.patch b/0018-x86-spec-ctrl-Support-IBPB-on-entry.patch
index 06efb27..1de9d4c 100644
--- a/0018-x86-spec-ctrl-Support-IBPB-on-entry.patch
+++ b/0018-x86-spec-ctrl-Support-IBPB-on-entry.patch
@@ -1,7 +1,7 @@
 From f0d78e0c11d3984c74f34a7325f862dee93a5835 Mon Sep 17 00:00:00 2001
 From: Andrew Cooper <andrew.cooper3@citrix.com>
 Date: Thu, 24 Feb 2022 13:44:33 +0000
-Subject: [PATCH 18/21] x86/spec-ctrl: Support IBPB-on-entry
+Subject: [PATCH 18/67] x86/spec-ctrl: Support IBPB-on-entry
 
 We are going to need this to mitigate Branch Type Confusion on AMD/Hygon CPUs,
 but as we've talked about using it in other cases too, arrange to support it
@@ -296,5 +296,5 @@ index 15e24cde00d1..9eb4ad9ab71d 100644
 
 jz .L\@_skip_rsb
 --
-2.35.1
+2.37.3
diff --git a/0019-x86-cpuid-Enumeration-for-BTC_NO.patch b/0019-x86-cpuid-Enumeration-for-BTC_NO.patch
index 91c38ee..a4444f4 100644
--- a/0019-x86-cpuid-Enumeration-for-BTC_NO.patch
+++ b/0019-x86-cpuid-Enumeration-for-BTC_NO.patch
@@ -1,7 +1,7 @@
 From 2b29ac476fa0c91655906fac3512202e514ecbed Mon Sep 17 00:00:00 2001
 From: Andrew Cooper <andrew.cooper3@citrix.com>
 Date: Mon, 16 May 2022 15:48:24 +0100
-Subject: [PATCH 19/21] x86/cpuid: Enumeration for BTC_NO
+Subject: [PATCH 19/67] x86/cpuid: Enumeration for BTC_NO
 
 BTC_NO indicates that hardware is not succeptable to Branch Type Confusion.
 
@@ -102,5 +102,5 @@ index 9686c82ed75c..1bbc7da4b53c 100644
 /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */
 XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */
 --
-2.35.1
+2.37.3
diff --git a/0020-x86-spec-ctrl-Enable-Zen2-chickenbit.patch b/0020-x86-spec-ctrl-Enable-Zen2-chickenbit.patch
index 9fd2fe0..4d12421 100644
--- a/0020-x86-spec-ctrl-Enable-Zen2-chickenbit.patch
+++ b/0020-x86-spec-ctrl-Enable-Zen2-chickenbit.patch
@@ -1,7 +1,7 @@
 From 409976bed91f61fb7b053d536d2fc87cf3ad7018 Mon Sep 17 00:00:00 2001
 From: Andrew Cooper <andrew.cooper3@citrix.com>
 Date: Tue, 15 Mar 2022 18:30:25 +0000
-Subject: [PATCH 20/21] x86/spec-ctrl: Enable Zen2 chickenbit
+Subject: [PATCH 20/67] x86/spec-ctrl: Enable Zen2 chickenbit
 
 ... as instructed in the Branch Type Confusion whitepaper.
@@ -101,5 +101,5 @@ index 1e743461e91d..b4a360723b14 100644
 #define MSR_AMD64_DR0_ADDRESS_MASK 0xc0011027
 #define MSR_AMD64_DR1_ADDRESS_MASK 0xc0011019
 --
-2.35.1
+2.37.3
diff --git a/0021-x86-spec-ctrl-Mitigate-Branch-Type-Confusion-when-po.patch b/0021-x86-spec-ctrl-Mitigate-Branch-Type-Confusion-when-po.patch
index 12ecc5b..b676ba3 100644
--- a/0021-x86-spec-ctrl-Mitigate-Branch-Type-Confusion-when-po.patch
+++ b/0021-x86-spec-ctrl-Mitigate-Branch-Type-Confusion-when-po.patch
@@ -1,7 +1,7 @@
 From 35bf91d30f1a480dcf5bfd99b79384b2b283da7f Mon Sep 17 00:00:00 2001
 From: Andrew Cooper <andrew.cooper3@citrix.com>
 Date: Mon, 27 Jun 2022 19:29:40 +0100
-Subject: [PATCH 21/21] x86/spec-ctrl: Mitigate Branch Type Confusion when
+Subject: [PATCH 21/67] x86/spec-ctrl: Mitigate Branch Type Confusion when
  possible
 
 Branch Type Confusion affects AMD/Hygon CPUs on Zen2 and earlier. To
@@ -301,5 +301,5 @@ index 10cd0cd2518f..33e845991b0a 100644
 extern int8_t opt_eager_fpu;
 extern int8_t opt_l1d_flush;
 --
-2.35.1
+2.37.3
diff --git a/0022-x86-mm-correct-TLB-flush-condition-in-_get_page_type.patch b/0022-x86-mm-correct-TLB-flush-condition-in-_get_page_type.patch
new file mode 100644
index 0000000..81f5b9a
--- /dev/null
+++ b/0022-x86-mm-correct-TLB-flush-condition-in-_get_page_type.patch
@@ -0,0 +1,45 @@
+From 3859f3ee7e37323ae5e0014c07ba8d3a4d7890b2 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 26 Jul 2022 15:03:14 +0200
+Subject: [PATCH 22/67] x86/mm: correct TLB flush condition in _get_page_type()
+
+When this logic was moved, it was moved across the point where nx is
+updated to hold the new type for the page. IOW originally it was
+equivalent to using x (and perhaps x would better have been used), but
+now it isn't anymore. Switch to using x, which then brings things in
+line again with the slightly earlier comment there (now) talking about
+transitions _from_ writable.
+
+I have to confess though that I cannot make a direct connection between
+the reported observed behavior of guests leaving several pages around
+with pending general references and the change here. Repeated testing,
+nevertheless, confirms the reported issue is no longer there.
+
+This is CVE-2022-33745 / XSA-408.
+
+Reported-by: Charles Arnold <carnold@suse.com>
+Fixes: 8cc5036bc385 ("x86/pv: Fix ABAC cmpxchg() race in _get_page_type()")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: a9949efb288fd6e21bbaf9d5826207c7c41cda27
+master date: 2022-07-26 14:54:34 +0200
+---
+ xen/arch/x86/mm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 7d0747017db5..c88dc749d431 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -2992,7 +2992,7 @@ static int _get_page_type(struct page_info *page, unsigned long type,
+         if ( unlikely(!cpumask_empty(mask)) &&
+              /* Shadow mode: track only writable pages. */
+              (!shadow_mode_enabled(d) ||
+-              ((nx & PGT_type_mask) == PGT_writable_page)) )
++              ((x & PGT_type_mask) == PGT_writable_page)) )
+         {
+             perfc_incr(need_flush_tlb_flush);
+             /*
+--
+2.37.3
+
diff --git a/0023-xl-relax-freemem-s-retry-calculation.patch b/0023-xl-relax-freemem-s-retry-calculation.patch
new file mode 100644
index 0000000..d7dda30
--- /dev/null
+++ b/0023-xl-relax-freemem-s-retry-calculation.patch
@@ -0,0 +1,80 @@
+From 2173d9c8be28d5f33c0e299a363ac994867d111b Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 27 Jul 2022 09:28:46 +0200
+Subject: [PATCH 23/67] xl: relax freemem()'s retry calculation
+
+While in principle possible also under other conditions as long as other
+parallel operations potentially consuming memory aren't "locked out", in
+particular with IOMMU large page mappings used in Dom0 (for PV when in
+strict mode; for PVH when not sharing page tables with HAP) ballooning
+out of individual pages can actually lead to less free memory available
+afterwards. This is because to split a large page, one or more page
+table pages are necessary (one per level that is split).
+
+When rebooting a guest I've observed freemem() to fail: A single page
+was required to be ballooned out (presumably because of heap
+fragmentation in the hypervisor). This ballooning out of a single page
+of course went fast, but freemem() then found that it would require to
+balloon out another page. This repeating just another time leads to the
+function to signal failure to the caller - without having come anywhere
+near the designated 30s that the whole process is allowed to not make
+any progress at all.
+
+Convert from a simple retry count to actually calculating elapsed time,
+subtracting from an initial credit of 30s. Don't go as far as limiting
+the "wait_secs" value passed to libxl_wait_for_memory_target(), though.
+While this leads to the overall process now possibly taking longer (if
+the previous iteration ended very close to the intended 30s), this
+compensates to some degree for the value passed really meaning "allowed
+to run for this long without making progress".
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
+master commit: e58370df76eacf1f7ca0340e9b96430c77b41a79
+master date: 2022-07-12 15:25:00 +0200
+---
+ tools/xl/xl_vmcontrol.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/tools/xl/xl_vmcontrol.c b/tools/xl/xl_vmcontrol.c
+index 435155a03396..5dee7730ca76 100644
+--- a/tools/xl/xl_vmcontrol.c
++++ b/tools/xl/xl_vmcontrol.c
+@@ -321,7 +321,8 @@ static int domain_wait_event(uint32_t domid, libxl_event **event_r)
+  */
+ static bool freemem(uint32_t domid, libxl_domain_config *d_config)
+ {
+-    int rc, retries = 3;
++    int rc;
++    double credit = 30;
+     uint64_t need_memkb, free_memkb;
+ 
+     if (!autoballoon)
+@@ -332,6 +333,8 @@ static bool freemem(uint32_t domid, libxl_domain_config *d_config)
+         return false;
+ 
+     do {
++        time_t start;
++
+         rc = libxl_get_free_memory(ctx, &free_memkb);
+         if (rc < 0)
+             return false;
+@@ -345,12 +348,13 @@ static bool freemem(uint32_t domid, libxl_domain_config *d_config)
+ 
+         /* wait until dom0 reaches its target, as long as we are making
+          * progress */
++        start = time(NULL);
+         rc = libxl_wait_for_memory_target(ctx, 0, 10);
+         if (rc < 0)
+             return false;
+ 
+-        retries--;
+-    } while (retries > 0);
++        credit -= difftime(time(NULL), start);
++    } while (credit > 0);
+ 
+     return false;
+ }
+--
+2.37.3
+
diff --git a/0024-tools-init-xenstore-domain-fix-memory-map-for-PVH-st.patch b/0024-tools-init-xenstore-domain-fix-memory-map-for-PVH-st.patch
new file mode 100644
index 0000000..fbb1448
--- /dev/null
+++ b/0024-tools-init-xenstore-domain-fix-memory-map-for-PVH-st.patch
@@ -0,0 +1,59 @@
+From a2684d9cbbfb02b268be7e551674f709db0617a4 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Wed, 27 Jul 2022 09:29:08 +0200
+Subject: [PATCH 24/67] tools/init-xenstore-domain: fix memory map for PVH
+ stubdom
+
+In case of maxmem != memsize the E820 map of the PVH stubdom is wrong,
+as it is missing the RAM above memsize.
+
+Additionally the memory map should only specify the Xen special pages
+as reserved.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
+master commit: 134d53f577076d4f26091e25762f27cc3c73bf58
+master date: 2022-07-12 15:25:20 +0200
+---
+ tools/helpers/init-xenstore-domain.c | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+diff --git a/tools/helpers/init-xenstore-domain.c b/tools/helpers/init-xenstore-domain.c
+index 6836002f0bad..32689abd7479 100644
+--- a/tools/helpers/init-xenstore-domain.c
++++ b/tools/helpers/init-xenstore-domain.c
+@@ -72,8 +72,9 @@ static int build(xc_interface *xch)
+     char cmdline[512];
+     int rv, xs_fd;
+     struct xc_dom_image *dom = NULL;
+-    int limit_kb = (maxmem ? : (memory + 1)) * 1024;
++    int limit_kb = (maxmem ? : memory) * 1024 + X86_HVM_NR_SPECIAL_PAGES * 4;
+     uint64_t mem_size = MB(memory);
++    uint64_t max_size = MB(maxmem ? : memory);
+     struct e820entry e820[3];
+     struct xen_domctl_createdomain config = {
+         .ssidref = SECINITSID_DOMU,
+@@ -157,13 +158,16 @@ static int build(xc_interface *xch)
+         dom->mmio_start = LAPIC_BASE_ADDRESS;
+         dom->max_vcpus = 1;
+         e820[0].addr = 0;
+-        e820[0].size = dom->lowmem_end;
++        e820[0].size = (max_size > LAPIC_BASE_ADDRESS) ?
++                       LAPIC_BASE_ADDRESS : max_size;
+         e820[0].type = E820_RAM;
+-        e820[1].addr = LAPIC_BASE_ADDRESS;
+-        e820[1].size = dom->mmio_size;
++        e820[1].addr = (X86_HVM_END_SPECIAL_REGION -
++                        X86_HVM_NR_SPECIAL_PAGES) << XC_PAGE_SHIFT;
++        e820[1].size = X86_HVM_NR_SPECIAL_PAGES << XC_PAGE_SHIFT;
+         e820[1].type = E820_RESERVED;
+         e820[2].addr = GB(4);
+-        e820[2].size = dom->highmem_end - GB(4);
++        e820[2].size = (max_size > LAPIC_BASE_ADDRESS) ?
++                       max_size - LAPIC_BASE_ADDRESS : 0;
+         e820[2].type = E820_RAM;
+     }
+ 
+--
+2.37.3
+
diff --git a/0025-xl-move-freemem-s-credit-expired-loop-exit.patch b/0025-xl-move-freemem-s-credit-expired-loop-exit.patch
new file mode 100644
index 0000000..c3a1965
--- /dev/null
+++ b/0025-xl-move-freemem-s-credit-expired-loop-exit.patch
@@ -0,0 +1,55 @@
+From c37099426ea678c1d5b6c99ae5ad6834f4edd2e6 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 27 Jul 2022 09:29:31 +0200
+Subject: [PATCH 25/67] xl: move freemem()'s "credit expired" loop exit
+
+Move the "credit expired" loop exit to the middle of the loop,
+immediately after "return true". This way having reached the goal on the
+last iteration would be reported as success to the caller, rather than
+as "timed out".
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
+master commit: d8f8cb8bdd02fad3b6986ae93511f750fa7f7e6a
+master date: 2022-07-18 17:48:18 +0200
+---
+ tools/xl/xl_vmcontrol.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/tools/xl/xl_vmcontrol.c b/tools/xl/xl_vmcontrol.c
+index 5dee7730ca76..d1c6f8aae67a 100644
+--- a/tools/xl/xl_vmcontrol.c
++++ b/tools/xl/xl_vmcontrol.c
+@@ -332,7 +332,7 @@ static bool freemem(uint32_t domid, libxl_domain_config *d_config)
+         if (rc < 0)
+             return false;
+ 
+-    do {
++    for (;;) {
+         time_t start;
+ 
+         rc = libxl_get_free_memory(ctx, &free_memkb);
+@@ -342,6 +342,9 @@ static bool freemem(uint32_t domid, libxl_domain_config *d_config)
+         if (free_memkb >= need_memkb)
+             return true;
+ 
++        if (credit <= 0)
++            return false;
++
+         rc = libxl_set_memory_target(ctx, 0, free_memkb - need_memkb, 1, 0);
+         if (rc < 0)
+             return false;
+@@ -354,9 +357,7 @@ static bool freemem(uint32_t domid, libxl_domain_config *d_config)
+             return false;
+ 
+         credit -= difftime(time(NULL), start);
+-    } while (credit > 0);
+-
+-    return false;
++    }
+ }
+ 
+ static void reload_domain_config(uint32_t domid,
+--
+2.37.3
+
diff --git a/0026-x86-spec-ctrl-correct-per-guest-type-reporting-of-MD.patch b/0026-x86-spec-ctrl-correct-per-guest-type-reporting-of-MD.patch
new file mode 100644
index 0000000..fbf3f41
--- /dev/null
+++ b/0026-x86-spec-ctrl-correct-per-guest-type-reporting-of-MD.patch
@@ -0,0 +1,56 @@
+From 5f1d0179e15d726622a49044a825894d5010df15 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 27 Jul 2022 09:29:54 +0200
+Subject: [PATCH 26/67] x86/spec-ctrl: correct per-guest-type reporting of
+ MD_CLEAR
+
+There are command line controls for this and the default also isn't "always
+enable when hardware supports it", which logging should take into account.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: fdbf8bdfebc2ed323c521848f642cc4f6b8cb662
+master date: 2022-07-19 08:36:53 +0200
+---
+ xen/arch/x86/spec_ctrl.c | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 563519ce0e31..f7b0251c42bc 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -511,13 +511,12 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
+     printk("  Support for HVM VMs:%s%s%s%s%s%s\n",
+            (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ||
+             boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ||
+-            boot_cpu_has(X86_FEATURE_MD_CLEAR) ||
+             boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ||
+-            opt_eager_fpu) ? "" : " None",
++            opt_eager_fpu || opt_md_clear_hvm) ? "" : " None",
+            boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "",
+            boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ? " RSB" : "",
+            opt_eager_fpu ? " EAGER_FPU" : "",
+-           boot_cpu_has(X86_FEATURE_MD_CLEAR) ? " MD_CLEAR" : "",
++           opt_md_clear_hvm ? " MD_CLEAR" : "",
+            boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ? " IBPB-entry" : "");
+ 
+ #endif
+@@ -525,13 +524,12 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
+     printk("  Support for PV VMs:%s%s%s%s%s%s\n",
+            (boot_cpu_has(X86_FEATURE_SC_MSR_PV) ||
+             boot_cpu_has(X86_FEATURE_SC_RSB_PV) ||
+-            boot_cpu_has(X86_FEATURE_MD_CLEAR) ||
+             boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ||
+-            opt_eager_fpu) ? "" : " None",
++            opt_eager_fpu || opt_md_clear_pv) ? "" : " None",
+            boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "",
+            boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "",
+            opt_eager_fpu ? " EAGER_FPU" : "",
+-           boot_cpu_has(X86_FEATURE_MD_CLEAR) ? " MD_CLEAR" : "",
++           opt_md_clear_pv ? " MD_CLEAR" : "",
+            boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ? " IBPB-entry" : "");
+ 
+     printk("  XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n",
+--
+2.37.3
+
diff --git a/0027-x86-deal-with-gcc12-release-build-issues.patch b/0027-x86-deal-with-gcc12-release-build-issues.patch
new file mode 100644
index 0000000..d26f6d3
--- /dev/null
+++ b/0027-x86-deal-with-gcc12-release-build-issues.patch
@@ -0,0 +1,65 @@
+From a095c6cde8a717325cc31bb393c547cad5e16e35 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 27 Jul 2022 09:30:24 +0200
+Subject: [PATCH 27/67] x86: deal with gcc12 release build issues
+
+While a number of issues we previously had with pre-release gcc12 were
+fixed in the final release, we continue to have one issue (with multiple
+instances) when doing release builds (i.e. at higher optimization
+levels): The compiler takes issue with subtracting (always 1 in our
+case) from artifical labels (expressed as array) marking the end of
+certain regions. This isn't an unreasonable position to take. Simply
+hide the "array-ness" by casting to an integer type. To keep things
+looking consistently, apply the same cast also on the respective
+expressions dealing with the starting addresses. (Note how
+efi_arch_memory_setup()'s l2_table_offset() invocations avoid a similar
+issue by already having the necessary casts.) In is_xen_fixed_mfn()
+further switch from __pa() to virt_to_maddr() to better match the left
+sides of the <= operators.
+
+Reported-by: Charles Arnold <carnold@suse.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 9723507daf2120131410c91980d4e4d9b0d0aa90
+master date: 2022-07-19 08:37:29 +0200
+---
+ xen/arch/x86/efi/efi-boot.h | 6 +++---
+ xen/include/asm-x86/mm.h    | 4 ++--
+ 2 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
+index 2541ba1f320a..84fd77931456 100644
+--- a/xen/arch/x86/efi/efi-boot.h
++++ b/xen/arch/x86/efi/efi-boot.h
+@@ -624,10 +624,10 @@ static void __init efi_arch_memory_setup(void)
+      * appropriate l2 slots to map.
+      */
+ #define l2_4G_offset(a) \
+-    (((UINTN)(a) >> L2_PAGETABLE_SHIFT) & (4 * L2_PAGETABLE_ENTRIES - 1))
++    (((a) >> L2_PAGETABLE_SHIFT) & (4 * L2_PAGETABLE_ENTRIES - 1))
+ 
+-    for ( i = l2_4G_offset(_start);
+-          i <= l2_4G_offset(_end - 1); ++i )
++    for ( i = l2_4G_offset((UINTN)_start);
++          i <= l2_4G_offset((UINTN)_end - 1); ++i )
+     {
+         l2_pgentry_t pte = l2e_from_paddr(i << L2_PAGETABLE_SHIFT,
+                                           __PAGE_HYPERVISOR | _PAGE_PSE);
+diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
+index 5c19b71eca70..71dd28f126c3 100644
+--- a/xen/include/asm-x86/mm.h
++++ b/xen/include/asm-x86/mm.h
+@@ -309,8 +309,8 @@ struct page_info
+ #define is_xen_heap_mfn(mfn) \
+     (mfn_valid(mfn) && is_xen_heap_page(mfn_to_page(mfn)))
+ #define is_xen_fixed_mfn(mfn) \
+-    (((mfn_to_maddr(mfn)) >= __pa(_stext)) && \
+-     ((mfn_to_maddr(mfn)) <= __pa(__2M_rwdata_end - 1)))
++    (((mfn_to_maddr(mfn)) >= virt_to_maddr((unsigned long)_stext)) && \
++     ((mfn_to_maddr(mfn)) <= virt_to_maddr((unsigned long)__2M_rwdata_end - 1)))
+ 
+ #define PRtype_info "016lx"/* should only be used for printk's */
+ 
+--
+2.37.3
+
diff --git a/0028-x86emul-add-memory-operand-low-bits-checks-for-ENQCM.patch b/0028-x86emul-add-memory-operand-low-bits-checks-for-ENQCM.patch
new file mode 100644
index 0000000..26b959e
--- /dev/null
+++ b/0028-x86emul-add-memory-operand-low-bits-checks-for-ENQCM.patch
@@ -0,0 +1,45 @@
+From 4799a202a9017360708c18aa8cd699bd8d6be08b Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 27 Jul 2022 09:31:01 +0200
+Subject: [PATCH 28/67] x86emul: add memory operand low bits checks for
+ ENQCMD{,S}
+
+Already ISE rev 044 added text to this effect; rev 045 further dropped
+leftover earlier text indicating the contrary:
+- ENQCMD requires the low 32 bits of the memory operand to be clear,
+- ENDCMDS requires bits 20...30 of the memory operand to be clear.
+
+Fixes: d27385968741 ("x86emul: support ENQCMD insns")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: d620c66bdbe5510c3bae89be8cc7ca9a2a6cbaba
+master date: 2022-07-20 15:46:48 +0200
+---
+ xen/arch/x86/x86_emulate/x86_emulate.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
+index 5e297f797187..247c14dc4e68 100644
+--- a/xen/arch/x86/x86_emulate/x86_emulate.c
++++ b/xen/arch/x86/x86_emulate/x86_emulate.c
+@@ -10464,6 +10464,7 @@ x86_emulate(
+             goto done;
+         if ( vex.pfx == vex_f2 ) /* enqcmd */
+         {
++            generate_exception_if(mmvalp->data32[0], EXC_GP, 0);
+             fail_if(!ops->read_msr);
+             if ( (rc = ops->read_msr(MSR_PASID, &msr_val,
+                                      ctxt)) != X86EMUL_OKAY )
+@@ -10471,7 +10472,8 @@ x86_emulate(
+             generate_exception_if(!(msr_val & PASID_VALID), EXC_GP, 0);
+             mmvalp->data32[0] = MASK_EXTR(msr_val, PASID_PASID_MASK);
+         }
+-        mmvalp->data32[0] &= ~0x7ff00000;
++        else
++            generate_exception_if(mmvalp->data32[0] & 0x7ff00000, EXC_GP, 0);
+         state->blk = blk_enqcmd;
+         if ( (rc = ops->blk(x86_seg_es, src.val, mmvalp, 64, &_regs.eflags,
+                             state, ctxt)) != X86EMUL_OKAY )
+--
+2.37.3
+
diff --git a/0029-x86-also-suppress-use-of-MMX-insns.patch b/0029-x86-also-suppress-use-of-MMX-insns.patch
new file mode 100644
index 0000000..1298a47
--- /dev/null
+++ b/0029-x86-also-suppress-use-of-MMX-insns.patch
@@ -0,0 +1,39 @@
+From 30d3de4c61c297e12662df1fdb89af335947e59d Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 27 Jul 2022 09:31:31 +0200
+Subject: [PATCH 29/67] x86: also suppress use of MMX insns
+
+Passing -mno-sse alone is not enough: The compiler may still find
+(questionable) reasons to use MMX insns. In particular with gcc12 use
+of MOVD+PUNPCKLDQ+MOVQ was observed in an apparent attempt to auto-
+vectorize the storing of two adjacent zeroes, 32 bits each.
+
+Reported-by: ChrisD <chris@dalessio.org>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 6fe2e39a0243bddba60f83b77b972a5922d25eb8
+master date: 2022-07-20 15:48:49 +0200
+---
+ xen/arch/x86/arch.mk | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/arch.mk b/xen/arch/x86/arch.mk
+index 456e5d5c1ad7..c4337a1a118c 100644
+--- a/xen/arch/x86/arch.mk
++++ b/xen/arch/x86/arch.mk
+@@ -37,9 +37,9 @@ $(call as-option-add,CFLAGS,CC,\
+ 
+ CFLAGS += -mno-red-zone -fpic
+ 
+-# Xen doesn't use SSE interally. If the compiler supports it, also skip the
+-# SSE setup for variadic function calls.
+-CFLAGS += -mno-sse $(call cc-option,$(CC),-mskip-rax-setup)
++# Xen doesn't use MMX or SSE interally. If the compiler supports it, also skip
++# the SSE setup for variadic function calls.
++CFLAGS += -mno-mmx -mno-sse $(call cc-option,$(CC),-mskip-rax-setup)
+ 
+ # Compile with thunk-extern, indirect-branch-register if avaiable.
+ CFLAGS-$(CONFIG_INDIRECT_THUNK) += -mindirect-branch=thunk-extern
+--
+2.37.3
+
diff --git a/0030-common-memory-Fix-ifdefs-for-ptdom_max_order.patch b/0030-common-memory-Fix-ifdefs-for-ptdom_max_order.patch
new file mode 100644
index 0000000..a9bf845
--- /dev/null
+++ b/0030-common-memory-Fix-ifdefs-for-ptdom_max_order.patch
@@ -0,0 +1,52 @@
+From b64f1c9e3e3a2a416c7bb5aab77ba5d2cba98638 Mon Sep 17 00:00:00 2001
+From: Luca Fancellu <luca.fancellu@arm.com>
+Date: Wed, 27 Jul 2022 09:31:49 +0200
+Subject: [PATCH 30/67] common/memory: Fix ifdefs for ptdom_max_order
+
+In common/memory.c the ifdef code surrounding ptdom_max_order is
+using HAS_PASSTHROUGH instead of CONFIG_HAS_PASSTHROUGH, fix the
+problem using the correct macro.
+
+Fixes: e0d44c1f9461 ("build: convert HAS_PASSTHROUGH use to Kconfig")
+Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 5707470bf3103ebae43697a7ac2faced6cd35f92
+master date: 2022-07-26 08:33:46 +0200
+---
+ xen/common/memory.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/xen/common/memory.c b/xen/common/memory.c
+index 297b98a562b2..95b2b934e4a2 100644
+--- a/xen/common/memory.c
++++ b/xen/common/memory.c
+@@ -58,7 +58,7 @@ struct memop_args {
+ static unsigned int __read_mostly domu_max_order = CONFIG_DOMU_MAX_ORDER;
+ static unsigned int __read_mostly ctldom_max_order = CONFIG_CTLDOM_MAX_ORDER;
+ static unsigned int __read_mostly hwdom_max_order = CONFIG_HWDOM_MAX_ORDER;
+-#ifdef HAS_PASSTHROUGH
++#ifdef CONFIG_HAS_PASSTHROUGH
+ static unsigned int __read_mostly ptdom_max_order = CONFIG_PTDOM_MAX_ORDER;
+ #endif
+ 
+@@ -70,7 +70,7 @@ static int __init parse_max_order(const char *s)
+         ctldom_max_order = simple_strtoul(s, &s, 0);
+     if ( *s == ',' && *++s != ',' )
+         hwdom_max_order = simple_strtoul(s, &s, 0);
+-#ifdef HAS_PASSTHROUGH
++#ifdef CONFIG_HAS_PASSTHROUGH
+     if ( *s == ',' && *++s != ',' )
+         ptdom_max_order = simple_strtoul(s, &s, 0);
+ #endif
+@@ -83,7 +83,7 @@ static unsigned int max_order(const struct domain *d)
+ {
+     unsigned int order = domu_max_order;
+ 
+-#ifdef HAS_PASSTHROUGH
++#ifdef CONFIG_HAS_PASSTHROUGH
+     if ( cache_flush_permitted(d) && order < ptdom_max_order )
+         order = ptdom_max_order;
+ #endif
+--
+2.37.3
+
diff --git a/0031-tools-libxl-env-variable-to-signal-whether-disk-nic-.patch b/0031-tools-libxl-env-variable-to-signal-whether-disk-nic-.patch
new file mode 100644
index 0000000..a52055a
--- /dev/null
+++ b/0031-tools-libxl-env-variable-to-signal-whether-disk-nic-.patch
@@ -0,0 +1,107 @@
+From 1b9845dcf959421db3a071a6bc0aa9d8edbffb50 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Wed, 3 Aug 2022 12:41:18 +0200
+Subject: [PATCH 31/67] tools/libxl: env variable to signal whether disk/nic
+ backend is trusted
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Introduce support in libxl for fetching the default backend trusted
+option for disk and nic devices.
+
+Users can set LIBXL_{DISK,NIC}_BACKEND_UNTRUSTED environment variable
+to notify libxl of whether the backends for disk and nic devices
+should be trusted. Such information is passed into the frontend so it
+can take the appropriate measures.
+
+This is part of XSA-403.
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
+---
+ docs/man/xl.1.pod.in          | 18 ++++++++++++++++++
+ tools/libs/light/libxl_disk.c |  5 +++++
+ tools/libs/light/libxl_nic.c  |  7 +++++++
+ 3 files changed, 30 insertions(+)
+
+diff --git a/docs/man/xl.1.pod.in b/docs/man/xl.1.pod.in
+index e2176bd696cb..45e1430aeb74 100644
+--- a/docs/man/xl.1.pod.in
++++ b/docs/man/xl.1.pod.in
+@@ -1946,6 +1946,24 @@ shows the decimal value. For non-linear mode, it shows hexadecimal value.
+ 
+ =back
+ 
++=head1 ENVIRONMENT
++
++=over 4
++
++=item B<LIBXL_DISK_BACKEND_UNTRUSTED>
++
++Set this environment variable to "1" to suggest to the guest that the disk
++backend shouldn't be trusted. If the variable is absent or set to "0", the
++backend will be trusted.
++
++=item B<LIBXL_NIC_BACKEND_UNTRUSTED>
++
++Set this environment variable to "1" to suggest to the guest that the network
++backend shouldn't be trusted. If the variable is absent or set to "0", the
++backend will be trusted.
++
++=back
++
+ =head1 IGNORED FOR COMPATIBILITY WITH XM
+ 
+ xl is mostly command-line compatible with the old xm utility used with
+diff --git a/tools/libs/light/libxl_disk.c b/tools/libs/light/libxl_disk.c
+index 93936d0dd0f8..67d1cc18578f 100644
+--- a/tools/libs/light/libxl_disk.c
++++ b/tools/libs/light/libxl_disk.c
+@@ -246,6 +246,7 @@ static void device_disk_add(libxl__egc *egc, uint32_t domid,
+     libxl_domain_config d_config;
+     libxl_device_disk disk_saved;
+     libxl__flock *lock = NULL;
++    const char *envvar;
+ 
+     libxl_domain_config_init(&d_config);
+     libxl_device_disk_init(&disk_saved);
+@@ -395,6 +396,10 @@ static void device_disk_add(libxl__egc *egc, uint32_t domid,
+     flexarray_append(front, GCSPRINTF("%d", device->devid));
+     flexarray_append(front, "device-type");
+     flexarray_append(front, disk->is_cdrom ? "cdrom" : "disk");
++    flexarray_append(front, "trusted");
++    envvar = getenv("LIBXL_DISK_BACKEND_UNTRUSTED");
++    /* Set "trusted=1" if envvar missing or is "0". */
++    flexarray_append(front, !envvar || !strcmp("0", envvar) ? "1" : "0");
+ 
+     /*
+      * Old PV kernel disk frontends before 2.6.26 rely on tool stack to
+diff --git a/tools/libs/light/libxl_nic.c b/tools/libs/light/libxl_nic.c
+index 0b9e70c9d13d..f87890d1d65f 100644
+--- a/tools/libs/light/libxl_nic.c
++++ b/tools/libs/light/libxl_nic.c
+@@ -132,6 +132,8 @@ static int libxl__set_xenstore_nic(libxl__gc *gc, uint32_t domid,
+                                    flexarray_t *back, flexarray_t *front,
+                                    flexarray_t *ro_front)
+ {
++    const char *envvar;
++
+     flexarray_grow(back, 2);
+ 
+     if (nic->script)
+@@ -255,6 +257,11 @@ static int libxl__set_xenstore_nic(libxl__gc *gc, uint32_t domid,
+     flexarray_append(back, "hotplug-status");
+     flexarray_append(back, "");
+ 
++    flexarray_append(front, "trusted");
++    envvar = getenv("LIBXL_NIC_BACKEND_UNTRUSTED");
++    /* Set "trusted=1" if envvar missing or is "0". */
++    flexarray_append(front, !envvar || !strcmp("0", envvar) ? "1" : "0");
++
+     return 0;
+ }
+ 
+--
+2.37.3
+
diff --git a/0032-x86-msr-fix-X2APIC_LAST.patch b/0032-x86-msr-fix-X2APIC_LAST.patch
new file mode 100644
index 0000000..ac42842
--- /dev/null
+++ b/0032-x86-msr-fix-X2APIC_LAST.patch
@@ -0,0 +1,66 @@
+From df3395f6b2d759aba39fb67a7bc0fe49147c8b39 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
+Date: Wed, 3 Aug 2022 12:41:49 +0200
+Subject: [PATCH 32/67] x86/msr: fix X2APIC_LAST
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The latest Intel manual now says the X2APIC reserved range is only
+0x800 to 0x8ff (NOT 0xbff).
+This changed between SDM 68 (Nov 2018) and SDM 69 (Jan 2019).
+The AMD manual documents 0x800-0x8ff too.
+
+There are non-X2APIC MSRs in the 0x900-0xbff range now:
+e.g. 0x981 is IA32_TME_CAPABILITY, an architectural MSR.
+
+The new MSR in this range appears to have been introduced in Icelake,
+so this commit should be backported to Xen versions supporting Icelake.
+
+Signed-off-by: Edwin Török <edvin.torok@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 13316827faadbb4f72ae6c625af9938d8f976f86
+master date: 2022-07-27 12:57:10 +0200
+---
+ xen/arch/x86/hvm/vmx/vmx.c      | 4 ++--
+ xen/include/asm-x86/msr-index.h | 2 +-
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
+index 868151a2e533..775b36433e24 100644
+--- a/xen/arch/x86/hvm/vmx/vmx.c
++++ b/xen/arch/x86/hvm/vmx/vmx.c
+@@ -3401,7 +3401,7 @@ void vmx_vlapic_msr_changed(struct vcpu *v)
+         if ( cpu_has_vmx_apic_reg_virt )
+         {
+             for ( msr = MSR_X2APIC_FIRST;
+-                  msr <= MSR_X2APIC_FIRST + 0xff; msr++ )
++                  msr <= MSR_X2APIC_LAST; msr++ )
+                 vmx_clear_msr_intercept(v, msr, VMX_MSR_R);
+ 
+             vmx_set_msr_intercept(v, MSR_X2APIC_PPR, VMX_MSR_R);
+@@ -3422,7 +3422,7 @@ void vmx_vlapic_msr_changed(struct vcpu *v)
+     if ( !(v->arch.hvm.vmx.secondary_exec_control &
+            SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE) )
+         for ( msr = MSR_X2APIC_FIRST;
+-              msr <= MSR_X2APIC_FIRST + 0xff; msr++ )
++              msr <= MSR_X2APIC_LAST; msr++ )
+             vmx_set_msr_intercept(v, msr, VMX_MSR_RW);
+ 
+     vmx_update_secondary_exec_control(v);
+diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
+index b4a360723b14..f1b2cf5460c1 100644
+--- a/xen/include/asm-x86/msr-index.h
++++ b/xen/include/asm-x86/msr-index.h
+@@ -459,7 +459,7 @@
+ #define MSR_IA32_TSC_ADJUST 0x0000003b
+ 
+ #define MSR_X2APIC_FIRST 0x00000800
+-#define MSR_X2APIC_LAST 0x00000bff
++#define MSR_X2APIC_LAST 0x000008ff
+ 
+ #define MSR_X2APIC_TPR 0x00000808
+ #define MSR_X2APIC_PPR 0x0000080a
+--
+2.37.3
+
diff --git a/0033-x86-spec-ctrl-Use-IST-RSB-protection-for-SVM-systems.patch b/0033-x86-spec-ctrl-Use-IST-RSB-protection-for-SVM-systems.patch
new file mode 100644
index 0000000..46780c4
--- /dev/null
+++ b/0033-x86-spec-ctrl-Use-IST-RSB-protection-for-SVM-systems.patch
@@ -0,0 +1,54 @@
+From 8ae0b4d1331c14fb9e30a42987c0152c9b00f530 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Mon, 15 Aug 2022 15:40:05 +0200
+Subject: [PATCH 33/67] x86/spec-ctrl: Use IST RSB protection for !SVM systems
+
+There is a corner case where a VT-x guest which manages to reliably trigger
+non-fatal #MC's could evade the rogue RSB speculation protections that were
+supposed to be in place.
+
+This is a lack of defence in depth; Xen does not architecturally execute more
+RET than CALL instructions, so an attacker would have to locate a different
+gadget (e.g. SpectreRSB) first to execute a transient path of excess RET
+instructions.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: e570e8d520ab542d8d35666b95cb3a0125b7b110
+master date: 2022-08-05 12:16:24 +0100
+---
+ xen/arch/x86/spec_ctrl.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index f7b0251c42bc..ac73806eacd8 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -1279,8 +1279,24 @@ void __init init_speculation_mitigations(void)
+      * mappings.
+      */
+     if ( opt_rsb_hvm )
++    {
+         setup_force_cpu_cap(X86_FEATURE_SC_RSB_HVM);
+ 
++        /*
++         * For SVM, Xen's RSB safety actions are performed before STGI, so
++         * behave atomically with respect to IST sources.
++         *
++         * For VT-x, NMIs are atomic with VMExit (the NMI gets queued but not
++         * delivered) whereas other IST sources are not atomic. Specifically,
++         * #MC can hit ahead the RSB safety action in the vmexit path.
++         *
++         * Therefore, it is necessary for the IST logic to protect Xen against
++         * possible rogue RSB speculation.
++         */
++        if ( !cpu_has_svm )
++            default_spec_ctrl_flags |= SCF_ist_rsb;
++    }
++
+     ibpb_calculations();
+ 
+     /* Check whether Eager FPU should be enabled by default. */
+--
+2.37.3
+
diff --git a/0034-x86-Expose-more-MSR_ARCH_CAPS-to-hwdom.patch b/0034-x86-Expose-more-MSR_ARCH_CAPS-to-hwdom.patch
new file mode 100644
index 0000000..6a73c21
--- /dev/null
+++ b/0034-x86-Expose-more-MSR_ARCH_CAPS-to-hwdom.patch
@@ -0,0 +1,68 @@
+From 5efcae1eb30ff24e100954e00889a568c1745ea1 Mon Sep 17 00:00:00 2001
+From: Jason Andryuk <jandryuk@gmail.com>
+Date: Mon, 15 Aug 2022 15:40:47 +0200
+Subject: [PATCH 34/67] x86: Expose more MSR_ARCH_CAPS to hwdom
+
+commit e46474278a0e ("x86/intel: Expose MSR_ARCH_CAPS to dom0") started
+exposing MSR_ARCH_CAPS to dom0. More bits in MSR_ARCH_CAPS have since
+been defined, but they haven't been exposed. Update the list to allow
+them through.
+
+As one example, this allows a Linux Dom0 to know that it has the
+appropriate microcode via FB_CLEAR. Notably, and with the updated
+microcode, this changes dom0's
+/sys/devices/system/cpu/vulnerabilities/mmio_stale_data changes from:
+
+ "Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown"
+
+to:
+
+ "Mitigation: Clear CPU buffers; SMT Host state unknown"
+
+This exposes the MMIO Stale Data and Intel Branch History Injection
+(BHI) controls as well as the page size change MCE issue bit.
+
+Fixes: commit 2ebe8fe9b7e0 ("x86/spec-ctrl: Enumeration for MMIO Stale Data controls")
+Fixes: commit cea9ae062295 ("x86/spec-ctrl: Enumeration for new Intel BHI controls")
+Fixes: commit 59e89cdabc71 ("x86/vtx: Disable executable EPT superpages to work around CVE-2018-12207")
+Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: e83cd54611fec5b7a539fa1281a14319143490e6
+master date: 2022-08-09 16:35:25 +0100
+---
+ xen/arch/x86/msr.c              | 5 ++++-
+ xen/include/asm-x86/msr-index.h | 2 ++
+ 2 files changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
+index 0739d00e74f1..aa9face9aad3 100644
+--- a/xen/arch/x86/msr.c
++++ b/xen/arch/x86/msr.c
+@@ -145,7 +145,10 @@ int init_domain_msr_policy(struct domain *d)
+ 
+         mp->arch_caps.raw = val &
+             (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA |
+-             ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | ARCH_CAPS_TAA_NO);
++             ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | ARCH_CAPS_IF_PSCHANGE_MC_NO |
++             ARCH_CAPS_TAA_NO | ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO |
++             ARCH_CAPS_PSDP_NO | ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA |
++             ARCH_CAPS_BHI_NO);
+     }
+ 
+     d->arch.msr = mp;
+diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
+index f1b2cf5460c1..49ca1f1845e6 100644
+--- a/xen/include/asm-x86/msr-index.h
++++ b/xen/include/asm-x86/msr-index.h
+@@ -64,6 +64,8 @@
+ #define ARCH_CAPS_PSDP_NO (_AC(1, ULL) << 15)
+ #define ARCH_CAPS_FB_CLEAR (_AC(1, ULL) << 17)
+ #define ARCH_CAPS_FB_CLEAR_CTRL (_AC(1, ULL) << 18)
++#define ARCH_CAPS_RRSBA (_AC(1, ULL) << 19)
++#define ARCH_CAPS_BHI_NO (_AC(1, ULL) << 20)
+ 
+ #define MSR_FLUSH_CMD 0x0000010b
+ #define FLUSH_CMD_L1D (_AC(1, ULL) << 0)
+--
+2.37.3
+
diff --git a/0035-xen-sched-setup-dom0-vCPUs-affinity-only-once.patch b/0035-xen-sched-setup-dom0-vCPUs-affinity-only-once.patch
new file mode 100644
index 0000000..0dfb3b4
--- /dev/null
+++ b/0035-xen-sched-setup-dom0-vCPUs-affinity-only-once.patch
@@ -0,0 +1,123 @@
+From 1e31848cdd8d2ff3cb76f364f04f9771f9b3a8b1 Mon Sep 17 00:00:00 2001
+From: Dario Faggioli <dfaggioli@suse.com>
+Date: Mon, 15 Aug 2022 15:41:25 +0200
+Subject: [PATCH 35/67] xen/sched: setup dom0 vCPUs affinity only once
+
+Right now, affinity for dom0 vCPUs is setup in two steps. This is a
+problem as, at least in Credit2, unit_insert() sees and uses the
+"intermediate" affinity, and place the vCPUs on CPUs where they cannot
+be run. And this in turn results in boot hangs, if the "dom0_nodes"
+parameter is used.
+
+Fix this by setting up the affinity properly once and for all, in
+sched_init_vcpu() called by create_vcpu().
+
+Note that, unless a soft-affinity is explicitly specified for dom0 (by
+using the relaxed mode of "dom0_nodes") we set it to the default, which
+is all CPUs, instead of computing it basing on hard affinity (if any).
+This is because hard and soft affinity should be considered as
+independent user controlled properties. In fact, if we dor derive dom0's
+soft-affinity from its boot-time hard-affinity, such computed value will
+continue to be used even if later the user changes the hard-affinity.
+And this could result in the vCPUs behaving differently than what the
+user wanted and expects.
+
+Fixes: dafd936dddbd ("Make credit2 the default scheduler")
+Reported-by: Olaf Hering <ohering@suse.de>
+Signed-off-by: Dario Faggioli <dfaggioli@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: c79e4d209be3ed2a6b8e97c35944786ed2a66b94
+master date: 2022-08-11 11:46:22 +0200
+---
+ xen/common/sched/core.c | 63 +++++++++++++++++++++++++----------------
+ 1 file changed, 39 insertions(+), 24 deletions(-)
+
+diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
+index 8f4b1ca10d1c..f07bd2681fcb 100644
+--- a/xen/common/sched/core.c
++++ b/xen/common/sched/core.c
+@@ -571,12 +571,46 @@ int sched_init_vcpu(struct vcpu *v)
+         return 1;
+     }
+ 
+-    /*
+-     * Initialize affinity settings. The idler, and potentially
+-     * domain-0 VCPUs, are pinned onto their respective physical CPUs.
+-     */
+-    if ( is_idle_domain(d) || (is_hardware_domain(d) && opt_dom0_vcpus_pin) )
++    if ( is_idle_domain(d) )
++    {
++        /* Idle vCPUs are always pinned onto their respective pCPUs */
+         sched_set_affinity(unit, cpumask_of(processor), &cpumask_all);
++    }
++    else if ( pv_shim && v->vcpu_id == 0 )
++    {
++        /*
++         * PV-shim: vcpus are pinned 1:1. Initially only 1 cpu is online,
++         * others will be dealt with when onlining them. This avoids pinning
++         * a vcpu to a not yet online cpu here.
++         */
++        sched_set_affinity(unit, cpumask_of(0), cpumask_of(0));
++    }
++    else if ( is_hardware_domain(d) && opt_dom0_vcpus_pin )
++    {
++        /*
++         * If dom0_vcpus_pin is specified, dom0 vCPUs are pinned 1:1 to
++         * their respective pCPUs too.
++         */
++        sched_set_affinity(unit, cpumask_of(processor), &cpumask_all);
++    }
++#ifdef CONFIG_X86
++    else if ( d->domain_id == 0 )
++    {
++        /*
++         * In absence of dom0_vcpus_pin instead, the hard and soft affinity of
++         * dom0 is controlled by the (x86 only) dom0_nodes parameter. At this
++         * point it has been parsed and decoded into the dom0_cpus mask.
++         *
++         * Note that we always honor what user explicitly requested, for both
++         * hard and soft affinity, without doing any dynamic computation of
++         * either of them.
++         */
++        if ( !dom0_affinity_relaxed )
++            sched_set_affinity(unit, &dom0_cpus, &cpumask_all);
++        else
++            sched_set_affinity(unit, &cpumask_all, &dom0_cpus);
++    }
++#endif
+     else
+         sched_set_affinity(unit, &cpumask_all, &cpumask_all);
+ 
+@@ -3386,29 +3420,10 @@ void wait(void)
+ void __init sched_setup_dom0_vcpus(struct domain *d)
+ {
+     unsigned int i;
+-    struct sched_unit *unit;
+ 
+     for ( i = 1; i < d->max_vcpus; i++ )
+         vcpu_create(d, i);
+ 
+-    /*
+-     * PV-shim: vcpus are pinned 1:1.
+-     * Initially only 1 cpu is online, others will be dealt with when
+-     * onlining them. This avoids pinning a vcpu to a not yet online cpu here.
+-     */
+-    if ( pv_shim )
+-        sched_set_affinity(d->vcpu[0]->sched_unit,
+-                           cpumask_of(0), cpumask_of(0));
+-    else
+-    {
+-        for_each_sched_unit ( d, unit )
+-        {
+-            if ( !opt_dom0_vcpus_pin && !dom0_affinity_relaxed )
+-                sched_set_affinity(unit, &dom0_cpus, NULL);
+-            sched_set_affinity(unit, NULL, &dom0_cpus);
+-        }
+-    }
+-
+     domain_update_node_affinity(d);
+ }
+ #endif
+--
+2.37.3
+
diff --git a/0036-tools-libxl-Replace-deprecated-sdl-option-on-QEMU-co.patch b/0036-tools-libxl-Replace-deprecated-sdl-option-on-QEMU-co.patch
new file mode 100644
index 0000000..1637236
--- /dev/null
+++ b/0036-tools-libxl-Replace-deprecated-sdl-option-on-QEMU-co.patch
@@ -0,0 +1,38 @@
+From c373ad3d084614a93c55e25dc20e70ffc7574971 Mon Sep 17 00:00:00 2001
+From: Anthony PERARD <anthony.perard@citrix.com>
+Date: Mon, 15 Aug 2022 15:42:09 +0200
+Subject: [PATCH 36/67] tools/libxl: Replace deprecated -sdl option on QEMU
+ command line
+
+"-sdl" is deprecated upstream since 6695e4c0fd9e ("softmmu/vl:
+Deprecate the -sdl and -curses option"), QEMU v6.2, and the option is
+removed by 707d93d4abc6 ("ui: Remove deprecated options "-sdl" and
+"-curses""), in upcoming QEMU v7.1.
+
+Instead, use "-display sdl", available since 1472a95bab1e ("Introduce
+-display argument"), before QEMU v1.0.
+
+Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
+Reviewed-by: Jason Andryuk <jandryuk@gmail.com>
+master commit: 41fcb3af8ad6d4c9f65a9d72798e6d18afec55ac
+master date: 2022-08-11 11:47:11 +0200
+---
+ tools/libs/light/libxl_dm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c
+index 24f6e73b0a77..ae5f35e0c3fd 100644
+--- a/tools/libs/light/libxl_dm.c
++++ b/tools/libs/light/libxl_dm.c
+@@ -1349,7 +1349,7 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
+         flexarray_append_pair(dm_args, "-display", "none");
+ 
+         if (sdl && !is_stubdom) {
+-            flexarray_append(dm_args, "-sdl");
++            flexarray_append_pair(dm_args, "-display", "sdl");
+             if (sdl->display)
+                 flexarray_append_pair(dm_envs, "DISPLAY", sdl->display);
+             if (sdl->xauthority)
+--
+2.37.3
+
diff --git a/0037-x86-spec-ctrl-Enumeration-for-PBRSB_NO.patch b/0037-x86-spec-ctrl-Enumeration-for-PBRSB_NO.patch
new file mode 100644
index 0000000..d27766b
--- /dev/null
+++ b/0037-x86-spec-ctrl-Enumeration-for-PBRSB_NO.patch
@@ -0,0 +1,67 @@
+From fba0c22e79922085c46527eb1391123aadfb24d1 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Mon, 15 Aug 2022 15:42:31 +0200
+Subject: [PATCH 37/67] x86/spec-ctrl: Enumeration for PBRSB_NO
+
+The PBRSB_NO bit indicates that the CPU is not vulnerable to the Post-Barrier
+RSB speculative vulnerability.
+ +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +master commit: b874e47eb13feb75be3ee7b5dc4ae9c97d80d774 +master date: 2022-08-11 16:19:50 +0100 +--- + xen/arch/x86/msr.c | 2 +- + xen/arch/x86/spec_ctrl.c | 3 ++- + xen/include/asm-x86/msr-index.h | 1 + + 3 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c +index aa9face9aad3..9bced8d36caa 100644 +--- a/xen/arch/x86/msr.c ++++ b/xen/arch/x86/msr.c +@@ -148,7 +148,7 @@ int init_domain_msr_policy(struct domain *d) + ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | ARCH_CAPS_IF_PSCHANGE_MC_NO | + ARCH_CAPS_TAA_NO | ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO | + ARCH_CAPS_PSDP_NO | ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA | +- ARCH_CAPS_BHI_NO); ++ ARCH_CAPS_BHI_NO | ARCH_CAPS_PBRSB_NO); + } + + d->arch.msr = mp; +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index ac73806eacd8..3ff602bd0281 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -419,7 +419,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + * Hardware read-only information, stating immunity to certain issues, or + * suggestions of which mitigation to use. + */ +- printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", ++ printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "", + (caps & ARCH_CAPS_IBRS_ALL) ? " IBRS_ALL" : "", + (caps & ARCH_CAPS_RSBA) ? " RSBA" : "", +@@ -431,6 +431,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + (caps & ARCH_CAPS_SBDR_SSDP_NO) ? " SBDR_SSDP_NO" : "", + (caps & ARCH_CAPS_FBSDP_NO) ? " FBSDP_NO" : "", + (caps & ARCH_CAPS_PSDP_NO) ? " PSDP_NO" : "", ++ (caps & ARCH_CAPS_PBRSB_NO) ? " PBRSB_NO" : "", + (e8b & cpufeat_mask(X86_FEATURE_IBRS_ALWAYS)) ? " IBRS_ALWAYS" : "", + (e8b & cpufeat_mask(X86_FEATURE_STIBP_ALWAYS)) ? " STIBP_ALWAYS" : "", + (e8b & cpufeat_mask(X86_FEATURE_IBRS_FAST)) ? " IBRS_FAST" : "", +diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h +index 49ca1f1845e6..5a830f76a8d4 100644 +--- a/xen/include/asm-x86/msr-index.h ++++ b/xen/include/asm-x86/msr-index.h +@@ -66,6 +66,7 @@ + #define ARCH_CAPS_FB_CLEAR_CTRL (_AC(1, ULL) << 18) + #define ARCH_CAPS_RRSBA (_AC(1, ULL) << 19) + #define ARCH_CAPS_BHI_NO (_AC(1, ULL) << 20) ++#define ARCH_CAPS_PBRSB_NO (_AC(1, ULL) << 24) + + #define MSR_FLUSH_CMD 0x0000010b + #define FLUSH_CMD_L1D (_AC(1, ULL) << 0) +-- +2.37.3 + diff --git a/0038-x86-amd-only-call-setup_force_cpu_cap-for-boot-CPU.patch b/0038-x86-amd-only-call-setup_force_cpu_cap-for-boot-CPU.patch new file mode 100644 index 0000000..e0e0f87 --- /dev/null +++ b/0038-x86-amd-only-call-setup_force_cpu_cap-for-boot-CPU.patch @@ -0,0 +1,33 @@ +From 104a54a307b08945365faf6d285cd5a02f94a80f Mon Sep 17 00:00:00 2001 +From: Ross Lagerwall <ross.lagerwall@citrix.com> +Date: Mon, 15 Aug 2022 15:43:08 +0200 +Subject: [PATCH 38/67] x86/amd: only call setup_force_cpu_cap for boot CPU + +This should only be called for the boot CPU to avoid calling _init code +after it has been unloaded. 
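+
+Condensed, the fix is a boot-CPU guard. The c_init hook runs on every
+CPU, including ones brought online after .init text has been freed,
+hence the check (a sketch of the resulting shape; the real one-line
+hunk follows):
+
+    static void init_amd(struct cpuinfo_x86 *c)
+    {
+        /* Only act while bringing up the boot processor; APs would
+         * otherwise reach setup_force_cpu_cap() in unloaded code. */
+        if ( c == &boot_cpu_data && !cpu_has_clflushopt )
+            setup_force_cpu_cap(X86_BUG_CLFLUSH_MFENCE);
+    }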
+ +Fixes: 062868a5a8b4 ("x86/amd: Work around CLFLUSH ordering on older parts") +Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +master commit: 31b41ce858c8bd5159212d40969f8e0b7124bbf0 +master date: 2022-08-11 17:44:26 +0200 +--- + xen/arch/x86/cpu/amd.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c +index 60dbe61a61ca..a8d2fb8a1590 100644 +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -820,7 +820,7 @@ static void init_amd(struct cpuinfo_x86 *c) + * everything, including reads and writes to address, and + * LFENCE/SFENCE instructions. + */ +- if (!cpu_has_clflushopt) ++ if (c == &boot_cpu_data && !cpu_has_clflushopt) + setup_force_cpu_cap(X86_BUG_CLFLUSH_MFENCE); + + switch(c->x86) +-- +2.37.3 + diff --git a/0039-build-x86-suppress-GNU-ld-2.39-warning-about-RWX-loa.patch b/0039-build-x86-suppress-GNU-ld-2.39-warning-about-RWX-loa.patch new file mode 100644 index 0000000..50d83b6 --- /dev/null +++ b/0039-build-x86-suppress-GNU-ld-2.39-warning-about-RWX-loa.patch @@ -0,0 +1,38 @@ +From a075900cf768fe45f270b6f1d09c4e99281da142 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Mon, 15 Aug 2022 15:43:56 +0200 +Subject: [PATCH 39/67] build/x86: suppress GNU ld 2.39 warning about RWX load + segments + +Commit 68f5aac012b9 ("build: suppress future GNU ld warning about RWX +load segments") didn't quite cover all the cases: Apparently I missed +ones in the building of 32-bit helper objects because of only looking at +incremental builds (where those wouldn't normally be re-built). Clone +the workaround there to the specific Makefile in question. + +Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: 3eb1865ae305772b558757904d81951e31de43de +master date: 2022-08-11 17:45:12 +0200 +--- + xen/arch/x86/boot/build32.mk | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/xen/arch/x86/boot/build32.mk b/xen/arch/x86/boot/build32.mk +index e90680cd9f52..d2fae5cf9eee 100644 +--- a/xen/arch/x86/boot/build32.mk ++++ b/xen/arch/x86/boot/build32.mk +@@ -8,6 +8,9 @@ CFLAGS += -Werror -fno-builtin -g0 -msoft-float + CFLAGS += -I$(BASEDIR)/include + CFLAGS := $(filter-out -flto,$(CFLAGS)) + ++LDFLAGS_DIRECT-$(shell $(LD) -v --warn-rwx-segments >/dev/null 2>&1 && echo y) := --no-warn-rwx-segments ++LDFLAGS_DIRECT += $(LDFLAGS_DIRECT-y) ++ + # NB. awk invocation is a portable alternative to 'head -n -1' + %.S: %.bin + (od -v -t x $< | tr -s ' ' | awk 'NR > 1 {print s} {s=$$0}' | \ +-- +2.37.3 + diff --git a/0040-PCI-simplify-and-thus-correct-pci_get_pdev-_by_domai.patch b/0040-PCI-simplify-and-thus-correct-pci_get_pdev-_by_domai.patch new file mode 100644 index 0000000..c29e5ac --- /dev/null +++ b/0040-PCI-simplify-and-thus-correct-pci_get_pdev-_by_domai.patch @@ -0,0 +1,153 @@ +From 9acedc3c58c31930737edbe212f2ccf437a0b757 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Mon, 15 Aug 2022 15:44:23 +0200 +Subject: [PATCH 40/67] PCI: simplify (and thus correct) + pci_get_pdev{,_by_domain}() + +The last "wildcard" use of either function went away with f591755823a7 +("IOMMU/PCI: don't let domain cleanup continue when device de-assignment +failed"). Don't allow them to be called this way anymore. 
Besides +simplifying the code this also fixes two bugs: + +1) When seg != -1, the outer loops should have been terminated after the + first iteration, or else a device with the same BDF but on another + segment could be found / returned. + +Reported-by: Rahul Singh <rahul.singh@arm.com> + +2) When seg == -1 calling get_pseg() is bogus. The function (taking a + u16) would look for segment 0xffff, which might exist. If it exists, + we might then find / return a wrong device. + +In pci_get_pdev_by_domain() also switch from using the per-segment list +to using the per-domain one, with the exception of the hardware domain +(see the code comment there). + +While there also constify "pseg" and drop "pdev"'s already previously +unnecessary initializer. + +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Rahul Singh <rahul.singh@arm.com> +Tested-by: Rahul Singh <rahul.singh@arm.com> +master commit: 8cf6e0738906fc269af40135ed82a07815dd3b9c +master date: 2022-08-12 08:34:33 +0200 +--- + xen/drivers/passthrough/pci.c | 61 +++++++++++++++-------------------- + xen/include/xen/pci.h | 6 ++-- + 2 files changed, 29 insertions(+), 38 deletions(-) + +diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c +index bbacbe41dac4..9b81b941c8bb 100644 +--- a/xen/drivers/passthrough/pci.c ++++ b/xen/drivers/passthrough/pci.c +@@ -528,30 +528,19 @@ int __init pci_ro_device(int seg, int bus, int devfn) + return 0; + } + +-struct pci_dev *pci_get_pdev(int seg, int bus, int devfn) ++struct pci_dev *pci_get_pdev(uint16_t seg, uint8_t bus, uint8_t devfn) + { +- struct pci_seg *pseg = get_pseg(seg); +- struct pci_dev *pdev = NULL; ++ const struct pci_seg *pseg = get_pseg(seg); ++ struct pci_dev *pdev; + + ASSERT(pcidevs_locked()); +- ASSERT(seg != -1 || bus == -1); +- ASSERT(bus != -1 || devfn == -1); + + if ( !pseg ) +- { +- if ( seg == -1 ) +- radix_tree_gang_lookup(&pci_segments, (void **)&pseg, 0, 1); +- if ( !pseg ) +- return NULL; +- } ++ return NULL; + +- do { +- list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list ) +- if ( (pdev->bus == bus || bus == -1) && +- (pdev->devfn == devfn || devfn == -1) ) +- return pdev; +- } while ( radix_tree_gang_lookup(&pci_segments, (void **)&pseg, +- pseg->nr + 1, 1) ); ++ list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list ) ++ if ( pdev->bus == bus && pdev->devfn == devfn ) ++ return pdev; + + return NULL; + } +@@ -577,31 +566,33 @@ struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn) + return pdev; + } + +-struct pci_dev *pci_get_pdev_by_domain(const struct domain *d, int seg, +- int bus, int devfn) ++struct pci_dev *pci_get_pdev_by_domain(const struct domain *d, uint16_t seg, ++ uint8_t bus, uint8_t devfn) + { +- struct pci_seg *pseg = get_pseg(seg); +- struct pci_dev *pdev = NULL; ++ struct pci_dev *pdev; + +- ASSERT(seg != -1 || bus == -1); +- ASSERT(bus != -1 || devfn == -1); +- +- if ( !pseg ) ++ /* ++ * The hardware domain owns the majority of the devices in the system. ++ * When there are multiple segments, traversing the per-segment list is ++ * likely going to be faster, whereas for a single segment the difference ++ * shouldn't be that large. 
++ */
++ if ( is_hardware_domain(d) )
+ {
+- if ( seg == -1 )
+- radix_tree_gang_lookup(&pci_segments, (void **)&pseg, 0, 1);
++ const struct pci_seg *pseg = get_pseg(seg);
++
+ if ( !pseg )
+ return NULL;
+- }
+
+- do {
+ list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
+- if ( (pdev->bus == bus || bus == -1) &&
+- (pdev->devfn == devfn || devfn == -1) &&
+- (pdev->domain == d) )
++ if ( pdev->bus == bus && pdev->devfn == devfn &&
++ pdev->domain == d )
++ return pdev;
++ }
++ else
++ list_for_each_entry ( pdev, &d->pdev_list, domain_list )
++ if ( pdev->bus == bus && pdev->devfn == devfn )
+ return pdev;
+- } while ( radix_tree_gang_lookup(&pci_segments, (void **)&pseg,
+- pseg->nr + 1, 1) );
+
+ return NULL;
+ }
+diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
+index 8e3d4d94543a..cd238ae852b0 100644
+--- a/xen/include/xen/pci.h
++++ b/xen/include/xen/pci.h
+@@ -166,10 +166,10 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
+ int pci_remove_device(u16 seg, u8 bus, u8 devfn);
+ int pci_ro_device(int seg, int bus, int devfn);
+ int pci_hide_device(unsigned int seg, unsigned int bus, unsigned int devfn);
+-struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
++struct pci_dev *pci_get_pdev(uint16_t seg, uint8_t bus, uint8_t devfn);
+ struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn);
+-struct pci_dev *pci_get_pdev_by_domain(const struct domain *, int seg,
+- int bus, int devfn);
++struct pci_dev *pci_get_pdev_by_domain(const struct domain *, uint16_t seg,
++ uint8_t bus, uint8_t devfn);
+ void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);
+
+ uint8_t pci_conf_read8(pci_sbdf_t sbdf, unsigned int reg);
+--
+2.37.3
+
diff --git a/0041-xen-arm-p2m-Prevent-adding-mapping-when-domain-is-dy.patch b/0041-xen-arm-p2m-Prevent-adding-mapping-when-domain-is-dy.patch
new file mode 100644
index 0000000..3fa0e43
--- /dev/null
+++ b/0041-xen-arm-p2m-Prevent-adding-mapping-when-domain-is-dy.patch
@@ -0,0 +1,62 @@
+From 09fc590c15773c2471946a78740c6b02e8c34a45 Mon Sep 17 00:00:00 2001
+From: Julien Grall <jgrall@amazon.com>
+Date: Tue, 11 Oct 2022 15:05:53 +0200
+Subject: [PATCH 41/67] xen/arm: p2m: Prevent adding mapping when domain is
+ dying
+
+During the domain destroy process, the domain will still be accessible
+until it is fully destroyed. The same is true for the P2M, because we
+don't bail out early if is_dying is non-zero. If a domain has permission
+to modify the other domain's P2M (i.e. dom0, or a stubdomain), then
+foreign mappings can be added past relinquish_p2m_mapping().
+
+Therefore, we need to prevent mappings from being added while the
+domain is dying. This commit does so by adding a d->is_dying check to
+p2m_set_entry(). It also enhances the check in relinquish_p2m_mapping()
+to make sure that no mappings can be added in the P2M after the P2M
+lock is released.
+
+This is part of CVE-2022-33746 / XSA-410.
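+
+Distilled, the added guard is (sketch; the complete hunks follow):
+
+    /* p2m_set_entry(): refuse new (e.g. foreign) mappings once the
+     * domain has started dying, as any references they take would
+     * not be dropped by relinquish_p2m_mapping() any more. */
+    if ( unlikely(p2m->domain->is_dying) )
+        return -ENOMEM;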
+
+Signed-off-by: Julien Grall <jgrall@amazon.com>
+Signed-off-by: Henry Wang <Henry.Wang@arm.com>
+Tested-by: Henry Wang <Henry.Wang@arm.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+master commit: 3ebe773293e3b945460a3d6f54f3b91915397bab
+master date: 2022-10-11 14:20:18 +0200
+---
+ xen/arch/arm/p2m.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
+index 2ddd06801a82..8398251c518b 100644
+--- a/xen/arch/arm/p2m.c
++++ b/xen/arch/arm/p2m.c
+@@ -1093,6 +1093,15 @@ int p2m_set_entry(struct p2m_domain *p2m,
+ {
+ int rc = 0;
+
++ /*
++ * Any reference taken by the P2M mappings (e.g. foreign mapping) will
++ * be dropped in relinquish_p2m_mapping(). As the P2M will still
++ * be accessible after, we need to prevent mapping to be added when the
++ * domain is dying.
++ */
++ if ( unlikely(p2m->domain->is_dying) )
++ return -ENOMEM;
++
+ while ( nr )
+ {
+ unsigned long mask;
+@@ -1613,6 +1622,8 @@ int relinquish_p2m_mapping(struct domain *d)
+ unsigned int order;
+ gfn_t start, end;
+
++ BUG_ON(!d->is_dying);
++ /* No mappings can be added in the P2M after the P2M lock is released. */
+ p2m_write_lock(p2m);
+
+ start = p2m->lowest_mapped_gfn;
+--
+2.37.3
+
diff --git a/0042-xen-arm-p2m-Handle-preemption-when-freeing-intermedi.patch b/0042-xen-arm-p2m-Handle-preemption-when-freeing-intermedi.patch
new file mode 100644
index 0000000..8217a06
--- /dev/null
+++ b/0042-xen-arm-p2m-Handle-preemption-when-freeing-intermedi.patch
@@ -0,0 +1,167 @@
+From 0d805f9fba4bc155d15047685024f7d842e925e4 Mon Sep 17 00:00:00 2001
+From: Julien Grall <jgrall@amazon.com>
+Date: Tue, 11 Oct 2022 15:06:36 +0200
+Subject: [PATCH 42/67] xen/arm: p2m: Handle preemption when freeing
+ intermediate page tables
+
+At the moment the P2M page tables will be freed when the domain structure
+is freed without any preemption. As the P2M is quite large, iterating
+through this may take more time than is reasonable without intermediate
+preemption (to run softirqs and perhaps the scheduler).
+
+Split p2m_teardown() in two parts: one preemptible and called when
+relinquishing the resources, the other one non-preemptible and called
+when freeing the domain structure.
+
+As we are now freeing the P2M pages early, we also need to prevent
+further allocation if someone calls p2m_set_entry() past p2m_teardown()
+(I wasn't able to prove this will never happen). This is done by the
+domain->is_dying check added to p2m_set_entry() by the previous patch.
+
+Similarly, we want to make sure that no-one can access the freed
+pages. Therefore the root is cleared before freeing pages.
+
+This is part of CVE-2022-33746 / XSA-410.
+
+Signed-off-by: Julien Grall <jgrall@amazon.com>
+Signed-off-by: Henry Wang <Henry.Wang@arm.com>
+Tested-by: Henry Wang <Henry.Wang@arm.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+master commit: 3202084566bba0ef0c45caf8c24302f83d92f9c8
+master date: 2022-10-11 14:20:56 +0200
+---
+ xen/arch/arm/domain.c | 10 +++++++--
+ xen/arch/arm/p2m.c | 47 ++++++++++++++++++++++++++++++++++++---
+ xen/include/asm-arm/p2m.h | 13 +++++++++--
+ 3 files changed, 63 insertions(+), 7 deletions(-)
+
+diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
+index 5eaf4c718ec3..223ec9694df1 100644
+--- a/xen/arch/arm/domain.c
++++ b/xen/arch/arm/domain.c
+@@ -779,10 +779,10 @@ fail:
+ void arch_domain_destroy(struct domain *d)
+ {
+ /* IOMMU page table is shared with P2M, always call
+- * iommu_domain_destroy() before p2m_teardown().
++ * iommu_domain_destroy() before p2m_final_teardown(). + */ + iommu_domain_destroy(d); +- p2m_teardown(d); ++ p2m_final_teardown(d); + domain_vgic_free(d); + domain_vuart_free(d); + free_xenheap_page(d->shared_info); +@@ -984,6 +984,7 @@ enum { + PROG_xen, + PROG_page, + PROG_mapping, ++ PROG_p2m, + PROG_done, + }; + +@@ -1038,6 +1039,11 @@ int domain_relinquish_resources(struct domain *d) + if ( ret ) + return ret; + ++ PROGRESS(p2m): ++ ret = p2m_teardown(d); ++ if ( ret ) ++ return ret; ++ + PROGRESS(done): + break; + +diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c +index 8398251c518b..4ad3e0606e9c 100644 +--- a/xen/arch/arm/p2m.c ++++ b/xen/arch/arm/p2m.c +@@ -1530,17 +1530,58 @@ static void p2m_free_vmid(struct domain *d) + spin_unlock(&vmid_alloc_lock); + } + +-void p2m_teardown(struct domain *d) ++int p2m_teardown(struct domain *d) + { + struct p2m_domain *p2m = p2m_get_hostp2m(d); ++ unsigned long count = 0; + struct page_info *pg; ++ unsigned int i; ++ int rc = 0; ++ ++ p2m_write_lock(p2m); ++ ++ /* ++ * We are about to free the intermediate page-tables, so clear the ++ * root to prevent any walk to use them. ++ */ ++ for ( i = 0; i < P2M_ROOT_PAGES; i++ ) ++ clear_and_clean_page(p2m->root + i); ++ ++ /* ++ * The domain will not be scheduled anymore, so in theory we should ++ * not need to flush the TLBs. Do it for safety purpose. ++ * ++ * Note that all the devices have already been de-assigned. So we don't ++ * need to flush the IOMMU TLB here. ++ */ ++ p2m_force_tlb_flush_sync(p2m); ++ ++ while ( (pg = page_list_remove_head(&p2m->pages)) ) ++ { ++ free_domheap_page(pg); ++ count++; ++ /* Arbitrarily preempt every 512 iterations */ ++ if ( !(count % 512) && hypercall_preempt_check() ) ++ { ++ rc = -ERESTART; ++ break; ++ } ++ } ++ ++ p2m_write_unlock(p2m); ++ ++ return rc; ++} ++ ++void p2m_final_teardown(struct domain *d) ++{ ++ struct p2m_domain *p2m = p2m_get_hostp2m(d); + + /* p2m not actually initialized */ + if ( !p2m->domain ) + return; + +- while ( (pg = page_list_remove_head(&p2m->pages)) ) +- free_domheap_page(pg); ++ ASSERT(page_list_empty(&p2m->pages)); + + if ( p2m->root ) + free_domheap_pages(p2m->root, P2M_ROOT_ORDER); +diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h +index 6a2108398fd7..3a2d51b35d71 100644 +--- a/xen/include/asm-arm/p2m.h ++++ b/xen/include/asm-arm/p2m.h +@@ -192,8 +192,17 @@ void setup_virt_paging(void); + /* Init the datastructures for later use by the p2m code */ + int p2m_init(struct domain *d); + +-/* Return all the p2m resources to Xen. */ +-void p2m_teardown(struct domain *d); ++/* ++ * The P2M resources are freed in two parts: ++ * - p2m_teardown() will be called when relinquish the resources. It ++ * will free large resources (e.g. intermediate page-tables) that ++ * requires preemption. ++ * - p2m_final_teardown() will be called when domain struct is been ++ * freed. This *cannot* be preempted and therefore one small ++ * resources should be freed here. 
++ */ ++int p2m_teardown(struct domain *d); ++void p2m_final_teardown(struct domain *d); + + /* + * Remove mapping refcount on each mapping page in the p2m +-- +2.37.3 + diff --git a/0043-x86-p2m-add-option-to-skip-root-pagetable-removal-in.patch b/0043-x86-p2m-add-option-to-skip-root-pagetable-removal-in.patch new file mode 100644 index 0000000..f3f7e3a --- /dev/null +++ b/0043-x86-p2m-add-option-to-skip-root-pagetable-removal-in.patch @@ -0,0 +1,138 @@ +From 0f3eab90f327210d91e8e31a769376f286e8819a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Tue, 11 Oct 2022 15:07:25 +0200 +Subject: [PATCH 43/67] x86/p2m: add option to skip root pagetable removal in + p2m_teardown() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Add a new parameter to p2m_teardown() in order to select whether the +root page table should also be freed. Note that all users are +adjusted to pass the parameter to remove the root page tables, so +behavior is not modified. + +No functional change intended. + +This is part of CVE-2022-33746 / XSA-410. + +Suggested-by: Julien Grall <julien@xen.org> +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Tim Deegan <tim@xen.org> +master commit: 1df52a270225527ae27bfa2fc40347bf93b78357 +master date: 2022-10-11 14:21:23 +0200 +--- + xen/arch/x86/mm/hap/hap.c | 6 +++--- + xen/arch/x86/mm/p2m.c | 20 ++++++++++++++++---- + xen/arch/x86/mm/shadow/common.c | 4 ++-- + xen/include/asm-x86/p2m.h | 2 +- + 4 files changed, 22 insertions(+), 10 deletions(-) + +diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c +index 47a7487fa7a3..a8f5a19da917 100644 +--- a/xen/arch/x86/mm/hap/hap.c ++++ b/xen/arch/x86/mm/hap/hap.c +@@ -541,18 +541,18 @@ void hap_final_teardown(struct domain *d) + } + + for ( i = 0; i < MAX_ALTP2M; i++ ) +- p2m_teardown(d->arch.altp2m_p2m[i]); ++ p2m_teardown(d->arch.altp2m_p2m[i], true); + } + + /* Destroy nestedp2m's first */ + for (i = 0; i < MAX_NESTEDP2M; i++) { +- p2m_teardown(d->arch.nested_p2m[i]); ++ p2m_teardown(d->arch.nested_p2m[i], true); + } + + if ( d->arch.paging.hap.total_pages != 0 ) + hap_teardown(d, NULL); + +- p2m_teardown(p2m_get_hostp2m(d)); ++ p2m_teardown(p2m_get_hostp2m(d), true); + /* Free any memory that the p2m teardown released */ + paging_lock(d); + hap_set_allocation(d, 0, NULL); +diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c +index 85681dee2623..8ba73082c1bf 100644 +--- a/xen/arch/x86/mm/p2m.c ++++ b/xen/arch/x86/mm/p2m.c +@@ -741,11 +741,11 @@ int p2m_alloc_table(struct p2m_domain *p2m) + * hvm fixme: when adding support for pvh non-hardware domains, this path must + * cleanup any foreign p2m types (release refcnts on them). + */ +-void p2m_teardown(struct p2m_domain *p2m) ++void p2m_teardown(struct p2m_domain *p2m, bool remove_root) + /* Return all the p2m pages to Xen. 
+ * We know we don't have any extra mappings to these pages */ + { +- struct page_info *pg; ++ struct page_info *pg, *root_pg = NULL; + struct domain *d; + + if (p2m == NULL) +@@ -755,10 +755,22 @@ void p2m_teardown(struct p2m_domain *p2m) + + p2m_lock(p2m); + ASSERT(atomic_read(&d->shr_pages) == 0); +- p2m->phys_table = pagetable_null(); ++ ++ if ( remove_root ) ++ p2m->phys_table = pagetable_null(); ++ else if ( !pagetable_is_null(p2m->phys_table) ) ++ { ++ root_pg = pagetable_get_page(p2m->phys_table); ++ clear_domain_page(pagetable_get_mfn(p2m->phys_table)); ++ } + + while ( (pg = page_list_remove_head(&p2m->pages)) ) +- d->arch.paging.free_page(d, pg); ++ if ( pg != root_pg ) ++ d->arch.paging.free_page(d, pg); ++ ++ if ( root_pg ) ++ page_list_add(root_pg, &p2m->pages); ++ + p2m_unlock(p2m); + } + +diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c +index 4a8882430b3f..abe6d4334382 100644 +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -2768,7 +2768,7 @@ int shadow_enable(struct domain *d, u32 mode) + paging_unlock(d); + out_unlocked: + if ( rv != 0 && !pagetable_is_null(p2m_get_pagetable(p2m)) ) +- p2m_teardown(p2m); ++ p2m_teardown(p2m, true); + if ( rv != 0 && pg != NULL ) + { + pg->count_info &= ~PGC_count_mask; +@@ -2933,7 +2933,7 @@ void shadow_final_teardown(struct domain *d) + shadow_teardown(d, NULL); + + /* It is now safe to pull down the p2m map. */ +- p2m_teardown(p2m_get_hostp2m(d)); ++ p2m_teardown(p2m_get_hostp2m(d), true); + /* Free any shadow memory that the p2m teardown released */ + paging_lock(d); + shadow_set_allocation(d, 0, NULL); +diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h +index 46e8b94a49df..46eb51d44cf5 100644 +--- a/xen/include/asm-x86/p2m.h ++++ b/xen/include/asm-x86/p2m.h +@@ -619,7 +619,7 @@ int p2m_init(struct domain *d); + int p2m_alloc_table(struct p2m_domain *p2m); + + /* Return all the p2m resources to Xen. */ +-void p2m_teardown(struct p2m_domain *p2m); ++void p2m_teardown(struct p2m_domain *p2m, bool remove_root); + void p2m_final_teardown(struct domain *d); + + /* Add a page to a domain's p2m table */ +-- +2.37.3 + diff --git a/0044-x86-HAP-adjust-monitor-table-related-error-handling.patch b/0044-x86-HAP-adjust-monitor-table-related-error-handling.patch new file mode 100644 index 0000000..39db626 --- /dev/null +++ b/0044-x86-HAP-adjust-monitor-table-related-error-handling.patch @@ -0,0 +1,77 @@ +From d24a10a91d46a56e1d406239643ec651a31033d4 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Tue, 11 Oct 2022 15:07:42 +0200 +Subject: [PATCH 44/67] x86/HAP: adjust monitor table related error handling +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +hap_make_monitor_table() will return INVALID_MFN if it encounters an +error condition, but hap_update_paging_modes() wasn’t handling this +value, resulting in an inappropriate value being stored in +monitor_table. This would subsequently misguide at least +hap_vcpu_teardown(). Avoid this by bailing early. + +Further, when a domain has/was already crashed or (perhaps less +important as there's no such path known to lead here) is already dying, +avoid calling domain_crash() on it again - that's at best confusing. + +This is part of CVE-2022-33746 / XSA-410. 
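+
+The "crash only once" guard used on the error path, in isolation
+(sketch; SHUTDOWN_crash comes from public/sched.h):
+
+    if ( !d->is_dying &&
+         (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) )
+    {
+        /* First failure for this domain: report and crash it. A
+         * repeat report on an already crashed domain is just noise. */
+        printk(XENLOG_G_ERR "%pd: out of memory building monitor pagetable\n",
+               d);
+        domain_crash(d);
+    }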
+ +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: 5b44a61180f4f2e4f490a28400c884dd357ff45d +master date: 2022-10-11 14:21:56 +0200 +--- + xen/arch/x86/mm/hap/hap.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c +index a8f5a19da917..d75dc2b9ed3d 100644 +--- a/xen/arch/x86/mm/hap/hap.c ++++ b/xen/arch/x86/mm/hap/hap.c +@@ -39,6 +39,7 @@ + #include <asm/domain.h> + #include <xen/numa.h> + #include <asm/hvm/nestedhvm.h> ++#include <public/sched.h> + + #include "private.h" + +@@ -405,8 +406,13 @@ static mfn_t hap_make_monitor_table(struct vcpu *v) + return m4mfn; + + oom: +- printk(XENLOG_G_ERR "out of memory building monitor pagetable\n"); +- domain_crash(d); ++ if ( !d->is_dying && ++ (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) ) ++ { ++ printk(XENLOG_G_ERR "%pd: out of memory building monitor pagetable\n", ++ d); ++ domain_crash(d); ++ } + return INVALID_MFN; + } + +@@ -766,6 +772,9 @@ static void hap_update_paging_modes(struct vcpu *v) + if ( pagetable_is_null(v->arch.hvm.monitor_table) ) + { + mfn_t mmfn = hap_make_monitor_table(v); ++ ++ if ( mfn_eq(mmfn, INVALID_MFN) ) ++ goto unlock; + v->arch.hvm.monitor_table = pagetable_from_mfn(mmfn); + make_cr3(v, mmfn); + hvm_update_host_cr3(v); +@@ -774,6 +783,7 @@ static void hap_update_paging_modes(struct vcpu *v) + /* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */ + hap_update_cr3(v, 0, false); + ++ unlock: + paging_unlock(d); + put_gfn(d, cr3_gfn); + } +-- +2.37.3 + diff --git a/0045-x86-shadow-tolerate-failure-of-sh_set_toplevel_shado.patch b/0045-x86-shadow-tolerate-failure-of-sh_set_toplevel_shado.patch new file mode 100644 index 0000000..7cf356d --- /dev/null +++ b/0045-x86-shadow-tolerate-failure-of-sh_set_toplevel_shado.patch @@ -0,0 +1,76 @@ +From 95f6d555ec84383f7daaf3374f65bec5ff4351f5 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Tue, 11 Oct 2022 15:07:57 +0200 +Subject: [PATCH 45/67] x86/shadow: tolerate failure of + sh_set_toplevel_shadow() + +Subsequently sh_set_toplevel_shadow() will be adjusted to install a +blank entry in case prealloc fails. There are, in fact, pre-existing +error paths which would put in place a blank entry. The 4- and 2-level +code in sh_update_cr3(), however, assume the top level entry to be +valid. + +Hence bail from the function in the unlikely event that it's not. Note +that 3-level logic works differently: In particular a guest is free to +supply a PDPTR pointing at 4 non-present (or otherwise deemed invalid) +entries. The guest will crash, but we already cope with that. + +Really mfn_valid() is likely wrong to use in sh_set_toplevel_shadow(), +and it should instead be !mfn_eq(gmfn, INVALID_MFN). Avoid such a change +in security context, but add a respective assertion. + +This is part of CVE-2022-33746 / XSA-410. 
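+
+The caller-side pattern this introduces, in brief (sketch; see the
+sh_update_cr3() hunks below for the real context):
+
+    sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow, sh_make_shadow);
+    if ( unlikely(pagetable_is_null(v->arch.paging.shadow.shadow_table[0])) )
+    {
+        /* A blank top-level entry is only installed while the domain
+         * is going down; bail rather than dereference it. */
+        ASSERT(d->is_dying || d->is_shutting_down);
+        return;
+    }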
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Tim Deegan <tim@xen.org>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: eac000978c1feb5a9ee3236ab0c0da9a477e5336
+master date: 2022-10-11 14:22:24 +0200
+---
+ xen/arch/x86/mm/shadow/common.c | 1 +
+ xen/arch/x86/mm/shadow/multi.c | 10 ++++++++++
+ 2 files changed, 11 insertions(+)
+
+diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
+index abe6d4334382..0ab2ac6b7a3c 100644
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -2583,6 +2583,7 @@ void sh_set_toplevel_shadow(struct vcpu *v,
+ /* Now figure out the new contents: is this a valid guest MFN? */
+ if ( !mfn_valid(gmfn) )
+ {
++ ASSERT(mfn_eq(gmfn, INVALID_MFN));
+ new_entry = pagetable_null();
+ goto install_new_entry;
+ }
+diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
+index 9b43cb116c47..7e0494cf7faa 100644
+--- a/xen/arch/x86/mm/shadow/multi.c
++++ b/xen/arch/x86/mm/shadow/multi.c
+@@ -3697,6 +3697,11 @@ sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
+ if ( sh_remove_write_access(d, gmfn, 4, 0) != 0 )
+ guest_flush_tlb_mask(d, d->dirty_cpumask);
+ sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow, sh_make_shadow);
++ if ( unlikely(pagetable_is_null(v->arch.paging.shadow.shadow_table[0])) )
++ {
++ ASSERT(d->is_dying || d->is_shutting_down);
++ return;
++ }
+ if ( !shadow_mode_external(d) && !is_pv_32bit_domain(d) )
+ {
+ mfn_t smfn = pagetable_get_mfn(v->arch.paging.shadow.shadow_table[0]);
+@@ -3757,6 +3762,11 @@ sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
+ if ( sh_remove_write_access(d, gmfn, 2, 0) != 0 )
+ guest_flush_tlb_mask(d, d->dirty_cpumask);
+ sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow, sh_make_shadow);
++ if ( unlikely(pagetable_is_null(v->arch.paging.shadow.shadow_table[0])) )
++ {
++ ASSERT(d->is_dying || d->is_shutting_down);
++ return;
++ }
+ #else
+ #error This should never happen
+ #endif
+--
+2.37.3
+
diff --git a/0046-x86-shadow-tolerate-failure-in-shadow_prealloc.patch b/0046-x86-shadow-tolerate-failure-in-shadow_prealloc.patch
new file mode 100644
index 0000000..62be72a
--- /dev/null
+++ b/0046-x86-shadow-tolerate-failure-in-shadow_prealloc.patch
@@ -0,0 +1,279 @@
+From 1e26afa846fb9a00b9155280eeae3b8cb8375dd6 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 11 Oct 2022 15:08:14 +0200
+Subject: [PATCH 46/67] x86/shadow: tolerate failure in shadow_prealloc()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Prevent _shadow_prealloc() from calling BUG() when unable to fulfill
+the pre-allocation and instead return true/false. Modify
+shadow_prealloc() to crash the domain on allocation failure (if the
+domain is not already dying), as shadow cannot operate normally after
+that. Modify callers to also gracefully handle {_,}shadow_prealloc()
+failing to fulfill the request.
+
+Note this in turn requires adjusting the callers of
+sh_make_monitor_table() also to handle it returning INVALID_MFN.
+sh_update_paging_modes() is also modified to add additional error
+paths in case of allocation failure; some of those will return with
+null monitor page tables (and the domain likely crashed). This is no
+different from current error paths, but the newly introduced ones are
+more likely to trigger.
+ +The now added failure points in sh_update_paging_modes() also require +that on some error return paths the previous structures are cleared, +and thus monitor table is null. + +While there adjust the 'type' parameter type of shadow_prealloc() to +unsigned int rather than u32. + +This is part of CVE-2022-33746 / XSA-410. + +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Tim Deegan <tim@xen.org> +master commit: b7f93c6afb12b6061e2d19de2f39ea09b569ac68 +master date: 2022-10-11 14:22:53 +0200 +--- + xen/arch/x86/mm/shadow/common.c | 69 ++++++++++++++++++++++++-------- + xen/arch/x86/mm/shadow/hvm.c | 4 +- + xen/arch/x86/mm/shadow/multi.c | 11 +++-- + xen/arch/x86/mm/shadow/private.h | 3 +- + 4 files changed, 66 insertions(+), 21 deletions(-) + +diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c +index 0ab2ac6b7a3c..fc4f7f78ce43 100644 +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -36,6 +36,7 @@ + #include <asm/flushtlb.h> + #include <asm/shadow.h> + #include <xen/numa.h> ++#include <public/sched.h> + #include "private.h" + + DEFINE_PER_CPU(uint32_t,trace_shadow_path_flags); +@@ -927,14 +928,15 @@ static inline void trace_shadow_prealloc_unpin(struct domain *d, mfn_t smfn) + + /* Make sure there are at least count order-sized pages + * available in the shadow page pool. */ +-static void _shadow_prealloc(struct domain *d, unsigned int pages) ++static bool __must_check _shadow_prealloc(struct domain *d, unsigned int pages) + { + struct vcpu *v; + struct page_info *sp, *t; + mfn_t smfn; + int i; + +- if ( d->arch.paging.shadow.free_pages >= pages ) return; ++ if ( d->arch.paging.shadow.free_pages >= pages ) ++ return true; + + /* Shouldn't have enabled shadows if we've no vcpus. */ + ASSERT(d->vcpu && d->vcpu[0]); +@@ -950,7 +952,8 @@ static void _shadow_prealloc(struct domain *d, unsigned int pages) + sh_unpin(d, smfn); + + /* See if that freed up enough space */ +- if ( d->arch.paging.shadow.free_pages >= pages ) return; ++ if ( d->arch.paging.shadow.free_pages >= pages ) ++ return true; + } + + /* Stage two: all shadow pages are in use in hierarchies that are +@@ -973,7 +976,7 @@ static void _shadow_prealloc(struct domain *d, unsigned int pages) + if ( d->arch.paging.shadow.free_pages >= pages ) + { + guest_flush_tlb_mask(d, d->dirty_cpumask); +- return; ++ return true; + } + } + } +@@ -986,7 +989,12 @@ static void _shadow_prealloc(struct domain *d, unsigned int pages) + d->arch.paging.shadow.total_pages, + d->arch.paging.shadow.free_pages, + d->arch.paging.shadow.p2m_pages); +- BUG(); ++ ++ ASSERT(d->is_dying); ++ ++ guest_flush_tlb_mask(d, d->dirty_cpumask); ++ ++ return false; + } + + /* Make sure there are at least count pages of the order according to +@@ -994,9 +1002,19 @@ static void _shadow_prealloc(struct domain *d, unsigned int pages) + * This must be called before any calls to shadow_alloc(). Since this + * will free existing shadows to make room, it must be called early enough + * to avoid freeing shadows that the caller is currently working on. 
*/ +-void shadow_prealloc(struct domain *d, u32 type, unsigned int count) ++bool shadow_prealloc(struct domain *d, unsigned int type, unsigned int count) + { +- return _shadow_prealloc(d, shadow_size(type) * count); ++ bool ret = _shadow_prealloc(d, shadow_size(type) * count); ++ ++ if ( !ret && !d->is_dying && ++ (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) ) ++ /* ++ * Failing to allocate memory required for shadow usage can only result in ++ * a domain crash, do it here rather that relying on every caller to do it. ++ */ ++ domain_crash(d); ++ ++ return ret; + } + + /* Deliberately free all the memory we can: this will tear down all of +@@ -1215,7 +1233,7 @@ void shadow_free(struct domain *d, mfn_t smfn) + static struct page_info * + shadow_alloc_p2m_page(struct domain *d) + { +- struct page_info *pg; ++ struct page_info *pg = NULL; + + /* This is called both from the p2m code (which never holds the + * paging lock) and the log-dirty code (which always does). */ +@@ -1233,16 +1251,18 @@ shadow_alloc_p2m_page(struct domain *d) + d->arch.paging.shadow.p2m_pages, + shadow_min_acceptable_pages(d)); + } +- paging_unlock(d); +- return NULL; ++ goto out; + } + +- shadow_prealloc(d, SH_type_p2m_table, 1); ++ if ( !shadow_prealloc(d, SH_type_p2m_table, 1) ) ++ goto out; ++ + pg = mfn_to_page(shadow_alloc(d, SH_type_p2m_table, 0)); + d->arch.paging.shadow.p2m_pages++; + d->arch.paging.shadow.total_pages--; + ASSERT(!page_get_owner(pg) && !(pg->count_info & PGC_count_mask)); + ++ out: + paging_unlock(d); + + return pg; +@@ -1333,7 +1353,9 @@ int shadow_set_allocation(struct domain *d, unsigned int pages, bool *preempted) + else if ( d->arch.paging.shadow.total_pages > pages ) + { + /* Need to return memory to domheap */ +- _shadow_prealloc(d, 1); ++ if ( !_shadow_prealloc(d, 1) ) ++ return -ENOMEM; ++ + sp = page_list_remove_head(&d->arch.paging.shadow.freelist); + ASSERT(sp); + /* +@@ -2401,12 +2423,13 @@ static void sh_update_paging_modes(struct vcpu *v) + if ( mfn_eq(v->arch.paging.shadow.oos_snapshot[0], INVALID_MFN) ) + { + int i; ++ ++ if ( !shadow_prealloc(d, SH_type_oos_snapshot, SHADOW_OOS_PAGES) ) ++ return; ++ + for(i = 0; i < SHADOW_OOS_PAGES; i++) +- { +- shadow_prealloc(d, SH_type_oos_snapshot, 1); + v->arch.paging.shadow.oos_snapshot[i] = + shadow_alloc(d, SH_type_oos_snapshot, 0); +- } + } + #endif /* OOS */ + +@@ -2470,6 +2493,9 @@ static void sh_update_paging_modes(struct vcpu *v) + mfn_t mmfn = sh_make_monitor_table( + v, v->arch.paging.mode->shadow.shadow_levels); + ++ if ( mfn_eq(mmfn, INVALID_MFN) ) ++ return; ++ + v->arch.hvm.monitor_table = pagetable_from_mfn(mmfn); + make_cr3(v, mmfn); + hvm_update_host_cr3(v); +@@ -2508,6 +2534,12 @@ static void sh_update_paging_modes(struct vcpu *v) + v->arch.hvm.monitor_table = pagetable_null(); + new_mfn = sh_make_monitor_table( + v, v->arch.paging.mode->shadow.shadow_levels); ++ if ( mfn_eq(new_mfn, INVALID_MFN) ) ++ { ++ sh_destroy_monitor_table(v, old_mfn, ++ old_mode->shadow.shadow_levels); ++ return; ++ } + v->arch.hvm.monitor_table = pagetable_from_mfn(new_mfn); + SHADOW_PRINTK("new monitor table %"PRI_mfn "\n", + mfn_x(new_mfn)); +@@ -2593,7 +2625,12 @@ void sh_set_toplevel_shadow(struct vcpu *v, + if ( !mfn_valid(smfn) ) + { + /* Make sure there's enough free shadow memory. */ +- shadow_prealloc(d, root_type, 1); ++ if ( !shadow_prealloc(d, root_type, 1) ) ++ { ++ new_entry = pagetable_null(); ++ goto install_new_entry; ++ } ++ + /* Shadow the page. 
*/ + smfn = make_shadow(v, gmfn, root_type); + } +diff --git a/xen/arch/x86/mm/shadow/hvm.c b/xen/arch/x86/mm/shadow/hvm.c +index 87fc57704f25..d68796c495b7 100644 +--- a/xen/arch/x86/mm/shadow/hvm.c ++++ b/xen/arch/x86/mm/shadow/hvm.c +@@ -700,7 +700,9 @@ mfn_t sh_make_monitor_table(const struct vcpu *v, unsigned int shadow_levels) + ASSERT(!pagetable_get_pfn(v->arch.hvm.monitor_table)); + + /* Guarantee we can get the memory we need */ +- shadow_prealloc(d, SH_type_monitor_table, CONFIG_PAGING_LEVELS); ++ if ( !shadow_prealloc(d, SH_type_monitor_table, CONFIG_PAGING_LEVELS) ) ++ return INVALID_MFN; ++ + m4mfn = shadow_alloc(d, SH_type_monitor_table, 0); + mfn_to_page(m4mfn)->shadow_flags = 4; + +diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c +index 7e0494cf7faa..6a9f82d39ce6 100644 +--- a/xen/arch/x86/mm/shadow/multi.c ++++ b/xen/arch/x86/mm/shadow/multi.c +@@ -2825,9 +2825,14 @@ static int sh_page_fault(struct vcpu *v, + * Preallocate shadow pages *before* removing writable accesses + * otherwhise an OOS L1 might be demoted and promoted again with + * writable mappings. */ +- shadow_prealloc(d, +- SH_type_l1_shadow, +- GUEST_PAGING_LEVELS < 4 ? 1 : GUEST_PAGING_LEVELS - 1); ++ if ( !shadow_prealloc(d, SH_type_l1_shadow, ++ GUEST_PAGING_LEVELS < 4 ++ ? 1 : GUEST_PAGING_LEVELS - 1) ) ++ { ++ paging_unlock(d); ++ put_gfn(d, gfn_x(gfn)); ++ return 0; ++ } + + rc = gw_remove_write_accesses(v, va, &gw); + +diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h +index 911db46e7399..3fe0388e7c4f 100644 +--- a/xen/arch/x86/mm/shadow/private.h ++++ b/xen/arch/x86/mm/shadow/private.h +@@ -351,7 +351,8 @@ void shadow_promote(struct domain *d, mfn_t gmfn, u32 type); + void shadow_demote(struct domain *d, mfn_t gmfn, u32 type); + + /* Shadow page allocation functions */ +-void shadow_prealloc(struct domain *d, u32 shadow_type, unsigned int count); ++bool __must_check shadow_prealloc(struct domain *d, unsigned int shadow_type, ++ unsigned int count); + mfn_t shadow_alloc(struct domain *d, + u32 shadow_type, + unsigned long backpointer); +-- +2.37.3 + diff --git a/0047-x86-p2m-refuse-new-allocations-for-dying-domains.patch b/0047-x86-p2m-refuse-new-allocations-for-dying-domains.patch new file mode 100644 index 0000000..c81cfab --- /dev/null +++ b/0047-x86-p2m-refuse-new-allocations-for-dying-domains.patch @@ -0,0 +1,100 @@ +From 4f9b535194f70582863f2a78f113547d8822b2b9 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Tue, 11 Oct 2022 15:08:28 +0200 +Subject: [PATCH 47/67] x86/p2m: refuse new allocations for dying domains +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This will in particular prevent any attempts to add entries to the p2m, +once - in a subsequent change - non-root entries have been removed. + +This is part of CVE-2022-33746 / XSA-410. 
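+
+Reduced to its essence, both pool allocators gain the same early exit
+(sketch; actual hunks below):
+
+    if ( unlikely(d->is_dying) )
+        return NULL; /* teardown owns the pool now; hand out nothing */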
+ +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Tim Deegan <tim@xen.org> +master commit: ff600a8cf8e36f8ecbffecf96a035952e022ab87 +master date: 2022-10-11 14:23:22 +0200 +--- + xen/arch/x86/mm/hap/hap.c | 5 ++++- + xen/arch/x86/mm/shadow/common.c | 18 ++++++++++++++---- + 2 files changed, 18 insertions(+), 5 deletions(-) + +diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c +index d75dc2b9ed3d..787991233e53 100644 +--- a/xen/arch/x86/mm/hap/hap.c ++++ b/xen/arch/x86/mm/hap/hap.c +@@ -245,6 +245,9 @@ static struct page_info *hap_alloc(struct domain *d) + + ASSERT(paging_locked_by_me(d)); + ++ if ( unlikely(d->is_dying) ) ++ return NULL; ++ + pg = page_list_remove_head(&d->arch.paging.hap.freelist); + if ( unlikely(!pg) ) + return NULL; +@@ -281,7 +284,7 @@ static struct page_info *hap_alloc_p2m_page(struct domain *d) + d->arch.paging.hap.p2m_pages++; + ASSERT(!page_get_owner(pg) && !(pg->count_info & PGC_count_mask)); + } +- else if ( !d->arch.paging.p2m_alloc_failed ) ++ else if ( !d->arch.paging.p2m_alloc_failed && !d->is_dying ) + { + d->arch.paging.p2m_alloc_failed = 1; + dprintk(XENLOG_ERR, "d%i failed to allocate from HAP pool\n", +diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c +index fc4f7f78ce43..9ad7e5a88650 100644 +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -938,6 +938,10 @@ static bool __must_check _shadow_prealloc(struct domain *d, unsigned int pages) + if ( d->arch.paging.shadow.free_pages >= pages ) + return true; + ++ if ( unlikely(d->is_dying) ) ++ /* No reclaim when the domain is dying, teardown will take care of it. */ ++ return false; ++ + /* Shouldn't have enabled shadows if we've no vcpus. */ + ASSERT(d->vcpu && d->vcpu[0]); + +@@ -990,7 +994,7 @@ static bool __must_check _shadow_prealloc(struct domain *d, unsigned int pages) + d->arch.paging.shadow.free_pages, + d->arch.paging.shadow.p2m_pages); + +- ASSERT(d->is_dying); ++ ASSERT_UNREACHABLE(); + + guest_flush_tlb_mask(d, d->dirty_cpumask); + +@@ -1004,10 +1008,13 @@ static bool __must_check _shadow_prealloc(struct domain *d, unsigned int pages) + * to avoid freeing shadows that the caller is currently working on. */ + bool shadow_prealloc(struct domain *d, unsigned int type, unsigned int count) + { +- bool ret = _shadow_prealloc(d, shadow_size(type) * count); ++ bool ret; + +- if ( !ret && !d->is_dying && +- (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) ) ++ if ( unlikely(d->is_dying) ) ++ return false; ++ ++ ret = _shadow_prealloc(d, shadow_size(type) * count); ++ if ( !ret && (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) ) + /* + * Failing to allocate memory required for shadow usage can only result in + * a domain crash, do it here rather that relying on every caller to do it. +@@ -1235,6 +1242,9 @@ shadow_alloc_p2m_page(struct domain *d) + { + struct page_info *pg = NULL; + ++ if ( unlikely(d->is_dying) ) ++ return NULL; ++ + /* This is called both from the p2m code (which never holds the + * paging lock) and the log-dirty code (which always does). 
*/ + paging_lock_recursive(d); +-- +2.37.3 + diff --git a/0048-x86-p2m-truly-free-paging-pool-memory-for-dying-doma.patch b/0048-x86-p2m-truly-free-paging-pool-memory-for-dying-doma.patch new file mode 100644 index 0000000..c3d5a2c --- /dev/null +++ b/0048-x86-p2m-truly-free-paging-pool-memory-for-dying-doma.patch @@ -0,0 +1,115 @@ +From 7f055b011a657f8f16b0df242301efb312058eea Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Tue, 11 Oct 2022 15:08:42 +0200 +Subject: [PATCH 48/67] x86/p2m: truly free paging pool memory for dying + domains +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Modify {hap,shadow}_free to free the page immediately if the domain is +dying, so that pages don't accumulate in the pool when +{shadow,hap}_final_teardown() get called. This is to limit the amount of +work which needs to be done there (in a non-preemptable manner). + +Note the call to shadow_free() in shadow_free_p2m_page() is moved after +increasing total_pages, so that the decrease done in shadow_free() in +case the domain is dying doesn't underflow the counter, even if just for +a short interval. + +This is part of CVE-2022-33746 / XSA-410. + +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Tim Deegan <tim@xen.org> +master commit: f50a2c0e1d057c00d6061f40ae24d068226052ad +master date: 2022-10-11 14:23:51 +0200 +--- + xen/arch/x86/mm/hap/hap.c | 12 ++++++++++++ + xen/arch/x86/mm/shadow/common.c | 28 +++++++++++++++++++++++++--- + 2 files changed, 37 insertions(+), 3 deletions(-) + +diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c +index 787991233e53..aef2297450e1 100644 +--- a/xen/arch/x86/mm/hap/hap.c ++++ b/xen/arch/x86/mm/hap/hap.c +@@ -265,6 +265,18 @@ static void hap_free(struct domain *d, mfn_t mfn) + + ASSERT(paging_locked_by_me(d)); + ++ /* ++ * For dying domains, actually free the memory here. This way less work is ++ * left to hap_final_teardown(), which cannot easily have preemption checks ++ * added. ++ */ ++ if ( unlikely(d->is_dying) ) ++ { ++ free_domheap_page(pg); ++ d->arch.paging.hap.total_pages--; ++ return; ++ } ++ + d->arch.paging.hap.free_pages++; + page_list_add_tail(pg, &d->arch.paging.hap.freelist); + } +diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c +index 9ad7e5a88650..366956c146aa 100644 +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -1184,6 +1184,7 @@ mfn_t shadow_alloc(struct domain *d, + void shadow_free(struct domain *d, mfn_t smfn) + { + struct page_info *next = NULL, *sp = mfn_to_page(smfn); ++ bool dying = ACCESS_ONCE(d->is_dying); + struct page_list_head *pin_list; + unsigned int pages; + u32 shadow_type; +@@ -1226,11 +1227,32 @@ void shadow_free(struct domain *d, mfn_t smfn) + * just before the allocator hands the page out again. */ + page_set_tlbflush_timestamp(sp); + perfc_decr(shadow_alloc_count); +- page_list_add_tail(sp, &d->arch.paging.shadow.freelist); ++ ++ /* ++ * For dying domains, actually free the memory here. This way less ++ * work is left to shadow_final_teardown(), which cannot easily have ++ * preemption checks added. ++ */ ++ if ( unlikely(dying) ) ++ { ++ /* ++ * The backpointer field (sh.back) used by shadow code aliases the ++ * domain owner field, unconditionally clear it here to avoid ++ * free_domheap_page() attempting to parse it. 
++ */ ++ page_set_owner(sp, NULL); ++ free_domheap_page(sp); ++ } ++ else ++ page_list_add_tail(sp, &d->arch.paging.shadow.freelist); ++ + sp = next; + } + +- d->arch.paging.shadow.free_pages += pages; ++ if ( unlikely(dying) ) ++ d->arch.paging.shadow.total_pages -= pages; ++ else ++ d->arch.paging.shadow.free_pages += pages; + } + + /* Divert a page from the pool to be used by the p2m mapping. +@@ -1300,9 +1322,9 @@ shadow_free_p2m_page(struct domain *d, struct page_info *pg) + * paging lock) and the log-dirty code (which always does). */ + paging_lock_recursive(d); + +- shadow_free(d, page_to_mfn(pg)); + d->arch.paging.shadow.p2m_pages--; + d->arch.paging.shadow.total_pages++; ++ shadow_free(d, page_to_mfn(pg)); + + paging_unlock(d); + } +-- +2.37.3 + diff --git a/0049-x86-p2m-free-the-paging-memory-pool-preemptively.patch b/0049-x86-p2m-free-the-paging-memory-pool-preemptively.patch new file mode 100644 index 0000000..83502a6 --- /dev/null +++ b/0049-x86-p2m-free-the-paging-memory-pool-preemptively.patch @@ -0,0 +1,181 @@ +From 686c920fa9389fe2b6b619643024ed98b4b7d51f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Tue, 11 Oct 2022 15:08:58 +0200 +Subject: [PATCH 49/67] x86/p2m: free the paging memory pool preemptively +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The paging memory pool is currently freed in two different places: +from {shadow,hap}_teardown() via domain_relinquish_resources() and +from {shadow,hap}_final_teardown() via complete_domain_destroy(). +While the former does handle preemption, the later doesn't. + +Attempt to move as much p2m related freeing as possible to happen +before the call to {shadow,hap}_teardown(), so that most memory can be +freed in a preemptive way. In order to avoid causing issues to +existing callers leave the root p2m page tables set and free them in +{hap,shadow}_final_teardown(). Also modify {hap,shadow}_free to free +the page immediately if the domain is dying, so that pages don't +accumulate in the pool when {shadow,hap}_final_teardown() get called. + +Move altp2m_vcpu_disable_ve() to be done in hap_teardown(), as that's +the place where altp2m_active gets disabled now. + +This is part of CVE-2022-33746 / XSA-410. 
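+
+The common shape of the change in hap_free() and shadow_free(),
+condensed (sketch; the HAP variant is shown):
+
+    if ( unlikely(d->is_dying) )
+    {
+        /* Hand the page back to the domheap at once instead of
+         * parking it on the freelist, so the final, non-preemptible
+         * teardown finds an (almost) empty pool. */
+        free_domheap_page(pg);
+        d->arch.paging.hap.total_pages--;
+        return;
+    }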
+ +Reported-by: Julien Grall <jgrall@amazon.com> +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Tim Deegan <tim@xen.org> +master commit: e7aa55c0aab36d994bf627c92bd5386ae167e16e +master date: 2022-10-11 14:24:21 +0200 +--- + xen/arch/x86/domain.c | 7 ------ + xen/arch/x86/mm/hap/hap.c | 42 ++++++++++++++++++++------------- + xen/arch/x86/mm/shadow/common.c | 12 ++++++++++ + 3 files changed, 38 insertions(+), 23 deletions(-) + +diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c +index 2838f976d729..ce6ddcf31397 100644 +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -38,7 +38,6 @@ + #include <xen/livepatch.h> + #include <public/sysctl.h> + #include <public/hvm/hvm_vcpu.h> +-#include <asm/altp2m.h> + #include <asm/regs.h> + #include <asm/mc146818rtc.h> + #include <asm/system.h> +@@ -2358,12 +2357,6 @@ int domain_relinquish_resources(struct domain *d) + vpmu_destroy(v); + } + +- if ( altp2m_active(d) ) +- { +- for_each_vcpu ( d, v ) +- altp2m_vcpu_disable_ve(v); +- } +- + if ( is_pv_domain(d) ) + { + for_each_vcpu ( d, v ) +diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c +index aef2297450e1..a44fcfd95e1e 100644 +--- a/xen/arch/x86/mm/hap/hap.c ++++ b/xen/arch/x86/mm/hap/hap.c +@@ -28,6 +28,7 @@ + #include <xen/domain_page.h> + #include <xen/guest_access.h> + #include <xen/keyhandler.h> ++#include <asm/altp2m.h> + #include <asm/event.h> + #include <asm/page.h> + #include <asm/current.h> +@@ -546,24 +547,8 @@ void hap_final_teardown(struct domain *d) + unsigned int i; + + if ( hvm_altp2m_supported() ) +- { +- d->arch.altp2m_active = 0; +- +- if ( d->arch.altp2m_eptp ) +- { +- free_xenheap_page(d->arch.altp2m_eptp); +- d->arch.altp2m_eptp = NULL; +- } +- +- if ( d->arch.altp2m_visible_eptp ) +- { +- free_xenheap_page(d->arch.altp2m_visible_eptp); +- d->arch.altp2m_visible_eptp = NULL; +- } +- + for ( i = 0; i < MAX_ALTP2M; i++ ) + p2m_teardown(d->arch.altp2m_p2m[i], true); +- } + + /* Destroy nestedp2m's first */ + for (i = 0; i < MAX_NESTEDP2M; i++) { +@@ -578,6 +563,8 @@ void hap_final_teardown(struct domain *d) + paging_lock(d); + hap_set_allocation(d, 0, NULL); + ASSERT(d->arch.paging.hap.p2m_pages == 0); ++ ASSERT(d->arch.paging.hap.free_pages == 0); ++ ASSERT(d->arch.paging.hap.total_pages == 0); + paging_unlock(d); + } + +@@ -603,6 +590,7 @@ void hap_vcpu_teardown(struct vcpu *v) + void hap_teardown(struct domain *d, bool *preempted) + { + struct vcpu *v; ++ unsigned int i; + + ASSERT(d->is_dying); + ASSERT(d != current->domain); +@@ -611,6 +599,28 @@ void hap_teardown(struct domain *d, bool *preempted) + for_each_vcpu ( d, v ) + hap_vcpu_teardown(v); + ++ /* Leave the root pt in case we get further attempts to modify the p2m. */ ++ if ( hvm_altp2m_supported() ) ++ { ++ if ( altp2m_active(d) ) ++ for_each_vcpu ( d, v ) ++ altp2m_vcpu_disable_ve(v); ++ ++ d->arch.altp2m_active = 0; ++ ++ FREE_XENHEAP_PAGE(d->arch.altp2m_eptp); ++ FREE_XENHEAP_PAGE(d->arch.altp2m_visible_eptp); ++ ++ for ( i = 0; i < MAX_ALTP2M; i++ ) ++ p2m_teardown(d->arch.altp2m_p2m[i], false); ++ } ++ ++ /* Destroy nestedp2m's after altp2m. 
*/
++ for ( i = 0; i < MAX_NESTEDP2M; i++ )
++ p2m_teardown(d->arch.nested_p2m[i], false);
++
++ p2m_teardown(p2m_get_hostp2m(d), false);
++
+ paging_lock(d); /* Keep various asserts happy */
+
+ if ( d->arch.paging.hap.total_pages != 0 )
+diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
+index 366956c146aa..680766fd5170 100644
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -2891,8 +2891,17 @@ void shadow_teardown(struct domain *d, bool *preempted)
+ for_each_vcpu ( d, v )
+ shadow_vcpu_teardown(v);
+
++ p2m_teardown(p2m_get_hostp2m(d), false);
++
+ paging_lock(d);
+
++ /*
++ * Reclaim all shadow memory so that shadow_set_allocation() doesn't find
++ * in-use pages, as _shadow_prealloc() will no longer try to reclaim pages
++ * because the domain is dying.
++ */
++ shadow_blow_tables(d);
++
+ #if (SHADOW_OPTIMIZATIONS & (SHOPT_VIRTUAL_TLB|SHOPT_OUT_OF_SYNC))
+ /* Free the virtual-TLB array attached to each vcpu */
+ for_each_vcpu(d, v)
+@@ -3013,6 +3022,9 @@ void shadow_final_teardown(struct domain *d)
+ d->arch.paging.shadow.total_pages,
+ d->arch.paging.shadow.free_pages,
+ d->arch.paging.shadow.p2m_pages);
++ ASSERT(!d->arch.paging.shadow.total_pages);
++ ASSERT(!d->arch.paging.shadow.free_pages);
++ ASSERT(!d->arch.paging.shadow.p2m_pages);
+ paging_unlock(d);
+ }
+
+--
+2.37.3
+
diff --git a/0050-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch b/0050-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch
new file mode 100644
index 0000000..23e10ba
--- /dev/null
+++ b/0050-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch
@@ -0,0 +1,197 @@
+From b03074bb47d10c9373688b3661c7c31da01c21a3 Mon Sep 17 00:00:00 2001
+From: Julien Grall <jgrall@amazon.com>
+Date: Tue, 11 Oct 2022 15:09:12 +0200
+Subject: [PATCH 50/67] xen/x86: p2m: Add preemption in p2m_teardown()
+
+The list p2m->pages contains all the pages used by the P2M. On a large
+instance this can be quite long, and the time spent calling
+d->arch.paging.free_page() can exceed 1ms for an 80GB guest on a Xen
+running in a nested environment on a c5.metal.
+
+By extrapolation, it would take > 100ms for an 8TB guest (what we
+currently security support). So add some preemption in p2m_teardown()
+and propagate it to the callers. Note there are 3 places where
+the preemption is not enabled:
+ - hap_final_teardown()/shadow_final_teardown(): We prevent updates
+ to the P2M once the domain is dying (so no more pages can be
+ allocated), and most of the P2M pages will be freed in a preemptive
+ manner when relinquishing the resources. So it is fine to disable
+ preemption there.
+ - shadow_enable(): This is fine because it will undo the allocation
+ that may have been made by p2m_alloc_table() (so only the root
+ page table).
+
+The preemption is arbitrarily checked every 1024 iterations.
+
+We now need to include <xen/event.h> in p2m-basic in order to
+import the definition of local_events_need_delivery() used by
+general_preempt_check(). Ideally, the inclusion should happen in
+xen/sched.h but it opened a can of worms.
+
+Note that with the current approach, Xen doesn't keep track of whether
+the alt/nested P2Ms have been cleared. So there is some redundant work.
+However, this is not expected to incur too much overhead (the P2M lock
+shouldn't be contended during teardown). So this optimization is
+left outside of the security event.
+
+This is part of CVE-2022-33746 / XSA-410.
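+
+The preemption idiom, in isolation (sketch; general_preempt_check()
+ultimately consults local_events_need_delivery(), hence the include
+mentioned above):
+
+    unsigned int count = 0;
+
+    while ( (pg = page_list_remove_head(&p2m->pages)) )
+    {
+        d->arch.paging.free_page(d, pg);
+
+        /* Bound the time spent in this loop: every 1024 pages, offer
+         * to stop so the caller can schedule a continuation. */
+        if ( preempted && !(++count % 1024) && general_preempt_check() )
+        {
+            *preempted = true;
+            break;
+        }
+    }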
+ +Signed-off-by: Julien Grall <jgrall@amazon.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +master commit: 8a2111250b424edc49c65c4d41b276766d30635c +master date: 2022-10-11 14:24:48 +0200 +--- + xen/arch/x86/mm/hap/hap.c | 22 ++++++++++++++++------ + xen/arch/x86/mm/p2m.c | 18 +++++++++++++++--- + xen/arch/x86/mm/shadow/common.c | 12 +++++++++--- + xen/include/asm-x86/p2m.h | 2 +- + 4 files changed, 41 insertions(+), 13 deletions(-) + +diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c +index a44fcfd95e1e..1f9a157a0c34 100644 +--- a/xen/arch/x86/mm/hap/hap.c ++++ b/xen/arch/x86/mm/hap/hap.c +@@ -548,17 +548,17 @@ void hap_final_teardown(struct domain *d) + + if ( hvm_altp2m_supported() ) + for ( i = 0; i < MAX_ALTP2M; i++ ) +- p2m_teardown(d->arch.altp2m_p2m[i], true); ++ p2m_teardown(d->arch.altp2m_p2m[i], true, NULL); + + /* Destroy nestedp2m's first */ + for (i = 0; i < MAX_NESTEDP2M; i++) { +- p2m_teardown(d->arch.nested_p2m[i], true); ++ p2m_teardown(d->arch.nested_p2m[i], true, NULL); + } + + if ( d->arch.paging.hap.total_pages != 0 ) + hap_teardown(d, NULL); + +- p2m_teardown(p2m_get_hostp2m(d), true); ++ p2m_teardown(p2m_get_hostp2m(d), true, NULL); + /* Free any memory that the p2m teardown released */ + paging_lock(d); + hap_set_allocation(d, 0, NULL); +@@ -612,14 +612,24 @@ void hap_teardown(struct domain *d, bool *preempted) + FREE_XENHEAP_PAGE(d->arch.altp2m_visible_eptp); + + for ( i = 0; i < MAX_ALTP2M; i++ ) +- p2m_teardown(d->arch.altp2m_p2m[i], false); ++ { ++ p2m_teardown(d->arch.altp2m_p2m[i], false, preempted); ++ if ( preempted && *preempted ) ++ return; ++ } + } + + /* Destroy nestedp2m's after altp2m. */ + for ( i = 0; i < MAX_NESTEDP2M; i++ ) +- p2m_teardown(d->arch.nested_p2m[i], false); ++ { ++ p2m_teardown(d->arch.nested_p2m[i], false, preempted); ++ if ( preempted && *preempted ) ++ return; ++ } + +- p2m_teardown(p2m_get_hostp2m(d), false); ++ p2m_teardown(p2m_get_hostp2m(d), false, preempted); ++ if ( preempted && *preempted ) ++ return; + + paging_lock(d); /* Keep various asserts happy */ + +diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c +index 8ba73082c1bf..107f6778a6e1 100644 +--- a/xen/arch/x86/mm/p2m.c ++++ b/xen/arch/x86/mm/p2m.c +@@ -741,12 +741,13 @@ int p2m_alloc_table(struct p2m_domain *p2m) + * hvm fixme: when adding support for pvh non-hardware domains, this path must + * cleanup any foreign p2m types (release refcnts on them). + */ +-void p2m_teardown(struct p2m_domain *p2m, bool remove_root) ++void p2m_teardown(struct p2m_domain *p2m, bool remove_root, bool *preempted) + /* Return all the p2m pages to Xen. 
+ * We know we don't have any extra mappings to these pages */
+ {
+ struct page_info *pg, *root_pg = NULL;
+ struct domain *d;
++ unsigned int i = 0;
+ 
+ if (p2m == NULL)
+ return;
+@@ -765,8 +766,19 @@ void p2m_teardown(struct p2m_domain *p2m, bool remove_root)
+ }
+ 
+ while ( (pg = page_list_remove_head(&p2m->pages)) )
+- if ( pg != root_pg )
+- d->arch.paging.free_page(d, pg);
++ {
++ if ( pg == root_pg )
++ continue;
++
++ d->arch.paging.free_page(d, pg);
++
++ /* Arbitrarily check preemption every 1024 iterations */
++ if ( preempted && !(++i % 1024) && general_preempt_check() )
++ {
++ *preempted = true;
++ break;
++ }
++ }
+ 
+ if ( root_pg )
+ page_list_add(root_pg, &p2m->pages);
+diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
+index 680766fd5170..8f7fddcee1e5 100644
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -2837,8 +2837,12 @@ int shadow_enable(struct domain *d, u32 mode)
+ out_locked:
+ paging_unlock(d);
+ out_unlocked:
++ /*
++ * This is fine to ignore the preemption here because only the root
++ * will be allocated by p2m_alloc_table().
++ */
+ if ( rv != 0 && !pagetable_is_null(p2m_get_pagetable(p2m)) )
+- p2m_teardown(p2m, true);
++ p2m_teardown(p2m, true, NULL);
+ if ( rv != 0 && pg != NULL )
+ {
+ pg->count_info &= ~PGC_count_mask;
+@@ -2891,7 +2895,9 @@ void shadow_teardown(struct domain *d, bool *preempted)
+ for_each_vcpu ( d, v )
+ shadow_vcpu_teardown(v);
+ 
+- p2m_teardown(p2m_get_hostp2m(d), false);
++ p2m_teardown(p2m_get_hostp2m(d), false, preempted);
++ if ( preempted && *preempted )
++ return;
+ 
+ paging_lock(d);
+ 
+@@ -3012,7 +3018,7 @@ void shadow_final_teardown(struct domain *d)
+ shadow_teardown(d, NULL);
+ 
+ /* It is now safe to pull down the p2m map. */
+- p2m_teardown(p2m_get_hostp2m(d), true);
++ p2m_teardown(p2m_get_hostp2m(d), true, NULL);
+ /* Free any shadow memory that the p2m teardown released */
+ paging_lock(d);
+ shadow_set_allocation(d, 0, NULL);
+diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
+index 46eb51d44cf5..edbe4cee2717 100644
+--- a/xen/include/asm-x86/p2m.h
++++ b/xen/include/asm-x86/p2m.h
+@@ -619,7 +619,7 @@ int p2m_init(struct domain *d);
+ int p2m_alloc_table(struct p2m_domain *p2m);
+ 
+ /* Return all the p2m resources to Xen. */
+-void p2m_teardown(struct p2m_domain *p2m, bool remove_root);
++void p2m_teardown(struct p2m_domain *p2m, bool remove_root, bool *preempted);
+ void p2m_final_teardown(struct domain *d);
+ 
+ /* Add a page to a domain's p2m table */
+-- 
+2.37.3
+
diff --git a/0051-libxl-docs-Use-arch-specific-default-paging-memory.patch b/0051-libxl-docs-Use-arch-specific-default-paging-memory.patch
new file mode 100644
index 0000000..f3bded4
--- /dev/null
+++ b/0051-libxl-docs-Use-arch-specific-default-paging-memory.patch
@@ -0,0 +1,147 @@
+From 0c0680d6e7953ca4c91699e60060c732f9ead5c1 Mon Sep 17 00:00:00 2001
+From: Henry Wang <Henry.Wang@arm.com>
+Date: Tue, 11 Oct 2022 15:09:32 +0200
+Subject: [PATCH 51/67] libxl, docs: Use arch-specific default paging memory
+
+The default paging memory (described in the `shadow_memory` entry in xl
+config) in libxl is used to determine the memory pool size for xl
+guests. Currently this size is only used for x86, and contains a part
+of RAM to shadow the resident processes. Since there are no shadow
+mode guests on Arm, the part of RAM to shadow the resident processes
+is not necessary. 
Therefore, this commit splits the function
+`libxl_get_required_shadow_memory()` into arch-specific helpers and
+renames the helper to `libxl__arch_get_required_paging_memory()`.
+
+On x86, this helper returns the original value from
+`libxl_get_required_shadow_memory()`, so no functional change is intended.
+
+On Arm, this helper returns 1MB per vcpu plus 4KB per MiB of RAM
+for the P2M map and an additional 512KB.
+
+Also update the xl.cfg documentation to add Arm documentation
+according to the code changes and correct the comment style following
+Xen coding style.
+
+This is part of CVE-2022-33747 / XSA-409.
+
+Suggested-by: Julien Grall <jgrall@amazon.com>
+Signed-off-by: Henry Wang <Henry.Wang@arm.com>
+Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
+master commit: 156a239ea288972425f967ac807b3cb5b5e14874
+master date: 2022-10-11 14:28:37 +0200
+---
+ docs/man/xl.cfg.5.pod.in | 5 +++++
+ tools/libs/light/libxl_arch.h | 4 ++++
+ tools/libs/light/libxl_arm.c | 12 ++++++++++++
+ tools/libs/light/libxl_utils.c | 9 ++-------
+ tools/libs/light/libxl_x86.c | 13 +++++++++++++
+ 5 files changed, 36 insertions(+), 7 deletions(-)
+
+diff --git a/docs/man/xl.cfg.5.pod.in b/docs/man/xl.cfg.5.pod.in
+index 56370a37dbb1..af7fae7c52f9 100644
+--- a/docs/man/xl.cfg.5.pod.in
++++ b/docs/man/xl.cfg.5.pod.in
+@@ -1746,6 +1746,11 @@ are not using hardware assisted paging (i.e. you are using shadow
+ mode) and your guest workload consists of a very large number of
+ similar processes then increasing this value may improve performance.
+ 
++On Arm, this field is used to determine the size of the guest P2M pages
++pool, and the default value is 1MB per vCPU plus 4KB per MB of RAM for
++the P2M map. Users should adjust this value if a bigger P2M pool size
++is needed.
++
+ =back
+ 
+ =head3 Processor and Platform Features
+diff --git a/tools/libs/light/libxl_arch.h b/tools/libs/light/libxl_arch.h
+index 8527fc5c6c23..6741b7f6f457 100644
+--- a/tools/libs/light/libxl_arch.h
++++ b/tools/libs/light/libxl_arch.h
+@@ -90,6 +90,10 @@ void libxl__arch_update_domain_config(libxl__gc *gc,
+ libxl_domain_config *dst,
+ const libxl_domain_config *src);
+ 
++_hidden
++unsigned long libxl__arch_get_required_paging_memory(unsigned long maxmem_kb,
++ unsigned int smp_cpus);
++
+ #if defined(__i386__) || defined(__x86_64__)
+ 
+ #define LAPIC_BASE_ADDRESS 0xfee00000
+diff --git a/tools/libs/light/libxl_arm.c b/tools/libs/light/libxl_arm.c
+index e2901f13b724..d59b464192c2 100644
+--- a/tools/libs/light/libxl_arm.c
++++ b/tools/libs/light/libxl_arm.c
+@@ -154,6 +154,18 @@ out:
+ return rc;
+ }
+ 
++unsigned long libxl__arch_get_required_paging_memory(unsigned long maxmem_kb,
++ unsigned int smp_cpus)
++{
++ /*
++ * 256 pages (1MB) per vcpu,
++ * plus 1 page per MiB of RAM for the P2M map,
++ * This is higher than the minimum that Xen would allocate if no value
++ * were given (but the Xen minimum is for safety, not performance). 
++ */
++ return 4 * (256 * smp_cpus + maxmem_kb / 1024);
++}
++
+ static struct arch_info {
+ const char *guest_type;
+ const char *timer_compat;
+diff --git a/tools/libs/light/libxl_utils.c b/tools/libs/light/libxl_utils.c
+index 4699c4a0a36f..e276c0ee9cc3 100644
+--- a/tools/libs/light/libxl_utils.c
++++ b/tools/libs/light/libxl_utils.c
+@@ -18,6 +18,7 @@
+ #include <ctype.h>
+ 
+ #include "libxl_internal.h"
++#include "libxl_arch.h"
+ #include "_paths.h"
+ 
+ #ifndef LIBXL_HAVE_NONCONST_LIBXL_BASENAME_RETURN_VALUE
+@@ -39,13 +40,7 @@ char *libxl_basename(const char *name)
+ 
+ unsigned long libxl_get_required_shadow_memory(unsigned long maxmem_kb, unsigned int smp_cpus)
+ {
+- /* 256 pages (1MB) per vcpu,
+- plus 1 page per MiB of RAM for the P2M map,
+- plus 1 page per MiB of RAM to shadow the resident processes.
+- This is higher than the minimum that Xen would allocate if no value
+- were given (but the Xen minimum is for safety, not performance).
+- */
+- return 4 * (256 * smp_cpus + 2 * (maxmem_kb / 1024));
++ return libxl__arch_get_required_paging_memory(maxmem_kb, smp_cpus);
+ }
+ 
+ char *libxl_domid_to_name(libxl_ctx *ctx, uint32_t domid)
+diff --git a/tools/libs/light/libxl_x86.c b/tools/libs/light/libxl_x86.c
+index 18c3c77ccde3..4d66478fe9dd 100644
+--- a/tools/libs/light/libxl_x86.c
++++ b/tools/libs/light/libxl_x86.c
+@@ -882,6 +882,19 @@ void libxl__arch_update_domain_config(libxl__gc *gc,
+ libxl_defbool_val(src->b_info.arch_x86.msr_relaxed));
+ }
+ 
++unsigned long libxl__arch_get_required_paging_memory(unsigned long maxmem_kb,
++ unsigned int smp_cpus)
++{
++ /*
++ * 256 pages (1MB) per vcpu,
++ * plus 1 page per MiB of RAM for the P2M map,
++ * plus 1 page per MiB of RAM to shadow the resident processes.
++ * This is higher than the minimum that Xen would allocate if no value
++ * were given (but the Xen minimum is for safety, not performance).
++ */
++ return 4 * (256 * smp_cpus + 2 * (maxmem_kb / 1024));
++}
++
+ /*
+ * Local variables:
+ * mode: C
+-- 
+2.37.3
+
diff --git a/0052-xen-arm-Construct-the-P2M-pages-pool-for-guests.patch b/0052-xen-arm-Construct-the-P2M-pages-pool-for-guests.patch
new file mode 100644
index 0000000..77093a7
--- /dev/null
+++ b/0052-xen-arm-Construct-the-P2M-pages-pool-for-guests.patch
@@ -0,0 +1,189 @@
+From 45336d8f88725aec65ee177b1b09abf6eef1dc8d Mon Sep 17 00:00:00 2001
+From: Henry Wang <Henry.Wang@arm.com>
+Date: Tue, 11 Oct 2022 15:09:58 +0200
+Subject: [PATCH 52/67] xen/arm: Construct the P2M pages pool for guests
+
+This commit constructs the p2m pages pool for guests from the
+data structure and helper perspective.
+
+This is implemented by:
+
+- Adding a `struct paging_domain` which contains a freelist, a
+counter variable and a spinlock to `struct arch_domain` to
+indicate the free p2m pages and the total number of p2m pages in
+the p2m pages pool.
+
+- Adding a helper `p2m_get_allocation` to get the p2m pool size.
+
+- Adding a helper `p2m_set_allocation` to set the p2m pages pool
+size. This helper should be called before allocating memory for
+a guest.
+
+- Adding a helper `p2m_teardown_allocation` to free the p2m pages
+pool. This helper should be called during the xl domain destroy path.
+
+This is part of CVE-2022-33747 / XSA-409.
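A rough caller-side sketch of the intended protocol for these helpers (resize_p2m_pool() is a hypothetical wrapper for illustration; the locking and the -ERESTART handling mirror the domctl wiring added later in this series):

    /* Hypothetical wrapper showing the locking/preemption protocol. */
    static int resize_p2m_pool(struct domain *d, unsigned long pages)
    {
        bool preempted = false;
        int rc;

        spin_lock(&d->arch.paging.lock);  /* p2m_set_allocation() asserts this */
        rc = p2m_set_allocation(d, pages, &preempted);
        spin_unlock(&d->arch.paging.lock);

        /*
         * rc is -ERESTART when preempted; the caller is expected to set up
         * a hypercall continuation and retry.
         */
        return rc;
    }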
+ +Signed-off-by: Henry Wang <Henry.Wang@arm.com> +Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> +master commit: 55914f7fc91a468649b8a3ec3f53ae1c4aca6670 +master date: 2022-10-11 14:28:39 +0200 +--- + xen/arch/arm/p2m.c | 88 ++++++++++++++++++++++++++++++++++++ + xen/include/asm-arm/domain.h | 10 ++++ + xen/include/asm-arm/p2m.h | 4 ++ + 3 files changed, 102 insertions(+) + +diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c +index 4ad3e0606e9c..6883d8627702 100644 +--- a/xen/arch/arm/p2m.c ++++ b/xen/arch/arm/p2m.c +@@ -50,6 +50,92 @@ static uint64_t generate_vttbr(uint16_t vmid, mfn_t root_mfn) + return (mfn_to_maddr(root_mfn) | ((uint64_t)vmid << 48)); + } + ++/* Return the size of the pool, rounded up to the nearest MB */ ++unsigned int p2m_get_allocation(struct domain *d) ++{ ++ unsigned long nr_pages = ACCESS_ONCE(d->arch.paging.p2m_total_pages); ++ ++ return ROUNDUP(nr_pages, 1 << (20 - PAGE_SHIFT)) >> (20 - PAGE_SHIFT); ++} ++ ++/* ++ * Set the pool of pages to the required number of pages. ++ * Returns 0 for success, non-zero for failure. ++ * Call with d->arch.paging.lock held. ++ */ ++int p2m_set_allocation(struct domain *d, unsigned long pages, bool *preempted) ++{ ++ struct page_info *pg; ++ ++ ASSERT(spin_is_locked(&d->arch.paging.lock)); ++ ++ for ( ; ; ) ++ { ++ if ( d->arch.paging.p2m_total_pages < pages ) ++ { ++ /* Need to allocate more memory from domheap */ ++ pg = alloc_domheap_page(NULL, 0); ++ if ( pg == NULL ) ++ { ++ printk(XENLOG_ERR "Failed to allocate P2M pages.\n"); ++ return -ENOMEM; ++ } ++ ACCESS_ONCE(d->arch.paging.p2m_total_pages) = ++ d->arch.paging.p2m_total_pages + 1; ++ page_list_add_tail(pg, &d->arch.paging.p2m_freelist); ++ } ++ else if ( d->arch.paging.p2m_total_pages > pages ) ++ { ++ /* Need to return memory to domheap */ ++ pg = page_list_remove_head(&d->arch.paging.p2m_freelist); ++ if( pg ) ++ { ++ ACCESS_ONCE(d->arch.paging.p2m_total_pages) = ++ d->arch.paging.p2m_total_pages - 1; ++ free_domheap_page(pg); ++ } ++ else ++ { ++ printk(XENLOG_ERR ++ "Failed to free P2M pages, P2M freelist is empty.\n"); ++ return -ENOMEM; ++ } ++ } ++ else ++ break; ++ ++ /* Check to see if we need to yield and try again */ ++ if ( preempted && general_preempt_check() ) ++ { ++ *preempted = true; ++ return -ERESTART; ++ } ++ } ++ ++ return 0; ++} ++ ++int p2m_teardown_allocation(struct domain *d) ++{ ++ int ret = 0; ++ bool preempted = false; ++ ++ spin_lock(&d->arch.paging.lock); ++ if ( d->arch.paging.p2m_total_pages != 0 ) ++ { ++ ret = p2m_set_allocation(d, 0, &preempted); ++ if ( preempted ) ++ { ++ spin_unlock(&d->arch.paging.lock); ++ return -ERESTART; ++ } ++ ASSERT(d->arch.paging.p2m_total_pages == 0); ++ } ++ spin_unlock(&d->arch.paging.lock); ++ ++ return ret; ++} ++ + /* Unlock the flush and do a P2M TLB flush if necessary */ + void p2m_write_unlock(struct p2m_domain *p2m) + { +@@ -1602,7 +1688,9 @@ int p2m_init(struct domain *d) + unsigned int cpu; + + rwlock_init(&p2m->lock); ++ spin_lock_init(&d->arch.paging.lock); + INIT_PAGE_LIST_HEAD(&p2m->pages); ++ INIT_PAGE_LIST_HEAD(&d->arch.paging.p2m_freelist); + + p2m->vmid = INVALID_VMID; + +diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h +index bb0a6adbe00b..1d8935778f3b 100644 +--- a/xen/include/asm-arm/domain.h ++++ b/xen/include/asm-arm/domain.h +@@ -40,6 +40,14 @@ struct vtimer { + uint64_t cval; + }; + ++struct paging_domain { ++ spinlock_t lock; ++ /* Free P2M pages from the pre-allocated P2M pool */ ++ struct page_list_head p2m_freelist; ++ /* Number of 
pages from the pre-allocated P2M pool */
++ unsigned long p2m_total_pages;
++};
++
+ struct arch_domain
+ {
+ #ifdef CONFIG_ARM_64
+@@ -51,6 +59,8 @@ struct arch_domain
+ 
+ struct hvm_domain hvm;
+ 
++ struct paging_domain paging;
++
+ struct vmmio vmmio;
+ 
+ /* Continuable domain_relinquish_resources(). */
+diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
+index 3a2d51b35d71..18675b234570 100644
+--- a/xen/include/asm-arm/p2m.h
++++ b/xen/include/asm-arm/p2m.h
+@@ -218,6 +218,10 @@ void p2m_restore_state(struct vcpu *n);
+ /* Print debugging/statistial info about a domain's p2m */
+ void p2m_dump_info(struct domain *d);
+ 
++unsigned int p2m_get_allocation(struct domain *d);
++int p2m_set_allocation(struct domain *d, unsigned long pages, bool *preempted);
++int p2m_teardown_allocation(struct domain *d);
++
+ static inline void p2m_write_lock(struct p2m_domain *p2m)
+ {
+ write_lock(&p2m->lock);
+-- 
+2.37.3
+
diff --git a/0053-xen-arm-libxl-Implement-XEN_DOMCTL_shadow_op-for-Arm.patch b/0053-xen-arm-libxl-Implement-XEN_DOMCTL_shadow_op-for-Arm.patch
new file mode 100644
index 0000000..52ce67c
--- /dev/null
+++ b/0053-xen-arm-libxl-Implement-XEN_DOMCTL_shadow_op-for-Arm.patch
@@ -0,0 +1,108 @@
+From c5215044578e88b401a1296ed6302df05c113c5f Mon Sep 17 00:00:00 2001
+From: Henry Wang <Henry.Wang@arm.com>
+Date: Tue, 11 Oct 2022 15:10:16 +0200
+Subject: [PATCH 53/67] xen/arm, libxl: Implement XEN_DOMCTL_shadow_op for Arm
+
+This commit implements the `XEN_DOMCTL_shadow_op` support in Xen
+for Arm. The p2m pages pool size for xl guests is supposed to be
+determined by `XEN_DOMCTL_shadow_op`. Hence, this commit:
+
+- Introduces a function `p2m_domctl` and implements the subops
+`XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION` and
+`XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION` of `XEN_DOMCTL_shadow_op`.
+
+- Adds the `XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION` support in libxl.
+
+This enables setting the shadow memory pool size when creating a
+guest from xl and getting the shadow memory pool size from Xen.
+
+Note that the `XEN_DOMCTL_shadow_op` added in this commit is only
+a dummy op, and the functionality of setting/getting the p2m memory
+pool size for xl guests will be added in following commits.
+
+This is part of CVE-2022-33747 / XSA-409.
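From the toolstack side the new subops are reached via xc_shadow_control(); a minimal sketch of a GET call, assuming the same five-argument signature as the SET call in the libxl hunk below:

    unsigned int mb = 0;

    /* Query the current P2M pool size in MiB (still a dummy op here). */
    int r = xc_shadow_control(ctx->xch, domid,
                              XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION, &mb, 0);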
+
+Signed-off-by: Henry Wang <Henry.Wang@arm.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+master commit: cf2a68d2ffbc3ce95e01449d46180bddb10d24a0
+master date: 2022-10-11 14:28:42 +0200
+---
+ tools/libs/light/libxl_arm.c | 12 ++++++++++++
+ xen/arch/arm/domctl.c | 32 ++++++++++++++++++++++++++++++++
+ 2 files changed, 44 insertions(+)
+
+diff --git a/tools/libs/light/libxl_arm.c b/tools/libs/light/libxl_arm.c
+index d59b464192c2..d21f614ed788 100644
+--- a/tools/libs/light/libxl_arm.c
++++ b/tools/libs/light/libxl_arm.c
+@@ -131,6 +131,18 @@ int libxl__arch_domain_create(libxl__gc *gc,
+ libxl__domain_build_state *state,
+ uint32_t domid)
+ {
++ libxl_ctx *ctx = libxl__gc_owner(gc);
++ unsigned int shadow_mb = DIV_ROUNDUP(d_config->b_info.shadow_memkb, 1024);
++
++ int r = xc_shadow_control(ctx->xch, domid,
++ XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION,
++ &shadow_mb, 0);
++ if (r) {
++ LOGED(ERROR, domid,
++ "Failed to set %u MiB shadow allocation", shadow_mb);
++ return ERROR_FAIL;
++ }
++
+ return 0;
+ }
+ 
+diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c
+index a8c48b0beaab..a049bc7f3e52 100644
+--- a/xen/arch/arm/domctl.c
++++ b/xen/arch/arm/domctl.c
+@@ -45,11 +45,43 @@ static int handle_vuart_init(struct domain *d,
+ return rc;
+ }
+ 
++static long p2m_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
++ XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
++{
++ if ( unlikely(d == current->domain) )
++ {
++ printk(XENLOG_ERR "Tried to do a p2m domctl op on itself.\n");
++ return -EINVAL;
++ }
++
++ if ( unlikely(d->is_dying) )
++ {
++ printk(XENLOG_ERR "Tried to do a p2m domctl op on dying domain %u\n",
++ d->domain_id);
++ return -EINVAL;
++ }
++
++ switch ( sc->op )
++ {
++ case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
++ return 0;
++ case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
++ return 0;
++ default:
++ {
++ printk(XENLOG_ERR "Bad p2m domctl op %u\n", sc->op);
++ return -EINVAL;
++ }
++ }
++}
++
+ long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
+ XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
+ {
+ switch ( domctl->cmd )
+ {
++ case XEN_DOMCTL_shadow_op:
++ return p2m_domctl(d, &domctl->u.shadow_op, u_domctl);
+ case XEN_DOMCTL_cacheflush:
+ {
+ gfn_t s = _gfn(domctl->u.cacheflush.start_pfn);
+-- 
+2.37.3
+
diff --git a/0054-xen-arm-Allocate-and-free-P2M-pages-from-the-P2M-poo.patch b/0054-xen-arm-Allocate-and-free-P2M-pages-from-the-P2M-poo.patch
new file mode 100644
index 0000000..3ef7019
--- /dev/null
+++ b/0054-xen-arm-Allocate-and-free-P2M-pages-from-the-P2M-poo.patch
@@ -0,0 +1,289 @@
+From 7ad38a39f08aadc1578bdb46ccabaad79ed0faee Mon Sep 17 00:00:00 2001
+From: Henry Wang <Henry.Wang@arm.com>
+Date: Tue, 11 Oct 2022 15:10:34 +0200
+Subject: [PATCH 54/67] xen/arm: Allocate and free P2M pages from the P2M pool
+
+This commit sets up and tears down the p2m pages pool for
+non-privileged Arm guests by calling `p2m_set_allocation` and
+`p2m_teardown_allocation`.
+
+- For dom0, P2M pages should come from the heap directly instead of
+the p2m pool, so that the kernel may take advantage of the extended
+regions.
+
+- For xl guests, the setting of the p2m pool is done in
+`XEN_DOMCTL_shadow_op` and the p2m pool is destroyed in
+`domain_relinquish_resources`. Note that domctl->u.shadow_op.mb is
+updated with the new size when setting the p2m pool.
+
+- For dom0less domUs, the setting of the p2m pool is done before
+allocating memory during domain creation. Users can specify the p2m
+pool size via the `xen,domain-p2m-mem-mb` dts property; a sketch of
+such a node follows below. 
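For illustration, a dom0less domU node carrying the property might look like this (a sketch only; the node name and sizes are made up, and the booting.txt hunk below has the authoritative description):

    domU1 {
        compatible = "xen,domain";
        #address-cells = <1>;
        #size-cells = <1>;
        memory = <0 0x20000>;        /* 128MB of guest RAM, value in KB */
        cpus = <2>;
        xen,domain-p2m-mem-mb = <8>; /* 8MB P2M pool instead of the default */
    };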
+
+To actually allocate/free pages from the p2m pool, this commit adds
+two helper functions, namely `p2m_alloc_page` and `p2m_free_page`, to
+`struct p2m_domain`. By replacing `alloc_domheap_page` and
+`free_domheap_page` with these two helper functions, p2m pages can
+be added/removed from the list of the p2m pool rather than the heap.
+
+Since the page returned by `p2m_alloc_page` is cleaned, take the
+opportunity to remove the redundant `clean_page` in `p2m_create_table`.
+
+This is part of CVE-2022-33747 / XSA-409.
+
+Signed-off-by: Henry Wang <Henry.Wang@arm.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+master commit: cbea5a1149ca7fd4b7cdbfa3ec2e4f109b601ff7
+master date: 2022-10-11 14:28:44 +0200
+---
+ docs/misc/arm/device-tree/booting.txt | 8 ++++
+ xen/arch/arm/domain.c | 6 +++
+ xen/arch/arm/domain_build.c | 29 ++++++++++++++
+ xen/arch/arm/domctl.c | 23 ++++++++++-
+ xen/arch/arm/p2m.c | 57 +++++++++++++++++++++++++--
+ 5 files changed, 118 insertions(+), 5 deletions(-)
+
+diff --git a/docs/misc/arm/device-tree/booting.txt b/docs/misc/arm/device-tree/booting.txt
+index 5243bc7fd344..470c9491a781 100644
+--- a/docs/misc/arm/device-tree/booting.txt
++++ b/docs/misc/arm/device-tree/booting.txt
+@@ -164,6 +164,14 @@ with the following properties:
+ Both #address-cells and #size-cells need to be specified because
+ both sub-nodes (described shortly) have reg properties.
+ 
++- xen,domain-p2m-mem-mb
++
++ Optional. A 32-bit integer specifying the number of megabytes of RAM
++ used for the domain P2M pool. This is in-sync with the shadow_memory
++ option in xl.cfg. Leaving this field empty in the device tree will
++ lead to the default size of the domain P2M pool, i.e. 1MB per guest
++ vCPU plus 4KB per MB of guest RAM plus 512KB for guest extended regions.
++
+ Under the "xen,domain" compatible node, one or more sub-nodes are present
+ for the DomU kernel and ramdisk.
+ 
+diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
+index 223ec9694df1..a5ffd952ecd0 100644
+--- a/xen/arch/arm/domain.c
++++ b/xen/arch/arm/domain.c
+@@ -985,6 +985,7 @@ enum {
+ PROG_page,
+ PROG_mapping,
+ PROG_p2m,
++ PROG_p2m_pool,
+ PROG_done,
+ };
+ 
+@@ -1044,6 +1045,11 @@ int domain_relinquish_resources(struct domain *d)
+ if ( ret )
+ return ret;
+ 
++ PROGRESS(p2m_pool):
++ ret = p2m_teardown_allocation(d);
++ if( ret )
++ return ret;
++
+ PROGRESS(done):
+ break;
+ 
+diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
+index 26c13429488d..df0ec84f034c 100644
+--- a/xen/arch/arm/domain_build.c
++++ b/xen/arch/arm/domain_build.c
+@@ -2333,6 +2333,21 @@ static void __init find_gnttab_region(struct domain *d,
+ kinfo->gnttab_start, kinfo->gnttab_start + kinfo->gnttab_size);
+ }
+ 
++static unsigned long __init domain_p2m_pages(unsigned long maxmem_kb,
++ unsigned int smp_cpus)
++{
++ /*
++ * Keep in sync with libxl__get_required_paging_memory().
++ * 256 pages (1MB) per vcpu, plus 1 page per MiB of RAM for the P2M map,
++ * plus 128 pages to cover extended regions. 
++ */ ++ unsigned long memkb = 4 * (256 * smp_cpus + (maxmem_kb / 1024) + 128); ++ ++ BUILD_BUG_ON(PAGE_SIZE != SZ_4K); ++ ++ return DIV_ROUND_UP(memkb, 1024) << (20 - PAGE_SHIFT); ++} ++ + static int __init construct_domain(struct domain *d, struct kernel_info *kinfo) + { + unsigned int i; +@@ -2424,6 +2439,8 @@ static int __init construct_domU(struct domain *d, + struct kernel_info kinfo = {}; + int rc; + u64 mem; ++ u32 p2m_mem_mb; ++ unsigned long p2m_pages; + + rc = dt_property_read_u64(node, "memory", &mem); + if ( !rc ) +@@ -2433,6 +2450,18 @@ static int __init construct_domU(struct domain *d, + } + kinfo.unassigned_mem = (paddr_t)mem * SZ_1K; + ++ rc = dt_property_read_u32(node, "xen,domain-p2m-mem-mb", &p2m_mem_mb); ++ /* If xen,domain-p2m-mem-mb is not specified, use the default value. */ ++ p2m_pages = rc ? ++ p2m_mem_mb << (20 - PAGE_SHIFT) : ++ domain_p2m_pages(mem, d->max_vcpus); ++ ++ spin_lock(&d->arch.paging.lock); ++ rc = p2m_set_allocation(d, p2m_pages, NULL); ++ spin_unlock(&d->arch.paging.lock); ++ if ( rc != 0 ) ++ return rc; ++ + printk("*** LOADING DOMU cpus=%u memory=%"PRIx64"KB ***\n", d->max_vcpus, mem); + + kinfo.vpl011 = dt_property_read_bool(node, "vpl011"); +diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c +index a049bc7f3e52..4ab5ed4ab24d 100644 +--- a/xen/arch/arm/domctl.c ++++ b/xen/arch/arm/domctl.c +@@ -48,6 +48,9 @@ static int handle_vuart_init(struct domain *d, + static long p2m_domctl(struct domain *d, struct xen_domctl_shadow_op *sc, + XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) + { ++ long rc; ++ bool preempted = false; ++ + if ( unlikely(d == current->domain) ) + { + printk(XENLOG_ERR "Tried to do a p2m domctl op on itself.\n"); +@@ -64,9 +67,27 @@ static long p2m_domctl(struct domain *d, struct xen_domctl_shadow_op *sc, + switch ( sc->op ) + { + case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION: +- return 0; ++ { ++ /* Allow and handle preemption */ ++ spin_lock(&d->arch.paging.lock); ++ rc = p2m_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted); ++ spin_unlock(&d->arch.paging.lock); ++ ++ if ( preempted ) ++ /* Not finished. Set up to re-run the call. */ ++ rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h", ++ u_domctl); ++ else ++ /* Finished. Return the new allocation. */ ++ sc->mb = p2m_get_allocation(d); ++ ++ return rc; ++ } + case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION: ++ { ++ sc->mb = p2m_get_allocation(d); + return 0; ++ } + default: + { + printk(XENLOG_ERR "Bad p2m domctl op %u\n", sc->op); +diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c +index 6883d8627702..c1055ff2a745 100644 +--- a/xen/arch/arm/p2m.c ++++ b/xen/arch/arm/p2m.c +@@ -50,6 +50,54 @@ static uint64_t generate_vttbr(uint16_t vmid, mfn_t root_mfn) + return (mfn_to_maddr(root_mfn) | ((uint64_t)vmid << 48)); + } + ++static struct page_info *p2m_alloc_page(struct domain *d) ++{ ++ struct page_info *pg; ++ ++ spin_lock(&d->arch.paging.lock); ++ /* ++ * For hardware domain, there should be no limit in the number of pages that ++ * can be allocated, so that the kernel may take advantage of the extended ++ * regions. Hence, allocate p2m pages for hardware domains from heap. 
++ */ ++ if ( is_hardware_domain(d) ) ++ { ++ pg = alloc_domheap_page(NULL, 0); ++ if ( pg == NULL ) ++ { ++ printk(XENLOG_G_ERR "Failed to allocate P2M pages for hwdom.\n"); ++ spin_unlock(&d->arch.paging.lock); ++ return NULL; ++ } ++ } ++ else ++ { ++ pg = page_list_remove_head(&d->arch.paging.p2m_freelist); ++ if ( unlikely(!pg) ) ++ { ++ spin_unlock(&d->arch.paging.lock); ++ return NULL; ++ } ++ d->arch.paging.p2m_total_pages--; ++ } ++ spin_unlock(&d->arch.paging.lock); ++ ++ return pg; ++} ++ ++static void p2m_free_page(struct domain *d, struct page_info *pg) ++{ ++ spin_lock(&d->arch.paging.lock); ++ if ( is_hardware_domain(d) ) ++ free_domheap_page(pg); ++ else ++ { ++ d->arch.paging.p2m_total_pages++; ++ page_list_add_tail(pg, &d->arch.paging.p2m_freelist); ++ } ++ spin_unlock(&d->arch.paging.lock); ++} ++ + /* Return the size of the pool, rounded up to the nearest MB */ + unsigned int p2m_get_allocation(struct domain *d) + { +@@ -751,7 +799,7 @@ static int p2m_create_table(struct p2m_domain *p2m, lpae_t *entry) + + ASSERT(!p2m_is_valid(*entry)); + +- page = alloc_domheap_page(NULL, 0); ++ page = p2m_alloc_page(p2m->domain); + if ( page == NULL ) + return -ENOMEM; + +@@ -878,7 +926,7 @@ static void p2m_free_entry(struct p2m_domain *p2m, + pg = mfn_to_page(mfn); + + page_list_del(pg, &p2m->pages); +- free_domheap_page(pg); ++ p2m_free_page(p2m->domain, pg); + } + + static bool p2m_split_superpage(struct p2m_domain *p2m, lpae_t *entry, +@@ -902,7 +950,7 @@ static bool p2m_split_superpage(struct p2m_domain *p2m, lpae_t *entry, + ASSERT(level < target); + ASSERT(p2m_is_superpage(*entry, level)); + +- page = alloc_domheap_page(NULL, 0); ++ page = p2m_alloc_page(p2m->domain); + if ( !page ) + return false; + +@@ -1644,7 +1692,7 @@ int p2m_teardown(struct domain *d) + + while ( (pg = page_list_remove_head(&p2m->pages)) ) + { +- free_domheap_page(pg); ++ p2m_free_page(p2m->domain, pg); + count++; + /* Arbitrarily preempt every 512 iterations */ + if ( !(count % 512) && hypercall_preempt_check() ) +@@ -1668,6 +1716,7 @@ void p2m_final_teardown(struct domain *d) + return; + + ASSERT(page_list_empty(&p2m->pages)); ++ ASSERT(page_list_empty(&d->arch.paging.p2m_freelist)); + + if ( p2m->root ) + free_domheap_pages(p2m->root, P2M_ROOT_ORDER); +-- +2.37.3 + diff --git a/0055-gnttab-correct-locking-on-transitive-grant-copy-erro.patch b/0055-gnttab-correct-locking-on-transitive-grant-copy-erro.patch new file mode 100644 index 0000000..be83ce5 --- /dev/null +++ b/0055-gnttab-correct-locking-on-transitive-grant-copy-erro.patch @@ -0,0 +1,66 @@ +From bb43a10fefe494ab747b020fef3e823b63fc566d Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Tue, 11 Oct 2022 15:11:01 +0200 +Subject: [PATCH 55/67] gnttab: correct locking on transitive grant copy error + path + +While the comment next to the lock dropping in preparation of +recursively calling acquire_grant_for_copy() mistakenly talks about the +rd == td case (excluded a few lines further up), the same concerns apply +to the calling of release_grant_for_copy() on a subsequent error path. + +This is CVE-2022-33748 / XSA-411. 
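Reduced to its locking skeleton, the fix applies the same drop-and-reacquire pattern already used around the recursive acquire (a sketch; the full context is in the hunk below):

    /* Drop the locks before calling code that may take them again... */
    active_entry_release(act);
    grant_read_unlock(rgt);

    release_grant_for_copy(td, trans_gref, readonly);
    rcu_unlock_domain(td);

    /* ...then reacquire them before touching the active entry again. */
    grant_read_lock(rgt);
    act = active_entry_acquire(rgt, gref);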
+
+Fixes: ad48fb963dbf ("gnttab: fix transitive grant handling")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+master commit: 6e3aab858eef614a21a782a3b73acc88e74690ea
+master date: 2022-10-11 14:29:30 +0200
+---
+ xen/common/grant_table.c | 19 ++++++++++++++++---
+ 1 file changed, 16 insertions(+), 3 deletions(-)
+
+diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
+index 77bba9806937..0523beb9b734 100644
+--- a/xen/common/grant_table.c
++++ b/xen/common/grant_table.c
+@@ -2608,9 +2608,8 @@ acquire_grant_for_copy(
+ trans_domid);
+ 
+ /*
+- * acquire_grant_for_copy() could take the lock on the
+- * remote table (if rd == td), so we have to drop the lock
+- * here and reacquire.
++ * acquire_grant_for_copy() will take the lock on the remote table,
++ * so we have to drop the lock here and reacquire.
+ */
+ active_entry_release(act);
+ grant_read_unlock(rgt);
+@@ -2647,11 +2646,25 @@ acquire_grant_for_copy(
+ act->trans_gref != trans_gref ||
+ !act->is_sub_page)) )
+ {
++ /*
++ * Like above for acquire_grant_for_copy() we need to drop and then
++ * re-acquire the locks here to prevent lock order inversion issues.
++ * Unlike for acquire_grant_for_copy() we don't need to re-check
++ * anything, as release_grant_for_copy() doesn't depend on the grant
++ * table entry: It only updates internal state and the status flags.
++ */
++ active_entry_release(act);
++ grant_read_unlock(rgt);
++
+ release_grant_for_copy(td, trans_gref, readonly);
+ rcu_unlock_domain(td);
++
++ grant_read_lock(rgt);
++ act = active_entry_acquire(rgt, gref);
+ reduce_status_for_pin(rd, act, status, readonly);
+ active_entry_release(act);
+ grant_read_unlock(rgt);
++
+ put_page(*page);
+ *page = NULL;
+ return ERESTART;
+-- 
+2.37.3
+
diff --git a/0056-tools-libxl-Replace-deprecated-soundhw-on-QEMU-comma.patch b/0056-tools-libxl-Replace-deprecated-soundhw-on-QEMU-comma.patch
new file mode 100644
index 0000000..c5d2c9c
--- /dev/null
+++ b/0056-tools-libxl-Replace-deprecated-soundhw-on-QEMU-comma.patch
@@ -0,0 +1,112 @@
+From d65ebacb78901b695bc5e8a075ad1ad865a78928 Mon Sep 17 00:00:00 2001
+From: Anthony PERARD <anthony.perard@citrix.com>
+Date: Tue, 11 Oct 2022 15:13:15 +0200
+Subject: [PATCH 56/67] tools/libxl: Replace deprecated -soundhw on QEMU
+ command line
+
+-soundhw is deprecated since 825ff02911c9 ("audio: add soundhw
+deprecation notice"), QEMU v5.1, and has been removed for the upcoming
+v7.1 by 039a68373c45 ("introduce -audio as a replacement for -soundhw").
+
+Instead we can just add the sound card with "-device", for most options
+that "-soundhw" could handle. "-device" is an option that existed
+before QEMU 1.0, and could already be used to add audio hardware.
+
+The list of possible options for libxl's "soundhw" is taken from the
+list in QEMU 7.0.
+
+The options for "soundhw" are listed in order of preference in the
+manual. The first three (hda, ac97, es1370) are PCI devices and easy
+to test on Linux, and the last four are ISA devices which don't seem
+to work out of the box on Linux.
+
+The sound card 'pcspk' isn't listed even though it used to be accepted
+by '-soundhw', because QEMU crashes when trying to add it to a Xen
+domain. Also, it wouldn't work with "-device"; it might need to be
+"-machine pcspk-audiodev=default" instead. 
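Concretely, for the soundhw="hda" case the device model invocation changes roughly as follows (illustrative command lines, not verbatim libxl output; hda is the one card needing two devices, the others map 1:1 onto their QEMU device alias):

    before: qemu-system-i386 ... -soundhw hda
    after:  qemu-system-i386 ... -device intel-hda -device hda-duplex
    (e.g. soundhw="ac97" simply becomes -device ac97)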
+
+Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
+Reviewed-by: Jason Andryuk <jandryuk@gmail.com>
+master commit: 62ca138c2c052187783aca3957d3f47c4dcfd683
+master date: 2022-08-18 09:25:50 +0200
+---
+ docs/man/xl.cfg.5.pod.in | 6 +++---
+ tools/libs/light/libxl_dm.c | 19 ++++++++++++++++++-
+ tools/libs/light/libxl_types_internal.idl | 10 ++++++++++
+ 3 files changed, 31 insertions(+), 4 deletions(-)
+
+diff --git a/docs/man/xl.cfg.5.pod.in b/docs/man/xl.cfg.5.pod.in
+index af7fae7c52f9..ef9505f91341 100644
+--- a/docs/man/xl.cfg.5.pod.in
++++ b/docs/man/xl.cfg.5.pod.in
+@@ -2523,9 +2523,9 @@ The form serial=DEVICE is also accepted for backwards compatibility.
+ 
+ =item B<soundhw="DEVICE">
+ 
+-Select the virtual sound card to expose to the guest. The valid
+-devices are defined by the device model configuration, please see the
+-B<qemu(1)> manpage for details. The default is not to export any sound
++Select the virtual sound card to expose to the guest. The valid devices are
++B<hda>, B<ac97>, B<es1370>, B<adlib>, B<cs4231a>, B<gus>, B<sb16> if they are
++available with the device model QEMU. The default is not to export any sound
+ device.
+ 
+ =item B<vkb_device=BOOLEAN>
+diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c
+index ae5f35e0c3fd..b86e8ccc858f 100644
+--- a/tools/libs/light/libxl_dm.c
++++ b/tools/libs/light/libxl_dm.c
+@@ -1204,6 +1204,7 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
+ uint64_t ram_size;
+ const char *path, *chardev;
+ bool is_stubdom = libxl_defbool_val(b_info->device_model_stubdomain);
++ int rc;
+ 
+ dm_args = flexarray_make(gc, 16, 1);
+ dm_envs = flexarray_make(gc, 16, 1);
+@@ -1531,7 +1532,23 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
+ }
+ }
+ if (b_info->u.hvm.soundhw) {
+- flexarray_vappend(dm_args, "-soundhw", b_info->u.hvm.soundhw, NULL);
++ libxl__qemu_soundhw soundhw;
++
++ rc = libxl__qemu_soundhw_from_string(b_info->u.hvm.soundhw, &soundhw);
++ if (rc) {
++ LOGD(ERROR, guest_domid, "Unknown soundhw option '%s'", b_info->u.hvm.soundhw);
++ return ERROR_INVAL;
++ }
++
++ switch (soundhw) {
++ case LIBXL__QEMU_SOUNDHW_HDA:
++ flexarray_vappend(dm_args, "-device", "intel-hda",
++ "-device", "hda-duplex", NULL);
++ break;
++ default:
++ flexarray_append_pair(dm_args, "-device",
++ (char*)libxl__qemu_soundhw_to_string(soundhw));
++ }
+ }
+ if (!libxl__acpi_defbool_val(b_info)) {
+ flexarray_append(dm_args, "-no-acpi");
+diff --git a/tools/libs/light/libxl_types_internal.idl b/tools/libs/light/libxl_types_internal.idl
+index 3593e21dbb64..caa08d3229cd 100644
+--- a/tools/libs/light/libxl_types_internal.idl
++++ b/tools/libs/light/libxl_types_internal.idl
+@@ -55,3 +55,13 @@ libxl__device_action = Enumeration("device_action", [
+ (1, "ADD"),
+ (2, "REMOVE"),
+ ])
++
++libxl__qemu_soundhw = Enumeration("qemu_soundhw", [
++ (1, "ac97"),
++ (2, "adlib"),
++ (3, "cs4231a"),
++ (4, "es1370"),
++ (5, "gus"),
++ (6, "hda"),
++ (7, "sb16"),
++ ])
+-- 
+2.37.3
+
diff --git a/0057-x86-CPUID-surface-suitable-value-in-EBX-of-XSTATE-su.patch b/0057-x86-CPUID-surface-suitable-value-in-EBX-of-XSTATE-su.patch
new file mode 100644
index 0000000..9b1cce8
--- /dev/null
+++ b/0057-x86-CPUID-surface-suitable-value-in-EBX-of-XSTATE-su.patch
@@ -0,0 +1,44 @@
+From 7923ea47e578bca30a6e45951a9da09e827ff028 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 11 Oct 2022 15:14:05 +0200
+Subject: [PATCH 57/67] x86/CPUID: surface suitable value in EBX of XSTATE
+ subleaf 1
+
+While the 
SDM isn't very clear about this, our present behavior makes
+Linux 5.19 unhappy. As of commit 8ad7e8f69695 ("x86/fpu/xsave: Support
+XSAVEC in the kernel") they're using this CPUID output also to size
+the compacted area used by XSAVEC. Getting back zero there isn't really
+liked, yet for PV that's the default on capable hardware: XSAVES isn't
+exposed to PV domains.
+
+Considering that the size reported is that of the compacted save area,
+I view Linux's assumption as appropriate (short of the SDM properly
+considering the case). Therefore we need to populate the field also when
+only XSAVEC is supported for a guest.
+
+Fixes: 460b9a4b3630 ("x86/xsaves: enable xsaves/xrstors for hvm guest")
+Fixes: 8d050ed1097c ("x86: don't expose XSAVES capability to PV guests")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: c3bd0b83ea5b7c0da6542687436042eeea1e7909
+master date: 2022-08-24 14:23:59 +0200
+---
+ xen/arch/x86/cpuid.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
+index ee2c4ea03a89..11c95178f110 100644
+--- a/xen/arch/x86/cpuid.c
++++ b/xen/arch/x86/cpuid.c
+@@ -1052,7 +1052,7 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf,
+ switch ( subleaf )
+ {
+ case 1:
+- if ( p->xstate.xsaves )
++ if ( p->xstate.xsavec || p->xstate.xsaves )
+ {
+ /*
+ * TODO: Figure out what to do for XSS state. VT-x manages
+-- 
+2.37.3
+
diff --git a/0058-xen-sched-introduce-cpupool_update_node_affinity.patch b/0058-xen-sched-introduce-cpupool_update_node_affinity.patch
new file mode 100644
index 0000000..c15edb8
--- /dev/null
+++ b/0058-xen-sched-introduce-cpupool_update_node_affinity.patch
@@ -0,0 +1,257 @@
+From 735b10844489babf52d3193193285a7311cf2c39 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 11 Oct 2022 15:14:22 +0200
+Subject: [PATCH 58/67] xen/sched: introduce cpupool_update_node_affinity()
+
+For updating the node affinities of all domains in a cpupool, add a new
+function cpupool_update_node_affinity().
+
+In order to avoid multiple allocations of cpumasks, carve out memory
+allocation and freeing from domain_update_node_affinity() into new
+helpers, which can be used by cpupool_update_node_affinity().
+
+Modify domain_update_node_affinity() to take an additional parameter
+for passing the allocated memory in and to allocate and free the memory
+via the new helpers in case NULL was passed.
+
+This will help later to pre-allocate the cpumasks in order to avoid
+allocations in stop-machine context. 
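The resulting call pattern, condensed from the cpupool_update_node_affinity() body in the hunks below (allocate the scratch masks once, reuse them for every domain in the pool, free them at the end):

    struct affinity_masks masks;
    struct domain *d;

    if ( !alloc_affinity_masks(&masks) )   /* one allocation... */
        return;

    rcu_read_lock(&domlist_read_lock);
    for_each_domain_in_cpupool(d, c)
        domain_update_node_aff(d, &masks); /* ...reused per domain */
    rcu_read_unlock(&domlist_read_lock);

    free_affinity_masks(&masks);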
+ +Signed-off-by: Juergen Gross <jgross@suse.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> +Tested-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: a83fa1e2b96ace65b45dde6954d67012633a082b +master date: 2022-09-05 11:42:30 +0100 +--- + xen/common/sched/core.c | 54 ++++++++++++++++++++++++++------------ + xen/common/sched/cpupool.c | 39 +++++++++++++++------------ + xen/common/sched/private.h | 7 +++++ + xen/include/xen/sched.h | 9 ++++++- + 4 files changed, 74 insertions(+), 35 deletions(-) + +diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c +index f07bd2681fcb..065a83eca912 100644 +--- a/xen/common/sched/core.c ++++ b/xen/common/sched/core.c +@@ -1824,9 +1824,28 @@ int vcpu_affinity_domctl(struct domain *d, uint32_t cmd, + return ret; + } + +-void domain_update_node_affinity(struct domain *d) ++bool alloc_affinity_masks(struct affinity_masks *affinity) + { +- cpumask_var_t dom_cpumask, dom_cpumask_soft; ++ if ( !alloc_cpumask_var(&affinity->hard) ) ++ return false; ++ if ( !alloc_cpumask_var(&affinity->soft) ) ++ { ++ free_cpumask_var(affinity->hard); ++ return false; ++ } ++ ++ return true; ++} ++ ++void free_affinity_masks(struct affinity_masks *affinity) ++{ ++ free_cpumask_var(affinity->soft); ++ free_cpumask_var(affinity->hard); ++} ++ ++void domain_update_node_aff(struct domain *d, struct affinity_masks *affinity) ++{ ++ struct affinity_masks masks; + cpumask_t *dom_affinity; + const cpumask_t *online; + struct sched_unit *unit; +@@ -1836,14 +1855,16 @@ void domain_update_node_affinity(struct domain *d) + if ( !d->vcpu || !d->vcpu[0] ) + return; + +- if ( !zalloc_cpumask_var(&dom_cpumask) ) +- return; +- if ( !zalloc_cpumask_var(&dom_cpumask_soft) ) ++ if ( !affinity ) + { +- free_cpumask_var(dom_cpumask); +- return; ++ affinity = &masks; ++ if ( !alloc_affinity_masks(affinity) ) ++ return; + } + ++ cpumask_clear(affinity->hard); ++ cpumask_clear(affinity->soft); ++ + online = cpupool_domain_master_cpumask(d); + + spin_lock(&d->node_affinity_lock); +@@ -1864,22 +1885,21 @@ void domain_update_node_affinity(struct domain *d) + */ + for_each_sched_unit ( d, unit ) + { +- cpumask_or(dom_cpumask, dom_cpumask, unit->cpu_hard_affinity); +- cpumask_or(dom_cpumask_soft, dom_cpumask_soft, +- unit->cpu_soft_affinity); ++ cpumask_or(affinity->hard, affinity->hard, unit->cpu_hard_affinity); ++ cpumask_or(affinity->soft, affinity->soft, unit->cpu_soft_affinity); + } + /* Filter out non-online cpus */ +- cpumask_and(dom_cpumask, dom_cpumask, online); +- ASSERT(!cpumask_empty(dom_cpumask)); ++ cpumask_and(affinity->hard, affinity->hard, online); ++ ASSERT(!cpumask_empty(affinity->hard)); + /* And compute the intersection between hard, online and soft */ +- cpumask_and(dom_cpumask_soft, dom_cpumask_soft, dom_cpumask); ++ cpumask_and(affinity->soft, affinity->soft, affinity->hard); + + /* + * If not empty, the intersection of hard, soft and online is the + * narrowest set we want. If empty, we fall back to hard&online. + */ +- dom_affinity = cpumask_empty(dom_cpumask_soft) ? +- dom_cpumask : dom_cpumask_soft; ++ dom_affinity = cpumask_empty(affinity->soft) ? 
affinity->hard ++ : affinity->soft; + + nodes_clear(d->node_affinity); + for_each_cpu ( cpu, dom_affinity ) +@@ -1888,8 +1908,8 @@ void domain_update_node_affinity(struct domain *d) + + spin_unlock(&d->node_affinity_lock); + +- free_cpumask_var(dom_cpumask_soft); +- free_cpumask_var(dom_cpumask); ++ if ( affinity == &masks ) ++ free_affinity_masks(affinity); + } + + typedef long ret_t; +diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c +index 8c6e6eb9ccd5..45b6ff99561a 100644 +--- a/xen/common/sched/cpupool.c ++++ b/xen/common/sched/cpupool.c +@@ -401,6 +401,25 @@ int cpupool_move_domain(struct domain *d, struct cpupool *c) + return ret; + } + ++/* Update affinities of all domains in a cpupool. */ ++static void cpupool_update_node_affinity(const struct cpupool *c) ++{ ++ struct affinity_masks masks; ++ struct domain *d; ++ ++ if ( !alloc_affinity_masks(&masks) ) ++ return; ++ ++ rcu_read_lock(&domlist_read_lock); ++ ++ for_each_domain_in_cpupool(d, c) ++ domain_update_node_aff(d, &masks); ++ ++ rcu_read_unlock(&domlist_read_lock); ++ ++ free_affinity_masks(&masks); ++} ++ + /* + * assign a specific cpu to a cpupool + * cpupool_lock must be held +@@ -408,7 +427,6 @@ int cpupool_move_domain(struct domain *d, struct cpupool *c) + static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu) + { + int ret; +- struct domain *d; + const cpumask_t *cpus; + + cpus = sched_get_opt_cpumask(c->gran, cpu); +@@ -433,12 +451,7 @@ static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu) + + rcu_read_unlock(&sched_res_rculock); + +- rcu_read_lock(&domlist_read_lock); +- for_each_domain_in_cpupool(d, c) +- { +- domain_update_node_affinity(d); +- } +- rcu_read_unlock(&domlist_read_lock); ++ cpupool_update_node_affinity(c); + + return 0; + } +@@ -447,18 +460,14 @@ static int cpupool_unassign_cpu_finish(struct cpupool *c) + { + int cpu = cpupool_moving_cpu; + const cpumask_t *cpus; +- struct domain *d; + int ret; + + if ( c != cpupool_cpu_moving ) + return -EADDRNOTAVAIL; + +- /* +- * We need this for scanning the domain list, both in +- * cpu_disable_scheduler(), and at the bottom of this function. 
+- */ + rcu_read_lock(&domlist_read_lock); + ret = cpu_disable_scheduler(cpu); ++ rcu_read_unlock(&domlist_read_lock); + + rcu_read_lock(&sched_res_rculock); + cpus = get_sched_res(cpu)->cpus; +@@ -485,11 +494,7 @@ static int cpupool_unassign_cpu_finish(struct cpupool *c) + } + rcu_read_unlock(&sched_res_rculock); + +- for_each_domain_in_cpupool(d, c) +- { +- domain_update_node_affinity(d); +- } +- rcu_read_unlock(&domlist_read_lock); ++ cpupool_update_node_affinity(c); + + return ret; + } +diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h +index 92d0d4961063..6e036f8c8077 100644 +--- a/xen/common/sched/private.h ++++ b/xen/common/sched/private.h +@@ -593,6 +593,13 @@ affinity_balance_cpumask(const struct sched_unit *unit, int step, + cpumask_copy(mask, unit->cpu_hard_affinity); + } + ++struct affinity_masks { ++ cpumask_var_t hard; ++ cpumask_var_t soft; ++}; ++ ++bool alloc_affinity_masks(struct affinity_masks *affinity); ++void free_affinity_masks(struct affinity_masks *affinity); + void sched_rm_cpu(unsigned int cpu); + const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt, unsigned int cpu); + void schedule_dump(struct cpupool *c); +diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h +index 701963f84cb8..4e25627d9685 100644 +--- a/xen/include/xen/sched.h ++++ b/xen/include/xen/sched.h +@@ -649,8 +649,15 @@ static inline void get_knownalive_domain(struct domain *d) + ASSERT(!(atomic_read(&d->refcnt) & DOMAIN_DESTROYED)); + } + ++struct affinity_masks; ++ + int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity); +-void domain_update_node_affinity(struct domain *d); ++void domain_update_node_aff(struct domain *d, struct affinity_masks *affinity); ++ ++static inline void domain_update_node_affinity(struct domain *d) ++{ ++ domain_update_node_aff(d, NULL); ++} + + /* + * To be implemented by each architecture, sanity checking the configuration +-- +2.37.3 + diff --git a/0059-xen-sched-carve-out-memory-allocation-and-freeing-fr.patch b/0059-xen-sched-carve-out-memory-allocation-and-freeing-fr.patch new file mode 100644 index 0000000..587eef7 --- /dev/null +++ b/0059-xen-sched-carve-out-memory-allocation-and-freeing-fr.patch @@ -0,0 +1,263 @@ +From d638c2085f71f694344b34e70eb1b371c86b00f0 Mon Sep 17 00:00:00 2001 +From: Juergen Gross <jgross@suse.com> +Date: Tue, 11 Oct 2022 15:15:14 +0200 +Subject: [PATCH 59/67] xen/sched: carve out memory allocation and freeing from + schedule_cpu_rm() + +In order to prepare not allocating or freeing memory from +schedule_cpu_rm(), move this functionality to dedicated functions. + +For now call those functions from schedule_cpu_rm(). + +No change of behavior expected. + +Signed-off-by: Juergen Gross <jgross@suse.com> +Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: d42be6f83480b3ada286dc18444331a816be88a3 +master date: 2022-09-05 11:42:30 +0100 +--- + xen/common/sched/core.c | 143 ++++++++++++++++++++++--------------- + xen/common/sched/private.h | 11 +++ + 2 files changed, 98 insertions(+), 56 deletions(-) + +diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c +index 065a83eca912..2decb1161a63 100644 +--- a/xen/common/sched/core.c ++++ b/xen/common/sched/core.c +@@ -3221,6 +3221,75 @@ out: + return ret; + } + ++/* ++ * Allocate all memory needed for free_cpu_rm_data(), as allocations cannot ++ * be made in stop_machine() context. 
++ * ++ * Between alloc_cpu_rm_data() and the real cpu removal action the relevant ++ * contents of struct sched_resource can't change, as the cpu in question is ++ * locked against any other movement to or from cpupools, and the data copied ++ * by alloc_cpu_rm_data() is modified only in case the cpu in question is ++ * being moved from or to a cpupool. ++ */ ++struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu) ++{ ++ struct cpu_rm_data *data; ++ const struct sched_resource *sr; ++ unsigned int idx; ++ ++ rcu_read_lock(&sched_res_rculock); ++ ++ sr = get_sched_res(cpu); ++ data = xmalloc_flex_struct(struct cpu_rm_data, sr, sr->granularity - 1); ++ if ( !data ) ++ goto out; ++ ++ data->old_ops = sr->scheduler; ++ data->vpriv_old = idle_vcpu[cpu]->sched_unit->priv; ++ data->ppriv_old = sr->sched_priv; ++ ++ for ( idx = 0; idx < sr->granularity - 1; idx++ ) ++ { ++ data->sr[idx] = sched_alloc_res(); ++ if ( data->sr[idx] ) ++ { ++ data->sr[idx]->sched_unit_idle = sched_alloc_unit_mem(); ++ if ( !data->sr[idx]->sched_unit_idle ) ++ { ++ sched_res_free(&data->sr[idx]->rcu); ++ data->sr[idx] = NULL; ++ } ++ } ++ if ( !data->sr[idx] ) ++ { ++ while ( idx > 0 ) ++ sched_res_free(&data->sr[--idx]->rcu); ++ XFREE(data); ++ goto out; ++ } ++ ++ data->sr[idx]->curr = data->sr[idx]->sched_unit_idle; ++ data->sr[idx]->scheduler = &sched_idle_ops; ++ data->sr[idx]->granularity = 1; ++ ++ /* We want the lock not to change when replacing the resource. */ ++ data->sr[idx]->schedule_lock = sr->schedule_lock; ++ } ++ ++ out: ++ rcu_read_unlock(&sched_res_rculock); ++ ++ return data; ++} ++ ++void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu) ++{ ++ sched_free_udata(mem->old_ops, mem->vpriv_old); ++ sched_free_pdata(mem->old_ops, mem->ppriv_old, cpu); ++ ++ xfree(mem); ++} ++ + /* + * Remove a pCPU from its cpupool. Its scheduler becomes &sched_idle_ops + * (the idle scheduler). +@@ -3229,53 +3298,23 @@ out: + */ + int schedule_cpu_rm(unsigned int cpu) + { +- void *ppriv_old, *vpriv_old; +- struct sched_resource *sr, **sr_new = NULL; ++ struct sched_resource *sr; ++ struct cpu_rm_data *data; + struct sched_unit *unit; +- struct scheduler *old_ops; + spinlock_t *old_lock; + unsigned long flags; +- int idx, ret = -ENOMEM; ++ int idx = 0; + unsigned int cpu_iter; + ++ data = alloc_cpu_rm_data(cpu); ++ if ( !data ) ++ return -ENOMEM; ++ + rcu_read_lock(&sched_res_rculock); + + sr = get_sched_res(cpu); +- old_ops = sr->scheduler; + +- if ( sr->granularity > 1 ) +- { +- sr_new = xmalloc_array(struct sched_resource *, sr->granularity - 1); +- if ( !sr_new ) +- goto out; +- for ( idx = 0; idx < sr->granularity - 1; idx++ ) +- { +- sr_new[idx] = sched_alloc_res(); +- if ( sr_new[idx] ) +- { +- sr_new[idx]->sched_unit_idle = sched_alloc_unit_mem(); +- if ( !sr_new[idx]->sched_unit_idle ) +- { +- sched_res_free(&sr_new[idx]->rcu); +- sr_new[idx] = NULL; +- } +- } +- if ( !sr_new[idx] ) +- { +- for ( idx--; idx >= 0; idx-- ) +- sched_res_free(&sr_new[idx]->rcu); +- goto out; +- } +- sr_new[idx]->curr = sr_new[idx]->sched_unit_idle; +- sr_new[idx]->scheduler = &sched_idle_ops; +- sr_new[idx]->granularity = 1; +- +- /* We want the lock not to change when replacing the resource. 
*/ +- sr_new[idx]->schedule_lock = sr->schedule_lock; +- } +- } +- +- ret = 0; ++ ASSERT(sr->granularity); + ASSERT(sr->cpupool != NULL); + ASSERT(cpumask_test_cpu(cpu, &cpupool_free_cpus)); + ASSERT(!cpumask_test_cpu(cpu, sr->cpupool->cpu_valid)); +@@ -3283,10 +3322,6 @@ int schedule_cpu_rm(unsigned int cpu) + /* See comment in schedule_cpu_add() regarding lock switching. */ + old_lock = pcpu_schedule_lock_irqsave(cpu, &flags); + +- vpriv_old = idle_vcpu[cpu]->sched_unit->priv; +- ppriv_old = sr->sched_priv; +- +- idx = 0; + for_each_cpu ( cpu_iter, sr->cpus ) + { + per_cpu(sched_res_idx, cpu_iter) = 0; +@@ -3300,27 +3335,27 @@ int schedule_cpu_rm(unsigned int cpu) + else + { + /* Initialize unit. */ +- unit = sr_new[idx]->sched_unit_idle; +- unit->res = sr_new[idx]; ++ unit = data->sr[idx]->sched_unit_idle; ++ unit->res = data->sr[idx]; + unit->is_running = true; + sched_unit_add_vcpu(unit, idle_vcpu[cpu_iter]); + sched_domain_insert_unit(unit, idle_vcpu[cpu_iter]->domain); + + /* Adjust cpu masks of resources (old and new). */ + cpumask_clear_cpu(cpu_iter, sr->cpus); +- cpumask_set_cpu(cpu_iter, sr_new[idx]->cpus); ++ cpumask_set_cpu(cpu_iter, data->sr[idx]->cpus); + cpumask_set_cpu(cpu_iter, &sched_res_mask); + + /* Init timer. */ +- init_timer(&sr_new[idx]->s_timer, s_timer_fn, NULL, cpu_iter); ++ init_timer(&data->sr[idx]->s_timer, s_timer_fn, NULL, cpu_iter); + + /* Last resource initializations and insert resource pointer. */ +- sr_new[idx]->master_cpu = cpu_iter; +- set_sched_res(cpu_iter, sr_new[idx]); ++ data->sr[idx]->master_cpu = cpu_iter; ++ set_sched_res(cpu_iter, data->sr[idx]); + + /* Last action: set the new lock pointer. */ + smp_mb(); +- sr_new[idx]->schedule_lock = &sched_free_cpu_lock; ++ data->sr[idx]->schedule_lock = &sched_free_cpu_lock; + + idx++; + } +@@ -3336,16 +3371,12 @@ int schedule_cpu_rm(unsigned int cpu) + /* _Not_ pcpu_schedule_unlock(): schedule_lock may have changed! */ + spin_unlock_irqrestore(old_lock, flags); + +- sched_deinit_pdata(old_ops, ppriv_old, cpu); ++ sched_deinit_pdata(data->old_ops, data->ppriv_old, cpu); + +- sched_free_udata(old_ops, vpriv_old); +- sched_free_pdata(old_ops, ppriv_old, cpu); +- +-out: + rcu_read_unlock(&sched_res_rculock); +- xfree(sr_new); ++ free_cpu_rm_data(data, cpu); + +- return ret; ++ return 0; + } + + struct scheduler *scheduler_get_default(void) +diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h +index 6e036f8c8077..ff3185425219 100644 +--- a/xen/common/sched/private.h ++++ b/xen/common/sched/private.h +@@ -600,6 +600,15 @@ struct affinity_masks { + + bool alloc_affinity_masks(struct affinity_masks *affinity); + void free_affinity_masks(struct affinity_masks *affinity); ++ ++/* Memory allocation related data for schedule_cpu_rm(). 
*/ ++struct cpu_rm_data { ++ const struct scheduler *old_ops; ++ void *ppriv_old; ++ void *vpriv_old; ++ struct sched_resource *sr[]; ++}; ++ + void sched_rm_cpu(unsigned int cpu); + const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt, unsigned int cpu); + void schedule_dump(struct cpupool *c); +@@ -608,6 +617,8 @@ struct scheduler *scheduler_alloc(unsigned int sched_id); + void scheduler_free(struct scheduler *sched); + int cpu_disable_scheduler(unsigned int cpu); + int schedule_cpu_add(unsigned int cpu, struct cpupool *c); ++struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu); ++void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu); + int schedule_cpu_rm(unsigned int cpu); + int sched_move_domain(struct domain *d, struct cpupool *c); + struct cpupool *cpupool_get_by_id(unsigned int poolid); +-- +2.37.3 + diff --git a/0060-xen-sched-fix-cpu-hotplug.patch b/0060-xen-sched-fix-cpu-hotplug.patch new file mode 100644 index 0000000..3e158f4 --- /dev/null +++ b/0060-xen-sched-fix-cpu-hotplug.patch @@ -0,0 +1,307 @@ +From d17680808b4c8015e31070c971e1ee548170ae34 Mon Sep 17 00:00:00 2001 +From: Juergen Gross <jgross@suse.com> +Date: Tue, 11 Oct 2022 15:15:41 +0200 +Subject: [PATCH 60/67] xen/sched: fix cpu hotplug + +Cpu unplugging is calling schedule_cpu_rm() via stop_machine_run() with +interrupts disabled, thus any memory allocation or freeing must be +avoided. + +Since commit 5047cd1d5dea ("xen/common: Use enhanced +ASSERT_ALLOC_CONTEXT in xmalloc()") this restriction is being enforced +via an assertion, which will now fail. + +Fix this by allocating needed memory before entering stop_machine_run() +and freeing any memory only after having finished stop_machine_run(). + +Fixes: 1ec410112cdd ("xen/sched: support differing granularity in schedule_cpu_[add/rm]()") +Reported-by: Gao Ruifeng <ruifeng.gao@intel.com> +Signed-off-by: Juergen Gross <jgross@suse.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> +Tested-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: d84473689611eed32fd90b27e614f28af767fa3f +master date: 2022-09-05 11:42:30 +0100 +--- + xen/common/sched/core.c | 25 +++++++++++--- + xen/common/sched/cpupool.c | 69 +++++++++++++++++++++++++++++--------- + xen/common/sched/private.h | 5 +-- + 3 files changed, 77 insertions(+), 22 deletions(-) + +diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c +index 2decb1161a63..900aab8f66a7 100644 +--- a/xen/common/sched/core.c ++++ b/xen/common/sched/core.c +@@ -3231,7 +3231,7 @@ out: + * by alloc_cpu_rm_data() is modified only in case the cpu in question is + * being moved from or to a cpupool. 
+ */ +-struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu) ++struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu, bool aff_alloc) + { + struct cpu_rm_data *data; + const struct sched_resource *sr; +@@ -3244,6 +3244,17 @@ struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu) + if ( !data ) + goto out; + ++ if ( aff_alloc ) ++ { ++ if ( !alloc_affinity_masks(&data->affinity) ) ++ { ++ XFREE(data); ++ goto out; ++ } ++ } ++ else ++ memset(&data->affinity, 0, sizeof(data->affinity)); ++ + data->old_ops = sr->scheduler; + data->vpriv_old = idle_vcpu[cpu]->sched_unit->priv; + data->ppriv_old = sr->sched_priv; +@@ -3264,6 +3275,7 @@ struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu) + { + while ( idx > 0 ) + sched_res_free(&data->sr[--idx]->rcu); ++ free_affinity_masks(&data->affinity); + XFREE(data); + goto out; + } +@@ -3286,6 +3298,7 @@ void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu) + { + sched_free_udata(mem->old_ops, mem->vpriv_old); + sched_free_pdata(mem->old_ops, mem->ppriv_old, cpu); ++ free_affinity_masks(&mem->affinity); + + xfree(mem); + } +@@ -3296,17 +3309,18 @@ void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu) + * The cpu is already marked as "free" and not valid any longer for its + * cpupool. + */ +-int schedule_cpu_rm(unsigned int cpu) ++int schedule_cpu_rm(unsigned int cpu, struct cpu_rm_data *data) + { + struct sched_resource *sr; +- struct cpu_rm_data *data; + struct sched_unit *unit; + spinlock_t *old_lock; + unsigned long flags; + int idx = 0; + unsigned int cpu_iter; ++ bool free_data = !data; + +- data = alloc_cpu_rm_data(cpu); ++ if ( !data ) ++ data = alloc_cpu_rm_data(cpu, false); + if ( !data ) + return -ENOMEM; + +@@ -3374,7 +3388,8 @@ int schedule_cpu_rm(unsigned int cpu) + sched_deinit_pdata(data->old_ops, data->ppriv_old, cpu); + + rcu_read_unlock(&sched_res_rculock); +- free_cpu_rm_data(data, cpu); ++ if ( free_data ) ++ free_cpu_rm_data(data, cpu); + + return 0; + } +diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c +index 45b6ff99561a..b5a948639aad 100644 +--- a/xen/common/sched/cpupool.c ++++ b/xen/common/sched/cpupool.c +@@ -402,22 +402,28 @@ int cpupool_move_domain(struct domain *d, struct cpupool *c) + } + + /* Update affinities of all domains in a cpupool. */ +-static void cpupool_update_node_affinity(const struct cpupool *c) ++static void cpupool_update_node_affinity(const struct cpupool *c, ++ struct affinity_masks *masks) + { +- struct affinity_masks masks; ++ struct affinity_masks local_masks; + struct domain *d; + +- if ( !alloc_affinity_masks(&masks) ) +- return; ++ if ( !masks ) ++ { ++ if ( !alloc_affinity_masks(&local_masks) ) ++ return; ++ masks = &local_masks; ++ } + + rcu_read_lock(&domlist_read_lock); + + for_each_domain_in_cpupool(d, c) +- domain_update_node_aff(d, &masks); ++ domain_update_node_aff(d, masks); + + rcu_read_unlock(&domlist_read_lock); + +- free_affinity_masks(&masks); ++ if ( masks == &local_masks ) ++ free_affinity_masks(masks); + } + + /* +@@ -451,15 +457,17 @@ static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu) + + rcu_read_unlock(&sched_res_rculock); + +- cpupool_update_node_affinity(c); ++ cpupool_update_node_affinity(c, NULL); + + return 0; + } + +-static int cpupool_unassign_cpu_finish(struct cpupool *c) ++static int cpupool_unassign_cpu_finish(struct cpupool *c, ++ struct cpu_rm_data *mem) + { + int cpu = cpupool_moving_cpu; + const cpumask_t *cpus; ++ struct affinity_masks *masks = mem ? 
&mem->affinity : NULL; + int ret; + + if ( c != cpupool_cpu_moving ) +@@ -482,7 +490,7 @@ static int cpupool_unassign_cpu_finish(struct cpupool *c) + */ + if ( !ret ) + { +- ret = schedule_cpu_rm(cpu); ++ ret = schedule_cpu_rm(cpu, mem); + if ( ret ) + cpumask_andnot(&cpupool_free_cpus, &cpupool_free_cpus, cpus); + else +@@ -494,7 +502,7 @@ static int cpupool_unassign_cpu_finish(struct cpupool *c) + } + rcu_read_unlock(&sched_res_rculock); + +- cpupool_update_node_affinity(c); ++ cpupool_update_node_affinity(c, masks); + + return ret; + } +@@ -558,7 +566,7 @@ static long cpupool_unassign_cpu_helper(void *info) + cpupool_cpu_moving->cpupool_id, cpupool_moving_cpu); + spin_lock(&cpupool_lock); + +- ret = cpupool_unassign_cpu_finish(c); ++ ret = cpupool_unassign_cpu_finish(c, NULL); + + spin_unlock(&cpupool_lock); + debugtrace_printk("cpupool_unassign_cpu ret=%ld\n", ret); +@@ -701,7 +709,7 @@ static int cpupool_cpu_add(unsigned int cpu) + * This function is called in stop_machine context, so we can be sure no + * non-idle vcpu is active on the system. + */ +-static void cpupool_cpu_remove(unsigned int cpu) ++static void cpupool_cpu_remove(unsigned int cpu, struct cpu_rm_data *mem) + { + int ret; + +@@ -709,7 +717,7 @@ static void cpupool_cpu_remove(unsigned int cpu) + + if ( !cpumask_test_cpu(cpu, &cpupool_free_cpus) ) + { +- ret = cpupool_unassign_cpu_finish(cpupool0); ++ ret = cpupool_unassign_cpu_finish(cpupool0, mem); + BUG_ON(ret); + } + cpumask_clear_cpu(cpu, &cpupool_free_cpus); +@@ -775,7 +783,7 @@ static void cpupool_cpu_remove_forced(unsigned int cpu) + { + ret = cpupool_unassign_cpu_start(c, master_cpu); + BUG_ON(ret); +- ret = cpupool_unassign_cpu_finish(c); ++ ret = cpupool_unassign_cpu_finish(c, NULL); + BUG_ON(ret); + } + } +@@ -993,12 +1001,24 @@ void dump_runq(unsigned char key) + static int cpu_callback( + struct notifier_block *nfb, unsigned long action, void *hcpu) + { ++ static struct cpu_rm_data *mem; ++ + unsigned int cpu = (unsigned long)hcpu; + int rc = 0; + + switch ( action ) + { + case CPU_DOWN_FAILED: ++ if ( system_state <= SYS_STATE_active ) ++ { ++ if ( mem ) ++ { ++ free_cpu_rm_data(mem, cpu); ++ mem = NULL; ++ } ++ rc = cpupool_cpu_add(cpu); ++ } ++ break; + case CPU_ONLINE: + if ( system_state <= SYS_STATE_active ) + rc = cpupool_cpu_add(cpu); +@@ -1006,12 +1026,31 @@ static int cpu_callback( + case CPU_DOWN_PREPARE: + /* Suspend/Resume don't change assignments of cpus to cpupools. */ + if ( system_state <= SYS_STATE_active ) ++ { + rc = cpupool_cpu_remove_prologue(cpu); ++ if ( !rc ) ++ { ++ ASSERT(!mem); ++ mem = alloc_cpu_rm_data(cpu, true); ++ rc = mem ? 0 : -ENOMEM; ++ } ++ } + break; + case CPU_DYING: + /* Suspend/Resume don't change assignments of cpus to cpupools. */ + if ( system_state <= SYS_STATE_active ) +- cpupool_cpu_remove(cpu); ++ { ++ ASSERT(mem); ++ cpupool_cpu_remove(cpu, mem); ++ } ++ break; ++ case CPU_DEAD: ++ if ( system_state <= SYS_STATE_active ) ++ { ++ ASSERT(mem); ++ free_cpu_rm_data(mem, cpu); ++ mem = NULL; ++ } + break; + case CPU_RESUME_FAILED: + cpupool_cpu_remove_forced(cpu); +diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h +index ff3185425219..3bab78ccb240 100644 +--- a/xen/common/sched/private.h ++++ b/xen/common/sched/private.h +@@ -603,6 +603,7 @@ void free_affinity_masks(struct affinity_masks *affinity); + + /* Memory allocation related data for schedule_cpu_rm(). 
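(Editor's note, not part of the upstream comment: the affinity member added below carries pre-allocated masks, so the CPU_DYING path can update domain node affinities without allocating memory.)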
*/
+ struct cpu_rm_data {
++    struct affinity_masks affinity;
+     const struct scheduler *old_ops;
+     void *ppriv_old;
+     void *vpriv_old;
+@@ -617,9 +618,9 @@ struct scheduler *scheduler_alloc(unsigned int sched_id);
+ void scheduler_free(struct scheduler *sched);
+ int cpu_disable_scheduler(unsigned int cpu);
+ int schedule_cpu_add(unsigned int cpu, struct cpupool *c);
+-struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu);
++struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu, bool aff_alloc);
+ void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu);
+-int schedule_cpu_rm(unsigned int cpu);
++int schedule_cpu_rm(unsigned int cpu, struct cpu_rm_data *mem);
+ int sched_move_domain(struct domain *d, struct cpupool *c);
+ struct cpupool *cpupool_get_by_id(unsigned int poolid);
+ void cpupool_put(struct cpupool *pool);
+--
+2.37.3
+
diff --git a/0061-Config.mk-correct-PIE-related-option-s-in-EMBEDDED_E.patch b/0061-Config.mk-correct-PIE-related-option-s-in-EMBEDDED_E.patch
new file mode 100644
index 0000000..0f044b2
--- /dev/null
+++ b/0061-Config.mk-correct-PIE-related-option-s-in-EMBEDDED_E.patch
@@ -0,0 +1,58 @@
+From 19cf28b515f21da02df80e68f901ad7650daaa37 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 11 Oct 2022 15:15:55 +0200
+Subject: [PATCH 61/67] Config.mk: correct PIE-related option(s) in
+ EMBEDDED_EXTRA_CFLAGS
+
+I haven't been able to find evidence of "-nopie" ever having been a
+supported compiler option. The correct spelling is "-no-pie".
+Furthermore, like "-pie", this is an option which is solely passed to
+the linker. The compiler only recognizes "-fpie" / "-fPIE" /
+"-fno-pie", and it doesn't infer these options from "-pie" / "-no-pie".
+
+Add the compiler-recognized form, but for the possible case of the
+variable also being used somewhere for linking keep the linker option as
+well (with corrected spelling).
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Julien Grall <jgrall@amazon.com>
+
+Build: Drop -no-pie from EMBEDDED_EXTRA_CFLAGS
+
+This breaks all Clang builds, as demonstrated by Gitlab CI.
+
+Contrary to the description in ecd6b9759919, -no-pie is not even an option
+passed to the linker. GCC's actual behaviour is to inhibit the passing of
+-pie to the linker, as well as to select different cr0 artefacts to be
+linked.
+
+EMBEDDED_EXTRA_CFLAGS is not used for $(CC)-doing-linking, and not liable
+to gain such a use case.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+Tested-by: Stefano Stabellini <sstabellini@kernel.org>
+Fixes: ecd6b9759919 ("Config.mk: correct PIE-related option(s) in EMBEDDED_EXTRA_CFLAGS")
+master commit: ecd6b9759919fa6335b0be1b5fc5cce29a30c4f1
+master date: 2022-09-08 09:25:26 +0200
+master commit: 13a7c0074ac8fb31f6c0485429b7a20a1946cb22
+master date: 2022-09-27 15:40:42 -0700
+---
+ Config.mk | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/Config.mk b/Config.mk
+index 96d89b2f7dfc..9f87608f6602 100644
+--- a/Config.mk
++++ b/Config.mk
+@@ -203,7 +203,7 @@ endif
+ APPEND_LDFLAGS += $(foreach i, $(APPEND_LIB), -L$(i))
+ APPEND_CFLAGS += $(foreach i, $(APPEND_INCLUDES), -I$(i))
+
+-EMBEDDED_EXTRA_CFLAGS := -nopie -fno-stack-protector -fno-stack-protector-all
++EMBEDDED_EXTRA_CFLAGS := -fno-pie -fno-stack-protector -fno-stack-protector-all
+ EMBEDDED_EXTRA_CFLAGS += -fno-exceptions -fno-asynchronous-unwind-tables
+
+ XEN_EXTFILES_URL ?= http://xenbits.xen.org/xen-extfiles
+--
+2.37.3
+
diff --git a/0062-tools-xenstore-minor-fix-of-the-migration-stream-doc.patch b/0062-tools-xenstore-minor-fix-of-the-migration-stream-doc.patch
new file mode 100644
index 0000000..65882a9
--- /dev/null
+++ b/0062-tools-xenstore-minor-fix-of-the-migration-stream-doc.patch
@@ -0,0 +1,41 @@
+From 182f8bb503b9dd3db5dd9118dc763d241787c6fc Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 11 Oct 2022 15:16:09 +0200
+Subject: [PATCH 62/67] tools/xenstore: minor fix of the migration stream doc
+
+Drop mentioning the non-existent read-only socket in the migration
+stream description document.
+
+The related record field was removed in commit 8868a0e3f674 ("docs:
+update the xenstore migration stream documentation").
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Julien Grall <jgrall@amazon.com>
+master commit: ace1d2eff80d3d66c37ae765dae3e3cb5697e5a4
+master date: 2022-09-08 09:25:58 +0200
+---
+ docs/designs/xenstore-migration.md | 8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+diff --git a/docs/designs/xenstore-migration.md b/docs/designs/xenstore-migration.md
+index 5f1155273ec3..78530bbb0ef4 100644
+--- a/docs/designs/xenstore-migration.md
++++ b/docs/designs/xenstore-migration.md
+@@ -129,11 +129,9 @@ xenstored state that needs to be restored.
+ | `evtchn-fd` | The file descriptor used to communicate with |
+ | | the event channel driver |
+
+-xenstored will resume in the original process context. Hence `rw-socket-fd` and
+-`ro-socket-fd` simply specify the file descriptors of the sockets. Sockets
+-are not always used, however, and so -1 will be used to denote an unused
+-socket.
+-
++xenstored will resume in the original process context. Hence `rw-socket-fd`
++simply specifies the file descriptor of the socket. Sockets are not always
++used, however, and so -1 will be used to denote an unused socket.
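++
++(Editor's illustration, not part of the document: a consumer of this record
++might guard its use of the field as
++
++    if ( rw_socket_fd != -1 )
++        resume_socket(rw_socket_fd);   /* "resume_socket" is hypothetical */
++
++treating -1 simply as "no socket to re-attach".)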
+
+ \pagebreak
+
+--
+2.37.3
+
diff --git a/0063-xen-gnttab-fix-gnttab_acquire_resource.patch b/0063-xen-gnttab-fix-gnttab_acquire_resource.patch
new file mode 100644
index 0000000..0d58157
--- /dev/null
+++ b/0063-xen-gnttab-fix-gnttab_acquire_resource.patch
@@ -0,0 +1,69 @@
+From 3ac64b3751837a117ee3dfb3e2cc27057a83d0f7 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 11 Oct 2022 15:16:53 +0200
+Subject: [PATCH 63/67] xen/gnttab: fix gnttab_acquire_resource()
+
+Commit 9dc46386d89d ("gnttab: work around "may be used uninitialized"
+warning") was wrong, as vaddrs can legitimately be NULL when
+XENMEM_resource_grant_table_id_status is specified for a v1 grant
+table. This would result in crashes in debug builds due to
+ASSERT_UNREACHABLE() triggering.
+
+Check vaddrs for being NULL only in the rc == 0 case.
+
+Expand the tests in tools/tests/resource to tickle this path, and verify that
+using XENMEM_resource_grant_table_id_status on a v1 grant table fails.
+
+Fixes: 9dc46386d89d ("gnttab: work around "may be used uninitialized" warning")
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com> # xen
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 52daa6a8483e4fbd6757c9d1b791e23931791608
+master date: 2022-09-09 16:28:38 +0100
+---
+ tools/tests/resource/test-resource.c | 15 +++++++++++++++
+ xen/common/grant_table.c | 2 +-
+ 2 files changed, 16 insertions(+), 1 deletion(-)
+
+diff --git a/tools/tests/resource/test-resource.c b/tools/tests/resource/test-resource.c
+index 1caaa60e62d9..bf485baff2b4 100644
+--- a/tools/tests/resource/test-resource.c
++++ b/tools/tests/resource/test-resource.c
+@@ -63,6 +63,21 @@ static void test_gnttab(uint32_t domid, unsigned int nr_frames)
+     rc = xenforeignmemory_unmap_resource(fh, res);
+     if ( rc )
+         return fail("  Fail: Unmap %d - %s\n", errno, strerror(errno));
++
++    /*
++     * Verify that an attempt to map the status frames fails, as the domain is
++     * in gnttab v1 mode.
++     */
++    res = xenforeignmemory_map_resource(
++        fh, domid, XENMEM_resource_grant_table,
++        XENMEM_resource_grant_table_id_status, 0, 1,
++        (void **)&gnttab, PROT_READ | PROT_WRITE, 0);
++
++    if ( res )
++    {
++        fail("  Fail: Managed to map gnttab v2 status frames in v1 mode\n");
++        xenforeignmemory_unmap_resource(fh, res);
++    }
+ }
+
+ static void test_domain_configurations(void)
+diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
+index 0523beb9b734..01e426c67fb6 100644
+--- a/xen/common/grant_table.c
++++ b/xen/common/grant_table.c
+@@ -4138,7 +4138,7 @@ int gnttab_acquire_resource(
+      * on non-error paths, and hence it needs setting to NULL at the top of the
+      * function. Leave some runtime safety.
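(Editor's note, not part of the upstream comment: with this fix a NULL vaddrs is flagged as a bug only when rc == 0; for rc != 0, e.g. a status-frame request on a v1 grant table, NULL is legitimate.)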
+ */ +- if ( !vaddrs ) ++ if ( !rc && !vaddrs ) + { + ASSERT_UNREACHABLE(); + rc = -ENODATA; +-- +2.37.3 + diff --git a/0064-x86-wire-up-VCPUOP_register_vcpu_time_memory_area-fo.patch b/0064-x86-wire-up-VCPUOP_register_vcpu_time_memory_area-fo.patch new file mode 100644 index 0000000..4246b01 --- /dev/null +++ b/0064-x86-wire-up-VCPUOP_register_vcpu_time_memory_area-fo.patch @@ -0,0 +1,59 @@ +From 62e534d17cdd838828bfd75d3d845e31524dd336 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Tue, 11 Oct 2022 15:17:12 +0200 +Subject: [PATCH 64/67] x86: wire up VCPUOP_register_vcpu_time_memory_area for + 32-bit guests +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Forever sinced its introduction VCPUOP_register_vcpu_time_memory_area +was available only to native domains. Linux, for example, would attempt +to use it irrespective of guest bitness (including in its so called +PVHVM mode) as long as it finds XEN_PVCLOCK_TSC_STABLE_BIT set (which we +set only for clocksource=tsc, which in turn needs engaging via command +line option). + +Fixes: a5d39947cb89 ("Allow guests to register secondary vcpu_time_info") +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: b726541d94bd0a80b5864d17a2cd2e6d73a3fe0a +master date: 2022-09-29 14:47:45 +0200 +--- + xen/arch/x86/x86_64/domain.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/xen/arch/x86/x86_64/domain.c b/xen/arch/x86/x86_64/domain.c +index c46dccc25a54..d51d99344796 100644 +--- a/xen/arch/x86/x86_64/domain.c ++++ b/xen/arch/x86/x86_64/domain.c +@@ -54,6 +54,26 @@ arch_compat_vcpu_op( + break; + } + ++ case VCPUOP_register_vcpu_time_memory_area: ++ { ++ struct compat_vcpu_register_time_memory_area area = { .addr.p = 0 }; ++ ++ rc = -EFAULT; ++ if ( copy_from_guest(&area.addr.h, arg, 1) ) ++ break; ++ ++ if ( area.addr.h.c != area.addr.p || ++ !compat_handle_okay(area.addr.h, 1) ) ++ break; ++ ++ rc = 0; ++ guest_from_compat_handle(v->arch.time_info_guest, area.addr.h); ++ ++ force_update_vcpu_system_time(v); ++ ++ break; ++ } ++ + case VCPUOP_get_physid: + rc = arch_do_vcpu_op(cmd, v, arg); + break; +-- +2.37.3 + diff --git a/0065-x86-vpmu-Fix-race-condition-in-vpmu_load.patch b/0065-x86-vpmu-Fix-race-condition-in-vpmu_load.patch new file mode 100644 index 0000000..df4fb38 --- /dev/null +++ b/0065-x86-vpmu-Fix-race-condition-in-vpmu_load.patch @@ -0,0 +1,97 @@ +From 9690bb261d5fa09cb281e1fa124d93db7b84fda5 Mon Sep 17 00:00:00 2001 +From: Tamas K Lengyel <tamas.lengyel@intel.com> +Date: Tue, 11 Oct 2022 15:17:42 +0200 +Subject: [PATCH 65/67] x86/vpmu: Fix race-condition in vpmu_load + +The vPMU code-bases attempts to perform an optimization on saving/reloading the +PMU context by keeping track of what vCPU ran on each pCPU. When a pCPU is +getting scheduled, checks if the previous vCPU isn't the current one. If so, +attempts a call to vpmu_save_force. Unfortunately if the previous vCPU is +already getting scheduled to run on another pCPU its state will be already +runnable, which results in an ASSERT failure. + +Fix this by always performing a pmu context save in vpmu_save when called from +vpmu_switch_from, and do a vpmu_load when called from vpmu_switch_to. + +While this presents a minimal overhead in case the same vCPU is getting +rescheduled on the same pCPU, the ASSERT failure is avoided and the code is a +lot easier to reason about. 
+ +Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com> +Acked-by: Jan Beulich <jbeulich@suse.com> +master commit: defa4e51d20a143bdd4395a075bf0933bb38a9a4 +master date: 2022-09-30 09:53:49 +0200 +--- + xen/arch/x86/cpu/vpmu.c | 42 ++++------------------------------------- + 1 file changed, 4 insertions(+), 38 deletions(-) + +diff --git a/xen/arch/x86/cpu/vpmu.c b/xen/arch/x86/cpu/vpmu.c +index fb1b296a6cc1..800eff87dc03 100644 +--- a/xen/arch/x86/cpu/vpmu.c ++++ b/xen/arch/x86/cpu/vpmu.c +@@ -364,58 +364,24 @@ void vpmu_save(struct vcpu *v) + vpmu->last_pcpu = pcpu; + per_cpu(last_vcpu, pcpu) = v; + ++ vpmu_set(vpmu, VPMU_CONTEXT_SAVE); ++ + if ( vpmu->arch_vpmu_ops ) + if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0) ) + vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); + ++ vpmu_reset(vpmu, VPMU_CONTEXT_SAVE); ++ + apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED); + } + + int vpmu_load(struct vcpu *v, bool_t from_guest) + { + struct vpmu_struct *vpmu = vcpu_vpmu(v); +- int pcpu = smp_processor_id(); +- struct vcpu *prev = NULL; + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + return 0; + +- /* First time this VCPU is running here */ +- if ( vpmu->last_pcpu != pcpu ) +- { +- /* +- * Get the context from last pcpu that we ran on. Note that if another +- * VCPU is running there it must have saved this VPCU's context before +- * startig to run (see below). +- * There should be no race since remote pcpu will disable interrupts +- * before saving the context. +- */ +- if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) +- { +- on_selected_cpus(cpumask_of(vpmu->last_pcpu), +- vpmu_save_force, (void *)v, 1); +- vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); +- } +- } +- +- /* Prevent forced context save from remote CPU */ +- local_irq_disable(); +- +- prev = per_cpu(last_vcpu, pcpu); +- +- if ( prev != v && prev ) +- { +- vpmu = vcpu_vpmu(prev); +- +- /* Someone ran here before us */ +- vpmu_save_force(prev); +- vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); +- +- vpmu = vcpu_vpmu(v); +- } +- +- local_irq_enable(); +- + /* Only when PMU is counting, we load PMU context immediately. */ + if ( !vpmu_is_set(vpmu, VPMU_RUNNING) || + (!has_vlapic(vpmu_vcpu(vpmu)->domain) && +-- +2.37.3 + diff --git a/0066-tools-tests-fix-wrong-backport-of-upstream-commit-52.patch b/0066-tools-tests-fix-wrong-backport-of-upstream-commit-52.patch new file mode 100644 index 0000000..24b9576 --- /dev/null +++ b/0066-tools-tests-fix-wrong-backport-of-upstream-commit-52.patch @@ -0,0 +1,31 @@ +From 0d233924d4b0f676056856096e8761205add3ee8 Mon Sep 17 00:00:00 2001 +From: Juergen Gross <jgross@suse.com> +Date: Wed, 12 Oct 2022 17:31:44 +0200 +Subject: [PATCH 66/67] tools/tests: fix wrong backport of upstream commit + 52daa6a8483e4 + +The backport of upstream commit 52daa6a8483e4 had a bug, correct it. 
+ +Fixes: 3ac64b375183 ("xen/gnttab: fix gnttab_acquire_resource()") +Signed-off-by: Juergen Gross <jgross@suse.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +--- + tools/tests/resource/test-resource.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/tests/resource/test-resource.c b/tools/tests/resource/test-resource.c +index bf485baff2b4..51a8f4a000f6 100644 +--- a/tools/tests/resource/test-resource.c ++++ b/tools/tests/resource/test-resource.c +@@ -71,7 +71,7 @@ static void test_gnttab(uint32_t domid, unsigned int nr_frames) + res = xenforeignmemory_map_resource( + fh, domid, XENMEM_resource_grant_table, + XENMEM_resource_grant_table_id_status, 0, 1, +- (void **)&gnttab, PROT_READ | PROT_WRITE, 0); ++ &addr, PROT_READ | PROT_WRITE, 0); + + if ( res ) + { +-- +2.37.3 + diff --git a/0067-libxl-Arm-correct-xc_shadow_control-invocation-to-fi.patch b/0067-libxl-Arm-correct-xc_shadow_control-invocation-to-fi.patch new file mode 100644 index 0000000..309d486 --- /dev/null +++ b/0067-libxl-Arm-correct-xc_shadow_control-invocation-to-fi.patch @@ -0,0 +1,42 @@ +From 816580afdd1730d4f85f64477a242a439af1cdf8 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Wed, 12 Oct 2022 17:33:40 +0200 +Subject: [PATCH 67/67] libxl/Arm: correct xc_shadow_control() invocation to + fix build + +The backport didn't adapt to the earlier function prototype taking more +(unused here) arguments. + +Fixes: c5215044578e ("xen/arm, libxl: Implement XEN_DOMCTL_shadow_op for Arm") +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Henry Wang <Henry.Wang@arm.com> +Acked-by: Anthony PERARD <anthony.perard@citrix.com> +--- + tools/libs/light/libxl_arm.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/tools/libs/light/libxl_arm.c b/tools/libs/light/libxl_arm.c +index d21f614ed788..ba548befdd25 100644 +--- a/tools/libs/light/libxl_arm.c ++++ b/tools/libs/light/libxl_arm.c +@@ -132,14 +132,14 @@ int libxl__arch_domain_create(libxl__gc *gc, + uint32_t domid) + { + libxl_ctx *ctx = libxl__gc_owner(gc); +- unsigned int shadow_mb = DIV_ROUNDUP(d_config->b_info.shadow_memkb, 1024); ++ unsigned long shadow_mb = DIV_ROUNDUP(d_config->b_info.shadow_memkb, 1024); + + int r = xc_shadow_control(ctx->xch, domid, + XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION, +- &shadow_mb, 0); ++ NULL, 0, &shadow_mb, 0, NULL); + if (r) { + LOGED(ERROR, domid, +- "Failed to set %u MiB shadow allocation", shadow_mb); ++ "Failed to set %lu MiB shadow allocation", shadow_mb); + return ERROR_FAIL; + } + +-- +2.37.3 + @@ -1,6 +1,6 @@ -Xen upstream patchset #0.1 for 4.15.4-pre +Xen upstream patchset #1 for 4.15.4-pre Containing patches from RELEASE-4.15.3 (feecaf4abf733e83b7a297190819eca7a7f65168) to -staging-4.15 (35bf91d30f1a480dcf5bfd99b79384b2b283da7f) +staging-4.15 (816580afdd1730d4f85f64477a242a439af1cdf8) |