author     Tomáš Mózes <hydrapolic@gmail.com>   2024-04-05 08:59:40 +0200
committer  Tomáš Mózes <hydrapolic@gmail.com>   2024-04-05 08:59:40 +0200
commit     d0ce95087288b30e5e211bac8e9a0817f2effcf5 (patch)
tree       ce2e128cfdf8d491a494d6583979bc5330db21e2 /0051-locking-attempt-to-ensure-lock-wrappers-are-always-i.patch
parent     Xen 4.17.4-pre-patchset-0 (diff)
Xen 4.17.4-pre-patchset-1 (refs: 4.17.4-pre-patchset-1, 4.17)
Signed-off-by: Tomáš Mózes <hydrapolic@gmail.com>
Diffstat (limited to '0051-locking-attempt-to-ensure-lock-wrappers-are-always-i.patch')
-rw-r--r--   0051-locking-attempt-to-ensure-lock-wrappers-are-always-i.patch   405
1 file changed, 405 insertions, 0 deletions
diff --git a/0051-locking-attempt-to-ensure-lock-wrappers-are-always-i.patch b/0051-locking-attempt-to-ensure-lock-wrappers-are-always-i.patch
new file mode 100644
index 0000000..822836d
--- /dev/null
+++ b/0051-locking-attempt-to-ensure-lock-wrappers-are-always-i.patch
@@ -0,0 +1,405 @@
+From 2cc5e57be680a516aa5cdef4281856d09b9d0ea6 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Mon, 4 Mar 2024 14:29:36 +0100
+Subject: [PATCH 51/67] locking: attempt to ensure lock wrappers are always
+ inline
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+In order to prevent the locking speculation barriers from being inside of
+`call`ed functions that could be speculatively bypassed.
+
+While there also add an extra locking barrier to _mm_write_lock() in the branch
+taken when the lock is already held.
+
+Note some functions are switched to use the unsafe variants (without speculation
+barrier) of the locking primitives, but a speculation barrier is always added
+to the exposed public lock wrapping helper. That's the case with
+sched_spin_lock_double() or pcidevs_lock() for example.
+
+This is part of XSA-453 / CVE-2024-2193
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 197ecd838a2aaf959a469df3696d4559c4f8b762)
+---
+ xen/arch/x86/hvm/vpt.c | 10 +++++++---
+ xen/arch/x86/include/asm/irq.h | 1 +
+ xen/arch/x86/mm/mm-locks.h | 28 +++++++++++++++-------------
+ xen/arch/x86/mm/p2m-pod.c | 2 +-
+ xen/common/event_channel.c | 5 +++--
+ xen/common/grant_table.c | 6 +++---
+ xen/common/sched/core.c | 19 ++++++++++++-------
+ xen/common/sched/private.h | 26 ++++++++++++++++++++++++--
+ xen/common/timer.c | 8 +++++---
+ xen/drivers/passthrough/pci.c | 5 +++--
+ xen/include/xen/event.h | 4 ++--
+ xen/include/xen/pci.h | 8 ++++++--
+ 12 files changed, 82 insertions(+), 40 deletions(-)
+
+diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c
+index cb1d81bf9e..66f1095245 100644
+--- a/xen/arch/x86/hvm/vpt.c
++++ b/xen/arch/x86/hvm/vpt.c
+@@ -161,7 +161,7 @@ static int pt_irq_masked(struct periodic_time *pt)
+ * pt->vcpu field, because another thread holding the pt_migrate lock
+ * may already be spinning waiting for your vcpu lock.
+ */
+-static void pt_vcpu_lock(struct vcpu *v)
++static always_inline void pt_vcpu_lock(struct vcpu *v)
+ {
+ spin_lock(&v->arch.hvm.tm_lock);
+ }
+@@ -180,9 +180,13 @@ static void pt_vcpu_unlock(struct vcpu *v)
+ * need to take an additional lock that protects against pt->vcpu
+ * changing.
+ */
+-static void pt_lock(struct periodic_time *pt)
++static always_inline void pt_lock(struct periodic_time *pt)
+ {
+- read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate);
++ /*
++ * Use the speculation unsafe variant for the first lock, as the following
++ * lock taking helper already includes a speculation barrier.
++ */
++ _read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate);
+ spin_lock(&pt->vcpu->arch.hvm.tm_lock);
+ }
+
+diff --git a/xen/arch/x86/include/asm/irq.h b/xen/arch/x86/include/asm/irq.h
+index f6a0207a80..823d627fd0 100644
+--- a/xen/arch/x86/include/asm/irq.h
++++ b/xen/arch/x86/include/asm/irq.h
+@@ -178,6 +178,7 @@ void cf_check irq_complete_move(struct irq_desc *);
+
+ extern struct irq_desc *irq_desc;
+
++/* Not speculation safe, only used for AP bringup. */
+ void lock_vector_lock(void);
+ void unlock_vector_lock(void);
+
+diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
+index c1523aeccf..265239c49f 100644
+--- a/xen/arch/x86/mm/mm-locks.h
++++ b/xen/arch/x86/mm/mm-locks.h
+@@ -86,8 +86,8 @@ static inline void _set_lock_level(int l)
+ this_cpu(mm_lock_level) = l;
+ }
+
+-static inline void _mm_lock(const struct domain *d, mm_lock_t *l,
+- const char *func, int level, int rec)
++static always_inline void _mm_lock(const struct domain *d, mm_lock_t *l,
++ const char *func, int level, int rec)
+ {
+ if ( !((mm_locked_by_me(l)) && rec) )
+ _check_lock_level(d, level);
+@@ -137,8 +137,8 @@ static inline int mm_write_locked_by_me(mm_rwlock_t *l)
+ return (l->locker == get_processor_id());
+ }
+
+-static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
+- const char *func, int level)
++static always_inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
++ const char *func, int level)
+ {
+ if ( !mm_write_locked_by_me(l) )
+ {
+@@ -149,6 +149,8 @@ static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
+ l->unlock_level = _get_lock_level();
+ _set_lock_level(_lock_level(d, level));
+ }
++ else
++ block_speculation();
+ l->recurse_count++;
+ }
+
+@@ -162,8 +164,8 @@ static inline void mm_write_unlock(mm_rwlock_t *l)
+ percpu_write_unlock(p2m_percpu_rwlock, &l->lock);
+ }
+
+-static inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l,
+- int level)
++static always_inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l,
++ int level)
+ {
+ _check_lock_level(d, level);
+ percpu_read_lock(p2m_percpu_rwlock, &l->lock);
+@@ -178,15 +180,15 @@ static inline void mm_read_unlock(mm_rwlock_t *l)
+
+ /* This wrapper uses the line number to express the locking order below */
+ #define declare_mm_lock(name) \
+- static inline void mm_lock_##name(const struct domain *d, mm_lock_t *l, \
+- const char *func, int rec) \
++ static always_inline void mm_lock_##name( \
++ const struct domain *d, mm_lock_t *l, const char *func, int rec) \
+ { _mm_lock(d, l, func, MM_LOCK_ORDER_##name, rec); }
+ #define declare_mm_rwlock(name) \
+- static inline void mm_write_lock_##name(const struct domain *d, \
+- mm_rwlock_t *l, const char *func) \
++ static always_inline void mm_write_lock_##name( \
++ const struct domain *d, mm_rwlock_t *l, const char *func) \
+ { _mm_write_lock(d, l, func, MM_LOCK_ORDER_##name); } \
+- static inline void mm_read_lock_##name(const struct domain *d, \
+- mm_rwlock_t *l) \
++ static always_inline void mm_read_lock_##name(const struct domain *d, \
++ mm_rwlock_t *l) \
+ { _mm_read_lock(d, l, MM_LOCK_ORDER_##name); }
+ /* These capture the name of the calling function */
+ #define mm_lock(name, d, l) mm_lock_##name(d, l, __func__, 0)
+@@ -321,7 +323,7 @@ declare_mm_lock(altp2mlist)
+ #define MM_LOCK_ORDER_altp2m 40
+ declare_mm_rwlock(altp2m);
+
+-static inline void p2m_lock(struct p2m_domain *p)
++static always_inline void p2m_lock(struct p2m_domain *p)
+ {
+ if ( p2m_is_altp2m(p) )
+ mm_write_lock(altp2m, p->domain, &p->lock);
+diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
+index fc110506dc..99dbcb3101 100644
+--- a/xen/arch/x86/mm/p2m-pod.c
++++ b/xen/arch/x86/mm/p2m-pod.c
+@@ -36,7 +36,7 @@
+ #define superpage_aligned(_x) (((_x)&(SUPERPAGE_PAGES-1))==0)
+
+ /* Enforce lock ordering when grabbing the "external" page_alloc lock */
+-static inline void lock_page_alloc(struct p2m_domain *p2m)
++static always_inline void lock_page_alloc(struct p2m_domain *p2m)
+ {
+ page_alloc_mm_pre_lock(p2m->domain);
+ spin_lock(&(p2m->domain->page_alloc_lock));
+diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
+index f5e0b12d15..dada9f15f5 100644
+--- a/xen/common/event_channel.c
++++ b/xen/common/event_channel.c
+@@ -62,7 +62,7 @@
+ * just assume the event channel is free or unbound at the moment when the
+ * evtchn_read_trylock() returns false.
+ */
+-static inline void evtchn_write_lock(struct evtchn *evtchn)
++static always_inline void evtchn_write_lock(struct evtchn *evtchn)
+ {
+ write_lock(&evtchn->lock);
+
+@@ -364,7 +364,8 @@ int evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc, evtchn_port_t port)
+ return rc;
+ }
+
+-static void double_evtchn_lock(struct evtchn *lchn, struct evtchn *rchn)
++static always_inline void double_evtchn_lock(struct evtchn *lchn,
++ struct evtchn *rchn)
+ {
+ ASSERT(lchn != rchn);
+
+diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
+index ee7cc496b8..62a8685cd5 100644
+--- a/xen/common/grant_table.c
++++ b/xen/common/grant_table.c
+@@ -410,7 +410,7 @@ static inline void act_set_gfn(struct active_grant_entry *act, gfn_t gfn)
+
+ static DEFINE_PERCPU_RWLOCK_GLOBAL(grant_rwlock);
+
+-static inline void grant_read_lock(struct grant_table *gt)
++static always_inline void grant_read_lock(struct grant_table *gt)
+ {
+ percpu_read_lock(grant_rwlock, &gt->lock);
+ }
+@@ -420,7 +420,7 @@ static inline void grant_read_unlock(struct grant_table *gt)
+ percpu_read_unlock(grant_rwlock, &gt->lock);
+ }
+
+-static inline void grant_write_lock(struct grant_table *gt)
++static always_inline void grant_write_lock(struct grant_table *gt)
+ {
+ percpu_write_lock(grant_rwlock, &gt->lock);
+ }
+@@ -457,7 +457,7 @@ nr_active_grant_frames(struct grant_table *gt)
+ return num_act_frames_from_sha_frames(nr_grant_frames(gt));
+ }
+
+-static inline struct active_grant_entry *
++static always_inline struct active_grant_entry *
+ active_entry_acquire(struct grant_table *t, grant_ref_t e)
+ {
+ struct active_grant_entry *act;
+diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
+index 078beb1adb..29bbab5ac6 100644
+--- a/xen/common/sched/core.c
++++ b/xen/common/sched/core.c
+@@ -348,23 +348,28 @@ uint64_t get_cpu_idle_time(unsigned int cpu)
+ * This avoids dead- or live-locks when this code is running on both
+ * cpus at the same time.
+ */
+-static void sched_spin_lock_double(spinlock_t *lock1, spinlock_t *lock2,
+- unsigned long *flags)
++static always_inline void sched_spin_lock_double(
++ spinlock_t *lock1, spinlock_t *lock2, unsigned long *flags)
+ {
++ /*
++ * In order to avoid extra overhead, use the locking primitives without the
++ * speculation barrier, and introduce a single barrier here.
++ */
+ if ( lock1 == lock2 )
+ {
+- spin_lock_irqsave(lock1, *flags);
++ *flags = _spin_lock_irqsave(lock1);
+ }
+ else if ( lock1 < lock2 )
+ {
+- spin_lock_irqsave(lock1, *flags);
+- spin_lock(lock2);
++ *flags = _spin_lock_irqsave(lock1);
++ _spin_lock(lock2);
+ }
+ else
+ {
+- spin_lock_irqsave(lock2, *flags);
+- spin_lock(lock1);
++ *flags = _spin_lock_irqsave(lock2);
++ _spin_lock(lock1);
+ }
++ block_lock_speculation();
+ }
+
+ static void sched_spin_unlock_double(spinlock_t *lock1, spinlock_t *lock2,
+diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h
+index 0527a8c70d..24a93dd0c1 100644
+--- a/xen/common/sched/private.h
++++ b/xen/common/sched/private.h
+@@ -207,8 +207,24 @@ DECLARE_PER_CPU(cpumask_t, cpumask_scratch);
+ #define cpumask_scratch (&this_cpu(cpumask_scratch))
+ #define cpumask_scratch_cpu(c) (&per_cpu(cpumask_scratch, c))
+
++/*
++ * Deal with _spin_lock_irqsave() returning the flags value instead of storing
++ * it in a passed parameter.
++ */
++#define _sched_spinlock0(lock, irq) _spin_lock##irq(lock)
++#define _sched_spinlock1(lock, irq, arg) ({ \
++ BUILD_BUG_ON(sizeof(arg) != sizeof(unsigned long)); \
++ (arg) = _spin_lock##irq(lock); \
++})
++
++#define _sched_spinlock__(nr) _sched_spinlock ## nr
++#define _sched_spinlock_(nr) _sched_spinlock__(nr)
++#define _sched_spinlock(lock, irq, args...) \
++ _sched_spinlock_(count_args(args))(lock, irq, ## args)
++
+ #define sched_lock(kind, param, cpu, irq, arg...) \
+-static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
++static always_inline spinlock_t \
++*kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
+ { \
+ for ( ; ; ) \
+ { \
+@@ -220,10 +236,16 @@ static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
+ * \
+ * It may also be the case that v->processor may change but the \
+ * lock may be the same; this will succeed in that case. \
++ * \
++ * Use the speculation unsafe locking helper, there's a speculation \
++ * barrier before returning to the caller. \
+ */ \
+- spin_lock##irq(lock, ## arg); \
++ _sched_spinlock(lock, irq, ## arg); \
+ if ( likely(lock == get_sched_res(cpu)->schedule_lock) ) \
++ { \
++ block_lock_speculation(); \
+ return lock; \
++ } \
+ spin_unlock##irq(lock, ## arg); \
+ } \
+ }
+diff --git a/xen/common/timer.c b/xen/common/timer.c
+index 9b5016d5ed..459668d417 100644
+--- a/xen/common/timer.c
++++ b/xen/common/timer.c
+@@ -240,7 +240,7 @@ static inline void deactivate_timer(struct timer *timer)
+ list_add(&timer->inactive, &per_cpu(timers, timer->cpu).inactive);
+ }
+
+-static inline bool_t timer_lock(struct timer *timer)
++static inline bool_t timer_lock_unsafe(struct timer *timer)
+ {
+ unsigned int cpu;
+
+@@ -254,7 +254,8 @@ static inline bool_t timer_lock(struct timer *timer)
+ rcu_read_unlock(&timer_cpu_read_lock);
+ return 0;
+ }
+- spin_lock(&per_cpu(timers, cpu).lock);
++ /* Use the speculation unsafe variant, the wrapper has the barrier. */
++ _spin_lock(&per_cpu(timers, cpu).lock);
+ if ( likely(timer->cpu == cpu) )
+ break;
+ spin_unlock(&per_cpu(timers, cpu).lock);
+@@ -267,8 +268,9 @@ static inline bool_t timer_lock(struct timer *timer)
+ #define timer_lock_irqsave(t, flags) ({ \
+ bool_t __x; \
+ local_irq_save(flags); \
+- if ( !(__x = timer_lock(t)) ) \
++ if ( !(__x = timer_lock_unsafe(t)) ) \
+ local_irq_restore(flags); \
++ block_lock_speculation(); \
+ __x; \
+ })
+
+diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
+index 8c62b14d19..1b3d285166 100644
+--- a/xen/drivers/passthrough/pci.c
++++ b/xen/drivers/passthrough/pci.c
+@@ -52,9 +52,10 @@ struct pci_seg {
+
+ static spinlock_t _pcidevs_lock = SPIN_LOCK_UNLOCKED;
+
+-void pcidevs_lock(void)
++/* Do not use, as it has no speculation barrier, use pcidevs_lock() instead. */
++void pcidevs_lock_unsafe(void)
+ {
+- spin_lock_recursive(&_pcidevs_lock);
++ _spin_lock_recursive(&_pcidevs_lock);
+ }
+
+ void pcidevs_unlock(void)
+diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
+index 8eae9984a9..dd96e84c69 100644
+--- a/xen/include/xen/event.h
++++ b/xen/include/xen/event.h
+@@ -114,12 +114,12 @@ void notify_via_xen_event_channel(struct domain *ld, int lport);
+ #define bucket_from_port(d, p) \
+ ((group_from_port(d, p))[((p) % EVTCHNS_PER_GROUP) / EVTCHNS_PER_BUCKET])
+
+-static inline void evtchn_read_lock(struct evtchn *evtchn)
++static always_inline void evtchn_read_lock(struct evtchn *evtchn)
+ {
+ read_lock(&evtchn->lock);
+ }
+
+-static inline bool evtchn_read_trylock(struct evtchn *evtchn)
++static always_inline bool evtchn_read_trylock(struct evtchn *evtchn)
+ {
+ return read_trylock(&evtchn->lock);
+ }
+diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
+index 5975ca2f30..b373f139d1 100644
+--- a/xen/include/xen/pci.h
++++ b/xen/include/xen/pci.h
+@@ -155,8 +155,12 @@ struct pci_dev {
+ * devices, it also sync the access to the msi capability that is not
+ * interrupt handling related (the mask bit register).
+ */
+-
+-void pcidevs_lock(void);
++void pcidevs_lock_unsafe(void);
++static always_inline void pcidevs_lock(void)
++{
++ pcidevs_lock_unsafe();
++ block_lock_speculation();
++}
+ void pcidevs_unlock(void);
+ bool_t __must_check pcidevs_locked(void);
+
+--
+2.44.0
+
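
For reference, below is a minimal standalone sketch of the pattern this patch applies. The names demo_lock()/_demo_lock() are hypothetical, a pthread mutex stands in for Xen's spinlocks, and a plain compiler barrier stands in for Xen's block_lock_speculation(); it only illustrates the idea of forcing the public wrapper inline and pairing a barrier-free primitive with an explicit speculation barrier, as the patch does for pcidevs_lock() and the scheduler/timer wrappers.

#include <pthread.h>

/*
 * Placeholder for Xen's block_lock_speculation(); in Xen this is an
 * architectural speculation barrier (e.g. LFENCE on x86), here only a
 * compiler barrier to keep the sketch self-contained.
 */
static inline void block_lock_speculation(void)
{
    __asm__ volatile ( "" ::: "memory" );
}

/* Speculation-"unsafe" primitive: acquires the lock, adds no barrier. */
static inline void _demo_lock(pthread_mutex_t *l)
{
    pthread_mutex_lock(l);
}

/*
 * Public wrapper: always_inline guarantees the barrier is emitted at the
 * call site itself, so it cannot end up inside a call-ed helper that a
 * speculative execution path might bypass.
 */
static inline __attribute__((always_inline)) void demo_lock(pthread_mutex_t *l)
{
    _demo_lock(l);
    block_lock_speculation();
}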