Diffstat (limited to '0059-xen-sched-carve-out-memory-allocation-and-freeing-fr.patch')
-rw-r--r--  0059-xen-sched-carve-out-memory-allocation-and-freeing-fr.patch  263
1 file changed, 263 insertions, 0 deletions
diff --git a/0059-xen-sched-carve-out-memory-allocation-and-freeing-fr.patch b/0059-xen-sched-carve-out-memory-allocation-and-freeing-fr.patch
new file mode 100644
index 0000000..587eef7
--- /dev/null
+++ b/0059-xen-sched-carve-out-memory-allocation-and-freeing-fr.patch
@@ -0,0 +1,263 @@
+From d638c2085f71f694344b34e70eb1b371c86b00f0 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 11 Oct 2022 15:15:14 +0200
+Subject: [PATCH 59/67] xen/sched: carve out memory allocation and freeing from
+ schedule_cpu_rm()
+
+In order to prepare for not allocating or freeing memory from
+schedule_cpu_rm(), move this functionality to dedicated functions.
+
+For now, call those functions from schedule_cpu_rm().
+
+No change in behavior is expected.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: d42be6f83480b3ada286dc18444331a816be88a3
+master date: 2022-09-05 11:42:30 +0100
+---
+ xen/common/sched/core.c | 143 ++++++++++++++++++++++---------------
+ xen/common/sched/private.h | 11 +++
+ 2 files changed, 98 insertions(+), 56 deletions(-)
+
+diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
+index 065a83eca912..2decb1161a63 100644
+--- a/xen/common/sched/core.c
++++ b/xen/common/sched/core.c
+@@ -3221,6 +3221,75 @@ out:
+ return ret;
+ }
+
++/*
++ * Allocate all memory needed for free_cpu_rm_data(), as allocations cannot
++ * be made in stop_machine() context.
++ *
++ * Between alloc_cpu_rm_data() and the real cpu removal action the relevant
++ * contents of struct sched_resource can't change, as the cpu in question is
++ * locked against any other movement to or from cpupools, and the data copied
++ * by alloc_cpu_rm_data() is modified only in case the cpu in question is
++ * being moved from or to a cpupool.
++ */
++struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu)
++{
++ struct cpu_rm_data *data;
++ const struct sched_resource *sr;
++ unsigned int idx;
++
++ rcu_read_lock(&sched_res_rculock);
++
++ sr = get_sched_res(cpu);
++ data = xmalloc_flex_struct(struct cpu_rm_data, sr, sr->granularity - 1);
++ if ( !data )
++ goto out;
++
++ data->old_ops = sr->scheduler;
++ data->vpriv_old = idle_vcpu[cpu]->sched_unit->priv;
++ data->ppriv_old = sr->sched_priv;
++
++ for ( idx = 0; idx < sr->granularity - 1; idx++ )
++ {
++ data->sr[idx] = sched_alloc_res();
++ if ( data->sr[idx] )
++ {
++ data->sr[idx]->sched_unit_idle = sched_alloc_unit_mem();
++ if ( !data->sr[idx]->sched_unit_idle )
++ {
++ sched_res_free(&data->sr[idx]->rcu);
++ data->sr[idx] = NULL;
++ }
++ }
++ if ( !data->sr[idx] )
++ {
++ while ( idx > 0 )
++ sched_res_free(&data->sr[--idx]->rcu);
++ XFREE(data);
++ goto out;
++ }
++
++ data->sr[idx]->curr = data->sr[idx]->sched_unit_idle;
++ data->sr[idx]->scheduler = &sched_idle_ops;
++ data->sr[idx]->granularity = 1;
++
++ /* We want the lock not to change when replacing the resource. */
++ data->sr[idx]->schedule_lock = sr->schedule_lock;
++ }
++
++ out:
++ rcu_read_unlock(&sched_res_rculock);
++
++ return data;
++}
++
++void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu)
++{
++ sched_free_udata(mem->old_ops, mem->vpriv_old);
++ sched_free_pdata(mem->old_ops, mem->ppriv_old, cpu);
++
++ xfree(mem);
++}
++
+ /*
+ * Remove a pCPU from its cpupool. Its scheduler becomes &sched_idle_ops
+ * (the idle scheduler).
+@@ -3229,53 +3298,23 @@ out:
+ */
+ int schedule_cpu_rm(unsigned int cpu)
+ {
+- void *ppriv_old, *vpriv_old;
+- struct sched_resource *sr, **sr_new = NULL;
++ struct sched_resource *sr;
++ struct cpu_rm_data *data;
+ struct sched_unit *unit;
+- struct scheduler *old_ops;
+ spinlock_t *old_lock;
+ unsigned long flags;
+- int idx, ret = -ENOMEM;
++ int idx = 0;
+ unsigned int cpu_iter;
+
++ data = alloc_cpu_rm_data(cpu);
++ if ( !data )
++ return -ENOMEM;
++
+ rcu_read_lock(&sched_res_rculock);
+
+ sr = get_sched_res(cpu);
+- old_ops = sr->scheduler;
+
+- if ( sr->granularity > 1 )
+- {
+- sr_new = xmalloc_array(struct sched_resource *, sr->granularity - 1);
+- if ( !sr_new )
+- goto out;
+- for ( idx = 0; idx < sr->granularity - 1; idx++ )
+- {
+- sr_new[idx] = sched_alloc_res();
+- if ( sr_new[idx] )
+- {
+- sr_new[idx]->sched_unit_idle = sched_alloc_unit_mem();
+- if ( !sr_new[idx]->sched_unit_idle )
+- {
+- sched_res_free(&sr_new[idx]->rcu);
+- sr_new[idx] = NULL;
+- }
+- }
+- if ( !sr_new[idx] )
+- {
+- for ( idx--; idx >= 0; idx-- )
+- sched_res_free(&sr_new[idx]->rcu);
+- goto out;
+- }
+- sr_new[idx]->curr = sr_new[idx]->sched_unit_idle;
+- sr_new[idx]->scheduler = &sched_idle_ops;
+- sr_new[idx]->granularity = 1;
+-
+- /* We want the lock not to change when replacing the resource. */
+- sr_new[idx]->schedule_lock = sr->schedule_lock;
+- }
+- }
+-
+- ret = 0;
++ ASSERT(sr->granularity);
+ ASSERT(sr->cpupool != NULL);
+ ASSERT(cpumask_test_cpu(cpu, &cpupool_free_cpus));
+ ASSERT(!cpumask_test_cpu(cpu, sr->cpupool->cpu_valid));
+@@ -3283,10 +3322,6 @@ int schedule_cpu_rm(unsigned int cpu)
+ /* See comment in schedule_cpu_add() regarding lock switching. */
+ old_lock = pcpu_schedule_lock_irqsave(cpu, &flags);
+
+- vpriv_old = idle_vcpu[cpu]->sched_unit->priv;
+- ppriv_old = sr->sched_priv;
+-
+- idx = 0;
+ for_each_cpu ( cpu_iter, sr->cpus )
+ {
+ per_cpu(sched_res_idx, cpu_iter) = 0;
+@@ -3300,27 +3335,27 @@ int schedule_cpu_rm(unsigned int cpu)
+ else
+ {
+ /* Initialize unit. */
+- unit = sr_new[idx]->sched_unit_idle;
+- unit->res = sr_new[idx];
++ unit = data->sr[idx]->sched_unit_idle;
++ unit->res = data->sr[idx];
+ unit->is_running = true;
+ sched_unit_add_vcpu(unit, idle_vcpu[cpu_iter]);
+ sched_domain_insert_unit(unit, idle_vcpu[cpu_iter]->domain);
+
+ /* Adjust cpu masks of resources (old and new). */
+ cpumask_clear_cpu(cpu_iter, sr->cpus);
+- cpumask_set_cpu(cpu_iter, sr_new[idx]->cpus);
++ cpumask_set_cpu(cpu_iter, data->sr[idx]->cpus);
+ cpumask_set_cpu(cpu_iter, &sched_res_mask);
+
+ /* Init timer. */
+- init_timer(&sr_new[idx]->s_timer, s_timer_fn, NULL, cpu_iter);
++ init_timer(&data->sr[idx]->s_timer, s_timer_fn, NULL, cpu_iter);
+
+ /* Last resource initializations and insert resource pointer. */
+- sr_new[idx]->master_cpu = cpu_iter;
+- set_sched_res(cpu_iter, sr_new[idx]);
++ data->sr[idx]->master_cpu = cpu_iter;
++ set_sched_res(cpu_iter, data->sr[idx]);
+
+ /* Last action: set the new lock pointer. */
+ smp_mb();
+- sr_new[idx]->schedule_lock = &sched_free_cpu_lock;
++ data->sr[idx]->schedule_lock = &sched_free_cpu_lock;
+
+ idx++;
+ }
+@@ -3336,16 +3371,12 @@ int schedule_cpu_rm(unsigned int cpu)
+ /* _Not_ pcpu_schedule_unlock(): schedule_lock may have changed! */
+ spin_unlock_irqrestore(old_lock, flags);
+
+- sched_deinit_pdata(old_ops, ppriv_old, cpu);
++ sched_deinit_pdata(data->old_ops, data->ppriv_old, cpu);
+
+- sched_free_udata(old_ops, vpriv_old);
+- sched_free_pdata(old_ops, ppriv_old, cpu);
+-
+-out:
+ rcu_read_unlock(&sched_res_rculock);
+- xfree(sr_new);
++ free_cpu_rm_data(data, cpu);
+
+- return ret;
++ return 0;
+ }
+
+ struct scheduler *scheduler_get_default(void)
+diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h
+index 6e036f8c8077..ff3185425219 100644
+--- a/xen/common/sched/private.h
++++ b/xen/common/sched/private.h
+@@ -600,6 +600,15 @@ struct affinity_masks {
+
+ bool alloc_affinity_masks(struct affinity_masks *affinity);
+ void free_affinity_masks(struct affinity_masks *affinity);
++
++/* Memory allocation related data for schedule_cpu_rm(). */
++struct cpu_rm_data {
++ const struct scheduler *old_ops;
++ void *ppriv_old;
++ void *vpriv_old;
++ struct sched_resource *sr[];
++};
++
+ void sched_rm_cpu(unsigned int cpu);
+ const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt, unsigned int cpu);
+ void schedule_dump(struct cpupool *c);
+@@ -608,6 +617,8 @@ struct scheduler *scheduler_alloc(unsigned int sched_id);
+ void scheduler_free(struct scheduler *sched);
+ int cpu_disable_scheduler(unsigned int cpu);
+ int schedule_cpu_add(unsigned int cpu, struct cpupool *c);
++struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu);
++void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu);
+ int schedule_cpu_rm(unsigned int cpu);
+ int sched_move_domain(struct domain *d, struct cpupool *c);
+ struct cpupool *cpupool_get_by_id(unsigned int poolid);
+--
+2.37.3
+
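
For illustration, here is a minimal, self-contained C sketch of the pattern the
patch carves out: pre-allocate everything before entering a context in which
allocation is forbidden (stop_machine()-like), do the critical work there, and
free afterwards. This is not Xen code; the names rm_data, prepare_removal,
do_removal and finish_removal are hypothetical and only mirror the shape of
alloc_cpu_rm_data() / free_cpu_rm_data(), using nothing beyond the standard C
library.

/* Phase split: allocate up front, work without allocating, free afterwards. */
#include <stdio.h>
#include <stdlib.h>

struct resource {
    int id;
};

/* Mirrors the shape of struct cpu_rm_data: fixed fields plus a flexible array. */
struct rm_data {
    unsigned int count;
    struct resource *res[];   /* one pre-allocated resource per slot */
};

/* Phase 1: may allocate (runs outside the no-allocation context). */
static struct rm_data *prepare_removal(unsigned int count)
{
    struct rm_data *data = malloc(sizeof(*data) + count * sizeof(data->res[0]));
    unsigned int idx;

    if ( !data )
        return NULL;

    data->count = count;
    for ( idx = 0; idx < count; idx++ )
    {
        data->res[idx] = malloc(sizeof(*data->res[idx]));
        if ( !data->res[idx] )
        {
            /* Unwind partial allocations, as alloc_cpu_rm_data() does. */
            while ( idx > 0 )
                free(data->res[--idx]);
            free(data);
            return NULL;
        }
        data->res[idx]->id = (int)idx;
    }

    return data;
}

/* Phase 2: no allocation or freeing allowed here (stop_machine()-like). */
static void do_removal(const struct rm_data *data)
{
    unsigned int idx;

    for ( idx = 0; idx < data->count; idx++ )
        printf("installing pre-allocated resource %d\n", data->res[idx]->id);
}

/* Phase 3: release everything after leaving the restricted context. */
static void finish_removal(struct rm_data *data)
{
    unsigned int idx;

    for ( idx = 0; idx < data->count; idx++ )
        free(data->res[idx]);
    free(data);
}

int main(void)
{
    struct rm_data *data = prepare_removal(3);

    if ( !data )
        return 1;

    do_removal(data);
    finish_removal(data);

    return 0;
}

The error unwinding in prepare_removal() follows the same backward-freeing
approach as the patched alloc_cpu_rm_data(), and the flexible array member
plays the role of cpu_rm_data's sr[].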