aboutsummaryrefslogtreecommitdiff
path: root/numa.c
diff options
context:
space:
mode:
authorLaurent Vivier <lvivier@redhat.com>2017-05-02 18:29:55 +0200
committerEduardo Habkost <ehabkost@redhat.com>2017-05-11 16:08:47 -0300
commit3bfe57165b4bf86a431099078df422f54598f5c6 (patch)
tree268732cbb3c47f666250ab80d2bc2b7aa300d995 /numa.c
parent0f203430dd88cc6270310956ace58aca639edb59 (diff)
numa: equally distribute memory on nodes
When there are more nodes than available memory to put the minimum allowed memory by node, all the memory is put on the last node. This is because we put (ram_size / nb_numa_nodes) & ~((1 << mc->numa_mem_align_shift) - 1); on each node, and in this case the value is 0. This is particularly true with pseries, as the memory must be aligned to 256MB. To avoid this problem, this patch uses an error diffusion algorithm [1] to distribute equally the memory on nodes. We introduce numa_auto_assign_ram() function in MachineClass to keep compatibility between machine type versions. The legacy function is used with pseries-2.9, pc-q35-2.9 and pc-i440fx-2.9 (and previous), the new one with all others. Example: qemu-system-ppc64 -S -nographic -nodefaults -monitor stdio -m 1G -smp 8 \ -numa node -numa node -numa node \ -numa node -numa node -numa node Before: (qemu) info numa 6 nodes node 0 cpus: 0 6 node 0 size: 0 MB node 1 cpus: 1 7 node 1 size: 0 MB node 2 cpus: 2 node 2 size: 0 MB node 3 cpus: 3 node 3 size: 0 MB node 4 cpus: 4 node 4 size: 0 MB node 5 cpus: 5 node 5 size: 1024 MB After: (qemu) info numa 6 nodes node 0 cpus: 0 6 node 0 size: 0 MB node 1 cpus: 1 7 node 1 size: 256 MB node 2 cpus: 2 node 2 size: 0 MB node 3 cpus: 3 node 3 size: 256 MB node 4 cpus: 4 node 4 size: 256 MB node 5 cpus: 5 node 5 size: 256 MB [1] https://en.wikipedia.org/wiki/Error_diffusion Signed-off-by: Laurent Vivier <lvivier@redhat.com> Message-Id: <20170502162955.1610-2-lvivier@redhat.com> Reviewed-by: Eduardo Habkost <ehabkost@redhat.com> [ehabkost: s/ram_size/size/ at numa_default_auto_assign_ram()] Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Diffstat (limited to 'numa.c')
-rw-r--r--numa.c49
1 files changed, 38 insertions, 11 deletions
diff --git a/numa.c b/numa.c
index 2b3fc69915..d753687dec 100644
--- a/numa.c
+++ b/numa.c
@@ -407,6 +407,42 @@ static void complete_init_numa_distance(void)
}
}
+void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
+ int nb_nodes, ram_addr_t size)
+{
+ int i;
+ uint64_t usedmem = 0;
+
+ /* Align each node according to the alignment
+ * requirements of the machine class
+ */
+
+ for (i = 0; i < nb_nodes - 1; i++) {
+ nodes[i].node_mem = (size / nb_nodes) &
+ ~((1 << mc->numa_mem_align_shift) - 1);
+ usedmem += nodes[i].node_mem;
+ }
+ nodes[i].node_mem = size - usedmem;
+}
+
+void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
+ int nb_nodes, ram_addr_t size)
+{
+ int i;
+ uint64_t usedmem = 0, node_mem;
+ uint64_t granularity = size / nb_nodes;
+ uint64_t propagate = 0;
+
+ for (i = 0; i < nb_nodes - 1; i++) {
+ node_mem = (granularity + propagate) &
+ ~((1 << mc->numa_mem_align_shift) - 1);
+ propagate = granularity + propagate - node_mem;
+ nodes[i].node_mem = node_mem;
+ usedmem += node_mem;
+ }
+ nodes[i].node_mem = size - usedmem;
+}
+
void parse_numa_opts(MachineClass *mc)
{
int i;
@@ -449,17 +485,8 @@ void parse_numa_opts(MachineClass *mc)
}
}
if (i == nb_numa_nodes) {
- uint64_t usedmem = 0;
-
- /* Align each node according to the alignment
- * requirements of the machine class
- */
- for (i = 0; i < nb_numa_nodes - 1; i++) {
- numa_info[i].node_mem = (ram_size / nb_numa_nodes) &
- ~((1 << mc->numa_mem_align_shift) - 1);
- usedmem += numa_info[i].node_mem;
- }
- numa_info[i].node_mem = ram_size - usedmem;
+ assert(mc->numa_auto_assign_ram);
+ mc->numa_auto_assign_ram(mc, numa_info, nb_numa_nodes, ram_size);
}
numa_total = 0;