Commit 11c2fd3e01835fe8562a7dae4842a645c9fe8bb5

Authored by aliguori
1 parent 030ea37b

sending NUMA topology to BIOS (Andre Przywara)

Uses the QEMU firmware configuration interface to send the NUMA
topology to the BIOS, which has to set up the tables. Only one firmware
configuration channel is used.

Signed-off-by: Andre Przywara <andre.przywara@amd.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@7212 c046a42c-6fe2-441c-8c8c-71466251a162
hw/fw_cfg.h
... ... @@ -14,6 +14,7 @@
14 14 #define FW_CFG_INITRD_ADDR 0x0a
15 15 #define FW_CFG_INITRD_SIZE 0x0b
16 16 #define FW_CFG_BOOT_DEVICE 0x0c
  17 +#define FW_CFG_NUMA 0x0d
17 18 #define FW_CFG_MAX_ENTRY 0x10
18 19  
19 20 #define FW_CFG_WRITE_CHANNEL 0x4000
... ...
... ... @@ -424,11 +424,15 @@ static void bochs_bios_write(void *opaque, uint32_t addr, uint32_t val)
424 424 }
425 425 }
426 426  
  427 +extern uint64_t node_cpumask[MAX_NODES];
  428 +
427 429 static void bochs_bios_init(void)
428 430 {
429 431 void *fw_cfg;
430 432 uint8_t *smbios_table;
431 433 size_t smbios_len;
  434 + uint64_t *numa_fw_cfg;
  435 + int i, j;
432 436  
433 437 register_ioport_write(0x400, 1, 2, bochs_bios_write, NULL);
434 438 register_ioport_write(0x401, 1, 2, bochs_bios_write, NULL);
... ... @@ -451,6 +455,26 @@ static void bochs_bios_init(void)
451 455 if (smbios_table)
452 456 fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES,
453 457 smbios_table, smbios_len);
  458 +
  459 + /* allocate memory for the NUMA channel: one (64bit) word for the number
  460 + * of nodes, one word for each VCPU->node and one word for each node to
  461 + * hold the amount of memory.
  462 + */
  463 + numa_fw_cfg = qemu_mallocz((1 + smp_cpus + nb_numa_nodes) * 8);
  464 + numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
  465 + for (i = 0; i < smp_cpus; i++) {
  466 + for (j = 0; j < nb_numa_nodes; j++) {
  467 + if (node_cpumask[j] & (1 << i)) {
  468 + numa_fw_cfg[i + 1] = cpu_to_le64(j);
  469 + break;
  470 + }
  471 + }
  472 + }
  473 + for (i = 0; i < nb_numa_nodes; i++) {
  474 + numa_fw_cfg[smp_cpus + 1 + i] = cpu_to_le64(node_mem[i]);
  475 + }
  476 + fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, (uint8_t *)numa_fw_cfg,
  477 + (1 + smp_cpus + nb_numa_nodes) * 8);
454 478 }
455 479  
456 480 /* Generate an initial boot sector which sets state and jump to
... ...
pc-bios/bios-pq/0014_add-srat-acpi-table-support.patch 0 → 100644
  1 +add SRAT ACPI table support (Andre Przywara)
  2 +
  3 +Take NUMA topology info from the QEMU firmware configuration interface
  4 +(number of nodes, node for each (V)CPU and amount of memory) and build
  5 +a SRAT table describing this topology for the guest OS. Handles more than
  6 +4 GB of RAM by including a hole for 32bit PCI memory mapping.
  7 +
  8 +Signed-off-by: Andre Przywara <andre.przywara@amd.com>
  9 +Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
  10 +
  11 +diff --git a/bios/rombios32.c b/bios/rombios32.c
  12 +index 49dfd62..d8f6d4e 100644
  13 +--- a/bios/rombios32.c
  14 ++++ b/bios/rombios32.c
  15 +@@ -450,6 +450,11 @@ int pm_sci_int;
  16 + unsigned long bios_table_cur_addr;
  17 + unsigned long bios_table_end_addr;
  18 +
  19 ++static inline uint64_t le64_to_cpu(uint64_t x)
  20 ++{
  21 ++ return x;
  22 ++}
  23 ++
  24 + void wrmsr_smp(uint32_t index, uint64_t val)
  25 + {
  26 + static struct { uint32_t ecx, eax, edx; } *p = (void *)SMP_MSR_ADDR;
  27 +@@ -468,6 +473,7 @@ void wrmsr_smp(uint32_t index, uint64_t val)
  28 + #define QEMU_CFG_SIGNATURE 0x00
  29 + #define QEMU_CFG_ID 0x01
  30 + #define QEMU_CFG_UUID 0x02
  31 ++#define QEMU_CFG_NUMA 0x0D
  32 + #define QEMU_CFG_ARCH_LOCAL 0x8000
  33 + #define QEMU_CFG_ACPI_TABLES (QEMU_CFG_ARCH_LOCAL + 0)
  34 + #define QEMU_CFG_SMBIOS_ENTRIES (QEMU_CFG_ARCH_LOCAL + 1)
  35 +@@ -529,6 +535,14 @@ static uint16_t smbios_entries(void)
  36 +
  37 + return cnt;
  38 + }
  39 ++
  40 ++uint64_t qemu_cfg_get64 (void)
  41 ++{
  42 ++ uint64_t ret;
  43 ++
  44 ++ qemu_cfg_read((uint8_t*)&ret, 8);
  45 ++ return le64_to_cpu(ret);
  46 ++}
  47 + #endif
  48 +
  49 + void cpu_probe(void)
  50 +@@ -1281,7 +1295,7 @@ struct rsdt_descriptor_rev1
  51 + {
  52 + ACPI_TABLE_HEADER_DEF /* ACPI common table header */
  53 + #ifdef BX_QEMU
  54 +- uint32_t table_offset_entry [4]; /* Array of pointers to other */
  55 ++ uint32_t table_offset_entry [5]; /* Array of pointers to other */
  56 + #else
  57 + uint32_t table_offset_entry [3]; /* Array of pointers to other */
  58 + #endif
  59 +@@ -1389,7 +1403,7 @@ struct multiple_apic_table
  60 + } __attribute__((__packed__));
  61 +
  62 +
  63 +-/* Values for Type in APIC_HEADER_DEF */
  64 ++/* Values for Type in APIC sub-headers */
  65 +
  66 + #define APIC_PROCESSOR 0
  67 + #define APIC_IO 1
  68 +@@ -1402,18 +1416,18 @@ struct multiple_apic_table
  69 + #define APIC_XRUPT_SOURCE 8
  70 + #define APIC_RESERVED 9 /* 9 and greater are reserved */
  71 +
  72 +-/*
  73 +- * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE)
  74 +- */
  75 +-#define APIC_HEADER_DEF /* Common APIC sub-structure header */\
  76 ++#define ACPI_SUB_HEADER_DEF /* Common ACPI sub-structure header */\
  77 + uint8_t type; \
  78 + uint8_t length;
  79 +
  80 ++/*
  81 ++ * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE)
  82 ++ */
  83 + /* Sub-structures for MADT */
  84 +
  85 + struct madt_processor_apic
  86 + {
  87 +- APIC_HEADER_DEF
  88 ++ ACPI_SUB_HEADER_DEF
  89 + uint8_t processor_id; /* ACPI processor id */
  90 + uint8_t local_apic_id; /* Processor's local APIC id */
  91 + #if 0
  92 +@@ -1424,6 +1438,43 @@ struct madt_processor_apic
  93 + #endif
  94 + } __attribute__((__packed__));
  95 +
  96 ++/*
  97 ++ * SRAT (NUMA topology description) table
  98 ++ */
  99 ++
  100 ++#define SRAT_PROCESSOR 0
  101 ++#define SRAT_MEMORY 1
  102 ++
  103 ++struct system_resource_affinity_table
  104 ++{
  105 ++ ACPI_TABLE_HEADER_DEF
  106 ++ uint32_t reserved1;
  107 ++ uint32_t reserved2[2];
  108 ++};
  109 ++
  110 ++struct srat_processor_affinity
  111 ++{
  112 ++ ACPI_SUB_HEADER_DEF
  113 ++ uint8_t proximity_lo;
  114 ++ uint8_t local_apic_id;
  115 ++ uint32_t flags;
  116 ++ uint8_t local_sapic_eid;
  117 ++ uint8_t proximity_hi[3];
  118 ++ uint32_t reserved;
  119 ++};
  120 ++
  121 ++struct srat_memory_affinity
  122 ++{
  123 ++ ACPI_SUB_HEADER_DEF
  124 ++ uint8_t proximity[4];
  125 ++ uint16_t reserved1;
  126 ++ uint32_t base_addr_low,base_addr_high;
  127 ++ uint32_t length_low,length_high;
  128 ++ uint32_t reserved2;
  129 ++ uint32_t flags;
  130 ++ uint32_t reserved3[2];
  131 ++};
  132 ++
  133 + #ifdef BX_QEMU
  134 + /*
  135 + * * ACPI 2.0 Generic Address Space definition.
  136 +@@ -1452,7 +1503,7 @@ struct acpi_20_hpet {
  137 +
  138 + struct madt_io_apic
  139 + {
  140 +- APIC_HEADER_DEF
  141 ++ ACPI_SUB_HEADER_DEF
  142 + uint8_t io_apic_id; /* I/O APIC ID */
  143 + uint8_t reserved; /* Reserved - must be zero */
  144 + uint32_t address; /* APIC physical address */
  145 +@@ -1463,7 +1514,7 @@ struct madt_io_apic
  146 + #ifdef BX_QEMU
  147 + struct madt_int_override
  148 + {
  149 +- APIC_HEADER_DEF
  150 ++ ACPI_SUB_HEADER_DEF
  151 + uint8_t bus; /* Identifies ISA Bus */
  152 + uint8_t source; /* Bus-relative interrupt source */
  153 + uint32_t gsi; /* GSI that source will signal */
  154 +@@ -1567,6 +1618,21 @@ int acpi_build_processor_ssdt(uint8_t *ssdt)
  155 + return ssdt_ptr - ssdt;
  156 + }
  157 +
  158 ++static void acpi_build_srat_memory(struct srat_memory_affinity *numamem,
  159 ++ uint64_t base, uint64_t len, int node, int enabled)
  160 ++{
  161 ++ numamem->type = SRAT_MEMORY;
  162 ++ numamem->length = sizeof(*numamem);
  163 ++ memset (numamem->proximity, 0 ,4);
  164 ++ numamem->proximity[0] = node;
  165 ++ numamem->flags = cpu_to_le32(!!enabled);
  166 ++ numamem->base_addr_low = base & 0xFFFFFFFF;
  167 ++ numamem->base_addr_high = base >> 32;
  168 ++ numamem->length_low = len & 0xFFFFFFFF;
  169 ++ numamem->length_high = len >> 32;
  170 ++ return;
  171 ++}
  172 ++
  173 + /* base_addr must be a multiple of 4KB */
  174 + void acpi_bios_init(void)
  175 + {
  176 +@@ -1577,12 +1643,15 @@ void acpi_bios_init(void)
  177 + struct multiple_apic_table *madt;
  178 + uint8_t *dsdt, *ssdt;
  179 + #ifdef BX_QEMU
  180 ++ struct system_resource_affinity_table *srat;
  181 + struct acpi_20_hpet *hpet;
  182 + uint32_t hpet_addr;
  183 + #endif
  184 + uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr, dsdt_addr, ssdt_addr;
  185 + uint32_t acpi_tables_size, madt_addr, madt_size, rsdt_size;
  186 ++ uint32_t srat_addr,srat_size;
  187 + uint16_t i, external_tables;
  188 ++ int nb_numa_nodes;
  189 +
  190 + /* reserve memory space for tables */
  191 + #ifdef BX_USE_EBDA_TABLES
  192 +@@ -1624,6 +1693,25 @@ void acpi_bios_init(void)
  193 + ssdt_addr = addr;
  194 + ssdt = (void *)(addr);
  195 + addr += acpi_build_processor_ssdt(ssdt);
  196 ++#ifdef BX_QEMU
  197 ++ qemu_cfg_select(QEMU_CFG_NUMA);
  198 ++ nb_numa_nodes = qemu_cfg_get64();
  199 ++#else
  200 ++ nb_numa_nodes = 0;
  201 ++#endif
  202 ++ if (nb_numa_nodes > 0) {
  203 ++ addr = (addr + 7) & ~7;
  204 ++ srat_addr = addr;
  205 ++ srat_size = sizeof(*srat) +
  206 ++ sizeof(struct srat_processor_affinity) * smp_cpus +
  207 ++ sizeof(struct srat_memory_affinity) * (nb_numa_nodes + 2);
  208 ++ srat = (void *)(addr);
  209 ++ addr += srat_size;
  210 ++ } else {
  211 ++ srat_addr = addr;
  212 ++ srat = (void*)(addr);
  213 ++ srat_size = 0;
  214 ++ }
  215 +
  216 + addr = (addr + 7) & ~7;
  217 + madt_addr = addr;
  218 +@@ -1733,6 +1821,69 @@ void acpi_bios_init(void)
  219 +
  220 + memset(rsdt, 0, rsdt_size);
  221 + #ifdef BX_QEMU
  222 ++ /* SRAT */
  223 ++ if (nb_numa_nodes > 0) {
  224 ++ struct srat_processor_affinity *core;
  225 ++ struct srat_memory_affinity *numamem;
  226 ++ int slots;
  227 ++ uint64_t mem_len, mem_base, next_base = 0, curnode;
  228 ++
  229 ++ qemu_cfg_select(QEMU_CFG_NUMA);
  230 ++ qemu_cfg_get64();
  231 ++ memset (srat, 0 , srat_size);
  232 ++ srat->reserved1=1;
  233 ++
  234 ++ core = (void*)(srat + 1);
  235 ++ for (i = 0; i < smp_cpus; ++i) {
  236 ++ core->type = SRAT_PROCESSOR;
  237 ++ core->length = sizeof(*core);
  238 ++ core->local_apic_id = i;
  239 ++ curnode = qemu_cfg_get64();
  240 ++ core->proximity_lo = curnode;
  241 ++ memset (core->proximity_hi, 0, 3);
  242 ++ core->local_sapic_eid = 0;
  243 ++ if (i < smp_cpus)
  244 ++ core->flags = cpu_to_le32(1);
  245 ++ else
  246 ++ core->flags = 0;
  247 ++ core++;
  248 ++ }
  249 ++
  250 ++ /* the memory map is a bit tricky, it contains at least one hole
  251 ++ * from 640k-1M and possibly another one from 3.5G-4G.
  252 ++ */
  253 ++ numamem = (void*)core; slots = 0;
  254 ++ acpi_build_srat_memory(numamem, 0, 640*1024, 0, 1);
  255 ++ next_base = 1024 * 1024; numamem++;slots++;
  256 ++ for (i = 1; i < nb_numa_nodes + 1; ++i) {
  257 ++ mem_base = next_base;
  258 ++ mem_len = qemu_cfg_get64();
  259 ++ if (i == 1) mem_len -= 1024 * 1024;
  260 ++ next_base = mem_base + mem_len;
  261 ++
  262 ++ /* Cut out the PCI hole */
  263 ++ if (mem_base <= ram_size && next_base > ram_size) {
  264 ++ mem_len -= next_base - ram_size;
  265 ++ if (mem_len > 0) {
  266 ++ acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1);
  267 ++ numamem++; slots++;
  268 ++ }
  269 ++ mem_base = 1ULL << 32;
  270 ++ mem_len = next_base - ram_size;
  271 ++ next_base += (1ULL << 32) - ram_size;
  272 ++ }
  273 ++ acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1);
  274 ++ numamem++; slots++;
  275 ++ }
  276 ++ for (; slots < nb_numa_nodes + 2; slots++) {
  277 ++ acpi_build_srat_memory(numamem, 0, 0, 0, 0);
  278 ++ numamem++;
  279 ++ }
  280 ++
  281 ++ acpi_build_table_header((struct acpi_table_header *)srat,
  282 ++ "SRAT", srat_size, 1);
  283 ++ }
  284 ++
  285 + /* HPET */
  286 + memset(hpet, 0, sizeof(*hpet));
  287 + /* Note timer_block_id value must be kept in sync with value advertised by
  288 +@@ -1761,9 +1912,11 @@ void acpi_bios_init(void)
  289 + rsdt->table_offset_entry[2] = cpu_to_le32(ssdt_addr);
  290 + #ifdef BX_QEMU
  291 + rsdt->table_offset_entry[3] = cpu_to_le32(hpet_addr);
  292 ++ if (nb_numa_nodes > 0)
  293 ++ rsdt->table_offset_entry[4] = cpu_to_le32(srat_addr);
  294 + #endif
  295 +- acpi_build_table_header((struct acpi_table_header *)rsdt,
  296 +- "RSDT", rsdt_size, 1);
  297 ++ acpi_build_table_header((struct acpi_table_header *)rsdt, "RSDT",
  298 ++ rsdt_size - (nb_numa_nodes > 0? 0: sizeof(uint32_t)), 1);
  299 +
  300 + acpi_tables_size = addr - base_addr;
  301 +
  302 +--
  303 +1.6.1.3
  304 +
  305 +
... ...
pc-bios/bios-pq/series
... ... @@ -11,3 +11,4 @@
11 11 0011_read-additional-acpi-tables-from-a-vm.patch
12 12 0012-load-smbios-entries-and-files-from-qemu.patch
13 13 0013_fix-non-acpi-timer-interrupt-routing.patch
  14 +0014_add-srat-acpi-table-support.patch
... ...
pc-bios/bios.bin
No preview for this file type