0014_add-srat-acpi-table-support.patch
9.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
add SRAT ACPI table support (Andre Przywara)
Take the NUMA topology info from the QEMU firmware configuration interface
(number of nodes, the node of each (V)CPU, and the amount of memory per node)
and build an SRAT table describing this topology for the guest OS. Handle more
than 4 GB of RAM by leaving a hole for the 32-bit PCI memory mapping.
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
diff --git a/bios/rombios32.c b/bios/rombios32.c
index 49dfd62..d8f6d4e 100644
--- a/bios/rombios32.c
+++ b/bios/rombios32.c
@@ -450,6 +450,11 @@ int pm_sci_int;
unsigned long bios_table_cur_addr;
unsigned long bios_table_end_addr;
+static inline uint64_t le64_to_cpu(uint64_t x)
+{
+ return x; /* identity, no byte swap; presumably the host is always little-endian x86 -- confirm */
+}
+
void wrmsr_smp(uint32_t index, uint64_t val)
{
static struct { uint32_t ecx, eax, edx; } *p = (void *)SMP_MSR_ADDR;
@@ -468,6 +473,7 @@ void wrmsr_smp(uint32_t index, uint64_t val)
#define QEMU_CFG_SIGNATURE 0x00
#define QEMU_CFG_ID 0x01
#define QEMU_CFG_UUID 0x02
+#define QEMU_CFG_NUMA 0x0D
#define QEMU_CFG_ARCH_LOCAL 0x8000
#define QEMU_CFG_ACPI_TABLES (QEMU_CFG_ARCH_LOCAL + 0)
#define QEMU_CFG_SMBIOS_ENTRIES (QEMU_CFG_ARCH_LOCAL + 1)
@@ -529,6 +535,14 @@ static uint16_t smbios_entries(void)
return cnt;
}
+
+/* Fetch 8 bytes through qemu_cfg_read() and return them via le64_to_cpu(). */
+uint64_t qemu_cfg_get64 (void)
+{
+    uint64_t raw;
+    qemu_cfg_read((uint8_t *)&raw, sizeof(raw));
+    return le64_to_cpu(raw);
+}
#endif
void cpu_probe(void)
@@ -1281,7 +1295,7 @@ struct rsdt_descriptor_rev1
{
ACPI_TABLE_HEADER_DEF /* ACPI common table header */
#ifdef BX_QEMU
- uint32_t table_offset_entry [4]; /* Array of pointers to other */
+ uint32_t table_offset_entry [5]; /* Array of pointers to other */
#else
uint32_t table_offset_entry [3]; /* Array of pointers to other */
#endif
@@ -1389,7 +1403,7 @@ struct multiple_apic_table
} __attribute__((__packed__));
-/* Values for Type in APIC_HEADER_DEF */
+/* Values for Type in APIC sub-headers */
#define APIC_PROCESSOR 0
#define APIC_IO 1
@@ -1402,18 +1416,18 @@ struct multiple_apic_table
#define APIC_XRUPT_SOURCE 8
#define APIC_RESERVED 9 /* 9 and greater are reserved */
-/*
- * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE)
- */
-#define APIC_HEADER_DEF /* Common APIC sub-structure header */\
+#define ACPI_SUB_HEADER_DEF /* Common ACPI sub-structure header */\
uint8_t type; \
uint8_t length;
+/*
+ * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE)
+ */
/* Sub-structures for MADT */
struct madt_processor_apic
{
- APIC_HEADER_DEF
+ ACPI_SUB_HEADER_DEF
uint8_t processor_id; /* ACPI processor id */
uint8_t local_apic_id; /* Processor's local APIC id */
#if 0
@@ -1424,6 +1438,43 @@ struct madt_processor_apic
#endif
} __attribute__((__packed__));
+/*
+ * SRAT (NUMA topology description) table
+ */
+
+#define SRAT_PROCESSOR 0
+#define SRAT_MEMORY 1
+
+struct system_resource_affinity_table /* fixed SRAT part; affinity entries follow it */
+{
+    ACPI_TABLE_HEADER_DEF          /* ACPI common table header */
+    uint32_t reserved1;            /* acpi_bios_init() stores 1 here */
+    uint32_t reserved2[2];
+} __attribute__((__packed__));     /* packed like the other ACPI tables in this file */
+
+struct srat_processor_affinity /* SRAT entry tying a local APIC id to a proximity domain */
+{
+    ACPI_SUB_HEADER_DEF            /* type (SRAT_PROCESSOR) and length */
+    uint8_t proximity_lo;          /* low byte of the proximity domain (NUMA node) */
+    uint8_t local_apic_id;
+    uint32_t flags;                /* acpi_bios_init() writes 1 for each CPU it lists */
+    uint8_t local_sapic_eid;
+    uint8_t proximity_hi[3];       /* upper bytes of the proximity domain */
+    uint32_t reserved;
+} __attribute__((__packed__));     /* packed like the other ACPI sub-structures */
+
+struct srat_memory_affinity /* SRAT entry tying a memory range to a proximity domain */
+{
+    ACPI_SUB_HEADER_DEF            /* type (SRAT_MEMORY) and length */
+    uint8_t proximity[4];          /* proximity domain; byte 0 carries the node number */
+    uint16_t reserved1;
+    uint32_t base_addr_low, base_addr_high; /* 64-bit range base, split in two words */
+    uint32_t length_low, length_high;       /* 64-bit range length, split in two words */
+    uint32_t reserved2;
+    uint32_t flags;                /* bit 0 is set when the entry is enabled */
+    uint32_t reserved3[2];
+} __attribute__((__packed__));     /* packed like the other ACPI sub-structures */
+
#ifdef BX_QEMU
/*
* * ACPI 2.0 Generic Address Space definition.
@@ -1452,7 +1503,7 @@ struct acpi_20_hpet {
struct madt_io_apic
{
- APIC_HEADER_DEF
+ ACPI_SUB_HEADER_DEF
uint8_t io_apic_id; /* I/O APIC ID */
uint8_t reserved; /* Reserved - must be zero */
uint32_t address; /* APIC physical address */
@@ -1463,7 +1514,7 @@ struct madt_io_apic
#ifdef BX_QEMU
struct madt_int_override
{
- APIC_HEADER_DEF
+ ACPI_SUB_HEADER_DEF
uint8_t bus; /* Identifies ISA Bus */
uint8_t source; /* Bus-relative interrupt source */
uint32_t gsi; /* GSI that source will signal */
@@ -1567,6 +1618,21 @@ int acpi_build_processor_ssdt(uint8_t *ssdt)
return ssdt_ptr - ssdt;
}
+/* Fill one SRAT memory entry: range [base, base + len) is assigned to 'node'. */
+static void acpi_build_srat_memory(struct srat_memory_affinity *numamem,
+                                   uint64_t base, uint64_t len, int node, int enabled)
+{
+    numamem->type = SRAT_MEMORY;
+    numamem->length = sizeof(*numamem);
+    memset(numamem->proximity, 0, sizeof(numamem->proximity));
+    numamem->proximity[0] = node;            /* only the low byte of the domain is used */
+    numamem->flags = cpu_to_le32(!!enabled); /* any non-zero 'enabled' sets bit 0 */
+    numamem->base_addr_low = cpu_to_le32((uint32_t)(base & 0xFFFFFFFF));
+    numamem->base_addr_high = cpu_to_le32((uint32_t)(base >> 32));
+    numamem->length_low = cpu_to_le32((uint32_t)(len & 0xFFFFFFFF));
+    numamem->length_high = cpu_to_le32((uint32_t)(len >> 32));
+}
+
/* base_addr must be a multiple of 4KB */
void acpi_bios_init(void)
{
@@ -1577,12 +1643,15 @@ void acpi_bios_init(void)
struct multiple_apic_table *madt;
uint8_t *dsdt, *ssdt;
#ifdef BX_QEMU
+ struct system_resource_affinity_table *srat;
struct acpi_20_hpet *hpet;
uint32_t hpet_addr;
#endif
uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr, dsdt_addr, ssdt_addr;
uint32_t acpi_tables_size, madt_addr, madt_size, rsdt_size;
+ uint32_t srat_addr,srat_size;
uint16_t i, external_tables;
+ int nb_numa_nodes;
/* reserve memory space for tables */
#ifdef BX_USE_EBDA_TABLES
@@ -1624,6 +1693,25 @@ void acpi_bios_init(void)
ssdt_addr = addr;
ssdt = (void *)(addr);
addr += acpi_build_processor_ssdt(ssdt);
+#ifdef BX_QEMU
+ qemu_cfg_select(QEMU_CFG_NUMA);
+ nb_numa_nodes = qemu_cfg_get64();
+#else
+ nb_numa_nodes = 0;
+#endif
+ if (nb_numa_nodes > 0) {
+ addr = (addr + 7) & ~7;
+ srat_addr = addr;
+ srat_size = sizeof(*srat) +
+ sizeof(struct srat_processor_affinity) * smp_cpus +
+ sizeof(struct srat_memory_affinity) * (nb_numa_nodes + 2);
+ srat = (void *)(addr);
+ addr += srat_size;
+ } else {
+ srat_addr = addr;
+ srat = (void*)(addr);
+ srat_size = 0;
+ }
addr = (addr + 7) & ~7;
madt_addr = addr;
@@ -1733,6 +1821,69 @@ void acpi_bios_init(void)
memset(rsdt, 0, rsdt_size);
#ifdef BX_QEMU
+ /* SRAT */
+ if (nb_numa_nodes > 0) {
+ struct srat_processor_affinity *core;
+ struct srat_memory_affinity *numamem;
+ int slots;
+ uint64_t mem_len, mem_base, next_base = 0, curnode;
+
+ qemu_cfg_select(QEMU_CFG_NUMA);
+ qemu_cfg_get64();
+ memset (srat, 0 , srat_size);
+ srat->reserved1=1;
+
+ core = (void*)(srat + 1);
+ for (i = 0; i < smp_cpus; ++i) {
+ core->type = SRAT_PROCESSOR;
+ core->length = sizeof(*core);
+ core->local_apic_id = i;
+ curnode = qemu_cfg_get64();
+ core->proximity_lo = curnode;
+ memset (core->proximity_hi, 0, 3);
+ core->local_sapic_eid = 0;
+ if (i < smp_cpus)
+ core->flags = cpu_to_le32(1);
+ else
+ core->flags = 0;
+ core++;
+ }
+
+ /* the memory map is a bit tricky, it contains at least one hole
+ * from 640k-1M and possibly another one from 3.5G-4G.
+ */
+ numamem = (void*)core; slots = 0;
+ acpi_build_srat_memory(numamem, 0, 640*1024, 0, 1);
+ next_base = 1024 * 1024; numamem++;slots++;
+ for (i = 1; i < nb_numa_nodes + 1; ++i) {
+ mem_base = next_base;
+ mem_len = qemu_cfg_get64();
+ if (i == 1) mem_len -= 1024 * 1024;
+ next_base = mem_base + mem_len;
+
+ /* Cut out the PCI hole */
+ if (mem_base <= ram_size && next_base > ram_size) {
+ mem_len -= next_base - ram_size;
+ if (mem_len > 0) {
+ acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1);
+ numamem++; slots++;
+ }
+ mem_base = 1ULL << 32;
+ mem_len = next_base - ram_size;
+ next_base += (1ULL << 32) - ram_size;
+ }
+ acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1);
+ numamem++; slots++;
+ }
+ for (; slots < nb_numa_nodes + 2; slots++) {
+ acpi_build_srat_memory(numamem, 0, 0, 0, 0);
+ numamem++;
+ }
+
+ acpi_build_table_header((struct acpi_table_header *)srat,
+ "SRAT", srat_size, 1);
+ }
+
/* HPET */
memset(hpet, 0, sizeof(*hpet));
/* Note timer_block_id value must be kept in sync with value advertised by
@@ -1761,9 +1912,11 @@ void acpi_bios_init(void)
rsdt->table_offset_entry[2] = cpu_to_le32(ssdt_addr);
#ifdef BX_QEMU
rsdt->table_offset_entry[3] = cpu_to_le32(hpet_addr);
+ if (nb_numa_nodes > 0)
+ rsdt->table_offset_entry[4] = cpu_to_le32(srat_addr);
#endif
- acpi_build_table_header((struct acpi_table_header *)rsdt,
- "RSDT", rsdt_size, 1);
+ acpi_build_table_header((struct acpi_table_header *)rsdt, "RSDT",
+ rsdt_size - (nb_numa_nodes > 0? 0: sizeof(uint32_t)), 1);
acpi_tables_size = addr - base_addr;
--
1.6.1.3