/*	$NetBSD: gdt.c,v 1.34 2016/08/21 10:42:33 maxv Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by John T. Kohl, by Charles M. Hannum, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Modified to deal with variable-length entries for NetBSD/x86_64 by
 * fvdl@wasabisystems.com, May 2001.
 * XXX this file should be shared with the i386 code; the differences
 * can be hidden in macros.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: gdt.c,v 1.34 2016/08/21 10:42:33 maxv Exp $");

#include "opt_multiprocessor.h"
#include "opt_xen.h"
#include "opt_user_ldt.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mutex.h>
#include <sys/cpu.h>

#include <uvm/uvm.h>

#include <machine/gdt.h>

#ifdef XEN
#include <xen/hypervisor.h>
#endif

int gdt_size;		/* size of GDT in bytes */
int gdt_dyncount;	/* number of dyn. allocated GDT entries in use */
int gdt_dynavail;	/* number of dyn. allocated GDT entries available */
int gdt_next;		/* next available slot for sweeping */
int gdt_free;		/* next free slot; terminated with GNULL_SEL */

void gdt_init(void);

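/*
 * Write an 8-byte GDT entry in place. Under Xen the GDT pages are mapped
 * read-only, so the update must be done by the hypervisor, which is given
 * the machine address of the entry.
 */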
void
update_descriptor(void *tp, void *ep)
{
	uint64_t *table, *entry;

	table = tp;
	entry = ep;

#ifndef XEN
	*table = *entry;
#else
	paddr_t pa;

	if (!pmap_extract_ma(pmap_kernel(), (vaddr_t)table, &pa) ||
	    HYPERVISOR_update_descriptor(pa, *entry))
		panic("HYPERVISOR_update_descriptor failed\n");
#endif
}

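/*
 * Build a system segment descriptor (16 bytes on amd64, hence the two
 * 8-byte writes) and install it in the given dynamic slot of the GDT of
 * every CPU.
 */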
void
set_sys_gdt(int slot, void *base, size_t limit, int type, int dpl, int gran)
{
	union {
		struct sys_segment_descriptor sd;
		uint64_t bits[2];
	} d;
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int idx;

	set_sys_segment(&d.sd, base, limit, type, dpl, gran);
	idx = IDXSEL(GDYNSEL(slot, SEL_KPL));
	for (CPU_INFO_FOREACH(cii, ci)) {
		KASSERT(ci->ci_gdt != NULL);
		update_descriptor(&ci->ci_gdt[idx + 0], &d.bits[0]);
		update_descriptor(&ci->ci_gdt[idx + 1], &d.bits[1]);
	}
}

/*
 * Initialize the GDT. We already have a gdtstore, which was temporarily used
 * by the bootstrap code. Now, we allocate a new gdtstore, and put it in cpu0.
 */
void
gdt_init(void)
{
	char *old_gdt;
	struct vm_page *pg;
	vaddr_t va;
	struct cpu_info *ci = &cpu_info_primary;

	gdt_size = MINGDTSIZ;
	gdt_dyncount = 0;
	gdt_next = 0;
	gdt_free = GNULL_SEL;
	gdt_dynavail =
	    (gdt_size - DYNSEL_START) / sizeof(struct sys_segment_descriptor);

	old_gdt = gdtstore;

	/* Allocate MAXGDTSIZ bytes of virtual memory. */
	gdtstore = (char *)uvm_km_alloc(kernel_map, MAXGDTSIZ, 0,
	    UVM_KMF_VAONLY);

	/*
	 * Allocate only MINGDTSIZ bytes of physical memory. We will grow this
	 * area in gdt_grow at run-time if needed.
	 */
	for (va = (vaddr_t)gdtstore; va < (vaddr_t)gdtstore + MINGDTSIZ;
	    va += PAGE_SIZE) {
		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
		if (pg == NULL) {
			panic("gdt_init: no pages");
		}
		pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
		    VM_PROT_READ | VM_PROT_WRITE, 0);
	}
	pmap_update(pmap_kernel());

	/* Copy the initial bootstrap GDT into the new area. */
	memcpy(gdtstore, old_gdt, DYNSEL_START);
	ci->ci_gdt = (void *)gdtstore;
#ifndef XEN
	set_sys_segment(GDT_ADDR_SYS(gdtstore, GLDT_SEL), ldtstore,
	    LDT_SIZE - 1, SDT_SYSLDT, SEL_KPL, 0);
#endif

	gdt_init_cpu(ci);
}

/*
 * Allocate a shadow GDT for a secondary CPU. It contains the same values as
 * the GDT present in cpu0 (gdtstore).
 */
void
gdt_alloc_cpu(struct cpu_info *ci)
{
	int max_len = MAXGDTSIZ;
	int min_len = MINGDTSIZ;
	struct vm_page *pg;
	vaddr_t va;

	ci->ci_gdt = (union descriptor *)uvm_km_alloc(kernel_map, max_len,
	    0, UVM_KMF_VAONLY);

	for (va = (vaddr_t)ci->ci_gdt; va < (vaddr_t)ci->ci_gdt + min_len;
	    va += PAGE_SIZE) {
		while ((pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO))
		    == NULL) {
			uvm_wait("gdt_alloc_cpu");
		}
		pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
		    VM_PROT_READ | VM_PROT_WRITE, 0);
	}
	pmap_update(pmap_kernel());

	memset(ci->ci_gdt, 0, min_len);
	memcpy(ci->ci_gdt, gdtstore, gdt_size);
}

/*
 * Load the appropriate GDT descriptor into the currently running CPU, which
 * must be ci.
 */
void
gdt_init_cpu(struct cpu_info *ci)
{
	struct region_descriptor region;

	KASSERT(curcpu() == ci);

#ifndef XEN
	setregion(&region, ci->ci_gdt, (uint16_t)(MAXGDTSIZ - 1));
#else
	setregion(&region, ci->ci_gdt, (uint16_t)(gdt_size - 1));
#endif
	lgdt(&region);
}

#ifdef MULTIPROCESSOR
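/*
 * Reload the GDT descriptor of the running CPU from ci->ci_gdt (for example
 * after the GDT has been grown).
 */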
void
gdt_reload_cpu(struct cpu_info *ci)
{
	struct region_descriptor region;

#ifndef XEN
	setregion(&region, ci->ci_gdt, MAXGDTSIZ - 1);
#else
	setregion(&region, ci->ci_gdt, gdt_size - 1);
#endif
	lgdt(&region);
}
#endif

#if !defined(XEN) || defined(USER_LDT)
/*
 * Grow the GDT. The GDT is present on each CPU, so we need to iterate over
 * all of them. We already have the virtual memory, we only need to grow the
 * physical memory.
 */
static void
gdt_grow(void)
{
	size_t old_size;
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct vm_page *pg;
	vaddr_t va;

	old_size = gdt_size;
	gdt_size <<= 1;
	if (gdt_size > MAXGDTSIZ)
		gdt_size = MAXGDTSIZ;
	gdt_dynavail =
	    (gdt_size - DYNSEL_START) / sizeof(struct sys_segment_descriptor);

	for (CPU_INFO_FOREACH(cii, ci)) {
		for (va = (vaddr_t)(ci->ci_gdt) + old_size;
		    va < (vaddr_t)(ci->ci_gdt) + gdt_size;
		    va += PAGE_SIZE) {
			while ((pg = uvm_pagealloc(NULL, 0, NULL,
			    UVM_PGA_ZERO)) == NULL) {
				uvm_wait("gdt_grow");
			}
			pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
			    VM_PROT_READ | VM_PROT_WRITE, 0);
		}
	}

	pmap_update(pmap_kernel());
}

/*
 * Allocate a GDT slot as follows:
 * 1) If there are entries on the free list, use those.
 * 2) If there are fewer than gdt_dynavail entries in use, there are free
 *    slots near the end that we can sweep through.
 * 3) As a last resort, we increase the size of the GDT, and sweep through
 *    the new slots.
 */
static int
gdt_get_slot(void)
{
	int slot;
	struct sys_segment_descriptor *gdt;

	gdt = (struct sys_segment_descriptor *)&gdtstore[DYNSEL_START];

	KASSERT(mutex_owned(&cpu_lock));

	if (gdt_free != GNULL_SEL) {
		slot = gdt_free;
		gdt_free = gdt[slot].sd_xx3;	/* XXXfvdl res. field abuse */
	} else {
		KASSERT(gdt_next == gdt_dyncount);
		if (gdt_next >= gdt_dynavail) {
			if (gdt_size >= MAXGDTSIZ)
				panic("gdt_get_slot: out of memory");
			gdt_grow();
		}
		slot = gdt_next++;
	}

	gdt_dyncount++;
	return slot;
}

/*
 * Deallocate a GDT slot, putting it on the free list.
 */
static void
gdt_put_slot(int slot)
{
	struct sys_segment_descriptor *gdt;

	KASSERT(mutex_owned(&cpu_lock));

	gdt = (struct sys_segment_descriptor *)&gdtstore[DYNSEL_START];

	gdt_dyncount--;
	gdt[slot].sd_type = SDT_SYSNULL;
	gdt[slot].sd_xx3 = gdt_free;
	gdt_free = slot;
}
#endif

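/*
 * Allocate a dynamic GDT slot for a TSS and return the selector for it.
 * Under Xen no TSS is used, so a null selector is returned instead.
 */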
int
tss_alloc(struct x86_64_tss *tss)
{
#ifndef XEN
	int slot;

	mutex_enter(&cpu_lock);

	slot = gdt_get_slot();
	set_sys_gdt(slot, tss, sizeof(struct x86_64_tss) - 1, SDT_SYS386TSS,
	    SEL_KPL, 0);

	mutex_exit(&cpu_lock);

	return GDYNSEL(slot, SEL_KPL);
#else /* XEN */
	/* TSS, what for? */
	return GSEL(GNULL_SEL, SEL_KPL);
#endif
}

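/*
 * Release a TSS selector previously returned by tss_alloc.
 */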
void
tss_free(int sel)
{
#ifndef XEN
	mutex_enter(&cpu_lock);
	gdt_put_slot(IDXDYNSEL(sel));
	mutex_exit(&cpu_lock);
#else
	KASSERT(sel == GSEL(GNULL_SEL, SEL_KPL));
#endif
}

#ifdef USER_LDT
/*
 * XXX: USER_LDT is not implemented on amd64.
 */
int
ldt_alloc(void *ldtp, size_t len)
{
	int slot;

	KASSERT(mutex_owned(&cpu_lock));

	slot = gdt_get_slot();
	set_sys_gdt(slot, ldtp, len - 1, SDT_SYSLDT, SEL_KPL, 0);

	return GDYNSEL(slot, SEL_KPL);
}

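/*
 * Release an LDT selector previously returned by ldt_alloc.
 */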
void
ldt_free(int sel)
{
	int slot;

	KASSERT(mutex_owned(&cpu_lock));

	slot = IDXDYNSEL(sel);

	gdt_put_slot(slot);
}
#endif

#ifdef XEN
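/*
 * Xen replacement for the lgdt instruction: hand the machine frame numbers
 * of the pages backing the GDT to the hypervisor, which installs the table
 * on our behalf.
 */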
void
lgdt(struct region_descriptor *desc)
{
	paddr_t frames[16];
	int i;
	vaddr_t va;

	/*
	 * XXX: Xen even checks descriptors AFTER limit.
	 * Zero out last frame after limit if needed.
	 */
	va = desc->rd_base + desc->rd_limit + 1;
	__PRINTK(("memset 0x%lx -> 0x%lx\n", va, roundup(va, PAGE_SIZE)));
	memset((void *)va, 0, roundup(va, PAGE_SIZE) - va);
	for (i = 0; i < roundup(desc->rd_limit, PAGE_SIZE) >> PAGE_SHIFT; i++) {
		/*
		 * The lgdt instruction uses virtual addresses;
		 * do some translation for Xen.
		 * Mark pages R/O too, else Xen will refuse to use them.
		 */
		frames[i] = ((paddr_t)xpmap_ptetomach(
		    (pt_entry_t *)(desc->rd_base + (i << PAGE_SHIFT))))
		    >> PAGE_SHIFT;
		__PRINTK(("frames[%d] = 0x%lx (pa 0x%lx)\n", i, frames[i],
		    xpmap_mtop(frames[i] << PAGE_SHIFT)));
		pmap_pte_clearbits(kvtopte(desc->rd_base + (i << PAGE_SHIFT)),
		    PG_RW);
	}
	__PRINTK(("HYPERVISOR_set_gdt(%d)\n", (desc->rd_limit + 1) >> 3));

	if (HYPERVISOR_set_gdt(frames, (desc->rd_limit + 1) >> 3))
		panic("lgdt(): HYPERVISOR_set_gdt() failed");
	lgdt_finish();
}
#endif