/*	$NetBSD: vm_machdep.c,v 1.26 2016/11/08 03:05:36 christos Exp $	*/

/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 */

/*-
 * Copyright (c) 1995 Charles M. Hannum. All rights reserved.
 * Copyright (c) 1989, 1990 William Jolitz
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 */

/*
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.26 2016/11/08 03:05:36 christos Exp $");

#include "opt_mtrr.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/core.h>
#include <sys/exec.h>
#include <sys/ptrace.h>

#include <uvm/uvm.h>

#include <machine/cpu.h>
#include <machine/gdt.h>
#include <machine/reg.h>
#include <machine/specialreg.h>

#ifdef MTRR
#include <machine/mtrr.h>
#endif

#include <x86/fpu.h>

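/*
 * cpu_proc_fork: finish a new process (p2) operation.
 *
 * The child inherits the parent's (p1) machine-dependent flags.
 */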
void
cpu_proc_fork(struct proc *p1, struct proc *p2)
{

	p2->p_md.md_flags = p1->p_md.md_flags;
}

/*
 * cpu_lwp_fork: finish creating a new LWP (l2).
 *
 * The first LWP (l1) is the LWP being forked.  If it is &lwp0, then we
 * are creating a kthread, whose return path and argument are specified
 * by `func' and `arg'.
 *
 * If an alternate user-level stack is requested (with non-zero values
 * in both the stack and stacksize arguments), then set up the user stack
 * pointer accordingly.
 */
void
cpu_lwp_fork(struct lwp *l1, struct lwp *l2, void *stack, size_t stacksize,
    void (*func)(void *), void *arg)
{
	struct pcb *pcb1, *pcb2;
	struct trapframe *tf;
	struct switchframe *sf;
	vaddr_t uv;

	pcb1 = lwp_getpcb(l1);
	pcb2 = lwp_getpcb(l2);

	/*
	 * If the parent LWP was using the FPU, save the FPU hardware
	 * state to its PCB so that we can copy it below.
	 */
	fpusave_lwp(l1, true);

	/*
	 * Sync the PCB before we copy it.
	 */
	if (l1 == curlwp) {
		KASSERT(pcb1 == curpcb);
		savectx(pcb1);
	} else {
		KASSERT(l1 == &lwp0);
	}

	/* Copy the PCB from parent. */
	memcpy(pcb2, pcb1, sizeof(struct pcb));
	/* Copy any additional fpu state */
	fpu_save_area_fork(pcb2, pcb1);

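	/*
	 * XXX assumption: under Xen the I/O privilege level is tracked
	 * in the PCB (there is no hardware EFLAGS.IOPL to inherit), so
	 * start the child with a known value.
	 */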
#if defined(XEN)
	pcb2->pcb_iopl = SEL_KPL;
#endif

	/*
	 * Set the kernel stack address (derived from the uarea address)
	 * and the trapframe address for the child.
	 *
	 * Rig the kernel stack so that it starts out in lwp_trampoline()
	 * and calls child_return() with l2 as the argument.  This causes
	 * the newly-created child process to go directly to user level
	 * with an apparent return value of 0 from fork(), while the
	 * parent process returns normally.
	 */
	uv = uvm_lwp_getuarea(l2);

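	/*
	 * The kernel stack grows down from the top of the uarea; reserve
	 * 16 bytes at the top (aligned to a 16-byte boundary on amd64)
	 * and carve the trapframe out immediately below.
	 */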
#ifdef __x86_64__
	pcb2->pcb_rsp0 = (uv + USPACE - 16) & ~0xf;
	tf = (struct trapframe *)pcb2->pcb_rsp0 - 1;
#else
	pcb2->pcb_esp0 = (uv + USPACE - 16);
	tf = (struct trapframe *)pcb2->pcb_esp0 - 1;

	pcb2->pcb_iomap = NULL;
#endif
	l2->l_md.md_regs = tf;

	/*
	 * Copy the trapframe from the parent, so that the return to
	 * userspace will be to the right address, with the correct
	 * registers.
	 */
	memcpy(tf, l1->l_md.md_regs, sizeof(struct trapframe));

	/* Child LWP might get aston() before returning to userspace. */
	tf->tf_trapno = T_ASTFLT;

#if 0 /* DIAGNOSTIC */
	/* Set a red zone in the kernel stack after the uarea. */
	pmap_kremove(uv, PAGE_SIZE);
	pmap_update(pmap_kernel());
#endif

	/* If specified, set a different user stack for the child. */
	if (stack != NULL) {
#ifdef __x86_64__
		tf->tf_rsp = (uint64_t)stack + stacksize;
#else
		tf->tf_esp = (uint32_t)stack + stacksize;
#endif
	}

	l2->l_md.md_flags = l1->l_md.md_flags;
	l2->l_md.md_astpending = 0;

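	/*
	 * Build the switchframe immediately below the trapframe.  When
	 * the child is first scheduled, cpu_switchto() "returns" through
	 * it into lwp_trampoline(), which in turn calls func(arg); for a
	 * normal fork that is child_return() with l2.
	 */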
	sf = (struct switchframe *)tf - 1;

#ifdef __x86_64__
	sf->sf_r12 = (uint64_t)func;
	sf->sf_r13 = (uint64_t)arg;
	sf->sf_rip = (uint64_t)lwp_trampoline;
	pcb2->pcb_rsp = (uint64_t)sf;
	pcb2->pcb_rbp = (uint64_t)l2;
#else
	/*
	 * XXX Is there a reason sf->sf_edi isn't initialized here?
	 * Could this leak potentially sensitive information to new
	 * userspace processes?
	 */
	sf->sf_esi = (int)func;
	sf->sf_ebx = (int)arg;
	sf->sf_eip = (int)lwp_trampoline;
	pcb2->pcb_esp = (int)sf;
	pcb2->pcb_ebp = (int)l2;
#endif
}

/*
 * cpu_lwp_free is called from exit() to let machine-dependent
 * code free machine-dependent resources.  Note that this routine
 * must not block.
 */
void
cpu_lwp_free(struct lwp *l, int proc)
{

	/* If we were using the FPU, forget about it (discard, don't save). */
	fpusave_lwp(l, false);

#ifdef MTRR
	if (proc && (l->l_proc->p_md.md_flags & MDP_USEDMTRR))
		mtrr_clean(l->l_proc);
#endif
	/*
	 * Free deferred mappings if any.
	 */
	struct vm_page *empty_ptps = l->l_md.md_gc_ptp;
	l->l_md.md_gc_ptp = NULL;
	pmap_free_ptps(empty_ptps);
}

/*
 * cpu_lwp_free2 is called when an LWP is being reaped.
 * This routine may block.
 */
void
cpu_lwp_free2(struct lwp *l)
{

	KASSERT(l->l_md.md_gc_ptp == NULL);
	KASSERT(l->l_md.md_gc_pmap == NULL);
}

/*
 * Convert kernel VA to physical address
 */
paddr_t
kvtop(void *addr)
{
	paddr_t pa;
	bool ret __diagused;

	ret = pmap_extract(pmap_kernel(), (vaddr_t)addr, &pa);
	KASSERT(ret == true);
	return pa;
}
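
/*
 * kvtop() is only meaningful for addresses currently mapped in the
 * kernel pmap, e.g. kernel buffers whose physical address a driver
 * needs; unmapped addresses trip the KASSERT on DIAGNOSTIC kernels.
 */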

/*
 * Map a user I/O request into kernel virtual address space.
 * Note: the pages are already locked by uvm_vslock(), so we
 * do not need to pass an access type when entering the mappings.
 */
int
vmapbuf(struct buf *bp, vsize_t len)
{
	vaddr_t faddr, taddr, off;
	paddr_t fpa;

	KASSERT((bp->b_flags & B_PHYS) != 0);

	bp->b_saveaddr = bp->b_data;
	faddr = trunc_page((vaddr_t)bp->b_data);
	off = (vaddr_t)bp->b_data - faddr;
	len = round_page(off + len);
	taddr = uvm_km_alloc(phys_map, len, 0, UVM_KMF_VAONLY | UVM_KMF_WAITVA);
	bp->b_data = (void *)(taddr + off);
	/*
	 * The region is locked, so we expect that pmap_extract() will
	 * succeed.
	 * XXX: unwise to expect this in a multithreaded environment.
	 * anything can happen to a pmap between the time we lock a
	 * region, release the pmap lock, and then relock it for
	 * the pmap_extract().
	 *
	 * no need to flush the TLB since we expect nothing to be mapped
	 * where we just allocated (the TLB will be flushed when our
	 * mapping is removed).
	 */
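	/* Enter a kernel mapping for each physical page of the buffer. */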
	while (len) {
		(void) pmap_extract(vm_map_pmap(&bp->b_proc->p_vmspace->vm_map),
		    faddr, &fpa);
		pmap_kenter_pa(taddr, fpa, VM_PROT_READ|VM_PROT_WRITE, 0);
		faddr += PAGE_SIZE;
		taddr += PAGE_SIZE;
		len -= PAGE_SIZE;
	}
	pmap_update(pmap_kernel());

	return 0;
}

/*
 * Unmap a previously-mapped user I/O request.
 */
void
vunmapbuf(struct buf *bp, vsize_t len)
{
	vaddr_t addr, off;

	KASSERT((bp->b_flags & B_PHYS) != 0);

	addr = trunc_page((vaddr_t)bp->b_data);
	off = (vaddr_t)bp->b_data - addr;
	len = round_page(off + len);
	pmap_kremove(addr, len);
	pmap_update(pmap_kernel());
	uvm_km_free(phys_map, addr, len, UVM_KMF_VAONLY);
	bp->b_data = bp->b_saveaddr;
	bp->b_saveaddr = NULL;
}

#ifdef __HAVE_CPU_UAREA_ROUTINES
void *
cpu_uarea_alloc(bool system)
{
	struct pglist pglist;
	int error;

	/*
	 * Allocate a new physically contiguous uarea which can be
	 * direct-mapped.
	 */
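	/*
	 * One contiguous segment (nsegs = 1), anywhere in physical
	 * memory, and we may sleep for it (waitok = 1).
	 */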
	error = uvm_pglistalloc(USPACE, 0, ptoa(physmem), 0, 0, &pglist, 1, 1);
	if (error) {
		return NULL;
	}

	/*
	 * Get the physical address from the first page.
	 */
	const struct vm_page * const pg = TAILQ_FIRST(&pglist);
	KASSERT(pg != NULL);
	const paddr_t pa = VM_PAGE_TO_PHYS(pg);

	/*
	 * We need to return a direct-mapped VA for the pa.
	 */

	return (void *)PMAP_MAP_POOLPAGE(pa);
}

/*
 * Return true if we freed it, false if we didn't.
 */
bool
cpu_uarea_free(void *vva)
{
	vaddr_t va = (vaddr_t) vva;

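	/*
	 * A VA inside the mapped kernel range came from the generic
	 * uarea allocator, not from cpu_uarea_alloc(); let the caller
	 * free it.
	 */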
	if (va >= VM_MIN_KERNEL_ADDRESS && va < VM_MAX_KERNEL_ADDRESS) {
		return false;
	}

	/*
	 * Since the pages are physically contiguous, the vm_page structures
	 * will be as well.
	 */
	struct vm_page *pg = PHYS_TO_VM_PAGE(PMAP_UNMAP_POOLPAGE(va));
	KASSERT(pg != NULL);
	for (size_t i = 0; i < UPAGES; i++, pg++) {
		uvm_pagefree(pg);
	}
	return true;
}
#endif /* __HAVE_CPU_UAREA_ROUTINES */
