| 1 | /* $NetBSD: trap.c,v 1.87 2016/10/26 22:02:14 christos Exp $ */ |
| 2 | |
| 3 | /*- |
| 4 | * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. |
| 5 | * All rights reserved. |
| 6 | * |
| 7 | * This code is derived from software contributed to The NetBSD Foundation |
| 8 | * by Charles M. Hannum. |
| 9 | * |
| 10 | * Redistribution and use in source and binary forms, with or without |
| 11 | * modification, are permitted provided that the following conditions |
| 12 | * are met: |
| 13 | * 1. Redistributions of source code must retain the above copyright |
| 14 | * notice, this list of conditions and the following disclaimer. |
| 15 | * 2. Redistributions in binary form must reproduce the above copyright |
| 16 | * notice, this list of conditions and the following disclaimer in the |
| 17 | * documentation and/or other materials provided with the distribution. |
| 18 | * |
| 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
| 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
| 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
| 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 29 | * POSSIBILITY OF SUCH DAMAGE. |
| 30 | */ |
| 31 | |
| 32 | /*- |
| 33 | * Copyright (c) 1990 The Regents of the University of California. |
| 34 | * All rights reserved. |
| 35 | * |
| 36 | * This code is derived from software contributed to Berkeley by |
| 37 | * the University of Utah, and William Jolitz. |
| 38 | * |
| 39 | * Redistribution and use in source and binary forms, with or without |
| 40 | * modification, are permitted provided that the following conditions |
| 41 | * are met: |
| 42 | * 1. Redistributions of source code must retain the above copyright |
| 43 | * notice, this list of conditions and the following disclaimer. |
| 44 | * 2. Redistributions in binary form must reproduce the above copyright |
| 45 | * notice, this list of conditions and the following disclaimer in the |
| 46 | * documentation and/or other materials provided with the distribution. |
| 47 | * 3. Neither the name of the University nor the names of its contributors |
| 48 | * may be used to endorse or promote products derived from this software |
| 49 | * without specific prior written permission. |
| 50 | * |
| 51 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 52 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 53 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 54 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 55 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 56 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 57 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 58 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 59 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 60 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 61 | * SUCH DAMAGE. |
| 62 | * |
| 63 | * @(#)trap.c 7.4 (Berkeley) 5/13/91 |
| 64 | */ |
| 65 | |
| 66 | /* |
| 67 | * 386 Trap and System call handling |
| 68 | */ |
| 69 | |
| 70 | #include <sys/cdefs.h> |
| 71 | __KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.87 2016/10/26 22:02:14 christos Exp $" ); |
| 72 | |
| 73 | #include "opt_ddb.h" |
| 74 | #include "opt_kgdb.h" |
| 75 | #include "opt_xen.h" |
| 76 | #include "opt_dtrace.h" |
| 77 | |
| 78 | #include <sys/param.h> |
| 79 | #include <sys/systm.h> |
| 80 | #include <sys/proc.h> |
| 81 | #include <sys/acct.h> |
| 82 | #include <sys/kauth.h> |
| 83 | #include <sys/kernel.h> |
| 84 | #include <sys/kmem.h> |
| 85 | #include <sys/ras.h> |
| 86 | #include <sys/signal.h> |
| 87 | #include <sys/syscall.h> |
| 88 | #include <sys/cpu.h> |
| 89 | #include <sys/ucontext.h> |
| 90 | |
| 91 | #include <uvm/uvm_extern.h> |
| 92 | |
| 93 | #include <machine/cpufunc.h> |
| 94 | #include <x86/fpu.h> |
| 95 | #include <machine/psl.h> |
| 96 | #include <machine/reg.h> |
| 97 | #include <machine/trap.h> |
| 98 | #include <machine/userret.h> |
| 99 | #include <machine/db_machdep.h> |
| 100 | |
| 101 | #include <x86/nmi.h> |
| 102 | |
| 103 | #ifndef XEN |
| 104 | #include "isa.h" |
| 105 | #endif |
| 106 | |
| 107 | #include <sys/kgdb.h> |
| 108 | |
| 109 | #ifdef KDTRACE_HOOKS |
| 110 | #include <sys/dtrace_bsd.h> |
| 111 | |
| 112 | /* |
| 113 | * This is a hook which is initialized by the dtrace module |
| 114 | * to handle traps which might occur during DTrace probe |
| 115 | * execution. |
| 116 | */ |
| 117 | dtrace_trap_func_t dtrace_trap_func = NULL; |
| 118 | |
| 119 | dtrace_doubletrap_func_t dtrace_doubletrap_func = NULL; |
| 120 | #endif |
| 121 | |
| 122 | void trap(struct trapframe *); |
| 123 | void trap_return_fault_return(struct trapframe *) __dead; |
| 124 | |
| 125 | const char * const trap_type[] = { |
| 126 | "privileged instruction fault" , /* 0 T_PRIVINFLT */ |
| 127 | "breakpoint trap" , /* 1 T_BPTFLT */ |
| 128 | "arithmetic trap" , /* 2 T_ARITHTRAP */ |
| 129 | "asynchronous system trap" , /* 3 T_ASTFLT */ |
| 130 | "protection fault" , /* 4 T_PROTFLT */ |
| 131 | "trace trap" , /* 5 T_TRCTRAP */ |
| 132 | "page fault" , /* 6 T_PAGEFLT */ |
| 133 | "alignment fault" , /* 7 T_ALIGNFLT */ |
| 134 | "integer divide fault" , /* 8 T_DIVIDE */ |
| 135 | "non-maskable interrupt" , /* 9 T_NMI */ |
| 136 | "overflow trap" , /* 10 T_OFLOW */ |
| 137 | "bounds check fault" , /* 11 T_BOUND */ |
| 138 | "FPU not available fault" , /* 12 T_DNA */ |
| 139 | "double fault" , /* 13 T_DOUBLEFLT */ |
| 140 | "FPU operand fetch fault" , /* 14 T_FPOPFLT */ |
| 141 | "invalid TSS fault" , /* 15 T_TSSFLT */ |
| 142 | "segment not present fault" , /* 16 T_SEGNPFLT */ |
| 143 | "stack fault" , /* 17 T_STKFLT */ |
| 144 | "machine check fault" , /* 18 T_MCA */ |
| 145 | "SSE FP exception" , /* 19 T_XMM */ |
| 146 | "reserved trap" , /* 20 T_RESERVED */ |
| 147 | }; |
| 148 | int trap_types = __arraycount(trap_type); |
| 149 | |
| 150 | #ifdef DEBUG |
| 151 | int trapdebug = 0; |
| 152 | #endif |
| 153 | |
| 154 | #define IDTVEC(name) __CONCAT(X, name) |
| 155 | |
| 156 | #ifdef TRAP_SIGDEBUG |
| 157 | static void frame_dump(struct trapframe *); |
| 158 | #endif |
| 159 | |
| 160 | static void * |
| 161 | onfault_handler(const struct pcb *pcb, const struct trapframe *tf) |
| 162 | { |
| 163 | struct onfault_table { |
| 164 | uintptr_t start; |
| 165 | uintptr_t end; |
| 166 | void *handler; |
| 167 | }; |
| 168 | extern const struct onfault_table onfault_table[]; |
| 169 | const struct onfault_table *p; |
| 170 | uintptr_t pc; |
| 171 | |
| 172 | if (pcb->pcb_onfault != NULL) { |
| 173 | return pcb->pcb_onfault; |
| 174 | } |
| 175 | |
| 176 | pc = tf->tf_rip; |
| 177 | for (p = onfault_table; p->start; p++) { |
| 178 | if (p->start <= pc && pc < p->end) { |
| 179 | return p->handler; |
| 180 | } |
| 181 | } |
| 182 | return NULL; |
| 183 | } |
| 184 | |
| 185 | static void |
| 186 | trap_print(const struct trapframe *frame, const lwp_t *l) |
| 187 | { |
| 188 | const int type = frame->tf_trapno; |
| 189 | |
| 190 | if (frame->tf_trapno < trap_types) { |
| 191 | printf("fatal %s" , trap_type[type]); |
| 192 | } else { |
| 193 | printf("unknown trap %d" , type); |
| 194 | } |
| 195 | printf(" in %s mode\n" , (type & T_USER) ? "user" : "supervisor" ); |
| 196 | |
| 197 | printf("trap type %d code %lx rip %lx cs %lx rflags %lx cr2 %lx " |
| 198 | "ilevel %x rsp %lx\n" , |
| 199 | type, frame->tf_err, (u_long)frame->tf_rip, frame->tf_cs, |
| 200 | frame->tf_rflags, rcr2(), curcpu()->ci_ilevel, frame->tf_rsp); |
| 201 | |
| 202 | printf("curlwp %p pid %d.%d lowest kstack %p\n" , |
| 203 | l, l->l_proc->p_pid, l->l_lid, KSTACK_LOWEST_ADDR(l)); |
| 204 | } |
| 205 | |
| 206 | /* |
| 207 | * trap(frame): exception, fault, and trap interface to BSD kernel. |
| 208 | * |
| 209 | * This common code is called from assembly language IDT gate entry routines |
| 210 | * that prepare a suitable stack frame, and restore this frame after the |
| 211 | * exception has been processed. Note that the effect is as if the arguments |
| 212 | * were passed call by reference. |
| 213 | * |
| 214 | * Note that the fpu traps (07 T_DNA, 10 T_ARITHTRAP and 13 T_XMM) |
| 215 | * jump directly into the code in x86/fpu.c so they get processed |
| 216 | * without interrupts being enabled. |
| 217 | */ |
| 218 | void |
| 219 | trap(struct trapframe *frame) |
| 220 | { |
| 221 | struct lwp *l = curlwp; |
| 222 | struct proc *p; |
| 223 | struct pcb *pcb; |
| 224 | extern char fusuintrfailure[], kcopy_fault[]; |
| 225 | extern char IDTVEC(oosyscall)[]; |
| 226 | extern char IDTVEC(osyscall)[]; |
| 227 | extern char IDTVEC(syscall32)[]; |
| 228 | #ifndef XEN |
| 229 | struct trapframe *vframe; |
| 230 | #endif |
| 231 | ksiginfo_t ksi; |
| 232 | void *onfault; |
| 233 | int type, error; |
| 234 | uint64_t cr2; |
| 235 | bool pfail; |
| 236 | |
| 237 | if (__predict_true(l != NULL)) { |
| 238 | pcb = lwp_getpcb(l); |
| 239 | p = l->l_proc; |
| 240 | } else { |
| 241 | /* |
| 242 | * this can happen eg. on break points in early on boot. |
| 243 | */ |
| 244 | pcb = NULL; |
| 245 | p = NULL; |
| 246 | } |
| 247 | type = frame->tf_trapno; |
| 248 | |
| 249 | #ifdef DEBUG |
| 250 | if (trapdebug) { |
| 251 | trap_print(frame, l); |
| 252 | } |
| 253 | #endif |
| 254 | if (type != T_NMI && !KERNELMODE(frame->tf_cs, frame->tf_rflags)) { |
| 255 | type |= T_USER; |
| 256 | l->l_md.md_regs = frame; |
| 257 | LWP_CACHE_CREDS(l, p); |
| 258 | } |
| 259 | |
| 260 | #ifdef KDTRACE_HOOKS |
| 261 | /* |
| 262 | * A trap can occur while DTrace executes a probe. Before |
| 263 | * executing the probe, DTrace blocks re-scheduling and sets |
| 264 | * a flag in its per-cpu flags to indicate that it doesn't |
| 265 | * want to fault. On returning from the the probe, the no-fault |
| 266 | * flag is cleared and finally re-scheduling is enabled. |
| 267 | * |
| 268 | * If the DTrace kernel module has registered a trap handler, |
| 269 | * call it and if it returns non-zero, assume that it has |
| 270 | * handled the trap and modified the trap frame so that this |
| 271 | * function can return normally. |
| 272 | */ |
| 273 | if ((type == T_PROTFLT || type == T_PAGEFLT) && |
| 274 | dtrace_trap_func != NULL) { |
| 275 | if ((*dtrace_trap_func)(frame, type)) { |
| 276 | return; |
| 277 | } |
| 278 | } |
| 279 | #endif |
| 280 | |
| 281 | switch (type) { |
| 282 | |
| 283 | default: |
| 284 | we_re_toast: |
| 285 | trap_print(frame, l); |
| 286 | |
| 287 | if (kdb_trap(type, 0, frame)) |
| 288 | return; |
| 289 | if (kgdb_trap(type, frame)) |
| 290 | return; |
| 291 | /* |
| 292 | * If this is a breakpoint, don't panic if we're not connected. |
| 293 | */ |
| 294 | if (type == T_BPTFLT && kgdb_disconnected()) { |
| 295 | printf("kgdb: ignored %s\n" , trap_type[type]); |
| 296 | return; |
| 297 | } |
| 298 | panic("trap" ); |
| 299 | /*NOTREACHED*/ |
| 300 | |
| 301 | case T_PROTFLT: |
| 302 | case T_SEGNPFLT: |
| 303 | case T_ALIGNFLT: |
| 304 | case T_TSSFLT: |
| 305 | if (p == NULL) |
| 306 | goto we_re_toast; |
| 307 | /* Check for copyin/copyout fault. */ |
| 308 | onfault = onfault_handler(pcb, frame); |
| 309 | if (onfault != NULL) { |
| 310 | copyefault: |
| 311 | error = EFAULT; |
| 312 | copyfault: |
| 313 | frame->tf_rip = (uintptr_t)onfault; |
| 314 | frame->tf_rax = error; |
| 315 | return; |
| 316 | } |
| 317 | |
| 318 | /* |
| 319 | * Check for failure during return to user mode. |
| 320 | * This can happen loading invalid values into the segment |
| 321 | * registers, or during the 'iret' itself. |
| 322 | * |
| 323 | * We do this by looking at the instruction we faulted on. |
| 324 | * The specific instructions we recognize only happen when |
| 325 | * returning from a trap, syscall, or interrupt. |
| 326 | */ |
| 327 | |
| 328 | kernelfault: |
| 329 | #ifdef XEN |
| 330 | /* |
| 331 | * XXX: there has to be an equivalent 'problem' |
| 332 | * but I (dsl) don't know exactly what happens! |
| 333 | * For now panic the kernel. |
| 334 | */ |
| 335 | goto we_re_toast; |
| 336 | #else |
| 337 | KSI_INIT_TRAP(&ksi); |
| 338 | ksi.ksi_signo = SIGSEGV; |
| 339 | ksi.ksi_code = SEGV_ACCERR; |
| 340 | ksi.ksi_trap = type; |
| 341 | |
| 342 | /* Get %rsp value before fault - there may be a pad word |
| 343 | * below the trap frame. */ |
| 344 | vframe = (void *)frame->tf_rsp; |
| 345 | if (frame->tf_rip == 0) { |
| 346 | /* |
| 347 | * Assume that if we jumped to null we |
| 348 | * probably did it via a null function |
| 349 | * pointer, so print the return address. |
| 350 | */ |
| 351 | printf("kernel jumped to null; return addr was %p\n" , |
| 352 | *(void **)frame->tf_rsp); |
| 353 | goto we_re_toast; |
| 354 | } |
| 355 | switch (*(uint16_t *)frame->tf_rip) { |
| 356 | case 0xcf48: /* iretq */ |
| 357 | /* |
| 358 | * The 'iretq' instruction faulted, so we have the |
| 359 | * 'user' registers saved after the kernel |
| 360 | * %rip:%cs:%fl:%rsp:%ss of the iret, and below that |
| 361 | * the user %rip:%cs:%fl:%rsp:%ss the 'iret' was |
| 362 | * processing. |
| 363 | * We must copy the user register back over the |
| 364 | * kernel fault frame to generate a normal stack |
| 365 | * frame (eg for sending a SIGSEGV). |
| 366 | */ |
| 367 | vframe = (void *)((char *)vframe |
| 368 | - offsetof(struct trapframe, tf_rip)); |
| 369 | memmove(vframe, frame, |
| 370 | offsetof(struct trapframe, tf_rip)); |
| 371 | /* Set the faulting address to the user %eip */ |
| 372 | ksi.ksi_addr = (void *)vframe->tf_rip; |
| 373 | break; |
| 374 | case 0x848e: /* mov 0xa8(%rsp),%es (8e 84 24 a8 00 00 00) */ |
| 375 | case 0x9c8e: /* mov 0xb0(%rsp),%ds (8e 9c 24 b0 00 00 00) */ |
| 376 | /* |
| 377 | * We faulted loading one of the user segment registers. |
| 378 | * The stack frame containing the user registers is |
| 379 | * still valid and pointed to by tf_rsp. |
| 380 | * Maybe we should check the iretq follows. |
| 381 | */ |
| 382 | if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags)) |
| 383 | goto we_re_toast; |
| 384 | /* There is no valid address for the fault */ |
| 385 | break; |
| 386 | |
| 387 | default: |
| 388 | goto we_re_toast; |
| 389 | } |
| 390 | |
| 391 | /* XXX: worry about on-stack trampolines for nested |
| 392 | * handlers?? */ |
| 393 | /* Save outer frame for any signal return */ |
| 394 | l->l_md.md_regs = vframe; |
| 395 | (*p->p_emul->e_trapsignal)(l, &ksi); |
| 396 | /* Return to user by reloading the user frame */ |
| 397 | trap_return_fault_return(vframe); |
| 398 | /* NOTREACHED */ |
| 399 | #endif |
| 400 | |
| 401 | case T_PROTFLT|T_USER: /* protection fault */ |
| 402 | case T_TSSFLT|T_USER: |
| 403 | case T_SEGNPFLT|T_USER: |
| 404 | case T_STKFLT|T_USER: |
| 405 | case T_ALIGNFLT|T_USER: |
| 406 | #ifdef TRAP_SIGDEBUG |
| 407 | printf("pid %d.%d (%s): BUS/SEGV (%x) at rip %lx addr %lx\n" , |
| 408 | p->p_pid, l->l_lid, p->p_comm, type, frame->tf_rip, rcr2()); |
| 409 | frame_dump(frame); |
| 410 | #endif |
| 411 | KSI_INIT_TRAP(&ksi); |
| 412 | ksi.ksi_trap = type & ~T_USER; |
| 413 | ksi.ksi_addr = (void *)rcr2(); |
| 414 | switch (type) { |
| 415 | case T_SEGNPFLT|T_USER: |
| 416 | case T_STKFLT|T_USER: |
| 417 | ksi.ksi_signo = SIGBUS; |
| 418 | ksi.ksi_code = BUS_ADRERR; |
| 419 | break; |
| 420 | case T_TSSFLT|T_USER: |
| 421 | ksi.ksi_signo = SIGBUS; |
| 422 | ksi.ksi_code = BUS_OBJERR; |
| 423 | break; |
| 424 | case T_ALIGNFLT|T_USER: |
| 425 | ksi.ksi_signo = SIGBUS; |
| 426 | ksi.ksi_code = BUS_ADRALN; |
| 427 | break; |
| 428 | case T_PROTFLT|T_USER: |
| 429 | ksi.ksi_signo = SIGSEGV; |
| 430 | ksi.ksi_code = SEGV_ACCERR; |
| 431 | break; |
| 432 | default: |
| 433 | KASSERT(0); |
| 434 | break; |
| 435 | } |
| 436 | goto trapsignal; |
| 437 | |
| 438 | case T_PRIVINFLT|T_USER: /* privileged instruction fault */ |
| 439 | case T_FPOPFLT|T_USER: /* coprocessor operand fault */ |
| 440 | #ifdef TRAP_SIGDEBUG |
| 441 | printf("pid %d.%d (%s): ILL at rip %lx addr %lx\n" , |
| 442 | p->p_pid, l->l_lid, p->p_comm, frame->tf_rip, rcr2()); |
| 443 | frame_dump(frame); |
| 444 | #endif |
| 445 | KSI_INIT_TRAP(&ksi); |
| 446 | ksi.ksi_signo = SIGILL; |
| 447 | ksi.ksi_trap = type & ~T_USER; |
| 448 | ksi.ksi_addr = (void *) frame->tf_rip; |
| 449 | switch (type) { |
| 450 | case T_PRIVINFLT|T_USER: |
| 451 | ksi.ksi_code = ILL_PRVOPC; |
| 452 | break; |
| 453 | case T_FPOPFLT|T_USER: |
| 454 | ksi.ksi_code = ILL_COPROC; |
| 455 | break; |
| 456 | default: |
| 457 | KASSERT(0); |
| 458 | break; |
| 459 | } |
| 460 | goto trapsignal; |
| 461 | |
| 462 | case T_ASTFLT|T_USER: |
| 463 | /* Allow process switch. */ |
| 464 | //curcpu()->ci_data.cpu_nast++; |
| 465 | if (l->l_pflag & LP_OWEUPC) { |
| 466 | l->l_pflag &= ~LP_OWEUPC; |
| 467 | ADDUPROF(l); |
| 468 | } |
| 469 | /* Allow a forced task switch. */ |
| 470 | if (curcpu()->ci_want_resched) { |
| 471 | preempt(); |
| 472 | } |
| 473 | goto out; |
| 474 | |
| 475 | case T_BOUND|T_USER: |
| 476 | case T_OFLOW|T_USER: |
| 477 | case T_DIVIDE|T_USER: |
| 478 | KSI_INIT_TRAP(&ksi); |
| 479 | ksi.ksi_signo = SIGFPE; |
| 480 | ksi.ksi_trap = type & ~T_USER; |
| 481 | ksi.ksi_addr = (void *)frame->tf_rip; |
| 482 | switch (type) { |
| 483 | case T_BOUND|T_USER: |
| 484 | ksi.ksi_code = FPE_FLTSUB; |
| 485 | break; |
| 486 | case T_OFLOW|T_USER: |
| 487 | ksi.ksi_code = FPE_INTOVF; |
| 488 | break; |
| 489 | case T_DIVIDE|T_USER: |
| 490 | ksi.ksi_code = FPE_INTDIV; |
| 491 | break; |
| 492 | default: |
| 493 | #ifdef DIAGNOSTIC |
| 494 | panic("unhandled type %x\n" , type); |
| 495 | #endif |
| 496 | break; |
| 497 | } |
| 498 | goto trapsignal; |
| 499 | |
| 500 | case T_PAGEFLT: |
| 501 | /* Allow page faults in kernel mode. */ |
| 502 | if (__predict_false(l == NULL)) |
| 503 | goto we_re_toast; |
| 504 | |
| 505 | /* |
| 506 | * fusuintrfailure is used by [fs]uswintr() to prevent |
| 507 | * page faulting from inside the profiling interrupt. |
| 508 | */ |
| 509 | onfault = pcb->pcb_onfault; |
| 510 | if (onfault == fusuintrfailure) { |
| 511 | goto copyefault; |
| 512 | } |
| 513 | if (cpu_intr_p() || (l->l_pflag & LP_INTR) != 0) { |
| 514 | goto we_re_toast; |
| 515 | } |
| 516 | |
| 517 | cr2 = rcr2(); |
| 518 | |
| 519 | if (frame->tf_err & PGEX_X) { |
| 520 | /* SMEP might have brought us here */ |
| 521 | if (cr2 > VM_MIN_ADDRESS && cr2 <= VM_MAXUSER_ADDRESS) |
| 522 | panic("prevented execution of %p (SMEP)" , |
| 523 | (void *)cr2); |
| 524 | } |
| 525 | |
| 526 | goto faultcommon; |
| 527 | |
| 528 | case T_PAGEFLT|T_USER: { /* page fault */ |
| 529 | register vaddr_t va; |
| 530 | register struct vmspace *vm; |
| 531 | register struct vm_map *map; |
| 532 | vm_prot_t ftype; |
| 533 | extern struct vm_map *kernel_map; |
| 534 | |
| 535 | cr2 = rcr2(); |
| 536 | if (p->p_emul->e_usertrap != NULL && |
| 537 | (*p->p_emul->e_usertrap)(l, cr2, frame) != 0) |
| 538 | return; |
| 539 | faultcommon: |
| 540 | vm = p->p_vmspace; |
| 541 | if (__predict_false(vm == NULL)) { |
| 542 | goto we_re_toast; |
| 543 | } |
| 544 | pcb->pcb_cr2 = cr2; |
| 545 | va = trunc_page((vaddr_t)cr2); |
| 546 | /* |
| 547 | * It is only a kernel address space fault iff: |
| 548 | * 1. (type & T_USER) == 0 and |
| 549 | * 2. pcb_onfault not set or |
| 550 | * 3. pcb_onfault set but supervisor space fault |
| 551 | * The last can occur during an exec() copyin where the |
| 552 | * argument space is lazy-allocated. |
| 553 | */ |
| 554 | if (type == T_PAGEFLT && va >= VM_MIN_KERNEL_ADDRESS) |
| 555 | map = kernel_map; |
| 556 | else |
| 557 | map = &vm->vm_map; |
| 558 | if (frame->tf_err & PGEX_W) |
| 559 | ftype = VM_PROT_WRITE; |
| 560 | else if (frame->tf_err & PGEX_X) |
| 561 | ftype = VM_PROT_EXECUTE; |
| 562 | else |
| 563 | ftype = VM_PROT_READ; |
| 564 | |
| 565 | #ifdef DIAGNOSTIC |
| 566 | if (map == kernel_map && va == 0) { |
| 567 | printf("trap: bad kernel access at %lx\n" , va); |
| 568 | goto we_re_toast; |
| 569 | } |
| 570 | #endif |
| 571 | /* Fault the original page in. */ |
| 572 | onfault = pcb->pcb_onfault; |
| 573 | pcb->pcb_onfault = NULL; |
| 574 | error = uvm_fault(map, va, ftype); |
| 575 | pcb->pcb_onfault = onfault; |
| 576 | if (error == 0) { |
| 577 | if (map != kernel_map && (void *)va >= vm->vm_maxsaddr) |
| 578 | uvm_grow(p, va); |
| 579 | |
| 580 | pfail = false; |
| 581 | while (type == T_PAGEFLT) { |
| 582 | /* |
| 583 | * we need to switch pmap now if we're in |
| 584 | * the middle of copyin/out. |
| 585 | * |
| 586 | * but we don't need to do so for kcopy as |
| 587 | * it never touch userspace. |
| 588 | */ |
| 589 | kpreempt_disable(); |
| 590 | if (curcpu()->ci_want_pmapload) { |
| 591 | onfault = onfault_handler(pcb, frame); |
| 592 | if (onfault != kcopy_fault) { |
| 593 | pmap_load(); |
| 594 | } |
| 595 | } |
| 596 | /* |
| 597 | * We need to keep the pmap loaded and |
| 598 | * so avoid being preempted until back |
| 599 | * into the copy functions. Disable |
| 600 | * interrupts at the hardware level before |
| 601 | * re-enabling preemption. Interrupts |
| 602 | * will be re-enabled by 'iret' when |
| 603 | * returning back out of the trap stub. |
| 604 | * They'll only be re-enabled when the |
| 605 | * program counter is once again in |
| 606 | * the copy functions, and so visible |
| 607 | * to cpu_kpreempt_exit(). |
| 608 | */ |
| 609 | #ifndef XEN |
| 610 | x86_disable_intr(); |
| 611 | #endif |
| 612 | l->l_nopreempt--; |
| 613 | if (l->l_nopreempt > 0 || !l->l_dopreempt || |
| 614 | pfail) { |
| 615 | return; |
| 616 | } |
| 617 | #ifndef XEN |
| 618 | x86_enable_intr(); |
| 619 | #endif |
| 620 | /* |
| 621 | * If preemption fails for some reason, |
| 622 | * don't retry it. The conditions won't |
| 623 | * change under our nose. |
| 624 | */ |
| 625 | pfail = kpreempt(0); |
| 626 | } |
| 627 | goto out; |
| 628 | } |
| 629 | |
| 630 | if (type == T_PAGEFLT) { |
| 631 | onfault = onfault_handler(pcb, frame); |
| 632 | if (onfault != NULL) |
| 633 | goto copyfault; |
| 634 | printf("uvm_fault(%p, 0x%lx, %d) -> %x\n" , |
| 635 | map, va, ftype, error); |
| 636 | goto kernelfault; |
| 637 | } |
| 638 | |
| 639 | KSI_INIT_TRAP(&ksi); |
| 640 | ksi.ksi_trap = type & ~T_USER; |
| 641 | ksi.ksi_addr = (void *)cr2; |
| 642 | switch (error) { |
| 643 | case EINVAL: |
| 644 | ksi.ksi_signo = SIGBUS; |
| 645 | ksi.ksi_code = BUS_ADRERR; |
| 646 | break; |
| 647 | case EACCES: |
| 648 | ksi.ksi_signo = SIGSEGV; |
| 649 | ksi.ksi_code = SEGV_ACCERR; |
| 650 | error = EFAULT; |
| 651 | break; |
| 652 | case ENOMEM: |
| 653 | ksi.ksi_signo = SIGKILL; |
| 654 | printf("UVM: pid %d.%d (%s), uid %d killed: " |
| 655 | "out of swap\n" , p->p_pid, l->l_lid, p->p_comm, |
| 656 | l->l_cred ? kauth_cred_geteuid(l->l_cred) : -1); |
| 657 | break; |
| 658 | default: |
| 659 | ksi.ksi_signo = SIGSEGV; |
| 660 | ksi.ksi_code = SEGV_MAPERR; |
| 661 | break; |
| 662 | } |
| 663 | |
| 664 | #ifdef TRAP_SIGDEBUG |
| 665 | printf("pid %d.%d (%s): signal %d at rip %#lx addr %#lx " |
| 666 | "error %d trap %d cr2 %p\n" , p->p_pid, l->l_lid, p->p_comm, |
| 667 | ksi.ksi_signo, frame->tf_rip, va, error, ksi.ksi_trap, |
| 668 | ksi.ksi_addr); |
| 669 | frame_dump(frame); |
| 670 | #endif |
| 671 | (*p->p_emul->e_trapsignal)(l, &ksi); |
| 672 | break; |
| 673 | } |
| 674 | |
| 675 | case T_TRCTRAP: |
| 676 | /* Check whether they single-stepped into a lcall. */ |
| 677 | if (frame->tf_rip == (uint64_t)IDTVEC(oosyscall) || |
| 678 | frame->tf_rip == (uint64_t)IDTVEC(osyscall) || |
| 679 | frame->tf_rip == (uint64_t)IDTVEC(syscall32)) { |
| 680 | frame->tf_rflags &= ~PSL_T; |
| 681 | return; |
| 682 | } |
| 683 | goto we_re_toast; |
| 684 | |
| 685 | case T_BPTFLT|T_USER: /* bpt instruction fault */ |
| 686 | case T_TRCTRAP|T_USER: /* trace trap */ |
| 687 | /* |
| 688 | * Don't go single-stepping into a RAS. |
| 689 | */ |
| 690 | |
| 691 | if (p->p_raslist == NULL || |
| 692 | (ras_lookup(p, (void *)frame->tf_rip) == (void *)-1)) { |
| 693 | KSI_INIT_TRAP(&ksi); |
| 694 | ksi.ksi_signo = SIGTRAP; |
| 695 | ksi.ksi_trap = type & ~T_USER; |
| 696 | if (type == (T_BPTFLT|T_USER)) |
| 697 | ksi.ksi_code = TRAP_BRKPT; |
| 698 | else |
| 699 | ksi.ksi_code = TRAP_TRACE; |
| 700 | (*p->p_emul->e_trapsignal)(l, &ksi); |
| 701 | } |
| 702 | break; |
| 703 | |
| 704 | case T_NMI: |
| 705 | if (nmi_dispatch(frame)) |
| 706 | return; |
| 707 | /* NMI can be hooked up to a pushbutton for debugging */ |
| 708 | if (kgdb_trap(type, frame)) |
| 709 | return; |
| 710 | if (kdb_trap(type, 0, frame)) |
| 711 | return; |
| 712 | /* machine/parity/power fail/"kitchen sink" faults */ |
| 713 | |
| 714 | x86_nmi(); |
| 715 | return; |
| 716 | } |
| 717 | |
| 718 | if ((type & T_USER) == 0) |
| 719 | return; |
| 720 | out: |
| 721 | userret(l); |
| 722 | return; |
| 723 | trapsignal: |
| 724 | (*p->p_emul->e_trapsignal)(l, &ksi); |
| 725 | userret(l); |
| 726 | } |
| 727 | |
| 728 | /* |
| 729 | * startlwp: start of a new LWP. |
| 730 | */ |
| 731 | void |
| 732 | startlwp(void *arg) |
| 733 | { |
| 734 | ucontext_t *uc = arg; |
| 735 | lwp_t *l = curlwp; |
| 736 | int error __diagused; |
| 737 | |
| 738 | error = cpu_setmcontext(l, &uc->uc_mcontext, uc->uc_flags); |
| 739 | KASSERT(error == 0); |
| 740 | |
| 741 | kmem_free(uc, sizeof(ucontext_t)); |
| 742 | userret(l); |
| 743 | } |
| 744 | |
#ifdef TRAP_SIGDEBUG
/*
 * frame_dump: print the registers saved in a trap frame, followed by a raw
 * dump of the 80 longs (20 rows of 4) that start at the frame itself.
 * Only built when TRAP_SIGDEBUG is defined.
 */
static void
frame_dump(struct trapframe *tf)
{
	unsigned long *word;
	int row;

	printf("rip %p rsp %p rfl %p\n",
	    (void *)tf->tf_rip, (void *)tf->tf_rsp, (void *)tf->tf_rflags);
	printf("rdi %p rsi %p rdx %p\n",
	    (void *)tf->tf_rdi, (void *)tf->tf_rsi, (void *)tf->tf_rdx);
	printf("rcx %p r8 %p r9 %p\n",
	    (void *)tf->tf_rcx, (void *)tf->tf_r8, (void *)tf->tf_r9);
	printf("r10 %p r11 %p r12 %p\n",
	    (void *)tf->tf_r10, (void *)tf->tf_r11, (void *)tf->tf_r12);
	printf("r13 %p r14 %p r15 %p\n",
	    (void *)tf->tf_r13, (void *)tf->tf_r14, (void *)tf->tf_r15);
	printf("rbp %p rbx %p rax %p\n",
	    (void *)tf->tf_rbp, (void *)tf->tf_rbx, (void *)tf->tf_rax);
	/* Segment selectors are 16 bits wide; mask off the rest. */
	printf("cs %lx ds %lx es %lx fs %lx gs %lx ss %lx\n",
	    tf->tf_cs & 0xffff, tf->tf_ds & 0xffff, tf->tf_es & 0xffff,
	    tf->tf_fs & 0xffff, tf->tf_gs & 0xffff, tf->tf_ss & 0xffff);

	printf("\n");
	printf("Stack dump:\n");
	word = (unsigned long *)tf;
	for (row = 0; row < 20; row++) {
		printf(" 0x%.16lx 0x%.16lx 0x%.16lx 0x%.16lx\n",
		    word[0], word[1], word[2], word[3]);
		word += 4;
	}
	printf("\n");
}
#endif
| 775 | |