| 1 | /* $NetBSD: fpu.c,v 1.12 2016/09/29 17:01:43 maxv Exp $ */ |
| 2 | |
| 3 | /*- |
| 4 | * Copyright (c) 2008 The NetBSD Foundation, Inc. All |
| 5 | * rights reserved. |
| 6 | * |
| 7 | * This code is derived from software developed for The NetBSD Foundation |
| 8 | * by Andrew Doran. |
| 9 | * |
| 10 | * Redistribution and use in source and binary forms, with or without |
| 11 | * modification, are permitted provided that the following conditions |
| 12 | * are met: |
| 13 | * 1. Redistributions of source code must retain the above copyright |
| 14 | * notice, this list of conditions and the following disclaimer. |
| 15 | * 2. Redistributions in binary form must reproduce the above copyright |
| 16 | * notice, this list of conditions and the following disclaimer in the |
| 17 | * documentation and/or other materials provided with the distribution. |
| 18 | * |
| 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
| 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
| 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
| 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 29 | * POSSIBILITY OF SUCH DAMAGE. |
| 30 | */ |
| 31 | |
| 32 | /*- |
| 33 | * Copyright (c) 1991 The Regents of the University of California. |
| 34 | * All rights reserved. |
| 35 | * |
| 36 | * Redistribution and use in source and binary forms, with or without |
| 37 | * modification, are permitted provided that the following conditions |
| 38 | * are met: |
| 39 | * 1. Redistributions of source code must retain the above copyright |
| 40 | * notice, this list of conditions and the following disclaimer. |
| 41 | * 2. Redistributions in binary form must reproduce the above copyright |
| 42 | * notice, this list of conditions and the following disclaimer in the |
| 43 | * documentation and/or other materials provided with the distribution. |
| 44 | * 3. Neither the name of the University nor the names of its contributors |
| 45 | * may be used to endorse or promote products derived from this software |
| 46 | * without specific prior written permission. |
| 47 | * |
| 48 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 49 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 50 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 51 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 52 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 53 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 54 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 55 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 56 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 57 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 58 | * SUCH DAMAGE. |
| 59 | * |
| 60 | * @(#)npx.c 7.2 (Berkeley) 5/12/91 |
| 61 | */ |
| 62 | |
| 63 | /*- |
| 64 | * Copyright (c) 1994, 1995, 1998 Charles M. Hannum. All rights reserved. |
| 65 | * Copyright (c) 1990 William Jolitz. |
| 66 | * |
| 67 | * Redistribution and use in source and binary forms, with or without |
| 68 | * modification, are permitted provided that the following conditions |
| 69 | * are met: |
| 70 | * 1. Redistributions of source code must retain the above copyright |
| 71 | * notice, this list of conditions and the following disclaimer. |
| 72 | * 2. Redistributions in binary form must reproduce the above copyright |
| 73 | * notice, this list of conditions and the following disclaimer in the |
| 74 | * documentation and/or other materials provided with the distribution. |
| 75 | * 3. All advertising materials mentioning features or use of this software |
| 76 | * must display the following acknowledgement: |
| 77 | * This product includes software developed by the University of |
| 78 | * California, Berkeley and its contributors. |
| 79 | * 4. Neither the name of the University nor the names of its contributors |
| 80 | * may be used to endorse or promote products derived from this software |
| 81 | * without specific prior written permission. |
| 82 | * |
| 83 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 84 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 85 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 86 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 87 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 88 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 89 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 90 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 91 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 92 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 93 | * SUCH DAMAGE. |
| 94 | * |
| 95 | * @(#)npx.c 7.2 (Berkeley) 5/12/91 |
| 96 | */ |
| 97 | |
| 98 | #include <sys/cdefs.h> |
| 99 | __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.12 2016/09/29 17:01:43 maxv Exp $" ); |
| 100 | |
| 101 | #include "opt_multiprocessor.h" |
| 102 | |
| 103 | #include <sys/param.h> |
| 104 | #include <sys/systm.h> |
| 105 | #include <sys/conf.h> |
| 106 | #include <sys/cpu.h> |
| 107 | #include <sys/file.h> |
| 108 | #include <sys/proc.h> |
| 109 | #include <sys/kernel.h> |
| 110 | |
| 111 | #include <machine/cpu.h> |
| 112 | #include <machine/intr.h> |
| 113 | #include <machine/cpufunc.h> |
| 114 | #include <machine/pcb.h> |
| 115 | #include <machine/trap.h> |
| 116 | #include <machine/specialreg.h> |
| 117 | #include <x86/cpu.h> |
| 118 | #include <x86/fpu.h> |
| 119 | |
| 120 | /* Check some duplicate definitions match */ |
| 121 | #include <machine/fenv.h> |
| 122 | |
| 123 | #ifdef XEN |
| 124 | #define clts() HYPERVISOR_fpu_taskswitch(0) |
| 125 | #define stts() HYPERVISOR_fpu_taskswitch(1) |
| 126 | #endif |
| 127 | |
| 128 | static inline union savefpu * |
| 129 | process_fpframe(struct lwp *lwp) |
| 130 | { |
| 131 | struct pcb *pcb = lwp_getpcb(lwp); |
| 132 | |
| 133 | return &pcb->pcb_savefpu; |
| 134 | } |
| 135 | |
| 136 | /* |
| 137 | * The following table is used to ensure that the FPE_... value |
| 138 | * that is passed as a trapcode to the signal handler of the user |
| 139 | * process does not have more than one bit set. |
| 140 | * |
| 141 | * Multiple bits may be set if SSE simd instructions generate errors |
| 142 | * on more than one value or if the user process modifies the control |
 * word while a status word bit is already set (which is a sign
 * of bad coding).
 * We have no choice but to narrow them down to one bit, since we must
| 146 | * not send a trapcode that is not exactly one of the FPE_ macros. |
| 147 | * |
| 148 | * The mechanism has a static table with 127 entries. Each combination |
| 149 | * of the 7 FPU status word exception bits directly translates to a |
| 150 | * position in this table, where a single FPE_... value is stored. |
| 151 | * This FPE_... value stored there is considered the "most important" |
| 152 | * of the exception bits and will be sent as the signal code. The |
| 153 | * precedence of the bits is based upon Intel Document "Numerical |
| 154 | * Applications", Chapter "Special Computational Situations". |
| 155 | * |
| 156 | * The code to choose one of these values does these steps: |
| 157 | * 1) Throw away status word bits that cannot be masked. |
| 158 | * 2) Throw away the bits currently masked in the control word, |
| 159 | * assuming the user isn't interested in them anymore. |
| 160 | * 3) Reinsert status word bit 7 (stack fault) if it is set, which |
 *    cannot be masked but must be preserved.
| 162 | * 'Stack fault' is a sub-class of 'invalid operation'. |
| 163 | * 4) Use the remaining bits to point into the trapcode table. |
| 164 | * |
| 165 | * The 6 maskable bits in order of their preference, as stated in the |
| 166 | * above referenced Intel manual: |
| 167 | * 1 Invalid operation (FP_X_INV) |
| 168 | * 1a Stack underflow |
| 169 | * 1b Stack overflow |
| 170 | * 1c Operand of unsupported format |
| 171 | * 1d SNaN operand. |
 *  2  QNaN operand (not an exception, irrelevant here)
| 173 | * 3 Any other invalid-operation not mentioned above or zero divide |
| 174 | * (FP_X_INV, FP_X_DZ) |
| 175 | * 4 Denormal operand (FP_X_DNML) |
| 176 | * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) |
| 177 | * 6 Inexact result (FP_X_IMP) |
| 178 | * |
 * NB: the above seems to mix up the mxcsr error bits and the x87 ones.
| 180 | * They are in the same order, but there is no EN_SW_STACK_FAULT in the mmx |
| 181 | * status. |
| 182 | * |
| 183 | * The table is nearly, but not quite, in bit order (ZERODIV and DENORM |
| 184 | * are swapped). |
| 185 | * |
| 186 | * This table assumes that any stack fault is cleared - so that an INVOP |
| 187 | * fault will only be reported as FLTSUB once. |
| 188 | * This might not happen if the mask is being changed. |
| 189 | */ |
/*
 * FPE_xxx1() maps one 7-bit exception-status value to the single
 * highest-priority FPE_* code: invalid-op (stack fault -> FLTSUB),
 * then zero divide, denormal, overflow, underflow, precision loss.
 */
#define FPE_xxx1(f) (f & EN_SW_INVOP \
	? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV) \
	: f & EN_SW_ZERODIV ? FPE_FLTDIV \
	: f & EN_SW_DENORM ? FPE_FLTUND \
	: f & EN_SW_OVERFLOW ? FPE_FLTOVF \
	: f & EN_SW_UNDERFLOW ? FPE_FLTUND \
	: f & EN_SW_PRECLOSS ? FPE_FLTRES \
	: f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0)
/*
 * Doubling macros: each expands FPE_xxx1() over a consecutive run of
 * argument values, so FPE_xxx32(0)..FPE_xxx32(96) below enumerates
 * entries for all 128 possible status-bit combinations at compile time.
 */
#define FPE_xxx2(f)	FPE_xxx1(f), FPE_xxx1((f + 1))
#define FPE_xxx4(f)	FPE_xxx2(f), FPE_xxx2((f + 2))
#define FPE_xxx8(f)	FPE_xxx4(f), FPE_xxx4((f + 4))
#define FPE_xxx16(f)	FPE_xxx8(f), FPE_xxx8((f + 8))
#define FPE_xxx32(f)	FPE_xxx16(f), FPE_xxx16((f + 16))
static const uint8_t fpetable[128] = {
	FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96)
};
#undef FPE_xxx1
#undef FPE_xxx2
#undef FPE_xxx4
#undef FPE_xxx8
#undef FPE_xxx16
#undef FPE_xxx32
| 212 | |
| 213 | /* |
| 214 | * Init the FPU. |
| 215 | * |
| 216 | * This might not be strictly necessary since it will be initialised |
| 217 | * for each process. However it does no harm. |
| 218 | */ |
| 219 | void |
| 220 | fpuinit(struct cpu_info *ci) |
| 221 | { |
| 222 | if (!i386_fpu_present) |
| 223 | return; |
| 224 | |
| 225 | clts(); |
| 226 | fninit(); |
| 227 | stts(); |
| 228 | } |
| 229 | |
| 230 | static void |
| 231 | send_sigill(void *rip) |
| 232 | { |
| 233 | /* No fpu (486SX) - send SIGILL */ |
| 234 | ksiginfo_t ksi; |
| 235 | |
| 236 | x86_enable_intr(); |
| 237 | KSI_INIT_TRAP(&ksi); |
| 238 | ksi.ksi_signo = SIGILL; |
| 239 | ksi.ksi_addr = rip; |
| 240 | (*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi); |
| 241 | return; |
| 242 | } |
| 243 | |
| 244 | /* |
| 245 | * This is a synchronous trap on either an x87 instruction (due to an |
| 246 | * unmasked error on the previous x87 instruction) or on an SSE/SSE2 etc |
| 247 | * instruction due to an error on the instruction itself. |
| 248 | * |
| 249 | * If trap actually generates a signal, then the fpu state is saved |
| 250 | * and then copied onto the process's user-stack, and then recovered |
| 251 | * from there when the signal returns (or from the jmp_buf if the |
| 252 | * signal handler exits with a longjmp()). |
| 253 | * |
| 254 | * All this code need to do is save the reason for the trap. |
| 255 | * For x87 interrupts the status word bits need clearing to stop the |
| 256 | * trap re-occurring. |
| 257 | * |
| 258 | * The mxcsr bits are 'sticky' and need clearing to not confuse a later trap. |
| 259 | * |
| 260 | * Since this is a synchronous trap, the fpu registers must still belong |
| 261 | * to the correct process (we trap through an interrupt gate so that |
| 262 | * interrupts are disabled on entry). |
| 263 | * Interrupts (these better include IPIs) are left disabled until we've |
| 264 | * finished looking at fpu registers. |
| 265 | * |
| 266 | * For amd64 the calling code (in amd64_trap.S) has already checked |
| 267 | * that we trapped from usermode. |
| 268 | */ |
| 269 | |
void
fputrap(struct trapframe *frame)
{
	uint32_t statbits;
	ksiginfo_t ksi;

	if (!USERMODE(frame->tf_cs, frame->tf_eflags))
		panic("fpu trap from kernel, trapframe %p\n" , frame);

	if (i386_fpu_present == 0) {
		send_sigill((void *)X86_TF_RIP(frame));
		return;
	}

	/*
	 * At this point, fpcurlwp should be curlwp. If it wasn't, the TS bit
	 * should be set, and we should have gotten a DNA exception.
	 */
	KASSERT(curcpu()->ci_fpcurlwp == curlwp);

	if (frame->tf_trapno == T_XMM) {
		/* SSE/SSE2 exception: the cause is recorded in mxcsr. */
		uint32_t mxcsr;
		x86_stmxcsr(&mxcsr);
		statbits = mxcsr;
		/* Clear the sticky status bits so a later trap isn't confused */
		mxcsr &= ~0x3f;
		x86_ldmxcsr(&mxcsr);

		/* Remove masked interrupts and non-status bits */
		statbits &= ~(statbits >> 7) & 0x3f;
		/* Mark this is an XMM status */
		statbits |= 0x10000;
	} else {
		/* x87 exception: the cause is in the FPU status word. */
		uint16_t cw, sw;
		/* Get current control and status words */
		fnstcw(&cw);
		fnstsw(&sw);
		/* Clear any pending exceptions from status word */
		fnclex();

		/* Remove masked interrupts */
		statbits = sw & ~(cw & 0x3f);
	}

	/* Doesn't matter now if we get pre-empted */
	x86_enable_intr();

	/* Translate the status bits to a single FPE_* signal code. */
	KSI_INIT_TRAP(&ksi);
	ksi.ksi_signo = SIGFPE;
	ksi.ksi_addr = (void *)X86_TF_RIP(frame);
	ksi.ksi_code = fpetable[statbits & 0x7f];
	ksi.ksi_trap = statbits;
	(*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi);
}
| 324 | |
| 325 | /* |
| 326 | * Implement device not available (DNA) exception |
| 327 | * |
| 328 | * If we were the last lwp to use the FPU, we can simply return. |
| 329 | * Otherwise, we save the previous state, if necessary, and restore |
| 330 | * our last saved state. |
| 331 | * |
| 332 | * Called directly from the trap 0x13 entry with interrupts still disabled. |
| 333 | */ |
void
fpudna(struct trapframe *frame)
{
	struct cpu_info *ci;
	struct lwp *l, *fl;
	struct pcb *pcb;
	int s;

	if (!USERMODE(frame->tf_cs, frame->tf_eflags))
		panic("fpudna from kernel, ip %p, trapframe %p\n" ,
		    (void *)X86_TF_RIP(frame), frame);

	if (i386_fpu_present == 0) {
		send_sigill((void *)X86_TF_RIP(frame));
		return;
	}

	ci = curcpu();

	/* Save soft spl level - interrupts are hard disabled */
	s = splhigh();

	/* Save state on current CPU. */
	l = ci->ci_curlwp;
	pcb = lwp_getpcb(l);
	fl = ci->ci_fpcurlwp;
	if (fl != NULL) {
		/*
		 * It seems we can get here on Xen even if we didn't
		 * switch lwp. In this case do nothing
		 */
		if (fl == l) {
			/* FPU state already ours: just re-enable access. */
			KASSERT(pcb->pcb_fpcpu == ci);
			clts();
			splx(s);
			return;
		}
		/* Flush the previous owner's state off this CPU. */
		fpusave_cpu(true);
	}

	/* Save our state if on a remote CPU. */
	if (pcb->pcb_fpcpu != NULL) {
		/* Explicitly disable preemption before dropping spl. */
		kpreempt_disable();
		splx(s);

		/* Actually enable interrupts */
		x86_enable_intr();

		/* Spins until the remote CPU has written our state back. */
		fpusave_lwp(l, true);
		KASSERT(pcb->pcb_fpcpu == NULL);
		s = splhigh();
		kpreempt_enable();
	}

	/*
	 * Restore state on this CPU, or initialize. Ensure that
	 * the entire update is atomic with respect to FPU-sync IPIs.
	 */
	clts();
	ci->ci_fpcurlwp = l;
	pcb->pcb_fpcpu = ci;

	if (i386_use_fxsave) {
		if (x86_xsave_features != 0) {
			xrstor(&pcb->pcb_savefpu, x86_xsave_features);
		} else {
			/*
			 * AMD FPU's do not restore FIP, FDP, and FOP on
			 * fxrstor, leaking other process's execution history.
			 * Clear them manually by loading a zero.
			 *
			 * Clear the ES bit in the x87 status word if it is
			 * currently set, in order to avoid causing a fault
			 * in the upcoming load.
			 */
			if (fngetsw() & 0x80)
				fnclex();
			fldummy();

			fxrstor(&pcb->pcb_savefpu);
		}
	} else {
		/* Legacy 387-style save area. */
		frstor(&pcb->pcb_savefpu);
	}

	KASSERT(ci == curcpu());
	splx(s);
}
| 423 | |
| 424 | /* |
| 425 | * Save current CPU's FPU state. Must be called at IPL_HIGH. |
| 426 | */ |
void
fpusave_cpu(bool save)
{
	struct cpu_info *ci;
	struct pcb *pcb;
	struct lwp *l;

	KASSERT(curcpu()->ci_ilevel == IPL_HIGH);

	ci = curcpu();
	l = ci->ci_fpcurlwp;
	if (l == NULL) {
		/* No lwp owns the FPU on this CPU; nothing to do. */
		return;
	}
	pcb = lwp_getpcb(l);

	if (save) {
		/* Enable FPU access so the register state can be stored. */
		clts();
		if (i386_use_fxsave) {
			if (x86_xsave_features != 0)
				xsave(&pcb->pcb_savefpu, x86_xsave_features);
			else
				fxsave(&pcb->pcb_savefpu);
		} else {
			fnsave(&pcb->pcb_savefpu);
		}
	}

	/* Mark the FPU unowned; the next user access traps via DNA. */
	stts();
	pcb->pcb_fpcpu = NULL;
	ci->ci_fpcurlwp = NULL;
}
| 459 | |
| 460 | /* |
| 461 | * Save l's FPU state, which may be on this processor or another processor. |
| 462 | * It may take some time, so we avoid disabling preemption where possible. |
| 463 | * Caller must know that the target LWP is stopped, otherwise this routine |
| 464 | * may race against it. |
| 465 | */ |
void
fpusave_lwp(struct lwp *l, bool save)
{
	struct pcb *pcb = lwp_getpcb(l);
	struct cpu_info *oci;
	int s, spins, ticks;

	spins = 0;
	ticks = hardclock_ticks;
	for (;;) {
		s = splhigh();
		oci = pcb->pcb_fpcpu;
		if (oci == NULL) {
			/* State not live on any CPU; nothing to flush. */
			splx(s);
			break;
		}
		if (oci == curcpu()) {
			/* State is on this CPU; save it directly. */
			KASSERT(oci->ci_fpcurlwp == l);
			fpusave_cpu(save);
			splx(s);
			break;
		}
		/*
		 * State lives on a remote CPU: ask it to flush with an
		 * IPI, then spin until it does.  If a clock tick passes
		 * while waiting, loop around and resend the IPI.
		 */
		splx(s);
#ifdef XEN
		if (xen_send_ipi(oci, XEN_IPI_SYNCH_FPU) != 0) {
			panic("xen_send_ipi(%s, XEN_IPI_SYNCH_FPU) failed." ,
			    cpu_name(oci));
		}
#else /* XEN */
		x86_send_ipi(oci, X86_IPI_SYNCH_FPU);
#endif
		while (pcb->pcb_fpcpu == oci && ticks == hardclock_ticks) {
			x86_pause();
			spins++;
		}
		/* Give up (rather than hang) if the remote CPU never responds. */
		if (spins > 100000000) {
			panic("fpusave_lwp: did not" );
		}
	}
}
| 506 | |
| 507 | void |
| 508 | fpu_set_default_cw(struct lwp *l, unsigned int x87_cw) |
| 509 | { |
| 510 | union savefpu *fpu_save = process_fpframe(l); |
| 511 | |
| 512 | if (i386_use_fxsave) |
| 513 | fpu_save->sv_xmm.fx_cw = x87_cw; |
| 514 | else |
| 515 | fpu_save->sv_87.s87_cw = x87_cw; |
| 516 | fpu_save->sv_os.fxo_dflt_cw = x87_cw; |
| 517 | } |
| 518 | |
| 519 | /* |
| 520 | * Exec needs to clear the fpu save area to avoid leaking info from the |
| 521 | * old process to userspace. |
| 522 | */ |
| 523 | void |
| 524 | fpu_save_area_clear(struct lwp *l, unsigned int x87_cw) |
| 525 | { |
| 526 | union savefpu *fpu_save; |
| 527 | |
| 528 | fpusave_lwp(l, false); |
| 529 | fpu_save = process_fpframe(l); |
| 530 | |
| 531 | if (i386_use_fxsave) { |
| 532 | memset(&fpu_save->sv_xmm, 0, sizeof(fpu_save->sv_xmm)); |
| 533 | fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__; |
| 534 | fpu_save->sv_xmm.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__; |
| 535 | fpu_save->sv_xmm.fx_cw = x87_cw; |
| 536 | } else { |
| 537 | memset(&fpu_save->sv_87, 0, x86_fpu_save_size); |
| 538 | fpu_save->sv_87.s87_tw = 0xffff; |
| 539 | fpu_save->sv_87.s87_cw = x87_cw; |
| 540 | } |
| 541 | fpu_save->sv_os.fxo_dflt_cw = x87_cw; |
| 542 | } |
| 543 | |
| 544 | /* For signal handlers the register values don't matter */ |
| 545 | void |
| 546 | fpu_save_area_reset(struct lwp *l) |
| 547 | { |
| 548 | union savefpu *fpu_save = process_fpframe(l); |
| 549 | |
| 550 | if (i386_use_fxsave) { |
| 551 | fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__; |
| 552 | fpu_save->sv_xmm.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__; |
| 553 | fpu_save->sv_xmm.fx_tw = 0; |
| 554 | fpu_save->sv_xmm.fx_cw = fpu_save->sv_os.fxo_dflt_cw; |
| 555 | } else { |
| 556 | fpu_save->sv_87.s87_tw = 0xffff; |
| 557 | fpu_save->sv_87.s87_cw = fpu_save->sv_os.fxo_dflt_cw; |
| 558 | } |
| 559 | } |
| 560 | |
| 561 | /* During fork the xsave data needs to be copied */ |
| 562 | void |
| 563 | fpu_save_area_fork(struct pcb *pcb2, const struct pcb *pcb1) |
| 564 | { |
| 565 | ssize_t ; |
| 566 | |
| 567 | /* The pcb itself has been copied, but the xsave area |
| 568 | * extends further. */ |
| 569 | |
| 570 | extra = offsetof(struct pcb, pcb_savefpu) + x86_fpu_save_size - |
| 571 | sizeof (struct pcb); |
| 572 | |
| 573 | if (extra > 0) |
| 574 | memcpy(pcb2 + 1, pcb1 + 1, extra); |
| 575 | } |
| 576 | |
| 577 | |
| 578 | /* |
| 579 | * Write the FP registers. |
| 580 | * Buffer has usually come from userspace so should not be trusted. |
| 581 | */ |
| 582 | void |
| 583 | process_write_fpregs_xmm(struct lwp *l, const struct fxsave *fpregs) |
| 584 | { |
| 585 | union savefpu *fpu_save; |
| 586 | |
| 587 | fpusave_lwp(l, false); |
| 588 | fpu_save = process_fpframe(l); |
| 589 | |
| 590 | if (i386_use_fxsave) { |
| 591 | memcpy(&fpu_save->sv_xmm, fpregs, sizeof(fpu_save->sv_xmm)); |
| 592 | /* Invalid bits in the mxcsr_mask will cause faults */ |
| 593 | fpu_save->sv_xmm.fx_mxcsr_mask &= __INITIAL_MXCSR_MASK__; |
| 594 | } else { |
| 595 | process_xmm_to_s87(fpregs, &fpu_save->sv_87); |
| 596 | } |
| 597 | } |
| 598 | |
| 599 | /* We need to use x87 format for 32bit ptrace */ |
| 600 | void |
| 601 | process_write_fpregs_s87(struct lwp *l, const struct save87 *fpregs) |
| 602 | { |
| 603 | union savefpu *fpu_save; |
| 604 | |
| 605 | if (i386_use_fxsave) { |
| 606 | /* Save so we don't lose the xmm registers */ |
| 607 | fpusave_lwp(l, true); |
| 608 | fpu_save = process_fpframe(l); |
| 609 | process_s87_to_xmm(fpregs, &fpu_save->sv_xmm); |
| 610 | } else { |
| 611 | fpusave_lwp(l, false); |
| 612 | fpu_save = process_fpframe(l); |
| 613 | memcpy(&fpu_save->sv_87, fpregs, sizeof(fpu_save->sv_87)); |
| 614 | } |
| 615 | } |
| 616 | |
| 617 | /* |
| 618 | * Read fpu registers, the buffer is usually copied out to userspace. |
| 619 | * Ensure we write to the entire structure. |
| 620 | */ |
| 621 | void |
| 622 | process_read_fpregs_xmm(struct lwp *l, struct fxsave *fpregs) |
| 623 | { |
| 624 | union savefpu *fpu_save; |
| 625 | |
| 626 | fpusave_lwp(l, true); |
| 627 | fpu_save = process_fpframe(l); |
| 628 | |
| 629 | if (i386_use_fxsave) { |
| 630 | memcpy(fpregs, &fpu_save->sv_xmm, sizeof(fpu_save->sv_xmm)); |
| 631 | } else { |
| 632 | /* This usually gets copied to userspace */ |
| 633 | memset(fpregs, 0, sizeof(*fpregs)); |
| 634 | process_s87_to_xmm(&fpu_save->sv_87, fpregs); |
| 635 | } |
| 636 | } |
| 637 | |
| 638 | void |
| 639 | process_read_fpregs_s87(struct lwp *l, struct save87 *fpregs) |
| 640 | { |
| 641 | union savefpu *fpu_save; |
| 642 | |
| 643 | fpusave_lwp(l, true); |
| 644 | fpu_save = process_fpframe(l); |
| 645 | |
| 646 | if (i386_use_fxsave) { |
| 647 | memset(fpregs, 0, 12); |
| 648 | process_xmm_to_s87(&fpu_save->sv_xmm, fpregs); |
| 649 | } else { |
| 650 | memcpy(fpregs, &fpu_save->sv_87, sizeof(fpu_save->sv_87)); |
| 651 | } |
| 652 | } |
| 653 | |