/*	$NetBSD: pmap_tlb.c,v 1.8 2016/11/13 12:58:40 maxv Exp $	*/

/*-
 * Copyright (c) 2008-2012 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran and Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * x86 pmap(9) module: TLB shootdowns.
 *
 * TLB shootdowns are hard interrupts that operate outside the SPL framework.
 * They do not need to be blocked, provided that the pmap module gets the
 * order of events correct.  The calls are made by poking the LAPIC directly.
 * The interrupt handler is short and does one of the following: invalidate
 * a set of pages, all user TLB entries, or the entire TLB.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: pmap_tlb.c,v 1.8 2016/11/13 12:58:40 maxv Exp $");

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/intr.h>
#include <uvm/uvm.h>

#include <machine/cpuvar.h>
#ifdef XEN
#include <xen/xenpmap.h>
#endif /* XEN */
#include <x86/i82489reg.h>
#include <x86/i82489var.h>

/*
 * TLB shootdown structures.
 */

typedef struct {
#ifdef _LP64
	uintptr_t		tp_va[14];	/* whole struct: 128 bytes */
#else
	uintptr_t		tp_va[13];	/* whole struct: 64 bytes */
#endif
	uint16_t		tp_count;
	uint16_t		tp_pte;
	int			tp_userpmap;
	kcpuset_t *		tp_cpumask;
} pmap_tlb_packet_t;
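
/*
 * Field semantics, as used by the code below:
 *
 *	tp_va/tp_count	the pending virtual addresses; a tp_count of
 *			(uint16_t)-1 means "flush everything"
 *	tp_pte		OR of the PTE bits seen so far; PG_G set here
 *			forces a global (user + kernel) flush
 *	tp_userpmap	non-zero if any request targeted a user pmap
 *	tp_cpumask	the set of CPUs that must process the packet
 */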

/*
 * No more than N separate invlpg.
 *
 * Statistically, a value of six is big enough to cover the requested number
 * of pages in ~ 95% of the TLB shootdowns we are getting.  We therefore
 * rarely reach the limit, and increasing it can actually reduce performance
 * due to the high cost of invlpg.
 */
#define	TP_MAXVA	6

/*
 * TLB shootdown state.
 */
static pmap_tlb_packet_t pmap_tlb_packet	__cacheline_aligned;
static volatile u_int pmap_tlb_pendcount	__cacheline_aligned;
static volatile u_int pmap_tlb_gen		__cacheline_aligned;
static struct evcnt pmap_tlb_evcnt		__cacheline_aligned;
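
/*
 * Together, pmap_tlb_packet, pmap_tlb_pendcount and pmap_tlb_gen form a
 * single global shootdown "mailbox".  The CPU that wins the compare-and-swap
 * on pmap_tlb_pendcount (see pmap_tlb_shootnow) copies its per-CPU packet
 * into pmap_tlb_packet and sends the IPIs.  Each remote CPU acknowledges by
 * decrementing pmap_tlb_pendcount in pmap_tlb_intr, and pmap_tlb_gen lets
 * an initiator detect that its generation of updates has been fully
 * processed.
 */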

/*
 * TLB shootdown statistics.
 */
#ifdef TLBSTATS
static struct evcnt		tlbstat_local[TLBSHOOT__MAX];
static struct evcnt		tlbstat_remote[TLBSHOOT__MAX];
static struct evcnt		tlbstat_kernel[TLBSHOOT__MAX];
static struct evcnt		tlbstat_single_req;
static struct evcnt		tlbstat_single_issue;
static const char *		tlbstat_name[] = {
	"APTE",
	"KENTER",
	"KREMOVE",
	"FREE_PTP1",
	"FREE_PTP2",
	"REMOVE_PTE",
	"REMOVE_PTES",
	"SYNC_PV1",
	"SYNC_PV2",
	"WRITE_PROTECT",
	"ENTER",
	"UPDATE",
	"BUS_DMA",
	"BUS_SPACE"
};
#endif

void
pmap_tlb_init(void)
{

	memset(&pmap_tlb_packet, 0, sizeof(pmap_tlb_packet_t));
	pmap_tlb_pendcount = 0;
	pmap_tlb_gen = 0;

	evcnt_attach_dynamic(&pmap_tlb_evcnt, EVCNT_TYPE_INTR,
	    NULL, "TLB", "shootdown");

#ifdef TLBSTATS
	int i;

	for (i = 0; i < TLBSHOOT__MAX; i++) {
		evcnt_attach_dynamic(&tlbstat_local[i], EVCNT_TYPE_MISC,
		    NULL, "tlbshoot local", tlbstat_name[i]);
	}
	for (i = 0; i < TLBSHOOT__MAX; i++) {
		evcnt_attach_dynamic(&tlbstat_remote[i], EVCNT_TYPE_MISC,
		    NULL, "tlbshoot remote", tlbstat_name[i]);
	}
	for (i = 0; i < TLBSHOOT__MAX; i++) {
		evcnt_attach_dynamic(&tlbstat_kernel[i], EVCNT_TYPE_MISC,
		    NULL, "tlbshoot kernel", tlbstat_name[i]);
	}
	evcnt_attach_dynamic(&tlbstat_single_req, EVCNT_TYPE_MISC,
	    NULL, "tlbshoot single page", "requests");
	evcnt_attach_dynamic(&tlbstat_single_issue, EVCNT_TYPE_MISC,
	    NULL, "tlbshoot single page", "issues");
#endif
}

void
pmap_tlb_cpu_init(struct cpu_info *ci)
{
	pmap_tlb_packet_t *tp = (pmap_tlb_packet_t *)ci->ci_pmap_data;

	memset(tp, 0, sizeof(pmap_tlb_packet_t));
	kcpuset_create(&tp->tp_cpumask, true);
}
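
/*
 * Each CPU thus keeps a private shootdown packet in its ci_pmap_data
 * area: pmap_tlb_shootdown() accumulates requests into it at splvm(),
 * and pmap_tlb_shootnow() later drains it, so the packet needs no
 * locking beyond the SPL raise on the owning CPU.
 */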

static inline void
pmap_tlbstat_count(struct pmap *pm, vaddr_t va, tlbwhy_t why)
{
#ifdef TLBSTATS
	const cpuid_t cid = cpu_index(curcpu());
	bool local = false, remote = false;

	if (va != (vaddr_t)-1LL) {
		atomic_inc_64(&tlbstat_single_req.ev_count);
	}
	if (pm == pmap_kernel()) {
		atomic_inc_64(&tlbstat_kernel[why].ev_count);
		return;
	}

	if (va >= VM_MAXUSER_ADDRESS) {
		remote = kcpuset_isotherset(pm->pm_kernel_cpus, cid);
		local = kcpuset_isset(pm->pm_kernel_cpus, cid);
	}
	remote |= kcpuset_isotherset(pm->pm_cpus, cid);
	local |= kcpuset_isset(pm->pm_cpus, cid);

	if (local) {
		atomic_inc_64(&tlbstat_local[why].ev_count);
	}
	if (remote) {
		atomic_inc_64(&tlbstat_remote[why].ev_count);
	}
#endif
}

static inline void
pmap_tlb_invalidate(const pmap_tlb_packet_t *tp)
{
	int i;

	/* Find out what we need to invalidate. */
	if (tp->tp_count == (uint16_t)-1) {
		u_int egen = uvm_emap_gen_return();
		if (tp->tp_pte & PG_G) {
			/* Invalidating user and kernel TLB entries. */
			tlbflushg();
		} else {
			/* Invalidating user TLB entries only. */
			tlbflush();
		}
		uvm_emap_update(egen);
	} else {
		/* Invalidating a single page or a range of pages. */
		for (i = tp->tp_count - 1; i >= 0; i--) {
			pmap_update_pg(tp->tp_va[i]);
		}
	}
}
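
/*
 * A note on the primitives used above (the real definitions live in the
 * machine-dependent headers; this summary is for orientation only): on
 * native x86, pmap_update_pg() is expected to issue an invlpg for one
 * page, tlbflush() to reload %cr3 and so drop all non-global entries,
 * and tlbflushg() to additionally drop PG_G (global) entries, e.g. by
 * toggling CR4.PGE where it is enabled.  This is also why the per-page
 * path is capped at TP_MAXVA invlpg calls: past a handful of pages, a
 * single full flush is cheaper.
 */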

/*
 * pmap_tlb_shootdown: invalidate a page on all CPUs using pmap 'pm'.
 */
void
pmap_tlb_shootdown(struct pmap *pm, vaddr_t va, pt_entry_t pte, tlbwhy_t why)
{
	pmap_tlb_packet_t *tp;
	int s;

#ifndef XEN
	KASSERT((pte & PG_G) == 0 || pm == pmap_kernel());
#endif

	/*
	 * If tearing down the pmap, do nothing.  We will flush later
	 * when we are ready to recycle/destroy it.
	 */
	if (__predict_false(curlwp->l_md.md_gc_pmap == pm)) {
		return;
	}

	if ((pte & PG_PS) != 0) {
		va &= PG_LGFRAME;
	}

	/*
	 * Add the shootdown operation to our pending set.
	 */
	s = splvm();
	tp = (pmap_tlb_packet_t *)curcpu()->ci_pmap_data;

	/* Whole address flush will be needed if PG_G is set. */
	CTASSERT(PG_G == (uint16_t)PG_G);
	tp->tp_pte |= (uint16_t)pte;

	if (tp->tp_count == (uint16_t)-1) {
		/*
		 * Already flushing everything.
		 */
	} else if (tp->tp_count < TP_MAXVA && va != (vaddr_t)-1LL) {
		/* Flush a single page. */
		tp->tp_va[tp->tp_count++] = va;
		KASSERT(tp->tp_count > 0);
	} else {
		/* Flush everything. */
		tp->tp_count = (uint16_t)-1;
	}

	if (pm != pmap_kernel()) {
		kcpuset_merge(tp->tp_cpumask, pm->pm_cpus);
		if (va >= VM_MAXUSER_ADDRESS) {
			kcpuset_merge(tp->tp_cpumask, pm->pm_kernel_cpus);
		}
		tp->tp_userpmap = 1;
	} else {
		kcpuset_copy(tp->tp_cpumask, kcpuset_running);
	}
	pmap_tlbstat_count(pm, va, why);
	splx(s);
}
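
/*
 * Illustrative usage sketch (not code from this file): a caller such as
 * pmap_remove() would typically queue one shootdown per PTE it changes
 * and flush the whole batch afterwards:
 *
 *	pmap_tlb_shootdown(pm, va, opte, TLBSHOOT_REMOVE_PTE);
 *	...
 *	kpreempt_disable();
 *	pmap_tlb_shootnow();
 *	kpreempt_enable();
 */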

#ifdef MULTIPROCESSOR
#ifdef XEN

static inline void
pmap_tlb_processpacket(pmap_tlb_packet_t *tp, kcpuset_t *target)
{

	if (tp->tp_count != (uint16_t)-1) {
		/* Invalidating a single page or a range of pages. */
		for (int i = tp->tp_count - 1; i >= 0; i--) {
			xen_mcast_invlpg(tp->tp_va[i], target);
		}
	} else {
		xen_mcast_tlbflush(target);
	}

	/* Remote CPUs have been synchronously flushed. */
	pmap_tlb_pendcount = 0;
}

#else

static inline void
pmap_tlb_processpacket(pmap_tlb_packet_t *tp, kcpuset_t *target)
{
	int err = 0;

	if (!kcpuset_match(target, kcpuset_attached)) {
		const struct cpu_info * const self = curcpu();
		CPU_INFO_ITERATOR cii;
		struct cpu_info *lci;

		for (CPU_INFO_FOREACH(cii, lci)) {
			const cpuid_t lcid = cpu_index(lci);

			if (__predict_false(lci == self) ||
			    !kcpuset_isset(target, lcid)) {
				continue;
			}
			err |= x86_ipi(LAPIC_TLB_VECTOR,
			    lci->ci_cpuid, LAPIC_DLMODE_FIXED);
		}
	} else {
		err = x86_ipi(LAPIC_TLB_VECTOR, LAPIC_DEST_ALLEXCL,
		    LAPIC_DLMODE_FIXED);
	}
	KASSERT(err == 0);
}

#endif /* XEN */
#endif /* MULTIPROCESSOR */

/*
 * pmap_tlb_shootnow: process pending TLB shootdowns queued on current CPU.
 *
 * => Must be called with preemption disabled.
 */
void
pmap_tlb_shootnow(void)
{
	pmap_tlb_packet_t *tp;
	struct cpu_info *ci;
	kcpuset_t *target;
	u_int local, gen, rcpucount;
	cpuid_t cid;
	int s;

	KASSERT(kpreempt_disabled());

	ci = curcpu();
	tp = (pmap_tlb_packet_t *)ci->ci_pmap_data;

	/* Pre-check first. */
	if (tp->tp_count == 0) {
		return;
	}

	s = splvm();
	if (tp->tp_count == 0) {
		splx(s);
		return;
	}
	cid = cpu_index(ci);

	target = tp->tp_cpumask;
	local = kcpuset_isset(target, cid) ? 1 : 0;
	rcpucount = kcpuset_countset(target) - local;
	gen = 0;

#ifdef MULTIPROCESSOR
	if (rcpucount) {
		int count;

		/*
		 * Gain ownership of the shootdown mailbox.  We must stay
		 * at IPL_VM once we own it, or we could deadlock against
		 * an interrupt on this CPU trying to do the same.
		 */
		KASSERT(rcpucount < ncpu);

		while (atomic_cas_uint(&pmap_tlb_pendcount, 0, rcpucount)) {
			splx(s);
			count = SPINLOCK_BACKOFF_MIN;
			while (pmap_tlb_pendcount) {
				KASSERT(pmap_tlb_pendcount < ncpu);
				SPINLOCK_BACKOFF(count);
			}
			s = splvm();
			/* An interrupt might have done it for us. */
			if (tp->tp_count == 0) {
				splx(s);
				return;
			}
		}

		/*
		 * Start a new generation of updates.  Copy our shootdown
		 * requests into the global buffer.  Note that tp_cpumask
		 * will not be used by remote CPUs (it would be unsafe).
		 */
		gen = ++pmap_tlb_gen;
		memcpy(&pmap_tlb_packet, tp, sizeof(*tp));
		pmap_tlb_evcnt.ev_count++;

		/*
		 * Initiate shootdowns on remote CPUs.
		 */
		pmap_tlb_processpacket(tp, target);
	}
#endif

	/*
	 * Shootdowns on remote CPUs are now in flight.  In the meantime,
	 * perform local shootdown if needed.
	 */
	if (local) {
		pmap_tlb_invalidate(tp);
	}

	/*
	 * Clear out our local buffer.  This is safe to do before the
	 * remote CPUs acknowledge, since they work from the global copy
	 * made above, not from this CPU's private packet.
	 */
#ifdef TLBSTATS
	if (tp->tp_count != (uint16_t)-1) {
		atomic_add_64(&tlbstat_single_issue.ev_count, tp->tp_count);
	}
#endif
	kcpuset_zero(tp->tp_cpumask);
	tp->tp_userpmap = 0;
	tp->tp_count = 0;
	tp->tp_pte = 0;
	splx(s);

	/*
	 * Now wait for the current generation of updates to be
	 * processed by remote CPUs.
	 */
	if (rcpucount && pmap_tlb_pendcount) {
		int count = SPINLOCK_BACKOFF_MIN;

		while (pmap_tlb_pendcount && pmap_tlb_gen == gen) {
			KASSERT(pmap_tlb_pendcount < ncpu);
			SPINLOCK_BACKOFF(count);
		}
	}
}

/*
 * pmap_tlb_intr: pmap shootdown interrupt handler to invalidate TLB entries.
 *
 * => Called from IPI only.
 */
void
pmap_tlb_intr(void)
{
	const pmap_tlb_packet_t *tp = &pmap_tlb_packet;
	struct cpu_info *ci = curcpu();

	KASSERT(pmap_tlb_pendcount > 0);

	/* First, TLB flush. */
	pmap_tlb_invalidate(tp);

	/*
	 * Check the current TLB state.  If we do not want further
	 * invalidations for this pmap, then take the CPU out of
	 * the pmap's bitmask.
	 */
	if (ci->ci_tlbstate == TLBSTATE_LAZY && tp->tp_userpmap) {
		struct pmap *pm = ci->ci_pmap;
		cpuid_t cid = cpu_index(ci);

		kcpuset_atomic_clear(pm->pm_cpus, cid);
		ci->ci_tlbstate = TLBSTATE_STALE;
	}

	/* Finally, ack the request. */
	atomic_dec_uint(&pmap_tlb_pendcount);
}