/* $NetBSD: kern_sleepq.c,v 1.51 2016/07/03 14:24:58 christos Exp $ */

/*-
 * Copyright (c) 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Sleep queue implementation, used by turnstiles and general sleep/wakeup
 * interfaces.
 */
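
/*
 * Illustrative sketch only: a typical interruptible sleep built on these
 * interfaces looks roughly like the following, where "sobj" stands for
 * the caller's synchronisation object. See the real consumers (for
 * example kern_condvar.c and kern_turnstile.c) for the authoritative
 * call sequences.
 *
 *      sq = sleeptab_lookup(&sleeptab, wchan, &mp);   (locks the queue)
 *      sleepq_enter(sq, curlwp, mp);                  (locks the LWP)
 *      sleepq_enqueue(sq, wchan, "example", sobj);
 *      ... release any interlock held by the caller ...
 *      error = sleepq_block(timo, true);
 *
 * A wake-up on the same channel is roughly:
 *
 *      sq = sleeptab_lookup(&sleeptab, wchan, &mp);
 *      sleepq_wake(sq, wchan, 1, mp);                 (also releases mp)
 */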

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.51 2016/07/03 14:24:58 christos Exp $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/cpu.h>
#include <sys/intr.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/systm.h>
#include <sys/sleepq.h>
#include <sys/ktrace.h>

/*
 * for sleepq_abort:
 * During autoconfiguration or after a panic, a sleep will simply lower the
 * priority briefly to allow interrupts, then return. The priority to be
 * used (IPL_SAFEPRI) is machine-dependent, thus this value is initialized and
 * maintained in the machine-dependent layers. This priority will typically
 * be 0, or the lowest priority that is safe for use on the interrupt stack;
 * it can be made higher to block network software interrupts after panics.
 */
#ifndef IPL_SAFEPRI
#define IPL_SAFEPRI 0
#endif

static int sleepq_sigtoerror(lwp_t *, int);

/* General purpose sleep table, used by mtsleep() and condition variables. */
sleeptab_t sleeptab __cacheline_aligned;

/*
 * sleeptab_init:
 *
 * Initialize a sleep table.
 */
void
sleeptab_init(sleeptab_t *st)
{
        sleepq_t *sq;
        int i;

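        /*
         * Each hash bucket gets its own queue and its own mutex
         * (initialized at IPL_SCHED), so that unrelated wait channels
         * hashing to different buckets do not contend on one lock.
         */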
        for (i = 0; i < SLEEPTAB_HASH_SIZE; i++) {
                sq = &st->st_queues[i].st_queue;
                st->st_queues[i].st_mutex =
                    mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
                sleepq_init(sq);
        }
}

/*
 * sleepq_init:
 *
 * Prepare a sleep queue for use.
 */
void
sleepq_init(sleepq_t *sq)
{

        TAILQ_INIT(sq);
}

/*
 * sleepq_remove:
 *
 * Remove an LWP from a sleep queue and wake it up.
 */
void
sleepq_remove(sleepq_t *sq, lwp_t *l)
{
        struct schedstate_percpu *spc;
        struct cpu_info *ci;

        KASSERT(lwp_locked(l, NULL));

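        /*
         * Detach the LWP from the sleep queue and hand it back to the
         * scheduler's synchronisation object; it no longer has a wait
         * channel.
         */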
        TAILQ_REMOVE(sq, l, l_sleepchain);
        l->l_syncobj = &sched_syncobj;
        l->l_wchan = NULL;
        l->l_sleepq = NULL;
        l->l_flag &= ~LW_SINTR;

        ci = l->l_cpu;
        spc = &ci->ci_schedstate;

        /*
         * If not sleeping, the LWP must have been suspended. Let whoever
         * is holding it stopped set it running again.
         */
        if (l->l_stat != LSSLEEP) {
                KASSERT(l->l_stat == LSSTOP || l->l_stat == LSSUSPENDED);
                lwp_setlock(l, spc->spc_lwplock);
                return;
        }

        /*
         * If the LWP is still on the CPU, mark it as LSONPROC. It may be
         * about to call mi_switch(), in which case it will yield.
         */
        if ((l->l_pflag & LP_RUNNING) != 0) {
                l->l_stat = LSONPROC;
                l->l_slptime = 0;
                lwp_setlock(l, spc->spc_lwplock);
                return;
        }

        /* Update the sleep time delta and call the scheduler's wakeup handler. */
        l->l_slpticksum += (hardclock_ticks - l->l_slpticks);
        sched_wakeup(l);

        /* Look for a CPU to wake up */
        l->l_cpu = sched_takecpu(l);
        ci = l->l_cpu;
        spc = &ci->ci_schedstate;

        /*
         * Set it running.
         */
        spc_lock(ci);
        lwp_setlock(l, spc->spc_mutex);
        sched_setrunnable(l);
        l->l_stat = LSRUN;
        l->l_slptime = 0;
        sched_enqueue(l, false);
        spc_unlock(ci);
}

/*
 * sleepq_insert:
 *
 * Insert an LWP into the sleep queue, optionally sorting by priority.
 */
static void
sleepq_insert(sleepq_t *sq, lwp_t *l, syncobj_t *sobj)
{

        if ((sobj->sobj_flag & SOBJ_SLEEPQ_SORTED) != 0) {
                lwp_t *l2;
                const int pri = lwp_eprio(l);

                TAILQ_FOREACH(l2, sq, l_sleepchain) {
                        if (lwp_eprio(l2) < pri) {
                                TAILQ_INSERT_BEFORE(l2, l, l_sleepchain);
                                return;
                        }
                }
        }

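        /*
         * Either the queue is unsorted, or no LWP with a lower priority
         * was found above: insert at the head for LIFO objects, otherwise
         * append at the tail (FIFO among equal priorities).
         */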
        if ((sobj->sobj_flag & SOBJ_SLEEPQ_LIFO) != 0)
                TAILQ_INSERT_HEAD(sq, l, l_sleepchain);
        else
                TAILQ_INSERT_TAIL(sq, l, l_sleepchain);
}

/*
 * sleepq_enqueue:
 *
 * Enter an LWP into the sleep queue and prepare for sleep. The sleep
 * queue must already be locked, and any interlock (such as the kernel
 * lock) must have been released (see sleeptab_lookup(), sleepq_enter()).
 */
void
sleepq_enqueue(sleepq_t *sq, wchan_t wchan, const char *wmesg, syncobj_t *sobj)
{
        lwp_t *l = curlwp;

        KASSERT(lwp_locked(l, NULL));
        KASSERT(l->l_stat == LSONPROC);
        KASSERT(l->l_wchan == NULL && l->l_sleepq == NULL);

        l->l_syncobj = sobj;
        l->l_wchan = wchan;
        l->l_sleepq = sq;
        l->l_wmesg = wmesg;
        l->l_slptime = 0;
        l->l_stat = LSSLEEP;
        l->l_sleeperr = 0;

        sleepq_insert(sq, l, sobj);

        /* Record the time at which the LWP went to sleep. */
        l->l_slpticks = hardclock_ticks;
        sched_slept(l);
}

/*
 * sleepq_block:
 *
 * After any intermediate step such as releasing an interlock, switch.
 * sleepq_block() may return early under exceptional conditions, for
 * example if the LWP's containing process is exiting.
 *
 * timo is a timeout in ticks. timo = 0 specifies an infinite timeout.
 */
int
sleepq_block(int timo, bool catch_p)
{
        int error = 0, sig;
        struct proc *p;
        lwp_t *l = curlwp;
        bool early = false;
        int biglocks = l->l_biglocks;

        ktrcsw(1, 0);

        /*
         * If sleeping interruptibly, check for pending signals, exits or
         * core dump events.
         */
        if (catch_p) {
                l->l_flag |= LW_SINTR;
                if ((l->l_flag & (LW_CANCELLED|LW_WEXIT|LW_WCORE)) != 0) {
                        l->l_flag &= ~LW_CANCELLED;
                        error = EINTR;
                        early = true;
                } else if ((l->l_flag & LW_PENDSIG) != 0 && sigispending(l, 0))
                        early = true;
        }

        if (early) {
                /* lwp_unsleep() will release the lock */
                lwp_unsleep(l, true);
        } else {
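                /*
                 * If a timeout was supplied, arrange for sleepq_timeout()
                 * to wake the LWP when it expires.
                 */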
                if (timo) {
                        callout_schedule(&l->l_timeout_ch, timo);
                }
                mi_switch(l);

                /* The LWP and sleep queue are now unlocked. */
                if (timo) {
                        /*
                         * Even if the callout appears to have fired, we need to
                         * stop it in order to synchronise with other CPUs.
                         */
                        if (callout_halt(&l->l_timeout_ch, NULL))
                                error = EWOULDBLOCK;
                }
        }

        if (catch_p && error == 0) {
                p = l->l_proc;
                if ((l->l_flag & (LW_CANCELLED | LW_WEXIT | LW_WCORE)) != 0)
                        error = EINTR;
                else if ((l->l_flag & LW_PENDSIG) != 0) {
                        /*
                         * Acquiring p_lock may cause us to recurse
                         * through the sleep path and back into this
                         * routine, but is safe because LWPs sleeping
                         * on locks are non-interruptible. We will
                         * not recurse again.
                         */
                        mutex_enter(p->p_lock);
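                        /*
                         * Report a pending signal that is not a stop
                         * signal; otherwise ask issignal() whether there
                         * is a signal that must be taken.
                         */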
                        if (((sig = sigispending(l, 0)) != 0 &&
                            (sigprop[sig] & SA_STOP) == 0) ||
                            (sig = issignal(l)) != 0)
                                error = sleepq_sigtoerror(l, sig);
                        mutex_exit(p->p_lock);
                }
        }

        ktrcsw(0, 0);
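        /* Re-acquire any kernel_lock holds that were released when going to sleep. */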
        if (__predict_false(biglocks != 0)) {
                KERNEL_LOCK(biglocks, NULL);
        }
        return error;
}

/*
 * sleepq_wake:
 *
 * Wake LWPs blocked on a single wait channel, waking at most 'expected'
 * of them.
 */
void
sleepq_wake(sleepq_t *sq, wchan_t wchan, u_int expected, kmutex_t *mp)
{
        lwp_t *l, *next;

        KASSERT(mutex_owned(mp));

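        /*
         * A hashed sleep queue may hold LWPs sleeping on several
         * different wait channels, so skip any that do not match 'wchan'.
         */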
        for (l = TAILQ_FIRST(sq); l != NULL; l = next) {
                KASSERT(l->l_sleepq == sq);
                KASSERT(l->l_mutex == mp);
                next = TAILQ_NEXT(l, l_sleepchain);
                if (l->l_wchan != wchan)
                        continue;
                sleepq_remove(sq, l);
                if (--expected == 0)
                        break;
        }

        mutex_spin_exit(mp);
}

/*
 * sleepq_unsleep:
 *
 * Remove an LWP from its sleep queue and set it runnable again.
 * sleepq_unsleep() is called with the LWP's mutex held, and will
 * release it if 'cleanup' is true.
 */
void
sleepq_unsleep(lwp_t *l, bool cleanup)
{
        sleepq_t *sq = l->l_sleepq;
        kmutex_t *mp = l->l_mutex;

        KASSERT(lwp_locked(l, mp));
        KASSERT(l->l_wchan != NULL);

        sleepq_remove(sq, l);
        if (cleanup) {
                mutex_spin_exit(mp);
        }
}

/*
 * sleepq_timeout:
 *
 * Entered via the callout(9) subsystem to time out an LWP that is on a
 * sleep queue.
 */
void
sleepq_timeout(void *arg)
{
        lwp_t *l = arg;

        /*
         * Lock the LWP. Assuming it's still on the sleep queue, its
         * current mutex will also be the sleep queue mutex.
         */
        lwp_lock(l);

        if (l->l_wchan == NULL) {
                /* Somebody beat us to it. */
                lwp_unlock(l);
                return;
        }

        lwp_unsleep(l, true);
}

/*
 * sleepq_sigtoerror:
 *
 * Given a signal number, interpret and return an error code.
 */
static int
sleepq_sigtoerror(lwp_t *l, int sig)
{
        struct proc *p = l->l_proc;
        int error;

        KASSERT(mutex_owned(p->p_lock));

        /*
         * If this sleep was canceled, don't let the syscall restart.
         */
        if ((SIGACTION(p, sig).sa_flags & SA_RESTART) == 0)
                error = EINTR;
        else
                error = ERESTART;

        return error;
}

/*
 * sleepq_abort:
 *
 * After a panic or during autoconfiguration, lower the interrupt
 * priority level to give pending interrupts a chance to run, and
 * then return. Called if sleepq_dontsleep() returns non-zero, and
 * always returns zero.
 */
int
sleepq_abort(kmutex_t *mtx, int unlock)
{
        int s;

        s = splhigh();
        splx(IPL_SAFEPRI);
        splx(s);
        if (mtx != NULL && unlock != 0)
                mutex_exit(mtx);

        return 0;
}

/*
 * sleepq_reinsert:
 *
 * Move the LWP to its correct position in the sleep queue after a
 * possible change of its effective priority.
 */
static void
sleepq_reinsert(sleepq_t *sq, lwp_t *l)
{

        KASSERT(l->l_sleepq == sq);
        if ((l->l_syncobj->sobj_flag & SOBJ_SLEEPQ_SORTED) == 0) {
                return;
        }

        /*
         * Don't let the sleep queue become empty, even briefly.
         * cv_signal() and cv_broadcast() inspect it without the
         * sleep queue lock held and need to see a non-empty queue
         * head if there are waiters.
         */
        if (TAILQ_FIRST(sq) == l && TAILQ_NEXT(l, l_sleepchain) == NULL) {
                return;
        }
        TAILQ_REMOVE(sq, l, l_sleepchain);
        sleepq_insert(sq, l, l->l_syncobj);
}

/*
 * sleepq_changepri:
 *
 * Adjust the priority of an LWP residing on a sleepq.
 */
void
sleepq_changepri(lwp_t *l, pri_t pri)
{
        sleepq_t *sq = l->l_sleepq;

        KASSERT(lwp_locked(l, NULL));

        l->l_priority = pri;
        sleepq_reinsert(sq, l);
}

/*
 * sleepq_lendpri:
 *
 * Adjust the lent (inherited) priority of an LWP residing on a sleepq.
 */
void
sleepq_lendpri(lwp_t *l, pri_t pri)
{
        sleepq_t *sq = l->l_sleepq;

        KASSERT(lwp_locked(l, NULL));

        l->l_inheritedprio = pri;
        l->l_auxprio = MAX(l->l_inheritedprio, l->l_protectprio);
        sleepq_reinsert(sq, l);
}