| 1 | /* $NetBSD: rf_stripelocks.c,v 1.32 2011/05/05 08:21:29 mrg Exp $ */ |
| 2 | /* |
| 3 | * Copyright (c) 1995 Carnegie-Mellon University. |
| 4 | * All rights reserved. |
| 5 | * |
| 6 | * Authors: Mark Holland, Jim Zelenka |
| 7 | * |
| 8 | * Permission to use, copy, modify and distribute this software and |
| 9 | * its documentation is hereby granted, provided that both the copyright |
| 10 | * notice and this permission notice appear in all copies of the |
| 11 | * software, derivative works or modified versions, and any portions |
| 12 | * thereof, and that both notices appear in supporting documentation. |
| 13 | * |
| 14 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
| 15 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND |
| 16 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
| 17 | * |
| 18 | * Carnegie Mellon requests users of this software to return to |
| 19 | * |
| 20 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
| 21 | * School of Computer Science |
| 22 | * Carnegie Mellon University |
| 23 | * Pittsburgh PA 15213-3890 |
| 24 | * |
| 25 | * any improvements or extensions that they make and grant Carnegie the |
| 26 | * rights to redistribute these changes. |
| 27 | */ |
| 28 | |
| 29 | /* |
| 30 | * stripelocks.c -- code to lock stripes for read and write access |
| 31 | * |
| 32 | * The code distinguishes between read locks and write locks. There can be |
 * as many readers to a given stripe as desired.  When a write request comes
 * in, no further readers are allowed to enter, and all subsequent requests
 * are queued in FIFO order.  When the number of readers goes to zero, the
 * writer is given the lock.  When a writer releases the lock, the list of
 * queued requests is scanned, and all readers up to the next writer are
 * given the lock.
| 39 | * |
 * The lock table size must be a power of two, but HASH_STRIPEID (which masks
 * the stripe ID with the table size minus one) is the only code that
 * requires this.
| 42 | * |
| 43 | * The code now supports "range locks". When you ask to lock a stripe, you |
| 44 | * specify a range of addresses in that stripe that you want to lock. When |
| 45 | * you acquire the lock, you've locked only this range of addresses, and |
| 46 | * other threads can concurrently read/write any non-overlapping portions |
| 47 | * of the stripe. The "addresses" that you lock are abstract in that you |
| 48 | * can pass in anything you like. The expectation is that you'll pass in |
| 49 | * the range of physical disk offsets of the parity bits you're planning |
| 50 | * to update. The idea behind this, of course, is to allow sub-stripe |
| 51 | * locking. The implementation is perhaps not the best imaginable; in the |
| 52 | * worst case a lock release is O(n^2) in the total number of outstanding |
| 53 | * requests to a given stripe. Note that if you're striping with a |
| 54 | * stripe unit size equal to an entire disk (i.e. not striping), there will |
| 55 | * be only one stripe and you may spend some significant number of cycles |
| 56 | * searching through stripe lock descriptors. |
| 57 | */ |
| 58 | |
| 59 | #include <sys/cdefs.h> |
| 60 | __KERNEL_RCSID(0, "$NetBSD: rf_stripelocks.c,v 1.32 2011/05/05 08:21:29 mrg Exp $" ); |
| 61 | |
| 62 | #include <dev/raidframe/raidframevar.h> |
| 63 | |
| 64 | #include "rf_raid.h" |
| 65 | #include "rf_stripelocks.h" |
| 66 | #include "rf_alloclist.h" |
| 67 | #include "rf_debugprint.h" |
| 68 | #include "rf_general.h" |
| 69 | #include "rf_driver.h" |
| 70 | #include "rf_shutdown.h" |
| 71 | |
/*
 * Debug tracing helpers.  DprintfN forwards a format string plus N
 * arguments to rf_debug_printf(), which is always called with eight
 * argument slots; unused slots are filled with NULL.  Every argument is
 * passed as a void * after a cast through unsigned long.
 *
 * Without DEBUG each DprintfN expands to an empty statement that does not
 * evaluate its arguments.  The stub is written as do { } while (0) so that
 * "Dprintf1(...);" is a single statement and stays valid as the body of an
 * unbraced if/else.
 */
#ifdef DEBUG

/* Pack one argument into the void * slot used by rf_debug_printf(). */
#define RF_SL_DBGARG(x)	((void *)((unsigned long)(x)))

#define Dprintf1(s,a) \
	rf_debug_printf(s,RF_SL_DBGARG(a),NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf2(s,a,b) \
	rf_debug_printf(s,RF_SL_DBGARG(a),RF_SL_DBGARG(b),NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf3(s,a,b,c) \
	rf_debug_printf(s,RF_SL_DBGARG(a),RF_SL_DBGARG(b),RF_SL_DBGARG(c),NULL,NULL,NULL,NULL,NULL)
#define Dprintf4(s,a,b,c,d) \
	rf_debug_printf(s,RF_SL_DBGARG(a),RF_SL_DBGARG(b),RF_SL_DBGARG(c),RF_SL_DBGARG(d),NULL,NULL,NULL,NULL)
#define Dprintf5(s,a,b,c,d,e) \
	rf_debug_printf(s,RF_SL_DBGARG(a),RF_SL_DBGARG(b),RF_SL_DBGARG(c),RF_SL_DBGARG(d),RF_SL_DBGARG(e),NULL,NULL,NULL)
#define Dprintf6(s,a,b,c,d,e,f) \
	rf_debug_printf(s,RF_SL_DBGARG(a),RF_SL_DBGARG(b),RF_SL_DBGARG(c),RF_SL_DBGARG(d),RF_SL_DBGARG(e),RF_SL_DBGARG(f),NULL,NULL)
#define Dprintf7(s,a,b,c,d,e,f,g) \
	rf_debug_printf(s,RF_SL_DBGARG(a),RF_SL_DBGARG(b),RF_SL_DBGARG(c),RF_SL_DBGARG(d),RF_SL_DBGARG(e),RF_SL_DBGARG(f),RF_SL_DBGARG(g),NULL)
#define Dprintf8(s,a,b,c,d,e,f,g,h) \
	rf_debug_printf(s,RF_SL_DBGARG(a),RF_SL_DBGARG(b),RF_SL_DBGARG(c),RF_SL_DBGARG(d),RF_SL_DBGARG(e),RF_SL_DBGARG(f),RF_SL_DBGARG(g),RF_SL_DBGARG(h))

#else /* DEBUG */

#define Dprintf1(s,a)			do { } while (0)
#define Dprintf2(s,a,b)			do { } while (0)
#define Dprintf3(s,a,b,c)		do { } while (0)
#define Dprintf4(s,a,b,c,d)		do { } while (0)
#define Dprintf5(s,a,b,c,d,e)		do { } while (0)
#define Dprintf6(s,a,b,c,d,e,f)		do { } while (0)
#define Dprintf7(s,a,b,c,d,e,f,g)	do { } while (0)
#define Dprintf8(s,a,b,c,d,e,f,g,h)	do { } while (0)

#endif /* DEBUG */
| 95 | |
/* FLUSH expands to nothing; it is written after debug prints in this file. */
#define FLUSH

/*
 * Select the lock-table bucket for a stripe ID by masking it with
 * (rf_lockTableSize - 1).
 */
#define HASH_STRIPEID(_sid_)	((_sid_) & (rf_lockTableSize - 1))
| 99 | |
| 100 | static void AddToWaitersQueue(RF_StripeLockDesc_t * lockDesc, |
| 101 | RF_LockReqDesc_t * lockReqDesc); |
| 102 | static RF_StripeLockDesc_t *AllocStripeLockDesc(RF_StripeNum_t stripeID); |
| 103 | static void FreeStripeLockDesc(RF_StripeLockDesc_t * p); |
| 104 | static RF_LockTableEntry_t *rf_MakeLockTable(void); |
| 105 | #if RF_DEBUG_STRIPELOCK |
| 106 | static void PrintLockedStripes(RF_LockTableEntry_t * lockTable); |
| 107 | #endif |
| 108 | |
/*
 * Determines if the inclusive ranges [_strt1, _stop1] and [_strt2, _stop2]
 * overlap.  Always yields false if either start value is negative.
 * Arguments are evaluated more than once, so they must be free of side
 * effects.
 */
#define SINGLE_RANGE_OVERLAP(_strt1, _stop1, _strt2, _stop2)		\
	( ((_strt1) >= 0) && ((_strt2) >= 0) &&				\
	  (RF_MAX((_strt1), (_strt2)) <= RF_MIN((_stop1), (_stop2))) )

/*
 * Determines if any of the ranges specified in the two lock descriptors
 * overlap each other.  Each descriptor carries two ranges (start/stop and
 * start2/stop2), so all four pairings are checked.
 */
#define RANGE_OVERLAP(_cand, _pred)					\
	( SINGLE_RANGE_OVERLAP((_cand)->start,  (_cand)->stop,		\
			       (_pred)->start,  (_pred)->stop ) ||	\
	  SINGLE_RANGE_OVERLAP((_cand)->start2, (_cand)->stop2,	\
			       (_pred)->start,  (_pred)->stop ) ||	\
	  SINGLE_RANGE_OVERLAP((_cand)->start,  (_cand)->stop,		\
			       (_pred)->start2, (_pred)->stop2) ||	\
	  SINGLE_RANGE_OVERLAP((_cand)->start2, (_cand)->stop2,	\
			       (_pred)->start2, (_pred)->stop2) )

/*
 * Determines if a candidate lock request conflicts with a predecessor
 * lock request.  Note that the arguments are not interchangeable.
 *
 * The rules are:
 *
 *	a candidate read conflicts with a predecessor write if any
 *	ranges overlap
 *
 *	a candidate write conflicts with a predecessor read if any
 *	ranges overlap
 *
 *	a candidate write conflicts with a predecessor write if any
 *	ranges overlap
 *
 * The whole expansion is parenthesized so the macro can be negated or
 * combined with other operators safely.
 */
#define STRIPELOCK_CONFLICT(_cand, _pred)				\
	( RANGE_OVERLAP((_cand), (_pred)) &&				\
	  ( (((_cand)->type == RF_IO_TYPE_READ) &&			\
	     ((_pred)->type == RF_IO_TYPE_WRITE)) ||			\
	    (((_cand)->type == RF_IO_TYPE_WRITE) &&			\
	     ((_pred)->type == RF_IO_TYPE_READ)) ||			\
	    (((_cand)->type == RF_IO_TYPE_WRITE) &&			\
	     ((_pred)->type == RF_IO_TYPE_WRITE))			\
	  )								\
	)
| 152 | |
/* Sizing values passed to rf_pool_init() for the stripe lock pool. */
#define RF_MIN_FREE_STRIPELOCK	32
#define RF_MAX_FREE_STRIPELOCK	128
| 155 | |
| 156 | static void rf_ShutdownStripeLocks(RF_LockTableEntry_t * lockTable); |
| 157 | static void rf_ShutdownStripeLockFreeList(void *); |
| 158 | static void rf_RaidShutdownStripeLocks(void *); |
| 159 | |
| 160 | static void |
| 161 | rf_ShutdownStripeLockFreeList(void *ignored) |
| 162 | { |
| 163 | pool_destroy(&rf_pools.stripelock); |
| 164 | } |
| 165 | |
| 166 | int |
| 167 | rf_ConfigureStripeLockFreeList(RF_ShutdownList_t **listp) |
| 168 | { |
| 169 | unsigned mask; |
| 170 | |
| 171 | rf_pool_init(&rf_pools.stripelock, sizeof(RF_StripeLockDesc_t), |
| 172 | "rf_stripelock_pl" , RF_MIN_FREE_STRIPELOCK, RF_MAX_FREE_STRIPELOCK); |
| 173 | rf_ShutdownCreate(listp, rf_ShutdownStripeLockFreeList, NULL); |
| 174 | |
| 175 | for (mask = 0x1; mask; mask <<= 1) |
| 176 | if (rf_lockTableSize == mask) |
| 177 | break; |
| 178 | if (!mask) { |
| 179 | printf("[WARNING: lock table size must be a power of two. Setting to %d.]\n" , RF_DEFAULT_LOCK_TABLE_SIZE); |
| 180 | rf_lockTableSize = RF_DEFAULT_LOCK_TABLE_SIZE; |
| 181 | } |
| 182 | return (0); |
| 183 | } |
| 184 | |
| 185 | static void |
| 186 | rf_DestroyLockTable(RF_LockTableEntry_t *lockTable) |
| 187 | { |
| 188 | int i; |
| 189 | |
| 190 | for (i = 0; i < rf_lockTableSize; i++) { |
| 191 | rf_destroy_mutex2(lockTable[i].mutex); |
| 192 | } |
| 193 | RF_Free(lockTable, rf_lockTableSize * sizeof(RF_LockTableEntry_t)); |
| 194 | } |
| 195 | |
| 196 | static RF_LockTableEntry_t * |
| 197 | rf_MakeLockTable(void) |
| 198 | { |
| 199 | RF_LockTableEntry_t *lockTable; |
| 200 | int i; |
| 201 | |
| 202 | RF_Malloc(lockTable, |
| 203 | ((int) rf_lockTableSize) * sizeof(RF_LockTableEntry_t), |
| 204 | (RF_LockTableEntry_t *)); |
| 205 | if (lockTable == NULL) |
| 206 | return (NULL); |
| 207 | for (i = 0; i < rf_lockTableSize; i++) { |
| 208 | rf_init_mutex2(lockTable[i].mutex, IPL_VM); |
| 209 | } |
| 210 | return (lockTable); |
| 211 | } |
| 212 | |
| 213 | static void |
| 214 | rf_ShutdownStripeLocks(RF_LockTableEntry_t * lockTable) |
| 215 | { |
| 216 | |
| 217 | #if RF_DEBUG_STRIPELOCK |
| 218 | if (rf_stripeLockDebug) { |
| 219 | PrintLockedStripes(lockTable); |
| 220 | } |
| 221 | #endif |
| 222 | rf_DestroyLockTable(lockTable); |
| 223 | } |
| 224 | |
| 225 | static void |
| 226 | rf_RaidShutdownStripeLocks(void *arg) |
| 227 | { |
| 228 | RF_Raid_t *raidPtr = (RF_Raid_t *) arg; |
| 229 | rf_ShutdownStripeLocks(raidPtr->lockTable); |
| 230 | } |
| 231 | |
| 232 | int |
| 233 | rf_ConfigureStripeLocks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, |
| 234 | RF_Config_t *cfgPtr) |
| 235 | { |
| 236 | |
| 237 | raidPtr->lockTable = rf_MakeLockTable(); |
| 238 | if (raidPtr->lockTable == NULL) |
| 239 | return (ENOMEM); |
| 240 | rf_ShutdownCreate(listp, rf_RaidShutdownStripeLocks, raidPtr); |
| 241 | |
| 242 | return (0); |
| 243 | } |
| 244 | /* returns 0 if you've got the lock, and non-zero if you have to wait. |
| 245 | * if and only if you have to wait, we'll cause cbFunc to get invoked |
| 246 | * with cbArg when you are granted the lock. We store a tag in |
| 247 | * *releaseTag that you need to give back to us when you release the |
| 248 | * lock. */ |
| 249 | int |
| 250 | rf_AcquireStripeLock(RF_LockTableEntry_t *lockTable, RF_StripeNum_t stripeID, |
| 251 | RF_LockReqDesc_t *lockReqDesc) |
| 252 | { |
| 253 | RF_StripeLockDesc_t *lockDesc; |
| 254 | RF_StripeLockDesc_t *newlockDesc; |
| 255 | RF_LockReqDesc_t *p; |
| 256 | #if defined(DEBUG) && (RF_DEBUG_STRIPELOCK > 0) |
| 257 | int tid = 0; |
| 258 | #endif |
| 259 | int hashval = HASH_STRIPEID(stripeID); |
| 260 | int retcode = 0; |
| 261 | |
| 262 | RF_ASSERT(RF_IO_IS_R_OR_W(lockReqDesc->type)); |
| 263 | |
| 264 | #if RF_DEBUG_STRIPELOCK |
| 265 | if (rf_stripeLockDebug) { |
| 266 | if (stripeID == -1) { |
| 267 | Dprintf1("[%d] Lock acquisition supressed (stripeID == -1)\n" , tid); |
| 268 | } else { |
| 269 | Dprintf8("[%d] Trying to acquire stripe lock table 0x%lx SID %ld type %c range %ld-%ld, range2 %ld-%ld hashval %d\n" , |
| 270 | tid, (unsigned long) lockTable, stripeID, lockReqDesc->type, lockReqDesc->start, |
| 271 | lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2); |
| 272 | Dprintf3("[%d] lock %ld hashval %d\n" , tid, stripeID, hashval); |
| 273 | FLUSH; |
| 274 | } |
| 275 | } |
| 276 | #endif |
| 277 | if (stripeID == -1) |
| 278 | return (0); |
| 279 | lockReqDesc->next = NULL; /* just to be sure */ |
| 280 | newlockDesc = AllocStripeLockDesc(stripeID); |
| 281 | |
| 282 | rf_lock_mutex2(lockTable[hashval].mutex); |
| 283 | for (lockDesc = lockTable[hashval].descList; lockDesc; |
| 284 | lockDesc = lockDesc->next) { |
| 285 | if (lockDesc->stripeID == stripeID) |
| 286 | break; |
| 287 | } |
| 288 | |
| 289 | if (!lockDesc) { |
| 290 | /* no entry in table => no one reading or writing */ |
| 291 | lockDesc = newlockDesc; |
| 292 | lockDesc->next = lockTable[hashval].descList; |
| 293 | lockTable[hashval].descList = lockDesc; |
| 294 | if (lockReqDesc->type == RF_IO_TYPE_WRITE) |
| 295 | lockDesc->nWriters++; |
| 296 | lockDesc->granted = lockReqDesc; |
| 297 | #if RF_DEBUG_STRIPELOCK |
| 298 | if (rf_stripeLockDebug) { |
| 299 | Dprintf7("[%d] no one waiting: lock %ld %c %ld-%ld %ld-%ld granted\n" , |
| 300 | tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2); |
| 301 | FLUSH; |
| 302 | } |
| 303 | #endif |
| 304 | } else { |
| 305 | /* we won't be needing newlockDesc after all.. pity.. */ |
| 306 | FreeStripeLockDesc(newlockDesc); |
| 307 | |
| 308 | if (lockReqDesc->type == RF_IO_TYPE_WRITE) |
| 309 | lockDesc->nWriters++; |
| 310 | |
| 311 | if (lockDesc->nWriters == 0) { |
| 312 | /* no need to search any lists if there are no |
| 313 | * writers anywhere */ |
| 314 | lockReqDesc->next = lockDesc->granted; |
| 315 | lockDesc->granted = lockReqDesc; |
| 316 | #if RF_DEBUG_STRIPELOCK |
| 317 | if (rf_stripeLockDebug) { |
| 318 | Dprintf7("[%d] no writers: lock %ld %c %ld-%ld %ld-%ld granted\n" , |
| 319 | tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2); |
| 320 | FLUSH; |
| 321 | } |
| 322 | #endif |
| 323 | } else { |
| 324 | |
| 325 | /* search the granted & waiting lists for a |
| 326 | * conflict. stop searching as soon as we |
| 327 | * find one */ |
| 328 | retcode = 0; |
| 329 | for (p = lockDesc->granted; p; p = p->next) |
| 330 | if (STRIPELOCK_CONFLICT(lockReqDesc, p)) { |
| 331 | retcode = 1; |
| 332 | break; |
| 333 | } |
| 334 | if (!retcode) |
| 335 | for (p = lockDesc->waitersH; p; p = p->next) |
| 336 | if (STRIPELOCK_CONFLICT(lockReqDesc, p)) { |
| 337 | retcode = 2; |
| 338 | break; |
| 339 | } |
| 340 | if (!retcode) { |
| 341 | /* no conflicts found => grant lock */ |
| 342 | lockReqDesc->next = lockDesc->granted; |
| 343 | lockDesc->granted = lockReqDesc; |
| 344 | #if RF_DEBUG_STRIPELOCK |
| 345 | if (rf_stripeLockDebug) { |
| 346 | Dprintf7("[%d] no conflicts: lock %ld %c %ld-%ld %ld-%ld granted\n" , |
| 347 | tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, |
| 348 | lockReqDesc->start2, lockReqDesc->stop2); |
| 349 | FLUSH; |
| 350 | } |
| 351 | #endif |
| 352 | } else { |
| 353 | #if RF_DEBUG_STRIPELOCK |
| 354 | if (rf_stripeLockDebug) { |
| 355 | Dprintf6("[%d] conflict: lock %ld %c %ld-%ld hashval=%d not granted\n" , |
| 356 | tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, |
| 357 | hashval); |
| 358 | Dprintf3("[%d] lock %ld retcode=%d\n" , tid, stripeID, retcode); |
| 359 | FLUSH; |
| 360 | } |
| 361 | #endif |
| 362 | AddToWaitersQueue(lockDesc, lockReqDesc); |
| 363 | /* conflict => the current access must wait */ |
| 364 | } |
| 365 | } |
| 366 | } |
| 367 | |
| 368 | rf_unlock_mutex2(lockTable[hashval].mutex); |
| 369 | return (retcode); |
| 370 | } |
| 371 | |
| 372 | void |
| 373 | rf_ReleaseStripeLock(RF_LockTableEntry_t *lockTable, RF_StripeNum_t stripeID, |
| 374 | RF_LockReqDesc_t *lockReqDesc) |
| 375 | { |
| 376 | RF_StripeLockDesc_t *lockDesc, *ld_t; |
| 377 | RF_LockReqDesc_t *lr, *lr_t, *callbacklist, *t; |
| 378 | #if defined(DEBUG) && (RF_DEBUG_STRIPELOCK > 0) |
| 379 | int tid = 0; |
| 380 | #endif |
| 381 | int hashval = HASH_STRIPEID(stripeID); |
| 382 | int release_it, consider_it; |
| 383 | RF_LockReqDesc_t *candidate, *candidate_t, *predecessor; |
| 384 | |
| 385 | RF_ASSERT(RF_IO_IS_R_OR_W(lockReqDesc->type)); |
| 386 | |
| 387 | #if RF_DEBUG_STRIPELOCK |
| 388 | if (rf_stripeLockDebug) { |
| 389 | if (stripeID == -1) { |
| 390 | Dprintf1("[%d] Lock release supressed (stripeID == -1)\n" , tid); |
| 391 | } else { |
| 392 | Dprintf8("[%d] Releasing stripe lock on stripe ID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n" , |
| 393 | tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2, lockTable); |
| 394 | FLUSH; |
| 395 | } |
| 396 | } |
| 397 | #endif |
| 398 | if (stripeID == -1) |
| 399 | return; |
| 400 | |
| 401 | rf_lock_mutex2(lockTable[hashval].mutex); |
| 402 | |
| 403 | /* find the stripe lock descriptor */ |
| 404 | for (ld_t = NULL, lockDesc = lockTable[hashval].descList; |
| 405 | lockDesc; ld_t = lockDesc, lockDesc = lockDesc->next) { |
| 406 | if (lockDesc->stripeID == stripeID) |
| 407 | break; |
| 408 | } |
| 409 | RF_ASSERT(lockDesc); /* major error to release a lock that doesn't |
| 410 | * exist */ |
| 411 | |
| 412 | /* find the stripe lock request descriptor & delete it from the list */ |
| 413 | for (lr_t = NULL, lr = lockDesc->granted; lr; lr_t = lr, lr = lr->next) |
| 414 | if (lr == lockReqDesc) |
| 415 | break; |
| 416 | |
| 417 | RF_ASSERT(lr && (lr == lockReqDesc)); /* major error to release a |
| 418 | * lock that hasn't been |
| 419 | * granted */ |
| 420 | if (lr_t) |
| 421 | lr_t->next = lr->next; |
| 422 | else { |
| 423 | RF_ASSERT(lr == lockDesc->granted); |
| 424 | lockDesc->granted = lr->next; |
| 425 | } |
| 426 | lr->next = NULL; |
| 427 | |
| 428 | if (lockReqDesc->type == RF_IO_TYPE_WRITE) |
| 429 | lockDesc->nWriters--; |
| 430 | |
| 431 | /* search through the waiters list to see if anyone needs to |
| 432 | * be woken up. for each such descriptor in the wait list, we |
| 433 | * check it against everything granted and against everything |
| 434 | * _in front_ of it in the waiters queue. If it conflicts |
| 435 | * with none of these, we release it. |
| 436 | * |
| 437 | * DON'T TOUCH THE TEMPLINK POINTER OF ANYTHING IN THE GRANTED |
| 438 | * LIST HERE. |
| 439 | * |
| 440 | * This will roach the case where the callback tries to |
| 441 | * acquire a new lock in the same stripe. There are some |
| 442 | * asserts to try and detect this. |
| 443 | * |
| 444 | * We apply 2 performance optimizations: (1) if releasing this |
| 445 | * lock results in no more writers to this stripe, we just |
| 446 | * release everybody waiting, since we place no restrictions |
| 447 | * on the number of concurrent reads. (2) we consider as |
| 448 | * candidates for wakeup only those waiters that have a range |
| 449 | * overlap with either the descriptor being woken up or with |
| 450 | * something in the callbacklist (i.e. something we've just |
| 451 | * now woken up). This allows us to avoid the long evaluation |
| 452 | * for some descriptors. */ |
| 453 | |
| 454 | callbacklist = NULL; |
| 455 | if (lockDesc->nWriters == 0) { /* performance tweak (1) */ |
| 456 | while (lockDesc->waitersH) { |
| 457 | /* delete from waiters list */ |
| 458 | lr = lockDesc->waitersH; |
| 459 | lockDesc->waitersH = lr->next; |
| 460 | |
| 461 | RF_ASSERT(lr->type == RF_IO_TYPE_READ); |
| 462 | |
| 463 | /* add to granted list */ |
| 464 | lr->next = lockDesc->granted; |
| 465 | lockDesc->granted = lr; |
| 466 | |
| 467 | RF_ASSERT(!lr->templink); |
| 468 | /* put on callback list so that we'll invoke |
| 469 | callback below */ |
| 470 | lr->templink = callbacklist; |
| 471 | callbacklist = lr; |
| 472 | #if RF_DEBUG_STRIPELOCK |
| 473 | if (rf_stripeLockDebug) { |
| 474 | Dprintf8("[%d] No writers: granting lock stripe ID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n" , |
| 475 | tid, stripeID, lr->type, lr->start, lr->stop, lr->start2, lr->stop2, (unsigned long) lockTable); |
| 476 | FLUSH; |
| 477 | } |
| 478 | #endif |
| 479 | } |
| 480 | lockDesc->waitersT = NULL; |
| 481 | /* we've purged the whole waiters list */ |
| 482 | |
| 483 | } else |
| 484 | for (candidate_t = NULL, candidate = lockDesc->waitersH; |
| 485 | candidate;) { |
| 486 | |
| 487 | /* performance tweak (2) */ |
| 488 | consider_it = 0; |
| 489 | if (RANGE_OVERLAP(lockReqDesc, candidate)) |
| 490 | consider_it = 1; |
| 491 | else |
| 492 | for (t = callbacklist; t; t = t->templink) |
| 493 | if (RANGE_OVERLAP(t, candidate)) { |
| 494 | consider_it = 1; |
| 495 | break; |
| 496 | } |
| 497 | if (!consider_it) { |
| 498 | #if RF_DEBUG_STRIPELOCK |
| 499 | if (rf_stripeLockDebug) { |
| 500 | Dprintf8("[%d] No overlap: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n" , |
| 501 | tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, |
| 502 | (unsigned long) lockTable); |
| 503 | FLUSH; |
| 504 | } |
| 505 | #endif |
| 506 | candidate_t = candidate; |
| 507 | candidate = candidate->next; |
| 508 | continue; |
| 509 | } |
| 510 | /* we have a candidate for release. check to |
| 511 | * make sure it is not blocked by any granted |
| 512 | * locks */ |
| 513 | release_it = 1; |
| 514 | for (predecessor = lockDesc->granted; predecessor; |
| 515 | predecessor = predecessor->next) { |
| 516 | if (STRIPELOCK_CONFLICT(candidate, |
| 517 | predecessor)) { |
| 518 | #if RF_DEBUG_STRIPELOCK |
| 519 | if (rf_stripeLockDebug) { |
| 520 | Dprintf8("[%d] Conflicts with granted lock: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n" , |
| 521 | tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, |
| 522 | (unsigned long) lockTable); |
| 523 | FLUSH; |
| 524 | } |
| 525 | #endif |
| 526 | release_it = 0; |
| 527 | break; |
| 528 | } |
| 529 | } |
| 530 | |
| 531 | /* now check to see if the candidate is |
| 532 | * blocked by any waiters that occur before it |
| 533 | * it the wait queue */ |
| 534 | if (release_it) |
| 535 | for (predecessor = lockDesc->waitersH; |
| 536 | predecessor != candidate; |
| 537 | predecessor = predecessor->next) { |
| 538 | if (STRIPELOCK_CONFLICT(candidate, |
| 539 | predecessor)) { |
| 540 | #if RF_DEBUG_STRIPELOCK |
| 541 | if (rf_stripeLockDebug) { |
| 542 | Dprintf8("[%d] Conflicts with waiting lock: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n" , |
| 543 | tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, |
| 544 | (unsigned long) lockTable); |
| 545 | FLUSH; |
| 546 | } |
| 547 | #endif |
| 548 | release_it = 0; |
| 549 | break; |
| 550 | } |
| 551 | } |
| 552 | |
| 553 | /* release it if indicated */ |
| 554 | if (release_it) { |
| 555 | #if RF_DEBUG_STRIPELOCK |
| 556 | if (rf_stripeLockDebug) { |
| 557 | Dprintf8("[%d] Granting lock to candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n" , |
| 558 | tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, |
| 559 | (unsigned long) lockTable); |
| 560 | FLUSH; |
| 561 | } |
| 562 | #endif |
| 563 | if (candidate_t) { |
| 564 | candidate_t->next = candidate->next; |
| 565 | if (lockDesc->waitersT == candidate) |
| 566 | lockDesc->waitersT = candidate_t; /* cannot be waitersH since candidate_t is not NULL */ |
| 567 | } else { |
| 568 | RF_ASSERT(candidate == lockDesc->waitersH); |
| 569 | lockDesc->waitersH = lockDesc->waitersH->next; |
| 570 | if (!lockDesc->waitersH) |
| 571 | lockDesc->waitersT = NULL; |
| 572 | } |
| 573 | /* move it to the granted list */ |
| 574 | candidate->next = lockDesc->granted; |
| 575 | lockDesc->granted = candidate; |
| 576 | |
| 577 | RF_ASSERT(!candidate->templink); |
| 578 | /* put it on the list of things to be |
| 579 | called after we release the mutex */ |
| 580 | candidate->templink = callbacklist; |
| 581 | |
| 582 | callbacklist = candidate; |
| 583 | |
| 584 | if (!candidate_t) |
| 585 | candidate = lockDesc->waitersH; |
| 586 | else |
| 587 | candidate = candidate_t->next; |
| 588 | /* continue with the rest of the list */ |
| 589 | } else { |
| 590 | candidate_t = candidate; |
| 591 | /* continue with the rest of the list */ |
| 592 | candidate = candidate->next; |
| 593 | } |
| 594 | } |
| 595 | |
| 596 | /* delete the descriptor if no one is waiting or active */ |
| 597 | if (!lockDesc->granted && !lockDesc->waitersH) { |
| 598 | RF_ASSERT(lockDesc->nWriters == 0); |
| 599 | #if RF_DEBUG_STRIPELOCK |
| 600 | if (rf_stripeLockDebug) { |
| 601 | Dprintf3("[%d] Last lock released (table 0x%lx): deleting desc for stripeID %ld\n" , tid, (unsigned long) lockTable, stripeID); |
| 602 | FLUSH; |
| 603 | } |
| 604 | #endif |
| 605 | if (ld_t) |
| 606 | ld_t->next = lockDesc->next; |
| 607 | else { |
| 608 | RF_ASSERT(lockDesc == lockTable[hashval].descList); |
| 609 | lockTable[hashval].descList = lockDesc->next; |
| 610 | } |
| 611 | FreeStripeLockDesc(lockDesc); |
| 612 | lockDesc = NULL;/* only for the ASSERT below */ |
| 613 | } |
| 614 | rf_unlock_mutex2(lockTable[hashval].mutex); |
| 615 | |
| 616 | /* now that we've unlocked the mutex, invoke the callback on |
| 617 | * all the descriptors in the list */ |
| 618 | |
| 619 | /* if we deleted the descriptor, we should have no callbacks |
| 620 | * to do */ |
| 621 | RF_ASSERT(!((callbacklist) && (!lockDesc))); |
| 622 | for (candidate = callbacklist; candidate;) { |
| 623 | t = candidate; |
| 624 | candidate = candidate->templink; |
| 625 | t->templink = NULL; |
| 626 | (t->cbFunc) (t->cbArg); |
| 627 | } |
| 628 | } |
| 629 | /* must have the indicated lock table mutex upon entry */ |
| 630 | static void |
| 631 | AddToWaitersQueue(RF_StripeLockDesc_t *lockDesc, RF_LockReqDesc_t *lockReqDesc) |
| 632 | { |
| 633 | if (!lockDesc->waitersH) { |
| 634 | lockDesc->waitersH = lockDesc->waitersT = lockReqDesc; |
| 635 | } else { |
| 636 | lockDesc->waitersT->next = lockReqDesc; |
| 637 | lockDesc->waitersT = lockReqDesc; |
| 638 | } |
| 639 | } |
| 640 | |
| 641 | static RF_StripeLockDesc_t * |
| 642 | AllocStripeLockDesc(RF_StripeNum_t stripeID) |
| 643 | { |
| 644 | RF_StripeLockDesc_t *p; |
| 645 | |
| 646 | p = pool_get(&rf_pools.stripelock, PR_WAITOK); |
| 647 | if (p) { |
| 648 | p->stripeID = stripeID; |
| 649 | p->granted = NULL; |
| 650 | p->waitersH = NULL; |
| 651 | p->waitersT = NULL; |
| 652 | p->nWriters = 0; |
| 653 | p->next = NULL; |
| 654 | } |
| 655 | return (p); |
| 656 | } |
| 657 | |
| 658 | static void |
| 659 | FreeStripeLockDesc(RF_StripeLockDesc_t *p) |
| 660 | { |
| 661 | pool_put(&rf_pools.stripelock, p); |
| 662 | } |
| 663 | |
#if RF_DEBUG_STRIPELOCK
/*
 * Debug aid: prints, for every non-empty bucket of lockTable, each stripe
 * descriptor together with its granted and waiting requests.  Output is
 * produced while holding rf_printf_mutex.
 */
static void
PrintLockedStripes(RF_LockTableEntry_t *lockTable)
{
	int slot, n, any = 0, ended_line;
	RF_StripeLockDesc_t *desc;
	RF_LockReqDesc_t *req;

	rf_lock_mutex2(rf_printf_mutex);
	printf("Locked stripes:\n");
	for (slot = 0; slot < rf_lockTableSize; slot++) {
		if (!lockTable[slot].descList)
			continue;
		any = 1;
		for (desc = lockTable[slot].descList; desc; desc = desc->next) {
			printf("Stripe ID 0x%lx (%d) nWriters %d\n",
			    (long) desc->stripeID, (int) desc->stripeID,
			    desc->nWriters);

			/* granted requests */
			if (desc->granted)
				printf("Granted:\n");
			else
				printf("Granted: (none)\n");
			ended_line = 1;
			n = 0;
			for (req = desc->granted; req; req = req->next, n++) {
				printf(" %c(%ld-%ld", req->type, (long) req->start, (long) req->stop);
				if (req->start2 == -1)
					printf(") ");
				else
					printf(",%ld-%ld) ", (long) req->start2,
					    (long) req->stop2);
				/* break the line after every fourth entry past the first */
				ended_line = (n && !(n % 4));
				if (ended_line)
					printf("\n");
			}
			if (!ended_line)
				printf("\n");

			/* waiting requests */
			if (desc->waitersH)
				printf("Waiting:\n");
			else
				printf("Waiting: (none)\n");
			ended_line = 1;
			n = 0;
			for (req = desc->waitersH; req; req = req->next, n++) {
				printf("%c(%ld-%ld", req->type, (long) req->start, (long) req->stop);
				if (req->start2 == -1)
					printf(") ");
				else
					printf(",%ld-%ld) ", (long) req->start2, (long) req->stop2);
				ended_line = (n && !(n % 4));
				if (ended_line)
					printf("\n ");
			}
			if (!ended_line)
				printf("\n");
		}
	}
	if (any)
		printf("\n");
	else
		printf("(none)\n");
	rf_unlock_mutex2(rf_printf_mutex);
}
#endif
| 731 | |