/*	$NetBSD: rf_paritylog.c,v 1.18 2011/05/11 06:03:06 mrg Exp $	*/
/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: William V. Courtright II
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * Code for manipulating in-core parity logs.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_paritylog.c,v 1.18 2011/05/11 06:03:06 mrg Exp $");

#include "rf_archs.h"

#if RF_INCLUDE_PARITYLOGGING > 0

/*
 * Append-only log for recording parity "update" and "overwrite" records.
 */

#include <dev/raidframe/raidframevar.h>

#include "rf_threadstuff.h"
#include "rf_mcpair.h"
#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_dagfuncs.h"
#include "rf_desc.h"
#include "rf_layout.h"
#include "rf_diskqueue.h"
#include "rf_etimer.h"
#include "rf_paritylog.h"
#include "rf_general.h"
#include "rf_map.h"
#include "rf_paritylogging.h"
#include "rf_paritylogDiskMgr.h"

static RF_CommonLogData_t *
AllocParityLogCommonData(RF_Raid_t * raidPtr)
{
	RF_CommonLogData_t *common = NULL;

	/* Return a struct for holding common parity log information from the
	 * free list (parityLogDiskQueue.freeCommonList). If the free list
	 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */

	rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
	if (raidPtr->parityLogDiskQueue.freeCommonList) {
		common = raidPtr->parityLogDiskQueue.freeCommonList;
		raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
		rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
	} else {
		rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
		RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
		/* destroy is in rf_paritylogging.c */
		rf_init_mutex2(common->mutex, IPL_VM);
	}
	common->next = NULL;
	return (common);
}

static void
FreeParityLogCommonData(RF_CommonLogData_t * common)
{
	RF_Raid_t *raidPtr;

	/* Insert a single struct for holding parity log information (data)
	 * into the free list (parityLogDiskQueue.freeCommonList).
	 * NON-BLOCKING */

	raidPtr = common->raidPtr;
	rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
	common->next = raidPtr->parityLogDiskQueue.freeCommonList;
	raidPtr->parityLogDiskQueue.freeCommonList = common;
	rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
}

static RF_ParityLogData_t *
AllocParityLogData(RF_Raid_t * raidPtr)
{
	RF_ParityLogData_t *data = NULL;

	/* Return a struct for holding parity log information from the free
	 * list (parityLogDiskQueue.freeDataList). If the free list is
	 * empty, call RF_Malloc to create a new structure. NON-BLOCKING */

	rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
	if (raidPtr->parityLogDiskQueue.freeDataList) {
		data = raidPtr->parityLogDiskQueue.freeDataList;
		raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
		rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
	} else {
		rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
		RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
	}
	data->next = NULL;
	data->prev = NULL;
	return (data);
}


static void
FreeParityLogData(RF_ParityLogData_t * data)
{
	RF_ParityLogData_t *nextItem;
	RF_Raid_t *raidPtr;

	/* Insert a linked list of structs for holding parity log information
	 * (data) into the free list (parityLogDiskQueue.freeDataList).
	 * NON-BLOCKING */

	raidPtr = data->common->raidPtr;
	rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
	while (data) {
		nextItem = data->next;
		data->next = raidPtr->parityLogDiskQueue.freeDataList;
		raidPtr->parityLogDiskQueue.freeDataList = data;
		data = nextItem;
	}
	rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
}


static void
EnqueueParityLogData(
	RF_ParityLogData_t * data,
	RF_ParityLogData_t ** head,
	RF_ParityLogData_t ** tail)
{
	RF_Raid_t *raidPtr;

	/* Insert an in-core parity log (*data) into the head of a disk queue
	 * (*head, *tail). NON-BLOCKING */

	raidPtr = data->common->raidPtr;
	if (rf_parityLogDebug)
		printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
	RF_ASSERT(data->prev == NULL);
	RF_ASSERT(data->next == NULL);
	rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
	if (*head) {
		/* insert into head of queue */
		RF_ASSERT((*head)->prev == NULL);
		RF_ASSERT((*tail)->next == NULL);
		data->next = *head;
		(*head)->prev = data;
		*head = data;
	} else {
		/* insert into empty list */
		RF_ASSERT(*head == NULL);
		RF_ASSERT(*tail == NULL);
		*head = data;
		*tail = data;
	}
	RF_ASSERT((*head)->prev == NULL);
	RF_ASSERT((*tail)->next == NULL);
	rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
}

static RF_ParityLogData_t *
DequeueParityLogData(
	RF_Raid_t * raidPtr,
	RF_ParityLogData_t ** head,
	RF_ParityLogData_t ** tail,
	int ignoreLocks)
{
	RF_ParityLogData_t *data;

	/* Remove and return an in-core parity log from the tail of a disk
	 * queue (*head, *tail). NON-BLOCKING */

	/* remove from tail, preserving FIFO order */
	if (!ignoreLocks)
		rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
	data = *tail;
	if (data) {
		if (*head == *tail) {
			/* removing last item from queue */
			*head = NULL;
			*tail = NULL;
		} else {
			*tail = (*tail)->prev;
			(*tail)->next = NULL;
			RF_ASSERT((*head)->prev == NULL);
			RF_ASSERT((*tail)->next == NULL);
		}
		data->next = NULL;
		data->prev = NULL;
		if (rf_parityLogDebug)
			printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
	}
	if (*head) {
		RF_ASSERT((*head)->prev == NULL);
		RF_ASSERT((*tail)->next == NULL);
	}
	if (!ignoreLocks)
		rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
	return (data);
}


static void
RequeueParityLogData(
	RF_ParityLogData_t * data,
	RF_ParityLogData_t ** head,
	RF_ParityLogData_t ** tail)
{
	RF_Raid_t *raidPtr;

	/* Insert an in-core parity log (*data) into the tail of a disk queue
	 * (*head, *tail). NON-BLOCKING */
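
	/* Note: EnqueueParityLogData inserts at the head while
	 * DequeueParityLogData removes from the tail, so an item requeued
	 * here at the tail is serviced ahead of newly enqueued work,
	 * keeping its place at the front of the service order. */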

	raidPtr = data->common->raidPtr;
	RF_ASSERT(data);
	if (rf_parityLogDebug)
		printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
	rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
	if (*tail) {
		/* append to tail of list */
		data->prev = *tail;
		data->next = NULL;
		(*tail)->next = data;
		*tail = data;
	} else {
		/* inserting into an empty list */
		*head = data;
		*tail = data;
		(*head)->prev = NULL;
		(*tail)->next = NULL;
	}
	RF_ASSERT((*head)->prev == NULL);
	RF_ASSERT((*tail)->next == NULL);
	rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
}

RF_ParityLogData_t *
rf_CreateParityLogData(
	RF_ParityRecordType_t operation,
	RF_PhysDiskAddr_t * pda,
	void *bufPtr,
	RF_Raid_t * raidPtr,
	int (*wakeFunc) (RF_DagNode_t * node, int status),
	void *wakeArg,
	RF_AccTraceEntry_t * tracerec,
	RF_Etimer_t startTime)
{
	RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
	RF_CommonLogData_t *common;
	RF_PhysDiskAddr_t *diskAddress;
	int boundary, offset = 0;

	/* Return an initialized struct of info to be logged. Build one item
	 * per physical disk address, one item per region.
	 *
	 * NON-BLOCKING */

	diskAddress = pda;
	common = AllocParityLogCommonData(raidPtr);
	RF_ASSERT(common);

	common->operation = operation;
	common->bufPtr = bufPtr;
	common->raidPtr = raidPtr;
	common->wakeFunc = wakeFunc;
	common->wakeArg = wakeArg;
	common->tracerec = tracerec;
	common->startTime = startTime;
	common->cnt = 0;

	if (rf_parityLogDebug)
		printf("[entering CreateParityLogData]\n");
	while (diskAddress) {
		common->cnt++;
		data = AllocParityLogData(raidPtr);
		RF_ASSERT(data);
		data->common = common;
		data->next = NULL;
		data->prev = NULL;
		data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
		if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
			/* disk address does not cross a region boundary */
			data->diskAddress = *diskAddress;
			data->bufOffset = offset;
			offset = offset + diskAddress->numSector;
			EnqueueParityLogData(data, &resultHead, &resultTail);
			/* adjust disk address */
			diskAddress = diskAddress->next;
		} else {
			/* disk address crosses a region boundary */
			/* find address where region is crossed */
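			/* Hypothetical example: if the region boundary lies 4
			 * sectors past startSector, a 10-sector address is
			 * split into a 4-sector item logged in this region
			 * and a 6-sector remainder re-examined on the next
			 * pass of the loop. The scan below locates the split
			 * point one sector at a time. */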
			boundary = 0;
			while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
				boundary++;

			/* enter data before the boundary */
			data->diskAddress = *diskAddress;
			data->diskAddress.numSector = boundary;
			data->bufOffset = offset;
			offset += boundary;
			EnqueueParityLogData(data, &resultHead, &resultTail);
			/* adjust disk address */
			diskAddress->startSector += boundary;
			diskAddress->numSector -= boundary;
		}
	}
	if (rf_parityLogDebug)
		printf("[leaving CreateParityLogData]\n");
	return (resultHead);
}


RF_ParityLogData_t *
rf_SearchAndDequeueParityLogData(
	RF_Raid_t * raidPtr,
	int regionID,
	RF_ParityLogData_t ** head,
	RF_ParityLogData_t ** tail,
	int ignoreLocks)
{
	RF_ParityLogData_t *w;

	/* Remove and return an in-core parity log from a specified region
	 * (regionID). If a matching log is not found, return NULL.
	 *
	 * NON-BLOCKING. */

	/* walk backward through a list, looking for an entry with a matching
	 * region ID */
	if (!ignoreLocks)
		rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
	w = (*tail);
	while (w) {
		if (w->regionID == regionID) {
			/* remove an element from the list */
			if (w == *tail) {
				if (*head == *tail) {
					/* removing only element in the list */
					*head = NULL;
					*tail = NULL;
				} else {
					/* removing last item in the list */
					*tail = (*tail)->prev;
					(*tail)->next = NULL;
					RF_ASSERT((*head)->prev == NULL);
					RF_ASSERT((*tail)->next == NULL);
				}
			} else {
				if (w == *head) {
					/* removing first item in the list */
					*head = (*head)->next;
					(*head)->prev = NULL;
					RF_ASSERT((*head)->prev == NULL);
					RF_ASSERT((*tail)->next == NULL);
				} else {
					/* removing an item from the middle of
					 * the list */
					w->prev->next = w->next;
					w->next->prev = w->prev;
					RF_ASSERT((*head)->prev == NULL);
					RF_ASSERT((*tail)->next == NULL);
				}
			}
			w->prev = NULL;
			w->next = NULL;
			if (rf_parityLogDebug)
				printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
			if (!ignoreLocks)
				rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
			return (w);
		} else
			w = w->prev;
	}
	if (!ignoreLocks)
		rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
	return (NULL);
}

static RF_ParityLogData_t *
DequeueMatchingLogData(
	RF_Raid_t * raidPtr,
	RF_ParityLogData_t ** head,
	RF_ParityLogData_t ** tail)
{
	RF_ParityLogData_t *logDataList, *logData;
	int regionID;

	/* Remove and return an in-core parity log from the tail of a disk
	 * queue (*head, *tail). Then remove all matching (identical
	 * regionIDs) logData and return as a linked list.
	 *
	 * NON-BLOCKING */

	logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
	if (logDataList) {
		regionID = logDataList->regionID;
		logData = logDataList;
		logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
		while (logData->next) {
			logData = logData->next;
			logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
		}
	}
	return (logDataList);
}


static RF_ParityLog_t *
AcquireParityLog(
	RF_ParityLogData_t * logData,
	int finish)
{
	RF_ParityLog_t *log = NULL;
	RF_Raid_t *raidPtr;

	/* Grab a log buffer from the pool and return it. If no buffers are
	 * available, return NULL. NON-BLOCKING */
	raidPtr = logData->common->raidPtr;
	rf_lock_mutex2(raidPtr->parityLogPool.mutex);
	if (raidPtr->parityLogPool.parityLogs) {
		log = raidPtr->parityLogPool.parityLogs;
		raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
		log->regionID = logData->regionID;
		log->numRecords = 0;
		log->next = NULL;
		raidPtr->logsInUse++;
		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
	} else {
		/* No logs available, so place ourselves on the queue of work
		 * waiting on log buffers. This is done while
		 * parityLogPool.mutex is held, to ensure synchronization
		 * with ReleaseParityLogs. */
		if (rf_parityLogDebug)
			printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
		if (finish)
			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
		else
			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
	}
	rf_unlock_mutex2(raidPtr->parityLogPool.mutex);
	return (log);
}

void
rf_ReleaseParityLogs(
	RF_Raid_t * raidPtr,
	RF_ParityLog_t * firstLog)
{
	RF_ParityLogData_t *logDataList;
	RF_ParityLog_t *log, *lastLog;
	int cnt;

	/* Insert a linked list of parity logs (firstLog) into the free list
	 * (parityLogPool.parityLogs).
	 *
	 * NON-BLOCKING. */

	RF_ASSERT(firstLog);

	/* Before returning logs to the global free list, service all requests
	 * which are blocked on logs. Holding mutexes for parityLogPool and
	 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
	rf_lock_mutex2(raidPtr->parityLogPool.mutex);
	rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
	logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
	log = firstLog;
	if (firstLog)
		firstLog = firstLog->next;
	log->numRecords = 0;
	log->next = NULL;
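	/* Each iteration below feeds one freed log to appends that blocked
	 * on buffer availability. rf_ParityLogAppend may consume the log (it
	 * clears *incomingLog, i.e. our local "log", when installing it as a
	 * region's core log); when that happens, the next log is taken from
	 * firstLog. */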
	while (logDataList && log) {
		rf_unlock_mutex2(raidPtr->parityLogPool.mutex);
		rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
		rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
		if (rf_parityLogDebug)
			printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
		if (log == NULL) {
			log = firstLog;
			if (firstLog) {
				firstLog = firstLog->next;
				log->numRecords = 0;
				log->next = NULL;
			}
		}
		rf_lock_mutex2(raidPtr->parityLogPool.mutex);
		rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
		if (log)
			logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
	}
	/* return remaining logs to pool */
	if (log) {
		log->next = firstLog;
		firstLog = log;
	}
	if (firstLog) {
		lastLog = firstLog;
		raidPtr->logsInUse--;
		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
		while (lastLog->next) {
			lastLog = lastLog->next;
			raidPtr->logsInUse--;
			RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
		}
		lastLog->next = raidPtr->parityLogPool.parityLogs;
		raidPtr->parityLogPool.parityLogs = firstLog;
		cnt = 0;
		log = raidPtr->parityLogPool.parityLogs;
		while (log) {
			cnt++;
			log = log->next;
		}
		RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
	}
	rf_unlock_mutex2(raidPtr->parityLogPool.mutex);
	rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
}

static void
ReintLog(
	RF_Raid_t * raidPtr,
	int regionID,
	RF_ParityLog_t * log)
{
	RF_ASSERT(log);

	/* Insert an in-core parity log (log) into the disk queue of
	 * reintegration work. Set the flag (reintInProgress) for the
	 * specified region (regionID) to indicate that reintegration is in
	 * progress for this region. NON-BLOCKING */

	rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex);
	/* cleared when reintegration completes */
	raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;

	if (rf_parityLogDebug)
		printf("[requesting reintegration of region %d]\n", log->regionID);
	/* move record to reintegration queue */
	rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
	log->next = raidPtr->parityLogDiskQueue.reintQueue;
	raidPtr->parityLogDiskQueue.reintQueue = log;
	rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex);
	rf_signal_cond2(raidPtr->parityLogDiskQueue.cond);
	rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
}

static void
FlushLog(
	RF_Raid_t * raidPtr,
	RF_ParityLog_t * log)
{
	/* Insert a core log (log) into a list of logs
	 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
	 * NON-BLOCKING */

	RF_ASSERT(log);
	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
	RF_ASSERT(log->next == NULL);
	/* move log to flush queue */
	rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
	log->next = raidPtr->parityLogDiskQueue.flushQueue;
	raidPtr->parityLogDiskQueue.flushQueue = log;
	rf_signal_cond2(raidPtr->parityLogDiskQueue.cond);
	rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
}

static int
DumpParityLogToDisk(
	int finish,
	RF_ParityLogData_t * logData)
{
	int i, diskCount, regionID = logData->regionID;
	RF_ParityLog_t *log;
	RF_Raid_t *raidPtr;

	raidPtr = logData->common->raidPtr;

	/* Move a core log to disk. If the log disk is full, initiate
	 * reintegration.
	 *
	 * Return (0) if we can enqueue the dump immediately, otherwise return
	 * (1) to indicate we are blocked on reintegration and control of the
	 * thread should be relinquished.
	 *
	 * Caller must hold regionInfo[regionID].mutex
	 *
	 * NON-BLOCKING */

	RF_ASSERT(rf_owned_mutex2(raidPtr->regionInfo[regionID].mutex));

	if (rf_parityLogDebug)
		printf("[dumping parity log to disk, region %d]\n", regionID);
	log = raidPtr->regionInfo[regionID].coreLog;
	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
	RF_ASSERT(log->next == NULL);

	/* if reintegration is in progress, must queue work */
	rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex);
	if (raidPtr->regionInfo[regionID].reintInProgress) {
		/* Cannot proceed since this region is currently being
		 * reintegrated. We cannot block, so queue remaining work and
		 * return. */
		if (rf_parityLogDebug)
			printf("[region %d waiting on reintegration]\n", regionID);
		/* XXX not sure about the use of finish - shouldn't this
		 * always be "Enqueue"? */
		if (finish)
			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
		else
			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
		rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex);
		return (1);	/* relinquish control of this thread */
	}
	rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex);
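	/* The region is not being reintegrated, so take ownership of the
	 * full core log; the caller acquires a replacement after a
	 * successful dump. */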
	raidPtr->regionInfo[regionID].coreLog = NULL;
	if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity) {
		/* IMPORTANT!! this loop bound assumes the region's disk
		 * space holds an integral number of core logs */

		/* update disk map for this region */
		diskCount = raidPtr->regionInfo[regionID].diskCount;
		for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
			raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
			raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
		}
		log->diskOffset = diskCount;
		raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
		FlushLog(raidPtr, log);
	} else {
		/* no room for log on disk, send it to disk manager and
		 * request reintegration */
		RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
		ReintLog(raidPtr, regionID, log);
	}
	if (rf_parityLogDebug)
		printf("[finished dumping parity log to disk, region %d]\n", regionID);
	return (0);
}

int
rf_ParityLogAppend(
	RF_ParityLogData_t * logData,
	int finish,
	RF_ParityLog_t ** incomingLog,
	int clearReintFlag)
{
	int regionID, logItem, itemDone;
	RF_ParityLogData_t *item;
	int punt, done = RF_FALSE;
	RF_ParityLog_t *log;
	RF_Raid_t *raidPtr;
	RF_Etimer_t timer;
	int (*wakeFunc) (RF_DagNode_t * node, int status);
	void *wakeArg;

	/* Add parity to the appropriate log, one sector at a time. This
	 * routine is called by the dag functions ParityLogUpdateFunc and
	 * ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
	 *
	 * Parity to be logged is contained in a linked list (logData). When
	 * this routine returns, every sector in the list will be in one of
	 * three places: 1) entered into the parity log, 2) queued, waiting
	 * on reintegration, or 3) queued, waiting on a core log.
	 *
	 * Blocked work is passed to the ParityLoggingDiskManager for
	 * completion. Later, as conditions which required the block are
	 * removed, the work reenters this routine with the "finish"
	 * parameter set to "RF_TRUE."
	 *
	 * NON-BLOCKING */

	raidPtr = logData->common->raidPtr;
	/* lock the region for the first item in logData */
	RF_ASSERT(logData != NULL);
	regionID = logData->regionID;
	rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex);
	RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);

	if (clearReintFlag) {
		/* Enable flushing for this region. Holding both locks
		 * provides a synchronization barrier with DumpParityLogToDisk */
		rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex);
		/* XXXmrg need this? */
		rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
		RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
		raidPtr->regionInfo[regionID].diskCount = 0;
		raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
		/* flushing is now enabled */
		rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex);
		/* XXXmrg need this? */
		rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
	}
	/* process each item in logData */
	while (logData) {
		/* remove an item from logData */
		item = logData;
		logData = logData->next;
		item->next = NULL;
		item->prev = NULL;

		if (rf_parityLogDebug)
			printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);

		/* see if we moved to a new region */
		if (regionID != item->regionID) {
			rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex);
			regionID = item->regionID;
			rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex);
			RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
		}
		punt = RF_FALSE;	/* Set to RF_TRUE if work is blocked.
					 * This can happen in one of two ways:
					 * 1) no core log (AcquireParityLog)
					 * 2) waiting on reintegration
					 * (DumpParityLogToDisk). If punt is
					 * RF_TRUE, the data item was queued,
					 * so skip to the next item. */

		/* process item, one sector at a time, until all sectors
		 * processed or we punt */
		if (item->diskAddress.numSector > 0)
			done = RF_FALSE;
		else
			RF_ASSERT(0);
		while (!punt && !done) {
			/* verify that a core log exists for this region */
			if (!raidPtr->regionInfo[regionID].coreLog) {
				/* Attempt to acquire a parity log. If
				 * acquisition fails, queue remaining work in
				 * the data item and move to the next item. */
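				/* Prefer a log handed in by the caller
				 * (incomingLog) over drawing from the global
				 * pool; rf_ReleaseParityLogs uses this to
				 * route freed logs directly to blocked
				 * appends. */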
				if (incomingLog)
					if (*incomingLog) {
						RF_ASSERT((*incomingLog)->next == NULL);
						raidPtr->regionInfo[regionID].coreLog = *incomingLog;
						raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
						*incomingLog = NULL;
					} else
						raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
				else
					raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
				/* Note: AcquireParityLog either returns a log
				 * or enqueues the item */
			}
			if (!raidPtr->regionInfo[regionID].coreLog)
				punt = RF_TRUE;	/* failed to find a core log */
			else {
				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
				/* verify that the log has room for new
				 * entries */
				/* if log is full, dump it to disk and grab a
				 * new log */
				if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
					/* log is full, dump it to disk */
					if (DumpParityLogToDisk(finish, item))
						punt = RF_TRUE;	/* dump unsuccessful,
								 * blocked on
								 * reintegration */
					else {
						/* dump was successful */
						if (incomingLog)
							if (*incomingLog) {
								RF_ASSERT((*incomingLog)->next == NULL);
								raidPtr->regionInfo[regionID].coreLog = *incomingLog;
								raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
								*incomingLog = NULL;
							} else
								raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
						else
							raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
						/* if a core log is not
						 * available, must queue work
						 * and return */
						if (!raidPtr->regionInfo[regionID].coreLog)
							punt = RF_TRUE;	/* blocked on log
									 * availability */
					}
				}
			}
			/* if we didn't punt on this item, attempt to add a
			 * sector to the core log */
			if (!punt) {
				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
				/* at this point, we have a core log with
				 * enough room for a sector */
				/* copy a sector into the log */
				log = raidPtr->regionInfo[regionID].coreLog;
				RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
				logItem = log->numRecords++;
				log->records[logItem].parityAddr = item->diskAddress;
				RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
				RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
				log->records[logItem].parityAddr.numSector = 1;
				log->records[logItem].operation = item->common->operation;
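				/* Copy one sector of parity from the
				 * caller's buffer into slot logItem of the
				 * log buffer. logBytesPerSector is the log2
				 * of the sector size, so
				 * (1 << logBytesPerSector) is the sector size
				 * in bytes; bufOffset counts sectors already
				 * consumed from this item. */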
				memcpy((char *) log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), ((char *) item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), (1 << item->common->raidPtr->logBytesPerSector));
				item->diskAddress.numSector--;
				item->diskAddress.startSector++;
				if (item->diskAddress.numSector == 0)
					done = RF_TRUE;
			}
		}

		if (!punt) {
			/* Processed this item completely, decrement count of
			 * items to be processed. */
			RF_ASSERT(item->diskAddress.numSector == 0);
			rf_lock_mutex2(item->common->mutex);
			item->common->cnt--;
			if (item->common->cnt == 0)
				itemDone = RF_TRUE;
			else
				itemDone = RF_FALSE;
			rf_unlock_mutex2(item->common->mutex);
			if (itemDone) {
				/* Finished processing all log data for this
				 * I/O. Return structs to the free list and
				 * invoke the wakeup function. */
				/* grab initial value of timer */
				timer = item->common->startTime;
				RF_ETIMER_STOP(timer);
				RF_ETIMER_EVAL(timer);
				item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
				if (rf_parityLogDebug)
					printf("[waking process for region %d]\n", item->regionID);
				wakeFunc = item->common->wakeFunc;
				wakeArg = item->common->wakeArg;
				FreeParityLogCommonData(item->common);
				FreeParityLogData(item);
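				/* item and common were returned to the free
				 * lists above and must not be touched again;
				 * wakeArg (normally the suspended DAG node)
				 * is passed through unchanged. */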
				(wakeFunc) (wakeArg, 0);
			} else
				FreeParityLogData(item);
		}
	}
	rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex);
	if (rf_parityLogDebug)
		printf("[exiting ParityLogAppend]\n");
	return (0);
}


void
rf_EnableParityLogging(RF_Raid_t * raidPtr)
{
	int regionID;

	for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
		rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex);
		raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
		rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex);
	}
	if (rf_parityLogDebug)
		printf("[parity logging enabled]\n");
}
#endif				/* RF_INCLUDE_PARITYLOGGING > 0 */