| 1 | /* $NetBSD: rf_paritylogDiskMgr.c,v 1.28 2011/05/11 06:20:33 mrg Exp $ */ |
| 2 | /* |
| 3 | * Copyright (c) 1995 Carnegie-Mellon University. |
| 4 | * All rights reserved. |
| 5 | * |
| 6 | * Author: William V. Courtright II |
| 7 | * |
| 8 | * Permission to use, copy, modify and distribute this software and |
| 9 | * its documentation is hereby granted, provided that both the copyright |
| 10 | * notice and this permission notice appear in all copies of the |
| 11 | * software, derivative works or modified versions, and any portions |
| 12 | * thereof, and that both notices appear in supporting documentation. |
| 13 | * |
| 14 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
| 15 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND |
| 16 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
| 17 | * |
| 18 | * Carnegie Mellon requests users of this software to return to |
| 19 | * |
| 20 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
| 21 | * School of Computer Science |
| 22 | * Carnegie Mellon University |
| 23 | * Pittsburgh PA 15213-3890 |
| 24 | * |
| 25 | * any improvements or extensions that they make and grant Carnegie the |
| 26 | * rights to redistribute these changes. |
| 27 | */ |
| 28 | /* Code for flushing and reintegration operations related to parity logging. |
| 29 | * |
| 30 | */ |
| 31 | |
| 32 | #include <sys/cdefs.h> |
| 33 | __KERNEL_RCSID(0, "$NetBSD: rf_paritylogDiskMgr.c,v 1.28 2011/05/11 06:20:33 mrg Exp $" ); |
| 34 | |
| 35 | #include "rf_archs.h" |
| 36 | |
| 37 | #if RF_INCLUDE_PARITYLOGGING > 0 |
| 38 | |
| 39 | #include <dev/raidframe/raidframevar.h> |
| 40 | |
| 41 | #include "rf_threadstuff.h" |
| 42 | #include "rf_mcpair.h" |
| 43 | #include "rf_raid.h" |
| 44 | #include "rf_dag.h" |
| 45 | #include "rf_dagfuncs.h" |
| 46 | #include "rf_desc.h" |
| 47 | #include "rf_layout.h" |
| 48 | #include "rf_diskqueue.h" |
| 49 | #include "rf_paritylog.h" |
| 50 | #include "rf_general.h" |
| 51 | #include "rf_etimer.h" |
| 52 | #include "rf_paritylogging.h" |
| 53 | #include "rf_engine.h" |
| 54 | #include "rf_dagutils.h" |
| 55 | #include "rf_map.h" |
| 56 | #include "rf_parityscan.h" |
| 57 | |
| 58 | #include "rf_paritylogDiskMgr.h" |
| 59 | |
| 60 | static void *AcquireReintBuffer(RF_RegionBufferQueue_t *); |
| 61 | |
| 62 | static void * |
| 63 | AcquireReintBuffer(RF_RegionBufferQueue_t *pool) |
| 64 | { |
| 65 | void *bufPtr = NULL; |
| 66 | |
| 67 | /* Return a region buffer from the free list (pool). If the free list |
| 68 | * is empty, WAIT. BLOCKING */ |
| 69 | |
| 70 | rf_lock_mutex2(pool->mutex); |
| 71 | if (pool->availableBuffers > 0) { |
| 72 | bufPtr = pool->buffers[pool->availBuffersIndex]; |
| 73 | pool->availableBuffers--; |
| 74 | pool->availBuffersIndex++; |
| 75 | if (pool->availBuffersIndex == pool->totalBuffers) |
| 76 | pool->availBuffersIndex = 0; |
| 77 | rf_unlock_mutex2(pool->mutex); |
| 78 | } else { |
| 79 | RF_PANIC(); /* should never happen in correct config, |
| 80 | * single reint */ |
| 81 | rf_wait_cond2(pool->cond, pool->mutex); |
| 82 | } |
| 83 | return (bufPtr); |
| 84 | } |
| 85 | |
| 86 | static void |
| 87 | ReleaseReintBuffer( |
| 88 | RF_RegionBufferQueue_t * pool, |
| 89 | void *bufPtr) |
| 90 | { |
| 91 | /* Insert a region buffer (bufPtr) into the free list (pool). |
| 92 | * NON-BLOCKING */ |
| 93 | |
| 94 | rf_lock_mutex2(pool->mutex); |
| 95 | pool->availableBuffers++; |
| 96 | pool->buffers[pool->emptyBuffersIndex] = bufPtr; |
| 97 | pool->emptyBuffersIndex++; |
| 98 | if (pool->emptyBuffersIndex == pool->totalBuffers) |
| 99 | pool->emptyBuffersIndex = 0; |
| 100 | RF_ASSERT(pool->availableBuffers <= pool->totalBuffers); |
| 101 | /* |
| 102 | * XXXmrg this signal goes with the above "shouldn't happen" wait? |
| 103 | */ |
| 104 | rf_signal_cond2(pool->cond); |
| 105 | rf_unlock_mutex2(pool->mutex); |
| 106 | } |
| 107 | |
| 108 | |
| 109 | |
| 110 | static void |
| 111 | ReadRegionLog( |
| 112 | RF_RegionId_t regionID, |
| 113 | RF_MCPair_t * rrd_mcpair, |
| 114 | void *regionBuffer, |
| 115 | RF_Raid_t * raidPtr, |
| 116 | RF_DagHeader_t ** rrd_dag_h, |
| 117 | RF_AllocListElem_t ** rrd_alloclist, |
| 118 | RF_PhysDiskAddr_t ** rrd_pda) |
| 119 | { |
| 120 | /* Initiate the read a region log from disk. Once initiated, return |
| 121 | * to the calling routine. |
| 122 | * |
| 123 | * NON-BLOCKING */ |
| 124 | |
| 125 | RF_AccTraceEntry_t *tracerec; |
| 126 | RF_DagNode_t *rrd_rdNode; |
| 127 | |
| 128 | /* create DAG to read region log from disk */ |
| 129 | rf_MakeAllocList(*rrd_alloclist); |
| 130 | *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, |
| 131 | rf_DiskReadFunc, rf_DiskReadUndoFunc, |
| 132 | "Rrl" , *rrd_alloclist, |
| 133 | RF_DAG_FLAGS_NONE, |
| 134 | RF_IO_NORMAL_PRIORITY); |
| 135 | |
| 136 | /* create and initialize PDA for the core log */ |
| 137 | /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t |
| 138 | * *)); */ |
| 139 | *rrd_pda = rf_AllocPDAList(1); |
| 140 | rf_MapLogParityLogging(raidPtr, regionID, 0, |
| 141 | &((*rrd_pda)->col), &((*rrd_pda)->startSector)); |
| 142 | (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity; |
| 143 | |
| 144 | if ((*rrd_pda)->next) { |
| 145 | (*rrd_pda)->next = NULL; |
| 146 | printf("set rrd_pda->next to NULL\n" ); |
| 147 | } |
| 148 | /* initialize DAG parameters */ |
| 149 | RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); |
| 150 | memset((char *) tracerec, 0, sizeof(RF_AccTraceEntry_t)); |
| 151 | (*rrd_dag_h)->tracerec = tracerec; |
| 152 | rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0]; |
| 153 | rrd_rdNode->params[0].p = *rrd_pda; |
| 154 | /* rrd_rdNode->params[1] = regionBuffer; */ |
| 155 | rrd_rdNode->params[2].v = 0; |
| 156 | rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0); |
| 157 | |
| 158 | /* launch region log read dag */ |
| 159 | rf_DispatchDAG(*rrd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, |
| 160 | (void *) rrd_mcpair); |
| 161 | } |
| 162 | |
| 163 | |
| 164 | |
| 165 | static void |
| 166 | WriteCoreLog( |
| 167 | RF_ParityLog_t * log, |
| 168 | RF_MCPair_t * fwr_mcpair, |
| 169 | RF_Raid_t * raidPtr, |
| 170 | RF_DagHeader_t ** fwr_dag_h, |
| 171 | RF_AllocListElem_t ** fwr_alloclist, |
| 172 | RF_PhysDiskAddr_t ** fwr_pda) |
| 173 | { |
| 174 | RF_RegionId_t regionID = log->regionID; |
| 175 | RF_AccTraceEntry_t *tracerec; |
| 176 | RF_SectorNum_t regionOffset; |
| 177 | RF_DagNode_t *fwr_wrNode; |
| 178 | |
| 179 | /* Initiate the write of a core log to a region log disk. Once |
| 180 | * initiated, return to the calling routine. |
| 181 | * |
| 182 | * NON-BLOCKING */ |
| 183 | |
| 184 | /* create DAG to write a core log to a region log disk */ |
| 185 | rf_MakeAllocList(*fwr_alloclist); |
| 186 | *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, |
| 187 | rf_DiskWriteFunc, rf_DiskWriteUndoFunc, |
| 188 | "Wcl" , *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); |
| 189 | |
| 190 | /* create and initialize PDA for the region log */ |
| 191 | /* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t |
| 192 | * *)); */ |
| 193 | *fwr_pda = rf_AllocPDAList(1); |
| 194 | regionOffset = log->diskOffset; |
| 195 | rf_MapLogParityLogging(raidPtr, regionID, regionOffset, |
| 196 | &((*fwr_pda)->col), |
| 197 | &((*fwr_pda)->startSector)); |
| 198 | (*fwr_pda)->numSector = raidPtr->numSectorsPerLog; |
| 199 | |
| 200 | /* initialize DAG parameters */ |
| 201 | RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); |
| 202 | memset((char *) tracerec, 0, sizeof(RF_AccTraceEntry_t)); |
| 203 | (*fwr_dag_h)->tracerec = tracerec; |
| 204 | fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0]; |
| 205 | fwr_wrNode->params[0].p = *fwr_pda; |
| 206 | /* fwr_wrNode->params[1] = log->bufPtr; */ |
| 207 | fwr_wrNode->params[2].v = 0; |
| 208 | fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0); |
| 209 | |
| 210 | /* launch the dag to write the core log to disk */ |
| 211 | rf_DispatchDAG(*fwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, |
| 212 | (void *) fwr_mcpair); |
| 213 | } |
| 214 | |
| 215 | |
| 216 | static void |
| 217 | ReadRegionParity( |
| 218 | RF_RegionId_t regionID, |
| 219 | RF_MCPair_t * prd_mcpair, |
| 220 | void *parityBuffer, |
| 221 | RF_Raid_t * raidPtr, |
| 222 | RF_DagHeader_t ** prd_dag_h, |
| 223 | RF_AllocListElem_t ** prd_alloclist, |
| 224 | RF_PhysDiskAddr_t ** prd_pda) |
| 225 | { |
| 226 | /* Initiate the read region parity from disk. Once initiated, return |
| 227 | * to the calling routine. |
| 228 | * |
| 229 | * NON-BLOCKING */ |
| 230 | |
| 231 | RF_AccTraceEntry_t *tracerec; |
| 232 | RF_DagNode_t *prd_rdNode; |
| 233 | |
| 234 | /* create DAG to read region parity from disk */ |
| 235 | rf_MakeAllocList(*prd_alloclist); |
| 236 | *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, |
| 237 | rf_DiskReadUndoFunc, "Rrp" , |
| 238 | *prd_alloclist, RF_DAG_FLAGS_NONE, |
| 239 | RF_IO_NORMAL_PRIORITY); |
| 240 | |
| 241 | /* create and initialize PDA for region parity */ |
| 242 | /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t |
| 243 | * *)); */ |
| 244 | *prd_pda = rf_AllocPDAList(1); |
| 245 | rf_MapRegionParity(raidPtr, regionID, |
| 246 | &((*prd_pda)->col), &((*prd_pda)->startSector), |
| 247 | &((*prd_pda)->numSector)); |
| 248 | if (rf_parityLogDebug) |
| 249 | printf("[reading %d sectors of parity from region %d]\n" , |
| 250 | (int) (*prd_pda)->numSector, regionID); |
| 251 | if ((*prd_pda)->next) { |
| 252 | (*prd_pda)->next = NULL; |
| 253 | printf("set prd_pda->next to NULL\n" ); |
| 254 | } |
| 255 | /* initialize DAG parameters */ |
| 256 | RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); |
| 257 | memset((char *) tracerec, 0, sizeof(RF_AccTraceEntry_t)); |
| 258 | (*prd_dag_h)->tracerec = tracerec; |
| 259 | prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0]; |
| 260 | prd_rdNode->params[0].p = *prd_pda; |
| 261 | prd_rdNode->params[1].p = parityBuffer; |
| 262 | prd_rdNode->params[2].v = 0; |
| 263 | prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0); |
| 264 | #if RF_DEBUG_VALIDATE_DAG |
| 265 | if (rf_validateDAGDebug) |
| 266 | rf_ValidateDAG(*prd_dag_h); |
| 267 | #endif |
| 268 | /* launch region parity read dag */ |
| 269 | rf_DispatchDAG(*prd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, |
| 270 | (void *) prd_mcpair); |
| 271 | } |
| 272 | |
| 273 | static void |
| 274 | WriteRegionParity( |
| 275 | RF_RegionId_t regionID, |
| 276 | RF_MCPair_t * pwr_mcpair, |
| 277 | void *parityBuffer, |
| 278 | RF_Raid_t * raidPtr, |
| 279 | RF_DagHeader_t ** pwr_dag_h, |
| 280 | RF_AllocListElem_t ** pwr_alloclist, |
| 281 | RF_PhysDiskAddr_t ** pwr_pda) |
| 282 | { |
| 283 | /* Initiate the write of region parity to disk. Once initiated, return |
| 284 | * to the calling routine. |
| 285 | * |
| 286 | * NON-BLOCKING */ |
| 287 | |
| 288 | RF_AccTraceEntry_t *tracerec; |
| 289 | RF_DagNode_t *pwr_wrNode; |
| 290 | |
| 291 | /* create DAG to write region log from disk */ |
| 292 | rf_MakeAllocList(*pwr_alloclist); |
| 293 | *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, |
| 294 | rf_DiskWriteFunc, rf_DiskWriteUndoFunc, |
| 295 | "Wrp" , *pwr_alloclist, |
| 296 | RF_DAG_FLAGS_NONE, |
| 297 | RF_IO_NORMAL_PRIORITY); |
| 298 | |
| 299 | /* create and initialize PDA for region parity */ |
| 300 | /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t |
| 301 | * *)); */ |
| 302 | *pwr_pda = rf_AllocPDAList(1); |
| 303 | rf_MapRegionParity(raidPtr, regionID, |
| 304 | &((*pwr_pda)->col), &((*pwr_pda)->startSector), |
| 305 | &((*pwr_pda)->numSector)); |
| 306 | |
| 307 | /* initialize DAG parameters */ |
| 308 | RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); |
| 309 | memset((char *) tracerec, 0, sizeof(RF_AccTraceEntry_t)); |
| 310 | (*pwr_dag_h)->tracerec = tracerec; |
| 311 | pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0]; |
| 312 | pwr_wrNode->params[0].p = *pwr_pda; |
| 313 | /* pwr_wrNode->params[1] = parityBuffer; */ |
| 314 | pwr_wrNode->params[2].v = 0; |
| 315 | pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0); |
| 316 | |
| 317 | /* launch the dag to write region parity to disk */ |
| 318 | rf_DispatchDAG(*pwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, |
| 319 | (void *) pwr_mcpair); |
| 320 | } |
| 321 | |
| 322 | static void |
| 323 | FlushLogsToDisk( |
| 324 | RF_Raid_t * raidPtr, |
| 325 | RF_ParityLog_t * logList) |
| 326 | { |
| 327 | /* Flush a linked list of core logs to the log disk. Logs contain the |
| 328 | * disk location where they should be written. Logs were written in |
| 329 | * FIFO order and that order must be preserved. |
| 330 | * |
| 331 | * Recommended optimizations: 1) allow multiple flushes to occur |
| 332 | * simultaneously 2) coalesce contiguous flush operations |
| 333 | * |
| 334 | * BLOCKING */ |
| 335 | |
| 336 | RF_ParityLog_t *log; |
| 337 | RF_RegionId_t regionID; |
| 338 | RF_MCPair_t *fwr_mcpair; |
| 339 | RF_DagHeader_t *fwr_dag_h; |
| 340 | RF_AllocListElem_t *fwr_alloclist; |
| 341 | RF_PhysDiskAddr_t *fwr_pda; |
| 342 | |
| 343 | fwr_mcpair = rf_AllocMCPair(); |
| 344 | RF_LOCK_MCPAIR(fwr_mcpair); |
| 345 | |
| 346 | RF_ASSERT(logList); |
| 347 | log = logList; |
| 348 | while (log) { |
| 349 | regionID = log->regionID; |
| 350 | |
| 351 | /* create and launch a DAG to write the core log */ |
| 352 | if (rf_parityLogDebug) |
| 353 | printf("[initiating write of core log for region %d]\n" , regionID); |
| 354 | fwr_mcpair->flag = RF_FALSE; |
| 355 | WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, |
| 356 | &fwr_alloclist, &fwr_pda); |
| 357 | |
| 358 | /* wait for the DAG to complete */ |
| 359 | while (!fwr_mcpair->flag) |
| 360 | RF_WAIT_MCPAIR(fwr_mcpair); |
| 361 | if (fwr_dag_h->status != rf_enable) { |
| 362 | RF_ERRORMSG1("Unable to write core log to disk (region %d)\n" , regionID); |
| 363 | RF_ASSERT(0); |
| 364 | } |
| 365 | /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */ |
| 366 | rf_FreePhysDiskAddr(fwr_pda); |
| 367 | rf_FreeDAG(fwr_dag_h); |
| 368 | rf_FreeAllocList(fwr_alloclist); |
| 369 | |
| 370 | log = log->next; |
| 371 | } |
| 372 | RF_UNLOCK_MCPAIR(fwr_mcpair); |
| 373 | rf_FreeMCPair(fwr_mcpair); |
| 374 | rf_ReleaseParityLogs(raidPtr, logList); |
| 375 | } |
| 376 | |
| 377 | static void |
| 378 | ReintegrateRegion( |
| 379 | RF_Raid_t * raidPtr, |
| 380 | RF_RegionId_t regionID, |
| 381 | RF_ParityLog_t * coreLog) |
| 382 | { |
| 383 | RF_MCPair_t *rrd_mcpair = NULL, *prd_mcpair, *pwr_mcpair; |
| 384 | RF_DagHeader_t *rrd_dag_h = NULL, *prd_dag_h, *pwr_dag_h; |
| 385 | RF_AllocListElem_t *rrd_alloclist = NULL, *prd_alloclist, *pwr_alloclist; |
| 386 | RF_PhysDiskAddr_t *rrd_pda = NULL, *prd_pda, *pwr_pda; |
| 387 | void *parityBuffer, *regionBuffer = NULL; |
| 388 | |
| 389 | /* Reintegrate a region (regionID). |
| 390 | * |
| 391 | * 1. acquire region and parity buffers |
| 392 | * 2. read log from disk |
| 393 | * 3. read parity from disk |
| 394 | * 4. apply log to parity |
| 395 | * 5. apply core log to parity |
| 396 | * 6. write new parity to disk |
| 397 | * |
| 398 | * BLOCKING */ |
| 399 | |
| 400 | if (rf_parityLogDebug) |
| 401 | printf("[reintegrating region %d]\n" , regionID); |
| 402 | |
| 403 | /* initiate read of region parity */ |
| 404 | if (rf_parityLogDebug) |
| 405 | printf("[initiating read of parity for region %d]\n" ,regionID); |
| 406 | parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool); |
| 407 | prd_mcpair = rf_AllocMCPair(); |
| 408 | RF_LOCK_MCPAIR(prd_mcpair); |
| 409 | prd_mcpair->flag = RF_FALSE; |
| 410 | ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, |
| 411 | &prd_dag_h, &prd_alloclist, &prd_pda); |
| 412 | |
| 413 | /* if region log nonempty, initiate read */ |
| 414 | if (raidPtr->regionInfo[regionID].diskCount > 0) { |
| 415 | if (rf_parityLogDebug) |
| 416 | printf("[initiating read of disk log for region %d]\n" , |
| 417 | regionID); |
| 418 | regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool); |
| 419 | rrd_mcpair = rf_AllocMCPair(); |
| 420 | RF_LOCK_MCPAIR(rrd_mcpair); |
| 421 | rrd_mcpair->flag = RF_FALSE; |
| 422 | ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, |
| 423 | &rrd_dag_h, &rrd_alloclist, &rrd_pda); |
| 424 | } |
| 425 | /* wait on read of region parity to complete */ |
| 426 | while (!prd_mcpair->flag) { |
| 427 | RF_WAIT_MCPAIR(prd_mcpair); |
| 428 | } |
| 429 | RF_UNLOCK_MCPAIR(prd_mcpair); |
| 430 | if (prd_dag_h->status != rf_enable) { |
| 431 | RF_ERRORMSG("Unable to read parity from disk\n" ); |
| 432 | /* add code to fail the parity disk */ |
| 433 | RF_ASSERT(0); |
| 434 | } |
| 435 | /* apply core log to parity */ |
| 436 | /* if (coreLog) ApplyLogsToParity(coreLog, parityBuffer); */ |
| 437 | |
| 438 | if (raidPtr->regionInfo[regionID].diskCount > 0) { |
| 439 | /* wait on read of region log to complete */ |
| 440 | while (!rrd_mcpair->flag) |
| 441 | RF_WAIT_MCPAIR(rrd_mcpair); |
| 442 | RF_UNLOCK_MCPAIR(rrd_mcpair); |
| 443 | if (rrd_dag_h->status != rf_enable) { |
| 444 | RF_ERRORMSG("Unable to read region log from disk\n" ); |
| 445 | /* add code to fail the log disk */ |
| 446 | RF_ASSERT(0); |
| 447 | } |
| 448 | /* apply region log to parity */ |
| 449 | /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */ |
| 450 | /* release resources associated with region log */ |
| 451 | /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */ |
| 452 | rf_FreePhysDiskAddr(rrd_pda); |
| 453 | rf_FreeDAG(rrd_dag_h); |
| 454 | rf_FreeAllocList(rrd_alloclist); |
| 455 | rf_FreeMCPair(rrd_mcpair); |
| 456 | ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer); |
| 457 | } |
| 458 | /* write reintegrated parity to disk */ |
| 459 | if (rf_parityLogDebug) |
| 460 | printf("[initiating write of parity for region %d]\n" , |
| 461 | regionID); |
| 462 | pwr_mcpair = rf_AllocMCPair(); |
| 463 | RF_LOCK_MCPAIR(pwr_mcpair); |
| 464 | pwr_mcpair->flag = RF_FALSE; |
| 465 | WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, |
| 466 | &pwr_dag_h, &pwr_alloclist, &pwr_pda); |
| 467 | while (!pwr_mcpair->flag) |
| 468 | RF_WAIT_MCPAIR(pwr_mcpair); |
| 469 | RF_UNLOCK_MCPAIR(pwr_mcpair); |
| 470 | if (pwr_dag_h->status != rf_enable) { |
| 471 | RF_ERRORMSG("Unable to write parity to disk\n" ); |
| 472 | /* add code to fail the parity disk */ |
| 473 | RF_ASSERT(0); |
| 474 | } |
| 475 | /* release resources associated with read of old parity */ |
| 476 | /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */ |
| 477 | rf_FreePhysDiskAddr(prd_pda); |
| 478 | rf_FreeDAG(prd_dag_h); |
| 479 | rf_FreeAllocList(prd_alloclist); |
| 480 | rf_FreeMCPair(prd_mcpair); |
| 481 | |
| 482 | /* release resources associated with write of new parity */ |
| 483 | ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer); |
| 484 | /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */ |
| 485 | rf_FreePhysDiskAddr(pwr_pda); |
| 486 | rf_FreeDAG(pwr_dag_h); |
| 487 | rf_FreeAllocList(pwr_alloclist); |
| 488 | rf_FreeMCPair(pwr_mcpair); |
| 489 | |
| 490 | if (rf_parityLogDebug) |
| 491 | printf("[finished reintegrating region %d]\n" , regionID); |
| 492 | } |
| 493 | |
| 494 | |
| 495 | |
| 496 | static void |
| 497 | ReintegrateLogs( |
| 498 | RF_Raid_t * raidPtr, |
| 499 | RF_ParityLog_t * logList) |
| 500 | { |
| 501 | RF_ParityLog_t *log, *freeLogList = NULL; |
| 502 | RF_ParityLogData_t *logData, *logDataList; |
| 503 | RF_RegionId_t regionID; |
| 504 | |
| 505 | RF_ASSERT(logList); |
| 506 | while (logList) { |
| 507 | log = logList; |
| 508 | logList = logList->next; |
| 509 | log->next = NULL; |
| 510 | regionID = log->regionID; |
| 511 | ReintegrateRegion(raidPtr, regionID, log); |
| 512 | log->numRecords = 0; |
| 513 | |
| 514 | /* remove all items which are blocked on reintegration of this |
| 515 | * region */ |
| 516 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
| 517 | logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, |
| 518 | &raidPtr->parityLogDiskQueue.reintBlockHead, |
| 519 | &raidPtr->parityLogDiskQueue.reintBlockTail, |
| 520 | RF_TRUE); |
| 521 | logDataList = logData; |
| 522 | while (logData) { |
| 523 | logData->next = rf_SearchAndDequeueParityLogData( |
| 524 | raidPtr, regionID, |
| 525 | &raidPtr->parityLogDiskQueue.reintBlockHead, |
| 526 | &raidPtr->parityLogDiskQueue.reintBlockTail, |
| 527 | RF_TRUE); |
| 528 | logData = logData->next; |
| 529 | } |
| 530 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
| 531 | |
| 532 | /* process blocked log data and clear reintInProgress flag for |
| 533 | * this region */ |
| 534 | if (logDataList) |
| 535 | rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE); |
| 536 | else { |
| 537 | /* Enable flushing for this region. Holding both |
| 538 | * locks provides a synchronization barrier with |
| 539 | * DumpParityLogToDisk */ |
| 540 | rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex); |
| 541 | rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex); |
| 542 | /* XXXmrg: don't need this? */ |
| 543 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
| 544 | raidPtr->regionInfo[regionID].diskCount = 0; |
| 545 | raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; |
| 546 | rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex); |
| 547 | rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now |
| 548 | * enabled */ |
| 549 | /* XXXmrg: don't need this? */ |
| 550 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
| 551 | } |
| 552 | /* if log wasn't used, attach it to the list of logs to be |
| 553 | * returned */ |
| 554 | if (log) { |
| 555 | log->next = freeLogList; |
| 556 | freeLogList = log; |
| 557 | } |
| 558 | } |
| 559 | if (freeLogList) |
| 560 | rf_ReleaseParityLogs(raidPtr, freeLogList); |
| 561 | } |
| 562 | |
| 563 | int |
| 564 | rf_ShutdownLogging(RF_Raid_t * raidPtr) |
| 565 | { |
| 566 | /* shutdown parity logging 1) disable parity logging in all regions 2) |
| 567 | * reintegrate all regions */ |
| 568 | |
| 569 | RF_SectorCount_t diskCount; |
| 570 | RF_RegionId_t regionID; |
| 571 | RF_ParityLog_t *log; |
| 572 | |
| 573 | if (rf_parityLogDebug) |
| 574 | printf("[shutting down parity logging]\n" ); |
| 575 | /* Since parity log maps are volatile, we must reintegrate all |
| 576 | * regions. */ |
| 577 | if (rf_forceParityLogReint) { |
| 578 | for (regionID = 0; regionID < rf_numParityRegions; regionID++) { |
| 579 | rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex); |
| 580 | raidPtr->regionInfo[regionID].loggingEnabled = |
| 581 | RF_FALSE; |
| 582 | log = raidPtr->regionInfo[regionID].coreLog; |
| 583 | raidPtr->regionInfo[regionID].coreLog = NULL; |
| 584 | diskCount = raidPtr->regionInfo[regionID].diskCount; |
| 585 | rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex); |
| 586 | if (diskCount > 0 || log != NULL) |
| 587 | ReintegrateRegion(raidPtr, regionID, log); |
| 588 | if (log != NULL) |
| 589 | rf_ReleaseParityLogs(raidPtr, log); |
| 590 | } |
| 591 | } |
| 592 | if (rf_parityLogDebug) { |
| 593 | printf("[parity logging disabled]\n" ); |
| 594 | printf("[should be done!]\n" ); |
| 595 | } |
| 596 | return (0); |
| 597 | } |
| 598 | |
| 599 | int |
| 600 | rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr) |
| 601 | { |
| 602 | RF_ParityLog_t *reintQueue, *flushQueue; |
| 603 | int workNeeded, done = RF_FALSE; |
| 604 | int s; |
| 605 | |
| 606 | /* Main program for parity logging disk thread. This routine waits |
| 607 | * for work to appear in either the flush or reintegration queues and |
| 608 | * is responsible for flushing core logs to the log disk as well as |
| 609 | * reintegrating parity regions. |
| 610 | * |
| 611 | * BLOCKING */ |
| 612 | |
| 613 | s = splbio(); |
| 614 | |
| 615 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
| 616 | |
| 617 | /* |
| 618 | * Inform our creator that we're running. Don't bother doing the |
| 619 | * mutex lock/unlock dance- we locked above, and we'll unlock |
| 620 | * below with nothing to do, yet. |
| 621 | */ |
| 622 | raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING; |
| 623 | rf_signal_cond2(raidPtr->parityLogDiskQueue.cond); |
| 624 | |
| 625 | /* empty the work queues */ |
| 626 | flushQueue = raidPtr->parityLogDiskQueue.flushQueue; |
| 627 | raidPtr->parityLogDiskQueue.flushQueue = NULL; |
| 628 | reintQueue = raidPtr->parityLogDiskQueue.reintQueue; |
| 629 | raidPtr->parityLogDiskQueue.reintQueue = NULL; |
| 630 | workNeeded = (flushQueue || reintQueue); |
| 631 | |
| 632 | while (!done) { |
| 633 | while (workNeeded) { |
| 634 | /* First, flush all logs in the flush queue, freeing |
| 635 | * buffers Second, reintegrate all regions which are |
| 636 | * reported as full. Third, append queued log data |
| 637 | * until blocked. |
| 638 | * |
| 639 | * Note: Incoming appends (ParityLogAppend) can block on |
| 640 | * either 1. empty buffer pool 2. region under |
| 641 | * reintegration To preserve a global FIFO ordering of |
| 642 | * appends, buffers are not released to the world |
| 643 | * until those appends blocked on buffers are removed |
| 644 | * from the append queue. Similarly, regions which |
| 645 | * are reintegrated are not opened for general use |
| 646 | * until the append queue has been emptied. */ |
| 647 | |
| 648 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
| 649 | |
| 650 | /* empty flushQueue, using free'd log buffers to |
| 651 | * process bufTail */ |
| 652 | if (flushQueue) |
| 653 | FlushLogsToDisk(raidPtr, flushQueue); |
| 654 | |
| 655 | /* empty reintQueue, flushing from reintTail as we go */ |
| 656 | if (reintQueue) |
| 657 | ReintegrateLogs(raidPtr, reintQueue); |
| 658 | |
| 659 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
| 660 | flushQueue = raidPtr->parityLogDiskQueue.flushQueue; |
| 661 | raidPtr->parityLogDiskQueue.flushQueue = NULL; |
| 662 | reintQueue = raidPtr->parityLogDiskQueue.reintQueue; |
| 663 | raidPtr->parityLogDiskQueue.reintQueue = NULL; |
| 664 | workNeeded = (flushQueue || reintQueue); |
| 665 | } |
| 666 | /* no work is needed at this point */ |
| 667 | if (raidPtr->parityLogDiskQueue.threadState & RF_PLOG_TERMINATE) { |
| 668 | /* shutdown parity logging 1. disable parity logging |
| 669 | * in all regions 2. reintegrate all regions */ |
| 670 | done = RF_TRUE; /* thread disabled, no work needed */ |
| 671 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
| 672 | rf_ShutdownLogging(raidPtr); |
| 673 | } |
| 674 | if (!done) { |
| 675 | /* thread enabled, no work needed, so sleep */ |
| 676 | if (rf_parityLogDebug) |
| 677 | printf("[parity logging disk manager sleeping]\n" ); |
| 678 | rf_wait_cond2(raidPtr->parityLogDiskQueue.cond, |
| 679 | raidPtr->parityLogDiskQueue.mutex); |
| 680 | if (rf_parityLogDebug) |
| 681 | printf("[parity logging disk manager just woke up]\n" ); |
| 682 | flushQueue = raidPtr->parityLogDiskQueue.flushQueue; |
| 683 | raidPtr->parityLogDiskQueue.flushQueue = NULL; |
| 684 | reintQueue = raidPtr->parityLogDiskQueue.reintQueue; |
| 685 | raidPtr->parityLogDiskQueue.reintQueue = NULL; |
| 686 | workNeeded = (flushQueue || reintQueue); |
| 687 | } |
| 688 | } |
| 689 | /* |
| 690 | * Announce that we're done. |
| 691 | */ |
| 692 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
| 693 | raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN; |
| 694 | rf_signal_cond2(raidPtr->parityLogDiskQueue.cond); |
| 695 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
| 696 | |
| 697 | splx(s); |
| 698 | |
| 699 | /* |
| 700 | * In the NetBSD kernel, the thread must exit; returning would |
| 701 | * cause the proc trampoline to attempt to return to userspace. |
| 702 | */ |
| 703 | kthread_exit(0); /* does not return */ |
| 704 | } |
| 705 | #endif /* RF_INCLUDE_PARITYLOGGING > 0 */ |
| 706 | |