/*	$NetBSD: rf_dagdegwr.c,v 1.33 2014/03/23 03:42:39 christos Exp $	*/
/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * rf_dagdegwr.c
 *
 * code for creating degraded write DAGs
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_dagdegwr.c,v 1.33 2014/03/23 03:42:39 christos Exp $");

#include <dev/raidframe/raidframevar.h>

#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_dagutils.h"
#include "rf_dagfuncs.h"
#include "rf_debugMem.h"
#include "rf_general.h"
#include "rf_dagdegwr.h"
#include "rf_map.h"

/******************************************************************************
 *
 * General comments on DAG creation:
 *
 * All DAGs in this file use roll-away error recovery.  Each DAG has a single
 * commit node, usually called "Cmt."  If an error occurs before the Cmt node
 * is reached, the execution engine will halt forward execution and work
 * backward through the graph, executing the undo functions.  Assuming that
 * each node in the graph prior to the Cmt node is either undoable and atomic,
 * or makes no changes to permanent state, the graph will fail atomically.
 * If an error occurs after the Cmt node executes, the engine will roll
 * forward through the graph, blindly executing nodes until it reaches the
 * end.  If a graph reaches the end, it is assumed to have completed
 * successfully.
 *
 * A graph has exactly one Cmt node.
 *
 */
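
/*
 * As a concrete illustration for the DAGs below: the Rrd reads and the
 * Xor computation sit before the Cmt node, so a failure there rolls
 * backward and leaves the array unmodified, while the Wnd/Wnp (and Wnq)
 * writes sit after the Cmt node and are rolled forward to completion
 * once the DAG commits.
 */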


/******************************************************************************
 *
 * The following wrappers map the standard DAG creation interface to the
 * DAG creation routines.  Additionally, these wrappers enable experimentation
 * with new DAG structures by providing an extra level of indirection, allowing
 * the DAG creation routines to be replaced at this single point.
 */

static
RF_CREATE_DAG_FUNC_DECL(rf_CreateSimpleDegradedWriteDAG)
{
	rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp,
	    flags, allocList, 1, rf_RecoveryXorFunc, RF_TRUE);
}

void
rf_CreateDegradedWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
			  RF_DagHeader_t *dag_h, void *bp,
			  RF_RaidAccessFlags_t flags,
			  RF_AllocListElem_t *allocList)
{

	RF_ASSERT(asmap->numDataFailed == 1);
	dag_h->creator = "DegradedWriteDAG";

	/*
	 * If the access writes only a portion of the failed unit, and also
	 * writes some portion of at least one surviving unit, we create two
	 * DAGs, one for the failed component and one for the non-failed
	 * component, and do them sequentially.  Note that the fact that we're
	 * accessing only a portion of the failed unit indicates that the
	 * access either starts or ends in the failed unit, and hence we need
	 * to create only two DAGs.  This is inefficient in that the same data
	 * or parity can get read and written twice using this structure.  I
	 * need to fix this to do the access all at once.
	 */
	RF_ASSERT(!(asmap->numStripeUnitsAccessed != 1 &&
		    asmap->failedPDAs[0]->numSector !=
			raidPtr->Layout.sectorsPerStripeUnit));
	rf_CreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags,
	    allocList);
}



/******************************************************************************
 *
 * DAG creation code begins here
 */



/******************************************************************************
 *
 * CommonCreateSimpleDegradedWriteDAG -- creates a DAG to do a degraded-mode
 * write, which is as follows
 *
 *                                          / {Wnq} --\
 * hdr -> blockNode -> Rrd -> Xor -> Cmt -> Wnp -----> unblock -> term
 *            \ {Rrd} /                \ Wnd ------/
 *                                      \ {Wnd} --/
 *
 * commit node: Cmt
 *
 * IMPORTANT:
 * This DAG generator does not work for double-degraded archs since it does
 * not generate Q.
 *
 * This dag is essentially identical to the large-write dag, except that the
 * write to the failed data unit is suppressed.
 *
 * IMPORTANT: this dag does not work in the case where the access writes only
 * a portion of the failed unit, and also writes some portion of at least one
 * surviving SU.  This case is handled in rf_CreateDegradedWriteDAG above.
 *
 * The block & unblock nodes are leftovers from a previous version.  They
 * do nothing, but I haven't deleted them because it would be a tremendous
 * effort to put them back in.
 *
 * This dag is used whenever one of the data units in a write has failed.
 * If it is the parity unit that failed, the nonredundant write dag (below)
 * is used.
 *****************************************************************************/

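/*
 * Parameter notes (summarizing the code below): nfaults is 1 when only
 * parity must be written and 2 when a Q unit must also be written;
 * redFunc is the recovery function executed by the Xrc node; and
 * allowBufferRecycle, when set, lets a surviving read buffer of the
 * same size as the failed PDA double as the XOR target instead of
 * allocating a fresh buffer.
 */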
void
rf_CommonCreateSimpleDegradedWriteDAG(RF_Raid_t *raidPtr,
				      RF_AccessStripeMap_t *asmap,
				      RF_DagHeader_t *dag_h, void *bp,
				      RF_RaidAccessFlags_t flags,
				      RF_AllocListElem_t *allocList,
				      int nfaults,
				      int (*redFunc) (RF_DagNode_t *),
				      int allowBufferRecycle)
{
	int     nRrdNodes, nWndNodes, nXorBufs, i, j, paramNum,
	        rdnodesFaked;
	RF_DagNode_t *blockNode, *unblockNode, *wnpNode, *termNode;
#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	RF_DagNode_t *wnqNode;
#endif
	RF_DagNode_t *wndNodes, *rrdNodes, *xorNode, *commitNode;
	RF_DagNode_t *tmpNode, *tmpwndNode, *tmprrdNode;
	RF_SectorCount_t sectorsPerSU;
	RF_ReconUnitNum_t which_ru;
	char   *xorTargetBuf = NULL;	/* the target buffer for the XOR
					 * operation */
	char    overlappingPDAs[RF_MAXCOL];	/* a temporary array of flags */
	RF_AccessStripeMapHeader_t *new_asm_h[2];
	RF_PhysDiskAddr_t *pda, *parityPDA;
	RF_StripeNum_t parityStripeID;
	RF_PhysDiskAddr_t *failedPDA;
	RF_RaidLayout_t *layoutPtr;

	layoutPtr = &(raidPtr->Layout);
	parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress,
	    &which_ru);
	sectorsPerSU = layoutPtr->sectorsPerStripeUnit;
	/* failedPDA points to the pda within the asm that targets the failed
	 * disk */
	failedPDA = asmap->failedPDAs[0];

#if RF_DEBUG_DAG
	if (rf_dagDebug)
		printf("[Creating degraded-write DAG]\n");
#endif

	RF_ASSERT(asmap->numDataFailed == 1);
	dag_h->creator = "SimpleDegradedWriteDAG";

	/*
	 * Generate two ASMs identifying the surviving data
	 * we need in order to recover the lost data.
	 */
	/* overlappingPDAs array must be zero'd */
	memset(overlappingPDAs, 0, RF_MAXCOL);
	rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h,
	    &nXorBufs, NULL, overlappingPDAs, allocList);

	/* create all the nodes at once */
	nWndNodes = asmap->numStripeUnitsAccessed - 1;	/* no access is
							 * generated for the
							 * failed pda */

	nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) +
	    ((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0);
	/*
	 * XXX
	 *
	 * There's a bug with a complete stripe overwrite - that means 0 reads
	 * of old data, and the rest of the DAG generation code doesn't like
	 * that.  A release is coming, and I don't wanna risk breaking a critical
	 * DAG generator, so here's what I'm gonna do - if there's no read nodes,
	 * I'm gonna fake there being a read node, and I'm gonna swap in a
	 * no-op node in its place (to make all the link-up code happy).
	 * This should be fixed at some point.  --jimz
	 */
	if (nRrdNodes == 0) {
		nRrdNodes = 1;
		rdnodesFaked = 1;
	} else {
		rdnodesFaked = 0;
	}

	blockNode = rf_AllocDAGNode();
	blockNode->list_next = dag_h->nodes;
	dag_h->nodes = blockNode;

	commitNode = rf_AllocDAGNode();
	commitNode->list_next = dag_h->nodes;
	dag_h->nodes = commitNode;

	unblockNode = rf_AllocDAGNode();
	unblockNode->list_next = dag_h->nodes;
	dag_h->nodes = unblockNode;

	termNode = rf_AllocDAGNode();
	termNode->list_next = dag_h->nodes;
	dag_h->nodes = termNode;

	xorNode = rf_AllocDAGNode();
	xorNode->list_next = dag_h->nodes;
	dag_h->nodes = xorNode;

	wnpNode = rf_AllocDAGNode();
	wnpNode->list_next = dag_h->nodes;
	dag_h->nodes = wnpNode;

	for (i = 0; i < nWndNodes; i++) {
		tmpNode = rf_AllocDAGNode();
		tmpNode->list_next = dag_h->nodes;
		dag_h->nodes = tmpNode;
	}
	wndNodes = dag_h->nodes;

	for (i = 0; i < nRrdNodes; i++) {
		tmpNode = rf_AllocDAGNode();
		tmpNode->list_next = dag_h->nodes;
		dag_h->nodes = tmpNode;
	}
	rrdNodes = dag_h->nodes;

#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	if (nfaults == 2) {
		wnqNode = rf_AllocDAGNode();
		wnqNode->list_next = dag_h->nodes;
		dag_h->nodes = wnqNode;
	} else {
		wnqNode = NULL;
	}
#endif

	/* This dag cannot commit until the rrd and xor nodes have completed. */
	dag_h->numCommitNodes = 1;
	dag_h->numCommits = 0;
	dag_h->numSuccedents = 1;

	RF_ASSERT(nRrdNodes > 0);
	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
	    NULL, nRrdNodes, 0, 0, 0, dag_h, "Nil", allocList);
	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
	    NULL, nWndNodes + nfaults, 1, 0, 0, dag_h, "Cmt", allocList);
	rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
	    NULL, 1, nWndNodes + nfaults, 0, 0, dag_h, "Nil", allocList);
	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
	    NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
	rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1,
	    nRrdNodes, 2 * nXorBufs + 2, nfaults, dag_h, "Xrc", allocList);
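
	/*
	 * Note on the Xrc sizing above: the node takes 2 * nXorBufs + 2
	 * params, i.e. a (pda, buffer) pair per XOR input followed by the
	 * failed PDA and the raidPtr; the RF_ASSERT on paramNum after the
	 * parameter fill-in below checks exactly this count.
	 */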

	/*
	 * Fill in the Rrd nodes.  If any of the rrd buffers are the same size
	 * as the failed buffer, save a pointer to it so we can use it as the
	 * target of the XOR.  The pdas in the rrd nodes have been
	 * range-restricted, so if a buffer is the same size as the failed
	 * buffer, it must also be at the same alignment within the SU.
	 */
	i = 0;
	tmprrdNode = rrdNodes;
	if (new_asm_h[0]) {
		for (i = 0, pda = new_asm_h[0]->stripeMap->physInfo;
		     i < new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
		     i++, pda = pda->next) {
			rf_InitNode(tmprrdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rrd", allocList);
			RF_ASSERT(pda);
			tmprrdNode->params[0].p = pda;
			tmprrdNode->params[1].p = pda->bufPtr;
			tmprrdNode->params[2].v = parityStripeID;
			tmprrdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
			tmprrdNode = tmprrdNode->list_next;
		}
	}
	/* i now equals the number of stripe units accessed in new_asm_h[0] */
	/* Note that tmprrdNode simply continues from the loop above, so there
	 * is no need to reassign it. */
	if (new_asm_h[1]) {
		for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo;
		     j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
		     j++, pda = pda->next) {
			rf_InitNode(tmprrdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rrd", allocList);
			RF_ASSERT(pda);
			tmprrdNode->params[0].p = pda;
			tmprrdNode->params[1].p = pda->bufPtr;
			tmprrdNode->params[2].v = parityStripeID;
			tmprrdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
			if (allowBufferRecycle && (pda->numSector == failedPDA->numSector))
				xorTargetBuf = pda->bufPtr;
			tmprrdNode = tmprrdNode->list_next;
		}
	}
	if (rdnodesFaked) {
		/*
		 * This is where we init the fake no-op read node
		 * (XXX should the wakeup func be different?)
		 */
		/* Note that rrdNodes will just be a single node. */
		rf_InitNode(rrdNodes, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
		    NULL, 1, 1, 0, 0, dag_h, "RrN", allocList);
	}
	/*
	 * Make a PDA for the parity unit.  The parity PDA should start at
	 * the same offset into the SU as the failed PDA.
	 */
	/* Danner comment: I don't think this copy is really necessary.  We
	 * are in one of two cases here.  (1) The entire failed unit is
	 * written.  Then asmap->parityInfo will describe the entire parity.
	 * (2) We are only writing a subset of the failed unit and nothing
	 * else.  Then asmap->parityInfo describes the failed unit and the
	 * copy can also be avoided. */

	parityPDA = rf_AllocPhysDiskAddr();
	parityPDA->next = dag_h->pda_cleanup_list;
	dag_h->pda_cleanup_list = parityPDA;
	parityPDA->col = asmap->parityInfo->col;
	parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU)
	    * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU);
	parityPDA->numSector = failedPDA->numSector;
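
	/*
	 * Illustrative arithmetic (made-up numbers): with sectorsPerSU == 32,
	 * parityInfo->startSector == 64 and a failed PDA starting 8 sectors
	 * into its SU, the expression above gives (64 / 32) * 32 + 8 == 72,
	 * i.e. the parity write begins at the same offset within its SU as
	 * the failed data.
	 */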

	if (!xorTargetBuf) {
		xorTargetBuf = rf_AllocBuffer(raidPtr, dag_h, rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
	}
	/* init the Wnp node */
	rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
	    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList);
	wnpNode->params[0].p = parityPDA;
	wnpNode->params[1].p = xorTargetBuf;
	wnpNode->params[2].v = parityStripeID;
	wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);

#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	/* fill in the Wnq Node */
	if (nfaults == 2) {
		{
			RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t),
			    (RF_PhysDiskAddr_t *), allocList);
			parityPDA->col = asmap->qInfo->col;
			parityPDA->startSector = ((asmap->qInfo->startSector / sectorsPerSU)
			    * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU);
			parityPDA->numSector = failedPDA->numSector;

			rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList);
			wnqNode->params[0].p = parityPDA;
			RF_MallocAndAdd(xorNode->results[1],
			    rf_RaidAddressToByte(raidPtr, failedPDA->numSector), (char *), allocList);
			wnqNode->params[1].p = xorNode->results[1];
			wnqNode->params[2].v = parityStripeID;
			wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
		}
	}
#endif
	/* fill in the Wnd nodes */
	tmpwndNode = wndNodes;
	for (pda = asmap->physInfo, i = 0; i < nWndNodes; i++, pda = pda->next) {
		if (pda == failedPDA) {
			i--;
			continue;
		}
		rf_InitNode(tmpwndNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
		    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList);
		RF_ASSERT(pda);
		tmpwndNode->params[0].p = pda;
		tmpwndNode->params[1].p = pda->bufPtr;
		tmpwndNode->params[2].v = parityStripeID;
		tmpwndNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
		tmpwndNode = tmpwndNode->list_next;
	}

	/* fill in the results of the xor node */
	xorNode->results[0] = xorTargetBuf;

	/* fill in the params of the xor node */

	paramNum = 0;
	if (rdnodesFaked == 0) {
		tmprrdNode = rrdNodes;
		for (i = 0; i < nRrdNodes; i++) {
			/* all the Rrd nodes need to be xored together */
			xorNode->params[paramNum++] = tmprrdNode->params[0];
			xorNode->params[paramNum++] = tmprrdNode->params[1];
			tmprrdNode = tmprrdNode->list_next;
		}
	}
	tmpwndNode = wndNodes;
	for (i = 0; i < nWndNodes; i++) {
		/* any Wnd nodes that overlap the failed access need to be
		 * xored in */
		if (overlappingPDAs[i]) {
			pda = rf_AllocPhysDiskAddr();
			memcpy((char *) pda, (char *) tmpwndNode->params[0].p, sizeof(RF_PhysDiskAddr_t));
			/* add it into the pda_cleanup_list *after* the copy, TYVM */
			pda->next = dag_h->pda_cleanup_list;
			dag_h->pda_cleanup_list = pda;
			rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0);
			xorNode->params[paramNum++].p = pda;
			xorNode->params[paramNum++].p = pda->bufPtr;
		}
		tmpwndNode = tmpwndNode->list_next;
	}

	/*
	 * Install the failed PDA into the xor param list so that the
	 * new data gets xor'd in.
	 */
	xorNode->params[paramNum++].p = failedPDA;
	xorNode->params[paramNum++].p = failedPDA->bufPtr;

	/*
	 * The last 2 params to the recovery xor node are always the failed
	 * PDA and the raidPtr.  We install the failed PDA again even though
	 * we just did so above; this allows us to use the same XOR function
	 * for both degraded reads and degraded writes.
	 */
	xorNode->params[paramNum++].p = failedPDA;
	xorNode->params[paramNum++].p = raidPtr;
	RF_ASSERT(paramNum == 2 * nXorBufs + 2);

	/*
	 * Code to link nodes begins here
	 */

	/* link header to block node */
	RF_ASSERT(blockNode->numAntecedents == 0);
	dag_h->succedents[0] = blockNode;

	/* link block node to rd nodes */
	RF_ASSERT(blockNode->numSuccedents == nRrdNodes);
	tmprrdNode = rrdNodes;
	for (i = 0; i < nRrdNodes; i++) {
		RF_ASSERT(tmprrdNode->numAntecedents == 1);
		blockNode->succedents[i] = tmprrdNode;
		tmprrdNode->antecedents[0] = blockNode;
		tmprrdNode->antType[0] = rf_control;
		tmprrdNode = tmprrdNode->list_next;
	}

	/* link read nodes to xor node */
	RF_ASSERT(xorNode->numAntecedents == nRrdNodes);
	tmprrdNode = rrdNodes;
	for (i = 0; i < nRrdNodes; i++) {
		RF_ASSERT(tmprrdNode->numSuccedents == 1);
		tmprrdNode->succedents[0] = xorNode;
		xorNode->antecedents[i] = tmprrdNode;
		xorNode->antType[i] = rf_trueData;
		tmprrdNode = tmprrdNode->list_next;
	}

	/* link xor node to commit node */
	RF_ASSERT(xorNode->numSuccedents == 1);
	RF_ASSERT(commitNode->numAntecedents == 1);
	xorNode->succedents[0] = commitNode;
	commitNode->antecedents[0] = xorNode;
	commitNode->antType[0] = rf_control;

	/* link commit node to wnd nodes */
	RF_ASSERT(commitNode->numSuccedents == nfaults + nWndNodes);
	tmpwndNode = wndNodes;
	for (i = 0; i < nWndNodes; i++) {
		RF_ASSERT(tmpwndNode->numAntecedents == 1);
		commitNode->succedents[i] = tmpwndNode;
		tmpwndNode->antecedents[0] = commitNode;
		tmpwndNode->antType[0] = rf_control;
		tmpwndNode = tmpwndNode->list_next;
	}

	/* link the commit node to wnp, wnq nodes */
	RF_ASSERT(wnpNode->numAntecedents == 1);
	commitNode->succedents[nWndNodes] = wnpNode;
	wnpNode->antecedents[0] = commitNode;
	wnpNode->antType[0] = rf_control;
#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	if (nfaults == 2) {
		RF_ASSERT(wnqNode->numAntecedents == 1);
		commitNode->succedents[nWndNodes + 1] = wnqNode;
		wnqNode->antecedents[0] = commitNode;
		wnqNode->antType[0] = rf_control;
	}
#endif
	/* link write new data nodes to unblock node */
	RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nfaults));
	tmpwndNode = wndNodes;
	for (i = 0; i < nWndNodes; i++) {
		RF_ASSERT(tmpwndNode->numSuccedents == 1);
		tmpwndNode->succedents[0] = unblockNode;
		unblockNode->antecedents[i] = tmpwndNode;
		unblockNode->antType[i] = rf_control;
		tmpwndNode = tmpwndNode->list_next;
	}

	/* link write new parity node to unblock node */
	RF_ASSERT(wnpNode->numSuccedents == 1);
	wnpNode->succedents[0] = unblockNode;
	unblockNode->antecedents[nWndNodes] = wnpNode;
	unblockNode->antType[nWndNodes] = rf_control;

#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	/* link write new q node to unblock node */
	if (nfaults == 2) {
		RF_ASSERT(wnqNode->numSuccedents == 1);
		wnqNode->succedents[0] = unblockNode;
		unblockNode->antecedents[nWndNodes + 1] = wnqNode;
		unblockNode->antType[nWndNodes + 1] = rf_control;
	}
#endif
	/* link unblock node to term node */
	RF_ASSERT(unblockNode->numSuccedents == 1);
	RF_ASSERT(termNode->numAntecedents == 1);
	RF_ASSERT(termNode->numSuccedents == 0);
	unblockNode->succedents[0] = termNode;
	termNode->antecedents[0] = unblockNode;
	termNode->antType[0] = rf_control;
}
#define CONS_PDA(if,start,num) \
	pda_p->col = asmap->if->col; \
	pda_p->startSector = ((asmap->if->startSector / secPerSU) * secPerSU) + start; \
	pda_p->numSector = num; \
	pda_p->next = NULL; \
	RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, num), (char *), allocList)
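
/*
 * For example (an expansion sketch): CONS_PDA(parityInfo, fone_start,
 * fone->numSector) fills in pda_p from asmap->parityInfo: the same
 * column, a start sector rounded down to the SU boundary plus
 * fone_start, fone's sector count, and a freshly allocated buffer of
 * that many sectors.
 */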
#if (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_EVENODD > 0)
void
rf_WriteGenerateFailedAccessASMs(
    RF_Raid_t *raidPtr,
    RF_AccessStripeMap_t *asmap,
    RF_PhysDiskAddr_t **pdap,
    int *nNodep,
    RF_PhysDiskAddr_t **pqpdap,
    int *nPQNodep,
    RF_AllocListElem_t *allocList)
{
	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
	int     PDAPerDisk, i;
	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
	int     numDataCol = layoutPtr->numDataCol;
	int     state;
	unsigned napdas;
	RF_SectorNum_t fone_start, ftwo_start = 0;
	RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1];
	RF_PhysDiskAddr_t *pda_p;
	RF_RaidAddr_t sosAddr;

	/* Determine how many pda's we will have to generate per unaccessed
	 * stripe unit.  If there is only one failed data unit, it is one; if
	 * two, possibly two, depending on whether they overlap. */
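
	/*
	 * The three cases below are tracked in `state': 1 - a single failed
	 * data unit; 2 - two failed units whose ranges together exceed one
	 * SU, handled with whole-SU P/Q PDAs; 3 - two smaller failed ranges,
	 * handled with two PDAs per redundancy disk (fone's range, then
	 * ftwo's).
	 */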

	fone_start = rf_StripeUnitOffset(layoutPtr, fone->startSector);

	if (asmap->numDataFailed == 1) {
		PDAPerDisk = 1;
		state = 1;
		RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
		pda_p = *pqpdap;
		/* build p */
		CONS_PDA(parityInfo, fone_start, fone->numSector);
		pda_p->type = RF_PDA_TYPE_PARITY;
		pda_p++;
		/* build q */
		CONS_PDA(qInfo, fone_start, fone->numSector);
		pda_p->type = RF_PDA_TYPE_Q;
	} else {
		ftwo_start = rf_StripeUnitOffset(layoutPtr, ftwo->startSector);
		if (fone->numSector + ftwo->numSector > secPerSU) {
			PDAPerDisk = 1;
			state = 2;
			RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
			pda_p = *pqpdap;
			CONS_PDA(parityInfo, 0, secPerSU);
			pda_p->type = RF_PDA_TYPE_PARITY;
			pda_p++;
			CONS_PDA(qInfo, 0, secPerSU);
			pda_p->type = RF_PDA_TYPE_Q;
		} else {
			PDAPerDisk = 2;
			state = 3;
			/* four of them, fone, then ftwo */
			RF_MallocAndAdd(*pqpdap, 4 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
			pda_p = *pqpdap;
			CONS_PDA(parityInfo, fone_start, fone->numSector);
			pda_p->type = RF_PDA_TYPE_PARITY;
			pda_p++;
			CONS_PDA(qInfo, fone_start, fone->numSector);
			pda_p->type = RF_PDA_TYPE_Q;
			pda_p++;
			CONS_PDA(parityInfo, ftwo_start, ftwo->numSector);
			pda_p->type = RF_PDA_TYPE_PARITY;
			pda_p++;
			CONS_PDA(qInfo, ftwo_start, ftwo->numSector);
			pda_p->type = RF_PDA_TYPE_Q;
		}
	}
	/* figure out the number of non-accessed pdas */
	napdas = PDAPerDisk * (numDataCol - 2);
	*nPQNodep = PDAPerDisk;

	*nNodep = napdas;
	if (napdas == 0)
		return;		/* short circuit */

	/* allocate up our list of pda's */

	RF_MallocAndAdd(pda_p, napdas * sizeof(RF_PhysDiskAddr_t),
	    (RF_PhysDiskAddr_t *), allocList);
	*pdap = pda_p;

	/* linkem together */
	for (i = 0; i < (napdas - 1); i++)
		pda_p[i].next = pda_p + (i + 1);

	sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
	for (i = 0; i < numDataCol; i++) {
		if ((pda_p - (*pdap)) == napdas)
			continue;
		pda_p->type = RF_PDA_TYPE_DATA;
		pda_p->raidAddress = sosAddr + (i * secPerSU);
		(raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0);
		/* skip over dead disks */
		if (RF_DEAD_DISK(raidPtr->Disks[pda_p->col].status))
			continue;
		switch (state) {
		case 1:	/* fone */
			pda_p->numSector = fone->numSector;
			pda_p->raidAddress += fone_start;
			pda_p->startSector += fone_start;
			RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
			break;
		case 2:	/* full stripe */
			pda_p->numSector = secPerSU;
			RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList);
			break;
		case 3:	/* two slabs */
			pda_p->numSector = fone->numSector;
			pda_p->raidAddress += fone_start;
			pda_p->startSector += fone_start;
			RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
			pda_p++;
			pda_p->type = RF_PDA_TYPE_DATA;
			pda_p->raidAddress = sosAddr + (i * secPerSU);
			(raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0);
			pda_p->numSector = ftwo->numSector;
			pda_p->raidAddress += ftwo_start;
			pda_p->startSector += ftwo_start;
			RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
			break;
		default:
			RF_PANIC();
		}
		pda_p++;
	}

	RF_ASSERT(pda_p - *pdap == napdas);
	return;
}
#define DISK_NODE_PDA(node) ((node)->params[0].p)

#define DISK_NODE_PARAMS(_node_,_p_) \
	(_node_).params[0].p = _p_ ; \
	(_node_).params[1].p = (_p_)->bufPtr; \
	(_node_).params[2].v = parityStripeID; \
	(_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru)
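
/*
 * Usage sketch: DISK_NODE_PARAMS(rrdNodes[i], pda) loads the standard
 * four disk-node params (the pda, its buffer, the parity stripe ID, and
 * the priority/reconstruction-unit word) that rf_DiskReadFunc and
 * rf_DiskWriteFunc expect; both macros assume parityStripeID and
 * which_ru are in scope at the point of use.
 */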

void
rf_DoubleDegSmallWrite(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
		       RF_DagHeader_t *dag_h, void *bp,
		       RF_RaidAccessFlags_t flags,
		       RF_AllocListElem_t *allocList,
		       const char *redundantReadNodeName,
		       const char *redundantWriteNodeName,
		       const char *recoveryNodeName,
		       int (*recovFunc) (RF_DagNode_t *))
{
	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
	RF_DagNode_t *nodes, *wudNodes, *rrdNodes, *recoveryNode, *blockNode,
	    *unblockNode, *rpNodes, *rqNodes, *wpNodes, *wqNodes, *termNode;
	RF_PhysDiskAddr_t *pda, *pqPDAs;
	RF_PhysDiskAddr_t *npdas;
	int     nWriteNodes, nNodes, nReadNodes, nRrdNodes, nWudNodes, i;
	RF_ReconUnitNum_t which_ru;
	int     nPQNodes;
	RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru);

	/* Simple small write case.  The first part looks like a
	 * reconstruct-read of the failed data unit; then all data units
	 * that did not fail are written. */

	/*
	 *                      Hdr
	 *                       |
	 *                  ----Block----
	 *                 /     |       \
	 *              Rrd ... Rrd   Rp  Rq
	 *                 \     |       /
	 *                  -----PQ------
	 *                 /     |       \
	 *              Wud ... Wp       Wq
	 *                 \     |       /
	 *                  ---Unblock---
	 *                       |
	 *                       T
	 *
	 * Rrd = read recovery data (potentially none)
	 * Wud = write user data (not incl. failed disks)
	 * Wp  = write P (could be two)
	 * Wq  = write Q (could be two)
	 */

	rf_WriteGenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes, allocList);

	RF_ASSERT(asmap->numDataFailed == 1);

	nWudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed);
	nReadNodes = nRrdNodes + 2 * nPQNodes;
	nWriteNodes = nWudNodes + 2 * nPQNodes;
	nNodes = 4 + nReadNodes + nWriteNodes;

	RF_MallocAndAdd(nodes, nNodes * sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
	blockNode = nodes;
	unblockNode = blockNode + 1;
	termNode = unblockNode + 1;
	recoveryNode = termNode + 1;
	rrdNodes = recoveryNode + 1;
	rpNodes = rrdNodes + nRrdNodes;
	rqNodes = rpNodes + nPQNodes;
	wudNodes = rqNodes + nPQNodes;
	wpNodes = wudNodes + nWudNodes;
	wqNodes = wpNodes + nPQNodes;

	dag_h->creator = "PQ_DDSimpleSmallWrite";
	dag_h->numSuccedents = 1;
	dag_h->succedents[0] = blockNode;
	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
	termNode->antecedents[0] = unblockNode;
	termNode->antType[0] = rf_control;

	/* init the block and unblock nodes */
	/* The block node has all the read nodes as successors */
	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList);
	for (i = 0; i < nReadNodes; i++)
		blockNode->succedents[i] = rrdNodes + i;
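	/*
	 * Note: rrdNodes + i is valid for all nReadNodes because the Rp and
	 * Rq nodes were carved out of the same contiguous allocation right
	 * after the Rrd nodes; the same trick lets wudNodes + i reach the
	 * Wp/Wq nodes below.
	 */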

	/* The unblock node has all the writes as successors */
	rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nWriteNodes, 0, 0, dag_h, "Nil", allocList);
	for (i = 0; i < nWriteNodes; i++) {
		unblockNode->antecedents[i] = wudNodes + i;
		unblockNode->antType[i] = rf_control;
	}
	unblockNode->succedents[0] = termNode;

#define INIT_READ_NODE(node,name) \
	rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, allocList); \
	(node)->succedents[0] = recoveryNode; \
	(node)->antecedents[0] = blockNode; \
	(node)->antType[0] = rf_control;

	/* build the read nodes */
	pda = npdas;
	for (i = 0; i < nRrdNodes; i++, pda = pda->next) {
		INIT_READ_NODE(rrdNodes + i, "rrd");
		DISK_NODE_PARAMS(rrdNodes[i], pda);
	}

	/* read redundancy pdas */
	pda = pqPDAs;
	INIT_READ_NODE(rpNodes, "Rp");
	RF_ASSERT(pda);
	DISK_NODE_PARAMS(rpNodes[0], pda);
	pda++;
	INIT_READ_NODE(rqNodes, redundantReadNodeName);
	RF_ASSERT(pda);
	DISK_NODE_PARAMS(rqNodes[0], pda);
	if (nPQNodes == 2) {
		pda++;
		INIT_READ_NODE(rpNodes + 1, "Rp");
		RF_ASSERT(pda);
		DISK_NODE_PARAMS(rpNodes[1], pda);
		pda++;
		INIT_READ_NODE(rqNodes + 1, redundantReadNodeName);
		RF_ASSERT(pda);
		DISK_NODE_PARAMS(rqNodes[1], pda);
	}
	/* The recovery node has all reads as predecessors and all writes as
	 * successors.  It generates a result for every write P or write Q
	 * node.  As parameters, it takes a pda per read and a pda per stripe
	 * of user data written.  It also takes as the last params the
	 * raidPtr and asm.  For results, it takes PDA for P & Q. */


	rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL,
	    nWriteNodes,			/* successors */
	    nReadNodes,				/* predecessors */
	    nReadNodes + nWudNodes + 3,		/* params */
	    2 * nPQNodes,			/* results */
	    dag_h, recoveryNodeName, allocList);



	for (i = 0; i < nReadNodes; i++) {
		recoveryNode->antecedents[i] = rrdNodes + i;
		recoveryNode->antType[i] = rf_control;
		recoveryNode->params[i].p = DISK_NODE_PDA(rrdNodes + i);
	}
	for (i = 0; i < nWudNodes; i++) {
		recoveryNode->succedents[i] = wudNodes + i;
	}
	recoveryNode->params[nReadNodes + nWudNodes].p = asmap->failedPDAs[0];
	recoveryNode->params[nReadNodes + nWudNodes + 1].p = raidPtr;
	recoveryNode->params[nReadNodes + nWudNodes + 2].p = asmap;

	for (; i < nWriteNodes; i++)
		recoveryNode->succedents[i] = wudNodes + i;

	pda = pqPDAs;
	recoveryNode->results[0] = pda;
	pda++;
	recoveryNode->results[1] = pda;
	if (nPQNodes == 2) {
		pda++;
		recoveryNode->results[2] = pda;
		pda++;
		recoveryNode->results[3] = pda;
	}
	/* fill writes */
#define INIT_WRITE_NODE(node,name) \
	rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, allocList); \
	(node)->succedents[0] = unblockNode; \
	(node)->antecedents[0] = recoveryNode; \
	(node)->antType[0] = rf_control;

	pda = asmap->physInfo;
	for (i = 0; i < nWudNodes; i++) {
		INIT_WRITE_NODE(wudNodes + i, "Wd");
		DISK_NODE_PARAMS(wudNodes[i], pda);
		recoveryNode->params[nReadNodes + i].p = DISK_NODE_PDA(wudNodes + i);
		pda = pda->next;
	}
	/* write redundancy pdas */
	pda = pqPDAs;
	INIT_WRITE_NODE(wpNodes, "Wp");
	RF_ASSERT(pda);
	DISK_NODE_PARAMS(wpNodes[0], pda);
	pda++;
	INIT_WRITE_NODE(wqNodes, "Wq");
	RF_ASSERT(pda);
	DISK_NODE_PARAMS(wqNodes[0], pda);
	if (nPQNodes == 2) {
		pda++;
		INIT_WRITE_NODE(wpNodes + 1, "Wp");
		RF_ASSERT(pda);
		DISK_NODE_PARAMS(wpNodes[1], pda);
		pda++;
		INIT_WRITE_NODE(wqNodes + 1, "Wq");
		RF_ASSERT(pda);
		DISK_NODE_PARAMS(wqNodes[1], pda);
	}
}
#endif				/* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_EVENODD > 0) */