| 1 | /* $NetBSD: rf_evenodd_dagfuncs.c,v 1.22 2014/03/23 09:30:59 christos Exp $ */ |
| 2 | /* |
| 3 | * Copyright (c) 1995 Carnegie-Mellon University. |
| 4 | * All rights reserved. |
| 5 | * |
| 6 | * Author: ChangMing Wu |
| 7 | * |
| 8 | * Permission to use, copy, modify and distribute this software and |
| 9 | * its documentation is hereby granted, provided that both the copyright |
| 10 | * notice and this permission notice appear in all copies of the |
| 11 | * software, derivative works or modified versions, and any portions |
| 12 | * thereof, and that both notices appear in supporting documentation. |
| 13 | * |
| 14 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
| 15 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND |
| 16 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
| 17 | * |
| 18 | * Carnegie Mellon requests users of this software to return to |
| 19 | * |
| 20 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
| 21 | * School of Computer Science |
| 22 | * Carnegie Mellon University |
| 23 | * Pittsburgh PA 15213-3890 |
| 24 | * |
| 25 | * any improvements or extensions that they make and grant Carnegie the |
| 26 | * rights to redistribute these changes. |
| 27 | */ |
| 28 | |
| 29 | /* |
| 30 | * Code for RAID-EVENODD architecture. |
| 31 | */ |
| 32 | |
| 33 | #include <sys/cdefs.h> |
| 34 | __KERNEL_RCSID(0, "$NetBSD: rf_evenodd_dagfuncs.c,v 1.22 2014/03/23 09:30:59 christos Exp $" ); |
| 35 | |
| 36 | #include "rf_archs.h" |
| 37 | |
| 38 | #ifdef _KERNEL_OPT |
| 39 | #include "opt_raid_diagnostic.h" |
| 40 | #endif |
| 41 | |
| 42 | #if RF_INCLUDE_EVENODD > 0 |
| 43 | |
| 44 | #include <dev/raidframe/raidframevar.h> |
| 45 | |
| 46 | #include "rf_raid.h" |
| 47 | #include "rf_dag.h" |
| 48 | #include "rf_dagffrd.h" |
| 49 | #include "rf_dagffwr.h" |
| 50 | #include "rf_dagdegrd.h" |
| 51 | #include "rf_dagdegwr.h" |
| 52 | #include "rf_dagutils.h" |
| 53 | #include "rf_dagfuncs.h" |
| 54 | #include "rf_etimer.h" |
| 55 | #include "rf_general.h" |
| 56 | #include "rf_parityscan.h" |
| 57 | #include "rf_evenodd.h" |
| 58 | #include "rf_evenodd_dagfuncs.h" |
| 59 | |
| 60 | /* These redundant functions are for small write */ |
| 61 | RF_RedFuncs_t rf_EOSmallWritePFuncs = {rf_RegularXorFunc, "Regular Old-New P" , rf_SimpleXorFunc, "Simple Old-New P" }; |
| 62 | RF_RedFuncs_t rf_EOSmallWriteEFuncs = {rf_RegularONEFunc, "Regular Old-New E" , rf_SimpleONEFunc, "Regular Old-New E" }; |
| 63 | /* These redundant functions are for degraded read */ |
| 64 | RF_RedFuncs_t rf_eoPRecoveryFuncs = {rf_RecoveryXorFunc, "Recovery Xr" , rf_RecoveryXorFunc, "Recovery Xr" }; |
| 65 | RF_RedFuncs_t rf_eoERecoveryFuncs = {rf_RecoveryEFunc, "Recovery E Func" , rf_RecoveryEFunc, "Recovery E Func" }; |
| 66 | /********************************************************************************************** |
| 67 | * the following encoding node functions is used in EO_000_CreateLargeWriteDAG |
| 68 | **********************************************************************************************/ |
| 69 | int |
| 70 | rf_RegularPEFunc(RF_DagNode_t *node) |
| 71 | { |
| 72 | rf_RegularESubroutine(node, node->results[1]); |
| 73 | rf_RegularXorFunc(node);/* does the wakeup here! */ |
| 74 | #if 1 |
| 75 | return (0); /* XXX This was missing... GO */ |
| 76 | #endif |
| 77 | } |
| 78 | |
| 79 | |
/************************************************************************************************
 * For EO_001_CreateSmallWriteDAG, there are (i) RegularONEFunc() and (ii) SimpleONEFunc() to
 * be used. The former is used when the write accesses at least the sectors of a full stripe unit.
 * The latter is used when the write accesses two stripe units but with total sectors
 * less than sectors per SU. In this case, the accesses of parity and 'E' appear as disconnected
 * areas in their stripe units, and the parity write and 'E' write are both divided into two distinct
 * writes (four in total). The simple old-new write and regular old-new write happen as in RAID-5.
 ************************************************************************************************/
| 88 | |
| 89 | /* Algorithm: |
| 90 | 1. Store the difference of old data and new data in the Rod buffer. |
| 91 | 2. then encode this buffer into the buffer which already have old 'E' information inside it, |
| 92 | the result can be shown to be the new 'E' information. |
| 93 | 3. xor the Wnd buffer into the difference buffer to recover the original old data. |
| 94 | Here we have another alternative: to allocate a temporary buffer for storing the difference of |
| 95 | old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach |
| 96 | take the same speed as the previous, and need more memory. |
| 97 | */ |
| 98 | int |
| 99 | rf_RegularONEFunc(RF_DagNode_t *node) |
| 100 | { |
| 101 | RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; |
| 102 | RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; |
| 103 | int EpdaIndex = (node->numParams - 1) / 2 - 1; /* the parameter of node |
| 104 | * where you can find |
| 105 | * e-pda */ |
| 106 | int i, k; |
| 107 | int suoffset, length; |
| 108 | RF_RowCol_t scol; |
| 109 | char *srcbuf, *destbuf; |
| 110 | RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; |
| 111 | RF_Etimer_t timer; |
| 112 | RF_PhysDiskAddr_t *pda; |
| 113 | #ifdef RAID_DIAGNOSTIC |
| 114 | RF_PhysDiskAddr_t *EPDA = |
| 115 | (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p; |
| 116 | int ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector); |
| 117 | |
| 118 | RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q); |
| 119 | RF_ASSERT(ESUOffset == 0); |
| 120 | #endif /* RAID_DIAGNOSTIC */ |
| 121 | |
| 122 | RF_ETIMER_START(timer); |
| 123 | |
| 124 | /* Xor the Wnd buffer into Rod buffer, the difference of old data and |
| 125 | * new data is stored in Rod buffer */ |
| 126 | for (k = 0; k < EpdaIndex; k += 2) { |
| 127 | length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector); |
| 128 | rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length); |
| 129 | } |
| 130 | /* Start to encoding the buffer storing the difference of old data and |
| 131 | * new data into 'E' buffer */ |
| 132 | for (i = 0; i < EpdaIndex; i += 2) |
| 133 | if (node->params[i + 1].p != node->results[0]) { /* results[0] is buf ptr |
| 134 | * of E */ |
| 135 | pda = (RF_PhysDiskAddr_t *) node->params[i].p; |
| 136 | srcbuf = (char *) node->params[i + 1].p; |
| 137 | scol = rf_EUCol(layoutPtr, pda->raidAddress); |
| 138 | suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); |
| 139 | destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset); |
| 140 | rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); |
| 141 | } |
| 142 | /* Recover the original old data to be used by parity encoding |
| 143 | * function in XorNode */ |
| 144 | for (k = 0; k < EpdaIndex; k += 2) { |
| 145 | length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector); |
| 146 | rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length); |
| 147 | } |
| 148 | RF_ETIMER_STOP(timer); |
| 149 | RF_ETIMER_EVAL(timer); |
| 150 | tracerec->q_us += RF_ETIMER_VAL_US(timer); |
| 151 | rf_GenericWakeupFunc(node, 0); |
| 152 | #if 1 |
| 153 | return (0); /* XXX this was missing.. GO */ |
| 154 | #endif |
| 155 | } |
| 156 | |
| 157 | int |
| 158 | rf_SimpleONEFunc(RF_DagNode_t *node) |
| 159 | { |
| 160 | RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; |
| 161 | RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; |
| 162 | RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; |
| 163 | int retcode = 0; |
| 164 | char *srcbuf, *destbuf; |
| 165 | RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; |
| 166 | int length; |
| 167 | RF_RowCol_t scol; |
| 168 | RF_Etimer_t timer; |
| 169 | |
| 170 | RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == RF_PDA_TYPE_Q); |
| 171 | if (node->dagHdr->status == rf_enable) { |
| 172 | RF_ETIMER_START(timer); |
| 173 | length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector); /* this is a pda of |
| 174 | * writeDataNodes */ |
| 175 | /* bxor to buffer of readDataNodes */ |
| 176 | retcode = rf_bxor(node->params[5].p, node->params[1].p, length); |
| 177 | /* find out the corresponding colume in encoding matrix for |
| 178 | * write colume to be encoded into redundant disk 'E' */ |
| 179 | scol = rf_EUCol(layoutPtr, pda->raidAddress); |
| 180 | srcbuf = node->params[1].p; |
| 181 | destbuf = node->params[3].p; |
| 182 | /* Start encoding process */ |
| 183 | rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); |
| 184 | rf_bxor(node->params[5].p, node->params[1].p, length); |
| 185 | RF_ETIMER_STOP(timer); |
| 186 | RF_ETIMER_EVAL(timer); |
| 187 | tracerec->q_us += RF_ETIMER_VAL_US(timer); |
| 188 | |
| 189 | } |
| 190 | return (rf_GenericWakeupFunc(node, retcode)); /* call wake func |
| 191 | * explicitly since no |
| 192 | * I/O in this node */ |
| 193 | } |
| 194 | |
| 195 | |
| 196 | /****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. large write ********/ |
| 197 | void |
| 198 | rf_RegularESubroutine(RF_DagNode_t *node, char *ebuf) |
| 199 | { |
| 200 | RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; |
| 201 | RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; |
| 202 | RF_PhysDiskAddr_t *pda; |
| 203 | int i, suoffset; |
| 204 | RF_RowCol_t scol; |
| 205 | char *srcbuf, *destbuf; |
| 206 | RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; |
| 207 | RF_Etimer_t timer; |
| 208 | |
| 209 | RF_ETIMER_START(timer); |
| 210 | for (i = 0; i < node->numParams - 2; i += 2) { |
| 211 | RF_ASSERT(node->params[i + 1].p != ebuf); |
| 212 | pda = (RF_PhysDiskAddr_t *) node->params[i].p; |
| 213 | suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); |
| 214 | scol = rf_EUCol(layoutPtr, pda->raidAddress); |
| 215 | srcbuf = (char *) node->params[i + 1].p; |
| 216 | destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset); |
| 217 | rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); |
| 218 | } |
| 219 | RF_ETIMER_STOP(timer); |
| 220 | RF_ETIMER_EVAL(timer); |
| 221 | tracerec->xor_us += RF_ETIMER_VAL_US(timer); |
| 222 | } |
| 223 | |
| 224 | |
| 225 | /******************************************************************************************* |
| 226 | * Used in EO_001_CreateLargeWriteDAG |
| 227 | ******************************************************************************************/ |
| 228 | int |
| 229 | rf_RegularEFunc(RF_DagNode_t *node) |
| 230 | { |
| 231 | rf_RegularESubroutine(node, node->results[0]); |
| 232 | rf_GenericWakeupFunc(node, 0); |
| 233 | #if 1 |
| 234 | return (0); /* XXX this was missing?.. GO */ |
| 235 | #endif |
| 236 | } |
/*******************************************************************************************
 * This degraded function allows only two cases:
 *  1. when the write accesses the full failed stripe unit, the access can span more than
 *     one stripe unit.
 *  2. when the write accesses only part of the failed SU, we assume accesses of more than
 *     one stripe unit are not allowed, so that the write can be dealt with like a
 *     large write.
 * The following function is based on these assumptions. So except in the second case,
 * it looks the same as a large write encoding function. But this is not exactly the
 * normal way of doing a degraded write, since RAIDframe has to break accesses
 * other than the above two cases into smaller accesses. We may have to change
 * DegrESubroutine in the future.
 *******************************************************************************************/
| 250 | void |
| 251 | rf_DegrESubroutine(RF_DagNode_t *node, char *ebuf) |
| 252 | { |
| 253 | RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; |
| 254 | RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; |
| 255 | RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; |
| 256 | RF_PhysDiskAddr_t *pda; |
| 257 | int i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); |
| 258 | RF_RowCol_t scol; |
| 259 | char *srcbuf, *destbuf; |
| 260 | RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; |
| 261 | RF_Etimer_t timer; |
| 262 | |
| 263 | RF_ETIMER_START(timer); |
| 264 | for (i = 0; i < node->numParams - 2; i += 2) { |
| 265 | RF_ASSERT(node->params[i + 1].p != ebuf); |
| 266 | pda = (RF_PhysDiskAddr_t *) node->params[i].p; |
| 267 | suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); |
| 268 | scol = rf_EUCol(layoutPtr, pda->raidAddress); |
| 269 | srcbuf = (char *) node->params[i + 1].p; |
| 270 | destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); |
| 271 | rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); |
| 272 | } |
| 273 | |
| 274 | RF_ETIMER_STOP(timer); |
| 275 | RF_ETIMER_EVAL(timer); |
| 276 | tracerec->q_us += RF_ETIMER_VAL_US(timer); |
| 277 | } |
| 278 | |
| 279 | |
| 280 | /************************************************************************************** |
| 281 | * This function is used in case where one data disk failed and both redundant disks |
| 282 | * alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk |
| 283 | * failed in the stripe but not accessed at this time, then we should, instead, use |
| 284 | * the rf_EOWriteDoubleRecoveryFunc(). |
| 285 | **************************************************************************************/ |
| 286 | int |
| 287 | rf_Degraded_100_EOFunc(RF_DagNode_t *node) |
| 288 | { |
| 289 | rf_DegrESubroutine(node, node->results[1]); |
| 290 | rf_RecoveryXorFunc(node); /* does the wakeup here! */ |
| 291 | #if 1 |
| 292 | return (0); /* XXX this was missing... SHould these be |
| 293 | * void functions??? GO */ |
| 294 | #endif |
| 295 | } |
| 296 | /************************************************************************************** |
| 297 | * This function is to encode one sector in one of the data disks to the E disk. |
| 298 | * However, in evenodd this function can also be used as decoding function to recover |
| 299 | * data from dead disk in the case of parity failure and a single data failure. |
| 300 | **************************************************************************************/ |
| 301 | void |
| 302 | rf_e_EncOneSect( |
| 303 | RF_RowCol_t srcLogicCol, |
| 304 | char *srcSecbuf, |
| 305 | RF_RowCol_t destLogicCol, |
| 306 | char *destSecbuf, |
| 307 | int bytesPerSector) |
| 308 | { |
| 309 | int S_index; /* index of the EU in the src col which need |
| 310 | * be Xored into all EUs in a dest sector */ |
| 311 | int numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1; |
| 312 | RF_RowCol_t j, indexInDest, /* row index of an encoding unit in |
| 313 | * the destination colume of encoding |
| 314 | * matrix */ |
| 315 | indexInSrc; /* row index of an encoding unit in the source |
| 316 | * colume used for recovery */ |
| 317 | int bytesPerEU = bytesPerSector / numRowInEncMatix; |
| 318 | |
| 319 | #if RF_EO_MATRIX_DIM > 17 |
| 320 | int shortsPerEU = bytesPerEU / sizeof(short); |
| 321 | short *destShortBuf, *srcShortBuf1, *srcShortBuf2; |
| 322 | short temp1; |
| 323 | #elif RF_EO_MATRIX_DIM == 17 |
| 324 | int longsPerEU = bytesPerEU / sizeof(long); |
| 325 | long *destLongBuf, *srcLongBuf1, *srcLongBuf2; |
| 326 | long temp1; |
| 327 | #endif |
| 328 | |
| 329 | #if RF_EO_MATRIX_DIM > 17 |
| 330 | RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1); |
| 331 | RF_ASSERT(bytesPerEU % sizeof(short) == 0); |
| 332 | #elif RF_EO_MATRIX_DIM == 17 |
| 333 | RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4); |
| 334 | RF_ASSERT(bytesPerEU % sizeof(long) == 0); |
| 335 | #endif |
| 336 | |
| 337 | S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM); |
| 338 | #if RF_EO_MATRIX_DIM > 17 |
| 339 | srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU); |
| 340 | #elif RF_EO_MATRIX_DIM == 17 |
| 341 | srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU); |
| 342 | #endif |
| 343 | |
| 344 | for (indexInDest = 0; indexInDest < numRowInEncMatix; indexInDest++) { |
| 345 | indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM); |
| 346 | |
| 347 | #if RF_EO_MATRIX_DIM > 17 |
| 348 | destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU); |
| 349 | srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU); |
| 350 | for (j = 0; j < shortsPerEU; j++) { |
| 351 | temp1 = destShortBuf[j] ^ srcShortBuf1[j]; |
| 352 | /* note: S_index won't be at the end row for any src |
| 353 | * col! */ |
| 354 | if (indexInSrc != RF_EO_MATRIX_DIM - 1) |
| 355 | destShortBuf[j] = (srcShortBuf2[j]) ^ temp1; |
| 356 | /* if indexInSrc is at the end row, ie. |
| 357 | * RF_EO_MATRIX_DIM -1, then all elements are zero! */ |
| 358 | else |
| 359 | destShortBuf[j] = temp1; |
| 360 | } |
| 361 | |
| 362 | #elif RF_EO_MATRIX_DIM == 17 |
| 363 | destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU); |
| 364 | srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU); |
| 365 | for (j = 0; j < longsPerEU; j++) { |
| 366 | temp1 = destLongBuf[j] ^ srcLongBuf1[j]; |
| 367 | if (indexInSrc != RF_EO_MATRIX_DIM - 1) |
| 368 | destLongBuf[j] = (srcLongBuf2[j]) ^ temp1; |
| 369 | else |
| 370 | destLongBuf[j] = temp1; |
| 371 | } |
| 372 | #endif |
| 373 | } |
| 374 | } |
| 375 | |
| 376 | void |
| 377 | rf_e_encToBuf( |
| 378 | RF_Raid_t * raidPtr, |
| 379 | RF_RowCol_t srcLogicCol, |
| 380 | char *srcbuf, |
| 381 | RF_RowCol_t destLogicCol, |
| 382 | char *destbuf, |
| 383 | int numSector) |
| 384 | { |
| 385 | int i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); |
| 386 | |
| 387 | for (i = 0; i < numSector; i++) { |
| 388 | rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector); |
| 389 | srcbuf += bytesPerSector; |
| 390 | destbuf += bytesPerSector; |
| 391 | } |
| 392 | } |
/**************************************************************************************
 * When parity and one data disk have failed, we use the second redundant information,
 * 'E', to recover the data on the dead disk. This function is used in the recovery
 * node for EO_110_CreateReadDAG.
 **************************************************************************************/
| 398 | int |
| 399 | rf_RecoveryEFunc(RF_DagNode_t *node) |
| 400 | { |
| 401 | RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; |
| 402 | RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; |
| 403 | RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; |
| 404 | RF_RowCol_t scol, /* source logical column */ |
| 405 | fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress); /* logical column of |
| 406 | * failed SU */ |
| 407 | int i; |
| 408 | RF_PhysDiskAddr_t *pda; |
| 409 | int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); |
| 410 | char *srcbuf, *destbuf; |
| 411 | RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; |
| 412 | RF_Etimer_t timer; |
| 413 | |
| 414 | memset((char *) node->results[0], 0, |
| 415 | rf_RaidAddressToByte(raidPtr, failedPDA->numSector)); |
| 416 | if (node->dagHdr->status == rf_enable) { |
| 417 | RF_ETIMER_START(timer); |
| 418 | for (i = 0; i < node->numParams - 2; i += 2) |
| 419 | if (node->params[i + 1].p != node->results[0]) { |
| 420 | pda = (RF_PhysDiskAddr_t *) node->params[i].p; |
| 421 | if (i == node->numParams - 4) |
| 422 | scol = RF_EO_MATRIX_DIM - 2; /* the colume of |
| 423 | * redundant E */ |
| 424 | else |
| 425 | scol = rf_EUCol(layoutPtr, pda->raidAddress); |
| 426 | srcbuf = (char *) node->params[i + 1].p; |
| 427 | suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); |
| 428 | destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); |
| 429 | rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector); |
| 430 | } |
| 431 | RF_ETIMER_STOP(timer); |
| 432 | RF_ETIMER_EVAL(timer); |
| 433 | tracerec->xor_us += RF_ETIMER_VAL_US(timer); |
| 434 | } |
| 435 | return (rf_GenericWakeupFunc(node, 0)); /* node execute successfully */ |
| 436 | } |
/**************************************************************************************
 * This function is used in the case where one data disk and the parity have failed
 * (in EO_110_CreateWriteDAG).
 **************************************************************************************/
| 441 | int |
| 442 | rf_EO_DegradedWriteEFunc(RF_DagNode_t * node) |
| 443 | { |
| 444 | rf_DegrESubroutine(node, node->results[0]); |
| 445 | rf_GenericWakeupFunc(node, 0); |
| 446 | #if 1 |
| 447 | return (0); /* XXX Yet another one!! GO */ |
| 448 | #endif |
| 449 | } |
| 450 | |
| 451 | |
| 452 | |
| 453 | /************************************************************************************** |
| 454 | * THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES |
| 455 | **************************************************************************************/ |
| 456 | |
| 457 | void |
| 458 | rf_doubleEOdecode( |
| 459 | RF_Raid_t * raidPtr, |
| 460 | char **rrdbuf, |
| 461 | char **dest, |
| 462 | RF_RowCol_t * fcol, |
| 463 | char *pbuf, |
| 464 | char *ebuf) |
| 465 | { |
| 466 | RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); |
| 467 | int i, j, k, f1, f2, row; |
| 468 | int rrdrow, erow, count = 0; |
| 469 | int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); |
| 470 | int numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1; |
| 471 | #if 0 |
| 472 | int pcol = (RF_EO_MATRIX_DIM) - 1; |
| 473 | #endif |
| 474 | int ecol = (RF_EO_MATRIX_DIM) - 2; |
| 475 | int bytesPerEU = bytesPerSector / numRowInEncMatix; |
| 476 | int numDataCol = layoutPtr->numDataCol; |
| 477 | #if RF_EO_MATRIX_DIM > 17 |
| 478 | int shortsPerEU = bytesPerEU / sizeof(short); |
| 479 | short *rrdbuf_current, *pbuf_current, *ebuf_current; |
| 480 | short *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current; |
| 481 | short *temp; |
| 482 | short *P; |
| 483 | |
| 484 | RF_ASSERT(bytesPerEU % sizeof(short) == 0); |
| 485 | RF_Malloc(P, bytesPerEU, (short *)); |
| 486 | RF_Malloc(temp, bytesPerEU, (short *)); |
| 487 | #elif RF_EO_MATRIX_DIM == 17 |
| 488 | int longsPerEU = bytesPerEU / sizeof(long); |
| 489 | long *rrdbuf_current, *pbuf_current, *ebuf_current; |
| 490 | long *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current; |
| 491 | long *temp; |
| 492 | long *P; |
| 493 | |
| 494 | RF_ASSERT(bytesPerEU % sizeof(long) == 0); |
| 495 | RF_Malloc(P, bytesPerEU, (long *)); |
| 496 | RF_Malloc(temp, bytesPerEU, (long *)); |
| 497 | #endif |
| 498 | RF_ASSERT(*((long *) dest[0]) == 0); |
| 499 | RF_ASSERT(*((long *) dest[1]) == 0); |
| 500 | memset((char *) P, 0, bytesPerEU); |
| 501 | memset((char *) temp, 0, bytesPerEU); |
| 502 | RF_ASSERT(*P == 0); |
| 503 | /* calculate the 'P' parameter, which, not parity, is the Xor of all |
| 504 | * elements in the last two column, ie. 'E' and 'parity' colume, see |
| 505 | * the Ref. paper by Blaum, et al 1993 */ |
| 506 | for (i = 0; i < numRowInEncMatix; i++) |
| 507 | for (k = 0; k < longsPerEU; k++) { |
| 508 | #if RF_EO_MATRIX_DIM > 17 |
| 509 | ebuf_current = ((short *) ebuf) + i * shortsPerEU + k; |
| 510 | pbuf_current = ((short *) pbuf) + i * shortsPerEU + k; |
| 511 | #elif RF_EO_MATRIX_DIM == 17 |
| 512 | ebuf_current = ((long *) ebuf) + i * longsPerEU + k; |
| 513 | pbuf_current = ((long *) pbuf) + i * longsPerEU + k; |
| 514 | #endif |
| 515 | P[k] ^= *ebuf_current; |
| 516 | P[k] ^= *pbuf_current; |
| 517 | } |
| 518 | RF_ASSERT(fcol[0] != fcol[1]); |
| 519 | if (fcol[0] < fcol[1]) { |
| 520 | #if RF_EO_MATRIX_DIM > 17 |
| 521 | dest_smaller = (short *) (dest[0]); |
| 522 | dest_larger = (short *) (dest[1]); |
| 523 | #elif RF_EO_MATRIX_DIM == 17 |
| 524 | dest_smaller = (long *) (dest[0]); |
| 525 | dest_larger = (long *) (dest[1]); |
| 526 | #endif |
| 527 | f1 = fcol[0]; |
| 528 | f2 = fcol[1]; |
| 529 | } else { |
| 530 | #if RF_EO_MATRIX_DIM > 17 |
| 531 | dest_smaller = (short *) (dest[1]); |
| 532 | dest_larger = (short *) (dest[0]); |
| 533 | #elif RF_EO_MATRIX_DIM == 17 |
| 534 | dest_smaller = (long *) (dest[1]); |
| 535 | dest_larger = (long *) (dest[0]); |
| 536 | #endif |
| 537 | f1 = fcol[1]; |
| 538 | f2 = fcol[0]; |
| 539 | } |
| 540 | row = (RF_EO_MATRIX_DIM) - 1; |
| 541 | while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) != ((RF_EO_MATRIX_DIM) - 1)) { |
| 542 | #if RF_EO_MATRIX_DIM > 17 |
| 543 | dest_larger_current = dest_larger + row * shortsPerEU; |
| 544 | dest_smaller_current = dest_smaller + row * shortsPerEU; |
| 545 | #elif RF_EO_MATRIX_DIM == 17 |
| 546 | dest_larger_current = dest_larger + row * longsPerEU; |
| 547 | dest_smaller_current = dest_smaller + row * longsPerEU; |
| 548 | #endif |
| 549 | /** Do the diagonal recovery. Initially, temp[k] = (failed 1), |
| 550 | which is the failed data in the colume which has smaller col index. **/ |
| 551 | /* step 1: ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3)) */ |
| 552 | for (j = 0; j < numDataCol; j++) { |
| 553 | if (j == f1 || j == f2) |
| 554 | continue; |
| 555 | rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM); |
| 556 | if (rrdrow != (RF_EO_MATRIX_DIM) - 1) { |
| 557 | #if RF_EO_MATRIX_DIM > 17 |
| 558 | rrdbuf_current = (short *) (rrdbuf[j]) + rrdrow * shortsPerEU; |
| 559 | for (k = 0; k < shortsPerEU; k++) |
| 560 | temp[k] ^= *(rrdbuf_current + k); |
| 561 | #elif RF_EO_MATRIX_DIM == 17 |
| 562 | rrdbuf_current = (long *) (rrdbuf[j]) + rrdrow * longsPerEU; |
| 563 | for (k = 0; k < longsPerEU; k++) |
| 564 | temp[k] ^= *(rrdbuf_current + k); |
| 565 | #endif |
| 566 | } |
| 567 | } |
| 568 | /* step 2: ^E(erow,m-2), If erow is at the buttom row, don't |
| 569 | * Xor into it E(erow,m-2) = (principle diagonal) ^ (failed |
| 570 | * 1) ^ (failed 2) ^ ( SUM of nonfailed in-diagonal |
| 571 | * A(rrdrow,0..m-3) ) After this step, temp[k] = (principle |
| 572 | * diagonal) ^ (failed 2) */ |
| 573 | |
| 574 | erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM)); |
| 575 | if (erow != (RF_EO_MATRIX_DIM) - 1) { |
| 576 | #if RF_EO_MATRIX_DIM > 17 |
| 577 | ebuf_current = (short *) ebuf + shortsPerEU * erow; |
| 578 | for (k = 0; k < shortsPerEU; k++) |
| 579 | temp[k] ^= *(ebuf_current + k); |
| 580 | #elif RF_EO_MATRIX_DIM == 17 |
| 581 | ebuf_current = (long *) ebuf + longsPerEU * erow; |
| 582 | for (k = 0; k < longsPerEU; k++) |
| 583 | temp[k] ^= *(ebuf_current + k); |
| 584 | #endif |
| 585 | } |
| 586 | /* step 3: ^P to obtain the failed data (failed 2). P can be |
| 587 | * proved to be actually (principle diagonal) After this |
| 588 | * step, temp[k] = (failed 2), the failed data to be recovered */ |
| 589 | #if RF_EO_MATRIX_DIM > 17 |
| 590 | for (k = 0; k < shortsPerEU; k++) |
| 591 | temp[k] ^= P[k]; |
| 592 | /* Put the data to the destination buffer */ |
| 593 | for (k = 0; k < shortsPerEU; k++) |
| 594 | dest_larger_current[k] = temp[k]; |
| 595 | #elif RF_EO_MATRIX_DIM == 17 |
| 596 | for (k = 0; k < longsPerEU; k++) |
| 597 | temp[k] ^= P[k]; |
| 598 | /* Put the data to the destination buffer */ |
| 599 | for (k = 0; k < longsPerEU; k++) |
| 600 | dest_larger_current[k] = temp[k]; |
| 601 | #endif |
| 602 | |
| 603 | /** THE FOLLOWING DO THE HORIZONTAL XOR **/ |
| 604 | /* step 1: ^(SUM of A(row,0..m-3)), ie. all nonfailed data |
| 605 | * columes */ |
| 606 | for (j = 0; j < numDataCol; j++) { |
| 607 | if (j == f1 || j == f2) |
| 608 | continue; |
| 609 | #if RF_EO_MATRIX_DIM > 17 |
| 610 | rrdbuf_current = (short *) (rrdbuf[j]) + row * shortsPerEU; |
| 611 | for (k = 0; k < shortsPerEU; k++) |
| 612 | temp[k] ^= *(rrdbuf_current + k); |
| 613 | #elif RF_EO_MATRIX_DIM == 17 |
| 614 | rrdbuf_current = (long *) (rrdbuf[j]) + row * longsPerEU; |
| 615 | for (k = 0; k < longsPerEU; k++) |
| 616 | temp[k] ^= *(rrdbuf_current + k); |
| 617 | #endif |
| 618 | } |
| 619 | /* step 2: ^A(row,m-1) */ |
| 620 | /* step 3: Put the data to the destination buffer */ |
| 621 | #if RF_EO_MATRIX_DIM > 17 |
| 622 | pbuf_current = (short *) pbuf + shortsPerEU * row; |
| 623 | for (k = 0; k < shortsPerEU; k++) |
| 624 | temp[k] ^= *(pbuf_current + k); |
| 625 | for (k = 0; k < shortsPerEU; k++) |
| 626 | dest_smaller_current[k] = temp[k]; |
| 627 | #elif RF_EO_MATRIX_DIM == 17 |
| 628 | pbuf_current = (long *) pbuf + longsPerEU * row; |
| 629 | for (k = 0; k < longsPerEU; k++) |
| 630 | temp[k] ^= *(pbuf_current + k); |
| 631 | for (k = 0; k < longsPerEU; k++) |
| 632 | dest_smaller_current[k] = temp[k]; |
| 633 | #endif |
| 634 | count++; |
| 635 | } |
| 636 | /* Check if all Encoding Unit in the data buffer have been decoded, |
| 637 | * according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number, |
| 638 | * this algorithm will covered all buffer */ |
| 639 | RF_ASSERT(count == numRowInEncMatix); |
| 640 | RF_Free((char *) P, bytesPerEU); |
| 641 | RF_Free((char *) temp, bytesPerEU); |
| 642 | } |
| 643 | |
| 644 | |
/***************************************************************************************
 * This function is called by the double degraded read
 * EO_200_CreateReadDAG
 *
 ***************************************************************************************/
| 650 | int |
| 651 | rf_EvenOddDoubleRecoveryFunc(RF_DagNode_t *node) |
| 652 | { |
| 653 | int ndataParam = 0; |
| 654 | int np = node->numParams; |
| 655 | RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p; |
| 656 | RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p; |
| 657 | RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); |
| 658 | int i, prm, sector, nresults = node->numResults; |
| 659 | RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; |
| 660 | unsigned sosAddr; |
| 661 | int mallc_one = 0, mallc_two = 0; /* flags to indicate if |
| 662 | * memory is allocated */ |
| 663 | int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); |
| 664 | RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1, |
| 665 | npda; |
| 666 | RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol; |
| 667 | char **buf, *ebuf, *pbuf, *dest[2]; |
| 668 | long *suoff = NULL, *suend = NULL, *prmToCol = NULL, |
| 669 | psuoff = 0, esuoff = 0; |
| 670 | RF_SectorNum_t startSector, endSector; |
| 671 | RF_Etimer_t timer; |
| 672 | RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; |
| 673 | |
| 674 | RF_ETIMER_START(timer); |
| 675 | |
| 676 | /* Find out the number of parameters which are pdas for data |
| 677 | * information */ |
| 678 | for (i = 0; i <= np; i++) |
| 679 | if (((RF_PhysDiskAddr_t *) node->params[i].p)->type != RF_PDA_TYPE_DATA) { |
| 680 | ndataParam = i; |
| 681 | break; |
| 682 | } |
| 683 | RF_Malloc(buf, numDataCol * sizeof(char *), (char **)); |
| 684 | if (ndataParam != 0) { |
| 685 | RF_Malloc(suoff, ndataParam * sizeof(long), (long *)); |
| 686 | RF_Malloc(suend, ndataParam * sizeof(long), (long *)); |
| 687 | RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *)); |
| 688 | } |
| 689 | if (asmap->failedPDAs[1] && |
| 690 | (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) { |
| 691 | RF_ASSERT(0); /* currently, no support for this situation */ |
| 692 | ppda = node->params[np - 6].p; |
| 693 | ppda2 = node->params[np - 5].p; |
| 694 | RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY); |
| 695 | epda = node->params[np - 4].p; |
| 696 | epda2 = node->params[np - 3].p; |
| 697 | RF_ASSERT(epda2->type == RF_PDA_TYPE_Q); |
| 698 | } else { |
| 699 | ppda = node->params[np - 4].p; |
| 700 | epda = node->params[np - 3].p; |
| 701 | psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector); |
| 702 | esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector); |
| 703 | RF_ASSERT(psuoff == esuoff); |
| 704 | } |
| 705 | /* |
| 706 | the followings have three goals: |
| 707 | 1. determine the startSector to begin decoding and endSector to end decoding. |
| 708 | 2. determine the colume numbers of the two failed disks. |
| 709 | 3. determine the offset and end offset of the access within each failed stripe unit. |
| 710 | */ |
| 711 | if (nresults == 1) { |
| 712 | /* find the startSector to begin decoding */ |
| 713 | pda = node->results[0]; |
| 714 | memset(pda->bufPtr, 0, bytesPerSector * pda->numSector); |
| 715 | fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector); |
| 716 | fsuend[0] = fsuoff[0] + pda->numSector; |
| 717 | fsuoff[1] = 0; |
| 718 | fsuend[1] = 0; |
| 719 | startSector = fsuoff[0]; |
| 720 | endSector = fsuend[0]; |
| 721 | |
| 722 | /* find out the column of failed disk being accessed */ |
| 723 | fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress); |
| 724 | |
| 725 | /* find out the other failed colume not accessed */ |
| 726 | sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); |
| 727 | for (i = 0; i < numDataCol; i++) { |
| 728 | npda.raidAddress = sosAddr + (i * secPerSU); |
| 729 | (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.col), &(npda.startSector), 0); |
| 730 | /* skip over dead disks */ |
| 731 | if (RF_DEAD_DISK(raidPtr->Disks[npda.col].status)) |
| 732 | if (i != fcol[0]) |
| 733 | break; |
| 734 | } |
| 735 | RF_ASSERT(i < numDataCol); |
| 736 | fcol[1] = i; |
| 737 | } else { |
| 738 | RF_ASSERT(nresults == 2); |
| 739 | pda0 = node->results[0]; |
| 740 | memset(pda0->bufPtr, 0, bytesPerSector * pda0->numSector); |
| 741 | pda1 = node->results[1]; |
| 742 | memset(pda1->bufPtr, 0, bytesPerSector * pda1->numSector); |
| 743 | /* determine the failed colume numbers of the two failed |
| 744 | * disks. */ |
| 745 | fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress); |
| 746 | fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress); |
| 747 | /* determine the offset and end offset of the access within |
| 748 | * each failed stripe unit. */ |
| 749 | fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector); |
| 750 | fsuend[0] = fsuoff[0] + pda0->numSector; |
| 751 | fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector); |
| 752 | fsuend[1] = fsuoff[1] + pda1->numSector; |
| 753 | /* determine the startSector to begin decoding */ |
| 754 | startSector = RF_MIN(pda0->startSector, pda1->startSector); |
| 755 | /* determine the endSector to end decoding */ |
| 756 | endSector = RF_MAX(fsuend[0], fsuend[1]); |
| 757 | } |
| 758 | /* |
| 759 | assign the beginning sector and the end sector for each parameter |
| 760 | find out the corresponding colume # for each parameter |
| 761 | */ |
| 762 | for (prm = 0; prm < ndataParam; prm++) { |
| 763 | pda = node->params[prm].p; |
| 764 | suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector); |
| 765 | suend[prm] = suoff[prm] + pda->numSector; |
| 766 | prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress); |
| 767 | } |
| 768 | /* 'sector' is the sector for the current decoding algorithm. For each |
| 769 | * sector in the failed SU, find out the corresponding parameters that |
| 770 | * cover the current sector and that are needed for decoding of this |
| 771 | * sector in failed SU. 2. Find out if sector is in the shadow of any |
| 772 | * accessed failed SU. If not, malloc a temporary space of a sector in |
| 773 | * size. */ |
| 774 | for (sector = startSector; sector < endSector; sector++) { |
| 775 | if (nresults == 2) |
| 776 | if (!(fsuoff[0] <= sector && sector < fsuend[0]) && !(fsuoff[1] <= sector && sector < fsuend[1])) |
| 777 | continue; |
| 778 | for (prm = 0; prm < ndataParam; prm++) |
| 779 | if (suoff[prm] <= sector && sector < suend[prm]) |
| 780 | buf[(prmToCol[prm])] = (char *)((RF_PhysDiskAddr_t *) node->params[prm].p)->bufPtr + |
| 781 | rf_RaidAddressToByte(raidPtr, sector - suoff[prm]); |
| 782 | /* find out if sector is in the shadow of any accessed failed |
| 783 | * SU. If yes, assign dest[0], dest[1] to point at suitable |
| 784 | * position of the buffer corresponding to failed SUs. if no, |
| 785 | * malloc a temporary space of a sector in size for |
| 786 | * destination of decoding. */ |
| 787 | RF_ASSERT(nresults == 1 || nresults == 2); |
| 788 | if (nresults == 1) { |
| 789 | dest[0] = (char *)((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]); |
| 790 | /* Always malloc temp buffer to dest[1] */ |
| 791 | RF_Malloc(dest[1], bytesPerSector, (char *)); |
| 792 | memset(dest[1], 0, bytesPerSector); |
| 793 | mallc_two = 1; |
| 794 | } else { |
| 795 | if (fsuoff[0] <= sector && sector < fsuend[0]) |
| 796 | dest[0] = (char *)((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]); |
| 797 | else { |
| 798 | RF_Malloc(dest[0], bytesPerSector, (char *)); |
| 799 | memset(dest[0], 0, bytesPerSector); |
| 800 | mallc_one = 1; |
| 801 | } |
| 802 | if (fsuoff[1] <= sector && sector < fsuend[1]) |
| 803 | dest[1] = (char *)((RF_PhysDiskAddr_t *) node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[1]); |
| 804 | else { |
| 805 | RF_Malloc(dest[1], bytesPerSector, (char *)); |
| 806 | memset(dest[1], 0, bytesPerSector); |
| 807 | mallc_two = 1; |
| 808 | } |
| 809 | RF_ASSERT(mallc_one == 0 || mallc_two == 0); |
| 810 | } |
| 811 | pbuf = (char *)ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - psuoff); |
| 812 | ebuf = (char *)epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - esuoff); |
| 813 | /* |
| 814 | * After finish finding all needed sectors, call doubleEOdecode function for decoding |
| 815 | * one sector to destination. |
| 816 | */ |
| 817 | rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf); |
| 818 | /* free all allocated memory, and mark flag to indicate no |
| 819 | * memory is being allocated */ |
| 820 | if (mallc_one == 1) |
| 821 | RF_Free(dest[0], bytesPerSector); |
| 822 | if (mallc_two == 1) |
| 823 | RF_Free(dest[1], bytesPerSector); |
| 824 | mallc_one = mallc_two = 0; |
| 825 | } |
| 826 | RF_Free(buf, numDataCol * sizeof(char *)); |
| 827 | if (ndataParam != 0) { |
| 828 | RF_Free(suoff, ndataParam * sizeof(long)); |
| 829 | RF_Free(suend, ndataParam * sizeof(long)); |
| 830 | RF_Free(prmToCol, ndataParam * sizeof(long)); |
| 831 | } |
| 832 | RF_ETIMER_STOP(timer); |
| 833 | RF_ETIMER_EVAL(timer); |
| 834 | if (tracerec) { |
| 835 | tracerec->q_us += RF_ETIMER_VAL_US(timer); |
| 836 | } |
| 837 | rf_GenericWakeupFunc(node, 0); |
| 838 | #if 1 |
| 839 | return (0); /* XXX is this even close!!?!?!!? GO */ |
| 840 | #endif |
| 841 | } |
| 842 | |
| 843 | |
/* Currently, this function only supports an access to one of the two failed
 * stripe units.  asmap->numStripeUnitsAccessed is also limited to one;
 * RAIDframe breaks a large access into multiple single-stripe-unit accesses.
 */
| 848 | |
| 849 | int |
| 850 | rf_EOWriteDoubleRecoveryFunc(RF_DagNode_t *node) |
| 851 | { |
| 852 | int np = node->numParams; |
| 853 | RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p; |
| 854 | RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p; |
| 855 | RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); |
| 856 | RF_SectorNum_t sector; |
| 857 | RF_RowCol_t col, scol; |
| 858 | int prm, i, j; |
| 859 | RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; |
| 860 | unsigned sosAddr; |
| 861 | unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); |
| 862 | RF_int64 numbytes; |
| 863 | RF_SectorNum_t startSector, endSector; |
| 864 | RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda; |
| 865 | RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol; |
| 866 | char **buf; /* buf[0], buf[1], buf[2], ...etc. point to |
| 867 | * buffer storing data read from col0, col1, |
| 868 | * col2 */ |
| 869 | char *ebuf, *pbuf, *dest[2], *olddata[2]; |
| 870 | RF_Etimer_t timer; |
| 871 | RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; |
| 872 | |
| 873 | RF_ASSERT(asmap->numDataFailed == 1); /* currently only support this |
| 874 | * case, the other failed SU |
| 875 | * is not being accessed */ |
| 876 | RF_ETIMER_START(timer); |
| 877 | RF_Malloc(buf, numDataCol * sizeof(char *), (char **)); |
| 878 | |
| 879 | ppda = node->results[0];/* Instead of being buffers, node->results[0] |
| 880 | * and [1] are Ppda and Epda */ |
| 881 | epda = node->results[1]; |
| 882 | fpda = asmap->failedPDAs[0]; |
| 883 | |
| 884 | /* First, recovery the failed old SU using EvenOdd double decoding */ |
| 885 | /* determine the startSector and endSector for decoding */ |
| 886 | startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector); |
| 887 | endSector = startSector + fpda->numSector; |
| 888 | /* Assign buf[col] pointers to point to each non-failed colume and |
| 889 | * initialize the pbuf and ebuf to point at the beginning of each |
| 890 | * source buffers and destination buffers */ |
| 891 | for (prm = 0; prm < numDataCol - 2; prm++) { |
| 892 | pda = (RF_PhysDiskAddr_t *) node->params[prm].p; |
| 893 | col = rf_EUCol(layoutPtr, pda->raidAddress); |
| 894 | buf[col] = pda->bufPtr; |
| 895 | } |
| 896 | /* pbuf and ebuf: they will change values as double recovery decoding |
| 897 | * goes on */ |
| 898 | pbuf = ppda->bufPtr; |
| 899 | ebuf = epda->bufPtr; |
| 900 | /* find out the logical colume numbers in the encoding matrix of the |
| 901 | * two failed columes */ |
| 902 | fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress); |
| 903 | |
| 904 | /* find out the other failed colume not accessed this time */ |
| 905 | sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); |
| 906 | for (i = 0; i < numDataCol; i++) { |
| 907 | npda.raidAddress = sosAddr + (i * secPerSU); |
| 908 | (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.col), &(npda.startSector), 0); |
| 909 | /* skip over dead disks */ |
| 910 | if (RF_DEAD_DISK(raidPtr->Disks[npda.col].status)) |
| 911 | if (i != fcol[0]) |
| 912 | break; |
| 913 | } |
| 914 | RF_ASSERT(i < numDataCol); |
| 915 | fcol[1] = i; |
| 916 | /* assign temporary space to put recovered failed SU */ |
| 917 | numbytes = fpda->numSector * bytesPerSector; |
| 918 | RF_Malloc(olddata[0], numbytes, (char *)); |
| 919 | RF_Malloc(olddata[1], numbytes, (char *)); |
| 920 | dest[0] = olddata[0]; |
| 921 | dest[1] = olddata[1]; |
| 922 | memset(olddata[0], 0, numbytes); |
| 923 | memset(olddata[1], 0, numbytes); |
| 924 | /* Begin the recovery decoding, initially buf[j], ebuf, pbuf, dest[j] |
| 925 | * have already pointed at the beginning of each source buffers and |
| 926 | * destination buffers */ |
| 927 | for (sector = startSector, i = 0; sector < endSector; sector++, i++) { |
| 928 | rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf); |
| 929 | for (j = 0; j < numDataCol; j++) |
| 930 | if ((j != fcol[0]) && (j != fcol[1])) |
| 931 | buf[j] += bytesPerSector; |
| 932 | dest[0] += bytesPerSector; |
| 933 | dest[1] += bytesPerSector; |
| 934 | ebuf += bytesPerSector; |
| 935 | pbuf += bytesPerSector; |
| 936 | } |
| 937 | /* after recovery, the buffer pointed by olddata[0] is the old failed |
| 938 | * data. With new writing data and this old data, use small write to |
| 939 | * calculate the new redundant informations */ |
| 940 | /* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of |
| 941 | * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol |
| 942 | * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[ |
| 943 | * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol |
| 944 | * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of |
| 945 | * wudNodes; For current implementation, we assume the simplest case: |
| 946 | * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1 |
| 947 | * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new |
| 948 | * data to be writen to the failed disk. We first bxor the new data |
| 949 | * into the old recovered data, then do the same things as small |
| 950 | * write. */ |
| 951 | |
| 952 | rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr, olddata[0], numbytes); |
| 953 | /* do new 'E' calculation */ |
| 954 | /* find out the corresponding colume in encoding matrix for write |
| 955 | * colume to be encoded into redundant disk 'E' */ |
| 956 | scol = rf_EUCol(layoutPtr, fpda->raidAddress); |
| 957 | /* olddata[0] now is source buffer pointer; epda->bufPtr is the dest |
| 958 | * buffer pointer */ |
| 959 | rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector); |
| 960 | |
| 961 | /* do new 'P' calculation */ |
| 962 | rf_bxor(olddata[0], ppda->bufPtr, numbytes); |
| 963 | /* Free the allocated buffer */ |
| 964 | RF_Free(olddata[0], numbytes); |
| 965 | RF_Free(olddata[1], numbytes); |
| 966 | RF_Free(buf, numDataCol * sizeof(char *)); |
| 967 | |
| 968 | RF_ETIMER_STOP(timer); |
| 969 | RF_ETIMER_EVAL(timer); |
| 970 | if (tracerec) { |
| 971 | tracerec->q_us += RF_ETIMER_VAL_US(timer); |
| 972 | } |
| 973 | rf_GenericWakeupFunc(node, 0); |
| 974 | return (0); |
| 975 | } |
| 976 | #endif /* RF_INCLUDE_EVENODD > 0 */ |
| 977 | |