| 1 | /* $NetBSD: rf_paritylogging.c,v 1.34 2011/05/11 06:20:33 mrg Exp $ */ |
| 2 | /* |
| 3 | * Copyright (c) 1995 Carnegie-Mellon University. |
| 4 | * All rights reserved. |
| 5 | * |
| 6 | * Author: William V. Courtright II |
| 7 | * |
| 8 | * Permission to use, copy, modify and distribute this software and |
| 9 | * its documentation is hereby granted, provided that both the copyright |
| 10 | * notice and this permission notice appear in all copies of the |
| 11 | * software, derivative works or modified versions, and any portions |
| 12 | * thereof, and that both notices appear in supporting documentation. |
| 13 | * |
| 14 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
| 15 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND |
| 16 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
| 17 | * |
| 18 | * Carnegie Mellon requests users of this software to return to |
| 19 | * |
| 20 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
| 21 | * School of Computer Science |
| 22 | * Carnegie Mellon University |
| 23 | * Pittsburgh PA 15213-3890 |
| 24 | * |
| 25 | * any improvements or extensions that they make and grant Carnegie the |
| 26 | * rights to redistribute these changes. |
| 27 | */ |
| 28 | |
| 29 | |
| 30 | /* |
  parity logging configuration, DAG selection, and mapping are implemented here
| 32 | */ |
| 33 | |
| 34 | #include <sys/cdefs.h> |
| 35 | __KERNEL_RCSID(0, "$NetBSD: rf_paritylogging.c,v 1.34 2011/05/11 06:20:33 mrg Exp $" ); |
| 36 | |
| 37 | #include "rf_archs.h" |
| 38 | |
| 39 | #if RF_INCLUDE_PARITYLOGGING > 0 |
| 40 | |
| 41 | #include <dev/raidframe/raidframevar.h> |
| 42 | |
| 43 | #include "rf_raid.h" |
| 44 | #include "rf_dag.h" |
| 45 | #include "rf_dagutils.h" |
| 46 | #include "rf_dagfuncs.h" |
| 47 | #include "rf_dagffrd.h" |
| 48 | #include "rf_dagffwr.h" |
| 49 | #include "rf_dagdegrd.h" |
| 50 | #include "rf_dagdegwr.h" |
| 51 | #include "rf_paritylog.h" |
| 52 | #include "rf_paritylogDiskMgr.h" |
| 53 | #include "rf_paritylogging.h" |
| 54 | #include "rf_parityloggingdags.h" |
| 55 | #include "rf_general.h" |
| 56 | #include "rf_map.h" |
| 57 | #include "rf_utils.h" |
| 58 | #include "rf_shutdown.h" |
| 59 | |
| 60 | typedef struct RF_ParityLoggingConfigInfo_s { |
| 61 | RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by |
| 62 | * IdentifyStripe */ |
| 63 | } RF_ParityLoggingConfigInfo_t; |
| 64 | |
| 65 | static void FreeRegionInfo(RF_Raid_t * raidPtr, RF_RegionId_t regionID); |
| 66 | static void rf_ShutdownParityLogging(RF_ThreadArg_t arg); |
| 67 | static void rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg); |
| 68 | static void rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg); |
| 69 | static void rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg); |
| 70 | static void rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg); |
| 71 | static void rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg); |
| 72 | |
| 73 | int |
| 74 | rf_ConfigureParityLogging( |
| 75 | RF_ShutdownList_t ** listp, |
| 76 | RF_Raid_t * raidPtr, |
| 77 | RF_Config_t * cfgPtr) |
| 78 | { |
| 79 | int i, j, startdisk, rc; |
| 80 | RF_SectorCount_t totalLogCapacity, fragmentation, lastRegionCapacity; |
| 81 | RF_SectorCount_t parityBufferCapacity, maxRegionParityRange; |
| 82 | RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; |
| 83 | RF_ParityLoggingConfigInfo_t *info; |
| 84 | RF_ParityLog_t *l = NULL, *next; |
| 85 | void *lHeapPtr; |
| 86 | |
| 87 | if (rf_numParityRegions <= 0) |
| 88 | return(EINVAL); |
| 89 | |
| 90 | /* |
| 91 | * We create multiple entries on the shutdown list here, since |
| 92 | * this configuration routine is fairly complicated in and of |
| 93 | * itself, and this makes backing out of a failed configuration |
| 94 | * much simpler. |
| 95 | */ |
| 96 | |
| 97 | raidPtr->numSectorsPerLog = RF_DEFAULT_NUM_SECTORS_PER_LOG; |
| 98 | |
| 99 | /* create a parity logging configuration structure */ |
| 100 | RF_MallocAndAdd(info, sizeof(RF_ParityLoggingConfigInfo_t), |
| 101 | (RF_ParityLoggingConfigInfo_t *), |
| 102 | raidPtr->cleanupList); |
| 103 | if (info == NULL) |
| 104 | return (ENOMEM); |
| 105 | layoutPtr->layoutSpecificInfo = (void *) info; |
| 106 | |
| 107 | /* the stripe identifier must identify the disks in each stripe, IN |
| 108 | * THE ORDER THAT THEY APPEAR IN THE STRIPE. */ |
| 109 | info->stripeIdentifier = rf_make_2d_array((raidPtr->numCol), |
| 110 | (raidPtr->numCol), |
| 111 | raidPtr->cleanupList); |
| 112 | if (info->stripeIdentifier == NULL) |
| 113 | return (ENOMEM); |
| 114 | |
| 115 | startdisk = 0; |
| 116 | for (i = 0; i < (raidPtr->numCol); i++) { |
| 117 | for (j = 0; j < (raidPtr->numCol); j++) { |
| 118 | info->stripeIdentifier[i][j] = (startdisk + j) % |
| 119 | (raidPtr->numCol - 1); |
| 120 | } |
| 121 | if ((--startdisk) < 0) |
| 122 | startdisk = raidPtr->numCol - 1 - 1; |
| 123 | } |
| 124 | |
| 125 | /* fill in the remaining layout parameters */ |
| 126 | layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; |
| 127 | layoutPtr->numParityCol = 1; |
| 128 | layoutPtr->numParityLogCol = 1; |
| 129 | layoutPtr->numDataCol = raidPtr->numCol - layoutPtr->numParityCol - |
| 130 | layoutPtr->numParityLogCol; |
| 131 | layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * |
| 132 | layoutPtr->sectorsPerStripeUnit; |
| 133 | layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; |
| 134 | raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * |
| 135 | layoutPtr->sectorsPerStripeUnit; |
| 136 | |
| 137 | raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * |
| 138 | layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; |
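	/*
	 * Note: totalSectors counts usable data capacity only; the
	 * dedicated parity column and the parity log column are excluded
	 * through numDataCol.
	 */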
| 139 | |
| 140 | /* configure parity log parameters |
| 141 | * |
| 142 | * parameter comment/constraints |
| 143 | * ------------------------------------------- |
| 144 | * numParityRegions* all regions (except possibly last) |
| 145 | * of equal size |
| 146 | * totalInCoreLogCapacity* amount of memory in bytes available |
| 147 | * for in-core logs (default 1 MB) |
| 148 | * numSectorsPerLog# capacity of an in-core log in sectors |
| 149 | * (1 * disk track) |
| 150 | * numParityLogs total number of in-core logs, |
| 151 | * should be at least numParityRegions |
| 152 | * regionLogCapacity size of a region log (except possibly |
| 153 | * last one) in sectors |
| 154 | * totalLogCapacity total amount of log space in sectors |
| 155 | * |
	 * where '*' denotes a user-settable parameter and '#' a value that
	 * is fixed at compile time: logs are sized to one disk track, a
	 * value #defined in rf_paritylog.h
| 159 | * |
| 160 | */ |
| 161 | |
| 162 | totalLogCapacity = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit * layoutPtr->numParityLogCol; |
| 163 | raidPtr->regionLogCapacity = totalLogCapacity / rf_numParityRegions; |
| 164 | if (rf_parityLogDebug) |
| 165 | printf("bytes per sector %d\n" , raidPtr->bytesPerSector); |
| 166 | |
| 167 | /* reduce fragmentation within a disk region by adjusting the number |
| 168 | * of regions in an attempt to allow an integral number of logs to fit |
| 169 | * into a disk region */ |
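	/*
	 * Illustrative example (hypothetical numbers, not defaults): with
	 * regionLogCapacity = 1000 sectors and numSectorsPerLog = 64,
	 * fragmentation = 1000 % 64 = 40 wasted sectors per region.  The
	 * loop below nudges rf_numParityRegions up or down when a nearby
	 * region count would leave a smaller remainder.
	 */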
| 170 | fragmentation = raidPtr->regionLogCapacity % raidPtr->numSectorsPerLog; |
| 171 | if (fragmentation > 0) |
| 172 | for (i = 1; i < (raidPtr->numSectorsPerLog / 2); i++) { |
| 173 | if (((totalLogCapacity / (rf_numParityRegions + i)) % |
| 174 | raidPtr->numSectorsPerLog) < fragmentation) { |
| 175 | rf_numParityRegions++; |
| 176 | raidPtr->regionLogCapacity = totalLogCapacity / |
| 177 | rf_numParityRegions; |
| 178 | fragmentation = raidPtr->regionLogCapacity % |
| 179 | raidPtr->numSectorsPerLog; |
| 180 | } |
| 181 | if (((totalLogCapacity / (rf_numParityRegions - i)) % |
| 182 | raidPtr->numSectorsPerLog) < fragmentation) { |
| 183 | rf_numParityRegions--; |
| 184 | raidPtr->regionLogCapacity = totalLogCapacity / |
| 185 | rf_numParityRegions; |
| 186 | fragmentation = raidPtr->regionLogCapacity % |
| 187 | raidPtr->numSectorsPerLog; |
| 188 | } |
| 189 | } |
	/* ensure an integral number of logs per region */
| 191 | raidPtr->regionLogCapacity = (raidPtr->regionLogCapacity / |
| 192 | raidPtr->numSectorsPerLog) * |
| 193 | raidPtr->numSectorsPerLog; |
| 194 | |
| 195 | raidPtr->numParityLogs = rf_totalInCoreLogCapacity / |
| 196 | (raidPtr->bytesPerSector * raidPtr->numSectorsPerLog); |
| 197 | /* to avoid deadlock, must ensure that enough logs exist for each |
| 198 | * region to have one simultaneously */ |
| 199 | if (raidPtr->numParityLogs < rf_numParityRegions) |
| 200 | raidPtr->numParityLogs = rf_numParityRegions; |
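	/*
	 * Worked example with assumed (not necessarily default) values:
	 * 512-byte sectors, a 64-sector log, and the default 1 MB of
	 * in-core log space give numParityLogs = 1048576 / (512 * 64) = 32;
	 * had rf_numParityRegions exceeded 32, the log count would have
	 * been raised to match so every region can hold one log at once.
	 */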
| 201 | |
| 202 | /* create region information structs */ |
| 203 | printf("Allocating %d bytes for in-core parity region info\n" , |
| 204 | (int) (rf_numParityRegions * sizeof(RF_RegionInfo_t))); |
| 205 | RF_Malloc(raidPtr->regionInfo, |
| 206 | (rf_numParityRegions * sizeof(RF_RegionInfo_t)), |
| 207 | (RF_RegionInfo_t *)); |
| 208 | if (raidPtr->regionInfo == NULL) |
| 209 | return (ENOMEM); |
| 210 | |
| 211 | /* last region may not be full capacity */ |
| 212 | lastRegionCapacity = raidPtr->regionLogCapacity; |
| 213 | while ((rf_numParityRegions - 1) * raidPtr->regionLogCapacity + |
| 214 | lastRegionCapacity > totalLogCapacity) |
| 215 | lastRegionCapacity = lastRegionCapacity - |
| 216 | raidPtr->numSectorsPerLog; |
| 217 | |
| 218 | raidPtr->regionParityRange = raidPtr->sectorsPerDisk / |
| 219 | rf_numParityRegions; |
| 220 | maxRegionParityRange = raidPtr->regionParityRange; |
| 221 | |
| 222 | /* i can't remember why this line is in the code -wvcii 6/30/95 */ |
| 223 | /* if (raidPtr->sectorsPerDisk % rf_numParityRegions > 0) |
| 224 | regionParityRange++; */ |
| 225 | |
| 226 | /* build pool of unused parity logs */ |
| 227 | printf("Allocating %d bytes for %d parity logs\n" , |
| 228 | raidPtr->numParityLogs * raidPtr->numSectorsPerLog * |
| 229 | raidPtr->bytesPerSector, |
| 230 | raidPtr->numParityLogs); |
| 231 | RF_Malloc(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * |
| 232 | raidPtr->numSectorsPerLog * raidPtr->bytesPerSector, |
| 233 | (void *)); |
| 234 | if (raidPtr->parityLogBufferHeap == NULL) |
| 235 | return (ENOMEM); |
| 236 | lHeapPtr = raidPtr->parityLogBufferHeap; |
| 237 | rf_init_mutex2(raidPtr->parityLogPool.mutex, IPL_VM); |
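	/*
	 * Each in-core log is carved out of parityLogBufferHeap: the loop
	 * below links numParityLogs RF_ParityLog_t structures into the
	 * pool, points each bufPtr at a (numSectorsPerLog * bytesPerSector)
	 * slice of the heap, and allocates one RF_ParityLogRecord_t per
	 * log sector for the records array.
	 */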
| 238 | for (i = 0; i < raidPtr->numParityLogs; i++) { |
| 239 | if (i == 0) { |
| 240 | RF_Malloc(raidPtr->parityLogPool.parityLogs, |
| 241 | sizeof(RF_ParityLog_t), (RF_ParityLog_t *)); |
| 242 | if (raidPtr->parityLogPool.parityLogs == NULL) { |
| 243 | RF_Free(raidPtr->parityLogBufferHeap, |
| 244 | raidPtr->numParityLogs * |
| 245 | raidPtr->numSectorsPerLog * |
| 246 | raidPtr->bytesPerSector); |
| 247 | return (ENOMEM); |
| 248 | } |
| 249 | l = raidPtr->parityLogPool.parityLogs; |
| 250 | } else { |
| 251 | RF_Malloc(l->next, sizeof(RF_ParityLog_t), |
| 252 | (RF_ParityLog_t *)); |
| 253 | if (l->next == NULL) { |
| 254 | RF_Free(raidPtr->parityLogBufferHeap, |
| 255 | raidPtr->numParityLogs * |
| 256 | raidPtr->numSectorsPerLog * |
| 257 | raidPtr->bytesPerSector); |
| 258 | for (l = raidPtr->parityLogPool.parityLogs; |
| 259 | l; |
| 260 | l = next) { |
| 261 | next = l->next; |
| 262 | if (l->records) |
| 263 | RF_Free(l->records, (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t))); |
| 264 | RF_Free(l, sizeof(RF_ParityLog_t)); |
| 265 | } |
| 266 | return (ENOMEM); |
| 267 | } |
| 268 | l = l->next; |
| 269 | } |
| 270 | l->bufPtr = lHeapPtr; |
| 271 | lHeapPtr = (char *)lHeapPtr + raidPtr->numSectorsPerLog * |
| 272 | raidPtr->bytesPerSector; |
| 273 | RF_Malloc(l->records, (raidPtr->numSectorsPerLog * |
| 274 | sizeof(RF_ParityLogRecord_t)), |
| 275 | (RF_ParityLogRecord_t *)); |
| 276 | if (l->records == NULL) { |
| 277 | RF_Free(raidPtr->parityLogBufferHeap, |
| 278 | raidPtr->numParityLogs * |
| 279 | raidPtr->numSectorsPerLog * |
| 280 | raidPtr->bytesPerSector); |
| 281 | for (l = raidPtr->parityLogPool.parityLogs; |
| 282 | l; |
| 283 | l = next) { |
| 284 | next = l->next; |
| 285 | if (l->records) |
| 286 | RF_Free(l->records, |
| 287 | (raidPtr->numSectorsPerLog * |
| 288 | sizeof(RF_ParityLogRecord_t))); |
| 289 | RF_Free(l, sizeof(RF_ParityLog_t)); |
| 290 | } |
| 291 | return (ENOMEM); |
| 292 | } |
| 293 | } |
| 294 | rf_ShutdownCreate(listp, rf_ShutdownParityLoggingPool, raidPtr); |
| 295 | /* build pool of region buffers */ |
| 296 | rf_init_mutex2(raidPtr->regionBufferPool.mutex, IPL_VM); |
| 297 | rf_init_cond2(raidPtr->regionBufferPool.cond, "rfrbpl" ); |
| 298 | raidPtr->regionBufferPool.bufferSize = raidPtr->regionLogCapacity * |
| 299 | raidPtr->bytesPerSector; |
| 300 | printf("regionBufferPool.bufferSize %d\n" , |
| 301 | raidPtr->regionBufferPool.bufferSize); |
| 302 | |
| 303 | /* for now, only one region at a time may be reintegrated */ |
| 304 | raidPtr->regionBufferPool.totalBuffers = 1; |
| 305 | |
| 306 | raidPtr->regionBufferPool.availableBuffers = |
| 307 | raidPtr->regionBufferPool.totalBuffers; |
| 308 | raidPtr->regionBufferPool.availBuffersIndex = 0; |
| 309 | raidPtr->regionBufferPool.emptyBuffersIndex = 0; |
| 310 | printf("Allocating %d bytes for regionBufferPool\n" , |
| 311 | (int) (raidPtr->regionBufferPool.totalBuffers * |
| 312 | sizeof(void *))); |
| 313 | RF_Malloc(raidPtr->regionBufferPool.buffers, |
| 314 | raidPtr->regionBufferPool.totalBuffers * sizeof(void *), |
| 315 | (void **)); |
| 316 | if (raidPtr->regionBufferPool.buffers == NULL) { |
| 317 | return (ENOMEM); |
| 318 | } |
| 319 | for (i = 0; i < raidPtr->regionBufferPool.totalBuffers; i++) { |
| 320 | printf("Allocating %d bytes for regionBufferPool#%d\n" , |
| 321 | (int) (raidPtr->regionBufferPool.bufferSize * |
| 322 | sizeof(char)), i); |
| 323 | RF_Malloc(raidPtr->regionBufferPool.buffers[i], |
| 324 | raidPtr->regionBufferPool.bufferSize * sizeof(char), |
| 325 | (void *)); |
| 326 | if (raidPtr->regionBufferPool.buffers[i] == NULL) { |
| 327 | for (j = 0; j < i; j++) { |
				RF_Free(raidPtr->regionBufferPool.buffers[j],
| 329 | raidPtr->regionBufferPool.bufferSize * |
| 330 | sizeof(char)); |
| 331 | } |
| 332 | RF_Free(raidPtr->regionBufferPool.buffers, |
| 333 | raidPtr->regionBufferPool.totalBuffers * |
| 334 | sizeof(void *)); |
| 335 | return (ENOMEM); |
| 336 | } |
| 337 | printf("raidPtr->regionBufferPool.buffers[%d] = %lx\n" , i, |
| 338 | (long) raidPtr->regionBufferPool.buffers[i]); |
| 339 | } |
| 340 | rf_ShutdownCreate(listp, |
| 341 | rf_ShutdownParityLoggingRegionBufferPool, |
| 342 | raidPtr); |
| 343 | /* build pool of parity buffers */ |
| 344 | parityBufferCapacity = maxRegionParityRange; |
| 345 | rf_init_mutex2(raidPtr->parityBufferPool.mutex, IPL_VM); |
| 346 | rf_init_cond2(raidPtr->parityBufferPool.cond, "rfpbpl" ); |
| 347 | raidPtr->parityBufferPool.bufferSize = parityBufferCapacity * |
| 348 | raidPtr->bytesPerSector; |
| 349 | printf("parityBufferPool.bufferSize %d\n" , |
| 350 | raidPtr->parityBufferPool.bufferSize); |
| 351 | |
| 352 | /* for now, only one region at a time may be reintegrated */ |
| 353 | raidPtr->parityBufferPool.totalBuffers = 1; |
| 354 | |
| 355 | raidPtr->parityBufferPool.availableBuffers = |
| 356 | raidPtr->parityBufferPool.totalBuffers; |
| 357 | raidPtr->parityBufferPool.availBuffersIndex = 0; |
| 358 | raidPtr->parityBufferPool.emptyBuffersIndex = 0; |
| 359 | printf("Allocating %d bytes for parityBufferPool of %d units\n" , |
| 360 | (int) (raidPtr->parityBufferPool.totalBuffers * |
| 361 | sizeof(void *)), |
| 362 | raidPtr->parityBufferPool.totalBuffers ); |
| 363 | RF_Malloc(raidPtr->parityBufferPool.buffers, |
| 364 | raidPtr->parityBufferPool.totalBuffers * sizeof(void *), |
| 365 | (void **)); |
| 366 | if (raidPtr->parityBufferPool.buffers == NULL) { |
| 367 | return (ENOMEM); |
| 368 | } |
| 369 | for (i = 0; i < raidPtr->parityBufferPool.totalBuffers; i++) { |
| 370 | printf("Allocating %d bytes for parityBufferPool#%d\n" , |
| 371 | (int) (raidPtr->parityBufferPool.bufferSize * |
| 372 | sizeof(char)),i); |
| 373 | RF_Malloc(raidPtr->parityBufferPool.buffers[i], |
| 374 | raidPtr->parityBufferPool.bufferSize * sizeof(char), |
| 375 | (void *)); |
		if (raidPtr->parityBufferPool.buffers[i] == NULL) {
			for (j = 0; j < i; j++) {
				RF_Free(raidPtr->parityBufferPool.buffers[j],
					raidPtr->parityBufferPool.bufferSize *
					sizeof(char));
			}
			RF_Free(raidPtr->parityBufferPool.buffers,
				raidPtr->parityBufferPool.totalBuffers *
| 384 | sizeof(void *)); |
| 385 | return (ENOMEM); |
| 386 | } |
| 387 | printf("parityBufferPool.buffers[%d] = %lx\n" , i, |
| 388 | (long) raidPtr->parityBufferPool.buffers[i]); |
| 389 | } |
| 390 | rf_ShutdownCreate(listp, |
| 391 | rf_ShutdownParityLoggingParityBufferPool, |
| 392 | raidPtr); |
| 393 | /* initialize parityLogDiskQueue */ |
| 394 | rf_init_mutex2(raidPtr->parityLogDiskQueue.mutex, IPL_VM); |
| 395 | rf_init_cond2(raidPtr->parityLogDiskQueue.cond, "rfpldq" ); |
| 396 | raidPtr->parityLogDiskQueue.flushQueue = NULL; |
| 397 | raidPtr->parityLogDiskQueue.reintQueue = NULL; |
| 398 | raidPtr->parityLogDiskQueue.bufHead = NULL; |
| 399 | raidPtr->parityLogDiskQueue.bufTail = NULL; |
| 400 | raidPtr->parityLogDiskQueue.reintHead = NULL; |
| 401 | raidPtr->parityLogDiskQueue.reintTail = NULL; |
| 402 | raidPtr->parityLogDiskQueue.logBlockHead = NULL; |
| 403 | raidPtr->parityLogDiskQueue.logBlockTail = NULL; |
| 404 | raidPtr->parityLogDiskQueue.reintBlockHead = NULL; |
| 405 | raidPtr->parityLogDiskQueue.reintBlockTail = NULL; |
| 406 | raidPtr->parityLogDiskQueue.freeDataList = NULL; |
| 407 | raidPtr->parityLogDiskQueue.freeCommonList = NULL; |
| 408 | |
| 409 | rf_ShutdownCreate(listp, |
| 410 | rf_ShutdownParityLoggingDiskQueue, |
| 411 | raidPtr); |
| 412 | for (i = 0; i < rf_numParityRegions; i++) { |
| 413 | rf_init_mutex2(raidPtr->regionInfo[i].mutex, IPL_VM); |
| 414 | rf_init_mutex2(raidPtr->regionInfo[i].reintMutex, IPL_VM); |
| 415 | raidPtr->regionInfo[i].reintInProgress = RF_FALSE; |
| 416 | raidPtr->regionInfo[i].regionStartAddr = |
| 417 | raidPtr->regionLogCapacity * i; |
| 418 | raidPtr->regionInfo[i].parityStartAddr = |
| 419 | raidPtr->regionParityRange * i; |
| 420 | if (i < rf_numParityRegions - 1) { |
| 421 | raidPtr->regionInfo[i].capacity = |
| 422 | raidPtr->regionLogCapacity; |
| 423 | raidPtr->regionInfo[i].numSectorsParity = |
| 424 | raidPtr->regionParityRange; |
| 425 | } else { |
| 426 | raidPtr->regionInfo[i].capacity = |
| 427 | lastRegionCapacity; |
| 428 | raidPtr->regionInfo[i].numSectorsParity = |
| 429 | raidPtr->sectorsPerDisk - |
| 430 | raidPtr->regionParityRange * i; |
| 431 | if (raidPtr->regionInfo[i].numSectorsParity > |
| 432 | maxRegionParityRange) |
| 433 | maxRegionParityRange = |
| 434 | raidPtr->regionInfo[i].numSectorsParity; |
| 435 | } |
| 436 | raidPtr->regionInfo[i].diskCount = 0; |
| 437 | RF_ASSERT(raidPtr->regionInfo[i].capacity + |
| 438 | raidPtr->regionInfo[i].regionStartAddr <= |
| 439 | totalLogCapacity); |
| 440 | RF_ASSERT(raidPtr->regionInfo[i].parityStartAddr + |
| 441 | raidPtr->regionInfo[i].numSectorsParity <= |
| 442 | raidPtr->sectorsPerDisk); |
| 443 | printf("Allocating %d bytes for region %d\n" , |
| 444 | (int) (raidPtr->regionInfo[i].capacity * |
| 445 | sizeof(RF_DiskMap_t)), i); |
| 446 | RF_Malloc(raidPtr->regionInfo[i].diskMap, |
| 447 | (raidPtr->regionInfo[i].capacity * |
| 448 | sizeof(RF_DiskMap_t)), |
| 449 | (RF_DiskMap_t *)); |
| 450 | if (raidPtr->regionInfo[i].diskMap == NULL) { |
| 451 | for (j = 0; j < i; j++) |
| 452 | FreeRegionInfo(raidPtr, j); |
| 453 | RF_Free(raidPtr->regionInfo, |
| 454 | (rf_numParityRegions * |
| 455 | sizeof(RF_RegionInfo_t))); |
| 456 | return (ENOMEM); |
| 457 | } |
| 458 | raidPtr->regionInfo[i].loggingEnabled = RF_FALSE; |
| 459 | raidPtr->regionInfo[i].coreLog = NULL; |
| 460 | } |
| 461 | rf_ShutdownCreate(listp, |
| 462 | rf_ShutdownParityLoggingRegionInfo, |
| 463 | raidPtr); |
| 464 | RF_ASSERT(raidPtr->parityLogDiskQueue.threadState == 0); |
| 465 | raidPtr->parityLogDiskQueue.threadState = RF_PLOG_CREATED; |
| 466 | rc = RF_CREATE_THREAD(raidPtr->pLogDiskThreadHandle, |
| 467 | rf_ParityLoggingDiskManager, raidPtr,"rf_log" ); |
| 468 | if (rc) { |
| 469 | raidPtr->parityLogDiskQueue.threadState = 0; |
| 470 | RF_ERRORMSG3("Unable to create parity logging disk thread file %s line %d rc=%d\n" , |
| 471 | __FILE__, __LINE__, rc); |
| 472 | return (ENOMEM); |
| 473 | } |
| 474 | /* wait for thread to start */ |
| 475 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
| 476 | while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_RUNNING)) { |
| 477 | rf_wait_cond2(raidPtr->parityLogDiskQueue.cond, |
| 478 | raidPtr->parityLogDiskQueue.mutex); |
| 479 | } |
| 480 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
| 481 | |
| 482 | rf_ShutdownCreate(listp, rf_ShutdownParityLogging, raidPtr); |
| 483 | if (rf_parityLogDebug) { |
| 484 | printf(" size of disk log in sectors: %d\n" , |
| 485 | (int) totalLogCapacity); |
| 486 | printf(" total number of parity regions is %d\n" , (int) rf_numParityRegions); |
| 487 | printf(" nominal sectors of log per parity region is %d\n" , (int) raidPtr->regionLogCapacity); |
| 488 | printf(" nominal region fragmentation is %d sectors\n" , (int) fragmentation); |
| 489 | printf(" total number of parity logs is %d\n" , raidPtr->numParityLogs); |
| 490 | printf(" parity log size is %d sectors\n" , raidPtr->numSectorsPerLog); |
| 491 | printf(" total in-core log space is %d bytes\n" , (int) rf_totalInCoreLogCapacity); |
| 492 | } |
| 493 | rf_EnableParityLogging(raidPtr); |
| 494 | |
| 495 | return (0); |
| 496 | } |
| 497 | |
| 498 | static void |
| 499 | FreeRegionInfo( |
| 500 | RF_Raid_t * raidPtr, |
| 501 | RF_RegionId_t regionID) |
| 502 | { |
| 503 | RF_Free(raidPtr->regionInfo[regionID].diskMap, |
| 504 | (raidPtr->regionInfo[regionID].capacity * |
| 505 | sizeof(RF_DiskMap_t))); |
| 506 | if (!rf_forceParityLogReint && raidPtr->regionInfo[regionID].coreLog) { |
| 507 | rf_ReleaseParityLogs(raidPtr, |
| 508 | raidPtr->regionInfo[regionID].coreLog); |
| 509 | raidPtr->regionInfo[regionID].coreLog = NULL; |
| 510 | } else { |
| 511 | RF_ASSERT(raidPtr->regionInfo[regionID].coreLog == NULL); |
| 512 | RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == 0); |
| 513 | } |
| 514 | rf_destroy_mutex2(raidPtr->regionInfo[regionID].reintMutex); |
| 515 | rf_destroy_mutex2(raidPtr->regionInfo[regionID].mutex); |
| 516 | } |
| 517 | |
| 518 | |
| 519 | static void |
| 520 | FreeParityLogQueue(RF_Raid_t * raidPtr) |
| 521 | { |
| 522 | RF_ParityLog_t *l1, *l2; |
| 523 | |
| 524 | l1 = raidPtr->parityLogPool.parityLogs; |
| 525 | while (l1) { |
| 526 | l2 = l1; |
| 527 | l1 = l2->next; |
| 528 | RF_Free(l2->records, (raidPtr->numSectorsPerLog * |
| 529 | sizeof(RF_ParityLogRecord_t))); |
| 530 | RF_Free(l2, sizeof(RF_ParityLog_t)); |
| 531 | } |
| 532 | rf_destroy_mutex2(raidPtr->parityLogPool.mutex); |
| 533 | } |
| 534 | |
| 535 | |
| 536 | static void |
| 537 | FreeRegionBufferQueue(RF_RegionBufferQueue_t * queue) |
| 538 | { |
| 539 | int i; |
| 540 | |
| 541 | if (queue->availableBuffers != queue->totalBuffers) { |
| 542 | printf("Attempt to free region queue which is still in use!\n" ); |
| 543 | RF_ASSERT(0); |
| 544 | } |
| 545 | for (i = 0; i < queue->totalBuffers; i++) |
| 546 | RF_Free(queue->buffers[i], queue->bufferSize); |
| 547 | RF_Free(queue->buffers, queue->totalBuffers * sizeof(void *)); |
| 548 | rf_destroy_mutex2(queue->mutex); |
| 549 | rf_destroy_cond2(queue->cond); |
| 550 | } |
| 551 | |
| 552 | static void |
| 553 | rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg) |
| 554 | { |
| 555 | RF_Raid_t *raidPtr; |
| 556 | RF_RegionId_t i; |
| 557 | |
| 558 | raidPtr = (RF_Raid_t *) arg; |
| 559 | if (rf_parityLogDebug) { |
| 560 | printf("raid%d: ShutdownParityLoggingRegionInfo\n" , |
| 561 | raidPtr->raidid); |
| 562 | } |
| 563 | /* free region information structs */ |
| 564 | for (i = 0; i < rf_numParityRegions; i++) |
| 565 | FreeRegionInfo(raidPtr, i); |
	RF_Free(raidPtr->regionInfo, (rf_numParityRegions *
				      sizeof(RF_RegionInfo_t)));
| 568 | raidPtr->regionInfo = NULL; |
| 569 | } |
| 570 | |
| 571 | static void |
| 572 | rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg) |
| 573 | { |
| 574 | RF_Raid_t *raidPtr; |
| 575 | |
| 576 | raidPtr = (RF_Raid_t *) arg; |
| 577 | if (rf_parityLogDebug) { |
| 578 | printf("raid%d: ShutdownParityLoggingPool\n" , raidPtr->raidid); |
| 579 | } |
| 580 | /* free contents of parityLogPool */ |
| 581 | FreeParityLogQueue(raidPtr); |
| 582 | RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * |
| 583 | raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); |
| 584 | } |
| 585 | |
| 586 | static void |
| 587 | rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg) |
| 588 | { |
| 589 | RF_Raid_t *raidPtr; |
| 590 | |
| 591 | raidPtr = (RF_Raid_t *) arg; |
| 592 | if (rf_parityLogDebug) { |
| 593 | printf("raid%d: ShutdownParityLoggingRegionBufferPool\n" , |
| 594 | raidPtr->raidid); |
| 595 | } |
| 596 | FreeRegionBufferQueue(&raidPtr->regionBufferPool); |
| 597 | } |
| 598 | |
| 599 | static void |
| 600 | rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg) |
| 601 | { |
| 602 | RF_Raid_t *raidPtr; |
| 603 | |
| 604 | raidPtr = (RF_Raid_t *) arg; |
| 605 | if (rf_parityLogDebug) { |
| 606 | printf("raid%d: ShutdownParityLoggingParityBufferPool\n" , |
| 607 | raidPtr->raidid); |
| 608 | } |
| 609 | FreeRegionBufferQueue(&raidPtr->parityBufferPool); |
| 610 | } |
| 611 | |
| 612 | static void |
| 613 | rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg) |
| 614 | { |
| 615 | RF_ParityLogData_t *d; |
| 616 | RF_CommonLogData_t *c; |
| 617 | RF_Raid_t *raidPtr; |
| 618 | |
| 619 | raidPtr = (RF_Raid_t *) arg; |
| 620 | if (rf_parityLogDebug) { |
| 621 | printf("raid%d: ShutdownParityLoggingDiskQueue\n" , |
| 622 | raidPtr->raidid); |
| 623 | } |
| 624 | /* free disk manager stuff */ |
| 625 | RF_ASSERT(raidPtr->parityLogDiskQueue.bufHead == NULL); |
| 626 | RF_ASSERT(raidPtr->parityLogDiskQueue.bufTail == NULL); |
| 627 | RF_ASSERT(raidPtr->parityLogDiskQueue.reintHead == NULL); |
| 628 | RF_ASSERT(raidPtr->parityLogDiskQueue.reintTail == NULL); |
| 629 | while (raidPtr->parityLogDiskQueue.freeDataList) { |
| 630 | d = raidPtr->parityLogDiskQueue.freeDataList; |
| 631 | raidPtr->parityLogDiskQueue.freeDataList = |
| 632 | raidPtr->parityLogDiskQueue.freeDataList->next; |
| 633 | RF_Free(d, sizeof(RF_ParityLogData_t)); |
| 634 | } |
| 635 | while (raidPtr->parityLogDiskQueue.freeCommonList) { |
| 636 | c = raidPtr->parityLogDiskQueue.freeCommonList; |
| 637 | raidPtr->parityLogDiskQueue.freeCommonList = c->next; |
| 638 | /* init is in rf_paritylog.c */ |
| 639 | rf_destroy_mutex2(c->mutex); |
| 640 | RF_Free(c, sizeof(RF_CommonLogData_t)); |
| 641 | } |
| 642 | |
| 643 | rf_destroy_mutex2(raidPtr->parityLogDiskQueue.mutex); |
| 644 | rf_destroy_cond2(raidPtr->parityLogDiskQueue.cond); |
| 645 | } |
| 646 | |
| 647 | static void |
| 648 | rf_ShutdownParityLogging(RF_ThreadArg_t arg) |
| 649 | { |
| 650 | RF_Raid_t *raidPtr; |
| 651 | |
| 652 | raidPtr = (RF_Raid_t *) arg; |
| 653 | if (rf_parityLogDebug) { |
| 654 | printf("raid%d: ShutdownParityLogging\n" , raidPtr->raidid); |
| 655 | } |
| 656 | /* shutdown disk thread */ |
| 657 | /* This has the desirable side-effect of forcing all regions to be |
| 658 | * reintegrated. This is necessary since all parity log maps are |
| 659 | * currently held in volatile memory. */ |
| 660 | |
| 661 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
| 662 | raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_TERMINATE; |
| 663 | rf_signal_cond2(raidPtr->parityLogDiskQueue.cond); |
| 664 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
| 665 | /* |
| 666 | * pLogDiskThread will now terminate when queues are cleared |
| 667 | * now wait for it to be done |
| 668 | */ |
| 669 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
| 670 | while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_SHUTDOWN)) { |
| 671 | rf_wait_cond2(raidPtr->parityLogDiskQueue.cond, |
| 672 | raidPtr->parityLogDiskQueue.mutex); |
| 673 | } |
| 674 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
| 675 | if (rf_parityLogDebug) { |
| 676 | printf("raid%d: ShutdownParityLogging done (thread completed)\n" , raidPtr->raidid); |
| 677 | } |
| 678 | } |
| 679 | |
| 680 | int |
| 681 | rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t * raidPtr) |
| 682 | { |
| 683 | return (20); |
| 684 | } |
| 685 | |
| 686 | RF_HeadSepLimit_t |
| 687 | rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t * raidPtr) |
| 688 | { |
| 689 | return (10); |
| 690 | } |
| 691 | /* return the region ID for a given RAID address */ |
| 692 | RF_RegionId_t |
| 693 | rf_MapRegionIDParityLogging( |
| 694 | RF_Raid_t * raidPtr, |
| 695 | RF_SectorNum_t address) |
| 696 | { |
| 697 | RF_RegionId_t regionID; |
| 698 | |
| 699 | /* regionID = address / (raidPtr->regionParityRange * raidPtr->Layout.numDataCol); */ |
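	/*
	 * Illustrative example (hypothetical numbers): with
	 * regionParityRange = 1000 sectors, an address of 2500 maps to
	 * region 2500 / 1000 = 2.  An address in the oversized last
	 * region that divides out to rf_numParityRegions is clamped back
	 * to the last valid region ID below.
	 */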
| 700 | regionID = address / raidPtr->regionParityRange; |
| 701 | if (regionID == rf_numParityRegions) { |
| 702 | /* last region may be larger than other regions */ |
| 703 | regionID--; |
| 704 | } |
| 705 | RF_ASSERT(address >= raidPtr->regionInfo[regionID].parityStartAddr); |
| 706 | RF_ASSERT(address < raidPtr->regionInfo[regionID].parityStartAddr + |
| 707 | raidPtr->regionInfo[regionID].numSectorsParity); |
| 708 | RF_ASSERT(regionID < rf_numParityRegions); |
| 709 | return (regionID); |
| 710 | } |
| 711 | |
| 712 | |
| 713 | /* given a logical RAID sector, determine physical disk address of data */ |
| 714 | void |
| 715 | rf_MapSectorParityLogging( |
| 716 | RF_Raid_t * raidPtr, |
| 717 | RF_RaidAddr_t raidSector, |
| 718 | RF_RowCol_t * col, |
| 719 | RF_SectorNum_t * diskSector, |
| 720 | int remap) |
| 721 | { |
| 722 | RF_StripeNum_t SUID = raidSector / |
| 723 | raidPtr->Layout.sectorsPerStripeUnit; |
| 724 | /* *col = (SUID % (raidPtr->numCol - |
| 725 | * raidPtr->Layout.numParityLogCol)); */ |
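	/*
	 * Illustrative example (hypothetical geometry): with 3 data
	 * columns and 32 sectors per stripe unit, raidSector 200 gives
	 * SUID 200 / 32 = 6, so the data maps to column 6 % 3 = 0 at
	 * disk sector (6 / 3) * 32 + (200 % 32) = 72.
	 */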
| 726 | *col = SUID % raidPtr->Layout.numDataCol; |
| 727 | *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * |
| 728 | raidPtr->Layout.sectorsPerStripeUnit + |
| 729 | (raidSector % raidPtr->Layout.sectorsPerStripeUnit); |
| 730 | } |
| 731 | |
| 732 | |
| 733 | /* given a logical RAID sector, determine physical disk address of parity */ |
| 734 | void |
| 735 | rf_MapParityParityLogging( |
| 736 | RF_Raid_t * raidPtr, |
| 737 | RF_RaidAddr_t raidSector, |
| 738 | RF_RowCol_t * col, |
| 739 | RF_SectorNum_t * diskSector, |
| 740 | int remap) |
| 741 | { |
| 742 | RF_StripeNum_t SUID = raidSector / |
| 743 | raidPtr->Layout.sectorsPerStripeUnit; |
| 744 | |
| 745 | /* *col = |
| 746 | * raidPtr->Layout.numDataCol-(SUID/raidPtr->Layout.numDataCol)%(raidPt |
| 747 | * r->numCol - raidPtr->Layout.numParityLogCol); */ |
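	/*
	 * Parity is not rotated in this layout: it always lives in the
	 * dedicated parity column (numDataCol, i.e. the second-to-last
	 * disk), while the parity log occupies the last column.
	 */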
| 748 | *col = raidPtr->Layout.numDataCol; |
| 749 | *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * |
| 750 | raidPtr->Layout.sectorsPerStripeUnit + |
| 751 | (raidSector % raidPtr->Layout.sectorsPerStripeUnit); |
| 752 | } |
| 753 | |
| 754 | |
| 755 | /* given a regionID and sector offset, determine the physical disk address of the parity log */ |
| 756 | void |
| 757 | rf_MapLogParityLogging( |
| 758 | RF_Raid_t * raidPtr, |
| 759 | RF_RegionId_t regionID, |
| 760 | RF_SectorNum_t regionOffset, |
| 761 | RF_RowCol_t * col, |
| 762 | RF_SectorNum_t * startSector) |
| 763 | { |
| 764 | *col = raidPtr->numCol - 1; |
| 765 | *startSector = raidPtr->regionInfo[regionID].regionStartAddr + regionOffset; |
| 766 | } |
| 767 | |
| 768 | |
| 769 | /* given a regionID, determine the physical disk address of the logged |
| 770 | parity for that region */ |
| 771 | void |
| 772 | rf_MapRegionParity( |
| 773 | RF_Raid_t * raidPtr, |
| 774 | RF_RegionId_t regionID, |
| 775 | RF_RowCol_t * col, |
| 776 | RF_SectorNum_t * startSector, |
| 777 | RF_SectorCount_t * numSector) |
| 778 | { |
| 779 | *col = raidPtr->numCol - 2; |
| 780 | *startSector = raidPtr->regionInfo[regionID].parityStartAddr; |
| 781 | *numSector = raidPtr->regionInfo[regionID].numSectorsParity; |
| 782 | } |
| 783 | |
| 784 | |
| 785 | /* given a logical RAID address, determine the participating disks in |
| 786 | the stripe */ |
| 787 | void |
| 788 | rf_IdentifyStripeParityLogging( |
| 789 | RF_Raid_t * raidPtr, |
| 790 | RF_RaidAddr_t addr, |
| 791 | RF_RowCol_t ** diskids) |
| 792 | { |
| 793 | RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, |
| 794 | addr); |
| 795 | RF_ParityLoggingConfigInfo_t *info = (RF_ParityLoggingConfigInfo_t *) |
| 796 | raidPtr->Layout.layoutSpecificInfo; |
| 797 | *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol]; |
| 798 | } |
| 799 | |
| 800 | |
| 801 | void |
| 802 | rf_MapSIDToPSIDParityLogging( |
| 803 | RF_RaidLayout_t * layoutPtr, |
| 804 | RF_StripeNum_t stripeID, |
| 805 | RF_StripeNum_t * psID, |
| 806 | RF_ReconUnitNum_t * which_ru) |
| 807 | { |
| 808 | *which_ru = 0; |
| 809 | *psID = stripeID; |
| 810 | } |
| 811 | |
| 812 | |
/* select an algorithm for performing an access.  Returns a pointer to
 * a function that will create the DAG for the access.
 */
| 817 | void |
| 818 | rf_ParityLoggingDagSelect( |
| 819 | RF_Raid_t * raidPtr, |
| 820 | RF_IoType_t type, |
| 821 | RF_AccessStripeMap_t * asmp, |
| 822 | RF_VoidFuncPtr * createFunc) |
| 823 | { |
| 824 | RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); |
| 825 | RF_PhysDiskAddr_t *failedPDA = NULL; |
| 826 | RF_RowCol_t fcol; |
| 827 | RF_RowStatus_t rstat; |
| 828 | int prior_recon; |
| 829 | |
| 830 | RF_ASSERT(RF_IO_IS_R_OR_W(type)); |
| 831 | |
| 832 | if (asmp->numDataFailed + asmp->numParityFailed > 1) { |
| 833 | RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n" ); |
| 834 | *createFunc = NULL; |
| 835 | return; |
| 836 | } else |
| 837 | if (asmp->numDataFailed + asmp->numParityFailed == 1) { |
| 838 | |
| 839 | /* if under recon & already reconstructed, redirect |
| 840 | * the access to the spare drive and eliminate the |
| 841 | * failure indication */ |
| 842 | failedPDA = asmp->failedPDAs[0]; |
| 843 | fcol = failedPDA->col; |
| 844 | rstat = raidPtr->status; |
| 845 | prior_recon = (rstat == rf_rs_reconfigured) || ( |
| 846 | (rstat == rf_rs_reconstructing) ? |
| 847 | rf_CheckRUReconstructed(raidPtr->reconControl->reconMap, failedPDA->startSector) : 0 |
| 848 | ); |
| 849 | if (prior_recon) { |
| 850 | RF_RowCol_t oc = failedPDA->col; |
| 851 | RF_SectorNum_t oo = failedPDA->startSector; |
| 852 | if (layoutPtr->map->flags & |
| 853 | RF_DISTRIBUTE_SPARE) { |
| 854 | /* redirect to dist spare space */ |
| 855 | |
| 856 | if (failedPDA == asmp->parityInfo) { |
| 857 | |
| 858 | /* parity has failed */ |
| 859 | (layoutPtr->map->MapParity) (raidPtr, failedPDA->raidAddress, |
| 860 | &failedPDA->col, &failedPDA->startSector, RF_REMAP); |
| 861 | |
| 862 | if (asmp->parityInfo->next) { /* redir 2nd component, |
| 863 | * if any */ |
| 864 | RF_PhysDiskAddr_t *p = asmp->parityInfo->next; |
| 865 | RF_SectorNum_t SUoffs = p->startSector % layoutPtr->sectorsPerStripeUnit; |
| 866 | p->col = failedPDA->col; |
| 867 | p->startSector = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->startSector) + |
| 868 | SUoffs; /* cheating: |
| 869 | * startSector is not |
| 870 | * really a RAID address */ |
| 871 | } |
| 872 | } else |
| 873 | if (asmp->parityInfo->next && failedPDA == asmp->parityInfo->next) { |
| 874 | RF_ASSERT(0); /* should not ever |
| 875 | * happen */ |
| 876 | } else { |
| 877 | |
| 878 | /* data has failed */ |
| 879 | (layoutPtr->map->MapSector) (raidPtr, failedPDA->raidAddress, |
| 880 | &failedPDA->col, &failedPDA->startSector, RF_REMAP); |
| 881 | |
| 882 | } |
| 883 | |
| 884 | } else { |
| 885 | /* redirect to dedicated spare space */ |
| 886 | |
| 887 | failedPDA->col = raidPtr->Disks[fcol].spareCol; |
| 888 | |
| 889 | /* the parity may have two distinct |
| 890 | * components, both of which may need |
| 891 | * to be redirected */ |
| 892 | if (asmp->parityInfo->next) { |
| 893 | if (failedPDA == asmp->parityInfo) { |
| 894 | failedPDA->next->col = failedPDA->col; |
| 895 | } else |
| 896 | if (failedPDA == asmp->parityInfo->next) { /* paranoid: should never occur */ |
| 897 | asmp->parityInfo->col = failedPDA->col; |
| 898 | } |
| 899 | } |
| 900 | } |
| 901 | |
| 902 | RF_ASSERT(failedPDA->col != -1); |
| 903 | |
| 904 | if (rf_dagDebug || rf_mapDebug) { |
| 905 | printf("raid%d: Redirected type '%c' c %d o %ld -> c %d o %ld\n" , |
| 906 | raidPtr->raidid, type, oc, (long) oo, failedPDA->col, (long) failedPDA->startSector); |
| 907 | } |
| 908 | asmp->numDataFailed = asmp->numParityFailed = 0; |
| 909 | } |
| 910 | } |
| 911 | if (type == RF_IO_TYPE_READ) { |
| 912 | |
| 913 | if (asmp->numDataFailed == 0) |
| 914 | *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; |
| 915 | else |
| 916 | *createFunc = (RF_VoidFuncPtr) rf_CreateRaidFiveDegradedReadDAG; |
| 917 | |
| 918 | } else { |
| 919 | |
| 920 | |
| 921 | /* if mirroring, always use large writes. If the access |
| 922 | * requires two distinct parity updates, always do a small |
| 923 | * write. If the stripe contains a failure but the access |
| 924 | * does not, do a small write. The first conditional |
| 925 | * (numStripeUnitsAccessed <= numDataCol/2) uses a |
| 926 | * less-than-or-equal rather than just a less-than because |
| 927 | * when G is 3 or 4, numDataCol/2 is 1, and I want |
| 928 | * single-stripe-unit updates to use just one disk. */ |
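		/*
		 * Illustrative example: with numDataCol = 4 the threshold is
		 * 4 / 2 = 2, so a fault-free access touching one or two
		 * stripe units takes the parity logging small-write DAG,
		 * while one touching three or four takes the large-write DAG
		 * (assuming a single parity range and no failures in the
		 * stripe).
		 */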
| 929 | if ((asmp->numDataFailed + asmp->numParityFailed) == 0) { |
| 930 | if (((asmp->numStripeUnitsAccessed <= |
| 931 | (layoutPtr->numDataCol / 2)) && |
| 932 | (layoutPtr->numDataCol != 1)) || |
| 933 | (asmp->parityInfo->next != NULL) || |
| 934 | rf_CheckStripeForFailures(raidPtr, asmp)) { |
| 935 | *createFunc = (RF_VoidFuncPtr) rf_CreateParityLoggingSmallWriteDAG; |
| 936 | } else |
| 937 | *createFunc = (RF_VoidFuncPtr) rf_CreateParityLoggingLargeWriteDAG; |
| 938 | } else |
| 939 | if (asmp->numParityFailed == 1) |
| 940 | *createFunc = (RF_VoidFuncPtr) rf_CreateNonRedundantWriteDAG; |
| 941 | else |
| 942 | if (asmp->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit) |
| 943 | *createFunc = NULL; |
| 944 | else |
| 945 | *createFunc = (RF_VoidFuncPtr) rf_CreateDegradedWriteDAG; |
| 946 | } |
| 947 | } |
| 948 | #endif /* RF_INCLUDE_PARITYLOGGING > 0 */ |
| 949 | |