| 1 | /* $NetBSD: rf_raid.h,v 1.45 2014/10/18 08:33:28 snj Exp $ */ |
| 2 | /* |
| 3 | * Copyright (c) 1995 Carnegie-Mellon University. |
| 4 | * All rights reserved. |
| 5 | * |
| 6 | * Author: Mark Holland |
| 7 | * |
| 8 | * Permission to use, copy, modify and distribute this software and |
| 9 | * its documentation is hereby granted, provided that both the copyright |
| 10 | * notice and this permission notice appear in all copies of the |
| 11 | * software, derivative works or modified versions, and any portions |
| 12 | * thereof, and that both notices appear in supporting documentation. |
| 13 | * |
| 14 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
| 15 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND |
| 16 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
| 17 | * |
| 18 | * Carnegie Mellon requests users of this software to return to |
| 19 | * |
| 20 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
| 21 | * School of Computer Science |
| 22 | * Carnegie Mellon University |
| 23 | * Pittsburgh PA 15213-3890 |
| 24 | * |
| 25 | * any improvements or extensions that they make and grant Carnegie the |
| 26 | * rights to redistribute these changes. |
| 27 | */ |
| 28 | |
| 29 | /********************************************** |
| 30 | * rf_raid.h -- main header file for RAID driver |
| 31 | **********************************************/ |
| 32 | |
| 33 | |
| 34 | #ifndef _RF__RF_RAID_H_ |
| 35 | #define _RF__RF_RAID_H_ |
| 36 | |
| 37 | #include <dev/raidframe/raidframevar.h> |
| 38 | #include "rf_archs.h" |
| 39 | #include "rf_threadstuff.h" |
| 40 | |
| 41 | #include "rf_netbsd.h" |
| 42 | |
| 43 | #include <sys/disklabel.h> |
| 44 | #include <sys/types.h> |
| 45 | #include <sys/queue.h> |
| 46 | |
| 47 | #include "rf_alloclist.h" |
| 48 | #include "rf_stripelocks.h" |
| 49 | #include "rf_layout.h" |
| 50 | #include "rf_disks.h" |
| 51 | #include "rf_debugMem.h" |
| 52 | #include "rf_diskqueue.h" |
| 53 | #include "rf_reconstruct.h" |
| 54 | #include "rf_acctrace.h" |
| 55 | #include "rf_fifo.h" |
| 56 | |
| 57 | #if RF_INCLUDE_PARITYLOGGING > 0 |
| 58 | #include "rf_paritylog.h" |
| 59 | #endif /* RF_INCLUDE_PARITYLOGGING > 0 */ |
| 60 | |
/*
 * Component label version numbers.  RF_COMPONENT_LABEL_VERSION_1 names
 * version 1 explicitly; RF_COMPONENT_LABEL_VERSION is 2.
 * NOTE(review): presumably the latter is the version written to new
 * labels -- the code that uses it is not visible here; confirm.
 */
#define RF_COMPONENT_LABEL_VERSION_1	1
#define RF_COMPONENT_LABEL_VERSION	2

/*
 * Dirty/clean markers for a RAID set.
 * NOTE(review): where these values are stored and compared is not
 * visible in this header; confirm against the users.
 */
#define RF_RAID_DIRTY	0
#define RF_RAID_CLEAN	1
| 65 | |
| 66 | |
/*
 * Status of a parity group.  Each row in the array is a distinct
 * parity group, so each has its own status, which is one of the
 * values below.  The numeric values are spelled out explicitly; they
 * match the implicit numbering a plain enumerator list would yield.
 */
typedef enum RF_RowStatus_e {
	rf_rs_optimal = 0,
	rf_rs_degraded = 1,
	rf_rs_reconstructing = 2,
	rf_rs_reconfigured = 3
} RF_RowStatus_t;
| 77 | |
/*
 * Cumulative I/O statistics: a start/stop timestamp pair plus running
 * totals of response time, I/O count and sectors transferred.
 */
struct RF_CumulativeStats_s {
	/* time at which the statistics were last started */
	struct timeval start;
	/* time at which the statistics were last stopped */
	struct timeval stop;
	/* sum of all user response times, in microseconds */
	long	sum_io_us;
	/* total number of I/Os serviced */
	long	num_ios;
	/* total number of sectors read or written */
	long	num_sect_moved;
};
| 85 | |
| 86 | struct RF_ThroughputStats_s { |
| 87 | rf_declare_mutex2(mutex);/* a mutex used to lock the configuration |
| 88 | * stuff */ |
| 89 | struct timeval start; /* timer started when numOutstandingRequests |
| 90 | * moves from 0 to 1 */ |
| 91 | struct timeval stop; /* timer stopped when numOutstandingRequests |
| 92 | * moves from 1 to 0 */ |
| 93 | RF_uint64 sum_io_us; /* total time timer is enabled */ |
| 94 | RF_uint64 num_ios; /* total number of ios processed by RAIDframe */ |
| 95 | long num_out_ios; /* number of outstanding ios */ |
| 96 | }; |
| 97 | |
| 98 | struct RF_Raid_s { |
| 99 | /* This portion never changes, and can be accessed without locking */ |
| 100 | /* an exception is Disks[][].status, which requires locking when it is |
| 101 | * changed. XXX this is no longer true. numSpare and friends can |
| 102 | * change now. |
| 103 | */ |
| 104 | u_int numCol; /* number of columns of disks, typically == # |
| 105 | * of disks/rank */ |
| 106 | u_int numSpare; /* number of spare disks */ |
| 107 | int maxQueueDepth; /* max disk queue depth */ |
| 108 | RF_SectorCount_t totalSectors; /* total number of sectors in the |
| 109 | * array */ |
| 110 | RF_SectorCount_t sectorsPerDisk; /* number of sectors on each |
| 111 | * disk */ |
| 112 | u_int logBytesPerSector; /* base-2 log of the number of bytes |
| 113 | * in a sector */ |
| 114 | u_int bytesPerSector; /* bytes in a sector */ |
| 115 | RF_int32 sectorMask; /* mask of bytes-per-sector */ |
| 116 | |
| 117 | RF_RaidLayout_t Layout; /* all information related to layout */ |
| 118 | RF_RaidDisk_t *Disks; /* all information related to physical disks */ |
| 119 | RF_DiskQueue_t *Queues;/* all information related to disk queues */ |
| 120 | const RF_DiskQueueSW_t *qType;/* pointer to the DiskQueueSW used for the |
| 121 | component queues. */ |
| 122 | /* NOTE: This is an anchor point via which the queues can be |
| 123 | * accessed, but the enqueue/dequeue routines in diskqueue.c use a |
| 124 | * local copy of this pointer for the actual accesses. */ |
| 125 | /* The remainder of the structure can change, and therefore requires |
| 126 | * locking on reads and updates */ |
| 127 | rf_declare_mutex2(mutex);/* mutex used to serialize access to |
| 128 | * the fields below */ |
| 129 | RF_RowStatus_t status; /* the status of each row in the array */ |
| 130 | int valid; /* indicates successful configuration */ |
| 131 | RF_LockTableEntry_t *lockTable; /* stripe-lock table */ |
| 132 | RF_LockTableEntry_t *quiesceLock; /* quiesnce table */ |
| 133 | int numFailures; /* total number of failures in the array */ |
| 134 | int numNewFailures; /* number of *new* failures (that havn't |
| 135 | caused a mod_counter update */ |
| 136 | |
| 137 | int parity_good; /* !0 if parity is known to be correct */ |
| 138 | int serial_number; /* a "serial number" for this set */ |
| 139 | int mod_counter; /* modification counter for component labels */ |
| 140 | int clean; /* completely unused and should be removed */ |
| 141 | |
| 142 | int openings; /* Number of IO's which can be scheduled |
| 143 | simultaneously (high-level - not a |
| 144 | per-component limit)*/ |
| 145 | |
| 146 | int maxOutstanding; /* maxOutstanding requests (per-component) */ |
| 147 | int autoconfigure; /* automatically configure this RAID set. |
| 148 | 0 == no, 1 == yes */ |
| 149 | int root_partition; /* Use this set as / |
| 150 | 0 == no, 1 == yes*/ |
| 151 | int last_unit; /* last unit number (e.g. 0 for /dev/raid0) |
| 152 | of this component. Used for autoconfigure |
| 153 | only. */ |
| 154 | int config_order; /* 0 .. n. The order in which the component |
| 155 | should be auto-configured. E.g. 0 is will |
| 156 | done first, (and would become raid0). |
| 157 | This may be in conflict with last_unit!!?! */ |
| 158 | /* Not currently used. */ |
| 159 | |
| 160 | /* queue to gather up requests from KernelWakeupFunc() and let |
| 161 | a kernel thread deal with calling rf_DiskIOComplete and any |
| 162 | callback functions. */ |
| 163 | TAILQ_HEAD(iodone_q,RF_DiskQueueData_s) iodone; |
| 164 | /* and a lock/cv to protect it */ |
| 165 | rf_declare_mutex2(iodone_lock); |
| 166 | rf_declare_cond2(iodone_cv); |
| 167 | |
| 168 | |
| 169 | RF_VoidPointerListElem_t *iobuf; /* I/O buffer free list */ |
| 170 | int iobuf_count; /* count of I/O buffers on the freelist */ |
| 171 | int numEmergencyBuffers; /* number of these buffers to pre-allocate */ |
| 172 | |
| 173 | RF_VoidPointerListElem_t *stripebuf; /* Full-stripe buffer free list */ |
| 174 | int stripebuf_count; /* count of full-stripe buffers on the freelist */ |
| 175 | int numEmergencyStripeBuffers; /* number of these buffers to pre-allocate */ |
| 176 | |
| 177 | /* |
| 178 | * Cleanup stuff |
| 179 | */ |
| 180 | RF_ShutdownList_t *shutdownList; /* shutdown activities */ |
| 181 | RF_AllocListElem_t *cleanupList; /* memory to be freed at |
| 182 | * shutdown time */ |
| 183 | |
| 184 | /* |
| 185 | * Recon stuff |
| 186 | */ |
| 187 | RF_HeadSepLimit_t headSepLimit; |
| 188 | int numFloatingReconBufs; |
| 189 | int reconInProgress; |
| 190 | rf_declare_cond2(waitForReconCond); /* goes with raidPtr->mutex */ |
| 191 | RF_RaidReconDesc_t *reconDesc; /* reconstruction descriptor */ |
| 192 | RF_ReconCtrl_t *reconControl; /* reconstruction control structure |
| 193 | * pointers for each row in the array */ |
| 194 | |
| 195 | /* |
| 196 | * Array-quiescence stuff |
| 197 | */ |
| 198 | rf_declare_mutex2(access_suspend_mutex); |
| 199 | rf_declare_cond2(access_suspend_cv); |
| 200 | RF_IoCount_t accesses_suspended; |
| 201 | RF_IoCount_t accs_in_flight; |
| 202 | int access_suspend_release; |
| 203 | int waiting_for_quiescence; |
| 204 | RF_CallbackDesc_t *quiesce_wait_list; |
| 205 | |
| 206 | /* |
| 207 | * Statistics |
| 208 | */ |
| 209 | RF_StripeCount_t parity_rewrite_stripes_done; |
| 210 | RF_StripeCount_t copyback_stripes_done; |
| 211 | |
| 212 | int recon_in_progress; |
| 213 | int parity_rewrite_in_progress; |
| 214 | int copyback_in_progress; |
| 215 | int adding_hot_spare; |
| 216 | |
| 217 | rf_declare_cond2(adding_hot_spare_cv); |
| 218 | |
| 219 | /* |
| 220 | * Engine thread control |
| 221 | */ |
| 222 | rf_declare_mutex2(node_queue_mutex); |
| 223 | rf_declare_cond2(node_queue_cv); |
| 224 | RF_DagNode_t *node_queue; |
| 225 | RF_Thread_t parity_rewrite_thread; |
| 226 | RF_Thread_t copyback_thread; |
| 227 | RF_Thread_t engine_thread; |
| 228 | RF_Thread_t engine_helper_thread; |
| 229 | RF_Thread_t recon_thread; |
| 230 | int shutdown_engine; |
| 231 | int shutdown_raidio; |
| 232 | int dags_in_flight; /* debug */ |
| 233 | |
| 234 | /* |
| 235 | * PSS (Parity Stripe Status) stuff |
| 236 | */ |
| 237 | long pssTableSize; |
| 238 | |
| 239 | /* |
| 240 | * Reconstruction stuff |
| 241 | */ |
| 242 | int procsInBufWait; |
| 243 | int numFullReconBuffers; |
| 244 | #if RF_ACC_TRACE > 0 |
| 245 | RF_AccTraceEntry_t *recon_tracerecs; |
| 246 | #endif |
| 247 | unsigned long accumXorTimeUs; |
| 248 | |
| 249 | /* |
| 250 | * nAccOutstanding, waitShutdown protected by desc freelist lock |
| 251 | * (This may seem strange, since that's a central serialization point |
| 252 | * for a per-array piece of data, but otherwise, it'd be an extra |
| 253 | * per-array lock, and that'd only be less efficient...) |
| 254 | */ |
| 255 | rf_declare_mutex2(rad_lock); |
| 256 | rf_declare_cond2(outstandingCond); |
| 257 | int waitShutdown; |
| 258 | int nAccOutstanding; |
| 259 | |
| 260 | RF_DiskId_t **diskids; |
| 261 | |
| 262 | int raidid; |
| 263 | void *softc; |
| 264 | RF_AccTotals_t acc_totals; |
| 265 | int keep_acc_totals; |
| 266 | |
| 267 | struct raidcinfo *raid_cinfo; /* array of component info */ |
| 268 | |
| 269 | int terminate_disk_queues; |
| 270 | |
| 271 | /* |
| 272 | * XXX |
| 273 | * |
| 274 | * config-specific information should be moved |
| 275 | * somewhere else, or at least hung off this |
| 276 | * in some generic way |
| 277 | */ |
| 278 | #if RF_INCLUDE_CHAINDECLUSTER > 0 |
| 279 | |
| 280 | /* used by rf_compute_workload_shift */ |
| 281 | RF_RowCol_t hist_diskreq[RF_MAXCOL]; |
| 282 | #endif |
| 283 | /* used by declustering */ |
| 284 | int noRotate; |
| 285 | |
| 286 | #if RF_INCLUDE_PARITYLOGGING > 0 |
| 287 | /* used by parity logging */ |
| 288 | RF_SectorCount_t regionLogCapacity; |
| 289 | RF_ParityLogQueue_t parityLogPool; /* pool of unused parity logs */ |
| 290 | RF_RegionInfo_t *regionInfo; /* array of region state */ |
| 291 | int numParityLogs; |
| 292 | int numSectorsPerLog; |
| 293 | int regionParityRange; |
| 294 | int logsInUse; /* debugging */ |
| 295 | RF_ParityLogDiskQueue_t parityLogDiskQueue; /* state of parity |
| 296 | * logging disk work */ |
| 297 | RF_RegionBufferQueue_t regionBufferPool; /* buffers for holding |
| 298 | * region log */ |
| 299 | RF_RegionBufferQueue_t parityBufferPool; /* buffers for holding |
| 300 | * parity */ |
| 301 | void *parityLogBufferHeap; /* pool of unused parity logs */ |
| 302 | RF_Thread_t pLogDiskThreadHandle; |
| 303 | |
| 304 | #endif /* RF_INCLUDE_PARITYLOGGING > 0 */ |
| 305 | struct rf_paritymap *parity_map; |
| 306 | }; |
| 307 | #endif /* !_RF__RF_RAID_H_ */ |
| 308 | |