/*	$NetBSD: rf_netbsdkintf.c,v 1.347 2016/09/19 23:37:10 jdolecek Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Greg Oster; Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: cd.c 1.6 90/11/28$
 *
 *	@(#)cd.c	8.2 (Berkeley) 11/16/93
 */

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Mark Holland, Jim Zelenka
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/***********************************************************
 *
 * rf_kintf.c -- the kernel interface routines for RAIDframe
 *
 ***********************************************************/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.347 2016/09/19 23:37:10 jdolecek Exp $");

#ifdef _KERNEL_OPT
#include "opt_compat_netbsd.h"
#include "opt_raid_autoconfig.h"
#endif

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/disk.h>
#include <sys/device.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/disklabel.h>
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/reboot.h>
#include <sys/kauth.h>
#include <sys/module.h>

#include <prop/proplib.h>

#include <dev/raidframe/raidframevar.h>
#include <dev/raidframe/raidframeio.h>
#include <dev/raidframe/rf_paritymap.h>

#include "rf_raid.h"
#include "rf_copyback.h"
#include "rf_dag.h"
#include "rf_dagflags.h"
#include "rf_desc.h"
#include "rf_diskqueue.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_kintf.h"
#include "rf_options.h"
#include "rf_driver.h"
#include "rf_parityscan.h"
#include "rf_threadstuff.h"

#ifdef COMPAT_50
#include "rf_compat50.h"
#endif

#include "ioconf.h"

#ifdef DEBUG
int rf_kdebug_level = 0;
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#else				/* DEBUG */
#define db1_printf(a) { }
#endif				/* DEBUG */

#ifdef DEBUG_ROOT
#define DPRINTF(a, ...) printf(a, __VA_ARGS__)
#else
#define DPRINTF(a, ...)
#endif

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
static rf_declare_mutex2(rf_sparet_wait_mutex);
static rf_declare_cond2(rf_sparet_wait_cv);
static rf_declare_cond2(rf_sparet_resp_cv);

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */
#endif

MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");

/* prototypes */
static void KernelWakeupFunc(struct buf *);
static void InitBP(struct buf *, struct vnode *, unsigned,
    dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    void *, int, struct proc *);
struct raid_softc;
static void raidinit(struct raid_softc *);
static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);

static int raid_match(device_t, cfdata_t, void *);
static void raid_attach(device_t, device_t, void *);
static int raid_detach(device_t, int);

static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t);
static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t, int);

static int raidwrite_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);
static int raidread_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);

static int raid_diskstart(device_t, struct buf *bp);
static int raid_dumpblocks(device_t, void *, daddr_t, int);
static int raid_lastclose(device_t);

static dev_type_open(raidopen);
static dev_type_close(raidclose);
static dev_type_read(raidread);
static dev_type_write(raidwrite);
static dev_type_ioctl(raidioctl);
static dev_type_strategy(raidstrategy);
static dev_type_dump(raiddump);
static dev_type_size(raidsize);

const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,
	.d_dumpblocks = raid_dumpblocks,
	.d_lastclose = raid_lastclose,
	.d_minphys = minphys
};

struct raid_softc {
	struct dk_softc sc_dksc;
	int	sc_unit;
	int	sc_flags;	/* flags */
	int	sc_cflags;	/* configuration flags */
	kmutex_t sc_mutex;	/* interlock mutex */
	kcondvar_t sc_cv;	/* and the condvar */
	uint64_t sc_size;	/* size of the raid device */
	char	sc_xname[20];	/* XXX external name */
	RF_Raid_t sc_r;
	LIST_ENTRY(raid_softc) sc_link;
};
/* sc_flags */
#define RAIDF_INITED		0x01	/* unit has been initialized */
#define RAIDF_SHUTDOWN		0x02	/* unit is being shutdown */
#define RAIDF_DETACH		0x04	/* detach after final close */
#define RAIDF_WANTED		0x08	/* someone waiting to obtain a lock */
#define RAIDF_LOCKED		0x10	/* unit is locked */
#define RAIDF_UNIT_CHANGED	0x20	/* unit is being changed */

#define	raidunit(x)	DISKUNIT(x)
#define raidsoftc(dev)	(((struct raid_softc *)device_private(dev))->sc_r.softc)

extern struct cfdriver raid_cd;
CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by, say, 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING 6
#endif
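
/*
 * Illustrative sketch: because of the #ifndef guard above, 6 is only a
 * compiled-in fallback.  Assuming the build passes kernel options through
 * as -D defines (as NetBSD's config(1) does for simple options), a kernel
 * configuration file could raise the limit with, e.g.:
 *
 *	options 	RAIDOUTSTANDING=10
 */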

#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */

static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static int raid_detach_unlocked(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);

void rf_ReconThread(struct rf_recon_req *);
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
int rf_autoconfig(device_t);
void rf_buildroothack(RF_ConfigSet_t *);

RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *, RF_AutoConfig_t *);
int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
void rf_create_configuration(RF_AutoConfig_t *, RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);

/*
 * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
 * Note that this is overridden by having RAID_AUTOCONFIG as an option
 * in the kernel config file.
 */
#ifdef RAID_AUTOCONFIG
int raidautoconfig = 1;
#else
int raidautoconfig = 0;
#endif
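
/*
 * For example (a sketch of the usual wiring): a kernel configuration
 * containing
 *
 *	options 	RAID_AUTOCONFIG
 *
 * causes opt_raid_autoconfig.h (included above under _KERNEL_OPT) to
 * define RAID_AUTOCONFIG, flipping the default here to 1.
 */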
static bool raidautoconfigdone = false;

struct RF_Pools_s rf_pools;

static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
static kmutex_t raid_lock;

static struct raid_softc *
raidcreate(int unit) {
	struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
	if (sc == NULL) {
#ifdef DIAGNOSTIC
		printf("%s: out of memory\n", __func__);
#endif
		return NULL;
	}
	sc->sc_unit = unit;
	cv_init(&sc->sc_cv, "raidunit");
	mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
	return sc;
}

static void
raiddestroy(struct raid_softc *sc) {
	cv_destroy(&sc->sc_cv);
	mutex_destroy(&sc->sc_mutex);
	kmem_free(sc, sizeof(*sc));
}

static struct raid_softc *
raidget(int unit, bool create) {
	struct raid_softc *sc;
	if (unit < 0) {
#ifdef DIAGNOSTIC
		panic("%s: unit %d!", __func__, unit);
#endif
		return NULL;
	}
	mutex_enter(&raid_lock);
	LIST_FOREACH(sc, &raids, sc_link) {
		if (sc->sc_unit == unit) {
			mutex_exit(&raid_lock);
			return sc;
		}
	}
	mutex_exit(&raid_lock);
	if (!create)
		return NULL;
	if ((sc = raidcreate(unit)) == NULL)
		return NULL;
	mutex_enter(&raid_lock);
	LIST_INSERT_HEAD(&raids, sc, sc_link);
	mutex_exit(&raid_lock);
	return sc;
}

static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}

void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}

int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. Find the boot device first, if needed, so we can use it later.
	 * This needs to be done before we autoconfigure any RAID sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later.
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}

static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
	const char *bootname = device_xname(bdv);
	size_t len = strlen(bootname);

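	/*
	 * Illustrative example (descriptive comment only): if the system
	 * booted from wd0, a component named "/dev/wd0a" matches once the
	 * "/dev/" prefix is skipped; for a wedge component such as "dk3",
	 * the name of its parent disk is compared instead.
	 */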
	for (int col = 0; col < r->numCol; col++) {
		const char *devname = r->Disks[col].devname;
		devname += sizeof("/dev/") - 1;
		if (strncmp(devname, "dk", 2) == 0) {
			const char *parent =
			    dkwedge_get_parent_name(r->Disks[col].dev);
			if (parent != NULL)
				devname = parent;
		}
		if (strncmp(devname, bootname, len) == 0) {
			struct raid_softc *sc = r->softc;
			aprint_debug("raid%d includes boot device %s\n",
			    sc->sc_unit, devname);
			return 1;
		}
	}
	return 0;
}

void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;
	struct raid_softc *sc, *rsc;
	struct dk_softc *dksc;

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}
	dksc = &rsc->sc_dksc;

	/* if the user has specified what the root device should be,
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition.  This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition.  We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used).  For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		if (dksc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume partition 'a' first */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dksc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
			DPRINTF("%s: candidate wedge root=%s\n", __func__,
			    cname);
			if (candidate_root == NULL) {
				/*
				 * If that is not found, because we don't use
				 * disklabel, return the first dk child.
				 * XXX: we can skip the 'a' check above
				 * and always do this...
				 */
				size_t i = 0;
				candidate_root = dkwedge_find_by_parent(
				    device_xname(dksc->sc_dev), &i);
			}
			DPRINTF("%s: candidate wedge root=%p\n", __func__,
			    candidate_root);
		} else
			candidate_root = dksc->sc_dev;
		DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
		DPRINTF("%s: booted_device=%p root_partition=%d "
		    "contains_boot=%d\n", __func__, booted_device,
		    rsc->sc_r.root_partition,
		    rf_containsboot(&rsc->sc_r, booted_device));
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_partition = 0;	/* XXX assume 'a' */
		}
	} else if (num_root > 1) {
		DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
		    booted_device);

		/*
		 * Maybe the MD code can help.  If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file.
		 */
		if (booted_device == NULL)
			return;

		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
				dksc = &rsc->sc_dksc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = dksc->sc_dev;
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess; require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}

static int
raidsize(dev_t dev)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;
	unsigned int unit;

	unit = raidunit(dev);
	if ((rs = raidget(unit, false)) == NULL)
		return -1;
	dksc = &rs->sc_dksc;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return -1;

	return dk_size(dksc, dev);
}

static int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	unsigned int unit;
	struct raid_softc *rs;
	struct dk_softc *dksc;

	unit = raidunit(dev);
	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENODEV;

	/*
	   Note that blkno is relative to this particular partition.
	   By adding RF_PROTECTED_SECTORS, we get a value that
	   is relative to the partition used for the underlying component.
	*/
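	/*
	 * A worked example (illustrative; assumes the usual value of 64
	 * for RF_PROTECTED_SECTORS): a dump aimed at block 0 of the RAID
	 * partition lands at block 64 of the underlying component, which
	 * keeps the component label area intact.
	 */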
	blkno += RF_PROTECTED_SECTORS;

	return dk_dump(dksc, dev, blkno, va, size);
}

static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			for (j = 0; j < raidPtr->numCol; j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}

/* ARGSUSED */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	int error = 0;
	int part, pmask;

	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	dksc = &rs->sc_dksc;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (!DK_BUSY(dksc, pmask) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened. */

		rf_markalldirty(&rs->sc_r);
	}

	if ((rs->sc_flags & RAIDF_INITED) != 0)
		error = dk_open(dksc, dev, flags, fmt, l);

bad:
	raidunlock(rs);

	return (error);
}

static int
raid_lastclose(device_t self)
{
	struct raid_softc *rs = raidsoftc(self);

	/* Last close... the device is not unconfigured yet, so mark
	   things as clean here.  (Device shutdown has taken care of
	   setting the clean bits for the case where RAIDF_INITED is
	   not set.) */

	rf_update_component_labels(&rs->sc_r,
	    RF_FINAL_COMPONENT_UPDATE);

	/* pass to unlocked code */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		rs->sc_flags |= RAIDF_DETACH;

	return 0;
}

/* ARGSUSED */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		error = dk_close(dksc, dev, flags, fmt, l);
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		do_put = 1;

	raidunlock(rs);

	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		raidput(rs);
	}

	return (error);
}

static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}

static void
raidstrategy(struct buf *bp)
{
	unsigned int unit;
	struct raid_softc *rs;
	struct dk_softc *dksc;
	RF_Raid_t *raidPtr;

	unit = raidunit(bp->b_dev);
	if ((rs = raidget(unit, false)) == NULL) {
		bp->b_error = ENXIO;
		goto fail;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto fail;
	}
	dksc = &rs->sc_dksc;
	raidPtr = &rs->sc_r;

	/* Queue IO only */
	if (dk_strategy_defer(dksc, bp))
		goto done;

	/* schedule the IO to happen at the next convenient time */
	raid_wakeup(raidPtr);

done:
	return;

fail:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}

static int
raid_diskstart(device_t dev, struct buf *bp)
{
	struct raid_softc *rs = raidsoftc(dev);
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		db1_printf(("raid is not valid..\n"));
		return ENODEV;
	}

	/* XXX */
	bp->b_resid = 0;

	return raiddoaccess(raidPtr, bp);
}

void
raiddone(RF_Raid_t *raidPtr, struct buf *bp)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;

	dk_done(dksc, bp);

	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings++;
	rf_unlock_mutex2(raidPtr->mutex);

	/* schedule more IO */
	raid_wakeup(raidPtr);
}

/* ARGSUSED */
static int
raidread(dev_t dev, struct uio *uio, int flags)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);

	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
}

/* ARGSUSED */
static int
raidwrite(dev_t dev, struct uio *uio, int flags)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);

	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
}

static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}

static int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int error = 0;
	int part, pmask;
	struct raid_softc *rs;
	struct dk_softc *dksc;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, **ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
	/* int raidid; */
	struct rf_recon_req *rrcopy, *rr;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_ComponentLabel_t **clabel_ptr;
	RF_SingleComponent_t *sparePtr, *componentPtr;
	RF_SingleComponent_t component;
	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
	int i, j, d;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
	    (int) DISKPART(dev), (int) unit, cmd));

	/* Must be initialized for these... */
	switch (cmd) {
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
#ifdef COMPAT_50
	case RAIDFRAME_GET_INFO50:
		return rf_get_info50(raidPtr, data);

	case RAIDFRAME_CONFIGURE50:
		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
			return retcode;
		goto config;
#endif
	/* configure the system */
	case RAIDFRAME_CONFIGURE:

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n", unit);
			return (EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		u_cfg = *((RF_Config_t **) data);
		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
			    retcode));
			goto no_config;
		}
		goto config;
	config:
		rs->sc_flags &= ~RAIDF_SHUTDOWN;

		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = EINVAL;
				goto no_config;
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = ENOMEM;
				goto no_config;
			}
			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
				    k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
				    retcode));
				goto no_config;
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 * there is no stale data left in the case of a
		 * reconfiguration.
		 */
		memset(raidPtr, 0, sizeof(*raidPtr));
		raidPtr->softc = rs;
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(rs);
			raid_wakeup(raidPtr);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

	no_config:
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		if (retcode != 0)
			rs->sc_flags |= RAIDF_SHUTDOWN;
		return (retcode);
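
		/*
		 * Illustrative userland sketch of the calling convention
		 * above (not driver code; raidctl(8) is the real consumer).
		 * The ioctl argument is a pointer to a *pointer* to the
		 * RF_Config_t, matching the *((RF_Config_t **) data)
		 * dereference; the device path below is an assumption for
		 * the example.
		 *
		 *	RF_Config_t cfg, *cfgp = &cfg;
		 *	int fd = open("/dev/rraid0d", O_RDWR);
		 *
		 *	memset(&cfg, 0, sizeof(cfg));
		 *	... fill in numCol, devnames, parityConfig, etc ...
		 *	if (fd != -1 && ioctl(fd, RAIDFRAME_CONFIGURE, &cfgp) == -1)
		 *		warn("RAIDFRAME_CONFIGURE");
		 */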

	/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((error = raidlock(rs)) != 0)
			return (error);

		if (DK_BUSY(dksc, pmask) ||
		    raidPtr->recon_in_progress != 0 ||
		    raidPtr->parity_rewrite_in_progress != 0 ||
		    raidPtr->copyback_in_progress != 0)
			retcode = EBUSY;
		else {
			/* detach and free on close */
			rs->sc_flags |= RAIDF_SHUTDOWN;
			retcode = 0;
		}

		raidunlock(rs);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		clabel_ptr = (RF_ComponentLabel_t **) data;
		/* need to read the component label for the disk indicated
		   by row,column in clabel */

		/*
		 * Perhaps there should be an option to skip the in-core
		 * copy and hit the disk, as with disklabel(8).
		 */
		RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));

		retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));

		if (retcode) {
			RF_Free(clabel, sizeof(*clabel));
			return retcode;
		}

		clabel->row = 0; /* Don't allow looking at anything else. */

		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol +
		    raidPtr->numSpare)) {
			RF_Free(clabel, sizeof(*clabel));
			return EINVAL;
		}

		RF_Free(clabel, sizeof(*clabel));

		clabel = raidget_component_label(raidPtr, column);

		return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
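
		/*
		 * Illustrative sketch of the caller's side (not driver
		 * code): the user passes a pointer to its own label
		 * pointer, with the desired column filled in first, e.g.
		 *
		 *	RF_ComponentLabel_t label, *labelp = &label;
		 *	label.column = 0;
		 *	ioctl(fd, RAIDFRAME_GET_COMPONENT_LABEL, &labelp);
		 *
		 * where fd is an open raid device (an assumed file
		 * descriptor for the example).
		 */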

#if 0
	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return (EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
		memcpy(raidget_component_label(raidPtr, column),
		    clabel, sizeof(*clabel));
		raidflush_component_label(raidPtr, column);
		return (0);
#endif

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		for (column = 0; column < raidPtr->numCol; column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label = raidget_component_label(raidPtr,
				    column);
				/* Zeroing this is important. */
				memset(ci_label, 0, sizeof(*ci_label));
				raid_init_component_label(raidPtr, ci_label);
				ci_label->serial_number =
				    raidPtr->serial_number;
				ci_label->row = 0; /* we don't pretend to support more */
				rf_component_label_set_partitionsize(ci_label,
				    diskPtr->partitionSize);
				ci_label->column = column;
				raidflush_component_label(raidPtr, column);
			}
			/* XXXjld what about the spares? */
		}

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return (0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return (EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
		    rf_RewriteParityThread,
		    raidPtr, "raid_parity");
		return (retcode);

	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy(&component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return (retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		return (retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy(&component, componentPtr,
		    sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return (retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy(&component, componentPtr,
		    sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return (retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return (EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return (EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy(&component, componentPtr,
		    sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return (EINVAL);
		}

		rf_lock_mutex2(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are... */
			printf("raid%d: Unable to reconstruct to disk at:\n",
			    raidPtr->raidid);
			printf("raid%d: Col: %d Too many failures.\n",
			    raidPtr->raidid, column);
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status ==
		    rf_ds_reconstructing) {
			printf("raid%d: Unable to reconstruct to disk at:\n",
			    raidPtr->raidid);
			printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column);

			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status == rf_ds_spared) {
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return (ENOMEM);

		rrcopy->raidPtr = (void *) raidPtr;
		rrcopy->col = column;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
		    rf_ReconstructInPlaceThread,
		    rrcopy, "raid_reconip");
		return (retcode);

	case RAIDFRAME_GET_INFO:
		if (!raidPtr->valid)
			return (ENODEV);
		ucfgp = (RF_DeviceConfig_t **) data;
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
		    (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		d_cfg->rows = 1; /* there is only 1 row now */
		d_cfg->cols = raidPtr->numCol;
		d_cfg->ndevs = raidPtr->numCol;
		if (d_cfg->ndevs >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->nspares = raidPtr->numSpare;
		if (d_cfg->nspares >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
		d = 0;
		for (j = 0; j < d_cfg->cols; j++) {
			d_cfg->devs[d] = raidPtr->Disks[j];
			d++;
		}
		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
			d_cfg->spares[i] = raidPtr->Disks[j];
			if (d_cfg->spares[i].status == rf_ds_rebuilding_spare) {
				/* XXX: raidctl(8) expects to see this as a used spare */
				d_cfg->spares[i].status = rf_ds_used_spare;
			}
		}
		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map,
		    (struct rf_pmstat *)data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
		    (struct rf_pmparams *)data, 1))
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return (0);

	/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return (EINVAL);
		}

		rr = (struct rf_recon_req *) data;
		rr->row = 0;
		if (rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);

		rf_lock_mutex2(raidPtr->mutex);
		if (raidPtr->status == rf_rs_reconstructing) {
			/* you can't fail a disk while we're reconstructing! */
			/* XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if ((raidPtr->Disks[rr->col].status ==
		    rf_ds_optimal) && (raidPtr->numFailures > 0)) {
			/* some other component has failed.  Let's not make
			   things worse. XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
			/* Can't fail a spared disk! */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return (ENOMEM);
		memcpy(rrcopy, rr, sizeof(*rr));
		rrcopy->raidPtr = (void *) raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
		    rf_ReconThread,
		    rrcopy, "raid_recon");
		return (0);

| 1591 | /* invoke a copyback operation after recon on whatever disk |
| 1592 | * needs it, if any */ |
| 1593 | case RAIDFRAME_COPYBACK: |
| 1594 | |
| 1595 | if (raidPtr->Layout.map->faultsTolerated == 0) { |
| 1596 | /* This makes no sense on a RAID 0!! */ |
| 1597 | return(EINVAL); |
| 1598 | } |
| 1599 | |
| 1600 | if (raidPtr->copyback_in_progress == 1) { |
| 1601 | /* Copyback is already in progress! */ |
| 1602 | return(EINVAL); |
| 1603 | } |
| 1604 | |
| 1605 | retcode = RF_CREATE_THREAD(raidPtr->copyback_thread, |
| 1606 | rf_CopybackThread, |
raidPtr, "raid_copyback");
| 1608 | return (retcode); |
| 1609 | |
| 1610 | /* return the percentage completion of reconstruction */ |
| 1611 | case RAIDFRAME_CHECK_RECON_STATUS: |
| 1612 | if (raidPtr->Layout.map->faultsTolerated == 0) { |
| 1613 | /* This makes no sense on a RAID 0, so tell the |
| 1614 | user it's done. */ |
| 1615 | *(int *) data = 100; |
| 1616 | return(0); |
| 1617 | } |
| 1618 | if (raidPtr->status != rf_rs_reconstructing) |
| 1619 | *(int *) data = 100; |
| 1620 | else { |
| 1621 | if (raidPtr->reconControl->numRUsTotal > 0) { |
| 1622 | *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal); |
| 1623 | } else { |
| 1624 | *(int *) data = 0; |
| 1625 | } |
| 1626 | } |
| 1627 | return (0); |
| 1628 | case RAIDFRAME_CHECK_RECON_STATUS_EXT: |
| 1629 | progressInfoPtr = (RF_ProgressInfo_t **) data; |
| 1630 | if (raidPtr->status != rf_rs_reconstructing) { |
| 1631 | progressInfo.remaining = 0; |
| 1632 | progressInfo.completed = 100; |
| 1633 | progressInfo.total = 100; |
| 1634 | } else { |
| 1635 | progressInfo.total = |
| 1636 | raidPtr->reconControl->numRUsTotal; |
| 1637 | progressInfo.completed = |
| 1638 | raidPtr->reconControl->numRUsComplete; |
| 1639 | progressInfo.remaining = progressInfo.total - |
| 1640 | progressInfo.completed; |
| 1641 | } |
| 1642 | retcode = copyout(&progressInfo, *progressInfoPtr, |
| 1643 | sizeof(RF_ProgressInfo_t)); |
| 1644 | return (retcode); |
| 1645 | |
| 1646 | case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: |
| 1647 | if (raidPtr->Layout.map->faultsTolerated == 0) { |
| 1648 | /* This makes no sense on a RAID 0, so tell the |
| 1649 | user it's done. */ |
| 1650 | *(int *) data = 100; |
| 1651 | return(0); |
| 1652 | } |
| 1653 | if (raidPtr->parity_rewrite_in_progress == 1) { |
| 1654 | *(int *) data = 100 * |
| 1655 | raidPtr->parity_rewrite_stripes_done / |
| 1656 | raidPtr->Layout.numStripe; |
| 1657 | } else { |
| 1658 | *(int *) data = 100; |
| 1659 | } |
| 1660 | return (0); |
| 1661 | |
| 1662 | case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: |
| 1663 | progressInfoPtr = (RF_ProgressInfo_t **) data; |
| 1664 | if (raidPtr->parity_rewrite_in_progress == 1) { |
| 1665 | progressInfo.total = raidPtr->Layout.numStripe; |
| 1666 | progressInfo.completed = |
| 1667 | raidPtr->parity_rewrite_stripes_done; |
| 1668 | progressInfo.remaining = progressInfo.total - |
| 1669 | progressInfo.completed; |
| 1670 | } else { |
| 1671 | progressInfo.remaining = 0; |
| 1672 | progressInfo.completed = 100; |
| 1673 | progressInfo.total = 100; |
| 1674 | } |
| 1675 | retcode = copyout(&progressInfo, *progressInfoPtr, |
| 1676 | sizeof(RF_ProgressInfo_t)); |
| 1677 | return (retcode); |
| 1678 | |
| 1679 | case RAIDFRAME_CHECK_COPYBACK_STATUS: |
| 1680 | if (raidPtr->Layout.map->faultsTolerated == 0) { |
| 1681 | /* This makes no sense on a RAID 0 */ |
| 1682 | *(int *) data = 100; |
| 1683 | return(0); |
| 1684 | } |
| 1685 | if (raidPtr->copyback_in_progress == 1) { |
| 1686 | *(int *) data = 100 * raidPtr->copyback_stripes_done / |
| 1687 | raidPtr->Layout.numStripe; |
| 1688 | } else { |
| 1689 | *(int *) data = 100; |
| 1690 | } |
| 1691 | return (0); |
| 1692 | |
| 1693 | case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: |
| 1694 | progressInfoPtr = (RF_ProgressInfo_t **) data; |
| 1695 | if (raidPtr->copyback_in_progress == 1) { |
| 1696 | progressInfo.total = raidPtr->Layout.numStripe; |
| 1697 | progressInfo.completed = |
| 1698 | raidPtr->copyback_stripes_done; |
| 1699 | progressInfo.remaining = progressInfo.total - |
| 1700 | progressInfo.completed; |
| 1701 | } else { |
| 1702 | progressInfo.remaining = 0; |
| 1703 | progressInfo.completed = 100; |
| 1704 | progressInfo.total = 100; |
| 1705 | } |
| 1706 | retcode = copyout(&progressInfo, *progressInfoPtr, |
| 1707 | sizeof(RF_ProgressInfo_t)); |
| 1708 | return (retcode); |
| 1709 | |
| 1710 | case RAIDFRAME_SET_LAST_UNIT: |
| 1711 | for (column = 0; column < raidPtr->numCol; column++) |
| 1712 | if (raidPtr->Disks[column].status != rf_ds_optimal) |
| 1713 | return EBUSY; |
| 1714 | |
| 1715 | for (column = 0; column < raidPtr->numCol; column++) { |
| 1716 | clabel = raidget_component_label(raidPtr, column); |
| 1717 | clabel->last_unit = *(int *)data; |
| 1718 | raidflush_component_label(raidPtr, column); |
| 1719 | } |
| 1720 | rs->sc_cflags |= RAIDF_UNIT_CHANGED; |
| 1721 | return 0; |
| 1722 | |
| 1723 | /* the sparetable daemon calls this to wait for the kernel to |
| 1724 | * need a spare table. this ioctl does not return until a |
| 1725 | * spare table is needed. XXX -- calling mpsleep here in the |
| 1726 | * ioctl code is almost certainly wrong and evil. -- XXX XXX |
| 1727 | * -- I should either compute the spare table in the kernel, |
| 1728 | * or have a different -- XXX XXX -- interface (a different |
| 1729 | * character device) for delivering the table -- XXX */ |
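/* A sketch of the daemon side of this protocol, as hypothetical
 * userland code (the ioctls below are currently compiled out, and
 * compute_spare_table() is an assumed helper, not a real function):
 *
 *	RF_SparetWait_t req;
 *	void *tbl;
 *
 *	while (ioctl(fd, RAIDFRAME_SPARET_WAIT, &req) == 0) {
 *		tbl = compute_spare_table(&req);
 *		ioctl(fd, RAIDFRAME_SEND_SPARET, &tbl);
 *	}
 */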
| 1730 | #if 0 |
| 1731 | case RAIDFRAME_SPARET_WAIT: |
| 1732 | rf_lock_mutex2(rf_sparet_wait_mutex); |
| 1733 | while (!rf_sparet_wait_queue) |
| 1734 | rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex); |
| 1735 | waitreq = rf_sparet_wait_queue; |
| 1736 | rf_sparet_wait_queue = rf_sparet_wait_queue->next; |
| 1737 | rf_unlock_mutex2(rf_sparet_wait_mutex); |
| 1738 | |
| 1739 | /* structure assignment */ |
| 1740 | *((RF_SparetWait_t *) data) = *waitreq; |
| 1741 | |
| 1742 | RF_Free(waitreq, sizeof(*waitreq)); |
| 1743 | return (0); |
| 1744 | |
| 1745 | /* wakes up a process waiting on SPARET_WAIT and puts an error |
* code in it that will cause the daemon to exit */
| 1747 | case RAIDFRAME_ABORT_SPARET_WAIT: |
| 1748 | RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); |
| 1749 | waitreq->fcol = -1; |
| 1750 | rf_lock_mutex2(rf_sparet_wait_mutex); |
| 1751 | waitreq->next = rf_sparet_wait_queue; |
| 1752 | rf_sparet_wait_queue = waitreq; |
rf_broadcast_cond2(rf_sparet_wait_cv);
| 1754 | rf_unlock_mutex2(rf_sparet_wait_mutex); |
| 1755 | return (0); |
| 1756 | |
| 1757 | /* used by the spare table daemon to deliver a spare table |
| 1758 | * into the kernel */ |
| 1759 | case RAIDFRAME_SEND_SPARET: |
| 1760 | |
| 1761 | /* install the spare table */ |
| 1762 | retcode = rf_SetSpareTable(raidPtr, *(void **) data); |
| 1763 | |
| 1764 | /* respond to the requestor. the return status of the spare |
| 1765 | * table installation is passed in the "fcol" field */ |
| 1766 | RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); |
| 1767 | waitreq->fcol = retcode; |
| 1768 | rf_lock_mutex2(rf_sparet_wait_mutex); |
| 1769 | waitreq->next = rf_sparet_resp_queue; |
| 1770 | rf_sparet_resp_queue = waitreq; |
| 1771 | rf_broadcast_cond2(rf_sparet_resp_cv); |
| 1772 | rf_unlock_mutex2(rf_sparet_wait_mutex); |
| 1773 | |
| 1774 | return (retcode); |
| 1775 | #endif |
| 1776 | |
| 1777 | default: |
| 1778 | break; /* fall through to the os-specific code below */ |
| 1779 | |
| 1780 | } |
| 1781 | |
| 1782 | if (!raidPtr->valid) |
| 1783 | return (EINVAL); |
| 1784 | |
| 1785 | /* |
| 1786 | * Add support for "regular" device ioctls here. |
| 1787 | */ |
| 1788 | |
| 1789 | switch (cmd) { |
| 1790 | case DIOCCACHESYNC: |
| 1791 | retcode = rf_sync_component_caches(raidPtr); |
| 1792 | break; |
| 1793 | |
| 1794 | default: |
| 1795 | retcode = dk_ioctl(dksc, dev, cmd, data, flag, l); |
| 1796 | break; |
| 1797 | } |
| 1798 | |
| 1799 | return (retcode); |
| 1800 | |
| 1801 | } |
| 1802 | |
| 1803 | |
| 1804 | /* raidinit -- complete the rest of the initialization for the |
| 1805 | RAIDframe device. */ |
| 1806 | |
| 1807 | |
| 1808 | static void |
| 1809 | raidinit(struct raid_softc *rs) |
| 1810 | { |
| 1811 | cfdata_t cf; |
| 1812 | unsigned int unit; |
| 1813 | struct dk_softc *dksc = &rs->sc_dksc; |
| 1814 | RF_Raid_t *raidPtr = &rs->sc_r; |
| 1815 | device_t dev; |
| 1816 | |
| 1817 | unit = raidPtr->raidid; |
| 1818 | |
| 1819 | /* XXX doesn't check bounds. */ |
snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);
| 1821 | |
| 1822 | /* attach the pseudo device */ |
| 1823 | cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK); |
| 1824 | cf->cf_name = raid_cd.cd_name; |
| 1825 | cf->cf_atname = raid_cd.cd_name; |
| 1826 | cf->cf_unit = unit; |
| 1827 | cf->cf_fstate = FSTATE_STAR; |
| 1828 | |
| 1829 | dev = config_attach_pseudo(cf); |
| 1830 | if (dev == NULL) { |
printf("raid%d: config_attach_pseudo failed\n",
| 1832 | raidPtr->raidid); |
| 1833 | free(cf, M_RAIDFRAME); |
| 1834 | return; |
| 1835 | } |
| 1836 | |
| 1837 | /* provide a backpointer to the real softc */ |
| 1838 | raidsoftc(dev) = rs; |
| 1839 | |
| 1840 | /* disk_attach actually creates space for the CPU disklabel, among |
| 1841 | * other things, so it's critical to call this *BEFORE* we try putzing |
| 1842 | * with disklabels. */ |
| 1843 | dk_init(dksc, dev, DKTYPE_RAID); |
| 1844 | disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver); |
| 1845 | |
| 1846 | /* XXX There may be a weird interaction here between this, and |
| 1847 | * protectedSectors, as used in RAIDframe. */ |
| 1848 | |
| 1849 | rs->sc_size = raidPtr->totalSectors; |
| 1850 | |
| 1851 | /* Attach dk and disk subsystems */ |
| 1852 | dk_attach(dksc); |
| 1853 | disk_attach(&dksc->sc_dkdev); |
| 1854 | rf_set_geometry(rs, raidPtr); |
| 1855 | |
bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

/* mark unit as usable */
| 1859 | rs->sc_flags |= RAIDF_INITED; |
| 1860 | |
| 1861 | dkwedge_discover(&dksc->sc_dkdev); |
| 1862 | } |
| 1863 | |
| 1864 | #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) |
| 1865 | /* wake up the daemon & tell it to get us a spare table |
| 1866 | * XXX |
| 1867 | * the entries in the queues should be tagged with the raidPtr |
| 1868 | * so that in the extremely rare case that two recons happen at once, |
* we know for which device we're requesting a spare table
| 1870 | * XXX |
| 1871 | * |
| 1872 | * XXX This code is not currently used. GO |
| 1873 | */ |
| 1874 | int |
| 1875 | rf_GetSpareTableFromDaemon(RF_SparetWait_t *req) |
| 1876 | { |
| 1877 | int retcode; |
| 1878 | |
| 1879 | rf_lock_mutex2(rf_sparet_wait_mutex); |
| 1880 | req->next = rf_sparet_wait_queue; |
| 1881 | rf_sparet_wait_queue = req; |
| 1882 | rf_broadcast_cond2(rf_sparet_wait_cv); |
| 1883 | |
| 1884 | /* mpsleep unlocks the mutex */ |
| 1885 | while (!rf_sparet_resp_queue) { |
| 1886 | rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex); |
| 1887 | } |
| 1888 | req = rf_sparet_resp_queue; |
| 1889 | rf_sparet_resp_queue = req->next; |
| 1890 | rf_unlock_mutex2(rf_sparet_wait_mutex); |
| 1891 | |
| 1892 | retcode = req->fcol; |
| 1893 | RF_Free(req, sizeof(*req)); /* this is not the same req as we |
| 1894 | * alloc'd */ |
| 1895 | return (retcode); |
| 1896 | } |
| 1897 | #endif |
| 1898 | |
| 1899 | /* a wrapper around rf_DoAccess that extracts appropriate info from the |
| 1900 | * bp & passes it down. |
| 1901 | * any calls originating in the kernel must use non-blocking I/O |
| 1902 | * do some extra sanity checking to return "appropriate" error values for |
| 1903 | * certain conditions (to make some standard utilities work) |
| 1904 | * |
| 1905 | * Formerly known as: rf_DoAccessKernel |
| 1906 | */ |
| 1907 | void |
| 1908 | raidstart(RF_Raid_t *raidPtr) |
| 1909 | { |
| 1910 | struct raid_softc *rs; |
| 1911 | struct dk_softc *dksc; |
| 1912 | |
| 1913 | rs = raidPtr->softc; |
| 1914 | dksc = &rs->sc_dksc; |
| 1915 | /* quick check to see if anything has died recently */ |
| 1916 | rf_lock_mutex2(raidPtr->mutex); |
| 1917 | if (raidPtr->numNewFailures > 0) { |
| 1918 | rf_unlock_mutex2(raidPtr->mutex); |
| 1919 | rf_update_component_labels(raidPtr, |
| 1920 | RF_NORMAL_COMPONENT_UPDATE); |
| 1921 | rf_lock_mutex2(raidPtr->mutex); |
| 1922 | raidPtr->numNewFailures--; |
| 1923 | } |
| 1924 | rf_unlock_mutex2(raidPtr->mutex); |
| 1925 | |
| 1926 | if ((rs->sc_flags & RAIDF_INITED) == 0) { |
printf("raid%d: raidstart not ready\n", raidPtr->raidid);
| 1928 | return; |
| 1929 | } |
| 1930 | |
| 1931 | dk_start(dksc, NULL); |
| 1932 | } |
| 1933 | |
| 1934 | static int |
| 1935 | raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp) |
| 1936 | { |
| 1937 | RF_SectorCount_t num_blocks, pb, sum; |
| 1938 | RF_RaidAddr_t raid_addr; |
| 1939 | daddr_t blocknum; |
| 1940 | int do_async; |
| 1941 | int rc; |
| 1942 | |
| 1943 | rf_lock_mutex2(raidPtr->mutex); |
| 1944 | if (raidPtr->openings == 0) { |
| 1945 | rf_unlock_mutex2(raidPtr->mutex); |
| 1946 | return EAGAIN; |
| 1947 | } |
| 1948 | rf_unlock_mutex2(raidPtr->mutex); |
| 1949 | |
| 1950 | blocknum = bp->b_rawblkno; |
| 1951 | |
db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
| 1953 | (int) blocknum)); |
| 1954 | |
db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
| 1957 | |
| 1958 | /* *THIS* is where we adjust what block we're going to... |
| 1959 | * but DO NOT TOUCH bp->b_blkno!!! */ |
| 1960 | raid_addr = blocknum; |
| 1961 | |
| 1962 | num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector; |
| 1963 | pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0; |
| 1964 | sum = raid_addr + num_blocks + pb; |
| 1965 | if (1 || rf_debugKernelAccess) { |
db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
| 1967 | (int) raid_addr, (int) sum, (int) num_blocks, |
| 1968 | (int) pb, (int) bp->b_resid)); |
| 1969 | } |
| 1970 | if ((sum > raidPtr->totalSectors) || (sum < raid_addr) |
| 1971 | || (sum < num_blocks) || (sum < pb)) { |
| 1972 | rc = ENOSPC; |
| 1973 | goto done; |
| 1974 | } |
| 1975 | /* |
| 1976 | * XXX rf_DoAccess() should do this, not just DoAccessKernel() |
| 1977 | */ |
| 1978 | |
| 1979 | if (bp->b_bcount & raidPtr->sectorMask) { |
| 1980 | rc = ENOSPC; |
| 1981 | goto done; |
| 1982 | } |
db1_printf(("Calling DoAccess..\n"));
| 1984 | |
| 1985 | |
| 1986 | rf_lock_mutex2(raidPtr->mutex); |
| 1987 | raidPtr->openings--; |
| 1988 | rf_unlock_mutex2(raidPtr->mutex); |
| 1989 | |
| 1990 | /* |
| 1991 | * Everything is async. |
| 1992 | */ |
| 1993 | do_async = 1; |
| 1994 | |
| 1995 | /* don't ever condition on bp->b_flags & B_WRITE. |
| 1996 | * always condition on B_READ instead */ |
| 1997 | |
| 1998 | rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ? |
| 1999 | RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, |
| 2000 | do_async, raid_addr, num_blocks, |
| 2001 | bp->b_data, bp, RF_DAG_NONBLOCKING_IO); |
| 2002 | |
| 2003 | done: |
| 2004 | return rc; |
| 2005 | } |
| 2006 | |
| 2007 | /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */ |
| 2008 | |
| 2009 | int |
| 2010 | rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req) |
| 2011 | { |
| 2012 | int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE; |
| 2013 | struct buf *bp; |
| 2014 | |
| 2015 | req->queue = queue; |
| 2016 | bp = req->bp; |
| 2017 | |
| 2018 | switch (req->type) { |
| 2019 | case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */ |
| 2020 | /* XXX need to do something extra here.. */ |
| 2021 | /* I'm leaving this in, as I've never actually seen it used, |
| 2022 | * and I'd like folks to report it... GO */ |
printf("WAKEUP CALLED\n");
| 2024 | queue->numOutstanding++; |
| 2025 | |
| 2026 | bp->b_flags = 0; |
| 2027 | bp->b_private = req; |
| 2028 | |
| 2029 | KernelWakeupFunc(bp); |
| 2030 | break; |
| 2031 | |
| 2032 | case RF_IO_TYPE_READ: |
| 2033 | case RF_IO_TYPE_WRITE: |
| 2034 | #if RF_ACC_TRACE > 0 |
| 2035 | if (req->tracerec) { |
| 2036 | RF_ETIMER_START(req->tracerec->timer); |
| 2037 | } |
| 2038 | #endif |
| 2039 | InitBP(bp, queue->rf_cinfo->ci_vp, |
| 2040 | op, queue->rf_cinfo->ci_dev, |
| 2041 | req->sectorOffset, req->numSector, |
| 2042 | req->buf, KernelWakeupFunc, (void *) req, |
| 2043 | queue->raidPtr->logBytesPerSector, req->b_proc); |
| 2044 | |
| 2045 | if (rf_debugKernelAccess) { |
db1_printf(("dispatch: bp->b_blkno = %ld\n",
| 2047 | (long) bp->b_blkno)); |
| 2048 | } |
| 2049 | queue->numOutstanding++; |
| 2050 | queue->last_deq_sector = req->sectorOffset; |
| 2051 | /* acc wouldn't have been let in if there were any pending |
| 2052 | * reqs at any other priority */ |
| 2053 | queue->curPriority = req->priority; |
| 2054 | |
db1_printf(("Going for %c to unit %d col %d\n",
| 2056 | req->type, queue->raidPtr->raidid, |
| 2057 | queue->col)); |
db1_printf(("sector %d count %d (%d bytes) %d\n",
| 2059 | (int) req->sectorOffset, (int) req->numSector, |
| 2060 | (int) (req->numSector << |
| 2061 | queue->raidPtr->logBytesPerSector), |
| 2062 | (int) queue->raidPtr->logBytesPerSector)); |
| 2063 | |
| 2064 | /* |
| 2065 | * XXX: drop lock here since this can block at |
| 2066 | * least with backing SCSI devices. Retake it |
| 2067 | * to minimize fuss with calling interfaces. |
| 2068 | */ |
| 2069 | |
RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
bdev_strategy(bp);
RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
| 2073 | break; |
| 2074 | |
| 2075 | default: |
panic("bad req->type in rf_DispatchKernelIO");
| 2077 | } |
db1_printf(("Exiting from DispatchKernelIO\n"));
| 2079 | |
| 2080 | return (0); |
| 2081 | } |
/* this is the callback function associated with an I/O invoked from
| 2083 | kernel code. |
| 2084 | */ |
| 2085 | static void |
| 2086 | KernelWakeupFunc(struct buf *bp) |
| 2087 | { |
| 2088 | RF_DiskQueueData_t *req = NULL; |
| 2089 | RF_DiskQueue_t *queue; |
| 2090 | |
db1_printf(("recovering the request queue:\n"));
| 2092 | |
| 2093 | req = bp->b_private; |
| 2094 | |
| 2095 | queue = (RF_DiskQueue_t *) req->queue; |
| 2096 | |
| 2097 | rf_lock_mutex2(queue->raidPtr->iodone_lock); |
| 2098 | |
| 2099 | #if RF_ACC_TRACE > 0 |
| 2100 | if (req->tracerec) { |
| 2101 | RF_ETIMER_STOP(req->tracerec->timer); |
| 2102 | RF_ETIMER_EVAL(req->tracerec->timer); |
| 2103 | rf_lock_mutex2(rf_tracing_mutex); |
| 2104 | req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer); |
| 2105 | req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer); |
| 2106 | req->tracerec->num_phys_ios++; |
| 2107 | rf_unlock_mutex2(rf_tracing_mutex); |
| 2108 | } |
| 2109 | #endif |
| 2110 | |
| 2111 | /* XXX Ok, let's get aggressive... If b_error is set, let's go |
| 2112 | * ballistic, and mark the component as hosed... */ |
| 2113 | |
| 2114 | if (bp->b_error != 0) { |
| 2115 | /* Mark the disk as dead */ |
| 2116 | /* but only mark it once... */ |
| 2117 | /* and only if it wouldn't leave this RAID set |
| 2118 | completely broken */ |
| 2119 | if (((queue->raidPtr->Disks[queue->col].status == |
| 2120 | rf_ds_optimal) || |
| 2121 | (queue->raidPtr->Disks[queue->col].status == |
| 2122 | rf_ds_used_spare)) && |
| 2123 | (queue->raidPtr->numFailures < |
| 2124 | queue->raidPtr->Layout.map->faultsTolerated)) { |
printf("raid%d: IO Error (%d). Marking %s as failed.\n",
| 2126 | queue->raidPtr->raidid, |
| 2127 | bp->b_error, |
| 2128 | queue->raidPtr->Disks[queue->col].devname); |
| 2129 | queue->raidPtr->Disks[queue->col].status = |
| 2130 | rf_ds_failed; |
| 2131 | queue->raidPtr->status = rf_rs_degraded; |
| 2132 | queue->raidPtr->numFailures++; |
| 2133 | queue->raidPtr->numNewFailures++; |
| 2134 | } else { /* Disk is already dead... */ |
| 2135 | /* printf("Disk already marked as dead!\n"); */ |
| 2136 | } |
| 2137 | |
| 2138 | } |
| 2139 | |
| 2140 | /* Fill in the error value */ |
| 2141 | req->error = bp->b_error; |
| 2142 | |
| 2143 | /* Drop this one on the "finished" queue... */ |
| 2144 | TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries); |
| 2145 | |
| 2146 | /* Let the raidio thread know there is work to be done. */ |
| 2147 | rf_signal_cond2(queue->raidPtr->iodone_cv); |
| 2148 | |
| 2149 | rf_unlock_mutex2(queue->raidPtr->iodone_lock); |
| 2150 | } |
| 2151 | |
| 2152 | |
| 2153 | /* |
| 2154 | * initialize a buf structure for doing an I/O in the kernel. |
| 2155 | */ |
| 2156 | static void |
| 2157 | InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev, |
| 2158 | RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf, |
| 2159 | void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector, |
| 2160 | struct proc *b_proc) |
| 2161 | { |
| 2162 | /* bp->b_flags = B_PHYS | rw_flag; */ |
| 2163 | bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */ |
| 2164 | bp->b_oflags = 0; |
| 2165 | bp->b_cflags = 0; |
| 2166 | bp->b_bcount = numSect << logBytesPerSector; |
| 2167 | bp->b_bufsize = bp->b_bcount; |
| 2168 | bp->b_error = 0; |
| 2169 | bp->b_dev = dev; |
| 2170 | bp->b_data = bf; |
| 2171 | bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT; |
| 2172 | bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */ |
| 2173 | if (bp->b_bcount == 0) { |
panic("bp->b_bcount is zero in InitBP!!");
| 2175 | } |
| 2176 | bp->b_proc = b_proc; |
| 2177 | bp->b_iodone = cbFunc; |
| 2178 | bp->b_private = cbArg; |
| 2179 | } |
| 2180 | |
| 2181 | /* |
| 2182 | * Wait interruptibly for an exclusive lock. |
| 2183 | * |
| 2184 | * XXX |
| 2185 | * Several drivers do this; it should be abstracted and made MP-safe. |
| 2186 | * (Hmm... where have we seen this warning before :-> GO ) |
| 2187 | */ |
| 2188 | static int |
| 2189 | raidlock(struct raid_softc *rs) |
| 2190 | { |
| 2191 | int error; |
| 2192 | |
| 2193 | error = 0; |
| 2194 | mutex_enter(&rs->sc_mutex); |
| 2195 | while ((rs->sc_flags & RAIDF_LOCKED) != 0) { |
| 2196 | rs->sc_flags |= RAIDF_WANTED; |
| 2197 | error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex); |
| 2198 | if (error != 0) |
| 2199 | goto done; |
| 2200 | } |
| 2201 | rs->sc_flags |= RAIDF_LOCKED; |
| 2202 | done: |
| 2203 | mutex_exit(&rs->sc_mutex); |
| 2204 | return (error); |
| 2205 | } |
| 2206 | /* |
| 2207 | * Unlock and wake up any waiters. |
| 2208 | */ |
| 2209 | static void |
| 2210 | raidunlock(struct raid_softc *rs) |
| 2211 | { |
| 2212 | |
| 2213 | mutex_enter(&rs->sc_mutex); |
| 2214 | rs->sc_flags &= ~RAIDF_LOCKED; |
| 2215 | if ((rs->sc_flags & RAIDF_WANTED) != 0) { |
| 2216 | rs->sc_flags &= ~RAIDF_WANTED; |
| 2217 | cv_broadcast(&rs->sc_cv); |
| 2218 | } |
| 2219 | mutex_exit(&rs->sc_mutex); |
| 2220 | } |
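
/*
 * Typical usage of the pair above (sketch only):
 *
 *	if ((error = raidlock(rs)) != 0)
 *		return error;
 *	... fiddle with the softc ...
 *	raidunlock(rs);
 */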
| 2221 | |
| 2222 | |
| 2223 | #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ |
| 2224 | #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ |
| 2225 | #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE |
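
/*
 * A sketch of the resulting on-disk layout at the front of each
 * component (byte offsets; each area is padded out to a full sector
 * when the sector size exceeds the nominal sizes above):
 *
 *	RF_COMPONENT_INFO_OFFSET			component label
 *	RF_COMPONENT_INFO_OFFSET + max(secsize, 1024)	parity map
 */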
| 2226 | |
| 2227 | static daddr_t |
| 2228 | rf_component_info_offset(void) |
| 2229 | { |
| 2230 | |
| 2231 | return RF_COMPONENT_INFO_OFFSET; |
| 2232 | } |
| 2233 | |
| 2234 | static daddr_t |
| 2235 | rf_component_info_size(unsigned secsize) |
| 2236 | { |
| 2237 | daddr_t info_size; |
| 2238 | |
| 2239 | KASSERT(secsize); |
| 2240 | if (secsize > RF_COMPONENT_INFO_SIZE) |
| 2241 | info_size = secsize; |
| 2242 | else |
| 2243 | info_size = RF_COMPONENT_INFO_SIZE; |
| 2244 | |
| 2245 | return info_size; |
| 2246 | } |
| 2247 | |
| 2248 | static daddr_t |
| 2249 | rf_parity_map_offset(RF_Raid_t *raidPtr) |
| 2250 | { |
| 2251 | daddr_t map_offset; |
| 2252 | |
| 2253 | KASSERT(raidPtr->bytesPerSector); |
| 2254 | if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE) |
| 2255 | map_offset = raidPtr->bytesPerSector; |
| 2256 | else |
| 2257 | map_offset = RF_COMPONENT_INFO_SIZE; |
| 2258 | map_offset += rf_component_info_offset(); |
| 2259 | |
| 2260 | return map_offset; |
| 2261 | } |
| 2262 | |
| 2263 | static daddr_t |
| 2264 | rf_parity_map_size(RF_Raid_t *raidPtr) |
| 2265 | { |
| 2266 | daddr_t map_size; |
| 2267 | |
| 2268 | if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE) |
| 2269 | map_size = raidPtr->bytesPerSector; |
| 2270 | else |
| 2271 | map_size = RF_PARITY_MAP_SIZE; |
| 2272 | |
| 2273 | return map_size; |
| 2274 | } |
| 2275 | |
| 2276 | int |
| 2277 | raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col) |
| 2278 | { |
| 2279 | RF_ComponentLabel_t *clabel; |
| 2280 | |
| 2281 | clabel = raidget_component_label(raidPtr, col); |
| 2282 | clabel->clean = RF_RAID_CLEAN; |
| 2283 | raidflush_component_label(raidPtr, col); |
| 2284 | return(0); |
| 2285 | } |
| 2286 | |
| 2287 | |
| 2288 | int |
| 2289 | raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col) |
| 2290 | { |
| 2291 | RF_ComponentLabel_t *clabel; |
| 2292 | |
| 2293 | clabel = raidget_component_label(raidPtr, col); |
| 2294 | clabel->clean = RF_RAID_DIRTY; |
| 2295 | raidflush_component_label(raidPtr, col); |
| 2296 | return(0); |
| 2297 | } |
| 2298 | |
| 2299 | int |
| 2300 | raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) |
| 2301 | { |
| 2302 | KASSERT(raidPtr->bytesPerSector); |
| 2303 | return raidread_component_label(raidPtr->bytesPerSector, |
| 2304 | raidPtr->Disks[col].dev, |
| 2305 | raidPtr->raid_cinfo[col].ci_vp, |
| 2306 | &raidPtr->raid_cinfo[col].ci_label); |
| 2307 | } |
| 2308 | |
| 2309 | RF_ComponentLabel_t * |
| 2310 | raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) |
| 2311 | { |
| 2312 | return &raidPtr->raid_cinfo[col].ci_label; |
| 2313 | } |
| 2314 | |
| 2315 | int |
| 2316 | raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) |
| 2317 | { |
| 2318 | RF_ComponentLabel_t *label; |
| 2319 | |
| 2320 | label = &raidPtr->raid_cinfo[col].ci_label; |
| 2321 | label->mod_counter = raidPtr->mod_counter; |
| 2322 | #ifndef RF_NO_PARITY_MAP |
| 2323 | label->parity_map_modcount = label->mod_counter; |
| 2324 | #endif |
| 2325 | return raidwrite_component_label(raidPtr->bytesPerSector, |
| 2326 | raidPtr->Disks[col].dev, |
| 2327 | raidPtr->raid_cinfo[col].ci_vp, label); |
| 2328 | } |
| 2329 | |
| 2330 | |
| 2331 | static int |
| 2332 | raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp, |
| 2333 | RF_ComponentLabel_t *clabel) |
| 2334 | { |
| 2335 | return raidread_component_area(dev, b_vp, clabel, |
| 2336 | sizeof(RF_ComponentLabel_t), |
| 2337 | rf_component_info_offset(), |
| 2338 | rf_component_info_size(secsize)); |
| 2339 | } |
| 2340 | |
| 2341 | /* ARGSUSED */ |
| 2342 | static int |
| 2343 | raidread_component_area(dev_t dev, struct vnode *b_vp, void *data, |
| 2344 | size_t msize, daddr_t offset, daddr_t dsize) |
| 2345 | { |
| 2346 | struct buf *bp; |
| 2347 | int error; |
| 2348 | |
| 2349 | /* XXX should probably ensure that we don't try to do this if |
| 2350 | someone has changed rf_protected_sectors. */ |
| 2351 | |
| 2352 | if (b_vp == NULL) { |
| 2353 | /* For whatever reason, this component is not valid. |
| 2354 | Don't try to read a component label from it. */ |
| 2355 | return(EINVAL); |
| 2356 | } |
| 2357 | |
| 2358 | /* get a block of the appropriate size... */ |
| 2359 | bp = geteblk((int)dsize); |
| 2360 | bp->b_dev = dev; |
| 2361 | |
| 2362 | /* get our ducks in a row for the read */ |
| 2363 | bp->b_blkno = offset / DEV_BSIZE; |
| 2364 | bp->b_bcount = dsize; |
| 2365 | bp->b_flags |= B_READ; |
| 2366 | bp->b_resid = dsize; |
| 2367 | |
| 2368 | bdev_strategy(bp); |
| 2369 | error = biowait(bp); |
| 2370 | |
| 2371 | if (!error) { |
| 2372 | memcpy(data, bp->b_data, msize); |
| 2373 | } |
| 2374 | |
| 2375 | brelse(bp, 0); |
| 2376 | return(error); |
| 2377 | } |
| 2378 | |
| 2379 | |
| 2380 | static int |
| 2381 | raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp, |
| 2382 | RF_ComponentLabel_t *clabel) |
| 2383 | { |
| 2384 | return raidwrite_component_area(dev, b_vp, clabel, |
| 2385 | sizeof(RF_ComponentLabel_t), |
| 2386 | rf_component_info_offset(), |
| 2387 | rf_component_info_size(secsize), 0); |
| 2388 | } |
| 2389 | |
| 2390 | /* ARGSUSED */ |
| 2391 | static int |
| 2392 | raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data, |
| 2393 | size_t msize, daddr_t offset, daddr_t dsize, int asyncp) |
| 2394 | { |
| 2395 | struct buf *bp; |
| 2396 | int error; |
| 2397 | |
| 2398 | /* get a block of the appropriate size... */ |
| 2399 | bp = geteblk((int)dsize); |
| 2400 | bp->b_dev = dev; |
| 2401 | |
| 2402 | /* get our ducks in a row for the write */ |
| 2403 | bp->b_blkno = offset / DEV_BSIZE; |
| 2404 | bp->b_bcount = dsize; |
| 2405 | bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0); |
| 2406 | bp->b_resid = dsize; |
| 2407 | |
| 2408 | memset(bp->b_data, 0, dsize); |
| 2409 | memcpy(bp->b_data, data, msize); |
| 2410 | |
| 2411 | bdev_strategy(bp); |
| 2412 | if (asyncp) |
| 2413 | return 0; |
| 2414 | error = biowait(bp); |
| 2415 | brelse(bp, 0); |
| 2416 | if (error) { |
| 2417 | #if 1 |
printf("Failed to write RAID component info!\n");
| 2419 | #endif |
| 2420 | } |
| 2421 | |
| 2422 | return(error); |
| 2423 | } |
| 2424 | |
| 2425 | void |
| 2426 | rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map) |
| 2427 | { |
| 2428 | int c; |
| 2429 | |
| 2430 | for (c = 0; c < raidPtr->numCol; c++) { |
| 2431 | /* Skip dead disks. */ |
| 2432 | if (RF_DEAD_DISK(raidPtr->Disks[c].status)) |
| 2433 | continue; |
| 2434 | /* XXXjld: what if an error occurs here? */ |
| 2435 | raidwrite_component_area(raidPtr->Disks[c].dev, |
| 2436 | raidPtr->raid_cinfo[c].ci_vp, map, |
| 2437 | RF_PARITYMAP_NBYTE, |
| 2438 | rf_parity_map_offset(raidPtr), |
| 2439 | rf_parity_map_size(raidPtr), 0); |
| 2440 | } |
| 2441 | } |
| 2442 | |
| 2443 | void |
| 2444 | rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map) |
| 2445 | { |
| 2446 | struct rf_paritymap_ondisk tmp; |
| 2447 | int c,first; |
| 2448 | |
| 2449 | first=1; |
| 2450 | for (c = 0; c < raidPtr->numCol; c++) { |
| 2451 | /* Skip dead disks. */ |
| 2452 | if (RF_DEAD_DISK(raidPtr->Disks[c].status)) |
| 2453 | continue; |
| 2454 | raidread_component_area(raidPtr->Disks[c].dev, |
| 2455 | raidPtr->raid_cinfo[c].ci_vp, &tmp, |
| 2456 | RF_PARITYMAP_NBYTE, |
| 2457 | rf_parity_map_offset(raidPtr), |
| 2458 | rf_parity_map_size(raidPtr)); |
| 2459 | if (first) { |
| 2460 | memcpy(map, &tmp, sizeof(*map)); |
| 2461 | first = 0; |
| 2462 | } else { |
| 2463 | rf_paritymap_merge(map, &tmp); |
| 2464 | } |
| 2465 | } |
| 2466 | } |
| 2467 | |
| 2468 | void |
| 2469 | rf_markalldirty(RF_Raid_t *raidPtr) |
| 2470 | { |
| 2471 | RF_ComponentLabel_t *clabel; |
| 2472 | int sparecol; |
| 2473 | int c; |
| 2474 | int j; |
| 2475 | int scol = -1; |
| 2476 | |
| 2477 | raidPtr->mod_counter++; |
| 2478 | for (c = 0; c < raidPtr->numCol; c++) { |
| 2479 | /* we don't want to touch (at all) a disk that has |
| 2480 | failed */ |
| 2481 | if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) { |
| 2482 | clabel = raidget_component_label(raidPtr, c); |
| 2483 | if (clabel->status == rf_ds_spared) { |
| 2484 | /* XXX do something special... |
| 2485 | but whatever you do, don't |
| 2486 | try to access it!! */ |
| 2487 | } else { |
| 2488 | raidmarkdirty(raidPtr, c); |
| 2489 | } |
| 2490 | } |
| 2491 | } |
| 2492 | |
| 2493 | for( c = 0; c < raidPtr->numSpare ; c++) { |
| 2494 | sparecol = raidPtr->numCol + c; |
| 2495 | if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { |
| 2496 | /* |
| 2497 | |
| 2498 | we claim this disk is "optimal" if it's |
| 2499 | rf_ds_used_spare, as that means it should be |
| 2500 | directly substitutable for the disk it replaced. |
| 2501 | We note that too... |
| 2502 | |
| 2503 | */ |
| 2504 | |
| 2505 | for(j=0;j<raidPtr->numCol;j++) { |
| 2506 | if (raidPtr->Disks[j].spareCol == sparecol) { |
| 2507 | scol = j; |
| 2508 | break; |
| 2509 | } |
| 2510 | } |
| 2511 | |
| 2512 | clabel = raidget_component_label(raidPtr, sparecol); |
| 2513 | /* make sure status is noted */ |
| 2514 | |
| 2515 | raid_init_component_label(raidPtr, clabel); |
| 2516 | |
| 2517 | clabel->row = 0; |
| 2518 | clabel->column = scol; |
| 2519 | /* Note: we *don't* change status from rf_ds_used_spare |
| 2520 | to rf_ds_optimal */ |
| 2521 | /* clabel.status = rf_ds_optimal; */ |
| 2522 | |
| 2523 | raidmarkdirty(raidPtr, sparecol); |
| 2524 | } |
| 2525 | } |
| 2526 | } |
| 2527 | |
| 2528 | |
| 2529 | void |
| 2530 | rf_update_component_labels(RF_Raid_t *raidPtr, int final) |
| 2531 | { |
| 2532 | RF_ComponentLabel_t *clabel; |
| 2533 | int sparecol; |
| 2534 | int c; |
| 2535 | int j; |
| 2536 | int scol; |
| 2537 | struct raid_softc *rs = raidPtr->softc; |
| 2538 | |
| 2539 | scol = -1; |
| 2540 | |
| 2541 | /* XXX should do extra checks to make sure things really are clean, |
| 2542 | rather than blindly setting the clean bit... */ |
| 2543 | |
| 2544 | raidPtr->mod_counter++; |
| 2545 | |
| 2546 | for (c = 0; c < raidPtr->numCol; c++) { |
| 2547 | if (raidPtr->Disks[c].status == rf_ds_optimal) { |
| 2548 | clabel = raidget_component_label(raidPtr, c); |
| 2549 | /* make sure status is noted */ |
| 2550 | clabel->status = rf_ds_optimal; |
| 2551 | |
| 2552 | /* note what unit we are configured as */ |
| 2553 | if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0) |
| 2554 | clabel->last_unit = raidPtr->raidid; |
| 2555 | |
| 2556 | raidflush_component_label(raidPtr, c); |
| 2557 | if (final == RF_FINAL_COMPONENT_UPDATE) { |
| 2558 | if (raidPtr->parity_good == RF_RAID_CLEAN) { |
| 2559 | raidmarkclean(raidPtr, c); |
| 2560 | } |
| 2561 | } |
| 2562 | } |
| 2563 | /* else we don't touch it.. */ |
| 2564 | } |
| 2565 | |
| 2566 | for( c = 0; c < raidPtr->numSpare ; c++) { |
| 2567 | sparecol = raidPtr->numCol + c; |
| 2568 | /* Need to ensure that the reconstruct actually completed! */ |
| 2569 | if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { |
| 2570 | /* |
| 2571 | |
| 2572 | we claim this disk is "optimal" if it's |
| 2573 | rf_ds_used_spare, as that means it should be |
| 2574 | directly substitutable for the disk it replaced. |
| 2575 | We note that too... |
| 2576 | |
| 2577 | */ |
| 2578 | |
| 2579 | for(j=0;j<raidPtr->numCol;j++) { |
| 2580 | if (raidPtr->Disks[j].spareCol == sparecol) { |
| 2581 | scol = j; |
| 2582 | break; |
| 2583 | } |
| 2584 | } |
| 2585 | |
| 2586 | /* XXX shouldn't *really* need this... */ |
| 2587 | clabel = raidget_component_label(raidPtr, sparecol); |
| 2588 | /* make sure status is noted */ |
| 2589 | |
| 2590 | raid_init_component_label(raidPtr, clabel); |
| 2591 | |
| 2592 | clabel->column = scol; |
| 2593 | clabel->status = rf_ds_optimal; |
| 2594 | if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0) |
| 2595 | clabel->last_unit = raidPtr->raidid; |
| 2596 | |
| 2597 | raidflush_component_label(raidPtr, sparecol); |
| 2598 | if (final == RF_FINAL_COMPONENT_UPDATE) { |
| 2599 | if (raidPtr->parity_good == RF_RAID_CLEAN) { |
| 2600 | raidmarkclean(raidPtr, sparecol); |
| 2601 | } |
| 2602 | } |
| 2603 | } |
| 2604 | } |
| 2605 | } |
| 2606 | |
| 2607 | void |
| 2608 | rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured) |
| 2609 | { |
| 2610 | |
| 2611 | if (vp != NULL) { |
| 2612 | if (auto_configured == 1) { |
| 2613 | vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); |
| 2614 | VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); |
| 2615 | vput(vp); |
| 2616 | |
| 2617 | } else { |
| 2618 | (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred); |
| 2619 | } |
| 2620 | } |
| 2621 | } |
| 2622 | |
| 2623 | |
| 2624 | void |
| 2625 | rf_UnconfigureVnodes(RF_Raid_t *raidPtr) |
| 2626 | { |
| 2627 | int r,c; |
| 2628 | struct vnode *vp; |
| 2629 | int acd; |
| 2630 | |
| 2631 | |
| 2632 | /* We take this opportunity to close the vnodes like we should.. */ |
| 2633 | |
| 2634 | for (c = 0; c < raidPtr->numCol; c++) { |
| 2635 | vp = raidPtr->raid_cinfo[c].ci_vp; |
| 2636 | acd = raidPtr->Disks[c].auto_configured; |
| 2637 | rf_close_component(raidPtr, vp, acd); |
| 2638 | raidPtr->raid_cinfo[c].ci_vp = NULL; |
| 2639 | raidPtr->Disks[c].auto_configured = 0; |
| 2640 | } |
| 2641 | |
| 2642 | for (r = 0; r < raidPtr->numSpare; r++) { |
| 2643 | vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp; |
| 2644 | acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured; |
| 2645 | rf_close_component(raidPtr, vp, acd); |
| 2646 | raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL; |
| 2647 | raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0; |
| 2648 | } |
| 2649 | } |
| 2650 | |
| 2651 | |
| 2652 | void |
| 2653 | rf_ReconThread(struct rf_recon_req *req) |
| 2654 | { |
| 2655 | int s; |
| 2656 | RF_Raid_t *raidPtr; |
| 2657 | |
| 2658 | s = splbio(); |
| 2659 | raidPtr = (RF_Raid_t *) req->raidPtr; |
| 2660 | raidPtr->recon_in_progress = 1; |
| 2661 | |
| 2662 | rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col, |
| 2663 | ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0)); |
| 2664 | |
| 2665 | RF_Free(req, sizeof(*req)); |
| 2666 | |
| 2667 | raidPtr->recon_in_progress = 0; |
| 2668 | splx(s); |
| 2669 | |
| 2670 | /* That's all... */ |
| 2671 | kthread_exit(0); /* does not return */ |
| 2672 | } |
| 2673 | |
| 2674 | void |
| 2675 | rf_RewriteParityThread(RF_Raid_t *raidPtr) |
| 2676 | { |
| 2677 | int retcode; |
| 2678 | int s; |
| 2679 | |
| 2680 | raidPtr->parity_rewrite_stripes_done = 0; |
| 2681 | raidPtr->parity_rewrite_in_progress = 1; |
| 2682 | s = splbio(); |
| 2683 | retcode = rf_RewriteParity(raidPtr); |
| 2684 | splx(s); |
| 2685 | if (retcode) { |
printf("raid%d: Error re-writing parity (%d)!\n",
| 2687 | raidPtr->raidid, retcode); |
| 2688 | } else { |
| 2689 | /* set the clean bit! If we shutdown correctly, |
| 2690 | the clean bit on each component label will get |
| 2691 | set */ |
| 2692 | raidPtr->parity_good = RF_RAID_CLEAN; |
| 2693 | } |
| 2694 | raidPtr->parity_rewrite_in_progress = 0; |
| 2695 | |
| 2696 | /* Anyone waiting for us to stop? If so, inform them... */ |
| 2697 | if (raidPtr->waitShutdown) { |
| 2698 | wakeup(&raidPtr->parity_rewrite_in_progress); |
| 2699 | } |
| 2700 | |
| 2701 | /* That's all... */ |
| 2702 | kthread_exit(0); /* does not return */ |
| 2703 | } |
| 2704 | |
| 2705 | |
| 2706 | void |
| 2707 | rf_CopybackThread(RF_Raid_t *raidPtr) |
| 2708 | { |
| 2709 | int s; |
| 2710 | |
| 2711 | raidPtr->copyback_in_progress = 1; |
| 2712 | s = splbio(); |
| 2713 | rf_CopybackReconstructedData(raidPtr); |
| 2714 | splx(s); |
| 2715 | raidPtr->copyback_in_progress = 0; |
| 2716 | |
| 2717 | /* That's all... */ |
| 2718 | kthread_exit(0); /* does not return */ |
| 2719 | } |
| 2720 | |
| 2721 | |
| 2722 | void |
| 2723 | rf_ReconstructInPlaceThread(struct rf_recon_req *req) |
| 2724 | { |
| 2725 | int s; |
| 2726 | RF_Raid_t *raidPtr; |
| 2727 | |
| 2728 | s = splbio(); |
| 2729 | raidPtr = req->raidPtr; |
| 2730 | raidPtr->recon_in_progress = 1; |
| 2731 | rf_ReconstructInPlace(raidPtr, req->col); |
| 2732 | RF_Free(req, sizeof(*req)); |
| 2733 | raidPtr->recon_in_progress = 0; |
| 2734 | splx(s); |
| 2735 | |
| 2736 | /* That's all... */ |
| 2737 | kthread_exit(0); /* does not return */ |
| 2738 | } |
| 2739 | |
| 2740 | static RF_AutoConfig_t * |
| 2741 | rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp, |
| 2742 | const char *cname, RF_SectorCount_t size, uint64_t numsecs, |
| 2743 | unsigned secsize) |
| 2744 | { |
| 2745 | int good_one = 0; |
| 2746 | RF_ComponentLabel_t *clabel; |
| 2747 | RF_AutoConfig_t *ac; |
| 2748 | |
| 2749 | clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT); |
| 2750 | if (clabel == NULL) { |
| 2751 | oomem: |
| 2752 | while(ac_list) { |
| 2753 | ac = ac_list; |
| 2754 | if (ac->clabel) |
| 2755 | free(ac->clabel, M_RAIDFRAME); |
| 2756 | ac_list = ac_list->next; |
| 2757 | free(ac, M_RAIDFRAME); |
| 2758 | } |
printf("RAID auto config: out of memory!\n");
| 2760 | return NULL; /* XXX probably should panic? */ |
| 2761 | } |
| 2762 | |
| 2763 | if (!raidread_component_label(secsize, dev, vp, clabel)) { |
| 2764 | /* Got the label. Does it look reasonable? */ |
| 2765 | if (rf_reasonable_label(clabel, numsecs) && |
| 2766 | (rf_component_label_partitionsize(clabel) <= size)) { |
| 2767 | #ifdef DEBUG |
printf("Component on: %s: %llu\n",
| 2769 | cname, (unsigned long long)size); |
| 2770 | rf_print_component_label(clabel); |
| 2771 | #endif |
| 2772 | /* if it's reasonable, add it, else ignore it. */ |
| 2773 | ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME, |
| 2774 | M_NOWAIT); |
| 2775 | if (ac == NULL) { |
| 2776 | free(clabel, M_RAIDFRAME); |
| 2777 | goto oomem; |
| 2778 | } |
| 2779 | strlcpy(ac->devname, cname, sizeof(ac->devname)); |
| 2780 | ac->dev = dev; |
| 2781 | ac->vp = vp; |
| 2782 | ac->clabel = clabel; |
| 2783 | ac->next = ac_list; |
| 2784 | ac_list = ac; |
| 2785 | good_one = 1; |
| 2786 | } |
| 2787 | } |
| 2788 | if (!good_one) { |
| 2789 | /* cleanup */ |
| 2790 | free(clabel, M_RAIDFRAME); |
| 2791 | vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); |
| 2792 | VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); |
| 2793 | vput(vp); |
| 2794 | } |
| 2795 | return ac_list; |
| 2796 | } |
| 2797 | |
| 2798 | RF_AutoConfig_t * |
| 2799 | rf_find_raid_components(void) |
| 2800 | { |
| 2801 | struct vnode *vp; |
| 2802 | struct disklabel label; |
| 2803 | device_t dv; |
| 2804 | deviter_t di; |
| 2805 | dev_t dev; |
| 2806 | int bmajor, bminor, wedge, rf_part_found; |
| 2807 | int error; |
| 2808 | int i; |
| 2809 | RF_AutoConfig_t *ac_list; |
| 2810 | uint64_t numsecs; |
| 2811 | unsigned secsize; |
| 2812 | int dowedges; |
| 2813 | |
| 2814 | /* initialize the AutoConfig list */ |
| 2815 | ac_list = NULL; |
| 2816 | |
| 2817 | /* |
| 2818 | * we begin by trolling through *all* the devices on the system *twice* |
| 2819 | * first we scan for wedges, second for other devices. This avoids |
| 2820 | * using a raw partition instead of a wedge that covers the whole disk |
| 2821 | */ |
| 2822 | |
| 2823 | for (dowedges=1; dowedges>=0; --dowedges) { |
| 2824 | for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL; |
| 2825 | dv = deviter_next(&di)) { |
| 2826 | |
| 2827 | /* we are only interested in disks... */ |
| 2828 | if (device_class(dv) != DV_DISK) |
| 2829 | continue; |
| 2830 | |
| 2831 | /* we don't care about floppies... */ |
if (device_is_a(dv, "fd")) {
continue;
}

/* we don't care about CD's... */
if (device_is_a(dv, "cd")) {
continue;
}

/* we don't care about md's... */
if (device_is_a(dv, "md")) {
continue;
}

/* hdfd is the Atari/Hades floppy driver */
if (device_is_a(dv, "hdfd")) {
continue;
}

/* fdisa is the Atari/Milan floppy driver */
if (device_is_a(dv, "fdisa")) {
continue;
}

/* are we in the wedges pass? */
wedge = device_is_a(dv, "dk");
| 2858 | if (wedge != dowedges) { |
| 2859 | continue; |
| 2860 | } |
| 2861 | |
| 2862 | /* need to find the device_name_to_block_device_major stuff */ |
| 2863 | bmajor = devsw_name2blk(device_xname(dv), NULL, 0); |
| 2864 | |
rf_part_found = 0; /* No raid partition as yet */
| 2866 | |
| 2867 | /* get a vnode for the raw partition of this disk */ |
| 2868 | bminor = minor(device_unit(dv)); |
| 2869 | dev = wedge ? makedev(bmajor, bminor) : |
| 2870 | MAKEDISKDEV(bmajor, bminor, RAW_PART); |
| 2871 | if (bdevvp(dev, &vp)) |
panic("RAID can't alloc vnode");
| 2873 | |
| 2874 | error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED); |
| 2875 | |
| 2876 | if (error) { |
| 2877 | /* "Who cares." Continue looking |
for something that exists */
| 2879 | vput(vp); |
| 2880 | continue; |
| 2881 | } |
| 2882 | |
| 2883 | error = getdisksize(vp, &numsecs, &secsize); |
| 2884 | if (error) { |
| 2885 | /* |
| 2886 | * Pseudo devices like vnd and cgd can be |
| 2887 | * opened but may still need some configuration. |
| 2888 | * Ignore these quietly. |
| 2889 | */ |
| 2890 | if (error != ENXIO) |
printf("RAIDframe: can't get disk size"
" for dev %s (%d)\n",
device_xname(dv), error);
| 2894 | vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); |
| 2895 | VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); |
| 2896 | vput(vp); |
| 2897 | continue; |
| 2898 | } |
| 2899 | if (wedge) { |
| 2900 | struct dkwedge_info dkw; |
| 2901 | error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, |
| 2902 | NOCRED); |
| 2903 | if (error) { |
printf("RAIDframe: can't get wedge info for "
"dev %s (%d)\n", device_xname(dv), error);
| 2906 | vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); |
| 2907 | VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); |
| 2908 | vput(vp); |
| 2909 | continue; |
| 2910 | } |
| 2911 | |
| 2912 | if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) { |
| 2913 | vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); |
| 2914 | VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); |
| 2915 | vput(vp); |
| 2916 | continue; |
| 2917 | } |
| 2918 | |
| 2919 | ac_list = rf_get_component(ac_list, dev, vp, |
| 2920 | device_xname(dv), dkw.dkw_size, numsecs, secsize); |
rf_part_found = 1; /* There is a raid component on this disk */
| 2922 | continue; |
| 2923 | } |
| 2924 | |
| 2925 | /* Ok, the disk exists. Go get the disklabel. */ |
| 2926 | error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED); |
| 2927 | if (error) { |
| 2928 | /* |
| 2929 | * XXX can't happen - open() would |
| 2930 | * have errored out (or faked up one) |
| 2931 | */ |
| 2932 | if (error != ENOTTY) |
printf("RAIDframe: can't get label for dev "
"%s (%d)\n", device_xname(dv), error);
| 2935 | } |
| 2936 | |
| 2937 | /* don't need this any more. We'll allocate it again |
| 2938 | a little later if we really do... */ |
| 2939 | vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); |
| 2940 | VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); |
| 2941 | vput(vp); |
| 2942 | |
| 2943 | if (error) |
| 2944 | continue; |
| 2945 | |
rf_part_found = 0; /* No raid partitions yet */
| 2947 | for (i = 0; i < label.d_npartitions; i++) { |
| 2948 | char cname[sizeof(ac_list->devname)]; |
| 2949 | |
| 2950 | /* We only support partitions marked as RAID */ |
| 2951 | if (label.d_partitions[i].p_fstype != FS_RAID) |
| 2952 | continue; |
| 2953 | |
| 2954 | dev = MAKEDISKDEV(bmajor, device_unit(dv), i); |
| 2955 | if (bdevvp(dev, &vp)) |
panic("RAID can't alloc vnode");
| 2957 | |
| 2958 | error = VOP_OPEN(vp, FREAD, NOCRED); |
| 2959 | if (error) { |
| 2960 | /* Whatever... */ |
| 2961 | vput(vp); |
| 2962 | continue; |
| 2963 | } |
snprintf(cname, sizeof(cname), "%s%c",
| 2965 | device_xname(dv), 'a' + i); |
| 2966 | ac_list = rf_get_component(ac_list, dev, vp, cname, |
| 2967 | label.d_partitions[i].p_size, numsecs, secsize); |
rf_part_found = 1; /* There is at least one raid partition on this disk */
| 2969 | } |
| 2970 | |
| 2971 | /* |
* If there is no raid component on this disk, either in a
* disklabel or inside a wedge, check the raw partition as well,
* as it is possible to configure raid components on raw disk
* devices.
| 2976 | */ |
| 2977 | |
| 2978 | if (!rf_part_found) { |
| 2979 | char cname[sizeof(ac_list->devname)]; |
| 2980 | |
| 2981 | dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART); |
| 2982 | if (bdevvp(dev, &vp)) |
panic("RAID can't alloc vnode");
| 2984 | |
| 2985 | error = VOP_OPEN(vp, FREAD, NOCRED); |
| 2986 | if (error) { |
| 2987 | /* Whatever... */ |
| 2988 | vput(vp); |
| 2989 | continue; |
| 2990 | } |
snprintf(cname, sizeof(cname), "%s%c",
| 2992 | device_xname(dv), 'a' + RAW_PART); |
| 2993 | ac_list = rf_get_component(ac_list, dev, vp, cname, |
| 2994 | label.d_partitions[RAW_PART].p_size, numsecs, secsize); |
| 2995 | } |
| 2996 | } |
| 2997 | deviter_release(&di); |
| 2998 | } |
| 2999 | return ac_list; |
| 3000 | } |
| 3001 | |
| 3002 | |
| 3003 | int |
| 3004 | rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs) |
| 3005 | { |
| 3006 | |
| 3007 | if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || |
| 3008 | (clabel->version==RF_COMPONENT_LABEL_VERSION)) && |
| 3009 | ((clabel->clean == RF_RAID_CLEAN) || |
| 3010 | (clabel->clean == RF_RAID_DIRTY)) && |
| 3011 | clabel->row >=0 && |
| 3012 | clabel->column >= 0 && |
| 3013 | clabel->num_rows > 0 && |
| 3014 | clabel->num_columns > 0 && |
| 3015 | clabel->row < clabel->num_rows && |
| 3016 | clabel->column < clabel->num_columns && |
| 3017 | clabel->blockSize > 0 && |
| 3018 | /* |
| 3019 | * numBlocksHi may contain garbage, but it is ok since |
| 3020 | * the type is unsigned. If it is really garbage, |
| 3021 | * rf_fix_old_label_size() will fix it. |
| 3022 | */ |
| 3023 | rf_component_label_numblocks(clabel) > 0) { |
| 3024 | /* |
| 3025 | * label looks reasonable enough... |
| 3026 | * let's make sure it has no old garbage. |
| 3027 | */ |
| 3028 | if (numsecs) |
| 3029 | rf_fix_old_label_size(clabel, numsecs); |
| 3030 | return(1); |
| 3031 | } |
| 3032 | return(0); |
| 3033 | } |
| 3034 | |
| 3035 | |
| 3036 | /* |
| 3037 | * For reasons yet unknown, some old component labels have garbage in |
| 3038 | * the newer numBlocksHi region, and this causes lossage. Since those |
| 3039 | * disks will also have numsecs set to less than 32 bits of sectors, |
| 3040 | * we can determine when this corruption has occurred, and fix it. |
| 3041 | * |
| 3042 | * The exact same problem, with the same unknown reason, happens to |
| 3043 | * the partitionSizeHi member as well. |
| 3044 | */ |
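/*
 * Concrete example (hypothetical numbers): a component whose disk
 * reports numsecs = 0x2000000 sectors is well under 2^32, so a
 * nonzero numBlocksHi in its label cannot be legitimate and is
 * cleared below.
 */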
| 3045 | static void |
| 3046 | rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs) |
| 3047 | { |
| 3048 | |
| 3049 | if (numsecs < ((uint64_t)1 << 32)) { |
| 3050 | if (clabel->numBlocksHi) { |
printf("WARNING: total sectors < 32 bits, yet "
"numBlocksHi set\n"
"WARNING: resetting numBlocksHi to zero.\n");
| 3054 | clabel->numBlocksHi = 0; |
| 3055 | } |
| 3056 | |
| 3057 | if (clabel->partitionSizeHi) { |
printf("WARNING: total sectors < 32 bits, yet "
"partitionSizeHi set\n"
"WARNING: resetting partitionSizeHi to zero.\n");
| 3061 | clabel->partitionSizeHi = 0; |
| 3062 | } |
| 3063 | } |
| 3064 | } |
| 3065 | |
| 3066 | |
| 3067 | #ifdef DEBUG |
| 3068 | void |
| 3069 | rf_print_component_label(RF_ComponentLabel_t *clabel) |
| 3070 | { |
| 3071 | uint64_t numBlocks; |
static const char *rp[] = {
"No", "Force", "Soft", "*invalid*"
};
| 3075 | |
| 3076 | |
| 3077 | numBlocks = rf_component_label_numblocks(clabel); |
| 3078 | |
printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
clabel->row, clabel->column,
clabel->num_rows, clabel->num_columns);
printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
clabel->version, clabel->serial_number,
clabel->mod_counter);
printf(" Clean: %s Status: %d\n",
clabel->clean ? "Yes" : "No", clabel->status);
printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
printf(" RAID Level: %c blocksize: %d numBlocks: %" PRIu64 "\n",
(char) clabel->parityConfig, clabel->blockSize, numBlocks);
printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
printf(" Root partition: %s\n", rp[clabel->root_partition & 3]);
printf(" Last configured as: raid%d\n", clabel->last_unit);
#if 0
printf(" Config order: %d\n", clabel->config_order);
| 3096 | #endif |
| 3097 | |
| 3098 | } |
| 3099 | #endif |
| 3100 | |
| 3101 | RF_ConfigSet_t * |
| 3102 | rf_create_auto_sets(RF_AutoConfig_t *ac_list) |
| 3103 | { |
| 3104 | RF_AutoConfig_t *ac; |
| 3105 | RF_ConfigSet_t *config_sets; |
| 3106 | RF_ConfigSet_t *cset; |
| 3107 | RF_AutoConfig_t *ac_next; |
| 3108 | |
| 3109 | |
| 3110 | config_sets = NULL; |
| 3111 | |
| 3112 | /* Go through the AutoConfig list, and figure out which components |
| 3113 | belong to what sets. */ |
| 3114 | ac = ac_list; |
| 3115 | while(ac!=NULL) { |
| 3116 | /* we're going to putz with ac->next, so save it here |
| 3117 | for use at the end of the loop */ |
| 3118 | ac_next = ac->next; |
| 3119 | |
| 3120 | if (config_sets == NULL) { |
| 3121 | /* will need at least this one... */ |
| 3122 | config_sets = (RF_ConfigSet_t *) |
| 3123 | malloc(sizeof(RF_ConfigSet_t), |
| 3124 | M_RAIDFRAME, M_NOWAIT); |
| 3125 | if (config_sets == NULL) { |
panic("rf_create_auto_sets: No memory!");
| 3127 | } |
| 3128 | /* this one is easy :) */ |
| 3129 | config_sets->ac = ac; |
| 3130 | config_sets->next = NULL; |
| 3131 | config_sets->rootable = 0; |
| 3132 | ac->next = NULL; |
| 3133 | } else { |
| 3134 | /* which set does this component fit into? */ |
| 3135 | cset = config_sets; |
| 3136 | while(cset!=NULL) { |
| 3137 | if (rf_does_it_fit(cset, ac)) { |
| 3138 | /* looks like it matches... */ |
| 3139 | ac->next = cset->ac; |
| 3140 | cset->ac = ac; |
| 3141 | break; |
| 3142 | } |
| 3143 | cset = cset->next; |
| 3144 | } |
| 3145 | if (cset==NULL) { |
/* didn't find a match above... new set.. */
| 3147 | cset = (RF_ConfigSet_t *) |
| 3148 | malloc(sizeof(RF_ConfigSet_t), |
| 3149 | M_RAIDFRAME, M_NOWAIT); |
| 3150 | if (cset == NULL) { |
panic("rf_create_auto_sets: No memory!");
| 3152 | } |
| 3153 | cset->ac = ac; |
| 3154 | ac->next = NULL; |
| 3155 | cset->next = config_sets; |
| 3156 | cset->rootable = 0; |
| 3157 | config_sets = cset; |
| 3158 | } |
| 3159 | } |
| 3160 | ac = ac_next; |
| 3161 | } |
| 3162 | |
| 3163 | |
| 3164 | return(config_sets); |
| 3165 | } |
| 3166 | |
| 3167 | static int |
| 3168 | rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac) |
| 3169 | { |
| 3170 | RF_ComponentLabel_t *clabel1, *clabel2; |
| 3171 | |
| 3172 | /* If this one matches the *first* one in the set, that's good |
| 3173 | enough, since the other members of the set would have been |
| 3174 | through here too... */ |
| 3175 | /* note that we are not checking partitionSize here.. |
| 3176 | |
| 3177 | Note that we are also not checking the mod_counters here. |
| 3178 | If everything else matches except the mod_counter, that's |
| 3179 | good enough for this test. We will deal with the mod_counters |
| 3180 | a little later in the autoconfiguration process. |
| 3181 | |
| 3182 | (clabel1->mod_counter == clabel2->mod_counter) && |
| 3183 | |
| 3184 | The reason we don't check for this is that failed disks |
| 3185 | will have lower modification counts. If those disks are |
| 3186 | not added to the set they used to belong to, then they will |
| 3187 | form their own set, which may result in 2 different sets, |
| 3188 | for example, competing to be configured at raid0, and |
| 3189 | perhaps competing to be the root filesystem set. If the |
| 3190 | wrong ones get configured, or both attempt to become /, |
weird behaviour and/or serious lossage will occur. Thus we
| 3192 | need to bring them into the fold here, and kick them out at |
| 3193 | a later point. |
| 3194 | |
| 3195 | */ |
| 3196 | |
| 3197 | clabel1 = cset->ac->clabel; |
| 3198 | clabel2 = ac->clabel; |
| 3199 | if ((clabel1->version == clabel2->version) && |
| 3200 | (clabel1->serial_number == clabel2->serial_number) && |
| 3201 | (clabel1->num_rows == clabel2->num_rows) && |
| 3202 | (clabel1->num_columns == clabel2->num_columns) && |
| 3203 | (clabel1->sectPerSU == clabel2->sectPerSU) && |
| 3204 | (clabel1->SUsPerPU == clabel2->SUsPerPU) && |
| 3205 | (clabel1->SUsPerRU == clabel2->SUsPerRU) && |
| 3206 | (clabel1->parityConfig == clabel2->parityConfig) && |
| 3207 | (clabel1->maxOutstanding == clabel2->maxOutstanding) && |
| 3208 | (clabel1->blockSize == clabel2->blockSize) && |
| 3209 | rf_component_label_numblocks(clabel1) == |
| 3210 | rf_component_label_numblocks(clabel2) && |
| 3211 | (clabel1->autoconfigure == clabel2->autoconfigure) && |
| 3212 | (clabel1->root_partition == clabel2->root_partition) && |
| 3213 | (clabel1->last_unit == clabel2->last_unit) && |
| 3214 | (clabel1->config_order == clabel2->config_order)) { |
/* if it gets here, it almost *has* to be a match */
| 3216 | } else { |
| 3217 | /* it's not consistent with somebody in the set.. |
| 3218 | punt */ |
| 3219 | return(0); |
| 3220 | } |
| 3221 | /* all was fine.. it must fit... */ |
| 3222 | return(1); |
| 3223 | } |
| 3224 | |
| 3225 | int |
| 3226 | rf_have_enough_components(RF_ConfigSet_t *cset) |
| 3227 | { |
| 3228 | RF_AutoConfig_t *ac; |
| 3229 | RF_AutoConfig_t *auto_config; |
| 3230 | RF_ComponentLabel_t *clabel; |
| 3231 | int c; |
| 3232 | int num_cols; |
| 3233 | int num_missing; |
| 3234 | int mod_counter; |
| 3235 | int mod_counter_found; |
| 3236 | int even_pair_failed; |
| 3237 | char parity_type; |
| 3238 | |
| 3239 | |
| 3240 | /* check to see that we have enough 'live' components |
| 3241 | of this set. If so, we can configure it if necessary */ |
| 3242 | |
| 3243 | num_cols = cset->ac->clabel->num_columns; |
| 3244 | parity_type = cset->ac->clabel->parityConfig; |
| 3245 | |
| 3246 | /* XXX Check for duplicate components!?!?!? */ |
| 3247 | |
| 3248 | /* Determine what the mod_counter is supposed to be for this set. */ |
| 3249 | |
| 3250 | mod_counter_found = 0; |
| 3251 | mod_counter = 0; |
| 3252 | ac = cset->ac; |
| 3253 | while(ac!=NULL) { |
| 3254 | if (mod_counter_found==0) { |
| 3255 | mod_counter = ac->clabel->mod_counter; |
| 3256 | mod_counter_found = 1; |
| 3257 | } else { |
| 3258 | if (ac->clabel->mod_counter > mod_counter) { |
| 3259 | mod_counter = ac->clabel->mod_counter; |
| 3260 | } |
| 3261 | } |
| 3262 | ac = ac->next; |
| 3263 | } |
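	/* The highest mod_counter in the set wins: e.g. components
	   reporting 119, 120, 120, 120 give mod_counter == 120, and
	   the column scan below will treat the stale component at 119
	   as missing. */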
| 3264 | |
| 3265 | num_missing = 0; |
| 3266 | auto_config = cset->ac; |
| 3267 | |
| 3268 | even_pair_failed = 0; |
| 3269 | for(c=0; c<num_cols; c++) { |
| 3270 | ac = auto_config; |
| 3271 | while(ac!=NULL) { |
| 3272 | if ((ac->clabel->column == c) && |
| 3273 | (ac->clabel->mod_counter == mod_counter)) { |
| 3274 | /* it's this one... */ |
| 3275 | #ifdef DEBUG |
| 3276 | printf("Found: %s at %d\n" , |
| 3277 | ac->devname,c); |
| 3278 | #endif |
| 3279 | break; |
| 3280 | } |
| 3281 | ac=ac->next; |
| 3282 | } |
| 3283 | if (ac==NULL) { |
| 3284 | /* Didn't find one here! */ |
| 3285 | /* special case for RAID 1, especially |
| 3286 | where there are more than 2 |
| 3287 | components (where RAIDframe treats |
| 3288 | things a little differently :( ) */ |
| 3289 | if (parity_type == '1') { |
| 3290 | if (c%2 == 0) { /* even component */ |
| 3291 | even_pair_failed = 1; |
			} else { /* Odd component.  If it is
				    missing and so is its even
				    partner, both halves of the
				    mirror pair are gone:
				    "Good Night, Charlie" */
| 3297 | if (even_pair_failed == 1) { |
| 3298 | return(0); |
| 3299 | } |
| 3300 | } |
| 3301 | } else { |
| 3302 | /* normal accounting */ |
| 3303 | num_missing++; |
| 3304 | } |
| 3305 | } |
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished the odd half of a mirror
			   pair without bailing, so this pair is
			   covered.  Reset even_pair_failed and go on
			   to the next pair. */
			even_pair_failed = 0;
		}
| 3312 | } |
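	/* Example: with parityConfig '1' and four columns, columns
	   (0,1) and (2,3) are mirror pairs.  Losing components 0 and
	   2 is survivable (one half of each pair remains); losing
	   both 0 and 1 kills a whole pair and the set is rejected
	   above. */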
| 3313 | |
| 3314 | clabel = cset->ac->clabel; |
| 3315 | |
| 3316 | if (((clabel->parityConfig == '0') && (num_missing > 0)) || |
| 3317 | ((clabel->parityConfig == '4') && (num_missing > 1)) || |
| 3318 | ((clabel->parityConfig == '5') && (num_missing > 1))) { |
| 3319 | /* XXX this needs to be made *much* more general */ |
| 3320 | /* Too many failures */ |
| 3321 | return(0); |
| 3322 | } |
| 3323 | /* otherwise, all is well, and we've got enough to take a kick |
| 3324 | at autoconfiguring this set */ |
| 3325 | return(1); |
| 3326 | } |
| 3327 | |
| 3328 | void |
| 3329 | rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config, |
| 3330 | RF_Raid_t *raidPtr) |
| 3331 | { |
| 3332 | RF_ComponentLabel_t *clabel; |
| 3333 | int i; |
| 3334 | |
| 3335 | clabel = ac->clabel; |
| 3336 | |
| 3337 | /* 1. Fill in the common stuff */ |
| 3338 | config->numRow = clabel->num_rows = 1; |
| 3339 | config->numCol = clabel->num_columns; |
| 3340 | config->numSpare = 0; /* XXX should this be set here? */ |
| 3341 | config->sectPerSU = clabel->sectPerSU; |
| 3342 | config->SUsPerPU = clabel->SUsPerPU; |
| 3343 | config->SUsPerRU = clabel->SUsPerRU; |
| 3344 | config->parityConfig = clabel->parityConfig; |
| 3345 | /* XXX... */ |
	strcpy(config->diskQueueType, "fifo");
| 3347 | config->maxOutstandingDiskReqs = clabel->maxOutstanding; |
| 3348 | config->layoutSpecificSize = 0; /* XXX ?? */ |
| 3349 | |
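	/* 2. Fill in the per-component device names, indexed by column */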
| 3350 | while(ac!=NULL) { |
| 3351 | /* row/col values will be in range due to the checks |
| 3352 | in reasonable_label() */ |
| 3353 | strcpy(config->devnames[0][ac->clabel->column], |
| 3354 | ac->devname); |
| 3355 | ac = ac->next; |
| 3356 | } |
| 3357 | |
| 3358 | for(i=0;i<RF_MAXDBGV;i++) { |
| 3359 | config->debugVars[i][0] = 0; |
| 3360 | } |
| 3361 | } |
| 3362 | |
| 3363 | int |
| 3364 | rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value) |
| 3365 | { |
| 3366 | RF_ComponentLabel_t *clabel; |
| 3367 | int column; |
| 3368 | int sparecol; |
| 3369 | |
| 3370 | raidPtr->autoconfigure = new_value; |
| 3371 | |
| 3372 | for(column=0; column<raidPtr->numCol; column++) { |
| 3373 | if (raidPtr->Disks[column].status == rf_ds_optimal) { |
| 3374 | clabel = raidget_component_label(raidPtr, column); |
| 3375 | clabel->autoconfigure = new_value; |
| 3376 | raidflush_component_label(raidPtr, column); |
| 3377 | } |
| 3378 | } |
| 3379 | for(column = 0; column < raidPtr->numSpare ; column++) { |
| 3380 | sparecol = raidPtr->numCol + column; |
| 3381 | if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { |
| 3382 | clabel = raidget_component_label(raidPtr, sparecol); |
| 3383 | clabel->autoconfigure = new_value; |
| 3384 | raidflush_component_label(raidPtr, sparecol); |
| 3385 | } |
| 3386 | } |
| 3387 | return(new_value); |
| 3388 | } |
| 3389 | |
| 3390 | int |
| 3391 | rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value) |
| 3392 | { |
| 3393 | RF_ComponentLabel_t *clabel; |
| 3394 | int column; |
| 3395 | int sparecol; |
| 3396 | |
| 3397 | raidPtr->root_partition = new_value; |
| 3398 | for(column=0; column<raidPtr->numCol; column++) { |
| 3399 | if (raidPtr->Disks[column].status == rf_ds_optimal) { |
| 3400 | clabel = raidget_component_label(raidPtr, column); |
| 3401 | clabel->root_partition = new_value; |
| 3402 | raidflush_component_label(raidPtr, column); |
| 3403 | } |
| 3404 | } |
| 3405 | for(column = 0; column < raidPtr->numSpare ; column++) { |
| 3406 | sparecol = raidPtr->numCol + column; |
| 3407 | if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { |
| 3408 | clabel = raidget_component_label(raidPtr, sparecol); |
| 3409 | clabel->root_partition = new_value; |
| 3410 | raidflush_component_label(raidPtr, sparecol); |
| 3411 | } |
| 3412 | } |
| 3413 | return(new_value); |
| 3414 | } |
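/*
 * Both of the setters above are reached from raidctl(8) (via the
 * RAIDFRAME_SET_AUTOCONFIG and RAIDFRAME_SET_ROOT ioctls) and persist
 * the new value by rewriting the component label on every optimal
 * column and every used spare.
 */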
| 3415 | |
| 3416 | void |
| 3417 | rf_release_all_vps(RF_ConfigSet_t *cset) |
| 3418 | { |
| 3419 | RF_AutoConfig_t *ac; |
| 3420 | |
| 3421 | ac = cset->ac; |
| 3422 | while(ac!=NULL) { |
		/* Close the vp and give it back: VOP_CLOSE() wants the
		   vnode locked, and vput() both unlocks it and drops
		   our reference. */
| 3424 | if (ac->vp) { |
| 3425 | vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); |
| 3426 | VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED); |
| 3427 | vput(ac->vp); |
| 3428 | ac->vp = NULL; |
| 3429 | } |
| 3430 | ac = ac->next; |
| 3431 | } |
| 3432 | } |
| 3433 | |
| 3434 | |
| 3435 | void |
| 3436 | rf_cleanup_config_set(RF_ConfigSet_t *cset) |
| 3437 | { |
| 3438 | RF_AutoConfig_t *ac; |
| 3439 | RF_AutoConfig_t *next_ac; |
| 3440 | |
| 3441 | ac = cset->ac; |
| 3442 | while(ac!=NULL) { |
| 3443 | next_ac = ac->next; |
| 3444 | /* nuke the label */ |
| 3445 | free(ac->clabel, M_RAIDFRAME); |
| 3446 | /* cleanup the config structure */ |
| 3447 | free(ac, M_RAIDFRAME); |
| 3448 | /* "next.." */ |
| 3449 | ac = next_ac; |
| 3450 | } |
| 3451 | /* and, finally, nuke the config set */ |
| 3452 | free(cset, M_RAIDFRAME); |
| 3453 | } |
| 3454 | |
| 3455 | |
| 3456 | void |
| 3457 | raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel) |
| 3458 | { |
| 3459 | /* current version number */ |
| 3460 | clabel->version = RF_COMPONENT_LABEL_VERSION; |
| 3461 | clabel->serial_number = raidPtr->serial_number; |
| 3462 | clabel->mod_counter = raidPtr->mod_counter; |
| 3463 | |
| 3464 | clabel->num_rows = 1; |
| 3465 | clabel->num_columns = raidPtr->numCol; |
| 3466 | clabel->clean = RF_RAID_DIRTY; /* not clean */ |
| 3467 | clabel->status = rf_ds_optimal; /* "It's good!" */ |
| 3468 | |
| 3469 | clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; |
| 3470 | clabel->SUsPerPU = raidPtr->Layout.SUsPerPU; |
| 3471 | clabel->SUsPerRU = raidPtr->Layout.SUsPerRU; |
| 3472 | |
| 3473 | clabel->blockSize = raidPtr->bytesPerSector; |
| 3474 | rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk); |
| 3475 | |
| 3476 | /* XXX not portable */ |
| 3477 | clabel->parityConfig = raidPtr->Layout.map->parityConfig; |
| 3478 | clabel->maxOutstanding = raidPtr->maxOutstanding; |
| 3479 | clabel->autoconfigure = raidPtr->autoconfigure; |
| 3480 | clabel->root_partition = raidPtr->root_partition; |
| 3481 | clabel->last_unit = raidPtr->raidid; |
| 3482 | clabel->config_order = raidPtr->config_order; |
| 3483 | |
| 3484 | #ifndef RF_NO_PARITY_MAP |
| 3485 | rf_paritymap_init_label(raidPtr->parity_map, clabel); |
| 3486 | #endif |
| 3487 | } |
| 3488 | |
| 3489 | struct raid_softc * |
| 3490 | rf_auto_config_set(RF_ConfigSet_t *cset) |
| 3491 | { |
| 3492 | RF_Raid_t *raidPtr; |
| 3493 | RF_Config_t *config; |
| 3494 | int raidID; |
| 3495 | struct raid_softc *sc; |
| 3496 | |
| 3497 | #ifdef DEBUG |
| 3498 | printf("RAID autoconfigure\n" ); |
| 3499 | #endif |
| 3500 | |
| 3501 | /* 1. Create a config structure */ |
| 3502 | config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO); |
| 3503 | if (config == NULL) { |
| 3504 | printf("%s: Out of mem - config!?!?\n" , __func__); |
| 3505 | /* XXX do something more intelligent here. */ |
| 3506 | return NULL; |
| 3507 | } |
| 3508 | |
| 3509 | /* |
| 3510 | 2. Figure out what RAID ID this one is supposed to live at |
| 3511 | See if we can get the same RAID dev that it was configured |
| 3512 | on last time.. |
| 3513 | */ |
| 3514 | |
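	/* Start at the unit recorded in the component label; if that
	   raid device is already valid (claimed by another set), keep
	   probing upwards until a free softc turns up. */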
| 3515 | raidID = cset->ac->clabel->last_unit; |
| 3516 | for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0; |
| 3517 | sc = raidget(++raidID, false)) |
| 3518 | continue; |
| 3519 | #ifdef DEBUG |
| 3520 | printf("Configuring raid%d:\n" ,raidID); |
| 3521 | #endif |
| 3522 | |
| 3523 | if (sc == NULL) |
| 3524 | sc = raidget(raidID, true); |
| 3525 | if (sc == NULL) { |
| 3526 | printf("%s: Out of mem - softc!?!?\n" , __func__); |
| 3527 | /* XXX do something more intelligent here. */ |
| 3528 | free(config, M_RAIDFRAME); |
| 3529 | return NULL; |
| 3530 | } |
| 3531 | |
| 3532 | raidPtr = &sc->sc_r; |
| 3533 | |
| 3534 | /* XXX all this stuff should be done SOMEWHERE ELSE! */ |
| 3535 | raidPtr->softc = sc; |
| 3536 | raidPtr->raidid = raidID; |
| 3537 | raidPtr->openings = RAIDOUTSTANDING; |
| 3538 | |
| 3539 | /* 3. Build the configuration structure */ |
| 3540 | rf_create_configuration(cset->ac, config, raidPtr); |
| 3541 | |
| 3542 | /* 4. Do the configuration */ |
| 3543 | if (rf_Configure(raidPtr, config, cset->ac) == 0) { |
| 3544 | raidinit(sc); |
| 3545 | |
| 3546 | rf_markalldirty(raidPtr); |
| 3547 | raidPtr->autoconfigure = 1; /* XXX do this here? */ |
| 3548 | switch (cset->ac->clabel->root_partition) { |
| 3549 | case 1: /* Force Root */ |
	case 2:	/* Soft Root: root when the boot partition is part of the raid */
| 3551 | /* |
| 3552 | * everything configured just fine. Make a note |
| 3553 | * that this set is eligible to be root, |
| 3554 | * or forced to be root |
| 3555 | */ |
| 3556 | cset->rootable = cset->ac->clabel->root_partition; |
| 3557 | /* XXX do this here? */ |
| 3558 | raidPtr->root_partition = cset->rootable; |
| 3559 | break; |
| 3560 | default: |
| 3561 | break; |
| 3562 | } |
| 3563 | } else { |
| 3564 | raidput(sc); |
| 3565 | sc = NULL; |
| 3566 | } |
| 3567 | |
| 3568 | /* 5. Cleanup */ |
| 3569 | free(config, M_RAIDFRAME); |
| 3570 | return sc; |
| 3571 | } |
| 3572 | |
| 3573 | void |
| 3574 | rf_pool_init(struct pool *p, size_t size, const char *w_chan, |
| 3575 | size_t xmin, size_t xmax) |
| 3576 | { |
| 3577 | pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO); |
| 3578 | pool_sethiwat(p, xmax); |
| 3579 | pool_prime(p, xmin); |
| 3580 | pool_setlowat(p, xmin); |
| 3581 | } |
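/*
 * Usage sketch (hypothetical pool, type, and sizes -- not from this
 * driver): keep at least 32 descriptors primed and cap the idle list
 * at 128:
 *
 *	static struct pool my_pool;
 *
 *	rf_pool_init(&my_pool, sizeof(struct my_desc), "rfmypl", 32, 128);
 *
 * pool_prime() preallocates xmin items and pool_setlowat() tries to
 * keep at least that many free for IPL_BIO-time allocations, while
 * pool_sethiwat() caps the number of idle items retained at xmax.
 */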
| 3582 | |
/*
 * rf_buf_queue_check(RF_Raid_t *raidPtr) -- looks into the buffer queue
 * to see if there is I/O pending and whether that I/O could possibly be
 * done for a given RAID set.  Returns 0 if I/O is waiting and can be
 * done, 1 otherwise.
 */
| 3590 | int |
| 3591 | rf_buf_queue_check(RF_Raid_t *raidPtr) |
| 3592 | { |
| 3593 | struct raid_softc *rs; |
| 3594 | struct dk_softc *dksc; |
| 3595 | |
| 3596 | rs = raidPtr->softc; |
| 3597 | dksc = &rs->sc_dksc; |
| 3598 | |
| 3599 | if ((rs->sc_flags & RAIDF_INITED) == 0) |
| 3600 | return 1; |
| 3601 | |
| 3602 | if (dk_strategy_pending(dksc) && raidPtr->openings > 0) { |
| 3603 | /* there is work to do */ |
| 3604 | return 0; |
| 3605 | } |
| 3606 | /* default is nothing to do */ |
| 3607 | return 1; |
| 3608 | } |
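/*
 * (This is written for a poll-style caller -- e.g. the RAIDframe
 * engine loop checking whether more queued work can be started --
 * which is why the return value is inverted: 0 means "work is
 * waiting".)
 */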
| 3609 | |
| 3610 | int |
| 3611 | rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr) |
| 3612 | { |
| 3613 | uint64_t numsecs; |
| 3614 | unsigned secsize; |
| 3615 | int error; |
| 3616 | |
| 3617 | error = getdisksize(vp, &numsecs, &secsize); |
| 3618 | if (error == 0) { |
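		/* The first rf_protectedSectors of each component are
		   reserved for the component label, so exclude them
		   from numBlocks; partitionSize keeps the raw size. */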
| 3619 | diskPtr->blockSize = secsize; |
| 3620 | diskPtr->numBlocks = numsecs - rf_protectedSectors; |
| 3621 | diskPtr->partitionSize = numsecs; |
| 3622 | return 0; |
| 3623 | } |
| 3624 | return error; |
| 3625 | } |
| 3626 | |
| 3627 | static int |
| 3628 | raid_match(device_t self, cfdata_t cfdata, void *aux) |
| 3629 | { |
| 3630 | return 1; |
| 3631 | } |
| 3632 | |
| 3633 | static void |
| 3634 | raid_attach(device_t parent, device_t self, void *aux) |
| 3635 | { |
| 3636 | } |
| 3637 | |
| 3638 | |
| 3639 | static int |
| 3640 | raid_detach(device_t self, int flags) |
| 3641 | { |
| 3642 | int error; |
| 3643 | struct raid_softc *rs = raidsoftc(self); |
| 3644 | |
| 3645 | if (rs == NULL) |
| 3646 | return ENXIO; |
| 3647 | |
| 3648 | if ((error = raidlock(rs)) != 0) |
| 3649 | return (error); |
| 3650 | |
| 3651 | error = raid_detach_unlocked(rs); |
| 3652 | |
| 3653 | raidunlock(rs); |
| 3654 | |
| 3655 | /* XXX raid can be referenced here */ |
| 3656 | |
| 3657 | if (error) |
| 3658 | return error; |
| 3659 | |
| 3660 | /* Free the softc */ |
| 3661 | raidput(rs); |
| 3662 | |
| 3663 | return 0; |
| 3664 | } |
| 3665 | |
| 3666 | static void |
| 3667 | rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr) |
| 3668 | { |
| 3669 | struct dk_softc *dksc = &rs->sc_dksc; |
| 3670 | struct disk_geom *dg = &dksc->sc_dkdev.dk_geom; |
| 3671 | |
| 3672 | memset(dg, 0, sizeof(*dg)); |
| 3673 | |
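	/* Only dg_secperunit and dg_secsize are real; the sector and
	   track counts are synthetic values chosen so the fabricated
	   geometry multiplies out to something plausible for
	   disklabel-style tools. */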
| 3674 | dg->dg_secperunit = raidPtr->totalSectors; |
| 3675 | dg->dg_secsize = raidPtr->bytesPerSector; |
| 3676 | dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe; |
| 3677 | dg->dg_ntracks = 4 * raidPtr->numCol; |
| 3678 | |
| 3679 | disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL); |
| 3680 | } |
| 3681 | |
| 3682 | /* |
| 3683 | * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components. |
| 3684 | * We end up returning whatever error was returned by the first cache flush |
| 3685 | * that fails. |
| 3686 | */ |
| 3687 | |
| 3688 | int |
| 3689 | rf_sync_component_caches(RF_Raid_t *raidPtr) |
| 3690 | { |
| 3691 | int c, sparecol; |
	int e, error;
| 3693 | int force = 1; |
| 3694 | |
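	/* DIOCCACHESYNC takes an int "force" flag; we always pass 1 to
	   request an unconditional flush.  How (or whether) the flush
	   is honoured is up to the individual component driver. */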
| 3695 | error = 0; |
| 3696 | for (c = 0; c < raidPtr->numCol; c++) { |
| 3697 | if (raidPtr->Disks[c].status == rf_ds_optimal) { |
| 3698 | e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC, |
| 3699 | &force, FWRITE, NOCRED); |
| 3700 | if (e) { |
| 3701 | if (e != ENODEV) |
| 3702 | printf("raid%d: cache flush to component %s failed.\n" , |
| 3703 | raidPtr->raidid, raidPtr->Disks[c].devname); |
| 3704 | if (error == 0) { |
| 3705 | error = e; |
| 3706 | } |
| 3707 | } |
| 3708 | } |
| 3709 | } |
| 3710 | |
| 3711 | for( c = 0; c < raidPtr->numSpare ; c++) { |
| 3712 | sparecol = raidPtr->numCol + c; |
| 3713 | /* Need to ensure that the reconstruct actually completed! */ |
| 3714 | if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { |
| 3715 | e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp, |
| 3716 | DIOCCACHESYNC, &force, FWRITE, NOCRED); |
| 3717 | if (e) { |
| 3718 | if (e != ENODEV) |
| 3719 | printf("raid%d: cache flush to component %s failed.\n" , |
| 3720 | raidPtr->raidid, raidPtr->Disks[sparecol].devname); |
| 3721 | if (error == 0) { |
| 3722 | error = e; |
| 3723 | } |
| 3724 | } |
| 3725 | } |
| 3726 | } |
| 3727 | return error; |
| 3728 | } |
| 3729 | |
| 3730 | /* |
| 3731 | * Module interface |
| 3732 | */ |
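/*
 * Load order: attach the block/character devsw, then the autoconf
 * cfdriver/cfattach glue, boot the RAIDframe core, and finally
 * register a finalizer so that auto-configuration runs once real
 * hardware discovery has settled.  Unload reverses those steps and is
 * refused while any raid unit still exists.
 */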
| 3733 | |
MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr");
| 3735 | |
| 3736 | #ifdef _MODULE |
| 3737 | CFDRIVER_DECL(raid, DV_DISK, NULL); |
| 3738 | #endif |
| 3739 | |
| 3740 | static int raid_modcmd(modcmd_t, void *); |
| 3741 | static int raid_modcmd_init(void); |
| 3742 | static int raid_modcmd_fini(void); |
| 3743 | |
| 3744 | static int |
| 3745 | raid_modcmd(modcmd_t cmd, void *data) |
| 3746 | { |
| 3747 | int error; |
| 3748 | |
| 3749 | error = 0; |
| 3750 | switch (cmd) { |
| 3751 | case MODULE_CMD_INIT: |
| 3752 | error = raid_modcmd_init(); |
| 3753 | break; |
| 3754 | case MODULE_CMD_FINI: |
| 3755 | error = raid_modcmd_fini(); |
| 3756 | break; |
| 3757 | default: |
| 3758 | error = ENOTTY; |
| 3759 | break; |
| 3760 | } |
| 3761 | return error; |
| 3762 | } |
| 3763 | |
| 3764 | static int |
| 3765 | raid_modcmd_init(void) |
| 3766 | { |
| 3767 | int error; |
| 3768 | int bmajor, cmajor; |
| 3769 | |
| 3770 | mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE); |
| 3771 | mutex_enter(&raid_lock); |
| 3772 | #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) |
| 3773 | rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM); |
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");
| 3776 | |
| 3777 | rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; |
| 3778 | #endif |
| 3779 | |
| 3780 | bmajor = cmajor = -1; |
| 3781 | error = devsw_attach("raid" , &raid_bdevsw, &bmajor, |
| 3782 | &raid_cdevsw, &cmajor); |
| 3783 | if (error != 0 && error != EEXIST) { |
| 3784 | aprint_error("%s: devsw_attach failed %d\n" , __func__, error); |
| 3785 | mutex_exit(&raid_lock); |
| 3786 | return error; |
| 3787 | } |
| 3788 | #ifdef _MODULE |
| 3789 | error = config_cfdriver_attach(&raid_cd); |
| 3790 | if (error != 0) { |
| 3791 | aprint_error("%s: config_cfdriver_attach failed %d\n" , |
| 3792 | __func__, error); |
| 3793 | devsw_detach(&raid_bdevsw, &raid_cdevsw); |
| 3794 | mutex_exit(&raid_lock); |
| 3795 | return error; |
| 3796 | } |
| 3797 | #endif |
| 3798 | error = config_cfattach_attach(raid_cd.cd_name, &raid_ca); |
| 3799 | if (error != 0) { |
| 3800 | aprint_error("%s: config_cfattach_attach failed %d\n" , |
| 3801 | __func__, error); |
| 3802 | #ifdef _MODULE |
| 3803 | config_cfdriver_detach(&raid_cd); |
| 3804 | #endif |
| 3805 | devsw_detach(&raid_bdevsw, &raid_cdevsw); |
| 3806 | mutex_exit(&raid_lock); |
| 3807 | return error; |
| 3808 | } |
| 3809 | |
| 3810 | raidautoconfigdone = false; |
| 3811 | |
| 3812 | mutex_exit(&raid_lock); |
| 3813 | |
| 3814 | if (error == 0) { |
| 3815 | if (rf_BootRaidframe(true) == 0) |
| 3816 | aprint_verbose("Kernelized RAIDframe activated\n" ); |
| 3817 | else |
| 3818 | panic("Serious error activating RAID!!" ); |
| 3819 | } |
| 3820 | |
| 3821 | /* |
| 3822 | * Register a finalizer which will be used to auto-config RAID |
| 3823 | * sets once all real hardware devices have been found. |
| 3824 | */ |
| 3825 | error = config_finalize_register(NULL, rf_autoconfig); |
| 3826 | if (error != 0) { |
| 3827 | aprint_error("WARNING: unable to register RAIDframe " |
| 3828 | "finalizer\n" ); |
| 3829 | error = 0; |
| 3830 | } |
| 3831 | |
| 3832 | return error; |
| 3833 | } |
| 3834 | |
| 3835 | static int |
| 3836 | raid_modcmd_fini(void) |
| 3837 | { |
| 3838 | int error; |
| 3839 | |
| 3840 | mutex_enter(&raid_lock); |
| 3841 | |
| 3842 | /* Don't allow unload if raid device(s) exist. */ |
| 3843 | if (!LIST_EMPTY(&raids)) { |
| 3844 | mutex_exit(&raid_lock); |
| 3845 | return EBUSY; |
| 3846 | } |
| 3847 | |
| 3848 | error = config_cfattach_detach(raid_cd.cd_name, &raid_ca); |
| 3849 | if (error != 0) { |
| 3850 | aprint_error("%s: cannot detach cfattach\n" ,__func__); |
| 3851 | mutex_exit(&raid_lock); |
| 3852 | return error; |
| 3853 | } |
| 3854 | #ifdef _MODULE |
| 3855 | error = config_cfdriver_detach(&raid_cd); |
| 3856 | if (error != 0) { |
| 3857 | aprint_error("%s: cannot detach cfdriver\n" ,__func__); |
| 3858 | config_cfattach_attach(raid_cd.cd_name, &raid_ca); |
| 3859 | mutex_exit(&raid_lock); |
| 3860 | return error; |
| 3861 | } |
| 3862 | #endif |
| 3863 | error = devsw_detach(&raid_bdevsw, &raid_cdevsw); |
| 3864 | if (error != 0) { |
| 3865 | aprint_error("%s: cannot detach devsw\n" ,__func__); |
| 3866 | #ifdef _MODULE |
| 3867 | config_cfdriver_attach(&raid_cd); |
| 3868 | #endif |
| 3869 | config_cfattach_attach(raid_cd.cd_name, &raid_ca); |
| 3870 | mutex_exit(&raid_lock); |
| 3871 | return error; |
| 3872 | } |
| 3873 | rf_BootRaidframe(false); |
| 3874 | #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) |
| 3875 | rf_destroy_mutex2(rf_sparet_wait_mutex); |
| 3876 | rf_destroy_cond2(rf_sparet_wait_cv); |
| 3877 | rf_destroy_cond2(rf_sparet_resp_cv); |
| 3878 | #endif |
| 3879 | mutex_exit(&raid_lock); |
| 3880 | mutex_destroy(&raid_lock); |
| 3881 | |
| 3882 | return error; |
| 3883 | } |
| 3884 | |