| 1 | /* $NetBSD: layer_vnops.c,v 1.59 2016/08/20 12:37:09 hannken Exp $ */ |
| 2 | |
| 3 | /* |
| 4 | * Copyright (c) 1999 National Aeronautics & Space Administration |
| 5 | * All rights reserved. |
| 6 | * |
| 7 | * This software was written by William Studenmund of the |
| 8 | * Numerical Aerospace Simulation Facility, NASA Ames Research Center. |
| 9 | * |
| 10 | * Redistribution and use in source and binary forms, with or without |
| 11 | * modification, are permitted provided that the following conditions |
| 12 | * are met: |
| 13 | * 1. Redistributions of source code must retain the above copyright |
| 14 | * notice, this list of conditions and the following disclaimer. |
| 15 | * 2. Redistributions in binary form must reproduce the above copyright |
| 16 | * notice, this list of conditions and the following disclaimer in the |
| 17 | * documentation and/or other materials provided with the distribution. |
| 18 | * 3. Neither the name of the National Aeronautics & Space Administration |
| 19 | * nor the names of its contributors may be used to endorse or promote |
| 20 | * products derived from this software without specific prior written |
| 21 | * permission. |
| 22 | * |
| 23 | * THIS SOFTWARE IS PROVIDED BY THE NATIONAL AERONAUTICS & SPACE ADMINISTRATION |
| 24 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
| 25 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 26 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ADMINISTRATION OR CONTRIB- |
| 27 | * UTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, |
| 28 | * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 29 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 30 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 31 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 32 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 33 | * POSSIBILITY OF SUCH DAMAGE. |
| 34 | */ |
| 35 | |
| 36 | /* |
| 37 | * Copyright (c) 1992, 1993 |
| 38 | * The Regents of the University of California. All rights reserved. |
| 39 | * |
| 40 | * This code is derived from software contributed to Berkeley by |
| 41 | * John Heidemann of the UCLA Ficus project. |
| 42 | * |
| 43 | * Redistribution and use in source and binary forms, with or without |
| 44 | * modification, are permitted provided that the following conditions |
| 45 | * are met: |
| 46 | * 1. Redistributions of source code must retain the above copyright |
| 47 | * notice, this list of conditions and the following disclaimer. |
| 48 | * 2. Redistributions in binary form must reproduce the above copyright |
| 49 | * notice, this list of conditions and the following disclaimer in the |
| 50 | * documentation and/or other materials provided with the distribution. |
| 51 | * 3. Neither the name of the University nor the names of its contributors |
| 52 | * may be used to endorse or promote products derived from this software |
| 53 | * without specific prior written permission. |
| 54 | * |
| 55 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 56 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 57 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 58 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 59 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 60 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 61 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 62 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 63 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 64 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 65 | * SUCH DAMAGE. |
| 66 | * |
| 67 | * @(#)null_vnops.c 8.6 (Berkeley) 5/27/95 |
| 68 | * |
| 69 | * Ancestors: |
| 70 | * @(#)lofs_vnops.c 1.2 (Berkeley) 6/18/92 |
| 71 | * Id: lofs_vnops.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp |
| 72 | * ...and... |
| 73 | * @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project |
| 74 | */ |
| 75 | |
| 76 | /* |
| 77 | * Generic layer vnode operations. |
| 78 | * |
| 79 | * The layer.h, layer_extern.h, layer_vfs.c, and layer_vnops.c files provide |
| 80 | * the core implementation of stacked file-systems. |
| 81 | * |
| 82 | * The layerfs duplicates a portion of the file system name space under |
| 83 | * a new name. In this respect, it is similar to the loopback file system. |
| 84 | * It differs from the loopback fs in two respects: it is implemented using |
| 85 | * a stackable layers technique, and it is "layerfs-nodes" stack above all |
| 86 | * lower-layer vnodes, not just over directory vnodes. |
| 87 | * |
| 88 | * OPERATION OF LAYERFS |
| 89 | * |
| 90 | * The layerfs is the minimum file system layer, bypassing all possible |
| 91 | * operations to the lower layer for processing there. The majority of its |
| 92 | * activity centers on the bypass routine, through which nearly all vnode |
| 93 | * operations pass. |
| 94 | * |
| 95 | * The bypass routine accepts arbitrary vnode operations for handling by |
| 96 | * the lower layer. It begins by examining vnode operation arguments and |
| 97 | * replacing any layered nodes by their lower-layer equivalents. It then |
| 98 | * invokes an operation on the lower layer. Finally, it replaces the |
| 99 | * layered nodes in the arguments and, if a vnode is returned by the |
| 100 | * operation, stacks a layered node on top of the returned vnode. |
| 101 | * |
| 102 | * The bypass routine in this file, layer_bypass(), is suitable for use |
| 103 | * by many different layered filesystems. It can be used by multiple |
| 104 | * filesystems simultaneously. Alternatively, a layered fs may provide |
| 105 | * its own bypass routine, in which case layer_bypass() should be used as |
| 106 | * a model. For instance, the main functionality provided by umapfs, the user |
| 107 | * identity mapping file system, is handled by a custom bypass routine. |
| 108 | * |
| 109 | * Typically a layered fs registers its selected bypass routine as the |
| 110 | * default vnode operation in its vnodeopv_entry_desc table. Additionally |
| 111 | * the filesystem must store the bypass entry point in the layerm_bypass |
| 112 | * field of struct layer_mount. All other layer routines in this file will |
| 113 | * use the layerm_bypass() routine. |
| 114 | * |
| 115 | * Although the bypass routine handles most operations outright, a number |
| 116 | * of operations are special cased and handled by the layerfs. For instance, |
| 117 | * layer_getattr() must change the fsid being returned. While layer_lock() |
| 118 | * and layer_unlock() must handle any locking for the current vnode as well |
| 119 | * as pass the lock request down. layer_inactive() and layer_reclaim() are |
| 120 | * not bypassed so that they can handle freeing layerfs-specific data. Also, |
| 121 | * certain vnode operations (create, mknod, remove, link, rename, mkdir, |
| 122 | * rmdir, and symlink) change the locking state within the operation. Ideally |
| 123 | * these operations should not change the lock state, but should be changed |
| 124 | * to let the caller of the function unlock them. Otherwise, all intermediate |
| 125 | * vnode layers (such as union, umapfs, etc) must catch these functions to do |
| 126 | * the necessary locking at their layer. |
| 127 | * |
| 128 | * INSTANTIATING VNODE STACKS |
| 129 | * |
| 130 | * Mounting associates "layerfs-nodes" stack and lower layer, in effect |
| 131 | * stacking two VFSes. The initial mount creates a single vnode stack for |
| 132 | * the root of the new layerfs. All other vnode stacks are created as a |
| 133 | * result of vnode operations on this or other layerfs vnode stacks. |
| 134 | * |
| 135 | * New vnode stacks come into existence as a result of an operation which |
| 136 | * returns a vnode. The bypass routine stacks a layerfs-node above the new |
| 137 | * vnode before returning it to the caller. |
| 138 | * |
| 139 | * For example, imagine mounting a null layer with: |
| 140 | * |
| 141 | * "mount_null /usr/include /dev/layer/null" |
| 142 | * |
| 143 | * Changing directory to /dev/layer/null will assign the root layerfs-node, |
| 144 | * which was created when the null layer was mounted). Now consider opening |
| 145 | * "sys". A layer_lookup() would be performed on the root layerfs-node. |
| 146 | * This operation would bypass through to the lower layer which would return |
| 147 | * a vnode representing the UFS "sys". Then, layer_bypass() builds a |
| 148 | * layerfs-node aliasing the UFS "sys" and returns this to the caller. |
| 149 | * Later operations on the layerfs-node "sys" will repeat this process when |
| 150 | * constructing other vnode stacks. |
| 151 | * |
| 152 | * INVOKING OPERATIONS ON LOWER LAYERS |
| 153 | * |
| 154 | * There are two techniques to invoke operations on a lower layer when the |
| 155 | * operation cannot be completely bypassed. Each method is appropriate in |
| 156 | * different situations. In both cases, it is the responsibility of the |
| 157 | * aliasing layer to make the operation arguments "correct" for the lower |
| 158 | * layer by mapping any vnode arguments to the lower layer. |
| 159 | * |
| 160 | * The first approach is to call the aliasing layer's bypass routine. This |
| 161 | * method is most suitable when you wish to invoke the operation currently |
| 162 | * being handled on the lower layer. It has the advantage that the bypass |
| 163 | * routine already must do argument mapping. An example of this is |
| 164 | * layer_getattr(). |
| 165 | * |
| 166 | * A second approach is to directly invoke vnode operations on the lower |
| 167 | * layer with the VOP_OPERATIONNAME interface. The advantage of this method |
| 168 | * is that it is easy to invoke arbitrary operations on the lower layer. |
| 169 | * The disadvantage is that vnode's arguments must be manually mapped. |
| 170 | */ |
| 171 | |
| 172 | #include <sys/cdefs.h> |
| 173 | __KERNEL_RCSID(0, "$NetBSD: layer_vnops.c,v 1.59 2016/08/20 12:37:09 hannken Exp $" ); |
| 174 | |
| 175 | #include <sys/param.h> |
| 176 | #include <sys/systm.h> |
| 177 | #include <sys/proc.h> |
| 178 | #include <sys/time.h> |
| 179 | #include <sys/vnode.h> |
| 180 | #include <sys/mount.h> |
| 181 | #include <sys/namei.h> |
| 182 | #include <sys/kmem.h> |
| 183 | #include <sys/buf.h> |
| 184 | #include <sys/kauth.h> |
| 185 | |
| 186 | #include <miscfs/genfs/layer.h> |
| 187 | #include <miscfs/genfs/layer_extern.h> |
| 188 | #include <miscfs/genfs/genfs.h> |
| 189 | #include <miscfs/specfs/specdev.h> |
| 190 | |
| 191 | /* |
| 192 | * This is the 08-June-99 bypass routine, based on the 10-Apr-92 bypass |
| 193 | * routine by John Heidemann. |
| 194 | * The new element for this version is that the whole nullfs |
| 195 | * system gained the concept of locks on the lower node. |
| 196 | * The 10-Apr-92 version was optimized for speed, throwing away some |
| 197 | * safety checks. It should still always work, but it's not as |
| 198 | * robust to programmer errors. |
| 199 | * |
| 200 | * In general, we map all vnodes going down and unmap them on the way back. |
| 201 | * |
| 202 | * Also, some BSD vnode operations have the side effect of vrele'ing |
| 203 | * their arguments. With stacking, the reference counts are held |
| 204 | * by the upper node, not the lower one, so we must handle these |
| 205 | * side-effects here. This is not of concern in Sun-derived systems |
| 206 | * since there are no such side-effects. |
| 207 | * |
| 208 | * New for the 08-June-99 version: we also handle operations which unlock |
| 209 | * the passed-in node (typically they vput the node). |
| 210 | * |
| 211 | * This makes the following assumptions: |
| 212 | * - only one returned vpp |
| 213 | * - no INOUT vpp's (Sun's vop_open has one of these) |
| 214 | * - the vnode operation vector of the first vnode should be used |
| 215 | * to determine what implementation of the op should be invoked |
| 216 | * - all mapped vnodes are of our vnode-type (NEEDSWORK: |
| 217 | * problems on rmdir'ing mount points and renaming?) |
| 218 | */ |
| 219 | int |
| 220 | layer_bypass(void *v) |
| 221 | { |
| 222 | struct vop_generic_args /* { |
| 223 | struct vnodeop_desc *a_desc; |
| 224 | <other random data follows, presumably> |
| 225 | } */ *ap = v; |
| 226 | int (**our_vnodeop_p)(void *); |
| 227 | struct vnode **this_vp_p; |
| 228 | int error; |
| 229 | struct vnode *old_vps[VDESC_MAX_VPS], *vp0; |
| 230 | struct vnode **vps_p[VDESC_MAX_VPS]; |
| 231 | struct vnode ***vppp; |
| 232 | struct mount *mp; |
| 233 | struct vnodeop_desc *descp = ap->a_desc; |
| 234 | int reles, i, flags; |
| 235 | |
| 236 | #ifdef DIAGNOSTIC |
| 237 | /* |
| 238 | * We require at least one vp. |
| 239 | */ |
| 240 | if (descp->vdesc_vp_offsets == NULL || |
| 241 | descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET) |
| 242 | panic("%s: no vp's in map.\n" , __func__); |
| 243 | #endif |
| 244 | |
| 245 | vps_p[0] = |
| 246 | VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[0], ap); |
| 247 | vp0 = *vps_p[0]; |
| 248 | mp = vp0->v_mount; |
| 249 | flags = MOUNTTOLAYERMOUNT(mp)->layerm_flags; |
| 250 | our_vnodeop_p = vp0->v_op; |
| 251 | |
| 252 | if (flags & LAYERFS_MBYPASSDEBUG) |
| 253 | printf("%s: %s\n" , __func__, descp->vdesc_name); |
| 254 | |
| 255 | /* |
| 256 | * Map the vnodes going in. |
| 257 | * Later, we'll invoke the operation based on |
| 258 | * the first mapped vnode's operation vector. |
| 259 | */ |
| 260 | reles = descp->vdesc_flags; |
| 261 | for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) { |
| 262 | if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET) |
| 263 | break; /* bail out at end of list */ |
| 264 | vps_p[i] = this_vp_p = |
| 265 | VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[i], |
| 266 | ap); |
| 267 | /* |
| 268 | * We're not guaranteed that any but the first vnode |
| 269 | * are of our type. Check for and don't map any |
| 270 | * that aren't. (We must always map first vp or vclean fails.) |
| 271 | */ |
| 272 | if (i && (*this_vp_p == NULL || |
| 273 | (*this_vp_p)->v_op != our_vnodeop_p)) { |
| 274 | old_vps[i] = NULL; |
| 275 | } else { |
| 276 | old_vps[i] = *this_vp_p; |
| 277 | *(vps_p[i]) = LAYERVPTOLOWERVP(*this_vp_p); |
| 278 | /* |
| 279 | * XXX - Several operations have the side effect |
| 280 | * of vrele'ing their vp's. We must account for |
| 281 | * that. (This should go away in the future.) |
| 282 | */ |
| 283 | if (reles & VDESC_VP0_WILLRELE) |
| 284 | vref(*this_vp_p); |
| 285 | } |
| 286 | } |
| 287 | |
| 288 | /* |
| 289 | * Call the operation on the lower layer |
| 290 | * with the modified argument structure. |
| 291 | */ |
| 292 | error = VCALL(*vps_p[0], descp->vdesc_offset, ap); |
| 293 | |
| 294 | /* |
| 295 | * Maintain the illusion of call-by-value |
| 296 | * by restoring vnodes in the argument structure |
| 297 | * to their original value. |
| 298 | */ |
| 299 | reles = descp->vdesc_flags; |
| 300 | for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) { |
| 301 | if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET) |
| 302 | break; /* bail out at end of list */ |
| 303 | if (old_vps[i]) { |
| 304 | *(vps_p[i]) = old_vps[i]; |
| 305 | if (reles & VDESC_VP0_WILLRELE) |
| 306 | vrele(*(vps_p[i])); |
| 307 | } |
| 308 | } |
| 309 | |
| 310 | /* |
| 311 | * Map the possible out-going vpp |
| 312 | * (Assumes that the lower layer always returns |
| 313 | * a VREF'ed vpp unless it gets an error.) |
| 314 | */ |
| 315 | if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET && !error) { |
| 316 | vppp = VOPARG_OFFSETTO(struct vnode***, |
| 317 | descp->vdesc_vpp_offset, ap); |
| 318 | /* |
| 319 | * Only vop_lookup, vop_create, vop_makedir, vop_mknod |
| 320 | * and vop_symlink return vpp's. vop_lookup doesn't call bypass |
| 321 | * as a lookup on "." would generate a locking error. |
| 322 | * So all the calls which get us here have a unlocked vpp. :-) |
| 323 | */ |
| 324 | error = layer_node_create(mp, **vppp, *vppp); |
| 325 | if (error) { |
| 326 | vrele(**vppp); |
| 327 | **vppp = NULL; |
| 328 | } |
| 329 | } |
| 330 | return error; |
| 331 | } |
| 332 | |
| 333 | /* |
| 334 | * We have to carry on the locking protocol on the layer vnodes |
| 335 | * as we progress through the tree. We also have to enforce read-only |
| 336 | * if this layer is mounted read-only. |
| 337 | */ |
| 338 | int |
| 339 | layer_lookup(void *v) |
| 340 | { |
| 341 | struct vop_lookup_v2_args /* { |
| 342 | struct vnodeop_desc *a_desc; |
| 343 | struct vnode * a_dvp; |
| 344 | struct vnode ** a_vpp; |
| 345 | struct componentname * a_cnp; |
| 346 | } */ *ap = v; |
| 347 | struct componentname *cnp = ap->a_cnp; |
| 348 | struct vnode *dvp, *lvp, *ldvp; |
| 349 | int error, flags = cnp->cn_flags; |
| 350 | |
| 351 | dvp = ap->a_dvp; |
| 352 | |
| 353 | if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && |
| 354 | (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { |
| 355 | *ap->a_vpp = NULL; |
| 356 | return EROFS; |
| 357 | } |
| 358 | |
| 359 | ldvp = LAYERVPTOLOWERVP(dvp); |
| 360 | ap->a_dvp = ldvp; |
| 361 | error = VCALL(ldvp, ap->a_desc->vdesc_offset, ap); |
| 362 | lvp = *ap->a_vpp; |
| 363 | *ap->a_vpp = NULL; |
| 364 | |
| 365 | if (error == EJUSTRETURN && (flags & ISLASTCN) && |
| 366 | (dvp->v_mount->mnt_flag & MNT_RDONLY) && |
| 367 | (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) |
| 368 | error = EROFS; |
| 369 | |
| 370 | /* |
| 371 | * We must do the same locking and unlocking at this layer as |
| 372 | * is done in the layers below us. |
| 373 | */ |
| 374 | if (ldvp == lvp) { |
| 375 | /* |
| 376 | * Got the same object back, because we looked up ".", |
| 377 | * or ".." in the root node of a mount point. |
| 378 | * So we make another reference to dvp and return it. |
| 379 | */ |
| 380 | vref(dvp); |
| 381 | *ap->a_vpp = dvp; |
| 382 | vrele(lvp); |
| 383 | } else if (lvp != NULL) { |
| 384 | /* Note: dvp and ldvp are both locked. */ |
| 385 | error = layer_node_create(dvp->v_mount, lvp, ap->a_vpp); |
| 386 | if (error) { |
| 387 | vrele(lvp); |
| 388 | } |
| 389 | } |
| 390 | return error; |
| 391 | } |
| 392 | |
| 393 | /* |
| 394 | * Setattr call. Disallow write attempts if the layer is mounted read-only. |
| 395 | */ |
| 396 | int |
| 397 | layer_setattr(void *v) |
| 398 | { |
| 399 | struct vop_setattr_args /* { |
| 400 | struct vnodeop_desc *a_desc; |
| 401 | struct vnode *a_vp; |
| 402 | struct vattr *a_vap; |
| 403 | kauth_cred_t a_cred; |
| 404 | struct lwp *a_l; |
| 405 | } */ *ap = v; |
| 406 | struct vnode *vp = ap->a_vp; |
| 407 | struct vattr *vap = ap->a_vap; |
| 408 | |
| 409 | if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || |
| 410 | vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || |
| 411 | vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && |
| 412 | (vp->v_mount->mnt_flag & MNT_RDONLY)) |
| 413 | return EROFS; |
| 414 | if (vap->va_size != VNOVAL) { |
| 415 | switch (vp->v_type) { |
| 416 | case VDIR: |
| 417 | return EISDIR; |
| 418 | case VCHR: |
| 419 | case VBLK: |
| 420 | case VSOCK: |
| 421 | case VFIFO: |
| 422 | return 0; |
| 423 | case VREG: |
| 424 | case VLNK: |
| 425 | default: |
| 426 | /* |
| 427 | * Disallow write attempts if the filesystem is |
| 428 | * mounted read-only. |
| 429 | */ |
| 430 | if (vp->v_mount->mnt_flag & MNT_RDONLY) |
| 431 | return EROFS; |
| 432 | } |
| 433 | } |
| 434 | return LAYERFS_DO_BYPASS(vp, ap); |
| 435 | } |
| 436 | |
| 437 | /* |
| 438 | * We handle getattr only to change the fsid. |
| 439 | */ |
| 440 | int |
| 441 | layer_getattr(void *v) |
| 442 | { |
| 443 | struct vop_getattr_args /* { |
| 444 | struct vnode *a_vp; |
| 445 | struct vattr *a_vap; |
| 446 | kauth_cred_t a_cred; |
| 447 | struct lwp *a_l; |
| 448 | } */ *ap = v; |
| 449 | struct vnode *vp = ap->a_vp; |
| 450 | int error; |
| 451 | |
| 452 | error = LAYERFS_DO_BYPASS(vp, ap); |
| 453 | if (error) { |
| 454 | return error; |
| 455 | } |
| 456 | /* Requires that arguments be restored. */ |
| 457 | ap->a_vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0]; |
| 458 | return 0; |
| 459 | } |
| 460 | |
| 461 | int |
| 462 | layer_access(void *v) |
| 463 | { |
| 464 | struct vop_access_args /* { |
| 465 | struct vnode *a_vp; |
| 466 | int a_mode; |
| 467 | kauth_cred_t a_cred; |
| 468 | struct lwp *a_l; |
| 469 | } */ *ap = v; |
| 470 | struct vnode *vp = ap->a_vp; |
| 471 | mode_t mode = ap->a_mode; |
| 472 | |
| 473 | /* |
| 474 | * Disallow write attempts on read-only layers; |
| 475 | * unless the file is a socket, fifo, or a block or |
| 476 | * character device resident on the file system. |
| 477 | */ |
| 478 | if (mode & VWRITE) { |
| 479 | switch (vp->v_type) { |
| 480 | case VDIR: |
| 481 | case VLNK: |
| 482 | case VREG: |
| 483 | if (vp->v_mount->mnt_flag & MNT_RDONLY) |
| 484 | return EROFS; |
| 485 | break; |
| 486 | default: |
| 487 | break; |
| 488 | } |
| 489 | } |
| 490 | return LAYERFS_DO_BYPASS(vp, ap); |
| 491 | } |
| 492 | |
| 493 | /* |
| 494 | * We must handle open to be able to catch MNT_NODEV and friends. |
| 495 | */ |
| 496 | int |
| 497 | layer_open(void *v) |
| 498 | { |
| 499 | struct vop_open_args /* { |
| 500 | const struct vnodeop_desc *a_desc; |
| 501 | struct vnode *a_vp; |
| 502 | int a_mode; |
| 503 | kauth_cred_t a_cred; |
| 504 | } */ *ap = v; |
| 505 | struct vnode *vp = ap->a_vp; |
| 506 | enum vtype lower_type = LAYERVPTOLOWERVP(vp)->v_type; |
| 507 | |
| 508 | if (((lower_type == VBLK) || (lower_type == VCHR)) && |
| 509 | (vp->v_mount->mnt_flag & MNT_NODEV)) |
| 510 | return ENXIO; |
| 511 | |
| 512 | return LAYERFS_DO_BYPASS(vp, ap); |
| 513 | } |
| 514 | |
| 515 | /* |
| 516 | * If vinvalbuf is calling us, it's a "shallow fsync" -- don't bother |
| 517 | * syncing the underlying vnodes, since they'll be fsync'ed when |
| 518 | * reclaimed; otherwise, pass it through to the underlying layer. |
| 519 | * |
| 520 | * XXX Do we still need to worry about shallow fsync? |
| 521 | */ |
| 522 | int |
| 523 | layer_fsync(void *v) |
| 524 | { |
| 525 | struct vop_fsync_args /* { |
| 526 | struct vnode *a_vp; |
| 527 | kauth_cred_t a_cred; |
| 528 | int a_flags; |
| 529 | off_t offlo; |
| 530 | off_t offhi; |
| 531 | struct lwp *a_l; |
| 532 | } */ *ap = v; |
| 533 | int error; |
| 534 | |
| 535 | if (ap->a_flags & FSYNC_RECLAIM) { |
| 536 | return 0; |
| 537 | } |
| 538 | if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) { |
| 539 | error = spec_fsync(v); |
| 540 | if (error) |
| 541 | return error; |
| 542 | } |
| 543 | return LAYERFS_DO_BYPASS(ap->a_vp, ap); |
| 544 | } |
| 545 | |
| 546 | int |
| 547 | layer_inactive(void *v) |
| 548 | { |
| 549 | struct vop_inactive_args /* { |
| 550 | struct vnode *a_vp; |
| 551 | bool *a_recycle; |
| 552 | } */ *ap = v; |
| 553 | struct vnode *vp = ap->a_vp; |
| 554 | |
| 555 | /* |
| 556 | * If we did a remove, don't cache the node. |
| 557 | */ |
| 558 | *ap->a_recycle = ((VTOLAYER(vp)->layer_flags & LAYERFS_REMOVED) != 0); |
| 559 | |
| 560 | /* |
| 561 | * Do nothing (and _don't_ bypass). |
| 562 | * Wait to vrele lowervp until reclaim, |
| 563 | * so that until then our layer_node is in the |
| 564 | * cache and reusable. |
| 565 | * |
| 566 | * NEEDSWORK: Someday, consider inactive'ing |
| 567 | * the lowervp and then trying to reactivate it |
| 568 | * with capabilities (v_id) |
| 569 | * like they do in the name lookup cache code. |
| 570 | * That's too much work for now. |
| 571 | */ |
| 572 | VOP_UNLOCK(vp); |
| 573 | return 0; |
| 574 | } |
| 575 | |
| 576 | int |
| 577 | layer_remove(void *v) |
| 578 | { |
| 579 | struct vop_remove_args /* { |
| 580 | struct vonde *a_dvp; |
| 581 | struct vnode *a_vp; |
| 582 | struct componentname *a_cnp; |
| 583 | } */ *ap = v; |
| 584 | struct vnode *vp = ap->a_vp; |
| 585 | int error; |
| 586 | |
| 587 | vref(vp); |
| 588 | error = LAYERFS_DO_BYPASS(vp, ap); |
| 589 | if (error == 0) { |
| 590 | VTOLAYER(vp)->layer_flags |= LAYERFS_REMOVED; |
| 591 | } |
| 592 | vrele(vp); |
| 593 | |
| 594 | return error; |
| 595 | } |
| 596 | |
| 597 | int |
| 598 | layer_rename(void *v) |
| 599 | { |
| 600 | struct vop_rename_args /* { |
| 601 | struct vnode *a_fdvp; |
| 602 | struct vnode *a_fvp; |
| 603 | struct componentname *a_fcnp; |
| 604 | struct vnode *a_tdvp; |
| 605 | struct vnode *a_tvp; |
| 606 | struct componentname *a_tcnp; |
| 607 | } */ *ap = v; |
| 608 | struct vnode *fdvp = ap->a_fdvp, *tvp; |
| 609 | int error; |
| 610 | |
| 611 | tvp = ap->a_tvp; |
| 612 | if (tvp) { |
| 613 | if (tvp->v_mount != fdvp->v_mount) |
| 614 | tvp = NULL; |
| 615 | else |
| 616 | vref(tvp); |
| 617 | } |
| 618 | error = LAYERFS_DO_BYPASS(fdvp, ap); |
| 619 | if (tvp) { |
| 620 | if (error == 0) |
| 621 | VTOLAYER(tvp)->layer_flags |= LAYERFS_REMOVED; |
| 622 | vrele(tvp); |
| 623 | } |
| 624 | return error; |
| 625 | } |
| 626 | |
| 627 | int |
| 628 | layer_rmdir(void *v) |
| 629 | { |
| 630 | struct vop_rmdir_args /* { |
| 631 | struct vnode *a_dvp; |
| 632 | struct vnode *a_vp; |
| 633 | struct componentname *a_cnp; |
| 634 | } */ *ap = v; |
| 635 | int error; |
| 636 | struct vnode *vp = ap->a_vp; |
| 637 | |
| 638 | vref(vp); |
| 639 | error = LAYERFS_DO_BYPASS(vp, ap); |
| 640 | if (error == 0) { |
| 641 | VTOLAYER(vp)->layer_flags |= LAYERFS_REMOVED; |
| 642 | } |
| 643 | vrele(vp); |
| 644 | |
| 645 | return error; |
| 646 | } |
| 647 | |
| 648 | int |
| 649 | layer_revoke(void *v) |
| 650 | { |
| 651 | struct vop_revoke_args /* { |
| 652 | struct vnode *a_vp; |
| 653 | int a_flags; |
| 654 | } */ *ap = v; |
| 655 | struct vnode *vp = ap->a_vp; |
| 656 | struct vnode *lvp = LAYERVPTOLOWERVP(vp); |
| 657 | int error; |
| 658 | |
| 659 | /* |
| 660 | * We will most likely end up in vclean which uses the v_usecount |
| 661 | * to determine if a vnode is active. Take an extra reference on |
| 662 | * the lower vnode so it will always close and inactivate. |
| 663 | */ |
| 664 | vref(lvp); |
| 665 | error = LAYERFS_DO_BYPASS(vp, ap); |
| 666 | vrele(lvp); |
| 667 | |
| 668 | return error; |
| 669 | } |
| 670 | |
| 671 | int |
| 672 | layer_reclaim(void *v) |
| 673 | { |
| 674 | struct vop_reclaim_args /* { |
| 675 | struct vnode *a_vp; |
| 676 | struct lwp *a_l; |
| 677 | } */ *ap = v; |
| 678 | struct vnode *vp = ap->a_vp; |
| 679 | struct layer_mount *lmp = MOUNTTOLAYERMOUNT(vp->v_mount); |
| 680 | struct layer_node *xp = VTOLAYER(vp); |
| 681 | struct vnode *lowervp = xp->layer_lowervp; |
| 682 | |
| 683 | /* |
| 684 | * Note: in vop_reclaim, the node's struct lock has been |
| 685 | * decomissioned, so we have to be careful about calling |
| 686 | * VOP's on ourself. We must be careful as VXLOCK is set. |
| 687 | */ |
| 688 | if (vp == lmp->layerm_rootvp) { |
| 689 | /* |
| 690 | * Oops! We no longer have a root node. Most likely reason is |
| 691 | * that someone forcably unmunted the underlying fs. |
| 692 | * |
| 693 | * Now getting the root vnode will fail. We're dead. :-( |
| 694 | */ |
| 695 | lmp->layerm_rootvp = NULL; |
| 696 | } |
| 697 | /* After this assignment, this node will not be re-used. */ |
| 698 | xp->layer_lowervp = NULL; |
| 699 | kmem_free(vp->v_data, lmp->layerm_size); |
| 700 | vp->v_data = NULL; |
| 701 | vrele(lowervp); |
| 702 | |
| 703 | return 0; |
| 704 | } |
| 705 | |
| 706 | int |
| 707 | layer_lock(void *v) |
| 708 | { |
| 709 | struct vop_lock_args /* { |
| 710 | struct vnode *a_vp; |
| 711 | int a_flags; |
| 712 | } */ *ap = v; |
| 713 | struct vnode *vp = ap->a_vp; |
| 714 | struct vnode *lowervp = LAYERVPTOLOWERVP(vp); |
| 715 | int flags = ap->a_flags; |
| 716 | int error; |
| 717 | |
| 718 | if (ISSET(flags, LK_NOWAIT)) { |
| 719 | error = VOP_LOCK(lowervp, flags); |
| 720 | if (error) |
| 721 | return error; |
| 722 | if (mutex_tryenter(vp->v_interlock)) { |
| 723 | error = vdead_check(vp, VDEAD_NOWAIT); |
| 724 | mutex_exit(vp->v_interlock); |
| 725 | } else |
| 726 | error = EBUSY; |
| 727 | if (error) |
| 728 | VOP_UNLOCK(lowervp); |
| 729 | return error; |
| 730 | } |
| 731 | |
| 732 | error = VOP_LOCK(lowervp, flags); |
| 733 | if (error) |
| 734 | return error; |
| 735 | |
| 736 | mutex_enter(vp->v_interlock); |
| 737 | error = vdead_check(vp, VDEAD_NOWAIT); |
| 738 | if (error) { |
| 739 | VOP_UNLOCK(lowervp); |
| 740 | error = vdead_check(vp, 0); |
| 741 | KASSERT(error == ENOENT); |
| 742 | } |
| 743 | mutex_exit(vp->v_interlock); |
| 744 | |
| 745 | return error; |
| 746 | } |
| 747 | |
| 748 | /* |
| 749 | * We just feed the returned vnode up to the caller - there's no need |
| 750 | * to build a layer node on top of the node on which we're going to do |
| 751 | * i/o. :-) |
| 752 | */ |
| 753 | int |
| 754 | layer_bmap(void *v) |
| 755 | { |
| 756 | struct vop_bmap_args /* { |
| 757 | struct vnode *a_vp; |
| 758 | daddr_t a_bn; |
| 759 | struct vnode **a_vpp; |
| 760 | daddr_t *a_bnp; |
| 761 | int *a_runp; |
| 762 | } */ *ap = v; |
| 763 | struct vnode *vp; |
| 764 | |
| 765 | vp = LAYERVPTOLOWERVP(ap->a_vp); |
| 766 | ap->a_vp = vp; |
| 767 | |
| 768 | return VCALL(vp, ap->a_desc->vdesc_offset, ap); |
| 769 | } |
| 770 | |
| 771 | int |
| 772 | layer_print(void *v) |
| 773 | { |
| 774 | struct vop_print_args /* { |
| 775 | struct vnode *a_vp; |
| 776 | } */ *ap = v; |
| 777 | struct vnode *vp = ap->a_vp; |
| 778 | printf ("\ttag VT_LAYERFS, vp=%p, lowervp=%p\n" , vp, LAYERVPTOLOWERVP(vp)); |
| 779 | return 0; |
| 780 | } |
| 781 | |
| 782 | int |
| 783 | layer_getpages(void *v) |
| 784 | { |
| 785 | struct vop_getpages_args /* { |
| 786 | struct vnode *a_vp; |
| 787 | voff_t a_offset; |
| 788 | struct vm_page **a_m; |
| 789 | int *a_count; |
| 790 | int a_centeridx; |
| 791 | vm_prot_t a_access_type; |
| 792 | int a_advice; |
| 793 | int a_flags; |
| 794 | } */ *ap = v; |
| 795 | struct vnode *vp = ap->a_vp; |
| 796 | |
| 797 | KASSERT(mutex_owned(vp->v_interlock)); |
| 798 | |
| 799 | if (ap->a_flags & PGO_LOCKED) { |
| 800 | return EBUSY; |
| 801 | } |
| 802 | ap->a_vp = LAYERVPTOLOWERVP(vp); |
| 803 | KASSERT(vp->v_interlock == ap->a_vp->v_interlock); |
| 804 | |
| 805 | /* Just pass the request on to the underlying layer. */ |
| 806 | return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap); |
| 807 | } |
| 808 | |
| 809 | int |
| 810 | layer_putpages(void *v) |
| 811 | { |
| 812 | struct vop_putpages_args /* { |
| 813 | struct vnode *a_vp; |
| 814 | voff_t a_offlo; |
| 815 | voff_t a_offhi; |
| 816 | int a_flags; |
| 817 | } */ *ap = v; |
| 818 | struct vnode *vp = ap->a_vp; |
| 819 | |
| 820 | KASSERT(mutex_owned(vp->v_interlock)); |
| 821 | |
| 822 | ap->a_vp = LAYERVPTOLOWERVP(vp); |
| 823 | KASSERT(vp->v_interlock == ap->a_vp->v_interlock); |
| 824 | |
| 825 | if (ap->a_flags & PGO_RECLAIM) { |
| 826 | mutex_exit(vp->v_interlock); |
| 827 | return 0; |
| 828 | } |
| 829 | |
| 830 | /* Just pass the request on to the underlying layer. */ |
| 831 | return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap); |
| 832 | } |
| 833 | |