| 1 | /* $NetBSD: puffs_vfsops.c,v 1.118 2015/12/20 01:34:00 christos Exp $ */ |
| 2 | |
| 3 | /* |
| 4 | * Copyright (c) 2005, 2006 Antti Kantee. All Rights Reserved. |
| 5 | * |
| 6 | * Development of this software was supported by the |
| 7 | * Google Summer of Code program and the Ulla Tuominen Foundation. |
| 8 | * The Google SoC project was mentored by Bill Studenmund. |
| 9 | * |
| 10 | * Redistribution and use in source and binary forms, with or without |
| 11 | * modification, are permitted provided that the following conditions |
| 12 | * are met: |
| 13 | * 1. Redistributions of source code must retain the above copyright |
| 14 | * notice, this list of conditions and the following disclaimer. |
| 15 | * 2. Redistributions in binary form must reproduce the above copyright |
| 16 | * notice, this list of conditions and the following disclaimer in the |
| 17 | * documentation and/or other materials provided with the distribution. |
| 18 | * |
| 19 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS |
| 20 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| 21 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| 22 | * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
| 23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| 25 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 29 | * SUCH DAMAGE. |
| 30 | */ |
| 31 | |
| 32 | #include <sys/cdefs.h> |
| 33 | __KERNEL_RCSID(0, "$NetBSD: puffs_vfsops.c,v 1.118 2015/12/20 01:34:00 christos Exp $" ); |
| 34 | |
| 35 | #include <sys/param.h> |
| 36 | #include <sys/kernel.h> |
| 37 | #include <sys/mount.h> |
| 38 | #include <sys/extattr.h> |
| 39 | #include <sys/queue.h> |
| 40 | #include <sys/vnode.h> |
| 41 | #include <sys/dirent.h> |
| 42 | #include <sys/kauth.h> |
| 43 | #include <sys/proc.h> |
| 44 | #include <sys/module.h> |
| 45 | #include <sys/kthread.h> |
| 46 | |
| 47 | #include <uvm/uvm.h> |
| 48 | |
| 49 | #include <dev/putter/putter_sys.h> |
| 50 | |
| 51 | #include <miscfs/genfs/genfs.h> |
| 52 | |
| 53 | #include <fs/puffs/puffs_msgif.h> |
| 54 | #include <fs/puffs/puffs_sys.h> |
| 55 | |
| 56 | #include <lib/libkern/libkern.h> |
| 57 | |
| 58 | #include <nfs/nfsproto.h> /* for fh sizes */ |
| 59 | |
| 60 | MODULE(MODULE_CLASS_VFS, puffs, "putter" ); |
| 61 | |
| 62 | VFS_PROTOS(puffs_vfsop); |
| 63 | |
| 64 | static struct putter_ops puffs_putter = { |
| 65 | .pop_getout = puffs_msgif_getout, |
| 66 | .pop_releaseout = puffs_msgif_releaseout, |
| 67 | .pop_waitcount = puffs_msgif_waitcount, |
| 68 | .pop_dispatch = puffs_msgif_dispatch, |
| 69 | .pop_close = puffs_msgif_close, |
| 70 | }; |
| 71 | |
| 72 | static const struct genfs_ops puffs_genfsops = { |
| 73 | .gop_size = puffs_gop_size, |
| 74 | .gop_write = genfs_gop_write, |
| 75 | .gop_markupdate = puffs_gop_markupdate, |
| 76 | #if 0 |
| 77 | .gop_alloc, should ask userspace |
| 78 | #endif |
| 79 | }; |
| 80 | |
/*
 * Compile-time guard against accidental size changes in the data
 * structures used by the puffs protocol.  The expected sizes are only
 * checked on i386 ELF builds.
 */
#if defined(__i386__) && defined(__ELF__)
CTASSERT(sizeof(struct puffs_req) == 44);
CTASSERT(sizeof(struct vattr) == 136);
CTASSERT(sizeof(struct puffs_kargs) == 3928);
#endif
| 90 | |
| 91 | int |
| 92 | puffs_vfsop_mount(struct mount *mp, const char *path, void *data, |
| 93 | size_t *data_len) |
| 94 | { |
| 95 | struct puffs_mount *pmp = NULL; |
| 96 | struct puffs_kargs *args; |
| 97 | char fstype[_VFS_NAMELEN]; |
| 98 | char *p; |
| 99 | int error = 0, i; |
| 100 | pid_t mntpid = curlwp->l_proc->p_pid; |
| 101 | |
| 102 | if (data == NULL) |
| 103 | return EINVAL; |
| 104 | if (*data_len < sizeof *args) |
| 105 | return EINVAL; |
| 106 | |
| 107 | if (mp->mnt_flag & MNT_GETARGS) { |
| 108 | pmp = MPTOPUFFSMP(mp); |
| 109 | *(struct puffs_kargs *)data = pmp->pmp_args; |
| 110 | *data_len = sizeof *args; |
| 111 | return 0; |
| 112 | } |
| 113 | |
| 114 | /* update is not supported currently */ |
| 115 | if (mp->mnt_flag & MNT_UPDATE) |
| 116 | return EOPNOTSUPP; |
| 117 | |
| 118 | args = (struct puffs_kargs *)data; |
| 119 | |
| 120 | if (args->pa_vers != PUFFSVERSION) { |
| 121 | printf("puffs_mount: development version mismatch: " |
| 122 | "kernel %d, lib %d\n" , PUFFSVERSION, args->pa_vers); |
| 123 | error = EINVAL; |
| 124 | goto out; |
| 125 | } |
| 126 | |
| 127 | if ((args->pa_flags & ~PUFFS_KFLAG_MASK) != 0) { |
| 128 | printf("puffs_mount: invalid KFLAGs 0x%x\n" , args->pa_flags); |
| 129 | error = EINVAL; |
| 130 | goto out; |
| 131 | } |
| 132 | if ((args->pa_fhflags & ~PUFFS_FHFLAG_MASK) != 0) { |
| 133 | printf("puffs_mount: invalid FHFLAGs 0x%x\n" , args->pa_fhflags); |
| 134 | error = EINVAL; |
| 135 | goto out; |
| 136 | } |
| 137 | |
| 138 | for (i = 0; i < __arraycount(args->pa_spare); i++) { |
| 139 | if (args->pa_spare[i] != 0) { |
| 140 | printf("puffs_mount: pa_spare[%d] = 0x%x\n" , |
| 141 | i, args->pa_spare[i]); |
| 142 | error = EINVAL; |
| 143 | goto out; |
| 144 | } |
| 145 | } |
| 146 | |
| 147 | /* use dummy value for passthrough */ |
| 148 | if (args->pa_fhflags & PUFFS_FHFLAG_PASSTHROUGH) |
| 149 | args->pa_fhsize = sizeof(struct fid); |
| 150 | |
| 151 | /* sanitize file handle length */ |
| 152 | if (PUFFS_TOFHSIZE(args->pa_fhsize) > FHANDLE_SIZE_MAX) { |
| 153 | printf("puffs_mount: handle size %zu too large\n" , |
| 154 | args->pa_fhsize); |
| 155 | error = EINVAL; |
| 156 | goto out; |
| 157 | } |
| 158 | /* sanity check file handle max sizes */ |
| 159 | if (args->pa_fhsize && args->pa_fhflags & PUFFS_FHFLAG_PROTOMASK) { |
| 160 | size_t kfhsize = PUFFS_TOFHSIZE(args->pa_fhsize); |
| 161 | |
| 162 | if (args->pa_fhflags & PUFFS_FHFLAG_NFSV2) { |
| 163 | if (NFSX_FHTOOBIG_P(kfhsize, 0)) { |
| 164 | printf("puffs_mount: fhsize larger than " |
| 165 | "NFSv2 max %d\n" , |
| 166 | PUFFS_FROMFHSIZE(NFSX_V2FH)); |
| 167 | error = EINVAL; |
| 168 | goto out; |
| 169 | } |
| 170 | } |
| 171 | |
| 172 | if (args->pa_fhflags & PUFFS_FHFLAG_NFSV3) { |
| 173 | if (NFSX_FHTOOBIG_P(kfhsize, 1)) { |
| 174 | printf("puffs_mount: fhsize larger than " |
| 175 | "NFSv3 max %d\n" , |
| 176 | PUFFS_FROMFHSIZE(NFSX_V3FHMAX)); |
| 177 | error = EINVAL; |
| 178 | goto out; |
| 179 | } |
| 180 | } |
| 181 | } |
| 182 | |
| 183 | /* don't allow non-printing characters (like my sweet umlauts.. snif) */ |
| 184 | args->pa_typename[sizeof(args->pa_typename)-1] = '\0'; |
| 185 | for (p = args->pa_typename; *p; p++) |
| 186 | if (*p < ' ' || *p > '~') |
| 187 | *p = '.'; |
| 188 | |
| 189 | args->pa_mntfromname[sizeof(args->pa_mntfromname)-1] = '\0'; |
| 190 | for (p = args->pa_mntfromname; *p; p++) |
| 191 | if (*p < ' ' || *p > '~') |
| 192 | *p = '.'; |
| 193 | |
| 194 | /* build real name */ |
| 195 | (void)strlcpy(fstype, PUFFS_TYPEPREFIX, sizeof(fstype)); |
| 196 | (void)strlcat(fstype, args->pa_typename, sizeof(fstype)); |
| 197 | |
| 198 | /* inform user server if it got the max request size it wanted */ |
| 199 | if (args->pa_maxmsglen == 0 || args->pa_maxmsglen > PUFFS_MSG_MAXSIZE) |
| 200 | args->pa_maxmsglen = PUFFS_MSG_MAXSIZE; |
| 201 | else if (args->pa_maxmsglen < 2*PUFFS_MSGSTRUCT_MAX) |
| 202 | args->pa_maxmsglen = 2*PUFFS_MSGSTRUCT_MAX; |
| 203 | |
| 204 | (void)strlcpy(args->pa_typename, fstype, sizeof(args->pa_typename)); |
| 205 | |
| 206 | error = set_statvfs_info(path, UIO_USERSPACE, args->pa_mntfromname, |
| 207 | UIO_SYSSPACE, fstype, mp, curlwp); |
| 208 | if (error) |
| 209 | goto out; |
| 210 | mp->mnt_stat.f_iosize = DEV_BSIZE; |
| 211 | mp->mnt_stat.f_namemax = args->pa_svfsb.f_namemax; |
| 212 | |
| 213 | /* |
| 214 | * We can't handle the VFS_STATVFS() mount_domount() does |
| 215 | * after VFS_MOUNT() because we'd deadlock, so handle it |
| 216 | * here already. |
| 217 | */ |
| 218 | copy_statvfs_info(&args->pa_svfsb, mp); |
| 219 | (void)memcpy(&mp->mnt_stat, &args->pa_svfsb, sizeof(mp->mnt_stat)); |
| 220 | |
| 221 | KASSERT(curlwp != uvm.pagedaemon_lwp); |
| 222 | pmp = kmem_zalloc(sizeof(struct puffs_mount), KM_SLEEP); |
| 223 | |
| 224 | mp->mnt_fs_bshift = DEV_BSHIFT; |
| 225 | mp->mnt_dev_bshift = DEV_BSHIFT; |
| 226 | mp->mnt_flag &= ~MNT_LOCAL; /* we don't really know, so ... */ |
| 227 | mp->mnt_data = pmp; |
| 228 | |
| 229 | #if 0 |
| 230 | /* |
| 231 | * XXX: puffs code is MPSAFE. However, VFS really isn't. |
| 232 | * Currently, there is nothing which protects an inode from |
| 233 | * reclaim while there are threads inside the file system. |
| 234 | * This means that in the event of a server crash, an MPSAFE |
| 235 | * mount is likely to end up accessing invalid memory. For the |
| 236 | * non-mpsafe case, the kernel lock, general structure of |
| 237 | * puffs and pmp_refcount protect the threads during escape. |
| 238 | * |
| 239 | * Fixing this will require: |
| 240 | * a) fixing vfs |
| 241 | * OR |
| 242 | * b) adding a small sleep to puffs_msgif_close() between |
| 243 | * userdead() and dounmount(). |
| 244 | * (well, this isn't really a fix, but would solve |
| 245 | * 99.999% of the race conditions). |
| 246 | * |
| 247 | * Also, in the event of "b", unmount -f should be used, |
| 248 | * like with any other file system, sparingly and only when |
| 249 | * it is "known" to be safe. |
| 250 | */ |
| 251 | mp->mnt_iflags |= IMNT_MPSAFE; |
| 252 | #endif |
| 253 | |
| 254 | pmp->pmp_status = PUFFSTAT_MOUNTING; |
| 255 | pmp->pmp_mp = mp; |
| 256 | pmp->pmp_msg_maxsize = args->pa_maxmsglen; |
| 257 | pmp->pmp_args = *args; |
| 258 | |
| 259 | /* |
| 260 | * Inform the fileops processing code that we have a mountpoint. |
| 261 | * If it doesn't know about anyone with our pid/fd having the |
| 262 | * device open, punt |
| 263 | */ |
| 264 | if ((pmp->pmp_pi |
| 265 | = putter_attach(mntpid, args->pa_fd, pmp, &puffs_putter)) == NULL) { |
| 266 | error = ENOENT; |
| 267 | goto out; |
| 268 | } |
| 269 | |
| 270 | /* XXX: check parameters */ |
| 271 | pmp->pmp_root_cookie = args->pa_root_cookie; |
| 272 | switch (args->pa_root_vtype) { |
| 273 | case VNON: case VREG: case VDIR: case VBLK: |
| 274 | case VCHR: case VLNK: case VSOCK: case VFIFO: |
| 275 | break; |
| 276 | default: |
| 277 | error = EINVAL; |
| 278 | goto out; |
| 279 | } |
| 280 | pmp->pmp_root_vtype = args->pa_root_vtype; |
| 281 | |
| 282 | if (args->pa_root_vsize < 0) { |
| 283 | error = EINVAL; |
| 284 | goto out; |
| 285 | } |
| 286 | pmp->pmp_root_vsize = args->pa_root_vsize; |
| 287 | |
| 288 | pmp->pmp_root_rdev = args->pa_root_rdev; |
| 289 | pmp->pmp_docompat = args->pa_time32; |
| 290 | |
| 291 | mutex_init(&pmp->pmp_lock, MUTEX_DEFAULT, IPL_NONE); |
| 292 | mutex_init(&pmp->pmp_sopmtx, MUTEX_DEFAULT, IPL_NONE); |
| 293 | cv_init(&pmp->pmp_msg_waiter_cv, "puffsget" ); |
| 294 | cv_init(&pmp->pmp_refcount_cv, "puffsref" ); |
| 295 | cv_init(&pmp->pmp_unmounting_cv, "puffsum" ); |
| 296 | cv_init(&pmp->pmp_sopcv, "puffsop" ); |
| 297 | TAILQ_INIT(&pmp->pmp_msg_touser); |
| 298 | TAILQ_INIT(&pmp->pmp_msg_replywait); |
| 299 | TAILQ_INIT(&pmp->pmp_sopfastreqs); |
| 300 | TAILQ_INIT(&pmp->pmp_sopnodereqs); |
| 301 | |
| 302 | if ((error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, |
| 303 | puffs_sop_thread, pmp, NULL, "puffsop" )) != 0) |
| 304 | goto out; |
| 305 | pmp->pmp_sopthrcount = 1; |
| 306 | |
| 307 | DPRINTF(("puffs_mount: mount point at %p, puffs specific at %p\n" , |
| 308 | mp, MPTOPUFFSMP(mp))); |
| 309 | |
| 310 | vfs_getnewfsid(mp); |
| 311 | |
| 312 | out: |
| 313 | if (error && pmp && pmp->pmp_pi) |
| 314 | putter_detach(pmp->pmp_pi); |
| 315 | if (error && pmp) |
| 316 | kmem_free(pmp, sizeof(struct puffs_mount)); |
| 317 | return error; |
| 318 | } |
| 319 | |
| 320 | int |
| 321 | puffs_vfsop_start(struct mount *mp, int flags) |
| 322 | { |
| 323 | struct puffs_mount *pmp = MPTOPUFFSMP(mp); |
| 324 | |
| 325 | KASSERT(pmp->pmp_status == PUFFSTAT_MOUNTING); |
| 326 | pmp->pmp_status = PUFFSTAT_RUNNING; |
| 327 | |
| 328 | return 0; |
| 329 | } |
| 330 | |
| 331 | int |
| 332 | puffs_vfsop_unmount(struct mount *mp, int mntflags) |
| 333 | { |
| 334 | PUFFS_MSG_VARS(vfs, unmount); |
| 335 | struct puffs_mount *pmp; |
| 336 | int error, force; |
| 337 | |
| 338 | error = 0; |
| 339 | force = mntflags & MNT_FORCE; |
| 340 | pmp = MPTOPUFFSMP(mp); |
| 341 | |
| 342 | DPRINTF(("puffs_unmount: detach filesystem from vfs, current " |
| 343 | "status 0x%x\n" , pmp->pmp_status)); |
| 344 | |
| 345 | /* |
| 346 | * flush all the vnodes. VOP_RECLAIM() takes care that the |
| 347 | * root vnode does not get flushed until unmount. The |
| 348 | * userspace root node cookie is stored in the mount |
| 349 | * structure, so we can always re-instantiate a root vnode, |
| 350 | * should userspace unmount decide it doesn't want to |
| 351 | * cooperate. |
| 352 | */ |
| 353 | error = vflush(mp, NULLVP, force ? FORCECLOSE : 0); |
| 354 | if (error) |
| 355 | goto out; |
| 356 | |
| 357 | /* |
| 358 | * If we are not DYING, we should ask userspace's opinion |
| 359 | * about the situation |
| 360 | */ |
| 361 | mutex_enter(&pmp->pmp_lock); |
| 362 | if (pmp->pmp_status != PUFFSTAT_DYING) { |
| 363 | pmp->pmp_unmounting = 1; |
| 364 | mutex_exit(&pmp->pmp_lock); |
| 365 | |
| 366 | PUFFS_MSG_ALLOC(vfs, unmount); |
| 367 | puffs_msg_setinfo(park_unmount, |
| 368 | PUFFSOP_VFS, PUFFS_VFS_UNMOUNT, NULL); |
| 369 | unmount_msg->pvfsr_flags = mntflags; |
| 370 | |
| 371 | PUFFS_MSG_ENQUEUEWAIT(pmp, park_unmount, error); |
| 372 | PUFFS_MSG_RELEASE(unmount); |
| 373 | |
| 374 | error = checkerr(pmp, error, __func__); |
| 375 | DPRINTF(("puffs_unmount: error %d force %d\n" , error, force)); |
| 376 | |
| 377 | mutex_enter(&pmp->pmp_lock); |
| 378 | pmp->pmp_unmounting = 0; |
| 379 | cv_broadcast(&pmp->pmp_unmounting_cv); |
| 380 | } |
| 381 | |
| 382 | /* |
| 383 | * if userspace cooperated or we really need to die, |
| 384 | * screw what userland thinks and just die. |
| 385 | */ |
| 386 | if (error == 0 || force) { |
| 387 | struct puffs_sopreq *psopr; |
| 388 | |
| 389 | /* tell waiters & other resources to go unwait themselves */ |
| 390 | puffs_userdead(pmp); |
| 391 | putter_detach(pmp->pmp_pi); |
| 392 | |
| 393 | /* |
| 394 | * Wait until there are no more users for the mount resource. |
| 395 | * Notice that this is hooked against transport_close |
| 396 | * and return from touser. In an ideal world, it would |
| 397 | * be hooked against final return from all operations. |
| 398 | * But currently it works well enough, since nobody |
| 399 | * does weird blocking voodoo after return from touser(). |
| 400 | */ |
| 401 | while (pmp->pmp_refcount != 0) |
| 402 | cv_wait(&pmp->pmp_refcount_cv, &pmp->pmp_lock); |
| 403 | mutex_exit(&pmp->pmp_lock); |
| 404 | |
| 405 | /* |
| 406 | * Release kernel thread now that there is nothing |
| 407 | * it would be wanting to lock. |
| 408 | */ |
| 409 | KASSERT(curlwp != uvm.pagedaemon_lwp); |
| 410 | psopr = kmem_alloc(sizeof(*psopr), KM_SLEEP); |
| 411 | psopr->psopr_sopreq = PUFFS_SOPREQSYS_EXIT; |
| 412 | mutex_enter(&pmp->pmp_sopmtx); |
| 413 | if (pmp->pmp_sopthrcount == 0) { |
| 414 | mutex_exit(&pmp->pmp_sopmtx); |
| 415 | kmem_free(psopr, sizeof(*psopr)); |
| 416 | mutex_enter(&pmp->pmp_sopmtx); |
| 417 | KASSERT(pmp->pmp_sopthrcount == 0); |
| 418 | } else { |
| 419 | TAILQ_INSERT_TAIL(&pmp->pmp_sopfastreqs, |
| 420 | psopr, psopr_entries); |
| 421 | cv_signal(&pmp->pmp_sopcv); |
| 422 | } |
| 423 | while (pmp->pmp_sopthrcount > 0) |
| 424 | cv_wait(&pmp->pmp_sopcv, &pmp->pmp_sopmtx); |
| 425 | mutex_exit(&pmp->pmp_sopmtx); |
| 426 | |
| 427 | /* free resources now that we hopefully have no waiters left */ |
| 428 | cv_destroy(&pmp->pmp_unmounting_cv); |
| 429 | cv_destroy(&pmp->pmp_refcount_cv); |
| 430 | cv_destroy(&pmp->pmp_msg_waiter_cv); |
| 431 | cv_destroy(&pmp->pmp_sopcv); |
| 432 | mutex_destroy(&pmp->pmp_lock); |
| 433 | mutex_destroy(&pmp->pmp_sopmtx); |
| 434 | |
| 435 | kmem_free(pmp, sizeof(struct puffs_mount)); |
| 436 | error = 0; |
| 437 | } else { |
| 438 | mutex_exit(&pmp->pmp_lock); |
| 439 | } |
| 440 | |
| 441 | out: |
| 442 | DPRINTF(("puffs_unmount: return %d\n" , error)); |
| 443 | return error; |
| 444 | } |
| 445 | |
| 446 | /* |
| 447 | * This doesn't need to travel to userspace |
| 448 | */ |
| 449 | int |
| 450 | puffs_vfsop_root(struct mount *mp, struct vnode **vpp) |
| 451 | { |
| 452 | struct puffs_mount *pmp = MPTOPUFFSMP(mp); |
| 453 | int rv; |
| 454 | |
| 455 | rv = puffs_cookie2vnode(pmp, pmp->pmp_root_cookie, vpp); |
| 456 | KASSERT(rv != PUFFS_NOSUCHCOOKIE); |
| 457 | if (rv != 0) |
| 458 | return rv; |
| 459 | rv = vn_lock(*vpp, LK_EXCLUSIVE); |
| 460 | if (rv != 0) { |
| 461 | vrele(*vpp); |
| 462 | *vpp = NULL; |
| 463 | return rv; |
| 464 | } |
| 465 | return 0; |
| 466 | } |
| 467 | |
| 468 | int |
| 469 | puffs_vfsop_statvfs(struct mount *mp, struct statvfs *sbp) |
| 470 | { |
| 471 | PUFFS_MSG_VARS(vfs, statvfs); |
| 472 | struct puffs_mount *pmp; |
| 473 | int error = 0; |
| 474 | |
| 475 | pmp = MPTOPUFFSMP(mp); |
| 476 | |
| 477 | /* |
| 478 | * If we are mounting, it means that the userspace counterpart |
| 479 | * is calling mount(2), but mount(2) also calls statvfs. So |
| 480 | * requesting statvfs from userspace would mean a deadlock. |
| 481 | * Compensate. |
| 482 | */ |
| 483 | if (__predict_false(pmp->pmp_status == PUFFSTAT_MOUNTING)) |
| 484 | return EINPROGRESS; |
| 485 | |
| 486 | PUFFS_MSG_ALLOC(vfs, statvfs); |
| 487 | puffs_msg_setinfo(park_statvfs, PUFFSOP_VFS, PUFFS_VFS_STATVFS, NULL); |
| 488 | |
| 489 | PUFFS_MSG_ENQUEUEWAIT(pmp, park_statvfs, error); |
| 490 | error = checkerr(pmp, error, __func__); |
| 491 | statvfs_msg->pvfsr_sb.f_iosize = DEV_BSIZE; |
| 492 | |
| 493 | /* |
| 494 | * Try to produce a sensible result even in the event |
| 495 | * of userspace error. |
| 496 | * |
| 497 | * XXX: cache the copy in non-error case |
| 498 | */ |
| 499 | if (!error) { |
| 500 | copy_statvfs_info(&statvfs_msg->pvfsr_sb, mp); |
| 501 | (void)memcpy(sbp, &statvfs_msg->pvfsr_sb, |
| 502 | sizeof(struct statvfs)); |
| 503 | } else { |
| 504 | copy_statvfs_info(sbp, mp); |
| 505 | } |
| 506 | |
| 507 | PUFFS_MSG_RELEASE(statvfs); |
| 508 | return error; |
| 509 | } |
| 510 | |
| 511 | static bool |
| 512 | pageflush_selector(void *cl, struct vnode *vp) |
| 513 | { |
| 514 | return vp->v_type == VREG && |
| 515 | !(LIST_EMPTY(&vp->v_dirtyblkhd) && UVM_OBJ_IS_CLEAN(&vp->v_uobj)); |
| 516 | } |
| 517 | |
| 518 | static int |
| 519 | pageflush(struct mount *mp, kauth_cred_t cred, int waitfor) |
| 520 | { |
| 521 | struct puffs_node *pn; |
| 522 | struct vnode *vp; |
| 523 | struct vnode_iterator *marker; |
| 524 | int error, rv, fsyncwait; |
| 525 | |
| 526 | error = 0; |
| 527 | fsyncwait = (waitfor == MNT_WAIT) ? FSYNC_WAIT : 0; |
| 528 | |
| 529 | /* |
| 530 | * Sync all cached data from regular vnodes (which are not |
| 531 | * currently locked, see below). After this we call VFS_SYNC |
| 532 | * for the fs server, which should handle data and metadata for |
| 533 | * all the nodes it knows to exist. |
| 534 | */ |
| 535 | vfs_vnode_iterator_init(mp, &marker); |
| 536 | while ((vp = vfs_vnode_iterator_next(marker, pageflush_selector, |
| 537 | NULL))) |
| 538 | { |
| 539 | /* |
| 540 | * Here we try to get a reference to the vnode and to |
| 541 | * lock it. This is mostly cargo-culted, but I will |
| 542 | * offer an explanation to why I believe this might |
| 543 | * actually do the right thing. |
| 544 | * |
| 545 | * If the vnode is a goner, we quite obviously don't need |
| 546 | * to sync it. |
| 547 | * |
| 548 | * If the vnode was busy, we don't need to sync it because |
| 549 | * this is never called with MNT_WAIT except from |
| 550 | * dounmount(), when we are wait-flushing all the dirty |
| 551 | * vnodes through other routes in any case. So there, |
| 552 | * sync() doesn't actually sync. Happy now? |
| 553 | */ |
| 554 | error = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT); |
| 555 | if (error) { |
| 556 | vrele(vp); |
| 557 | continue; |
| 558 | } |
| 559 | pn = VPTOPP(vp); |
| 560 | /* hmm.. is the FAF thing entirely sensible? */ |
| 561 | if (waitfor == MNT_LAZY) { |
| 562 | mutex_enter(vp->v_interlock); |
| 563 | pn->pn_stat |= PNODE_FAF; |
| 564 | mutex_exit(vp->v_interlock); |
| 565 | } |
| 566 | rv = VOP_FSYNC(vp, cred, fsyncwait, 0, 0); |
| 567 | if (waitfor == MNT_LAZY) { |
| 568 | mutex_enter(vp->v_interlock); |
| 569 | pn->pn_stat &= ~PNODE_FAF; |
| 570 | mutex_exit(vp->v_interlock); |
| 571 | } |
| 572 | if (rv) |
| 573 | error = rv; |
| 574 | vput(vp); |
| 575 | } |
| 576 | vfs_vnode_iterator_destroy(marker); |
| 577 | |
| 578 | return error; |
| 579 | } |
| 580 | |
| 581 | int |
| 582 | puffs_vfsop_sync(struct mount *mp, int waitfor, struct kauth_cred *cred) |
| 583 | { |
| 584 | PUFFS_MSG_VARS(vfs, sync); |
| 585 | struct puffs_mount *pmp = MPTOPUFFSMP(mp); |
| 586 | int error, rv; |
| 587 | |
| 588 | error = pageflush(mp, cred, waitfor); |
| 589 | |
| 590 | /* sync fs */ |
| 591 | PUFFS_MSG_ALLOC(vfs, sync); |
| 592 | sync_msg->pvfsr_waitfor = waitfor; |
| 593 | puffs_credcvt(&sync_msg->pvfsr_cred, cred); |
| 594 | puffs_msg_setinfo(park_sync, PUFFSOP_VFS, PUFFS_VFS_SYNC, NULL); |
| 595 | |
| 596 | PUFFS_MSG_ENQUEUEWAIT(pmp, park_sync, rv); |
| 597 | rv = checkerr(pmp, rv, __func__); |
| 598 | if (rv) |
| 599 | error = rv; |
| 600 | |
| 601 | PUFFS_MSG_RELEASE(sync); |
| 602 | return error; |
| 603 | } |
| 604 | |
| 605 | int |
| 606 | puffs_vfsop_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp) |
| 607 | { |
| 608 | PUFFS_MSG_VARS(vfs, fhtonode); |
| 609 | struct puffs_mount *pmp = MPTOPUFFSMP(mp); |
| 610 | struct vnode *vp; |
| 611 | void *fhdata; |
| 612 | size_t argsize, fhlen; |
| 613 | int error; |
| 614 | |
| 615 | if (pmp->pmp_args.pa_fhsize == 0) |
| 616 | return EOPNOTSUPP; |
| 617 | |
| 618 | if (pmp->pmp_args.pa_fhflags & PUFFS_FHFLAG_PASSTHROUGH) { |
| 619 | fhlen = fhp->fid_len; |
| 620 | fhdata = fhp; |
| 621 | } else { |
| 622 | fhlen = PUFFS_FROMFHSIZE(fhp->fid_len); |
| 623 | fhdata = fhp->fid_data; |
| 624 | |
| 625 | if (pmp->pmp_args.pa_fhflags & PUFFS_FHFLAG_DYNAMIC) { |
| 626 | if (pmp->pmp_args.pa_fhsize < fhlen) |
| 627 | return EINVAL; |
| 628 | } else { |
| 629 | if (pmp->pmp_args.pa_fhsize != fhlen) |
| 630 | return EINVAL; |
| 631 | } |
| 632 | } |
| 633 | |
| 634 | argsize = sizeof(struct puffs_vfsmsg_fhtonode) + fhlen; |
| 635 | puffs_msgmem_alloc(argsize, &park_fhtonode, (void *)&fhtonode_msg, 1); |
| 636 | fhtonode_msg->pvfsr_dsize = fhlen; |
| 637 | memcpy(fhtonode_msg->pvfsr_data, fhdata, fhlen); |
| 638 | puffs_msg_setinfo(park_fhtonode, PUFFSOP_VFS, PUFFS_VFS_FHTOVP, NULL); |
| 639 | |
| 640 | PUFFS_MSG_ENQUEUEWAIT(pmp, park_fhtonode, error); |
| 641 | error = checkerr(pmp, error, __func__); |
| 642 | if (error) |
| 643 | goto out; |
| 644 | |
| 645 | error = puffs_getvnode(mp, fhtonode_msg->pvfsr_fhcookie, |
| 646 | fhtonode_msg->pvfsr_vtype, fhtonode_msg->pvfsr_size, |
| 647 | fhtonode_msg->pvfsr_rdev, &vp); |
| 648 | if (error) |
| 649 | goto out; |
| 650 | vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); |
| 651 | |
| 652 | *vpp = vp; |
| 653 | out: |
| 654 | puffs_msgmem_release(park_fhtonode); |
| 655 | return error; |
| 656 | } |
| 657 | |
| 658 | int |
| 659 | puffs_vfsop_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size) |
| 660 | { |
| 661 | PUFFS_MSG_VARS(vfs, nodetofh); |
| 662 | struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount); |
| 663 | size_t argsize, fhlen; |
| 664 | int error; |
| 665 | |
| 666 | if (pmp->pmp_args.pa_fhsize == 0) |
| 667 | return EOPNOTSUPP; |
| 668 | |
| 669 | /* if file handles are static len, we can test len immediately */ |
| 670 | if (((pmp->pmp_args.pa_fhflags & PUFFS_FHFLAG_DYNAMIC) == 0) |
| 671 | && ((pmp->pmp_args.pa_fhflags & PUFFS_FHFLAG_PASSTHROUGH) == 0) |
| 672 | && (PUFFS_FROMFHSIZE(*fh_size) < pmp->pmp_args.pa_fhsize)) { |
| 673 | *fh_size = PUFFS_TOFHSIZE(pmp->pmp_args.pa_fhsize); |
| 674 | return E2BIG; |
| 675 | } |
| 676 | |
| 677 | if (pmp->pmp_args.pa_fhflags & PUFFS_FHFLAG_PASSTHROUGH) |
| 678 | fhlen = *fh_size; |
| 679 | else |
| 680 | fhlen = PUFFS_FROMFHSIZE(*fh_size); |
| 681 | |
| 682 | argsize = sizeof(struct puffs_vfsmsg_nodetofh) + fhlen; |
| 683 | puffs_msgmem_alloc(argsize, &park_nodetofh, (void *)&nodetofh_msg, 1); |
| 684 | nodetofh_msg->pvfsr_fhcookie = VPTOPNC(vp); |
| 685 | nodetofh_msg->pvfsr_dsize = fhlen; |
| 686 | puffs_msg_setinfo(park_nodetofh, PUFFSOP_VFS, PUFFS_VFS_VPTOFH, NULL); |
| 687 | |
| 688 | PUFFS_MSG_ENQUEUEWAIT(pmp, park_nodetofh, error); |
| 689 | error = checkerr(pmp, error, __func__); |
| 690 | |
| 691 | if (pmp->pmp_args.pa_fhflags & PUFFS_FHFLAG_PASSTHROUGH) |
| 692 | fhlen = nodetofh_msg->pvfsr_dsize; |
| 693 | else if (pmp->pmp_args.pa_fhflags & PUFFS_FHFLAG_DYNAMIC) |
| 694 | fhlen = PUFFS_TOFHSIZE(nodetofh_msg->pvfsr_dsize); |
| 695 | else |
| 696 | fhlen = PUFFS_TOFHSIZE(pmp->pmp_args.pa_fhsize); |
| 697 | |
| 698 | if (error) { |
| 699 | if (error == E2BIG) |
| 700 | *fh_size = fhlen; |
| 701 | goto out; |
| 702 | } |
| 703 | |
| 704 | if (fhlen > FHANDLE_SIZE_MAX) { |
| 705 | puffs_senderr(pmp, PUFFS_ERR_VPTOFH, E2BIG, |
| 706 | "file handle too big" , VPTOPNC(vp)); |
| 707 | error = EPROTO; |
| 708 | goto out; |
| 709 | } |
| 710 | |
| 711 | if (*fh_size < fhlen) { |
| 712 | *fh_size = fhlen; |
| 713 | error = E2BIG; |
| 714 | goto out; |
| 715 | } |
| 716 | *fh_size = fhlen; |
| 717 | |
| 718 | if (fhp) { |
| 719 | if (pmp->pmp_args.pa_fhflags & PUFFS_FHFLAG_PASSTHROUGH) { |
| 720 | memcpy(fhp, nodetofh_msg->pvfsr_data, fhlen); |
| 721 | } else { |
| 722 | fhp->fid_len = *fh_size; |
| 723 | memcpy(fhp->fid_data, nodetofh_msg->pvfsr_data, |
| 724 | nodetofh_msg->pvfsr_dsize); |
| 725 | } |
| 726 | } |
| 727 | |
| 728 | out: |
| 729 | puffs_msgmem_release(park_nodetofh); |
| 730 | return error; |
| 731 | } |
| 732 | |
| 733 | int |
| 734 | puffs_vfsop_loadvnode(struct mount *mp, struct vnode *vp, |
| 735 | const void *key, size_t key_len, const void **new_key) |
| 736 | { |
| 737 | struct puffs_mount *pmp; |
| 738 | struct puffs_node *pnode; |
| 739 | |
| 740 | KASSERT(key_len == sizeof(puffs_cookie_t)); |
| 741 | |
| 742 | pmp = MPTOPUFFSMP(mp); |
| 743 | |
| 744 | /* Allocate and initialize the pnode. */ |
| 745 | pnode = pool_get(&puffs_pnpool, PR_WAITOK); |
| 746 | memset(pnode, 0, sizeof(struct puffs_node)); |
| 747 | |
| 748 | pnode->pn_vp = vp; |
| 749 | memcpy(&pnode->pn_cookie, key, key_len); |
| 750 | pnode->pn_refcount = 1; |
| 751 | mutex_init(&pnode->pn_mtx, MUTEX_DEFAULT, IPL_NONE); |
| 752 | mutex_init(&pnode->pn_sizemtx, MUTEX_DEFAULT, IPL_NONE); |
| 753 | selinit(&pnode->pn_sel); |
| 754 | vp->v_tag = VT_PUFFS; |
| 755 | vp->v_type = VNON; |
| 756 | vp->v_op = puffs_vnodeop_p; |
| 757 | if (pnode->pn_cookie == pmp->pmp_root_cookie) |
| 758 | vp->v_vflag |= VV_ROOT; |
| 759 | vp->v_data = pnode; |
| 760 | |
| 761 | genfs_node_init(vp, &puffs_genfsops); |
| 762 | uvm_vnp_setsize(vp, 0); |
| 763 | |
| 764 | *new_key = &pnode->pn_cookie; |
| 765 | return 0; |
| 766 | } |
| 767 | |
| 768 | void |
| 769 | puffs_vfsop_init(void) |
| 770 | { |
| 771 | |
| 772 | /* some checks depend on this */ |
| 773 | KASSERT(VNOVAL == VSIZENOTSET); |
| 774 | |
| 775 | pool_init(&puffs_pnpool, sizeof(struct puffs_node), 0, 0, 0, |
| 776 | "puffpnpl" , &pool_allocator_nointr, IPL_NONE); |
| 777 | pool_init(&puffs_vapool, sizeof(struct vattr), 0, 0, 0, |
| 778 | "puffvapl" , &pool_allocator_nointr, IPL_NONE); |
| 779 | puffs_msgif_init(); |
| 780 | } |
| 781 | |
| 782 | void |
| 783 | puffs_vfsop_done(void) |
| 784 | { |
| 785 | |
| 786 | puffs_msgif_destroy(); |
| 787 | pool_destroy(&puffs_pnpool); |
| 788 | pool_destroy(&puffs_vapool); |
| 789 | } |
| 790 | |
/*
 * VFS snapshot hook.  Snapshots are not supported by puffs; none of
 * the arguments is looked at and EOPNOTSUPP is always returned.
 */
int
puffs_vfsop_snapshot(struct mount *mp, struct vnode *vp, struct timespec *ts)
{
	const int unsupported = EOPNOTSUPP;

	return unsupported;
}
| 797 | |
| 798 | int |
| 799 | puffs_vfsop_extattrctl(struct mount *mp, int cmd, struct vnode *vp, |
| 800 | int attrnamespace, const char *attrname) |
| 801 | { |
| 802 | PUFFS_MSG_VARS(vfs, extattrctl); |
| 803 | struct puffs_mount *pmp = MPTOPUFFSMP(mp); |
| 804 | struct puffs_node *pnp; |
| 805 | puffs_cookie_t pnc; |
| 806 | int error, flags; |
| 807 | |
| 808 | if (vp) { |
| 809 | /* doesn't make sense for puffs servers */ |
| 810 | if (vp->v_mount != mp) |
| 811 | return EXDEV; |
| 812 | pnp = vp->v_data; |
| 813 | pnc = pnp->pn_cookie; |
| 814 | flags = PUFFS_EXTATTRCTL_HASNODE; |
| 815 | } else { |
| 816 | pnp = pnc = NULL; |
| 817 | flags = 0; |
| 818 | } |
| 819 | |
| 820 | PUFFS_MSG_ALLOC(vfs, extattrctl); |
| 821 | extattrctl_msg->pvfsr_cmd = cmd; |
| 822 | extattrctl_msg->pvfsr_attrnamespace = attrnamespace; |
| 823 | extattrctl_msg->pvfsr_flags = flags; |
| 824 | if (attrname) { |
| 825 | strlcpy(extattrctl_msg->pvfsr_attrname, attrname, |
| 826 | sizeof(extattrctl_msg->pvfsr_attrname)); |
| 827 | extattrctl_msg->pvfsr_flags |= PUFFS_EXTATTRCTL_HASATTRNAME; |
| 828 | } |
| 829 | puffs_msg_setinfo(park_extattrctl, |
| 830 | PUFFSOP_VFS, PUFFS_VFS_EXTATTRCTL, pnc); |
| 831 | |
| 832 | puffs_msg_enqueue(pmp, park_extattrctl); |
| 833 | if (vp) { |
| 834 | mutex_enter(&pnp->pn_mtx); |
| 835 | puffs_referencenode(pnp); |
| 836 | mutex_exit(&pnp->pn_mtx); |
| 837 | VOP_UNLOCK(vp); |
| 838 | } |
| 839 | error = puffs_msg_wait2(pmp, park_extattrctl, pnp, NULL); |
| 840 | PUFFS_MSG_RELEASE(extattrctl); |
| 841 | if (vp) { |
| 842 | puffs_releasenode(pnp); |
| 843 | } |
| 844 | |
| 845 | return checkerr(pmp, error, __func__); |
| 846 | } |
| 847 | |
| 848 | const struct vnodeopv_desc * const puffs_vnodeopv_descs[] = { |
| 849 | &puffs_vnodeop_opv_desc, |
| 850 | &puffs_specop_opv_desc, |
| 851 | &puffs_fifoop_opv_desc, |
| 852 | &puffs_msgop_opv_desc, |
| 853 | NULL, |
| 854 | }; |
| 855 | |
| 856 | struct vfsops puffs_vfsops = { |
| 857 | .vfs_name = MOUNT_PUFFS, |
| 858 | .vfs_min_mount_data = sizeof (struct puffs_kargs), |
| 859 | .vfs_mount = puffs_vfsop_mount, |
| 860 | .vfs_start = puffs_vfsop_start, |
| 861 | .vfs_unmount = puffs_vfsop_unmount, |
| 862 | .vfs_root = puffs_vfsop_root, |
| 863 | .vfs_quotactl = (void *)eopnotsupp, |
| 864 | .vfs_statvfs = puffs_vfsop_statvfs, |
| 865 | .vfs_sync = puffs_vfsop_sync, |
| 866 | .vfs_vget = (void *)eopnotsupp, |
| 867 | .vfs_loadvnode = puffs_vfsop_loadvnode, |
| 868 | .vfs_fhtovp = puffs_vfsop_fhtovp, |
| 869 | .vfs_vptofh = puffs_vfsop_vptofh, |
| 870 | .vfs_init = puffs_vfsop_init, |
| 871 | .vfs_done = puffs_vfsop_done, |
| 872 | .vfs_snapshot = puffs_vfsop_snapshot, |
| 873 | .vfs_extattrctl = puffs_vfsop_extattrctl, |
| 874 | .vfs_suspendctl = (void *)eopnotsupp, |
| 875 | .vfs_renamelock_enter = genfs_renamelock_enter, |
| 876 | .vfs_renamelock_exit = genfs_renamelock_exit, |
| 877 | .vfs_fsync = (void *)eopnotsupp, |
| 878 | .vfs_opv_descs = puffs_vnodeopv_descs |
| 879 | }; |
| 880 | |
| 881 | static int |
| 882 | puffs_modcmd(modcmd_t cmd, void *arg) |
| 883 | { |
| 884 | |
| 885 | switch (cmd) { |
| 886 | case MODULE_CMD_INIT: |
| 887 | return vfs_attach(&puffs_vfsops); |
| 888 | case MODULE_CMD_FINI: |
| 889 | return vfs_detach(&puffs_vfsops); |
| 890 | default: |
| 891 | return ENOTTY; |
| 892 | } |
| 893 | } |
| 894 | |