/*	$NetBSD: tmpfs_vfsops.c,v 1.68 2016/08/26 21:44:24 dholland Exp $	*/

/*
 * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Efficient memory file system.
 *
 * tmpfs is a file system that uses NetBSD's virtual memory sub-system
 * (the well-known UVM) to store file data and metadata in an efficient
 * way.  This means that it does not follow the structure of an on-disk
 * file system because it simply does not need to.  Instead, it uses
 * memory-specific data structures and algorithms to automatically
 * allocate and release resources.
 */
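
/*
 * Illustrative sketch (not compiled): mount_tmpfs(8) hands a struct
 * tmpfs_args to mount(2), which tmpfs_mount() below validates.  A mount
 * such as "mount_tmpfs -s 8m tmpfs /tmp" would correspond roughly to
 * arguments like the following; the concrete field values here are an
 * assumption for the example, not taken from this file:
 *
 *	struct tmpfs_args ta = {
 *		.ta_version   = TMPFS_ARGS_VERSION,
 *		.ta_nodes_max = 0,                  // <= 3 selects the default
 *		.ta_size_max  = 8 * 1024 * 1024,    // < PAGE_SIZE means unlimited
 *		.ta_root_uid  = 0,
 *		.ta_root_gid  = 0,
 *		.ta_root_mode = S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX,
 *	};
 */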

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tmpfs_vfsops.c,v 1.68 2016/08/26 21:44:24 dholland Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/kauth.h>
#include <sys/module.h>

#include <miscfs/genfs/genfs.h>
#include <fs/tmpfs/tmpfs.h>
#include <fs/tmpfs/tmpfs_args.h>

MODULE(MODULE_CLASS_VFS, tmpfs, NULL);

struct pool tmpfs_dirent_pool;
struct pool tmpfs_node_pool;

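/*
 * tmpfs_init: initialize the memory pools for directory entries and
 * inodes.  Called through the vfs_init hook when the file system is
 * attached.
 */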
void
tmpfs_init(void)
{

        pool_init(&tmpfs_dirent_pool, sizeof(tmpfs_dirent_t), 0, 0, 0,
            "tmpfs_dirent", &pool_allocator_nointr, IPL_NONE);
        pool_init(&tmpfs_node_pool, sizeof(tmpfs_node_t), 0, 0, 0,
            "tmpfs_node", &pool_allocator_nointr, IPL_NONE);
}

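/*
 * tmpfs_done: counterpart of tmpfs_init, destroys the pools when the
 * file system is detached.
 */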
void
tmpfs_done(void)
{

        pool_destroy(&tmpfs_dirent_pool);
        pool_destroy(&tmpfs_node_pool);
}

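/*
 * tmpfs_mount: mount (or update/query) a tmpfs instance.
 *
 * => Handles MNT_GETARGS (return the current settings), MNT_UPDATE
 *    (adjust limits and root ownership/mode) and fresh mounts, for which
 *    the tmpfs_mount_t structure and the root directory node are created.
 */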
int
tmpfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
{
        struct tmpfs_args *args = data;
        tmpfs_mount_t *tmp;
        tmpfs_node_t *root;
        struct vattr va;
        struct vnode *vp;
        uint64_t memlimit;
        ino_t nodes;
        int error;
        bool set_memlimit;
        bool set_nodes;

        if (args == NULL)
                return EINVAL;

        /* Validate the version. */
        if (*data_len < sizeof(*args) ||
            args->ta_version != TMPFS_ARGS_VERSION)
                return EINVAL;

        /* Handle retrieval of mount point arguments. */
        if (mp->mnt_flag & MNT_GETARGS) {
                if (mp->mnt_data == NULL)
                        return EIO;
                tmp = VFS_TO_TMPFS(mp);

                args->ta_version = TMPFS_ARGS_VERSION;
                args->ta_nodes_max = tmp->tm_nodes_max;
                args->ta_size_max = tmp->tm_mem_limit;

                root = tmp->tm_root;
                args->ta_root_uid = root->tn_uid;
                args->ta_root_gid = root->tn_gid;
                args->ta_root_mode = root->tn_mode;

                *data_len = sizeof(*args);
                return 0;
        }

        /* Prohibit mounts if there is not enough memory. */
        if (tmpfs_mem_info(true) < uvmexp.freetarg)
                return EINVAL;

        /* Check for invalid uid and gid arguments */
        if (args->ta_root_uid == VNOVAL || args->ta_root_gid == VNOVAL)
                return EINVAL;

        /* This can never happen? */
        if ((args->ta_root_mode & ALLPERMS) == VNOVAL)
                return EINVAL;

        /* Get the memory usage limit for this file-system. */
        if (args->ta_size_max < PAGE_SIZE) {
                memlimit = UINT64_MAX;
                set_memlimit = false;
        } else {
                memlimit = args->ta_size_max;
                set_memlimit = true;
        }
        KASSERT(memlimit > 0);

        if (args->ta_nodes_max <= 3) {
                nodes = 3 + (memlimit / 1024);
                set_nodes = false;
        } else {
                nodes = args->ta_nodes_max;
                set_nodes = true;
        }
        nodes = MIN(nodes, INT_MAX);
        KASSERT(nodes >= 3);

        if (mp->mnt_flag & MNT_UPDATE) {
                tmp = VFS_TO_TMPFS(mp);
                if (set_nodes && nodes < tmp->tm_nodes_cnt)
                        return EBUSY;
                if (set_memlimit) {
                        if ((error = tmpfs_mntmem_set(tmp, memlimit)) != 0)
                                return error;
                }
                if (set_nodes)
                        tmp->tm_nodes_max = nodes;
                root = tmp->tm_root;
                root->tn_uid = args->ta_root_uid;
                root->tn_gid = args->ta_root_gid;
                root->tn_mode = args->ta_root_mode;
                return 0;
        }

        /* Allocate the tmpfs mount structure and fill it. */
        tmp = kmem_zalloc(sizeof(tmpfs_mount_t), KM_SLEEP);
        if (tmp == NULL)
                return ENOMEM;

        tmp->tm_nodes_max = nodes;
        tmp->tm_nodes_cnt = 0;
        LIST_INIT(&tmp->tm_nodes);

        mutex_init(&tmp->tm_lock, MUTEX_DEFAULT, IPL_NONE);
        tmpfs_mntmem_init(tmp, memlimit);
        mp->mnt_data = tmp;

        /* Allocate the root node. */
        vattr_null(&va);
        va.va_type = VDIR;
        va.va_mode = args->ta_root_mode & ALLPERMS;
        va.va_uid = args->ta_root_uid;
        va.va_gid = args->ta_root_gid;
        error = vcache_new(mp, NULL, &va, NOCRED, &vp);
        if (error) {
                mp->mnt_data = NULL;
                tmpfs_mntmem_destroy(tmp);
                mutex_destroy(&tmp->tm_lock);
                kmem_free(tmp, sizeof(*tmp));
                return error;
        }
        KASSERT(vp != NULL);
        root = VP_TO_TMPFS_NODE(vp);
        KASSERT(root != NULL);

        /*
         * Parent of the root inode is itself.  Also, root inode has no
         * directory entry (i.e. is never attached), thus hold an extra
         * reference (link) for it.
         */
        root->tn_links++;
        root->tn_spec.tn_dir.tn_parent = root;
        tmp->tm_root = root;
        vrele(vp);

        mp->mnt_flag |= MNT_LOCAL;
        mp->mnt_stat.f_namemax = TMPFS_MAXNAMLEN;
        mp->mnt_fs_bshift = PAGE_SHIFT;
        mp->mnt_dev_bshift = DEV_BSHIFT;
        mp->mnt_iflag |= IMNT_MPSAFE | IMNT_CAN_RWTORO;
        vfs_getnewfsid(mp);

        error = set_statvfs_info(path, UIO_USERSPACE, "tmpfs", UIO_SYSSPACE,
            mp->mnt_op->vfs_name, mp, curlwp);
        if (error) {
                (void)tmpfs_unmount(mp, MNT_FORCE);
        }
        return error;
}

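/*
 * tmpfs_start: nothing to do on mount activation.
 */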
int
tmpfs_start(struct mount *mp, int flags)
{

        return 0;
}

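/*
 * tmpfs_unmount: unmount the file system.
 *
 * => Flushes all vnodes first, then detaches and frees every directory
 *    entry, and finally releases all inodes and the tmpfs_mount_t
 *    structure itself.
 */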
int
tmpfs_unmount(struct mount *mp, int mntflags)
{
        tmpfs_mount_t *tmp = VFS_TO_TMPFS(mp);
        tmpfs_node_t *node, *cnode;
        int error, flags = 0;

        /* Handle forced unmounts. */
        if (mntflags & MNT_FORCE)
                flags |= FORCECLOSE;

        /* Finalize all pending I/O. */
        error = vflush(mp, NULL, flags);
        if (error != 0)
                return error;

        /*
         * First round, detach and destroy all directory entries.
         * Also, clear the pointers to the vnodes - they are gone.
         */
        LIST_FOREACH(node, &tmp->tm_nodes, tn_entries) {
                tmpfs_dirent_t *de;

                node->tn_vnode = NULL;
                if (node->tn_type != VDIR) {
                        continue;
                }
                while ((de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir)) != NULL) {
                        cnode = de->td_node;
                        if (cnode && cnode != TMPFS_NODE_WHITEOUT) {
                                cnode->tn_vnode = NULL;
                        }
                        tmpfs_dir_detach(node, de);
                        tmpfs_free_dirent(tmp, de);
                }
                /* Extra virtual entry (itself for the root). */
                node->tn_links--;
        }

        /* Release the reference on root (diagnostic). */
        node = tmp->tm_root;
        node->tn_links--;

        /* Second round, destroy all inodes. */
        while ((node = LIST_FIRST(&tmp->tm_nodes)) != NULL) {
                tmpfs_free_node(tmp, node);
        }

        /* Throw away the tmpfs_mount structure. */
        tmpfs_mntmem_destroy(tmp);
        mutex_destroy(&tmp->tm_lock);
        kmem_free(tmp, sizeof(*tmp));
        mp->mnt_data = NULL;

        return 0;
}

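/*
 * tmpfs_root: return a locked, referenced vnode for the root directory.
 */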
int
tmpfs_root(struct mount *mp, vnode_t **vpp)
{
        tmpfs_node_t *node = VFS_TO_TMPFS(mp)->tm_root;
        int error;

        error = vcache_get(mp, &node, sizeof(node), vpp);
        if (error)
                return error;
        error = vn_lock(*vpp, LK_EXCLUSIVE);
        if (error) {
                vrele(*vpp);
                *vpp = NULL;
                return error;
        }

        return 0;
}

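/*
 * tmpfs_vget: not supported; tmpfs vnodes are obtained via the vnode
 * cache (vcache_get/vcache_new) instead.
 */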
int
tmpfs_vget(struct mount *mp, ino_t ino, vnode_t **vpp)
{

        return EOPNOTSUPP;
}

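/*
 * tmpfs_fhtovp: convert a file handle into a locked vnode.
 *
 * => Looks the inode up by its ID, holds it across vcache_get() so it
 *    cannot be freed concurrently, and validates the generation number.
 */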
int
tmpfs_fhtovp(struct mount *mp, struct fid *fhp, vnode_t **vpp)
{
        tmpfs_mount_t *tmp = VFS_TO_TMPFS(mp);
        tmpfs_node_t *node;
        tmpfs_fid_t tfh;
        int error;

        if (fhp->fid_len != sizeof(tmpfs_fid_t)) {
                return EINVAL;
        }
        memcpy(&tfh, fhp, sizeof(tmpfs_fid_t));

        mutex_enter(&tmp->tm_lock);
        LIST_FOREACH(node, &tmp->tm_nodes, tn_entries) {
                if (node->tn_id == tfh.tf_id) {
                        /* Prevent this node from disappearing. */
                        atomic_inc_32(&node->tn_holdcount);
                        break;
                }
        }
        mutex_exit(&tmp->tm_lock);
        if (node == NULL)
                return ESTALE;

        error = vcache_get(mp, &node, sizeof(node), vpp);
        /* If this node has been reclaimed free it now. */
        if (atomic_dec_32_nv(&node->tn_holdcount) == TMPFS_NODE_RECLAIMED) {
                KASSERT(error != 0);
                tmpfs_free_node(tmp, node);
        }
        if (error)
                return (error == ENOENT ? ESTALE : error);
        error = vn_lock(*vpp, LK_EXCLUSIVE);
        if (error) {
                vrele(*vpp);
                *vpp = NULL;
                return error;
        }
        if (TMPFS_NODE_GEN(node) != tfh.tf_gen) {
                vput(*vpp);
                *vpp = NULL;
                return ESTALE;
        }

        return 0;
}

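/*
 * tmpfs_vptofh: build a file handle (inode ID plus generation number)
 * for the given vnode.
 */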
int
tmpfs_vptofh(vnode_t *vp, struct fid *fhp, size_t *fh_size)
{
        tmpfs_fid_t tfh;
        tmpfs_node_t *node;

        if (*fh_size < sizeof(tmpfs_fid_t)) {
                *fh_size = sizeof(tmpfs_fid_t);
                return E2BIG;
        }
        *fh_size = sizeof(tmpfs_fid_t);
        node = VP_TO_TMPFS_NODE(vp);

        memset(&tfh, 0, sizeof(tfh));
        tfh.tf_len = sizeof(tmpfs_fid_t);
        tfh.tf_gen = TMPFS_NODE_GEN(node);
        tfh.tf_id = node->tn_id;
        memcpy(fhp, &tfh, sizeof(tfh));

        return 0;
}

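/*
 * tmpfs_statvfs: report file system statistics.  Block counts are in
 * PAGE_SIZE units; the free inode estimate is additionally capped by
 * the memory still available to this mount.
 */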
int
tmpfs_statvfs(struct mount *mp, struct statvfs *sbp)
{
        tmpfs_mount_t *tmp;
        fsfilcnt_t freenodes;
        size_t avail;

        tmp = VFS_TO_TMPFS(mp);

        sbp->f_iosize = sbp->f_frsize = sbp->f_bsize = PAGE_SIZE;

        mutex_enter(&tmp->tm_acc_lock);
        avail = tmpfs_pages_avail(tmp);
        sbp->f_blocks = (tmpfs_bytes_max(tmp) >> PAGE_SHIFT);
        sbp->f_bavail = sbp->f_bfree = avail;
        sbp->f_bresvd = 0;

        freenodes = MIN(tmp->tm_nodes_max - tmp->tm_nodes_cnt,
            avail * PAGE_SIZE / sizeof(tmpfs_node_t));

        sbp->f_files = tmp->tm_nodes_cnt + freenodes;
        sbp->f_favail = sbp->f_ffree = freenodes;
        sbp->f_fresvd = 0;
        mutex_exit(&tmp->tm_acc_lock);

        copy_statvfs_info(sbp, mp);

        return 0;
}

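/*
 * tmpfs_sync: nothing to do; tmpfs has no backing store to write back to.
 */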
int
tmpfs_sync(struct mount *mp, int waitfor, kauth_cred_t uc)
{

        return 0;
}

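/*
 * tmpfs_snapshot: snapshots are not supported.
 */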
int
tmpfs_snapshot(struct mount *mp, vnode_t *vp, struct timespec *ctime)
{

        return EOPNOTSUPP;
}

/*
 * tmpfs vfs operations.
 */

extern const struct vnodeopv_desc tmpfs_fifoop_opv_desc;
extern const struct vnodeopv_desc tmpfs_specop_opv_desc;
extern const struct vnodeopv_desc tmpfs_vnodeop_opv_desc;

const struct vnodeopv_desc * const tmpfs_vnodeopv_descs[] = {
        &tmpfs_fifoop_opv_desc,
        &tmpfs_specop_opv_desc,
        &tmpfs_vnodeop_opv_desc,
        NULL,
};

struct vfsops tmpfs_vfsops = {
        .vfs_name = MOUNT_TMPFS,
        .vfs_min_mount_data = sizeof (struct tmpfs_args),
        .vfs_mount = tmpfs_mount,
        .vfs_start = tmpfs_start,
        .vfs_unmount = tmpfs_unmount,
        .vfs_root = tmpfs_root,
        .vfs_quotactl = (void *)eopnotsupp,
        .vfs_statvfs = tmpfs_statvfs,
        .vfs_sync = tmpfs_sync,
        .vfs_vget = tmpfs_vget,
        .vfs_loadvnode = tmpfs_loadvnode,
        .vfs_newvnode = tmpfs_newvnode,
        .vfs_fhtovp = tmpfs_fhtovp,
        .vfs_vptofh = tmpfs_vptofh,
        .vfs_init = tmpfs_init,
        .vfs_done = tmpfs_done,
        .vfs_snapshot = tmpfs_snapshot,
        .vfs_extattrctl = vfs_stdextattrctl,
        .vfs_suspendctl = (void *)eopnotsupp,
        .vfs_renamelock_enter = genfs_renamelock_enter,
        .vfs_renamelock_exit = genfs_renamelock_exit,
        .vfs_fsync = (void *)eopnotsupp,
        .vfs_opv_descs = tmpfs_vnodeopv_descs
};

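/*
 * tmpfs_modcmd: handle loading and unloading of the tmpfs module by
 * attaching/detaching tmpfs_vfsops.
 */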
static int
tmpfs_modcmd(modcmd_t cmd, void *arg)
{

        switch (cmd) {
        case MODULE_CMD_INIT:
                return vfs_attach(&tmpfs_vfsops);
        case MODULE_CMD_FINI:
                return vfs_detach(&tmpfs_vfsops);
        default:
                return ENOTTY;
        }
}