| 1 | /* $NetBSD: ufs_inode.c,v 1.97 2016/10/28 20:38:12 jdolecek Exp $ */ |
| 2 | |
| 3 | /* |
| 4 | * Copyright (c) 1991, 1993 |
| 5 | * The Regents of the University of California. All rights reserved. |
| 6 | * (c) UNIX System Laboratories, Inc. |
| 7 | * All or some portions of this file are derived from material licensed |
| 8 | * to the University of California by American Telephone and Telegraph |
| 9 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with |
| 10 | * the permission of UNIX System Laboratories, Inc. |
| 11 | * |
| 12 | * Redistribution and use in source and binary forms, with or without |
| 13 | * modification, are permitted provided that the following conditions |
| 14 | * are met: |
| 15 | * 1. Redistributions of source code must retain the above copyright |
| 16 | * notice, this list of conditions and the following disclaimer. |
| 17 | * 2. Redistributions in binary form must reproduce the above copyright |
| 18 | * notice, this list of conditions and the following disclaimer in the |
| 19 | * documentation and/or other materials provided with the distribution. |
| 20 | * 3. Neither the name of the University nor the names of its contributors |
| 21 | * may be used to endorse or promote products derived from this software |
| 22 | * without specific prior written permission. |
| 23 | * |
| 24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 34 | * SUCH DAMAGE. |
| 35 | * |
| 36 | * @(#)ufs_inode.c 8.9 (Berkeley) 5/14/95 |
| 37 | */ |
| 38 | |
| 39 | #include <sys/cdefs.h> |
| 40 | __KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.97 2016/10/28 20:38:12 jdolecek Exp $" ); |
| 41 | |
| 42 | #if defined(_KERNEL_OPT) |
| 43 | #include "opt_ffs.h" |
| 44 | #include "opt_quota.h" |
| 45 | #include "opt_wapbl.h" |
| 46 | #endif |
| 47 | |
| 48 | #include <sys/param.h> |
| 49 | #include <sys/systm.h> |
| 50 | #include <sys/proc.h> |
| 51 | #include <sys/vnode.h> |
| 52 | #include <sys/mount.h> |
| 53 | #include <sys/kernel.h> |
| 54 | #include <sys/namei.h> |
| 55 | #include <sys/kauth.h> |
| 56 | #include <sys/wapbl.h> |
| 57 | #include <sys/fstrans.h> |
| 58 | #include <sys/kmem.h> |
| 59 | |
| 60 | #include <ufs/ufs/inode.h> |
| 61 | #include <ufs/ufs/ufsmount.h> |
| 62 | #include <ufs/ufs/ufs_extern.h> |
| 63 | #include <ufs/ufs/ufs_wapbl.h> |
| 64 | #ifdef UFS_DIRHASH |
| 65 | #include <ufs/ufs/dirhash.h> |
| 66 | #endif |
| 67 | #ifdef UFS_EXTATTR |
| 68 | #include <ufs/ufs/extattr.h> |
| 69 | #endif |
| 70 | |
| 71 | #include <uvm/uvm.h> |
| 72 | |
| 73 | extern int prtactive; |
| 74 | |
| 75 | /* |
| 76 | * Last reference to an inode. If necessary, write or delete it. |
| 77 | */ |
| 78 | int |
| 79 | ufs_inactive(void *v) |
| 80 | { |
| 81 | struct vop_inactive_args /* { |
| 82 | struct vnode *a_vp; |
| 83 | struct bool *a_recycle; |
| 84 | } */ *ap = v; |
| 85 | struct vnode *vp = ap->a_vp; |
| 86 | struct inode *ip = VTOI(vp); |
| 87 | struct mount *mp = vp->v_mount; |
| 88 | mode_t mode; |
| 89 | int allerror = 0, error; |
| 90 | bool wapbl_locked = false; |
| 91 | |
| 92 | UFS_WAPBL_JUNLOCK_ASSERT(mp); |
| 93 | |
| 94 | fstrans_start(mp, FSTRANS_LAZY); |
| 95 | |
| 96 | /* |
| 97 | * Ignore inodes related to stale file handles. |
| 98 | */ |
| 99 | if (ip->i_mode == 0) |
| 100 | goto out; |
| 101 | |
| 102 | if (ip->i_nlink <= 0 && (mp->mnt_flag & MNT_RDONLY) == 0) { |
| 103 | #ifdef UFS_EXTATTR |
| 104 | ufs_extattr_vnode_inactive(vp, curlwp); |
| 105 | #endif |
| 106 | |
| 107 | /* |
| 108 | * All file blocks must be freed before we can let the vnode |
| 109 | * be reclaimed, so can't postpone full truncating any further. |
| 110 | */ |
| 111 | if (ip->i_size != 0) { |
| 112 | allerror = ufs_truncate_retry(vp, 0, NOCRED); |
| 113 | if (allerror) |
| 114 | goto out; |
| 115 | } |
| 116 | |
| 117 | #if defined(QUOTA) || defined(QUOTA2) |
| 118 | error = UFS_WAPBL_BEGIN(mp); |
| 119 | if (error) { |
| 120 | allerror = error; |
| 121 | } else { |
| 122 | wapbl_locked = true; |
| 123 | (void)chkiq(ip, -1, NOCRED, 0); |
| 124 | } |
| 125 | #endif |
| 126 | DIP_ASSIGN(ip, rdev, 0); |
| 127 | mode = ip->i_mode; |
| 128 | ip->i_mode = 0; |
| 129 | ip->i_omode = mode; |
| 130 | DIP_ASSIGN(ip, mode, 0); |
| 131 | ip->i_flag |= IN_CHANGE | IN_UPDATE; |
| 132 | /* |
| 133 | * Defer final inode free and update to ufs_reclaim(). |
| 134 | */ |
| 135 | } |
| 136 | |
| 137 | if (ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) { |
| 138 | if (! wapbl_locked) { |
| 139 | error = UFS_WAPBL_BEGIN(mp); |
| 140 | if (error) { |
| 141 | allerror = error; |
| 142 | goto out; |
| 143 | } |
| 144 | wapbl_locked = true; |
| 145 | } |
| 146 | UFS_UPDATE(vp, NULL, NULL, 0); |
| 147 | } |
| 148 | out: |
| 149 | if (wapbl_locked) |
| 150 | UFS_WAPBL_END(mp); |
| 151 | /* |
| 152 | * If we are done with the inode, reclaim it |
| 153 | * so that it can be reused immediately. |
| 154 | */ |
| 155 | *ap->a_recycle = (ip->i_mode == 0); |
| 156 | VOP_UNLOCK(vp); |
| 157 | fstrans_done(mp); |
| 158 | return (allerror); |
| 159 | } |
| 160 | |
| 161 | /* |
| 162 | * Reclaim an inode so that it can be used for other purposes. |
| 163 | */ |
| 164 | int |
| 165 | ufs_reclaim(struct vnode *vp) |
| 166 | { |
| 167 | struct inode *ip = VTOI(vp); |
| 168 | |
| 169 | if (prtactive && vp->v_usecount > 1) |
| 170 | vprint("ufs_reclaim: pushing active" , vp); |
| 171 | |
| 172 | if (!UFS_WAPBL_BEGIN(vp->v_mount)) { |
| 173 | UFS_UPDATE(vp, NULL, NULL, UPDATE_CLOSE); |
| 174 | UFS_WAPBL_END(vp->v_mount); |
| 175 | } |
| 176 | UFS_UPDATE(vp, NULL, NULL, UPDATE_CLOSE); |
| 177 | |
| 178 | if (ip->i_devvp) { |
| 179 | vrele(ip->i_devvp); |
| 180 | ip->i_devvp = 0; |
| 181 | } |
| 182 | #if defined(QUOTA) || defined(QUOTA2) |
| 183 | ufsquota_free(ip); |
| 184 | #endif |
| 185 | #ifdef UFS_DIRHASH |
| 186 | if (ip->i_dirhash != NULL) |
| 187 | ufsdirhash_free(ip); |
| 188 | #endif |
| 189 | return (0); |
| 190 | } |
| 191 | |
| 192 | /* |
| 193 | * allocate a range of blocks in a file. |
| 194 | * after this function returns, any page entirely contained within the range |
| 195 | * will map to invalid data and thus must be overwritten before it is made |
| 196 | * accessible to others. |
| 197 | */ |
| 198 | |
| 199 | int |
| 200 | ufs_balloc_range(struct vnode *vp, off_t off, off_t len, kauth_cred_t cred, |
| 201 | int flags) |
| 202 | { |
| 203 | off_t neweof; /* file size after the operation */ |
| 204 | off_t neweob; /* offset next to the last block after the operation */ |
| 205 | off_t pagestart; /* starting offset of range covered by pgs */ |
| 206 | off_t eob; /* offset next to allocated blocks */ |
| 207 | struct uvm_object *uobj; |
| 208 | int i, delta, error, npages; |
| 209 | int bshift = vp->v_mount->mnt_fs_bshift; |
| 210 | int bsize = 1 << bshift; |
| 211 | int ppb = MAX(bsize >> PAGE_SHIFT, 1); |
| 212 | struct vm_page **pgs; |
| 213 | size_t pgssize; |
| 214 | UVMHIST_FUNC("ufs_balloc_range" ); UVMHIST_CALLED(ubchist); |
| 215 | UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x" , |
| 216 | vp, off, len, vp->v_size); |
| 217 | |
| 218 | neweof = MAX(vp->v_size, off + len); |
| 219 | GOP_SIZE(vp, neweof, &neweob, 0); |
| 220 | |
| 221 | error = 0; |
| 222 | uobj = &vp->v_uobj; |
| 223 | |
| 224 | /* |
| 225 | * read or create pages covering the range of the allocation and |
| 226 | * keep them locked until the new block is allocated, so there |
| 227 | * will be no window where the old contents of the new block are |
| 228 | * visible to racing threads. |
| 229 | */ |
| 230 | |
| 231 | pagestart = trunc_page(off) & ~(bsize - 1); |
| 232 | npages = MIN(ppb, (round_page(neweob) - pagestart) >> PAGE_SHIFT); |
| 233 | pgssize = npages * sizeof(struct vm_page *); |
| 234 | pgs = kmem_zalloc(pgssize, KM_SLEEP); |
| 235 | |
| 236 | /* |
| 237 | * adjust off to be block-aligned. |
| 238 | */ |
| 239 | |
| 240 | delta = off & (bsize - 1); |
| 241 | off -= delta; |
| 242 | len += delta; |
| 243 | |
| 244 | genfs_node_wrlock(vp); |
| 245 | mutex_enter(uobj->vmobjlock); |
| 246 | error = VOP_GETPAGES(vp, pagestart, pgs, &npages, 0, |
| 247 | VM_PROT_WRITE, 0, PGO_SYNCIO | PGO_PASTEOF | PGO_NOBLOCKALLOC | |
| 248 | PGO_NOTIMESTAMP | PGO_GLOCKHELD); |
| 249 | if (error) { |
| 250 | genfs_node_unlock(vp); |
| 251 | goto out; |
| 252 | } |
| 253 | |
| 254 | /* |
| 255 | * now allocate the range. |
| 256 | */ |
| 257 | |
| 258 | error = GOP_ALLOC(vp, off, len, flags, cred); |
| 259 | genfs_node_unlock(vp); |
| 260 | |
| 261 | /* |
| 262 | * if the allocation succeeded, clear PG_CLEAN on all the pages |
| 263 | * and clear PG_RDONLY on any pages that are now fully backed |
| 264 | * by disk blocks. if the allocation failed, we do not invalidate |
| 265 | * the pages since they might have already existed and been dirty, |
| 266 | * in which case we need to keep them around. if we created the pages, |
| 267 | * they will be clean and read-only, and leaving such pages |
| 268 | * in the cache won't cause any problems. |
| 269 | */ |
| 270 | |
| 271 | GOP_SIZE(vp, off + len, &eob, 0); |
| 272 | mutex_enter(uobj->vmobjlock); |
| 273 | mutex_enter(&uvm_pageqlock); |
| 274 | for (i = 0; i < npages; i++) { |
| 275 | KASSERT((pgs[i]->flags & PG_RELEASED) == 0); |
| 276 | if (!error) { |
| 277 | if (off <= pagestart + (i << PAGE_SHIFT) && |
| 278 | pagestart + ((i + 1) << PAGE_SHIFT) <= eob) { |
| 279 | pgs[i]->flags &= ~PG_RDONLY; |
| 280 | } |
| 281 | pgs[i]->flags &= ~PG_CLEAN; |
| 282 | } |
| 283 | uvm_pageactivate(pgs[i]); |
| 284 | } |
| 285 | mutex_exit(&uvm_pageqlock); |
| 286 | uvm_page_unbusy(pgs, npages); |
| 287 | mutex_exit(uobj->vmobjlock); |
| 288 | |
| 289 | out: |
| 290 | kmem_free(pgs, pgssize); |
| 291 | return error; |
| 292 | } |
| 293 | |
| 294 | int |
| 295 | ufs_truncate_retry(struct vnode *vp, uint64_t newsize, kauth_cred_t cred) |
| 296 | { |
| 297 | struct inode *ip = VTOI(vp); |
| 298 | struct mount *mp = vp->v_mount; |
| 299 | int error = 0; |
| 300 | |
| 301 | UFS_WAPBL_JUNLOCK_ASSERT(mp); |
| 302 | |
| 303 | /* |
| 304 | * Truncate might temporarily fail, loop until done. |
| 305 | */ |
| 306 | while (ip->i_size != newsize) { |
| 307 | error = UFS_WAPBL_BEGIN(mp); |
| 308 | if (error) |
| 309 | goto out; |
| 310 | |
| 311 | error = UFS_TRUNCATE(vp, newsize, 0, cred); |
| 312 | UFS_WAPBL_END(mp); |
| 313 | |
| 314 | if (error != 0 && error != EAGAIN) |
| 315 | goto out; |
| 316 | } |
| 317 | |
| 318 | out: |
| 319 | return error; |
| 320 | } |
| 321 | |