| 1 | /* $NetBSD: lfs_balloc.c,v 1.91 2016/08/07 02:42:32 dholland Exp $ */ |
| 2 | |
| 3 | /*- |
| 4 | * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. |
| 5 | * All rights reserved. |
| 6 | * |
| 7 | * This code is derived from software contributed to The NetBSD Foundation |
| 8 | * by Konrad E. Schroder <perseant@hhhh.org>. |
| 9 | * |
| 10 | * Redistribution and use in source and binary forms, with or without |
| 11 | * modification, are permitted provided that the following conditions |
| 12 | * are met: |
| 13 | * 1. Redistributions of source code must retain the above copyright |
| 14 | * notice, this list of conditions and the following disclaimer. |
| 15 | * 2. Redistributions in binary form must reproduce the above copyright |
| 16 | * notice, this list of conditions and the following disclaimer in the |
| 17 | * documentation and/or other materials provided with the distribution. |
| 18 | * |
| 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
| 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
| 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
| 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 29 | * POSSIBILITY OF SUCH DAMAGE. |
| 30 | */ |
| 31 | /* |
| 32 | * Copyright (c) 1989, 1991, 1993 |
| 33 | * The Regents of the University of California. All rights reserved. |
| 34 | * |
| 35 | * Redistribution and use in source and binary forms, with or without |
| 36 | * modification, are permitted provided that the following conditions |
| 37 | * are met: |
| 38 | * 1. Redistributions of source code must retain the above copyright |
| 39 | * notice, this list of conditions and the following disclaimer. |
| 40 | * 2. Redistributions in binary form must reproduce the above copyright |
| 41 | * notice, this list of conditions and the following disclaimer in the |
| 42 | * documentation and/or other materials provided with the distribution. |
| 43 | * 3. Neither the name of the University nor the names of its contributors |
| 44 | * may be used to endorse or promote products derived from this software |
| 45 | * without specific prior written permission. |
| 46 | * |
| 47 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 48 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 49 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 50 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 51 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 52 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 53 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 54 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 55 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 56 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 57 | * SUCH DAMAGE. |
| 58 | * |
| 59 | * @(#)lfs_balloc.c 8.4 (Berkeley) 5/8/95 |
| 60 | */ |
| 61 | |
| 62 | #include <sys/cdefs.h> |
| 63 | __KERNEL_RCSID(0, "$NetBSD: lfs_balloc.c,v 1.91 2016/08/07 02:42:32 dholland Exp $" ); |
| 64 | |
| 65 | #if defined(_KERNEL_OPT) |
| 66 | #include "opt_quota.h" |
| 67 | #endif |
| 68 | |
| 69 | #include <sys/param.h> |
| 70 | #include <sys/systm.h> |
| 71 | #include <sys/buf.h> |
| 72 | #include <sys/proc.h> |
| 73 | #include <sys/vnode.h> |
| 74 | #include <sys/mount.h> |
| 75 | #include <sys/resourcevar.h> |
| 76 | #include <sys/tree.h> |
| 77 | #include <sys/trace.h> |
| 78 | #include <sys/kauth.h> |
| 79 | |
| 80 | #include <miscfs/specfs/specdev.h> |
| 81 | |
| 82 | #include <ufs/lfs/ulfs_quotacommon.h> |
| 83 | #include <ufs/lfs/ulfs_inode.h> |
| 84 | #include <ufs/lfs/ulfsmount.h> |
| 85 | #include <ufs/lfs/ulfs_extern.h> |
| 86 | |
| 87 | #include <ufs/lfs/lfs.h> |
| 88 | #include <ufs/lfs/lfs_accessors.h> |
| 89 | #include <ufs/lfs/lfs_extern.h> |
| 90 | #include <ufs/lfs/lfs_kernel.h> |
| 91 | |
| 92 | #include <uvm/uvm.h> |
| 93 | |
/*
 * Forward declaration of the fragment-extension helper defined later in
 * this file; lfs_balloc() calls it before its definition appears.
 */
static int lfs_fragextend(struct vnode *, int, int, daddr_t, struct buf **,
    kauth_cred_t);

/*
 * Count of blocks currently registered as "write pending" through
 * lfs_register_block().  It is incremented there and decremented (never
 * below zero) in lfs_do_deregister(); both updates happen with lfs_lock
 * held.
 */
u_int64_t locked_fakequeue_count;
| 98 | |
/*
 * Allocate a block, and do inode and filesystem block accounting for
 * it and for any indirect blocks that may need to be created in order
 * to handle this block.
 *
 * Blocks which have never been accounted for (i.e., which "do not
 * exist") have disk address 0, which is translated by ulfs_bmap to
 * the special value UNASSIGNED == -1, as in historical FFS-related
 * code.
 *
 * Blocks which have been accounted for but which have not yet been
 * written to disk are given the new special disk address UNWRITTEN ==
 * -2, so that they can be differentiated from completely new blocks.
 *
 * Arguments:
 *	vp		vnode of the file being written
 *	startoffset	byte offset in the file at which the I/O starts
 *	iosize		length of the I/O in bytes; asserted to be no
 *			larger than one filesystem block
 *	cred		credentials handed to ISSPACE() and lfs_fragextend()
 *	flags		only B_CLRBUF is examined here; when set, the buffer
 *			of a brand new block is zeroed with clrbuf()
 *	bpp		if non-NULL, receives the buffer for the block (it is
 *			reset to NULL on entry); if NULL, no buffer is fetched
 *			and the block is registered via lfs_register_block()
 *
 * Returns 0 on success, ENOSPC when ISSPACE() reports that the needed
 * frags are not available, or the error returned by lfs_fragextend(),
 * bread(), ulfs_bmaparray(), VOP_BWRITE() or biowait().
 *
 * Note: it seems that bpp is passed as NULL for blocks that are file
 * pages that will be handled by UVM and not the buffer cache.
 *
 * XXX: locking?
 */
/* VOP_BWRITE ULFS_NIADDR+2 times */
int
lfs_balloc(struct vnode *vp, off_t startoffset, int iosize, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	int offset;
	daddr_t daddr, idaddr;
	struct buf *ibp, *bp;
	struct inode *ip;
	struct lfs *fs;
	struct indir indirs[ULFS_NIADDR+2], *idp;
	daddr_t lbn, lastblock;
	int bcount;
	int error, frags, i, nsize, osize, num;

	ip = VTOI(vp);
	fs = ip->i_lfs;

	/* Declare to humans that we might have the seglock here */
	ASSERT_MAYBE_SEGLOCK(fs);


	/* offset within block */
	offset = lfs_blkoff(fs, startoffset);

	/* This is usually but not always exactly the block size */
	KASSERT(iosize <= lfs_sb_getbsize(fs));

	/* block number (within file) */
	lbn = lfs_lblkno(fs, startoffset);

	/*
	 * This checks for whether pending stuff needs to be flushed
	 * out and potentially waits. It's been disabled since UBC
	 * support was added to LFS in 2003. -- dholland 20160806
	 */
	/* (void)lfs_check(vp, lbn, 0); */


	/*
	 * Three cases: it's a block beyond the end of file, it's a block in
	 * the file that may or may not have been assigned a disk address or
	 * we're writing an entire block.
	 *
	 * Note, if the daddr is UNWRITTEN, the block already exists in
	 * the cache (it was read or written earlier).  If so, make sure
	 * we don't count it as a new block or zero out its contents. If
	 * it did not, make sure we allocate any necessary indirect
	 * blocks.
	 *
	 * If we are writing a block beyond the end of the file, we need to
	 * check if the old last block was a fragment.  If it was, we need
	 * to rewrite it.
	 */

	/* Start with no buffer so early error returns leave *bpp NULL. */
	if (bpp)
		*bpp = NULL;

	/* Last block number in file */
	lastblock = lfs_lblkno(fs, ip->i_size);

	if (lastblock < ULFS_NDADDR && lastblock < lbn) {
		/*
		 * The file is small enough to have fragments, and we're
		 * allocating past EOF.
		 *
		 * If the last block was a fragment we need to rewrite it
		 * as a full block.
		 */
		osize = lfs_blksize(fs, ip, lastblock);
		if (osize < lfs_sb_getbsize(fs) && osize > 0) {
			/*
			 * bp is only filled in by lfs_fragextend() when we
			 * pass it a non-NULL buffer pointer.
			 */
			if ((error = lfs_fragextend(vp, osize, lfs_sb_getbsize(fs),
						    lastblock,
						    (bpp ? &bp : NULL), cred)))
				return (error);
			/* Update the file size with what we just did (only) */
			ip->i_size = (lastblock + 1) * lfs_sb_getbsize(fs);
			lfs_dino_setsize(fs, ip->i_din, ip->i_size);
			uvm_vnp_setsize(vp, ip->i_size);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			/*
			 * if we got a buffer for this, write it out now
			 * (the result of the write is deliberately discarded)
			 */
			if (bpp)
				(void) VOP_BWRITE(bp->b_vp, bp);
		}
	}

	/*
	 * If the block we are writing is a direct block, it's the last
	 * block in the file, and offset + iosize is less than a full
	 * block, we can write one or more fragments.  There are two cases:
	 * the block is brand new and we should allocate it the correct
	 * size or it already exists and contains some fragments and
	 * we may need to extend it.
	 */
	if (lbn < ULFS_NDADDR && lfs_lblkno(fs, ip->i_size) <= lbn) {
		/* osize: current size of the block; nsize: size it must reach */
		osize = lfs_blksize(fs, ip, lbn);
		nsize = lfs_fragroundup(fs, offset + iosize);
		if (lfs_lblktosize(fs, lbn) >= ip->i_size) {
			/* Brand new block or fragment */
			frags = lfs_numfrags(fs, nsize);
			if (!ISSPACE(fs, frags, cred))
				return ENOSPC;
			if (bpp) {
				*bpp = bp = getblk(vp, lbn, nsize, 0, 0);
				bp->b_blkno = UNWRITTEN;
				if (flags & B_CLRBUF)
					clrbuf(bp);
			}

			/*
			 * Update the effective block count (this count
			 * includes blocks that don't have an on-disk
			 * presence or location yet)
			 */
			ip->i_lfs_effnblks += frags;

			/* account for the space we're taking */
			mutex_enter(&lfs_lock);
			lfs_sb_subbfree(fs, frags);
			mutex_exit(&lfs_lock);

			/* update the inode */
			lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN);
		} else {
			/* extending a block that already has fragments */

			if (nsize <= osize) {
				/* No need to extend */
				if (bpp && (error = bread(vp, lbn, osize,
				    0, &bp)))
					return error;
			} else {
				/* Extend existing block */
				if ((error =
				     lfs_fragextend(vp, osize, nsize, lbn,
						    (bpp ? &bp : NULL), cred)))
					return error;
			}
			/* hand the (possibly extended) buffer to the caller */
			if (bpp)
				*bpp = bp;
		}
		/* The direct-block/fragment case is fully handled here. */
		return 0;
	}

	/*
	 * Look up what's already here.
	 *
	 * daddr receives the block's current disk address; num is used
	 * below as the number of valid entries in indirs[].
	 */

	error = ulfs_bmaparray(vp, lbn, &daddr, &indirs[0], &num, NULL, NULL);
	if (error)
		return (error);

	KASSERT(daddr <= LFS_MAX_DADDR(fs));

	/*
	 * Do byte accounting all at once, so we can gracefully fail *before*
	 * we start assigning blocks.
	 */
	/*
	 * NOTE(review): um_seqinc is used here as the number of frags in
	 * one full block -- confirm against where the field is set.
	 */
	frags = fs->um_seqinc;
	bcount = 0; /* number of frags we need */
	if (daddr == UNASSIGNED) {
		/* no block yet, going to need a whole block */
		bcount = frags;
	}
	/*
	 * Start at 1: indirs[0] is looked up in the inode itself further
	 * down, so it does not correspond to a separate indirect block
	 * buffer in this loop.
	 */
	for (i = 1; i < num; ++i) {
		if (!indirs[i].in_exists) {
			/* need an indirect block at this level */
			bcount += frags;
		}
	}
	if (ISSPACE(fs, bcount, cred)) {
		/* update the superblock's free block count */
		mutex_enter(&lfs_lock);
		lfs_sb_subbfree(fs, bcount);
		mutex_exit(&lfs_lock);
		/* update the file's effective block count */
		ip->i_lfs_effnblks += bcount;
	} else {
		/* whoops, no can do */
		return ENOSPC;
	}

	if (daddr == UNASSIGNED) {
		/*
		 * There is nothing here yet.
		 */

		/*
		 * If there's no indirect block in the inode, change it
		 * to UNWRITTEN to indicate that it exists but doesn't
		 * have an on-disk address yet.
		 *
		 * (Question: where's the block data initialized?)
		 */
		if (num > 0 && lfs_dino_getib(fs, ip->i_din, indirs[0].in_off) == 0) {
			lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN);
		}

		/*
		 * If we need more layers of indirect blocks, create what
		 * we need.
		 */
		if (num > 1) {
			/*
			 * The outermost indirect block address is the one
			 * in the inode, so fetch that.
			 */
			idaddr = lfs_dino_getib(fs, ip->i_din, indirs[0].in_off);
			/*
			 * For each layer of indirection...
			 */
			for (i = 1; i < num; ++i) {
				/*
				 * Get a buffer for the indirect block data.
				 *
				 * (XXX: the logic here seems twisted. What's
				 * wrong with testing in_exists first and then
				 * doing either bread or getblk to get a
				 * buffer?)
				 */
				ibp = getblk(vp, indirs[i].in_lbn,
				    lfs_sb_getbsize(fs), 0,0);
				if (!indirs[i].in_exists) {
					/*
					 * There isn't actually a block here,
					 * so clear the buffer data and mark
					 * the address of the block as
					 * UNWRITTEN.
					 */
					clrbuf(ibp);
					ibp->b_blkno = UNWRITTEN;
				} else if (!(ibp->b_oflags & (BO_DELWRI | BO_DONE))) {
					/*
					 * Otherwise read it in.
					 *
					 * NOTE(review): the biowait() result
					 * is not checked, so a failed read is
					 * not reported from here.
					 */
					ibp->b_blkno = LFS_FSBTODB(fs, idaddr);
					ibp->b_flags |= B_READ;
					VOP_STRATEGY(vp, ibp);
					biowait(ibp);
				}

				/*
				 * Now this indirect block exists, but
				 * the next one down may not yet.  If
				 * so, set it to UNWRITTEN.  This keeps
				 * the accounting straight.
				 */
				if (lfs_iblock_get(fs, ibp->b_data, indirs[i].in_off) == 0)
					lfs_iblock_set(fs, ibp->b_data, indirs[i].in_off,
					    UNWRITTEN);

				/* get the block for the next iteration */
				idaddr = lfs_iblock_get(fs, ibp->b_data, indirs[i].in_off);
#ifdef DEBUG
				if (vp == fs->lfs_ivnode) {
					LFS_ENTER_LOG("balloc", __FILE__,
					    __LINE__, indirs[i].in_lbn,
					    ibp->b_flags, curproc->p_pid);
				}
#endif
				/*
				 * Write out the updated indirect block. Note
				 * that this writes it out even if we didn't
				 * modify it - ultimately because the final
				 * block didn't exist we'll need to write a
				 * new version of all the blocks that lead to
				 * it. Hopefully all that gets in before any
				 * actual disk I/O so we don't end up writing
				 * any of them twice... this is currently not
				 * very clear.
				 *
				 * NOTE(review): on failure we return without
				 * giving back the space charged to bfree and
				 * i_lfs_effnblks above.
				 */
				if ((error = VOP_BWRITE(ibp->b_vp, ibp)))
					return error;
			}
		}
	}


	/*
	 * Get the existing block from the cache, if requested.
	 */
	if (bpp)
		*bpp = bp = getblk(vp, lbn, lfs_blksize(fs, ip, lbn), 0, 0);

	/*
	 * Do accounting on blocks that represent pages.
	 */
	if (!bpp)
		lfs_register_block(vp, lbn);

	/*
	 * The block we are writing may be a brand new block
	 * in which case we need to do accounting.
	 *
	 * We can tell a truly new block because ulfs_bmaparray will say
	 * it is UNASSIGNED.  Once we allocate it we will assign it the
	 * disk address UNWRITTEN.
	 */
	if (daddr == UNASSIGNED) {
		if (bpp) {
			if (flags & B_CLRBUF)
				clrbuf(bp);

			/* Note the new address */
			bp->b_blkno = UNWRITTEN;
		}

		/* Record UNWRITTEN in whatever holds the pointer to this block. */
		switch (num) {
		    case 0:
			/* direct block - update the inode */
			lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN);
			break;
		    case 1:
			/*
			 * using a single indirect block - update the inode
			 *
			 * XXX: is this right? We already set this block
			 * pointer above. I think we want to be writing *in*
			 * the single indirect block and this case shouldn't
			 * exist. (just case 0 and default)
			 *    -- dholland 20160806
			 */
			lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN);
			break;
		    default:
			/*
			 * using multiple indirect blocks - update the
			 * innermost one
			 */
			idp = &indirs[num - 1];
			/* a failed read of the indirect block is fatal here */
			if (bread(vp, idp->in_lbn, lfs_sb_getbsize(fs),
				  B_MODIFY, &ibp))
				panic("lfs_balloc: bread bno %lld",
				    (long long)idp->in_lbn);
			lfs_iblock_set(fs, ibp->b_data, idp->in_off, UNWRITTEN);
#ifdef DEBUG
			if (vp == fs->lfs_ivnode) {
				LFS_ENTER_LOG("balloc", __FILE__,
				    __LINE__, idp->in_lbn,
				    ibp->b_flags, curproc->p_pid);
			}
#endif
			/* the result of this write is not checked */
			VOP_BWRITE(ibp->b_vp, ibp);
		}
	} else if (bpp && !(bp->b_oflags & (BO_DONE|BO_DELWRI))) {
		/*
		 * Not a brand new block, also not in the cache;
		 * read it in from disk.
		 */
		if (iosize == lfs_sb_getbsize(fs))
			/* Optimization: I/O is unnecessary. */
			bp->b_blkno = daddr;
		else {
			/*
			 * We need to read the block to preserve the
			 * existing bytes.
			 *
			 * *bpp was already set above and still refers
			 * to bp if the read fails.
			 */
			bp->b_blkno = daddr;
			bp->b_flags |= B_READ;
			VOP_STRATEGY(vp, bp);
			return (biowait(bp));
		}
	}

	return (0);
}
| 484 | |
| 485 | /* |
| 486 | * Extend a file that uses fragments with more fragments. |
| 487 | * |
| 488 | * XXX: locking? |
| 489 | */ |
| 490 | /* VOP_BWRITE 1 time */ |
| 491 | static int |
| 492 | lfs_fragextend(struct vnode *vp, int osize, int nsize, daddr_t lbn, |
| 493 | struct buf **bpp, kauth_cred_t cred) |
| 494 | { |
| 495 | struct inode *ip; |
| 496 | struct lfs *fs; |
| 497 | long frags; |
| 498 | int error; |
| 499 | size_t obufsize; |
| 500 | |
| 501 | /* XXX move this to a header file */ |
| 502 | /* (XXX: except it's not clear what purpose it serves) */ |
| 503 | extern long locked_queue_bytes; |
| 504 | |
| 505 | ip = VTOI(vp); |
| 506 | fs = ip->i_lfs; |
| 507 | |
| 508 | /* |
| 509 | * XXX: is there some reason we know more about the seglock |
| 510 | * state here than at the top of lfs_balloc? |
| 511 | */ |
| 512 | ASSERT_NO_SEGLOCK(fs); |
| 513 | |
| 514 | /* number of frags we're adding */ |
| 515 | frags = (long)lfs_numfrags(fs, nsize - osize); |
| 516 | |
| 517 | error = 0; |
| 518 | |
| 519 | /* |
| 520 | * Get the seglock so we don't enlarge blocks while a segment |
| 521 | * is being written. If we're called with bpp==NULL, though, |
| 522 | * we are only pretending to change a buffer, so we don't have to |
| 523 | * lock. |
| 524 | * |
| 525 | * XXX: the above comment is lying, as fs->lfs_fraglock is not |
| 526 | * the segment lock. |
| 527 | */ |
| 528 | top: |
| 529 | if (bpp) { |
| 530 | rw_enter(&fs->lfs_fraglock, RW_READER); |
| 531 | LFS_DEBUG_COUNTLOCKED("frag" ); |
| 532 | } |
| 533 | |
| 534 | /* check if we actually have enough frags available */ |
| 535 | if (!ISSPACE(fs, frags, cred)) { |
| 536 | error = ENOSPC; |
| 537 | goto out; |
| 538 | } |
| 539 | |
| 540 | /* |
| 541 | * If we are not asked to actually return the block, all we need |
| 542 | * to do is allocate space for it. UBC will handle dirtying the |
| 543 | * appropriate things and making sure it all goes to disk. |
| 544 | * Don't bother to read in that case. |
| 545 | */ |
| 546 | if (bpp && (error = bread(vp, lbn, osize, 0, bpp))) { |
| 547 | goto out; |
| 548 | } |
| 549 | #if defined(LFS_QUOTA) || defined(LFS_QUOTA2) |
| 550 | if ((error = lfs_chkdq(ip, frags, cred, 0))) { |
| 551 | if (bpp) |
| 552 | brelse(*bpp, 0); |
| 553 | goto out; |
| 554 | } |
| 555 | #endif |
| 556 | /* |
| 557 | * Adjust accounting for lfs_avail. If there's not enough room, |
| 558 | * we will have to wait for the cleaner, which we can't do while |
| 559 | * holding a block busy or while holding the seglock. In that case, |
| 560 | * release both and start over after waiting. |
| 561 | */ |
| 562 | |
| 563 | if (bpp && ((*bpp)->b_oflags & BO_DELWRI)) { |
| 564 | if (!lfs_fits(fs, frags)) { |
| 565 | if (bpp) |
| 566 | brelse(*bpp, 0); |
| 567 | #if defined(LFS_QUOTA) || defined(LFS_QUOTA2) |
| 568 | lfs_chkdq(ip, -frags, cred, 0); |
| 569 | #endif |
| 570 | rw_exit(&fs->lfs_fraglock); |
| 571 | lfs_availwait(fs, frags); |
| 572 | goto top; |
| 573 | } |
| 574 | lfs_sb_subavail(fs, frags); |
| 575 | } |
| 576 | |
| 577 | /* decrease the free block count in the superblock */ |
| 578 | mutex_enter(&lfs_lock); |
| 579 | lfs_sb_subbfree(fs, frags); |
| 580 | mutex_exit(&lfs_lock); |
| 581 | /* increase the file's effective block count */ |
| 582 | ip->i_lfs_effnblks += frags; |
| 583 | /* mark the inode dirty */ |
| 584 | ip->i_flag |= IN_CHANGE | IN_UPDATE; |
| 585 | |
| 586 | if (bpp) { |
| 587 | obufsize = (*bpp)->b_bufsize; |
| 588 | allocbuf(*bpp, nsize, 1); |
| 589 | |
| 590 | /* Adjust locked-list accounting */ |
| 591 | if (((*bpp)->b_flags & B_LOCKED) != 0 && |
| 592 | (*bpp)->b_iodone == NULL) { |
| 593 | mutex_enter(&lfs_lock); |
| 594 | locked_queue_bytes += (*bpp)->b_bufsize - obufsize; |
| 595 | mutex_exit(&lfs_lock); |
| 596 | } |
| 597 | |
| 598 | /* zero the new space */ |
| 599 | memset((char *)((*bpp)->b_data) + osize, 0, (u_int)(nsize - osize)); |
| 600 | } |
| 601 | |
| 602 | out: |
| 603 | if (bpp) { |
| 604 | rw_exit(&fs->lfs_fraglock); |
| 605 | } |
| 606 | return (error); |
| 607 | } |
| 608 | |
| 609 | static inline int |
| 610 | lge(struct lbnentry *a, struct lbnentry *b) |
| 611 | { |
| 612 | return a->lbn - b->lbn; |
| 613 | } |
| 614 | |
| 615 | SPLAY_PROTOTYPE(lfs_splay, lbnentry, entry, lge); |
| 616 | |
| 617 | SPLAY_GENERATE(lfs_splay, lbnentry, entry, lge); |
| 618 | |
| 619 | /* |
| 620 | * Record this lbn as being "write pending". We used to have this information |
| 621 | * on the buffer headers, but since pages don't have buffer headers we |
| 622 | * record it here instead. |
| 623 | */ |
| 624 | void |
| 625 | lfs_register_block(struct vnode *vp, daddr_t lbn) |
| 626 | { |
| 627 | struct lfs *fs; |
| 628 | struct inode *ip; |
| 629 | struct lbnentry *lbp; |
| 630 | |
| 631 | ip = VTOI(vp); |
| 632 | |
| 633 | /* Don't count metadata */ |
| 634 | if (lbn < 0 || vp->v_type != VREG || ip->i_number == LFS_IFILE_INUM) |
| 635 | return; |
| 636 | |
| 637 | fs = ip->i_lfs; |
| 638 | |
| 639 | ASSERT_NO_SEGLOCK(fs); |
| 640 | |
| 641 | /* If no space, wait for the cleaner */ |
| 642 | lfs_availwait(fs, lfs_btofsb(fs, 1 << lfs_sb_getbshift(fs))); |
| 643 | |
| 644 | lbp = (struct lbnentry *)pool_get(&lfs_lbnentry_pool, PR_WAITOK); |
| 645 | lbp->lbn = lbn; |
| 646 | mutex_enter(&lfs_lock); |
| 647 | if (SPLAY_INSERT(lfs_splay, &ip->i_lfs_lbtree, lbp) != NULL) { |
| 648 | mutex_exit(&lfs_lock); |
| 649 | /* Already there */ |
| 650 | pool_put(&lfs_lbnentry_pool, lbp); |
| 651 | return; |
| 652 | } |
| 653 | |
| 654 | ++ip->i_lfs_nbtree; |
| 655 | fs->lfs_favail += lfs_btofsb(fs, (1 << lfs_sb_getbshift(fs))); |
| 656 | fs->lfs_pages += lfs_sb_getbsize(fs) >> PAGE_SHIFT; |
| 657 | ++locked_fakequeue_count; |
| 658 | lfs_subsys_pages += lfs_sb_getbsize(fs) >> PAGE_SHIFT; |
| 659 | mutex_exit(&lfs_lock); |
| 660 | } |
| 661 | |
| 662 | static void |
| 663 | lfs_do_deregister(struct lfs *fs, struct inode *ip, struct lbnentry *lbp) |
| 664 | { |
| 665 | ASSERT_MAYBE_SEGLOCK(fs); |
| 666 | |
| 667 | mutex_enter(&lfs_lock); |
| 668 | --ip->i_lfs_nbtree; |
| 669 | SPLAY_REMOVE(lfs_splay, &ip->i_lfs_lbtree, lbp); |
| 670 | if (fs->lfs_favail > lfs_btofsb(fs, (1 << lfs_sb_getbshift(fs)))) |
| 671 | fs->lfs_favail -= lfs_btofsb(fs, (1 << lfs_sb_getbshift(fs))); |
| 672 | fs->lfs_pages -= lfs_sb_getbsize(fs) >> PAGE_SHIFT; |
| 673 | if (locked_fakequeue_count > 0) |
| 674 | --locked_fakequeue_count; |
| 675 | lfs_subsys_pages -= lfs_sb_getbsize(fs) >> PAGE_SHIFT; |
| 676 | mutex_exit(&lfs_lock); |
| 677 | |
| 678 | pool_put(&lfs_lbnentry_pool, lbp); |
| 679 | } |
| 680 | |
| 681 | void |
| 682 | lfs_deregister_block(struct vnode *vp, daddr_t lbn) |
| 683 | { |
| 684 | struct lfs *fs; |
| 685 | struct inode *ip; |
| 686 | struct lbnentry *lbp; |
| 687 | struct lbnentry tmp; |
| 688 | |
| 689 | ip = VTOI(vp); |
| 690 | |
| 691 | /* Don't count metadata */ |
| 692 | if (lbn < 0 || vp->v_type != VREG || ip->i_number == LFS_IFILE_INUM) |
| 693 | return; |
| 694 | |
| 695 | fs = ip->i_lfs; |
| 696 | tmp.lbn = lbn; |
| 697 | lbp = SPLAY_FIND(lfs_splay, &ip->i_lfs_lbtree, &tmp); |
| 698 | if (lbp == NULL) |
| 699 | return; |
| 700 | |
| 701 | lfs_do_deregister(fs, ip, lbp); |
| 702 | } |
| 703 | |
| 704 | void |
| 705 | lfs_deregister_all(struct vnode *vp) |
| 706 | { |
| 707 | struct lbnentry *lbp, *nlbp; |
| 708 | struct lfs_splay *hd; |
| 709 | struct lfs *fs; |
| 710 | struct inode *ip; |
| 711 | |
| 712 | ip = VTOI(vp); |
| 713 | fs = ip->i_lfs; |
| 714 | hd = &ip->i_lfs_lbtree; |
| 715 | |
| 716 | for (lbp = SPLAY_MIN(lfs_splay, hd); lbp != NULL; lbp = nlbp) { |
| 717 | nlbp = SPLAY_NEXT(lfs_splay, hd, lbp); |
| 718 | lfs_do_deregister(fs, ip, lbp); |
| 719 | } |
| 720 | } |
| 721 | |