/*	$NetBSD: uvm_vnode.c,v 1.102 2015/12/06 09:38:54 wiz Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.
 * Copyright (c) 1990 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vnode_pager.c	8.8 (Berkeley) 2/13/94
 * from: Id: uvm_vnode.c,v 1.1.2.26 1998/02/02 20:38:07 chuck Exp
 */

/*
 * uvm_vnode.c: the vnode pager.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_vnode.c,v 1.102 2015/12/06 09:38:54 wiz Exp $");

#ifdef _KERNEL_OPT
#include "opt_uvmhist.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/disklabel.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/conf.h>
#include <sys/pool.h>
#include <sys/mount.h>

#include <miscfs/specfs/specdev.h>

#include <uvm/uvm.h>
#include <uvm/uvm_readahead.h>

#ifdef UVMHIST
UVMHIST_DEFINE(ubchist);
#endif

/*
 * functions
 */

static void	uvn_detach(struct uvm_object *);
static int	uvn_get(struct uvm_object *, voff_t, struct vm_page **, int *,
			int, vm_prot_t, int, int);
static int	uvn_put(struct uvm_object *, voff_t, voff_t, int);
static void	uvn_reference(struct uvm_object *);

static int	uvn_findpage(struct uvm_object *, voff_t, struct vm_page **,
			     int);

/*
 * master pager structure
 */

const struct uvm_pagerops uvm_vnodeops = {
	.pgo_reference = uvn_reference,
	.pgo_detach = uvn_detach,
	.pgo_get = uvn_get,
	.pgo_put = uvn_put,
};
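
/*
 * A vnode embeds its uvm_object (vp->v_uobj) with uvm_vnodeops as its
 * pager ops, which is why the routines below can cast uobj back to a
 * vnode.  Generic code dispatches through the ops table without
 * knowing the object's type; roughly (an illustrative sketch, not a
 * verbatim caller):
 *
 *	struct uvm_object *uobj = &vp->v_uobj;
 *
 *	(*uobj->pgops->pgo_reference)(uobj);	// vref()s the vnode
 *	...
 *	(*uobj->pgops->pgo_detach)(uobj);	// vrele()s it again
 */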

/*
 * the ops!
 */

/*
 * uvn_reference
 *
 * duplicate a reference to a VM object.  Note that the reference
 * count must already be at least one (the passed-in reference), so
 * there is no chance of the uvn being killed or locked out here.
 *
 * => caller must call with object unlocked.
 * => caller must be using the same accessprot as was used at attach time
 */

static void
uvn_reference(struct uvm_object *uobj)
{
	vref((struct vnode *)uobj);
}


/*
 * uvn_detach
 *
 * remove a reference to a VM object.
 *
 * => caller must call with object unlocked and map locked.
 */

static void
uvn_detach(struct uvm_object *uobj)
{
	vrele((struct vnode *)uobj);
}

/*
 * uvn_put: flush page data to backing store.
 *
 * => object must be locked on entry!  VOP_PUTPAGES must unlock it.
 * => flags: PGO_SYNCIO -- use sync. I/O
 * => note: caller must set PG_CLEAN and call pmap_clear_modify()
 *    (if needed)
 */

static int
uvn_put(struct uvm_object *uobj, voff_t offlo, voff_t offhi, int flags)
{
	struct vnode *vp = (struct vnode *)uobj;
	int error;

	KASSERT(mutex_owned(vp->v_interlock));
	error = VOP_PUTPAGES(vp, offlo, offhi, flags);

	return error;
}
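
/*
 * For example, flushing every page of an object to backing store and
 * freeing the pages might look roughly like this (a sketch, not a
 * verbatim caller; an offhi of 0 means "to the end of the object"):
 *
 *	mutex_enter(uobj->vmobjlock);
 *	error = (*uobj->pgops->pgo_put)(uobj, 0, 0,
 *	    PGO_CLEANIT | PGO_FREE | PGO_SYNCIO);
 *	// note: pgo_put drops vmobjlock before returning
 */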


/*
 * uvn_get: get pages (synchronously) from backing store
 *
 * => prefer map unlocked (not required)
 * => object must be locked!  we will _unlock_ it before starting any I/O.
 * => flags: PGO_ALLPAGES: get all of the pages
 *           PGO_LOCKED: fault data structures are locked
 * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
 * => NOTE: caller must check for released pages!!
 */

static int
uvn_get(struct uvm_object *uobj, voff_t offset,
    struct vm_page **pps /* IN/OUT */,
    int *npagesp /* IN (OUT if PGO_LOCKED) */,
    int centeridx, vm_prot_t access_type, int advice, int flags)
{
	struct vnode *vp = (struct vnode *)uobj;
	int error;

	UVMHIST_FUNC("uvn_get"); UVMHIST_CALLED(ubchist);

	UVMHIST_LOG(ubchist, "vp %p off 0x%x", vp, (int)offset, 0,0);

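	/*
	 * for a read of a regular file outside the PGO_LOCKED fast
	 * path, let the read-ahead code consider starting asynchronous
	 * reads of the pages beyond this request.
	 */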
	if (vp->v_type == VREG && (access_type & VM_PROT_WRITE) == 0
	    && (flags & PGO_LOCKED) == 0) {
		vn_ra_allocctx(vp);
		uvm_ra_request(vp->v_ractx, advice, uobj, offset,
		    *npagesp << PAGE_SHIFT);
	}

	error = VOP_GETPAGES(vp, offset, pps, npagesp, centeridx,
	    access_type, advice, flags);

	KASSERT(((flags & PGO_LOCKED) != 0 && mutex_owned(vp->v_interlock)) ||
	    (flags & PGO_LOCKED) == 0);
	return error;
}
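
/*
 * A typical synchronous, unlocked pgo_get call looks roughly like
 * this (an illustrative sketch; real callers such as the fault
 * handler and UBC do more error handling):
 *
 *	struct vm_page *pgs[1] = { NULL };
 *	int npages = 1;
 *
 *	mutex_enter(uobj->vmobjlock);
 *	error = (*uobj->pgops->pgo_get)(uobj, trunc_page(off), pgs,
 *	    &npages, 0, VM_PROT_READ, UVM_ADV_NORMAL, PGO_SYNCIO);
 *	// on success the object lock has been dropped and pgs[0]
 *	// comes back to us BUSY
 */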


/*
 * uvn_findpages:
 * return the pages for the uobj and range of offsets requested,
 * allocating them if needed.
 * => uobj must be locked.
 * => returned pages will be BUSY.
 */

int
uvn_findpages(struct uvm_object *uobj, voff_t offset, int *npagesp,
    struct vm_page **pgs, int flags)
{
	int i, count, found, npages, rv;

	count = found = 0;
	npages = *npagesp;
	if (flags & UFP_BACKWARD) {
		for (i = npages - 1; i >= 0; i--, offset -= PAGE_SIZE) {
			rv = uvn_findpage(uobj, offset, &pgs[i], flags);
			if (rv == 0) {
				if (flags & UFP_DIRTYONLY)
					break;
			} else
				found++;
			count++;
		}
	} else {
		for (i = 0; i < npages; i++, offset += PAGE_SIZE) {
			rv = uvn_findpage(uobj, offset, &pgs[i], flags);
			if (rv == 0) {
				if (flags & UFP_DIRTYONLY)
					break;
			} else
				found++;
			count++;
		}
	}
	*npagesp = count;
	return (found);
}
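
/*
 * E.g. a caller wanting a run of busy pages for I/O might do the
 * following (a sketch; genfs does something similar when building
 * page clusters):
 *
 *	struct vm_page *pgs[16];
 *	int npages = 16;
 *
 *	memset(pgs, 0, sizeof(pgs));
 *	mutex_enter(uobj->vmobjlock);
 *	found = uvn_findpages(uobj, startoff, &npages, pgs, UFP_ALL);
 *	// npages now holds the number of slots examined; the entries
 *	// that were found (or allocated) are BUSY and must be
 *	// unbusied by the caller when the I/O is done
 */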

static int
uvn_findpage(struct uvm_object *uobj, voff_t offset, struct vm_page **pgp,
    int flags)
{
	struct vm_page *pg;
	bool dirty;
	UVMHIST_FUNC("uvn_findpage"); UVMHIST_CALLED(ubchist);
	UVMHIST_LOG(ubchist, "vp %p off 0x%lx", uobj, offset,0,0);

	KASSERT(mutex_owned(uobj->vmobjlock));

	if (*pgp != NULL) {
		UVMHIST_LOG(ubchist, "dontcare", 0,0,0,0);
		return 0;
	}
	for (;;) {
		/* look for an existing page */
		pg = uvm_pagelookup(uobj, offset);

		/* nope?  allocate one now */
		if (pg == NULL) {
			if (flags & UFP_NOALLOC) {
				UVMHIST_LOG(ubchist, "noalloc", 0,0,0,0);
				return 0;
			}
			pg = uvm_pagealloc(uobj, offset, NULL,
			    UVM_FLAG_COLORMATCH);
			if (pg == NULL) {
				if (flags & UFP_NOWAIT) {
					UVMHIST_LOG(ubchist, "nowait",0,0,0,0);
					return 0;
				}
				mutex_exit(uobj->vmobjlock);
				uvm_wait("uvn_fp1");
				mutex_enter(uobj->vmobjlock);
				continue;
			}
			UVMHIST_LOG(ubchist, "alloced %p (color %u)", pg,
			    VM_PGCOLOR_BUCKET(pg), 0,0);
			break;
		} else if (flags & UFP_NOCACHE) {
			UVMHIST_LOG(ubchist, "nocache",0,0,0,0);
			return 0;
		}

		/* page is there, see if we need to wait on it */
		if ((pg->flags & PG_BUSY) != 0) {
			if (flags & UFP_NOWAIT) {
				UVMHIST_LOG(ubchist, "nowait",0,0,0,0);
				return 0;
			}
			pg->flags |= PG_WANTED;
			UVMHIST_LOG(ubchist, "wait %p (color %u)", pg,
			    VM_PGCOLOR_BUCKET(pg), 0,0);
			UVM_UNLOCK_AND_WAIT(pg, uobj->vmobjlock, 0,
			    "uvn_fp2", 0);
			mutex_enter(uobj->vmobjlock);
			continue;
		}

		/* skip PG_RDONLY pages if requested */
		if ((flags & UFP_NORDONLY) && (pg->flags & PG_RDONLY)) {
			UVMHIST_LOG(ubchist, "nordonly",0,0,0,0);
			return 0;
		}

		/* stop on clean pages if requested */
		if (flags & UFP_DIRTYONLY) {
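			/*
			 * the page is dirty if the pmap modified bit
			 * is set (pmap_clear_modify() reports and
			 * clears it) or if PG_CLEAN is not set; either
			 * way mark it clean now, since the caller is
			 * presumably about to write it out.
			 */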
			dirty = pmap_clear_modify(pg) ||
			    (pg->flags & PG_CLEAN) == 0;
			pg->flags |= PG_CLEAN;
			if (!dirty) {
				UVMHIST_LOG(ubchist, "dirtyonly", 0,0,0,0);
				return 0;
			}
		}

		/* mark the page BUSY and we're done. */
		pg->flags |= PG_BUSY;
		UVM_PAGE_OWN(pg, "uvn_findpage");
		UVMHIST_LOG(ubchist, "found %p (color %u)",
		    pg, VM_PGCOLOR_BUCKET(pg), 0,0);
		break;
	}
	*pgp = pg;
	return 1;
}

/*
 * uvm_vnp_setsize: grow or shrink a vnode uobj
 *
 * grow   => just update size value
 * shrink => toss unneeded pages
 *
 * => we assume that the caller has a reference of some sort to the
 *	vnode in question so that it will not be yanked out from under
 *	us.
 */

void
uvm_vnp_setsize(struct vnode *vp, voff_t newsize)
{
	struct uvm_object *uobj = &vp->v_uobj;
	voff_t pgend = round_page(newsize);
	voff_t oldsize;
	UVMHIST_FUNC("uvm_vnp_setsize"); UVMHIST_CALLED(ubchist);

	mutex_enter(uobj->vmobjlock);
	UVMHIST_LOG(ubchist, "vp %p old 0x%x new 0x%x",
	    vp, vp->v_size, newsize, 0);

	/*
	 * now check if the size has changed: if we shrink we had better
	 * toss some pages...
	 */

	KASSERT(newsize != VSIZENOTSET && newsize >= 0);
	KASSERT(vp->v_size <= vp->v_writesize);
	KASSERT(vp->v_size == vp->v_writesize ||
	    newsize == vp->v_writesize || newsize <= vp->v_size);

	oldsize = vp->v_writesize;

	/*
	 * check whether the size shrank: if the old size hasn't been
	 * set yet there are no pages to drop, and if pgend overflowed
	 * to a negative value this is not a shrink.
	 */
	if (oldsize > pgend && oldsize != VSIZENOTSET && pgend >= 0) {
		(void) uvn_put(uobj, pgend, 0, PGO_FREE | PGO_SYNCIO);
		mutex_enter(uobj->vmobjlock);
	}
	vp->v_size = vp->v_writesize = newsize;
	mutex_exit(uobj->vmobjlock);
}
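
/*
 * Filesystems call uvm_vnp_setsize() whenever the file size changes,
 * e.g. from a truncate path, roughly like this (a sketch; "ip" is a
 * hypothetical inode):
 *
 *	ip->i_size = newlen;
 *	uvm_vnp_setsize(vp, newlen);	// drops pages past the new EOF
 */

/*
 * uvm_vnp_setwritesize: set the size the vnode will have after
 * pending (extending) writes complete.  v_writesize may run ahead of
 * v_size while such writes are outstanding; uvm_vnp_setsize() later
 * brings the two back together.
 */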

void
uvm_vnp_setwritesize(struct vnode *vp, voff_t newsize)
{

	mutex_enter(vp->v_interlock);
	KASSERT(newsize != VSIZENOTSET && newsize >= 0);
	KASSERT(vp->v_size != VSIZENOTSET);
	KASSERT(vp->v_writesize != VSIZENOTSET);
	KASSERT(vp->v_size <= vp->v_writesize);
	KASSERT(vp->v_size <= newsize);
	vp->v_writesize = newsize;
	mutex_exit(vp->v_interlock);
}
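
/*
 * uvn_text_p: does the vnode have any executable mappings?
 */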

bool
uvn_text_p(struct uvm_object *uobj)
{
	struct vnode *vp = (struct vnode *)uobj;

	return (vp->v_iflag & VI_EXECMAP) != 0;
}
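
/*
 * uvn_clean_p: is the vnode free of dirty pages?  (a vnode goes on
 * the syncer worklist when it may have pages to clean.)
 */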

bool
uvn_clean_p(struct uvm_object *uobj)
{
	struct vnode *vp = (struct vnode *)uobj;

	return (vp->v_iflag & VI_ONWORKLST) == 0;
}
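
/*
 * uvn_needs_writefault_p: should writes to this vnode be trapped as
 * write faults (e.g. by entering its mappings read-only), so that the
 * first modification can be noted?  true if the vnode is currently
 * clean, or is write-mapped but not yet marked dirty.
 */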

bool
uvn_needs_writefault_p(struct uvm_object *uobj)
{
	struct vnode *vp = (struct vnode *)uobj;

	return uvn_clean_p(uobj) ||
	    (vp->v_iflag & (VI_WRMAP|VI_WRMAPDIRTY)) == VI_WRMAP;
}