| 1 | /* $NetBSD: mm.c,v 1.22 2016/10/13 08:56:31 ryo Exp $ */ |
| 2 | |
| 3 | /*- |
| 4 | * Copyright (c) 2002, 2008, 2010 The NetBSD Foundation, Inc. |
| 5 | * All rights reserved. |
| 6 | * |
| 7 | * This code is derived from software contributed to The NetBSD Foundation |
| 8 | * by Christos Zoulas, Joerg Sonnenberger and Mindaugas Rasiukevicius. |
| 9 | * |
| 10 | * Redistribution and use in source and binary forms, with or without |
| 11 | * modification, are permitted provided that the following conditions |
| 12 | * are met: |
| 13 | * 1. Redistributions of source code must retain the above copyright |
| 14 | * notice, this list of conditions and the following disclaimer. |
| 15 | * 2. Redistributions in binary form must reproduce the above copyright |
| 16 | * notice, this list of conditions and the following disclaimer in the |
| 17 | * documentation and/or other materials provided with the distribution. |
| 18 | * |
| 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
| 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
| 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
| 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 29 | * POSSIBILITY OF SUCH DAMAGE. |
| 30 | */ |
| 31 | |
/*
 * Special /dev/{mem,kmem,zero,null,full} memory devices.
 */
| 35 | |
| 36 | #include <sys/cdefs.h> |
| 37 | __KERNEL_RCSID(0, "$NetBSD: mm.c,v 1.22 2016/10/13 08:56:31 ryo Exp $" ); |
| 38 | |
| 39 | #include "opt_compat_netbsd.h" |
| 40 | |
| 41 | #include <sys/param.h> |
| 42 | #include <sys/conf.h> |
| 43 | #include <sys/ioctl.h> |
| 44 | #include <sys/mman.h> |
| 45 | #include <sys/uio.h> |
| 46 | #include <sys/termios.h> |
| 47 | |
| 48 | #include <dev/mm.h> |
| 49 | |
| 50 | #include <uvm/uvm_extern.h> |
| 51 | |
| 52 | static void * dev_zero_page __read_mostly; |
| 53 | static kmutex_t dev_mem_lock __cacheline_aligned; |
| 54 | static vaddr_t dev_mem_addr __read_mostly; |
| 55 | |
| 56 | static dev_type_read(mm_readwrite); |
| 57 | static dev_type_ioctl(mm_ioctl); |
| 58 | static dev_type_mmap(mm_mmap); |
| 59 | static dev_type_ioctl(mm_ioctl); |
| 60 | |
/*
 * Character device switch for the memory devices.  Reads and writes share
 * mm_readwrite(); the minor number selects the actual device there.
 */
const struct cdevsw mem_cdevsw = {
	/* Ports defining __HAVE_MM_MD_OPEN supply their own open routine. */
#ifdef __HAVE_MM_MD_OPEN
	.d_open = mm_md_open,
#else
	.d_open = nullopen,
#endif
	.d_close = nullclose,
	.d_read = mm_readwrite,
	.d_write = mm_readwrite,
	.d_ioctl = mm_ioctl,
	/* The remaining entry points use the generic "no*" stubs. */
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = mm_mmap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_MPSAFE
};
| 79 | |
/*
 * Second device switch, built only when "pmax" is defined.  It uses the
 * same handlers as mem_cdevsw, except that d_open is always nullopen.
 * NOTE(review): presumably kept for Ultrix compatibility -- confirm.
 */
#ifdef pmax /* XXX */
const struct cdevsw mem_ultrix_cdevsw = {
	.d_open = nullopen,
	.d_close = nullclose,
	.d_read = mm_readwrite,
	.d_write = mm_readwrite,
	.d_ioctl = mm_ioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = mm_mmap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_MPSAFE
};
#endif
| 96 | |
| 97 | /* |
| 98 | * mm_init: initialize memory device driver. |
| 99 | */ |
| 100 | void |
| 101 | mm_init(void) |
| 102 | { |
| 103 | vaddr_t pg; |
| 104 | |
| 105 | mutex_init(&dev_mem_lock, MUTEX_DEFAULT, IPL_NONE); |
| 106 | |
| 107 | /* Read-only zero-page. */ |
| 108 | pg = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_WIRED|UVM_KMF_ZERO); |
| 109 | KASSERT(pg != 0); |
| 110 | pmap_protect(pmap_kernel(), pg, pg + PAGE_SIZE, VM_PROT_READ); |
| 111 | pmap_update(pmap_kernel()); |
| 112 | dev_zero_page = (void *)pg; |
| 113 | |
| 114 | #ifndef __HAVE_MM_MD_CACHE_ALIASING |
| 115 | /* KVA for mappings during I/O. */ |
| 116 | dev_mem_addr = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, |
| 117 | UVM_KMF_VAONLY|UVM_KMF_WAITVA); |
| 118 | KASSERT(dev_mem_addr != 0); |
| 119 | #else |
| 120 | dev_mem_addr = 0; |
| 121 | #endif |
| 122 | } |
| 123 | |
| 124 | |
| 125 | /* |
| 126 | * dev_mem_getva: get a special virtual address. If architecture requires, |
| 127 | * allocate VA according to PA, which avoids cache-aliasing issues. Use a |
| 128 | * constant, general mapping address otherwise. |
| 129 | */ |
| 130 | static inline vaddr_t |
| 131 | dev_mem_getva(paddr_t pa, int color) |
| 132 | { |
| 133 | #ifdef __HAVE_MM_MD_CACHE_ALIASING |
| 134 | return uvm_km_alloc(kernel_map, PAGE_SIZE, |
| 135 | color & uvmexp.colormask, |
| 136 | UVM_KMF_VAONLY | UVM_KMF_WAITVA | UVM_KMF_COLORMATCH); |
| 137 | #else |
| 138 | return dev_mem_addr; |
| 139 | #endif |
| 140 | } |
| 141 | |
| 142 | static inline void |
| 143 | dev_mem_relva(paddr_t pa, vaddr_t va) |
| 144 | { |
| 145 | #ifdef __HAVE_MM_MD_CACHE_ALIASING |
| 146 | uvm_km_free(kernel_map, va, PAGE_SIZE, UVM_KMF_VAONLY); |
| 147 | #else |
| 148 | KASSERT(dev_mem_addr == va); |
| 149 | #endif |
| 150 | } |
| 151 | |
| 152 | /* |
| 153 | * dev_kmem_readwrite: helper for DEV_MEM (/dev/mem) case of R/W. |
| 154 | */ |
| 155 | static int |
| 156 | dev_mem_readwrite(struct uio *uio, struct iovec *iov) |
| 157 | { |
| 158 | paddr_t paddr; |
| 159 | vaddr_t vaddr; |
| 160 | vm_prot_t prot; |
| 161 | size_t len, offset; |
| 162 | bool have_direct; |
| 163 | int error; |
| 164 | int color = 0; |
| 165 | |
| 166 | /* Check for wrap around. */ |
| 167 | if ((uintptr_t)uio->uio_offset != uio->uio_offset) { |
| 168 | return EFAULT; |
| 169 | } |
| 170 | paddr = uio->uio_offset & ~PAGE_MASK; |
| 171 | prot = (uio->uio_rw == UIO_WRITE) ? VM_PROT_WRITE : VM_PROT_READ; |
| 172 | error = mm_md_physacc(paddr, prot); |
| 173 | if (error) { |
| 174 | return error; |
| 175 | } |
| 176 | offset = uio->uio_offset & PAGE_MASK; |
| 177 | len = MIN(uio->uio_resid, PAGE_SIZE - offset); |
| 178 | |
| 179 | #ifdef __HAVE_MM_MD_CACHE_ALIASING |
| 180 | have_direct = mm_md_page_color(paddr, &color); |
| 181 | #else |
| 182 | have_direct = true; |
| 183 | color = 0; |
| 184 | #endif |
| 185 | |
| 186 | #ifdef __HAVE_MM_MD_DIRECT_MAPPED_PHYS |
| 187 | /* Is physical address directly mapped? Return VA. */ |
| 188 | if (have_direct) |
| 189 | have_direct = mm_md_direct_mapped_phys(paddr, &vaddr); |
| 190 | #else |
| 191 | vaddr = 0; |
| 192 | have_direct = false; |
| 193 | #endif |
| 194 | if (!have_direct) { |
| 195 | /* Get a special virtual address. */ |
| 196 | const vaddr_t va = dev_mem_getva(paddr, color); |
| 197 | |
| 198 | /* Map selected KVA to physical address. */ |
| 199 | mutex_enter(&dev_mem_lock); |
| 200 | pmap_kenter_pa(va, paddr, prot, 0); |
| 201 | pmap_update(pmap_kernel()); |
| 202 | |
| 203 | /* Perform I/O. */ |
| 204 | vaddr = va + offset; |
| 205 | error = uiomove((void *)vaddr, len, uio); |
| 206 | |
| 207 | /* Unmap, flush before unlock. */ |
| 208 | pmap_kremove(va, PAGE_SIZE); |
| 209 | pmap_update(pmap_kernel()); |
| 210 | mutex_exit(&dev_mem_lock); |
| 211 | |
| 212 | /* "Release" the virtual address. */ |
| 213 | dev_mem_relva(paddr, va); |
| 214 | } else { |
| 215 | /* Direct map, just perform I/O. */ |
| 216 | vaddr += offset; |
| 217 | error = uiomove((void *)vaddr, len, uio); |
| 218 | } |
| 219 | return error; |
| 220 | } |
| 221 | |
| 222 | /* |
| 223 | * dev_kmem_readwrite: helper for DEV_KMEM (/dev/kmem) case of R/W. |
| 224 | */ |
| 225 | static int |
| 226 | dev_kmem_readwrite(struct uio *uio, struct iovec *iov) |
| 227 | { |
| 228 | void *addr; |
| 229 | size_t len, offset; |
| 230 | vm_prot_t prot; |
| 231 | int error; |
| 232 | bool md_kva; |
| 233 | |
| 234 | /* Check for wrap around. */ |
| 235 | addr = (void *)(intptr_t)uio->uio_offset; |
| 236 | if ((uintptr_t)addr != uio->uio_offset) { |
| 237 | return EFAULT; |
| 238 | } |
| 239 | /* |
| 240 | * Handle non-page aligned offset. |
| 241 | * Otherwise, we operate in page-by-page basis. |
| 242 | */ |
| 243 | offset = uio->uio_offset & PAGE_MASK; |
| 244 | len = MIN(uio->uio_resid, PAGE_SIZE - offset); |
| 245 | prot = (uio->uio_rw == UIO_WRITE) ? VM_PROT_WRITE : VM_PROT_READ; |
| 246 | |
| 247 | md_kva = false; |
| 248 | |
| 249 | #ifdef __HAVE_MM_MD_DIRECT_MAPPED_IO |
| 250 | paddr_t paddr; |
| 251 | /* MD case: is this is a directly mapped address? */ |
| 252 | if (mm_md_direct_mapped_io(addr, &paddr)) { |
| 253 | /* If so, validate physical address. */ |
| 254 | error = mm_md_physacc(paddr, prot); |
| 255 | if (error) { |
| 256 | return error; |
| 257 | } |
| 258 | md_kva = true; |
| 259 | } |
| 260 | #endif |
| 261 | if (!md_kva) { |
| 262 | bool checked = false; |
| 263 | |
| 264 | #ifdef __HAVE_MM_MD_KERNACC |
| 265 | /* MD check for the address. */ |
| 266 | error = mm_md_kernacc(addr, prot, &checked); |
| 267 | if (error) { |
| 268 | return error; |
| 269 | } |
| 270 | #endif |
| 271 | /* UVM check for the address (unless MD indicated to not). */ |
| 272 | if (!checked && !uvm_kernacc(addr, len, prot)) { |
| 273 | return EFAULT; |
| 274 | } |
| 275 | } |
| 276 | error = uiomove(addr, len, uio); |
| 277 | return error; |
| 278 | } |
| 279 | |
| 280 | /* |
| 281 | * dev_zero_readwrite: helper for DEV_ZERO (/dev/null) case of R/W. |
| 282 | */ |
| 283 | static inline int |
| 284 | dev_zero_readwrite(struct uio *uio, struct iovec *iov) |
| 285 | { |
| 286 | size_t len; |
| 287 | |
| 288 | /* Nothing to do for the write case. */ |
| 289 | if (uio->uio_rw == UIO_WRITE) { |
| 290 | uio->uio_resid = 0; |
| 291 | return 0; |
| 292 | } |
| 293 | /* |
| 294 | * Read in page-by-page basis, caller will continue. |
| 295 | * Cut appropriately for a single/last-iteration cases. |
| 296 | */ |
| 297 | len = MIN(iov->iov_len, PAGE_SIZE); |
| 298 | return uiomove(dev_zero_page, len, uio); |
| 299 | } |
| 300 | |
| 301 | /* |
| 302 | * mm_readwrite: general memory R/W function. |
| 303 | */ |
| 304 | static int |
| 305 | mm_readwrite(dev_t dev, struct uio *uio, int flags) |
| 306 | { |
| 307 | struct iovec *iov; |
| 308 | int error; |
| 309 | |
| 310 | #ifdef __HAVE_MM_MD_READWRITE |
| 311 | /* If defined - there are extra MD cases. */ |
| 312 | switch (minor(dev)) { |
| 313 | case DEV_MEM: |
| 314 | case DEV_KMEM: |
| 315 | case DEV_NULL: |
| 316 | case DEV_ZERO: |
| 317 | #if defined(COMPAT_16) && defined(__arm) |
| 318 | case _DEV_ZERO_oARM: |
| 319 | #endif |
| 320 | break; |
| 321 | default: |
| 322 | return mm_md_readwrite(dev, uio); |
| 323 | } |
| 324 | #endif |
| 325 | error = 0; |
| 326 | while (uio->uio_resid > 0 && error == 0) { |
| 327 | iov = uio->uio_iov; |
| 328 | if (iov->iov_len == 0) { |
| 329 | /* Processed; next I/O vector. */ |
| 330 | uio->uio_iov++; |
| 331 | uio->uio_iovcnt--; |
| 332 | KASSERT(uio->uio_iovcnt >= 0); |
| 333 | continue; |
| 334 | } |
| 335 | /* Helper functions will process in page-by-page basis. */ |
| 336 | switch (minor(dev)) { |
| 337 | case DEV_MEM: |
| 338 | error = dev_mem_readwrite(uio, iov); |
| 339 | break; |
| 340 | case DEV_KMEM: |
| 341 | error = dev_kmem_readwrite(uio, iov); |
| 342 | break; |
| 343 | case DEV_NULL: |
| 344 | if (uio->uio_rw == UIO_WRITE) { |
| 345 | uio->uio_resid = 0; |
| 346 | } |
| 347 | /* Break directly out of the loop. */ |
| 348 | return 0; |
| 349 | case DEV_FULL: |
| 350 | if (uio->uio_rw == UIO_WRITE) { |
| 351 | return ENOSPC; |
| 352 | } |
| 353 | /*FALLTHROUGH*/ |
| 354 | #if defined(COMPAT_16) && defined(__arm) |
| 355 | case _DEV_ZERO_oARM: |
| 356 | #endif |
| 357 | case DEV_ZERO: |
| 358 | error = dev_zero_readwrite(uio, iov); |
| 359 | break; |
| 360 | default: |
| 361 | error = ENXIO; |
| 362 | break; |
| 363 | } |
| 364 | } |
| 365 | return error; |
| 366 | } |
| 367 | |
| 368 | /* |
| 369 | * mm_mmap: general mmap() handler. |
| 370 | */ |
| 371 | static paddr_t |
| 372 | mm_mmap(dev_t dev, off_t off, int acc) |
| 373 | { |
| 374 | vm_prot_t prot; |
| 375 | |
| 376 | #ifdef __HAVE_MM_MD_MMAP |
| 377 | /* If defined - there are extra mmap() MD cases. */ |
| 378 | switch (minor(dev)) { |
| 379 | case DEV_MEM: |
| 380 | case DEV_KMEM: |
| 381 | case DEV_NULL: |
| 382 | #if defined(COMPAT_16) && defined(__arm) |
| 383 | case _DEV_ZERO_oARM: |
| 384 | #endif |
| 385 | case DEV_ZERO: |
| 386 | break; |
| 387 | default: |
| 388 | return mm_md_mmap(dev, off, acc); |
| 389 | } |
| 390 | #endif |
| 391 | /* |
| 392 | * /dev/null does not make sense, /dev/kmem is volatile and |
| 393 | * /dev/zero is handled in mmap already. |
| 394 | */ |
| 395 | if (minor(dev) != DEV_MEM) { |
| 396 | return -1; |
| 397 | } |
| 398 | |
| 399 | prot = 0; |
| 400 | if (acc & PROT_EXEC) |
| 401 | prot |= VM_PROT_EXECUTE; |
| 402 | if (acc & PROT_READ) |
| 403 | prot |= VM_PROT_READ; |
| 404 | if (acc & PROT_WRITE) |
| 405 | prot |= VM_PROT_WRITE; |
| 406 | |
| 407 | /* Validate the physical address. */ |
| 408 | if (mm_md_physacc(off, prot) != 0) { |
| 409 | return -1; |
| 410 | } |
| 411 | return off >> PGSHIFT; |
| 412 | } |
| 413 | |
| 414 | static int |
| 415 | mm_ioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) |
| 416 | { |
| 417 | |
| 418 | switch (cmd) { |
| 419 | case FIONBIO: |
| 420 | /* We never block anyway. */ |
| 421 | return 0; |
| 422 | |
| 423 | case FIOSETOWN: |
| 424 | case FIOGETOWN: |
| 425 | case TIOCGPGRP: |
| 426 | case TIOCSPGRP: |
| 427 | case TIOCGETA: |
| 428 | return ENOTTY; |
| 429 | |
| 430 | case FIOASYNC: |
| 431 | if ((*(int *)data) == 0) { |
| 432 | return 0; |
| 433 | } |
| 434 | /* FALLTHROUGH */ |
| 435 | default: |
| 436 | return EOPNOTSUPP; |
| 437 | } |
| 438 | } |
| 439 | |