| 1 | /* $NetBSD: if_tap.c,v 1.93 2016/10/02 14:17:07 christos Exp $ */ |
| 2 | |
| 3 | /* |
| 4 | * Copyright (c) 2003, 2004, 2008, 2009 The NetBSD Foundation. |
| 5 | * All rights reserved. |
| 6 | * |
| 7 | * Redistribution and use in source and binary forms, with or without |
| 8 | * modification, are permitted provided that the following conditions |
| 9 | * are met: |
| 10 | * 1. Redistributions of source code must retain the above copyright |
| 11 | * notice, this list of conditions and the following disclaimer. |
| 12 | * 2. Redistributions in binary form must reproduce the above copyright |
| 13 | * notice, this list of conditions and the following disclaimer in the |
| 14 | * documentation and/or other materials provided with the distribution. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
| 17 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
| 18 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 19 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
| 20 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | /* |
| 30 | * tap(4) is a virtual Ethernet interface. It appears as a real Ethernet |
| 31 | * device to the system, but can also be accessed by userland through a |
| 32 | * character device interface, which allows reading and injecting frames. |
| 33 | */ |
| 34 | |
| 35 | #include <sys/cdefs.h> |
| 36 | __KERNEL_RCSID(0, "$NetBSD: if_tap.c,v 1.93 2016/10/02 14:17:07 christos Exp $" ); |
| 37 | |
| 38 | #if defined(_KERNEL_OPT) |
| 39 | |
| 40 | #include "opt_modular.h" |
| 41 | #include "opt_compat_netbsd.h" |
| 42 | #endif |
| 43 | |
| 44 | #include <sys/param.h> |
| 45 | #include <sys/systm.h> |
| 46 | #include <sys/kernel.h> |
| 47 | #include <sys/malloc.h> |
| 48 | #include <sys/conf.h> |
| 49 | #include <sys/cprng.h> |
| 50 | #include <sys/device.h> |
| 51 | #include <sys/file.h> |
| 52 | #include <sys/filedesc.h> |
| 53 | #include <sys/poll.h> |
| 54 | #include <sys/proc.h> |
| 55 | #include <sys/select.h> |
| 56 | #include <sys/sockio.h> |
| 57 | #include <sys/sysctl.h> |
| 58 | #include <sys/kauth.h> |
| 59 | #include <sys/mutex.h> |
| 60 | #include <sys/intr.h> |
| 61 | #include <sys/stat.h> |
| 62 | #include <sys/device.h> |
| 63 | #include <sys/module.h> |
| 64 | #include <sys/atomic.h> |
| 65 | |
| 66 | #include <net/if.h> |
| 67 | #include <net/if_dl.h> |
| 68 | #include <net/if_ether.h> |
| 69 | #include <net/if_media.h> |
| 70 | #include <net/if_tap.h> |
| 71 | #include <net/bpf.h> |
| 72 | |
| 73 | #include <compat/sys/sockio.h> |
| 74 | |
| 75 | #include "ioconf.h" |
| 76 | |
| 77 | /* |
| 78 | * sysctl node management |
| 79 | * |
| 80 | * It's not really possible to use a SYSCTL_SETUP block with |
| 81 | * current module implementation, so it is easier to just define |
| 82 | * our own function. |
| 83 | * |
| 84 | * The handler function is a "helper" in Andrew Brown's sysctl |
| 85 | * framework terminology. It is used as a gateway for sysctl |
| 86 | * requests over the nodes. |
| 87 | * |
| 88 | * tap_log allows the module to log creations of nodes and |
| 89 | * destroy them all at once using sysctl_teardown. |
| 90 | */ |
/* Helper through which sysctl requests on the per-interface nodes go. */
static int	tap_sysctl_handler(SYSCTLFN_PROTO);
/* Builds the tap sysctl nodes, recording them in the given log. */
static void	sysctl_tap_setup(struct sysctllog **);
/* Path component identifying the tap subtree below net.link. */
static int	tap_node;
| 94 | |
| 95 | /* |
| 96 | * Since we're an Ethernet device, we need the 2 following |
| 97 | * components: a struct ethercom and a struct ifmedia |
| 98 | * since we don't attach a PHY to ourselves. |
| 99 | * We could emulate one, but there's no real point. |
| 100 | */ |
| 101 | |
| 102 | struct tap_softc { |
| 103 | device_t sc_dev; |
| 104 | struct ifmedia sc_im; |
| 105 | struct ethercom sc_ec; |
| 106 | int sc_flags; |
| 107 | #define TAP_INUSE 0x00000001 /* tap device can only be opened once */ |
| 108 | #define TAP_ASYNCIO 0x00000002 /* user is using async I/O (SIGIO) on the device */ |
| 109 | #define TAP_NBIO 0x00000004 /* user wants calls to avoid blocking */ |
| 110 | #define TAP_GOING 0x00000008 /* interface is being destroyed */ |
| 111 | struct selinfo sc_rsel; |
| 112 | pid_t sc_pgid; /* For async. IO */ |
| 113 | kmutex_t sc_rdlock; |
| 114 | kmutex_t sc_kqlock; |
| 115 | void *sc_sih; |
| 116 | struct timespec sc_atime; |
| 117 | struct timespec sc_mtime; |
| 118 | struct timespec sc_btime; |
| 119 | }; |
| 120 | |
| 121 | /* autoconf(9) glue */ |
| 122 | |
| 123 | static int tap_match(device_t, cfdata_t, void *); |
| 124 | static void tap_attach(device_t, device_t, void *); |
| 125 | static int tap_detach(device_t, int); |
| 126 | |
| 127 | CFATTACH_DECL_NEW(tap, sizeof(struct tap_softc), |
| 128 | tap_match, tap_attach, tap_detach, NULL); |
| 129 | extern struct cfdriver tap_cd; |
| 130 | |
| 131 | /* Real device access routines */ |
| 132 | static int tap_dev_close(struct tap_softc *); |
| 133 | static int tap_dev_read(int, struct uio *, int); |
| 134 | static int tap_dev_write(int, struct uio *, int); |
| 135 | static int tap_dev_ioctl(int, u_long, void *, struct lwp *); |
| 136 | static int tap_dev_poll(int, int, struct lwp *); |
| 137 | static int tap_dev_kqfilter(int, struct knote *); |
| 138 | |
| 139 | /* Fileops access routines */ |
| 140 | static int tap_fops_close(file_t *); |
| 141 | static int tap_fops_read(file_t *, off_t *, struct uio *, |
| 142 | kauth_cred_t, int); |
| 143 | static int tap_fops_write(file_t *, off_t *, struct uio *, |
| 144 | kauth_cred_t, int); |
| 145 | static int tap_fops_ioctl(file_t *, u_long, void *); |
| 146 | static int tap_fops_poll(file_t *, int); |
| 147 | static int tap_fops_stat(file_t *, struct stat *); |
| 148 | static int tap_fops_kqfilter(file_t *, struct knote *); |
| 149 | |
| 150 | static const struct fileops tap_fileops = { |
| 151 | .fo_read = tap_fops_read, |
| 152 | .fo_write = tap_fops_write, |
| 153 | .fo_ioctl = tap_fops_ioctl, |
| 154 | .fo_fcntl = fnullop_fcntl, |
| 155 | .fo_poll = tap_fops_poll, |
| 156 | .fo_stat = tap_fops_stat, |
| 157 | .fo_close = tap_fops_close, |
| 158 | .fo_kqfilter = tap_fops_kqfilter, |
| 159 | .fo_restart = fnullop_restart, |
| 160 | }; |
| 161 | |
| 162 | /* Helper for cloning open() */ |
| 163 | static int tap_dev_cloner(struct lwp *); |
| 164 | |
| 165 | /* Character device routines */ |
| 166 | static int tap_cdev_open(dev_t, int, int, struct lwp *); |
| 167 | static int tap_cdev_close(dev_t, int, int, struct lwp *); |
| 168 | static int tap_cdev_read(dev_t, struct uio *, int); |
| 169 | static int tap_cdev_write(dev_t, struct uio *, int); |
| 170 | static int tap_cdev_ioctl(dev_t, u_long, void *, int, struct lwp *); |
| 171 | static int tap_cdev_poll(dev_t, int, struct lwp *); |
| 172 | static int tap_cdev_kqfilter(dev_t, struct knote *); |
| 173 | |
| 174 | const struct cdevsw tap_cdevsw = { |
| 175 | .d_open = tap_cdev_open, |
| 176 | .d_close = tap_cdev_close, |
| 177 | .d_read = tap_cdev_read, |
| 178 | .d_write = tap_cdev_write, |
| 179 | .d_ioctl = tap_cdev_ioctl, |
| 180 | .d_stop = nostop, |
| 181 | .d_tty = notty, |
| 182 | .d_poll = tap_cdev_poll, |
| 183 | .d_mmap = nommap, |
| 184 | .d_kqfilter = tap_cdev_kqfilter, |
| 185 | .d_discard = nodiscard, |
| 186 | .d_flag = D_OTHER |
| 187 | }; |
| 188 | |
/* Minor number of the cloning device node; it is the maximal minor value. */
#define	TAP_CLONER	0xfffff
| 190 | |
| 191 | /* kqueue-related routines */ |
| 192 | static void tap_kqdetach(struct knote *); |
| 193 | static int tap_kqread(struct knote *, long); |
| 194 | |
| 195 | /* |
| 196 | * Those are needed by the if_media interface. |
| 197 | */ |
| 198 | |
| 199 | static int tap_mediachange(struct ifnet *); |
| 200 | static void tap_mediastatus(struct ifnet *, struct ifmediareq *); |
| 201 | |
| 202 | /* |
| 203 | * Those are needed by the ifnet interface, and would typically be |
| 204 | * there for any network interface driver. |
| 205 | * Some other routines are optional: watchdog and drain. |
| 206 | */ |
| 207 | |
| 208 | static void tap_start(struct ifnet *); |
| 209 | static void tap_stop(struct ifnet *, int); |
| 210 | static int tap_init(struct ifnet *); |
| 211 | static int tap_ioctl(struct ifnet *, u_long, void *); |
| 212 | |
| 213 | /* Internal functions */ |
| 214 | static int tap_lifaddr(struct ifnet *, u_long, struct ifaliasreq *); |
| 215 | static void tap_softintr(void *); |
| 216 | |
| 217 | /* |
| 218 | * tap is a clonable interface, although it is highly unrealistic for |
| 219 | * an Ethernet device. |
| 220 | * |
| 221 | * Here are the bits needed for a clonable interface. |
| 222 | */ |
| 223 | static int tap_clone_create(struct if_clone *, int); |
| 224 | static int tap_clone_destroy(struct ifnet *); |
| 225 | |
| 226 | struct if_clone tap_cloners = IF_CLONE_INITIALIZER("tap" , |
| 227 | tap_clone_create, |
| 228 | tap_clone_destroy); |
| 229 | |
| 230 | /* Helper functionis shared by the two cloning code paths */ |
| 231 | static struct tap_softc * tap_clone_creator(int); |
| 232 | int tap_clone_destroyer(device_t); |
| 233 | |
| 234 | static struct sysctllog *tap_sysctl_clog; |
| 235 | |
| 236 | #ifdef _MODULE |
| 237 | devmajor_t tap_bmajor = -1, tap_cmajor = -1; |
| 238 | #endif |
| 239 | |
| 240 | static u_int tap_count; |
| 241 | |
/*
 * Pseudo-device attach hook.  The argument is unused.
 */
void
tapattach(int n)
{

	/*
	 * Intentionally empty: all initialization is performed by the
	 * module initialization code in tapinit() below.
	 */
}
| 251 | |
| 252 | static void |
| 253 | tapinit(void) |
| 254 | { |
| 255 | int error = config_cfattach_attach(tap_cd.cd_name, &tap_ca); |
| 256 | if (error) { |
| 257 | aprint_error("%s: unable to register cfattach\n" , |
| 258 | tap_cd.cd_name); |
| 259 | (void)config_cfdriver_detach(&tap_cd); |
| 260 | return; |
| 261 | } |
| 262 | |
| 263 | if_clone_attach(&tap_cloners); |
| 264 | sysctl_tap_setup(&tap_sysctl_clog); |
| 265 | #ifdef _MODULE |
| 266 | devsw_attach("tap" , NULL, &tap_bmajor, &tap_cdevsw, &tap_cmajor); |
| 267 | #endif |
| 268 | } |
| 269 | |
| 270 | static int |
| 271 | tapdetach(void) |
| 272 | { |
| 273 | int error = 0; |
| 274 | |
| 275 | if (tap_count != 0) |
| 276 | return EBUSY; |
| 277 | |
| 278 | #ifdef _MODULE |
| 279 | if (error == 0) |
| 280 | error = devsw_detach(NULL, &tap_cdevsw); |
| 281 | #endif |
| 282 | if (error == 0) |
| 283 | sysctl_teardown(&tap_sysctl_clog); |
| 284 | if (error == 0) |
| 285 | if_clone_detach(&tap_cloners); |
| 286 | |
| 287 | if (error == 0) |
| 288 | error = config_cfattach_detach(tap_cd.cd_name, &tap_ca); |
| 289 | |
| 290 | return error; |
| 291 | } |
| 292 | |
| 293 | /* Pretty much useless for a pseudo-device */ |
| 294 | static int |
| 295 | tap_match(device_t parent, cfdata_t cfdata, void *arg) |
| 296 | { |
| 297 | |
| 298 | return (1); |
| 299 | } |
| 300 | |
| 301 | void |
| 302 | tap_attach(device_t parent, device_t self, void *aux) |
| 303 | { |
| 304 | struct tap_softc *sc = device_private(self); |
| 305 | struct ifnet *ifp; |
| 306 | const struct sysctlnode *node; |
| 307 | int error; |
| 308 | uint8_t enaddr[ETHER_ADDR_LEN] = |
| 309 | { 0xf2, 0x0b, 0xa4, 0xff, 0xff, 0xff }; |
| 310 | char enaddrstr[3 * ETHER_ADDR_LEN]; |
| 311 | |
| 312 | sc->sc_dev = self; |
| 313 | sc->sc_sih = NULL; |
| 314 | getnanotime(&sc->sc_btime); |
| 315 | sc->sc_atime = sc->sc_mtime = sc->sc_btime; |
| 316 | sc->sc_flags = 0; |
| 317 | selinit(&sc->sc_rsel); |
| 318 | |
| 319 | /* |
| 320 | * Initialize the two locks for the device. |
| 321 | * |
| 322 | * We need a lock here because even though the tap device can be |
| 323 | * opened only once, the file descriptor might be passed to another |
| 324 | * process, say a fork(2)ed child. |
| 325 | * |
| 326 | * The Giant saves us from most of the hassle, but since the read |
| 327 | * operation can sleep, we don't want two processes to wake up at |
| 328 | * the same moment and both try and dequeue a single packet. |
| 329 | * |
| 330 | * The queue for event listeners (used by kqueue(9), see below) has |
| 331 | * to be protected too, so use a spin lock. |
| 332 | */ |
| 333 | mutex_init(&sc->sc_rdlock, MUTEX_DEFAULT, IPL_NONE); |
| 334 | mutex_init(&sc->sc_kqlock, MUTEX_DEFAULT, IPL_VM); |
| 335 | |
| 336 | if (!pmf_device_register(self, NULL, NULL)) |
| 337 | aprint_error_dev(self, "couldn't establish power handler\n" ); |
| 338 | |
| 339 | /* |
| 340 | * In order to obtain unique initial Ethernet address on a host, |
| 341 | * do some randomisation. It's not meant for anything but avoiding |
| 342 | * hard-coding an address. |
| 343 | */ |
| 344 | cprng_fast(&enaddr[3], 3); |
| 345 | |
| 346 | aprint_verbose_dev(self, "Ethernet address %s\n" , |
| 347 | ether_snprintf(enaddrstr, sizeof(enaddrstr), enaddr)); |
| 348 | |
| 349 | /* |
| 350 | * Why 1000baseT? Why not? You can add more. |
| 351 | * |
| 352 | * Note that there are 3 steps: init, one or several additions to |
| 353 | * list of supported media, and in the end, the selection of one |
| 354 | * of them. |
| 355 | */ |
| 356 | ifmedia_init(&sc->sc_im, 0, tap_mediachange, tap_mediastatus); |
| 357 | ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_1000_T, 0, NULL); |
| 358 | ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_1000_T|IFM_FDX, 0, NULL); |
| 359 | ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_100_TX, 0, NULL); |
| 360 | ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_100_TX|IFM_FDX, 0, NULL); |
| 361 | ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_10_T, 0, NULL); |
| 362 | ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_10_T|IFM_FDX, 0, NULL); |
| 363 | ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_AUTO, 0, NULL); |
| 364 | ifmedia_set(&sc->sc_im, IFM_ETHER|IFM_AUTO); |
| 365 | |
| 366 | /* |
| 367 | * One should note that an interface must do multicast in order |
| 368 | * to support IPv6. |
| 369 | */ |
| 370 | ifp = &sc->sc_ec.ec_if; |
| 371 | strcpy(ifp->if_xname, device_xname(self)); |
| 372 | ifp->if_softc = sc; |
| 373 | ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; |
| 374 | ifp->if_ioctl = tap_ioctl; |
| 375 | ifp->if_start = tap_start; |
| 376 | ifp->if_stop = tap_stop; |
| 377 | ifp->if_init = tap_init; |
| 378 | IFQ_SET_READY(&ifp->if_snd); |
| 379 | |
| 380 | sc->sc_ec.ec_capabilities = ETHERCAP_VLAN_MTU | ETHERCAP_JUMBO_MTU; |
| 381 | |
| 382 | /* Those steps are mandatory for an Ethernet driver. */ |
| 383 | if_initialize(ifp); |
| 384 | ether_ifattach(ifp, enaddr); |
| 385 | if_register(ifp); |
| 386 | |
| 387 | /* |
| 388 | * Add a sysctl node for that interface. |
| 389 | * |
| 390 | * The pointer transmitted is not a string, but instead a pointer to |
| 391 | * the softc structure, which we can use to build the string value on |
| 392 | * the fly in the helper function of the node. See the comments for |
| 393 | * tap_sysctl_handler for details. |
| 394 | * |
| 395 | * Usually sysctl_createv is called with CTL_CREATE as the before-last |
| 396 | * component. However, we can allocate a number ourselves, as we are |
| 397 | * the only consumer of the net.link.<iface> node. In this case, the |
| 398 | * unit number is conveniently used to number the node. CTL_CREATE |
| 399 | * would just work, too. |
| 400 | */ |
| 401 | if ((error = sysctl_createv(NULL, 0, NULL, |
| 402 | &node, CTLFLAG_READWRITE, |
| 403 | CTLTYPE_STRING, device_xname(self), NULL, |
| 404 | tap_sysctl_handler, 0, (void *)sc, 18, |
| 405 | CTL_NET, AF_LINK, tap_node, device_unit(sc->sc_dev), |
| 406 | CTL_EOL)) != 0) |
| 407 | aprint_error_dev(self, "sysctl_createv returned %d, ignoring\n" , |
| 408 | error); |
| 409 | } |
| 410 | |
| 411 | /* |
| 412 | * When detaching, we do the inverse of what is done in the attach |
| 413 | * routine, in reversed order. |
| 414 | */ |
| 415 | static int |
| 416 | tap_detach(device_t self, int flags) |
| 417 | { |
| 418 | struct tap_softc *sc = device_private(self); |
| 419 | struct ifnet *ifp = &sc->sc_ec.ec_if; |
| 420 | int error; |
| 421 | int s; |
| 422 | |
| 423 | sc->sc_flags |= TAP_GOING; |
| 424 | s = splnet(); |
| 425 | tap_stop(ifp, 1); |
| 426 | if_down(ifp); |
| 427 | splx(s); |
| 428 | |
| 429 | if (sc->sc_sih != NULL) { |
| 430 | softint_disestablish(sc->sc_sih); |
| 431 | sc->sc_sih = NULL; |
| 432 | } |
| 433 | |
| 434 | /* |
| 435 | * Destroying a single leaf is a very straightforward operation using |
| 436 | * sysctl_destroyv. One should be sure to always end the path with |
| 437 | * CTL_EOL. |
| 438 | */ |
| 439 | if ((error = sysctl_destroyv(NULL, CTL_NET, AF_LINK, tap_node, |
| 440 | device_unit(sc->sc_dev), CTL_EOL)) != 0) |
| 441 | aprint_error_dev(self, |
| 442 | "sysctl_destroyv returned %d, ignoring\n" , error); |
| 443 | ether_ifdetach(ifp); |
| 444 | if_detach(ifp); |
| 445 | ifmedia_delete_instance(&sc->sc_im, IFM_INST_ANY); |
| 446 | seldestroy(&sc->sc_rsel); |
| 447 | mutex_destroy(&sc->sc_rdlock); |
| 448 | mutex_destroy(&sc->sc_kqlock); |
| 449 | |
| 450 | pmf_device_deregister(self); |
| 451 | |
| 452 | return (0); |
| 453 | } |
| 454 | |
| 455 | /* |
| 456 | * This function is called by the ifmedia layer to notify the driver |
| 457 | * that the user requested a media change. A real driver would |
| 458 | * reconfigure the hardware. |
| 459 | */ |
/* if_media "change" callback: there is no hardware to reconfigure. */
static int
tap_mediachange(struct ifnet *ifp)
{

	return 0;
}
| 465 | |
| 466 | /* |
| 467 | * Here the user asks for the currently used media. |
| 468 | */ |
| 469 | static void |
| 470 | tap_mediastatus(struct ifnet *ifp, struct ifmediareq *imr) |
| 471 | { |
| 472 | struct tap_softc *sc = (struct tap_softc *)ifp->if_softc; |
| 473 | imr->ifm_active = sc->sc_im.ifm_cur->ifm_media; |
| 474 | } |
| 475 | |
| 476 | /* |
| 477 | * This is the function where we SEND packets. |
| 478 | * |
| 479 | * There is no 'receive' equivalent. A typical driver will get |
| 480 | * interrupts from the hardware, and from there will inject new packets |
| 481 | * into the network stack. |
| 482 | * |
| 483 | * Once handled, a packet must be freed. A real driver might not be able |
| 484 | * to fit all the pending packets into the hardware, and is allowed to |
| 485 | * return before having sent all the packets. It should then use the |
| 486 | * if_flags flag IFF_OACTIVE to notify the upper layer. |
| 487 | * |
| 488 | * There are also other flags one should check, such as IFF_PAUSE. |
| 489 | * |
| 490 | * It is our duty to make packets available to BPF listeners. |
| 491 | * |
| 492 | * You should be aware that this function is called by the Ethernet layer |
| 493 | * at splnet(). |
| 494 | * |
| 495 | * When the device is opened, we have to pass the packet(s) to the |
| 496 | * userland. For that we stay in OACTIVE mode while the userland gets |
| 497 | * the packets, and we send a signal to the processes waiting to read. |
| 498 | * |
| 499 | * wakeup(sc) is the counterpart to the tsleep call in |
| 500 | * tap_dev_read, while selnotify() is used for kevent(2) and |
| 501 | * poll(2) (which includes select(2)) listeners. |
| 502 | */ |
| 503 | static void |
| 504 | tap_start(struct ifnet *ifp) |
| 505 | { |
| 506 | struct tap_softc *sc = (struct tap_softc *)ifp->if_softc; |
| 507 | struct mbuf *m0; |
| 508 | |
| 509 | if ((sc->sc_flags & TAP_INUSE) == 0) { |
| 510 | /* Simply drop packets */ |
| 511 | for(;;) { |
| 512 | IFQ_DEQUEUE(&ifp->if_snd, m0); |
| 513 | if (m0 == NULL) |
| 514 | return; |
| 515 | |
| 516 | ifp->if_opackets++; |
| 517 | bpf_mtap(ifp, m0); |
| 518 | |
| 519 | m_freem(m0); |
| 520 | } |
| 521 | } else if (!IFQ_IS_EMPTY(&ifp->if_snd)) { |
| 522 | ifp->if_flags |= IFF_OACTIVE; |
| 523 | wakeup(sc); |
| 524 | selnotify(&sc->sc_rsel, 0, 1); |
| 525 | if (sc->sc_flags & TAP_ASYNCIO) |
| 526 | softint_schedule(sc->sc_sih); |
| 527 | } |
| 528 | } |
| 529 | |
| 530 | static void |
| 531 | tap_softintr(void *cookie) |
| 532 | { |
| 533 | struct tap_softc *sc; |
| 534 | struct ifnet *ifp; |
| 535 | int a, b; |
| 536 | |
| 537 | sc = cookie; |
| 538 | |
| 539 | if (sc->sc_flags & TAP_ASYNCIO) { |
| 540 | ifp = &sc->sc_ec.ec_if; |
| 541 | if (ifp->if_flags & IFF_RUNNING) { |
| 542 | a = POLL_IN; |
| 543 | b = POLLIN|POLLRDNORM; |
| 544 | } else { |
| 545 | a = POLL_HUP; |
| 546 | b = 0; |
| 547 | } |
| 548 | fownsignal(sc->sc_pgid, SIGIO, a, b, NULL); |
| 549 | } |
| 550 | } |
| 551 | |
| 552 | /* |
| 553 | * A typical driver will only contain the following handlers for |
| 554 | * ioctl calls, except SIOCSIFPHYADDR. |
| 555 | * The latter is a hack I used to set the Ethernet address of the |
| 556 | * faked device. |
| 557 | * |
| 558 | * Note that both ifmedia_ioctl() and ether_ioctl() have to be |
| 559 | * called under splnet(). |
| 560 | */ |
| 561 | static int |
| 562 | tap_ioctl(struct ifnet *ifp, u_long cmd, void *data) |
| 563 | { |
| 564 | struct tap_softc *sc = (struct tap_softc *)ifp->if_softc; |
| 565 | struct ifreq *ifr = (struct ifreq *)data; |
| 566 | int s, error; |
| 567 | |
| 568 | s = splnet(); |
| 569 | |
| 570 | switch (cmd) { |
| 571 | #ifdef OSIOCSIFMEDIA |
| 572 | case OSIOCSIFMEDIA: |
| 573 | #endif |
| 574 | case SIOCSIFMEDIA: |
| 575 | case SIOCGIFMEDIA: |
| 576 | error = ifmedia_ioctl(ifp, ifr, &sc->sc_im, cmd); |
| 577 | break; |
| 578 | case SIOCSIFPHYADDR: |
| 579 | error = tap_lifaddr(ifp, cmd, (struct ifaliasreq *)data); |
| 580 | break; |
| 581 | default: |
| 582 | error = ether_ioctl(ifp, cmd, data); |
| 583 | if (error == ENETRESET) |
| 584 | error = 0; |
| 585 | break; |
| 586 | } |
| 587 | |
| 588 | splx(s); |
| 589 | |
| 590 | return (error); |
| 591 | } |
| 592 | |
| 593 | /* |
| 594 | * Helper function to set Ethernet address. This has been replaced by |
| 595 | * the generic SIOCALIFADDR ioctl on a PF_LINK socket. |
| 596 | */ |
| 597 | static int |
| 598 | tap_lifaddr(struct ifnet *ifp, u_long cmd, struct ifaliasreq *ifra) |
| 599 | { |
| 600 | const struct sockaddr *sa = &ifra->ifra_addr; |
| 601 | |
| 602 | if (sa->sa_family != AF_LINK) |
| 603 | return (EINVAL); |
| 604 | |
| 605 | if_set_sadl(ifp, sa->sa_data, ETHER_ADDR_LEN, false); |
| 606 | |
| 607 | return (0); |
| 608 | } |
| 609 | |
| 610 | /* |
| 611 | * _init() would typically be called when an interface goes up, |
| 612 | * meaning it should configure itself into the state in which it |
| 613 | * can send packets. |
| 614 | */ |
| 615 | static int |
| 616 | tap_init(struct ifnet *ifp) |
| 617 | { |
| 618 | ifp->if_flags |= IFF_RUNNING; |
| 619 | |
| 620 | tap_start(ifp); |
| 621 | |
| 622 | return (0); |
| 623 | } |
| 624 | |
| 625 | /* |
| 626 | * _stop() is called when an interface goes down. It is our |
 * responsibility to validate that state by clearing the
| 628 | * IFF_RUNNING flag. |
| 629 | * |
| 630 | * We have to wake up all the sleeping processes to have the pending |
| 631 | * read requests cancelled. |
| 632 | */ |
| 633 | static void |
| 634 | tap_stop(struct ifnet *ifp, int disable) |
| 635 | { |
| 636 | struct tap_softc *sc = (struct tap_softc *)ifp->if_softc; |
| 637 | |
| 638 | ifp->if_flags &= ~IFF_RUNNING; |
| 639 | wakeup(sc); |
| 640 | selnotify(&sc->sc_rsel, 0, 1); |
| 641 | if (sc->sc_flags & TAP_ASYNCIO) |
| 642 | softint_schedule(sc->sc_sih); |
| 643 | } |
| 644 | |
| 645 | /* |
| 646 | * The 'create' command of ifconfig can be used to create |
| 647 | * any numbered instance of a given device. Thus we have to |
| 648 | * make sure we have enough room in cd_devs to create the |
| 649 | * user-specified instance. config_attach_pseudo will do this |
| 650 | * for us. |
| 651 | */ |
| 652 | static int |
| 653 | tap_clone_create(struct if_clone *ifc, int unit) |
| 654 | { |
| 655 | if (tap_clone_creator(unit) == NULL) { |
| 656 | aprint_error("%s%d: unable to attach an instance\n" , |
| 657 | tap_cd.cd_name, unit); |
| 658 | return (ENXIO); |
| 659 | } |
| 660 | atomic_inc_uint(&tap_count); |
| 661 | return (0); |
| 662 | } |
| 663 | |
| 664 | /* |
| 665 | * tap(4) can be cloned by two ways: |
| 666 | * using 'ifconfig tap0 create', which will use the network |
| 667 | * interface cloning API, and call tap_clone_create above. |
| 668 | * opening the cloning device node, whose minor number is TAP_CLONER. |
 * See below for an explanation of how this part works.
| 670 | */ |
| 671 | static struct tap_softc * |
| 672 | tap_clone_creator(int unit) |
| 673 | { |
| 674 | struct cfdata *cf; |
| 675 | |
| 676 | cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK); |
| 677 | cf->cf_name = tap_cd.cd_name; |
| 678 | cf->cf_atname = tap_ca.ca_name; |
| 679 | if (unit == -1) { |
| 680 | /* let autoconf find the first free one */ |
| 681 | cf->cf_unit = 0; |
| 682 | cf->cf_fstate = FSTATE_STAR; |
| 683 | } else { |
| 684 | cf->cf_unit = unit; |
| 685 | cf->cf_fstate = FSTATE_NOTFOUND; |
| 686 | } |
| 687 | |
| 688 | return device_private(config_attach_pseudo(cf)); |
| 689 | } |
| 690 | |
| 691 | /* |
| 692 | * The clean design of if_clone and autoconf(9) makes that part |
| 693 | * really straightforward. The second argument of config_detach |
| 694 | * means neither QUIET nor FORCED. |
| 695 | */ |
| 696 | static int |
| 697 | tap_clone_destroy(struct ifnet *ifp) |
| 698 | { |
| 699 | struct tap_softc *sc = ifp->if_softc; |
| 700 | int error = tap_clone_destroyer(sc->sc_dev); |
| 701 | |
| 702 | if (error == 0) |
| 703 | atomic_dec_uint(&tap_count); |
| 704 | return error; |
| 705 | } |
| 706 | |
| 707 | int |
| 708 | tap_clone_destroyer(device_t dev) |
| 709 | { |
| 710 | cfdata_t cf = device_cfdata(dev); |
| 711 | int error; |
| 712 | |
| 713 | if ((error = config_detach(dev, 0)) != 0) |
| 714 | aprint_error_dev(dev, "unable to detach instance\n" ); |
| 715 | free(cf, M_DEVBUF); |
| 716 | |
| 717 | return (error); |
| 718 | } |
| 719 | |
| 720 | /* |
 * tap(4) is a bit of a hybrid device. It can be used in two different
| 722 | * ways: |
| 723 | * 1. ifconfig tapN create, then use /dev/tapN to read/write off it. |
| 724 | * 2. open /dev/tap, get a new interface created and read/write off it. |
| 725 | * That interface is destroyed when the process that had it created exits. |
| 726 | * |
| 727 | * The first way is managed by the cdevsw structure, and you access interfaces |
| 728 | * through a (major, minor) mapping: tap4 is obtained by the minor number |
| 729 | * 4. The entry points for the cdevsw interface are prefixed by tap_cdev_. |
| 730 | * |
| 731 | * The second way is the so-called "cloning" device. It's a special minor |
| 732 | * number (chosen as the maximal number, to allow as much tap devices as |
| 733 | * possible). The user first opens the cloner (e.g., /dev/tap), and that |
| 734 | * call ends in tap_cdev_open. The actual place where it is handled is |
| 735 | * tap_dev_cloner. |
| 736 | * |
 * A tap device cannot be opened more than once at a time, so the cdevsw
 * part of open() does nothing but note that the interface is being used and
 * hence ready to actually handle packets.
| 740 | */ |
| 741 | |
| 742 | static int |
| 743 | tap_cdev_open(dev_t dev, int flags, int fmt, struct lwp *l) |
| 744 | { |
| 745 | struct tap_softc *sc; |
| 746 | |
| 747 | if (minor(dev) == TAP_CLONER) |
| 748 | return tap_dev_cloner(l); |
| 749 | |
| 750 | sc = device_lookup_private(&tap_cd, minor(dev)); |
| 751 | if (sc == NULL) |
| 752 | return (ENXIO); |
| 753 | |
| 754 | /* The device can only be opened once */ |
| 755 | if (sc->sc_flags & TAP_INUSE) |
| 756 | return (EBUSY); |
| 757 | sc->sc_flags |= TAP_INUSE; |
| 758 | return (0); |
| 759 | } |
| 760 | |
| 761 | /* |
| 762 | * There are several kinds of cloning devices, and the most simple is the one |
| 763 | * tap(4) uses. What it does is change the file descriptor with a new one, |
| 764 | * with its own fileops structure (which maps to the various read, write, |
 * ioctl functions). It starts by allocating a new file descriptor with
 * fd_allocfile(), then actually creates the new tap device.
 *
 * Once those two steps are successful, we can re-wire the existing file
 * descriptor to its new self. This is done with fd_clone(): it fills the fp
 * structure as needed (notably f_devunit gets filled with the fifth parameter
 * passed, the unit of the tap device, which will allow us to identify the
 * device later), and returns EMOVEFD.
| 773 | * |
| 774 | * That magic value is interpreted by sys_open() which then replaces the |
| 775 | * current file descriptor by the new one (through a magic member of struct |
| 776 | * lwp, l_dupfd). |
| 777 | * |
| 778 | * The tap device is flagged as being busy since it otherwise could be |
| 779 | * externally accessed through the corresponding device node with the cdevsw |
| 780 | * interface. |
| 781 | */ |
| 782 | |
| 783 | static int |
| 784 | tap_dev_cloner(struct lwp *l) |
| 785 | { |
| 786 | struct tap_softc *sc; |
| 787 | file_t *fp; |
| 788 | int error, fd; |
| 789 | |
| 790 | if ((error = fd_allocfile(&fp, &fd)) != 0) |
| 791 | return (error); |
| 792 | |
| 793 | if ((sc = tap_clone_creator(-1)) == NULL) { |
| 794 | fd_abort(curproc, fp, fd); |
| 795 | return (ENXIO); |
| 796 | } |
| 797 | |
| 798 | sc->sc_flags |= TAP_INUSE; |
| 799 | |
| 800 | return fd_clone(fp, fd, FREAD|FWRITE, &tap_fileops, |
| 801 | (void *)(intptr_t)device_unit(sc->sc_dev)); |
| 802 | } |
| 803 | |
| 804 | /* |
| 805 | * While all other operations (read, write, ioctl, poll and kqfilter) are |
| 806 | * really the same whether we are in cdevsw or fileops mode, the close() |
| 807 | * function is slightly different in the two cases. |
| 808 | * |
| 809 | * As for the other, the core of it is shared in tap_dev_close. What |
| 810 | * it does is sufficient for the cdevsw interface, but the cloning interface |
| 811 | * needs another thing: the interface is destroyed when the processes that |
| 812 | * created it closes it. |
| 813 | */ |
| 814 | static int |
| 815 | tap_cdev_close(dev_t dev, int flags, int fmt, |
| 816 | struct lwp *l) |
| 817 | { |
| 818 | struct tap_softc *sc = |
| 819 | device_lookup_private(&tap_cd, minor(dev)); |
| 820 | |
| 821 | if (sc == NULL) |
| 822 | return (ENXIO); |
| 823 | |
| 824 | return tap_dev_close(sc); |
| 825 | } |
| 826 | |
| 827 | /* |
| 828 | * It might happen that the administrator used ifconfig to externally destroy |
| 829 | * the interface. In that case, tap_fops_close will be called while |
| 830 | * tap_detach is already happening. If we called it again from here, we |
| 831 | * would dead lock. TAP_GOING ensures that this situation doesn't happen. |
| 832 | */ |
| 833 | static int |
| 834 | tap_fops_close(file_t *fp) |
| 835 | { |
| 836 | int unit = fp->f_devunit; |
| 837 | struct tap_softc *sc; |
| 838 | int error; |
| 839 | |
| 840 | sc = device_lookup_private(&tap_cd, unit); |
| 841 | if (sc == NULL) |
| 842 | return (ENXIO); |
| 843 | |
| 844 | /* tap_dev_close currently always succeeds, but it might not |
| 845 | * always be the case. */ |
| 846 | KERNEL_LOCK(1, NULL); |
| 847 | if ((error = tap_dev_close(sc)) != 0) { |
| 848 | KERNEL_UNLOCK_ONE(NULL); |
| 849 | return (error); |
| 850 | } |
| 851 | |
| 852 | /* Destroy the device now that it is no longer useful, |
| 853 | * unless it's already being destroyed. */ |
| 854 | if ((sc->sc_flags & TAP_GOING) != 0) { |
| 855 | KERNEL_UNLOCK_ONE(NULL); |
| 856 | return (0); |
| 857 | } |
| 858 | |
| 859 | error = tap_clone_destroyer(sc->sc_dev); |
| 860 | KERNEL_UNLOCK_ONE(NULL); |
| 861 | return error; |
| 862 | } |
| 863 | |
| 864 | static int |
| 865 | tap_dev_close(struct tap_softc *sc) |
| 866 | { |
| 867 | struct ifnet *ifp; |
| 868 | int s; |
| 869 | |
| 870 | s = splnet(); |
| 871 | /* Let tap_start handle packets again */ |
| 872 | ifp = &sc->sc_ec.ec_if; |
| 873 | ifp->if_flags &= ~IFF_OACTIVE; |
| 874 | |
| 875 | /* Purge output queue */ |
| 876 | if (!(IFQ_IS_EMPTY(&ifp->if_snd))) { |
| 877 | struct mbuf *m; |
| 878 | |
| 879 | for (;;) { |
| 880 | IFQ_DEQUEUE(&ifp->if_snd, m); |
| 881 | if (m == NULL) |
| 882 | break; |
| 883 | |
| 884 | ifp->if_opackets++; |
| 885 | bpf_mtap(ifp, m); |
| 886 | m_freem(m); |
| 887 | } |
| 888 | } |
| 889 | splx(s); |
| 890 | |
| 891 | if (sc->sc_sih != NULL) { |
| 892 | softint_disestablish(sc->sc_sih); |
| 893 | sc->sc_sih = NULL; |
| 894 | } |
| 895 | sc->sc_flags &= ~(TAP_INUSE | TAP_ASYNCIO); |
| 896 | |
| 897 | return (0); |
| 898 | } |
| 899 | |
| 900 | static int |
| 901 | tap_cdev_read(dev_t dev, struct uio *uio, int flags) |
| 902 | { |
| 903 | return tap_dev_read(minor(dev), uio, flags); |
| 904 | } |
| 905 | |
| 906 | static int |
| 907 | tap_fops_read(file_t *fp, off_t *offp, struct uio *uio, |
| 908 | kauth_cred_t cred, int flags) |
| 909 | { |
| 910 | int error; |
| 911 | |
| 912 | KERNEL_LOCK(1, NULL); |
| 913 | error = tap_dev_read(fp->f_devunit, uio, flags); |
| 914 | KERNEL_UNLOCK_ONE(NULL); |
| 915 | return error; |
| 916 | } |
| 917 | |
| 918 | static int |
| 919 | tap_dev_read(int unit, struct uio *uio, int flags) |
| 920 | { |
| 921 | struct tap_softc *sc = device_lookup_private(&tap_cd, unit); |
| 922 | struct ifnet *ifp; |
| 923 | struct mbuf *m, *n; |
| 924 | int error = 0, s; |
| 925 | |
| 926 | if (sc == NULL) |
| 927 | return (ENXIO); |
| 928 | |
| 929 | getnanotime(&sc->sc_atime); |
| 930 | |
| 931 | ifp = &sc->sc_ec.ec_if; |
| 932 | if ((ifp->if_flags & IFF_UP) == 0) |
| 933 | return (EHOSTDOWN); |
| 934 | |
| 935 | /* |
| 936 | * In the TAP_NBIO case, we have to make sure we won't be sleeping |
| 937 | */ |
| 938 | if ((sc->sc_flags & TAP_NBIO) != 0) { |
| 939 | if (!mutex_tryenter(&sc->sc_rdlock)) |
| 940 | return (EWOULDBLOCK); |
| 941 | } else { |
| 942 | mutex_enter(&sc->sc_rdlock); |
| 943 | } |
| 944 | |
| 945 | s = splnet(); |
| 946 | if (IFQ_IS_EMPTY(&ifp->if_snd)) { |
| 947 | ifp->if_flags &= ~IFF_OACTIVE; |
| 948 | /* |
| 949 | * We must release the lock before sleeping, and re-acquire it |
| 950 | * after. |
| 951 | */ |
| 952 | mutex_exit(&sc->sc_rdlock); |
| 953 | if (sc->sc_flags & TAP_NBIO) |
| 954 | error = EWOULDBLOCK; |
| 955 | else |
| 956 | error = tsleep(sc, PSOCK|PCATCH, "tap" , 0); |
| 957 | splx(s); |
| 958 | |
| 959 | if (error != 0) |
| 960 | return (error); |
| 961 | /* The device might have been downed */ |
| 962 | if ((ifp->if_flags & IFF_UP) == 0) |
| 963 | return (EHOSTDOWN); |
| 964 | if ((sc->sc_flags & TAP_NBIO)) { |
| 965 | if (!mutex_tryenter(&sc->sc_rdlock)) |
| 966 | return (EWOULDBLOCK); |
| 967 | } else { |
| 968 | mutex_enter(&sc->sc_rdlock); |
| 969 | } |
| 970 | s = splnet(); |
| 971 | } |
| 972 | |
| 973 | IFQ_DEQUEUE(&ifp->if_snd, m); |
| 974 | ifp->if_flags &= ~IFF_OACTIVE; |
| 975 | splx(s); |
| 976 | if (m == NULL) { |
| 977 | error = 0; |
| 978 | goto out; |
| 979 | } |
| 980 | |
| 981 | ifp->if_opackets++; |
| 982 | bpf_mtap(ifp, m); |
| 983 | |
| 984 | /* |
| 985 | * One read is one packet. |
| 986 | */ |
| 987 | do { |
| 988 | error = uiomove(mtod(m, void *), |
| 989 | min(m->m_len, uio->uio_resid), uio); |
| 990 | m = n = m_free(m); |
| 991 | } while (m != NULL && uio->uio_resid > 0 && error == 0); |
| 992 | |
| 993 | if (m != NULL) |
| 994 | m_freem(m); |
| 995 | |
| 996 | out: |
| 997 | mutex_exit(&sc->sc_rdlock); |
| 998 | return (error); |
| 999 | } |
| 1000 | |
| 1001 | static int |
| 1002 | tap_fops_stat(file_t *fp, struct stat *st) |
| 1003 | { |
| 1004 | int error = 0; |
| 1005 | struct tap_softc *sc; |
| 1006 | int unit = fp->f_devunit; |
| 1007 | |
| 1008 | (void)memset(st, 0, sizeof(*st)); |
| 1009 | |
| 1010 | KERNEL_LOCK(1, NULL); |
| 1011 | sc = device_lookup_private(&tap_cd, unit); |
| 1012 | if (sc == NULL) { |
| 1013 | error = ENXIO; |
| 1014 | goto out; |
| 1015 | } |
| 1016 | |
| 1017 | st->st_dev = makedev(cdevsw_lookup_major(&tap_cdevsw), unit); |
| 1018 | st->st_atimespec = sc->sc_atime; |
| 1019 | st->st_mtimespec = sc->sc_mtime; |
| 1020 | st->st_ctimespec = st->st_birthtimespec = sc->sc_btime; |
| 1021 | st->st_uid = kauth_cred_geteuid(fp->f_cred); |
| 1022 | st->st_gid = kauth_cred_getegid(fp->f_cred); |
| 1023 | out: |
| 1024 | KERNEL_UNLOCK_ONE(NULL); |
| 1025 | return error; |
| 1026 | } |
| 1027 | |
| 1028 | static int |
| 1029 | tap_cdev_write(dev_t dev, struct uio *uio, int flags) |
| 1030 | { |
| 1031 | return tap_dev_write(minor(dev), uio, flags); |
| 1032 | } |
| 1033 | |
| 1034 | static int |
| 1035 | tap_fops_write(file_t *fp, off_t *offp, struct uio *uio, |
| 1036 | kauth_cred_t cred, int flags) |
| 1037 | { |
| 1038 | int error; |
| 1039 | |
| 1040 | KERNEL_LOCK(1, NULL); |
| 1041 | error = tap_dev_write(fp->f_devunit, uio, flags); |
| 1042 | KERNEL_UNLOCK_ONE(NULL); |
| 1043 | return error; |
| 1044 | } |
| 1045 | |
| 1046 | static int |
| 1047 | tap_dev_write(int unit, struct uio *uio, int flags) |
| 1048 | { |
| 1049 | struct tap_softc *sc = |
| 1050 | device_lookup_private(&tap_cd, unit); |
| 1051 | struct ifnet *ifp; |
| 1052 | struct mbuf *m, **mp; |
| 1053 | int error = 0; |
| 1054 | int s; |
| 1055 | |
| 1056 | if (sc == NULL) |
| 1057 | return (ENXIO); |
| 1058 | |
| 1059 | getnanotime(&sc->sc_mtime); |
| 1060 | ifp = &sc->sc_ec.ec_if; |
| 1061 | |
| 1062 | /* One write, one packet, that's the rule */ |
| 1063 | MGETHDR(m, M_DONTWAIT, MT_DATA); |
| 1064 | if (m == NULL) { |
| 1065 | ifp->if_ierrors++; |
| 1066 | return (ENOBUFS); |
| 1067 | } |
| 1068 | m->m_pkthdr.len = uio->uio_resid; |
| 1069 | |
| 1070 | mp = &m; |
| 1071 | while (error == 0 && uio->uio_resid > 0) { |
| 1072 | if (*mp != m) { |
| 1073 | MGET(*mp, M_DONTWAIT, MT_DATA); |
| 1074 | if (*mp == NULL) { |
| 1075 | error = ENOBUFS; |
| 1076 | break; |
| 1077 | } |
| 1078 | } |
| 1079 | (*mp)->m_len = min(MHLEN, uio->uio_resid); |
| 1080 | error = uiomove(mtod(*mp, void *), (*mp)->m_len, uio); |
| 1081 | mp = &(*mp)->m_next; |
| 1082 | } |
| 1083 | if (error) { |
| 1084 | ifp->if_ierrors++; |
| 1085 | m_freem(m); |
| 1086 | return (error); |
| 1087 | } |
| 1088 | |
| 1089 | ifp->if_ipackets++; |
| 1090 | m_set_rcvif(m, ifp); |
| 1091 | |
| 1092 | bpf_mtap(ifp, m); |
| 1093 | s = splnet(); |
| 1094 | if_input(ifp, m); |
| 1095 | splx(s); |
| 1096 | |
| 1097 | return (0); |
| 1098 | } |
| 1099 | |
| 1100 | static int |
| 1101 | tap_cdev_ioctl(dev_t dev, u_long cmd, void *data, int flags, |
| 1102 | struct lwp *l) |
| 1103 | { |
| 1104 | return tap_dev_ioctl(minor(dev), cmd, data, l); |
| 1105 | } |
| 1106 | |
| 1107 | static int |
| 1108 | tap_fops_ioctl(file_t *fp, u_long cmd, void *data) |
| 1109 | { |
| 1110 | return tap_dev_ioctl(fp->f_devunit, cmd, data, curlwp); |
| 1111 | } |
| 1112 | |
| 1113 | static int |
| 1114 | tap_dev_ioctl(int unit, u_long cmd, void *data, struct lwp *l) |
| 1115 | { |
| 1116 | struct tap_softc *sc = device_lookup_private(&tap_cd, unit); |
| 1117 | |
| 1118 | if (sc == NULL) |
| 1119 | return ENXIO; |
| 1120 | |
| 1121 | switch (cmd) { |
| 1122 | case FIONREAD: |
| 1123 | { |
| 1124 | struct ifnet *ifp = &sc->sc_ec.ec_if; |
| 1125 | struct mbuf *m; |
| 1126 | int s; |
| 1127 | |
| 1128 | s = splnet(); |
| 1129 | IFQ_POLL(&ifp->if_snd, m); |
| 1130 | |
| 1131 | if (m == NULL) |
| 1132 | *(int *)data = 0; |
| 1133 | else |
| 1134 | *(int *)data = m->m_pkthdr.len; |
| 1135 | splx(s); |
| 1136 | return 0; |
| 1137 | } |
| 1138 | case TIOCSPGRP: |
| 1139 | case FIOSETOWN: |
| 1140 | return fsetown(&sc->sc_pgid, cmd, data); |
| 1141 | case TIOCGPGRP: |
| 1142 | case FIOGETOWN: |
| 1143 | return fgetown(sc->sc_pgid, cmd, data); |
| 1144 | case FIOASYNC: |
| 1145 | if (*(int *)data) { |
| 1146 | if (sc->sc_sih == NULL) { |
| 1147 | sc->sc_sih = softint_establish(SOFTINT_CLOCK, |
| 1148 | tap_softintr, sc); |
| 1149 | if (sc->sc_sih == NULL) |
| 1150 | return EBUSY; /* XXX */ |
| 1151 | } |
| 1152 | sc->sc_flags |= TAP_ASYNCIO; |
| 1153 | } else { |
| 1154 | sc->sc_flags &= ~TAP_ASYNCIO; |
| 1155 | if (sc->sc_sih != NULL) { |
| 1156 | softint_disestablish(sc->sc_sih); |
| 1157 | sc->sc_sih = NULL; |
| 1158 | } |
| 1159 | } |
| 1160 | return 0; |
| 1161 | case FIONBIO: |
| 1162 | if (*(int *)data) |
| 1163 | sc->sc_flags |= TAP_NBIO; |
| 1164 | else |
| 1165 | sc->sc_flags &= ~TAP_NBIO; |
| 1166 | return 0; |
| 1167 | #ifdef OTAPGIFNAME |
| 1168 | case OTAPGIFNAME: |
| 1169 | #endif |
| 1170 | case TAPGIFNAME: |
| 1171 | { |
| 1172 | struct ifreq *ifr = (struct ifreq *)data; |
| 1173 | struct ifnet *ifp = &sc->sc_ec.ec_if; |
| 1174 | |
| 1175 | strlcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ); |
| 1176 | return 0; |
| 1177 | } |
| 1178 | default: |
| 1179 | return ENOTTY; |
| 1180 | } |
| 1181 | } |
| 1182 | |
| 1183 | static int |
| 1184 | tap_cdev_poll(dev_t dev, int events, struct lwp *l) |
| 1185 | { |
| 1186 | return tap_dev_poll(minor(dev), events, l); |
| 1187 | } |
| 1188 | |
| 1189 | static int |
| 1190 | tap_fops_poll(file_t *fp, int events) |
| 1191 | { |
| 1192 | return tap_dev_poll(fp->f_devunit, events, curlwp); |
| 1193 | } |
| 1194 | |
| 1195 | static int |
| 1196 | tap_dev_poll(int unit, int events, struct lwp *l) |
| 1197 | { |
| 1198 | struct tap_softc *sc = |
| 1199 | device_lookup_private(&tap_cd, unit); |
| 1200 | int revents = 0; |
| 1201 | |
| 1202 | if (sc == NULL) |
| 1203 | return POLLERR; |
| 1204 | |
| 1205 | if (events & (POLLIN|POLLRDNORM)) { |
| 1206 | struct ifnet *ifp = &sc->sc_ec.ec_if; |
| 1207 | struct mbuf *m; |
| 1208 | int s; |
| 1209 | |
| 1210 | s = splnet(); |
| 1211 | IFQ_POLL(&ifp->if_snd, m); |
| 1212 | |
| 1213 | if (m != NULL) |
| 1214 | revents |= events & (POLLIN|POLLRDNORM); |
| 1215 | else { |
| 1216 | mutex_spin_enter(&sc->sc_kqlock); |
| 1217 | selrecord(l, &sc->sc_rsel); |
| 1218 | mutex_spin_exit(&sc->sc_kqlock); |
| 1219 | } |
| 1220 | splx(s); |
| 1221 | } |
| 1222 | revents |= events & (POLLOUT|POLLWRNORM); |
| 1223 | |
| 1224 | return (revents); |
| 1225 | } |
| 1226 | |
| 1227 | static struct filterops tap_read_filterops = { 1, NULL, tap_kqdetach, |
| 1228 | tap_kqread }; |
| 1229 | static struct filterops tap_seltrue_filterops = { 1, NULL, tap_kqdetach, |
| 1230 | filt_seltrue }; |
| 1231 | |
| 1232 | static int |
| 1233 | tap_cdev_kqfilter(dev_t dev, struct knote *kn) |
| 1234 | { |
| 1235 | return tap_dev_kqfilter(minor(dev), kn); |
| 1236 | } |
| 1237 | |
| 1238 | static int |
| 1239 | tap_fops_kqfilter(file_t *fp, struct knote *kn) |
| 1240 | { |
| 1241 | return tap_dev_kqfilter(fp->f_devunit, kn); |
| 1242 | } |
| 1243 | |
| 1244 | static int |
| 1245 | tap_dev_kqfilter(int unit, struct knote *kn) |
| 1246 | { |
| 1247 | struct tap_softc *sc = |
| 1248 | device_lookup_private(&tap_cd, unit); |
| 1249 | |
| 1250 | if (sc == NULL) |
| 1251 | return (ENXIO); |
| 1252 | |
| 1253 | KERNEL_LOCK(1, NULL); |
| 1254 | switch(kn->kn_filter) { |
| 1255 | case EVFILT_READ: |
| 1256 | kn->kn_fop = &tap_read_filterops; |
| 1257 | break; |
| 1258 | case EVFILT_WRITE: |
| 1259 | kn->kn_fop = &tap_seltrue_filterops; |
| 1260 | break; |
| 1261 | default: |
| 1262 | KERNEL_UNLOCK_ONE(NULL); |
| 1263 | return (EINVAL); |
| 1264 | } |
| 1265 | |
| 1266 | kn->kn_hook = sc; |
| 1267 | mutex_spin_enter(&sc->sc_kqlock); |
| 1268 | SLIST_INSERT_HEAD(&sc->sc_rsel.sel_klist, kn, kn_selnext); |
| 1269 | mutex_spin_exit(&sc->sc_kqlock); |
| 1270 | KERNEL_UNLOCK_ONE(NULL); |
| 1271 | return (0); |
| 1272 | } |
| 1273 | |
| 1274 | static void |
| 1275 | tap_kqdetach(struct knote *kn) |
| 1276 | { |
| 1277 | struct tap_softc *sc = (struct tap_softc *)kn->kn_hook; |
| 1278 | |
| 1279 | KERNEL_LOCK(1, NULL); |
| 1280 | mutex_spin_enter(&sc->sc_kqlock); |
| 1281 | SLIST_REMOVE(&sc->sc_rsel.sel_klist, kn, knote, kn_selnext); |
| 1282 | mutex_spin_exit(&sc->sc_kqlock); |
| 1283 | KERNEL_UNLOCK_ONE(NULL); |
| 1284 | } |
| 1285 | |
| 1286 | static int |
| 1287 | tap_kqread(struct knote *kn, long hint) |
| 1288 | { |
| 1289 | struct tap_softc *sc = (struct tap_softc *)kn->kn_hook; |
| 1290 | struct ifnet *ifp = &sc->sc_ec.ec_if; |
| 1291 | struct mbuf *m; |
| 1292 | int s, rv; |
| 1293 | |
| 1294 | KERNEL_LOCK(1, NULL); |
| 1295 | s = splnet(); |
| 1296 | IFQ_POLL(&ifp->if_snd, m); |
| 1297 | |
| 1298 | if (m == NULL) |
| 1299 | kn->kn_data = 0; |
| 1300 | else |
| 1301 | kn->kn_data = m->m_pkthdr.len; |
| 1302 | splx(s); |
| 1303 | rv = (kn->kn_data != 0 ? 1 : 0); |
| 1304 | KERNEL_UNLOCK_ONE(NULL); |
| 1305 | return rv; |
| 1306 | } |
| 1307 | |
| 1308 | /* |
| 1309 | * sysctl management routines |
| 1310 | * You can set the address of an interface through: |
| 1311 | * net.link.tap.tap<number> |
| 1312 | * |
| 1313 | * Note the consistent use of tap_log in order to use |
| 1314 | * sysctl_teardown at unload time. |
| 1315 | * |
| 1316 | * In the kernel you will find a lot of SYSCTL_SETUP blocks. Those |
| 1317 | * blocks register a function in a special section of the kernel |
| 1318 | * (called a link set) which is used at init_sysctl() time to cycle |
| 1319 | * through all those functions to create the kernel's sysctl tree. |
| 1320 | * |
| 1321 | * It is not possible to use link sets in a module, so the |
| 1322 | * easiest is to simply call our own setup routine at load time. |
| 1323 | * |
| 1324 | * In the SYSCTL_SETUP blocks you find in the kernel, nodes have the |
| 1325 | * CTLFLAG_PERMANENT flag, meaning they cannot be removed. Once the |
| 1326 | * whole kernel sysctl tree is built, it is not possible to add any |
| 1327 | * permanent node. |
| 1328 | * |
| 1329 | * It should be noted that we're not saving the sysctlnode pointer |
| 1330 | * we are returned when creating the "tap" node. That structure |
| 1331 | * cannot be trusted once out of the calling function, as it might |
| 1332 | * get reused. So we just save the MIB number, and always give the |
| 1333 | * full path starting from the root for later calls to sysctl_createv |
| 1334 | * and sysctl_destroyv. |
| 1335 | */ |
| 1336 | static void |
| 1337 | sysctl_tap_setup(struct sysctllog **clog) |
| 1338 | { |
| 1339 | const struct sysctlnode *node; |
| 1340 | int error = 0; |
| 1341 | |
| 1342 | if ((error = sysctl_createv(clog, 0, NULL, NULL, |
| 1343 | CTLFLAG_PERMANENT, |
| 1344 | CTLTYPE_NODE, "link" , NULL, |
| 1345 | NULL, 0, NULL, 0, |
| 1346 | CTL_NET, AF_LINK, CTL_EOL)) != 0) |
| 1347 | return; |
| 1348 | |
| 1349 | /* |
| 1350 | * The first four parameters of sysctl_createv are for management. |
| 1351 | * |
| 1352 | * The four that follows, here starting with a '0' for the flags, |
| 1353 | * describe the node. |
| 1354 | * |
| 1355 | * The next series of four set its value, through various possible |
| 1356 | * means. |
| 1357 | * |
| 1358 | * Last but not least, the path to the node is described. That path |
| 1359 | * is relative to the given root (third argument). Here we're |
| 1360 | * starting from the root. |
| 1361 | */ |
| 1362 | if ((error = sysctl_createv(clog, 0, NULL, &node, |
| 1363 | CTLFLAG_PERMANENT, |
| 1364 | CTLTYPE_NODE, "tap" , NULL, |
| 1365 | NULL, 0, NULL, 0, |
| 1366 | CTL_NET, AF_LINK, CTL_CREATE, CTL_EOL)) != 0) |
| 1367 | return; |
| 1368 | tap_node = node->sysctl_num; |
| 1369 | } |
| 1370 | |
| 1371 | /* |
| 1372 | * The helper functions make Andrew Brown's interface really |
| 1373 | * shine. They make it possible to create a value on the fly whether |
| 1374 | * the sysctl value is read or written. |
| 1375 | * |
| 1376 | * As shown as an example in the man page, the first step is to |
| 1377 | * create a copy of the node to have sysctl_lookup work on it. |
| 1378 | * |
| 1379 | * Here, we have more work to do than just a copy, since we have |
| 1380 | * to create the string. The first step is to collect the actual |
| 1381 | * value of the node, which is a convenient pointer to the softc |
| 1382 | * of the interface. From there we create the string and use it |
| 1383 | * as the value, but only for the *copy* of the node. |
| 1384 | * |
| 1385 | * Then we let sysctl_lookup do the magic, which consists in |
| 1386 | * setting oldp and newp as required by the operation. When the |
| 1387 | * value is read, that means that the string will be copied to |
| 1388 | * the user, and when it is written, the new value will be copied |
| 1389 | * over in the addr array. |
| 1390 | * |
| 1391 | * If newp is NULL, the user was reading the value, so we don't |
| 1392 | * have anything else to do. If a new value was written, we |
| 1393 | * have to check it. |
| 1394 | * |
| 1395 | * If it is incorrect, we can return an error and leave 'node' as |
| 1396 | * it is: since it is a copy of the actual node, the change will |
| 1397 | * be forgotten. |
| 1398 | * |
| 1399 | * Upon a correct input, we commit the change to the ifnet |
| 1400 | * structure of our interface. |
| 1401 | */ |
| 1402 | static int |
| 1403 | tap_sysctl_handler(SYSCTLFN_ARGS) |
| 1404 | { |
| 1405 | struct sysctlnode node; |
| 1406 | struct tap_softc *sc; |
| 1407 | struct ifnet *ifp; |
| 1408 | int error; |
| 1409 | size_t len; |
| 1410 | char addr[3 * ETHER_ADDR_LEN]; |
| 1411 | uint8_t enaddr[ETHER_ADDR_LEN]; |
| 1412 | |
| 1413 | node = *rnode; |
| 1414 | sc = node.sysctl_data; |
| 1415 | ifp = &sc->sc_ec.ec_if; |
| 1416 | (void)ether_snprintf(addr, sizeof(addr), CLLADDR(ifp->if_sadl)); |
| 1417 | node.sysctl_data = addr; |
| 1418 | error = sysctl_lookup(SYSCTLFN_CALL(&node)); |
| 1419 | if (error || newp == NULL) |
| 1420 | return (error); |
| 1421 | |
| 1422 | len = strlen(addr); |
| 1423 | if (len < 11 || len > 17) |
| 1424 | return (EINVAL); |
| 1425 | |
| 1426 | /* Commit change */ |
| 1427 | if (ether_aton_r(enaddr, sizeof(enaddr), addr) != 0) |
| 1428 | return (EINVAL); |
| 1429 | if_set_sadl(ifp, enaddr, ETHER_ADDR_LEN, false); |
| 1430 | return (error); |
| 1431 | } |
| 1432 | |
| 1433 | /* |
| 1434 | * Module infrastructure |
| 1435 | */ |
| 1436 | #include "if_module.h" |
| 1437 | |
/*
 * Declare the module glue for the tap driver through the IF_MODULE macro.
 * NOTE(review): the macro is presumably provided by "if_module.h"; the
 * meaning of the empty third argument should be confirmed there.
 */
IF_MODULE(MODULE_CLASS_DRIVER, tap, "" )
| 1439 | |