| 1 | /* $NetBSD: ipsec_mbuf.c,v 1.12 2011/05/16 10:05:23 drochner Exp $ */ |
| 2 | /*- |
| 3 | * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting |
| 4 | * All rights reserved. |
| 5 | * |
| 6 | * Redistribution and use in source and binary forms, with or without |
| 7 | * modification, are permitted provided that the following conditions |
| 8 | * are met: |
| 9 | * 1. Redistributions of source code must retain the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer. |
| 11 | * 2. Redistributions in binary form must reproduce the above copyright |
| 12 | * notice, this list of conditions and the following disclaimer in the |
| 13 | * documentation and/or other materials provided with the distribution. |
| 14 | * |
| 15 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
| 16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
| 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 21 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 22 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 23 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 24 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 25 | * SUCH DAMAGE. |
| 26 | * |
| 27 | * $FreeBSD: /repoman/r/ncvs/src/sys/netipsec/ipsec_mbuf.c,v 1.5.2.2 2003/03/28 20:32:53 sam Exp $ |
| 28 | */ |
| 29 | |
| 30 | #include <sys/cdefs.h> |
| 31 | __KERNEL_RCSID(0, "$NetBSD: ipsec_mbuf.c,v 1.12 2011/05/16 10:05:23 drochner Exp $" ); |
| 32 | |
| 33 | /* |
| 34 | * IPsec-specific mbuf routines. |
| 35 | */ |
| 36 | |
| 37 | #ifdef __FreeBSD__ |
| 38 | #include "opt_param.h" |
| 39 | #endif |
| 40 | |
| 41 | #include <sys/param.h> |
| 42 | #include <sys/systm.h> |
| 43 | #include <sys/mbuf.h> |
| 44 | #include <sys/socket.h> |
| 45 | |
| 46 | #include <net/route.h> |
| 47 | #include <netinet/in.h> |
| 48 | |
| 49 | #include <netipsec/ipsec.h> |
| 50 | #include <netipsec/ipsec_var.h> |
| 51 | #include <netipsec/ipsec_private.h> |
| 52 | |
| 53 | #include <netipsec/ipsec_osdep.h> |
| 54 | #include <net/net_osdep.h> |
| 55 | |
| 56 | /* |
| 57 | * Create a writable copy of the mbuf chain. While doing this |
| 58 | * we compact the chain with a goal of producing a chain with |
| 59 | * at most two mbufs. The second mbuf in this chain is likely |
| 60 | * to be a cluster. The primary purpose of this work is to create |
| 61 | * a writable packet for encryption, compression, etc. The |
| 62 | * secondary goal is to linearize the data so the data can be |
| 63 | * passed to crypto hardware in the most efficient manner possible. |
| 64 | */ |
| 65 | struct mbuf * |
| 66 | m_clone(struct mbuf *m0) |
| 67 | { |
| 68 | struct mbuf *m, *mprev; |
| 69 | struct mbuf *n, *mfirst, *mlast; |
| 70 | int len, off; |
| 71 | |
| 72 | IPSEC_ASSERT(m0 != NULL, ("m_clone: null mbuf" )); |
| 73 | |
| 74 | mprev = NULL; |
| 75 | for (m = m0; m != NULL; m = mprev->m_next) { |
| 76 | /* |
| 77 | * Regular mbufs are ignored unless there's a cluster |
| 78 | * in front of it that we can use to coalesce. We do |
| 79 | * the latter mainly so later clusters can be coalesced |
| 80 | * also w/o having to handle them specially (i.e. convert |
| 81 | * mbuf+cluster -> cluster). This optimization is heavily |
| 82 | * influenced by the assumption that we're running over |
| 83 | * Ethernet where MCLBYTES is large enough that the max |
| 84 | * packet size will permit lots of coalescing into a |
| 85 | * single cluster. This in turn permits efficient |
| 86 | * crypto operations, especially when using hardware. |
| 87 | */ |
| 88 | if ((m->m_flags & M_EXT) == 0) { |
| 89 | if (mprev && (mprev->m_flags & M_EXT) && |
| 90 | m->m_len <= M_TRAILINGSPACE(mprev)) { |
| 91 | /* XXX: this ignores mbuf types */ |
| 92 | memcpy(mtod(mprev, char *) + mprev->m_len, |
| 93 | mtod(m, char *), m->m_len); |
| 94 | mprev->m_len += m->m_len; |
| 95 | mprev->m_next = m->m_next; /* unlink from chain */ |
| 96 | m_free(m); /* reclaim mbuf */ |
| 97 | IPSEC_STATINC(IPSEC_STAT_MBCOALESCED); |
| 98 | } else { |
| 99 | mprev = m; |
| 100 | } |
| 101 | continue; |
| 102 | } |
| 103 | /* |
| 104 | * Writable mbufs are left alone (for now). Note |
| 105 | * that for 4.x systems it's not possible to identify |
| 106 | * whether or not mbufs with external buffers are |
| 107 | * writable unless they use clusters. |
| 108 | */ |
| 109 | if (M_EXT_WRITABLE(m)) { |
| 110 | mprev = m; |
| 111 | continue; |
| 112 | } |
| 113 | |
| 114 | /* |
| 115 | * Not writable, replace with a copy or coalesce with |
| 116 | * the previous mbuf if possible (since we have to copy |
| 117 | * it anyway, we try to reduce the number of mbufs and |
| 118 | * clusters so that future work is easier). |
| 119 | */ |
| 120 | IPSEC_ASSERT(m->m_flags & M_EXT, |
| 121 | ("m_clone: m_flags 0x%x" , m->m_flags)); |
| 122 | /* NB: we only coalesce into a cluster or larger */ |
| 123 | if (mprev != NULL && (mprev->m_flags & M_EXT) && |
| 124 | m->m_len <= M_TRAILINGSPACE(mprev)) { |
| 125 | /* XXX: this ignores mbuf types */ |
| 126 | memcpy(mtod(mprev, char *) + mprev->m_len, |
| 127 | mtod(m, char *), m->m_len); |
| 128 | mprev->m_len += m->m_len; |
| 129 | mprev->m_next = m->m_next; /* unlink from chain */ |
| 130 | m_free(m); /* reclaim mbuf */ |
| 131 | IPSEC_STATINC(IPSEC_STAT_CLCOALESCED); |
| 132 | continue; |
| 133 | } |
| 134 | |
| 135 | /* |
| 136 | * Allocate new space to hold the copy... |
| 137 | */ |
| 138 | /* XXX why can M_PKTHDR be set past the first mbuf? */ |
| 139 | if (mprev == NULL && (m->m_flags & M_PKTHDR)) { |
| 140 | /* |
| 141 | * NB: if a packet header is present we must |
| 142 | * allocate the mbuf separately from any cluster |
| 143 | * because M_MOVE_PKTHDR will smash the data |
| 144 | * pointer and drop the M_EXT marker. |
| 145 | */ |
| 146 | MGETHDR(n, M_DONTWAIT, m->m_type); |
| 147 | if (n == NULL) { |
| 148 | m_freem(m0); |
| 149 | return (NULL); |
| 150 | } |
| 151 | M_MOVE_PKTHDR(n, m); |
| 152 | MCLGET(n, M_DONTWAIT); |
| 153 | if ((n->m_flags & M_EXT) == 0) { |
| 154 | m_free(n); |
| 155 | m_freem(m0); |
| 156 | return (NULL); |
| 157 | } |
| 158 | } else { |
| 159 | n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags); |
| 160 | if (n == NULL) { |
| 161 | m_freem(m0); |
| 162 | return (NULL); |
| 163 | } |
| 164 | } |
| 165 | /* |
| 166 | * ... and copy the data. We deal with jumbo mbufs |
| 167 | * (i.e. m_len > MCLBYTES) by splitting them into |
| 168 | * clusters. We could just malloc a buffer and make |
| 169 | * it external but too many device drivers don't know |
| 170 | * how to break up the non-contiguous memory when |
| 171 | * doing DMA. |
| 172 | */ |
| 173 | len = m->m_len; |
| 174 | off = 0; |
| 175 | mfirst = n; |
| 176 | mlast = NULL; |
| 177 | for (;;) { |
| 178 | int cc = min(len, MCLBYTES); |
| 179 | memcpy(mtod(n, char *), mtod(m, char *) + off, cc); |
| 180 | n->m_len = cc; |
| 181 | if (mlast != NULL) |
| 182 | mlast->m_next = n; |
| 183 | mlast = n; |
| 184 | IPSEC_STATINC(IPSEC_STAT_CLCOPIED); |
| 185 | |
| 186 | len -= cc; |
| 187 | if (len <= 0) |
| 188 | break; |
| 189 | off += cc; |
| 190 | |
| 191 | n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags); |
| 192 | if (n == NULL) { |
| 193 | m_freem(mfirst); |
| 194 | m_freem(m0); |
| 195 | return (NULL); |
| 196 | } |
| 197 | } |
| 198 | n->m_next = m->m_next; |
| 199 | if (mprev == NULL) |
| 200 | m0 = mfirst; /* new head of chain */ |
| 201 | else |
| 202 | mprev->m_next = mfirst; /* replace old mbuf */ |
| 203 | m_free(m); /* release old mbuf */ |
| 204 | mprev = mfirst; |
| 205 | } |
| 206 | return (m0); |
| 207 | } |
| 208 | |
| 209 | /* |
| 210 | * Make space for a new header of length hlen at skip bytes |
| 211 | * into the packet. When doing this we allocate new mbufs only |
| 212 | * when absolutely necessary. The mbuf where the new header |
| 213 | * is to go is returned together with an offset into the mbuf. |
| 214 | * If NULL is returned then the mbuf chain may have been modified; |
| 215 | * the caller is assumed to always free the chain. |
| 216 | */ |
| 217 | struct mbuf * |
| 218 | m_makespace(struct mbuf *m0, int skip, int hlen, int *off) |
| 219 | { |
| 220 | struct mbuf *m; |
| 221 | unsigned remain; |
| 222 | |
| 223 | IPSEC_ASSERT(m0 != NULL, ("m_dmakespace: null mbuf" )); |
| 224 | IPSEC_ASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u" , hlen)); |
| 225 | |
| 226 | for (m = m0; m && skip > m->m_len; m = m->m_next) |
| 227 | skip -= m->m_len; |
| 228 | if (m == NULL) |
| 229 | return (NULL); |
| 230 | /* |
| 231 | * At this point skip is the offset into the mbuf m |
| 232 | * where the new header should be placed. Figure out |
| 233 | * if there's space to insert the new header. If so, |
| 234 | * and copying the remainder makese sense then do so. |
| 235 | * Otherwise insert a new mbuf in the chain, splitting |
| 236 | * the contents of m as needed. |
| 237 | */ |
| 238 | remain = m->m_len - skip; /* data to move */ |
| 239 | if (hlen > M_TRAILINGSPACE(m)) { |
| 240 | struct mbuf *n0, *n, **np; |
| 241 | int todo, len, done, alloc; |
| 242 | |
| 243 | n0 = NULL; |
| 244 | np = &n0; |
| 245 | alloc = 0; |
| 246 | done = 0; |
| 247 | todo = remain; |
| 248 | while (todo > 0) { |
| 249 | if (todo > MHLEN) { |
| 250 | n = m_getcl(M_DONTWAIT, m->m_type, 0); |
| 251 | len = MCLBYTES; |
| 252 | } |
| 253 | else { |
| 254 | n = m_get(M_DONTWAIT, m->m_type); |
| 255 | len = MHLEN; |
| 256 | } |
| 257 | if (n == NULL) { |
| 258 | m_freem(n0); |
| 259 | return NULL; |
| 260 | } |
| 261 | *np = n; |
| 262 | np = &n->m_next; |
| 263 | alloc++; |
| 264 | len = min(todo, len); |
| 265 | memcpy(n->m_data, mtod(m, char *) + skip + done, len); |
| 266 | n->m_len = len; |
| 267 | done += len; |
| 268 | todo -= len; |
| 269 | } |
| 270 | |
| 271 | if (hlen <= M_TRAILINGSPACE(m) + remain) { |
| 272 | m->m_len = skip + hlen; |
| 273 | *off = skip; |
| 274 | if (n0 != NULL) { |
| 275 | *np = m->m_next; |
| 276 | m->m_next = n0; |
| 277 | } |
| 278 | } |
| 279 | else { |
| 280 | n = m_get(M_DONTWAIT, m->m_type); |
| 281 | if (n == NULL) { |
| 282 | m_freem(n0); |
| 283 | return NULL; |
| 284 | } |
| 285 | alloc++; |
| 286 | |
| 287 | if ((n->m_next = n0) == NULL) |
| 288 | np = &n->m_next; |
| 289 | n0 = n; |
| 290 | |
| 291 | *np = m->m_next; |
| 292 | m->m_next = n0; |
| 293 | |
| 294 | n->m_len = hlen; |
| 295 | m->m_len = skip; |
| 296 | |
| 297 | m = n; /* header is at front ... */ |
| 298 | *off = 0; /* ... of new mbuf */ |
| 299 | } |
| 300 | |
| 301 | IPSEC_STATADD(IPSEC_STAT_MBINSERTED, alloc); |
| 302 | } else { |
| 303 | /* |
| 304 | * Copy the remainder to the back of the mbuf |
| 305 | * so there's space to write the new header. |
| 306 | */ |
| 307 | /* XXX can this be memcpy? does it handle overlap? */ |
| 308 | ovbcopy(mtod(m, char *) + skip, |
| 309 | mtod(m, char *) + skip + hlen, remain); |
| 310 | m->m_len += hlen; |
| 311 | *off = skip; |
| 312 | } |
| 313 | m0->m_pkthdr.len += hlen; /* adjust packet length */ |
| 314 | return m; |
| 315 | } |
| 316 | |
| 317 | /* |
| 318 | * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header |
| 319 | * length is updated, and a pointer to the first byte of the padding |
| 320 | * (which is guaranteed to be all in one mbuf) is returned. |
| 321 | */ |
| 322 | void * |
| 323 | m_pad(struct mbuf *m, int n) |
| 324 | { |
| 325 | register struct mbuf *m0, *m1; |
| 326 | register int len, pad; |
| 327 | void *retval; |
| 328 | |
| 329 | if (n <= 0) { /* No stupid arguments. */ |
| 330 | DPRINTF(("m_pad: pad length invalid (%d)\n" , n)); |
| 331 | m_freem(m); |
| 332 | return NULL; |
| 333 | } |
| 334 | |
| 335 | len = m->m_pkthdr.len; |
| 336 | pad = n; |
| 337 | m0 = m; |
| 338 | |
| 339 | while (m0->m_len < len) { |
| 340 | IPSEC_ASSERT(m0->m_next != NULL, ("m_pad: m0 null, len %u m_len %u" , len, m0->m_len));/*XXX*/ |
| 341 | len -= m0->m_len; |
| 342 | m0 = m0->m_next; |
| 343 | } |
| 344 | |
| 345 | if (m0->m_len != len) { |
| 346 | DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n" , |
| 347 | m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len)); |
| 348 | |
| 349 | m_freem(m); |
| 350 | return NULL; |
| 351 | } |
| 352 | |
| 353 | /* Check for zero-length trailing mbufs, and find the last one. */ |
| 354 | for (m1 = m0; m1->m_next; m1 = m1->m_next) { |
| 355 | if (m1->m_next->m_len != 0) { |
| 356 | DPRINTF(("m_pad: length mismatch (should be %d " |
| 357 | "instead of %d)\n" , |
| 358 | m->m_pkthdr.len, |
| 359 | m->m_pkthdr.len + m1->m_next->m_len)); |
| 360 | |
| 361 | m_freem(m); |
| 362 | return NULL; |
| 363 | } |
| 364 | |
| 365 | m0 = m1->m_next; |
| 366 | } |
| 367 | |
| 368 | if (pad > M_TRAILINGSPACE(m0)) { |
| 369 | /* Add an mbuf to the chain. */ |
| 370 | MGET(m1, M_DONTWAIT, MT_DATA); |
| 371 | if (m1 == 0) { |
| 372 | m_freem(m0); |
| 373 | DPRINTF(("m_pad: unable to get extra mbuf\n" )); |
| 374 | return NULL; |
| 375 | } |
| 376 | |
| 377 | m0->m_next = m1; |
| 378 | m0 = m1; |
| 379 | m0->m_len = 0; |
| 380 | } |
| 381 | |
| 382 | retval = m0->m_data + m0->m_len; |
| 383 | m0->m_len += pad; |
| 384 | m->m_pkthdr.len += pad; |
| 385 | |
| 386 | return retval; |
| 387 | } |
| 388 | |
| 389 | /* |
| 390 | * Remove hlen data at offset skip in the packet. This is used by |
| 391 | * the protocols strip protocol headers and associated data (e.g. IV, |
| 392 | * authenticator) on input. |
| 393 | */ |
| 394 | int |
| 395 | m_striphdr(struct mbuf *m, int skip, int hlen) |
| 396 | { |
| 397 | struct mbuf *m1; |
| 398 | int roff; |
| 399 | |
| 400 | /* Find beginning of header */ |
| 401 | m1 = m_getptr(m, skip, &roff); |
| 402 | if (m1 == NULL) |
| 403 | return (EINVAL); |
| 404 | |
| 405 | /* Remove the header and associated data from the mbuf. */ |
| 406 | if (roff == 0) { |
| 407 | /* The header was at the beginning of the mbuf */ |
| 408 | IPSEC_STATINC(IPSEC_STAT_INPUT_FRONT); |
| 409 | m_adj(m1, hlen); |
| 410 | if ((m1->m_flags & M_PKTHDR) == 0) |
| 411 | m->m_pkthdr.len -= hlen; |
| 412 | } else if (roff + hlen >= m1->m_len) { |
| 413 | struct mbuf *mo; |
| 414 | |
| 415 | /* |
| 416 | * Part or all of the header is at the end of this mbuf, |
| 417 | * so first let's remove the remainder of the header from |
| 418 | * the beginning of the remainder of the mbuf chain, if any. |
| 419 | */ |
| 420 | IPSEC_STATINC(IPSEC_STAT_INPUT_END); |
| 421 | if (roff + hlen > m1->m_len) { |
| 422 | /* Adjust the next mbuf by the remainder */ |
| 423 | m_adj(m1->m_next, roff + hlen - m1->m_len); |
| 424 | |
| 425 | /* The second mbuf is guaranteed not to have a pkthdr... */ |
| 426 | m->m_pkthdr.len -= (roff + hlen - m1->m_len); |
| 427 | } |
| 428 | |
| 429 | /* Now, let's unlink the mbuf chain for a second...*/ |
| 430 | mo = m1->m_next; |
| 431 | m1->m_next = NULL; |
| 432 | |
| 433 | /* ...and trim the end of the first part of the chain...sick */ |
| 434 | m_adj(m1, -(m1->m_len - roff)); |
| 435 | if ((m1->m_flags & M_PKTHDR) == 0) |
| 436 | m->m_pkthdr.len -= (m1->m_len - roff); |
| 437 | |
| 438 | /* Finally, let's relink */ |
| 439 | m1->m_next = mo; |
| 440 | } else { |
| 441 | /* |
| 442 | * The header lies in the "middle" of the mbuf; copy |
| 443 | * the remainder of the mbuf down over the header. |
| 444 | */ |
| 445 | IPSEC_STATINC(IPSEC_STAT_INPUT_MIDDLE); |
| 446 | ovbcopy(mtod(m1, u_char *) + roff + hlen, |
| 447 | mtod(m1, u_char *) + roff, |
| 448 | m1->m_len - (roff + hlen)); |
| 449 | m1->m_len -= hlen; |
| 450 | m->m_pkthdr.len -= hlen; |
| 451 | } |
| 452 | return (0); |
| 453 | } |
| 454 | |
| 455 | /* |
| 456 | * Diagnostic routine to check mbuf alignment as required by the |
| 457 | * crypto device drivers (that use DMA). |
| 458 | */ |
| 459 | void |
| 460 | m_checkalignment(const char* where, struct mbuf *m0, int off, int len) |
| 461 | { |
| 462 | int roff; |
| 463 | struct mbuf *m = m_getptr(m0, off, &roff); |
| 464 | void *addr; |
| 465 | |
| 466 | if (m == NULL) |
| 467 | return; |
| 468 | printf("%s (off %u len %u): " , where, off, len); |
| 469 | addr = mtod(m, char *) + roff; |
| 470 | do { |
| 471 | int mlen; |
| 472 | |
| 473 | if (((uintptr_t) addr) & 3) { |
| 474 | printf("addr misaligned %p," , addr); |
| 475 | break; |
| 476 | } |
| 477 | mlen = m->m_len; |
| 478 | if (mlen > len) |
| 479 | mlen = len; |
| 480 | len -= mlen; |
| 481 | if (len && (mlen & 3)) { |
| 482 | printf("len mismatch %u," , mlen); |
| 483 | break; |
| 484 | } |
| 485 | m = m->m_next; |
| 486 | addr = m ? mtod(m, void *) : NULL; |
| 487 | } while (m && len > 0); |
| 488 | for (m = m0; m; m = m->m_next) |
| 489 | printf(" [%p:%u]" , mtod(m, void *), m->m_len); |
| 490 | printf("\n" ); |
| 491 | } |
| 492 | |