| 1 | /* $NetBSD: tcp_var.h,v 1.177 2015/02/14 22:09:53 he Exp $ */ |
| 2 | |
| 3 | /* |
| 4 | * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. |
| 5 | * All rights reserved. |
| 6 | * |
| 7 | * Redistribution and use in source and binary forms, with or without |
| 8 | * modification, are permitted provided that the following conditions |
| 9 | * are met: |
| 10 | * 1. Redistributions of source code must retain the above copyright |
| 11 | * notice, this list of conditions and the following disclaimer. |
| 12 | * 2. Redistributions in binary form must reproduce the above copyright |
| 13 | * notice, this list of conditions and the following disclaimer in the |
| 14 | * documentation and/or other materials provided with the distribution. |
| 15 | * 3. Neither the name of the project nor the names of its contributors |
| 16 | * may be used to endorse or promote products derived from this software |
| 17 | * without specific prior written permission. |
| 18 | * |
| 19 | * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND |
| 20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE |
| 23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 29 | * SUCH DAMAGE. |
| 30 | */ |
| 31 | |
| 32 | /* |
| 33 | * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 |
| 34 | * |
| 35 | * NRL grants permission for redistribution and use in source and binary |
| 36 | * forms, with or without modification, of the software and documentation |
| 37 | * created at NRL provided that the following conditions are met: |
| 38 | * |
| 39 | * 1. Redistributions of source code must retain the above copyright |
| 40 | * notice, this list of conditions and the following disclaimer. |
| 41 | * 2. Redistributions in binary form must reproduce the above copyright |
| 42 | * notice, this list of conditions and the following disclaimer in the |
| 43 | * documentation and/or other materials provided with the distribution. |
| 44 | * 3. All advertising materials mentioning features or use of this software |
| 45 | * must display the following acknowledgements: |
| 46 | * This product includes software developed by the University of |
| 47 | * California, Berkeley and its contributors. |
| 48 | * This product includes software developed at the Information |
| 49 | * Technology Division, US Naval Research Laboratory. |
| 50 | * 4. Neither the name of the NRL nor the names of its contributors |
| 51 | * may be used to endorse or promote products derived from this software |
| 52 | * without specific prior written permission. |
| 53 | * |
| 54 | * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS |
| 55 | * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
| 56 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
| 57 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR |
| 58 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 59 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 60 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 61 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| 62 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| 63 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 64 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 65 | * |
| 66 | * The views and conclusions contained in the software and documentation |
| 67 | * are those of the authors and should not be interpreted as representing |
| 68 | * official policies, either expressed or implied, of the US Naval |
| 69 | * Research Laboratory (NRL). |
| 70 | */ |
| 71 | |
| 72 | /*- |
| 73 | * Copyright (c) 1997, 1998, 1999, 2001, 2005 The NetBSD Foundation, Inc. |
| 74 | * All rights reserved. |
| 75 | * |
| 76 | * This code is derived from software contributed to The NetBSD Foundation |
| 77 | * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, |
| 78 | * NASA Ames Research Center. |
| 79 | * This code is derived from software contributed to The NetBSD Foundation |
| 80 | * by Charles M. Hannum. |
| 81 | * |
| 82 | * Redistribution and use in source and binary forms, with or without |
| 83 | * modification, are permitted provided that the following conditions |
| 84 | * are met: |
| 85 | * 1. Redistributions of source code must retain the above copyright |
| 86 | * notice, this list of conditions and the following disclaimer. |
| 87 | * 2. Redistributions in binary form must reproduce the above copyright |
| 88 | * notice, this list of conditions and the following disclaimer in the |
| 89 | * documentation and/or other materials provided with the distribution. |
| 90 | * |
| 91 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
| 92 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
| 93 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 94 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
| 95 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 96 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 97 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 98 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 99 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 100 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 101 | * POSSIBILITY OF SUCH DAMAGE. |
| 102 | */ |
| 103 | |
| 104 | /* |
| 105 | * Copyright (c) 1982, 1986, 1993, 1994, 1995 |
| 106 | * The Regents of the University of California. All rights reserved. |
| 107 | * |
| 108 | * Redistribution and use in source and binary forms, with or without |
| 109 | * modification, are permitted provided that the following conditions |
| 110 | * are met: |
| 111 | * 1. Redistributions of source code must retain the above copyright |
| 112 | * notice, this list of conditions and the following disclaimer. |
| 113 | * 2. Redistributions in binary form must reproduce the above copyright |
| 114 | * notice, this list of conditions and the following disclaimer in the |
| 115 | * documentation and/or other materials provided with the distribution. |
| 116 | * 3. Neither the name of the University nor the names of its contributors |
| 117 | * may be used to endorse or promote products derived from this software |
| 118 | * without specific prior written permission. |
| 119 | * |
| 120 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 121 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 122 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 123 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 124 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 125 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 126 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 127 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 128 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 129 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 130 | * SUCH DAMAGE. |
| 131 | * |
| 132 | * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95 |
| 133 | */ |
| 134 | |
| 135 | #ifndef _NETINET_TCP_VAR_H_ |
| 136 | #define _NETINET_TCP_VAR_H_ |
| 137 | |
| 138 | #if defined(_KERNEL_OPT) |
| 139 | #include "opt_inet.h" |
| 140 | #include "opt_mbuftrace.h" |
| 141 | |
| 142 | #endif |
| 143 | |
| 144 | /* |
| 145 | * Kernel variables for tcp. |
| 146 | */ |
| 147 | |
| 148 | #include <sys/callout.h> |
| 149 | |
#ifdef TCP_SIGNATURE
/*
 * Constants needed by the xform_tcp module and by tcp_[in|out]put
 * for SADB verification and lookup.
 */
#define	TCP_SIGLEN	16	/* computed digest length, in bytes */
#define	TCP_KEYLEN_MIN	1	/* shortest TCP-MD5 key accepted */
#define	TCP_KEYLEN_MAX	80	/* longest TCP-MD5 key accepted */

/*
 * At this time only a single SA may be specified per host.  The
 * KEY_ALLOCSA() lookup needs an SPI in order to work, hence this
 * fixed value.
 */
#define	TCP_SIG_SPI	0x1000
#endif /* TCP_SIGNATURE */
| 164 | |
| 165 | /* |
| 166 | * SACK option block. |
| 167 | */ |
| 168 | struct sackblk { |
| 169 | tcp_seq left; /* Left edge of sack block. */ |
| 170 | tcp_seq right; /* Right edge of sack block. */ |
| 171 | }; |
| 172 | |
| 173 | TAILQ_HEAD(sackhead, sackhole); |
| 174 | struct sackhole { |
| 175 | tcp_seq start; |
| 176 | tcp_seq end; |
| 177 | tcp_seq rxmit; |
| 178 | |
| 179 | TAILQ_ENTRY(sackhole) sackhole_q; |
| 180 | }; |
| 181 | |
| 182 | /* |
| 183 | * Tcp control block, one per tcp; fields: |
| 184 | */ |
| 185 | struct tcpcb { |
| 186 | int t_family; /* address family on the wire */ |
| 187 | struct ipqehead segq; /* sequencing queue */ |
| 188 | int t_segqlen; /* length of the above */ |
| 189 | callout_t t_timer[TCPT_NTIMERS];/* tcp timers */ |
| 190 | short t_state; /* state of this connection */ |
| 191 | short t_rxtshift; /* log(2) of rexmt exp. backoff */ |
| 192 | uint32_t t_rxtcur; /* current retransmit value */ |
| 193 | short t_dupacks; /* consecutive dup acks recd */ |
| 194 | /* |
| 195 | * t_partialacks: |
| 196 | * <0 not in fast recovery. |
| 197 | * ==0 in fast recovery. has not received partial acks |
| 198 | * >0 in fast recovery. has received partial acks |
| 199 | */ |
| 200 | short t_partialacks; /* partials acks during fast rexmit */ |
| 201 | u_short t_peermss; /* peer's maximum segment size */ |
| 202 | u_short t_ourmss; /* our's maximum segment size */ |
| 203 | u_short t_segsz; /* current segment size in use */ |
| 204 | char t_force; /* 1 if forcing out a byte */ |
| 205 | u_int t_flags; |
| 206 | #define TF_ACKNOW 0x0001 /* ack peer immediately */ |
| 207 | #define TF_DELACK 0x0002 /* ack, but try to delay it */ |
| 208 | #define TF_NODELAY 0x0004 /* don't delay packets to coalesce */ |
| 209 | #define TF_NOOPT 0x0008 /* don't use tcp options */ |
| 210 | #define TF_REQ_SCALE 0x0020 /* have/will request window scaling */ |
| 211 | #define TF_RCVD_SCALE 0x0040 /* other side has requested scaling */ |
| 212 | #define TF_REQ_TSTMP 0x0080 /* have/will request timestamps */ |
| 213 | #define TF_RCVD_TSTMP 0x0100 /* a timestamp was received in SYN */ |
| 214 | #define TF_SACK_PERMIT 0x0200 /* other side said I could SACK */ |
| 215 | #define TF_SYN_REXMT 0x0400 /* rexmit timer fired on SYN */ |
| 216 | #define TF_WILL_SACK 0x0800 /* try to use SACK */ |
| 217 | #define TF_REASSEMBLING 0x1000 /* we're busy reassembling */ |
| 218 | #define TF_DEAD 0x2000 /* dead and to-be-released */ |
| 219 | #define TF_PMTUD_PEND 0x4000 /* Path MTU Discovery pending */ |
| 220 | #define TF_ECN_PERMIT 0x10000 /* other side said is ECN-ready */ |
| 221 | #define TF_ECN_SND_CWR 0x20000 /* ECN CWR in queue */ |
| 222 | #define TF_ECN_SND_ECE 0x40000 /* ECN ECE in queue */ |
| 223 | #define TF_SIGNATURE 0x400000 /* require MD5 digests (RFC2385) */ |
| 224 | |
| 225 | |
| 226 | struct mbuf *t_template; /* skeletal packet for transmit */ |
| 227 | struct inpcb *t_inpcb; /* back pointer to internet pcb */ |
| 228 | struct in6pcb *t_in6pcb; /* back pointer to internet pcb */ |
| 229 | callout_t t_delack_ch; /* delayed ACK callout */ |
| 230 | /* |
| 231 | * The following fields are used as in the protocol specification. |
| 232 | * See RFC793, Dec. 1981, page 21. |
| 233 | */ |
| 234 | /* send sequence variables */ |
| 235 | tcp_seq snd_una; /* send unacknowledged */ |
| 236 | tcp_seq snd_nxt; /* send next */ |
| 237 | tcp_seq snd_up; /* send urgent pointer */ |
| 238 | tcp_seq snd_wl1; /* window update seg seq number */ |
| 239 | tcp_seq snd_wl2; /* window update seg ack number */ |
| 240 | tcp_seq iss; /* initial send sequence number */ |
| 241 | u_long snd_wnd; /* send window */ |
| 242 | /* |
| 243 | * snd_recover |
| 244 | * it's basically same as the "recover" variable in RFC 2852 (NewReno). |
| 245 | * when entering fast retransmit, it's set to snd_max. |
| 246 | * newreno uses this to detect partial ack. |
| 247 | * snd_high |
| 248 | * it's basically same as the "send_high" variable in RFC 2852 (NewReno). |
| 249 | * on each RTO, it's set to snd_max. |
| 250 | * newreno uses this to avoid false fast retransmits. |
| 251 | */ |
| 252 | tcp_seq snd_recover; |
| 253 | tcp_seq snd_high; |
| 254 | /* receive sequence variables */ |
| 255 | u_long rcv_wnd; /* receive window */ |
| 256 | tcp_seq rcv_nxt; /* receive next */ |
| 257 | tcp_seq rcv_up; /* receive urgent pointer */ |
| 258 | tcp_seq irs; /* initial receive sequence number */ |
| 259 | /* |
| 260 | * Additional variables for this implementation. |
| 261 | */ |
| 262 | /* receive variables */ |
| 263 | tcp_seq rcv_adv; /* advertised window */ |
| 264 | |
| 265 | /* |
| 266 | * retransmit variables |
| 267 | * |
| 268 | * snd_max |
| 269 | * the highest sequence number we've ever sent. |
| 270 | * used to recognize retransmits. |
| 271 | */ |
| 272 | tcp_seq snd_max; |
| 273 | |
| 274 | /* congestion control (for slow start, source quench, retransmit after loss) */ |
| 275 | u_long snd_cwnd; /* congestion-controlled window */ |
| 276 | u_long snd_ssthresh; /* snd_cwnd size threshhold for |
| 277 | * for slow start exponential to |
| 278 | * linear switch |
| 279 | */ |
| 280 | /* auto-sizing variables */ |
| 281 | u_int rfbuf_cnt; /* recv buffer autoscaling byte count */ |
| 282 | uint32_t rfbuf_ts; /* recv buffer autoscaling timestamp */ |
| 283 | |
| 284 | /* |
| 285 | * transmit timing stuff. See below for scale of srtt and rttvar. |
| 286 | * "Variance" is actually smoothed difference. |
| 287 | */ |
| 288 | uint32_t t_rcvtime; /* time last segment received */ |
| 289 | uint32_t t_rtttime; /* time we started measuring rtt */ |
| 290 | tcp_seq t_rtseq; /* sequence number being timed */ |
| 291 | int32_t t_srtt; /* smoothed round-trip time */ |
| 292 | int32_t t_rttvar; /* variance in round-trip time */ |
| 293 | uint32_t t_rttmin; /* minimum rtt allowed */ |
| 294 | u_long max_sndwnd; /* largest window peer has offered */ |
| 295 | |
| 296 | /* out-of-band data */ |
| 297 | char t_oobflags; /* have some */ |
| 298 | char t_iobc; /* input character */ |
| 299 | #define TCPOOB_HAVEDATA 0x01 |
| 300 | #define TCPOOB_HADDATA 0x02 |
| 301 | short t_softerror; /* possible error not yet reported */ |
| 302 | |
| 303 | /* RFC 1323 variables */ |
| 304 | u_char snd_scale; /* window scaling for send window */ |
| 305 | u_char rcv_scale; /* window scaling for recv window */ |
| 306 | u_char request_r_scale; /* pending window scaling */ |
| 307 | u_char requested_s_scale; |
| 308 | u_int32_t ts_recent; /* timestamp echo data */ |
| 309 | u_int32_t ts_recent_age; /* when last updated */ |
| 310 | u_int32_t ts_timebase; /* our timebase */ |
| 311 | tcp_seq last_ack_sent; |
| 312 | |
| 313 | /* RFC 3465 variables */ |
| 314 | u_long t_bytes_acked; /* ABC "bytes_acked" parameter */ |
| 315 | |
| 316 | /* SACK stuff */ |
| 317 | #define TCP_SACK_MAX 3 |
| 318 | #define TCPSACK_NONE 0 |
| 319 | #define TCPSACK_HAVED 1 |
| 320 | u_char rcv_sack_flags; /* SACK flags. */ |
| 321 | struct sackblk rcv_dsack_block; /* RX D-SACK block. */ |
| 322 | struct ipqehead timeq; /* time sequenced queue. */ |
| 323 | struct sackhead snd_holes; /* TX SACK holes. */ |
| 324 | int snd_numholes; /* Number of TX SACK holes. */ |
| 325 | tcp_seq rcv_lastsack; /* last seq number(+1) sack'd by rcv'r*/ |
| 326 | tcp_seq sack_newdata; /* New data xmitted in this recovery |
| 327 | episode starts at this seq number*/ |
| 328 | tcp_seq snd_fack; /* FACK TCP. Forward-most data held by |
| 329 | peer. */ |
| 330 | |
| 331 | /* CUBIC variables */ |
| 332 | ulong snd_cubic_wmax; /* W_max */ |
| 333 | ulong snd_cubic_wmax_last; /* Used for fast convergence */ |
| 334 | ulong snd_cubic_ctime; /* Last congestion time */ |
| 335 | |
| 336 | /* pointer for syn cache entries*/ |
| 337 | LIST_HEAD(, syn_cache) t_sc; /* list of entries by this tcb */ |
| 338 | |
| 339 | /* prediction of next mbuf when using large window sizes */ |
| 340 | struct mbuf *t_lastm; /* last mbuf that data was sent from */ |
| 341 | int t_inoff; /* data offset in previous mbuf */ |
| 342 | int t_lastoff; /* last data address in mbuf chain */ |
| 343 | int t_lastlen; /* last length read from mbuf chain */ |
| 344 | |
| 345 | /* Path-MTU discovery blackhole detection */ |
| 346 | int t_mtudisc; /* perform mtudisc for this tcb */ |
| 347 | /* Path-MTU Discovery Information */ |
| 348 | u_int t_pmtud_mss_acked; /* MSS acked, lower bound for MTU */ |
| 349 | u_int t_pmtud_mtu_sent; /* MTU used, upper bound for MTU */ |
| 350 | tcp_seq t_pmtud_th_seq; /* TCP SEQ from ICMP payload */ |
| 351 | u_int t_pmtud_nextmtu; /* Advertised Next-Hop MTU from ICMP */ |
| 352 | u_short t_pmtud_ip_len; /* IP length from ICMP payload */ |
| 353 | u_short t_pmtud_ip_hl; /* IP header length from ICMP payload */ |
| 354 | |
| 355 | uint8_t t_ecn_retries; /* # of ECN setup retries */ |
| 356 | |
| 357 | const struct tcp_congctl *t_congctl; /* per TCB congctl algorithm */ |
| 358 | |
| 359 | /* Keepalive per socket */ |
| 360 | u_int t_keepinit; |
| 361 | u_int t_keepidle; |
| 362 | u_int t_keepintvl; |
| 363 | u_int t_keepcnt; |
| 364 | u_int t_maxidle; /* t_keepcnt * t_keepintvl */ |
| 365 | |
| 366 | u_int t_msl; /* MSL to use for this connexion */ |
| 367 | |
| 368 | /* maintain a few stats per connection: */ |
| 369 | uint32_t t_rcvoopack; /* out-of-order packets received */ |
| 370 | uint32_t t_sndrexmitpack; /* retransmit packets sent */ |
| 371 | uint32_t t_sndzerowin; /* zero-window updates sent */ |
| 372 | }; |
| 373 | |
/*
 * Macros to aid ECN TCP.
 *
 * TCP_ECN_ALLOWED(tp) is non-zero when TF_ECN_PERMIT is set in the
 * t_flags of the tcpcb that tp points to.  The argument is fully
 * parenthesized, so any pointer expression (e.g. "p + 1") may be passed.
 */
#define	TCP_ECN_ALLOWED(tp)	((tp)->t_flags & TF_ECN_PERMIT)

/*
 * Macros to aid SACK/FACK TCP.
 *
 * TCP_SACK_ENABLED(tp) is non-zero when TF_WILL_SACK is set in t_flags.
 * TCP_FACK_FASTRECOV(tp) is true when SACK is enabled and snd_fack is
 * more than tcprexmtthresh segments (of t_segsz bytes) beyond snd_una,
 * as compared by SEQ_GT().  It evaluates its argument several times,
 * so do not pass an expression with side effects.
 */
#define	TCP_SACK_ENABLED(tp)	((tp)->t_flags & TF_WILL_SACK)
#define	TCP_FACK_FASTRECOV(tp) \
	(TCP_SACK_ENABLED(tp) && \
	(SEQ_GT((tp)->snd_fack, \
	    (tp)->snd_una + tcprexmtthresh * (tp)->t_segsz)))
| 386 | |
#ifdef _KERNEL
/*
 * TCP reassembly queue locks.
 *
 * The lock is the TF_REASSEMBLING bit of t_flags; both helpers test
 * and change it between splvm() and splx().
 */
static __inline int	tcp_reass_lock_try(struct tcpcb *) __unused;
static __inline void	tcp_reass_unlock(struct tcpcb *) __unused;

/*
 * Try to take the reassembly lock of tp.
 * Returns 1 if the lock was taken, 0 if it was already held.
 */
static __inline int
tcp_reass_lock_try(struct tcpcb *tp)
{
	int got_lock;
	int ipl;

	/*
	 * Use splvm() -- we're blocking things that would cause
	 * mbuf allocation.
	 */
	ipl = splvm();
	got_lock = (tp->t_flags & TF_REASSEMBLING) == 0;
	if (got_lock)
		tp->t_flags |= TF_REASSEMBLING;
	splx(ipl);
	return got_lock;
}

/*
 * Release the reassembly lock of tp.  KASSERTs that it is held.
 */
static __inline void
tcp_reass_unlock(struct tcpcb *tp)
{
	int ipl = splvm();

	KASSERT((tp->t_flags & TF_REASSEMBLING) != 0);
	tp->t_flags &= ~TF_REASSEMBLING;
	splx(ipl);
}

/*
 * With DIAGNOSTIC, TCP_REASS_LOCK() panics if the lock is already held
 * and TCP_REASS_LOCK_CHECK() panics if it is not held.  Without it,
 * TCP_REASS_LOCK() discards the result of the lock attempt and
 * TCP_REASS_LOCK_CHECK() expands to nothing.
 */
#ifdef DIAGNOSTIC
#define	TCP_REASS_LOCK(tp) \
do { \
	if (!tcp_reass_lock_try(tp)) { \
		printf("%s:%d: tcpcb %p reass already locked\n", \
		    __FILE__, __LINE__, tp); \
		panic("tcp_reass_lock"); \
	} \
} while (/*CONSTCOND*/ 0)
#define	TCP_REASS_LOCK_CHECK(tp) \
do { \
	if (!((tp)->t_flags & TF_REASSEMBLING)) { \
		printf("%s:%d: tcpcb %p reass lock not held\n", \
		    __FILE__, __LINE__, tp); \
		panic("tcp reass lock check"); \
	} \
} while (/*CONSTCOND*/ 0)
#else
#define	TCP_REASS_LOCK(tp)	(void) tcp_reass_lock_try((tp))
#define	TCP_REASS_LOCK_CHECK(tp)	/* nothing */
#endif

#define	TCP_REASS_UNLOCK(tp)	tcp_reass_unlock((tp))
#endif /* _KERNEL */
| 450 | |
/*
 * Queue for delayed ACK processing.
 *
 * TCP_RESTART_DELACK(tp) (re)arms the t_delack_ch callout to call
 * tcp_delack(tp) after tcp_delack_ticks.
 * TCP_SET_DELACK(tp) sets TF_DELACK and arms the callout, unless
 * TF_DELACK is already set.
 * TCP_CLEAR_DELACK(tp) clears TF_DELACK and stops the callout, if
 * TF_DELACK is set.
 */
#ifdef _KERNEL
extern int tcp_delack_ticks;
void	tcp_delack(void *);

#define	TCP_RESTART_DELACK(tp) \
	callout_reset(&(tp)->t_delack_ch, tcp_delack_ticks, \
	    tcp_delack, tp)

#define	TCP_SET_DELACK(tp) \
do { \
	if (!((tp)->t_flags & TF_DELACK)) { \
		(tp)->t_flags |= TF_DELACK; \
		TCP_RESTART_DELACK(tp); \
	} \
} while (/*CONSTCOND*/0)

#define	TCP_CLEAR_DELACK(tp) \
do { \
	if (((tp)->t_flags & TF_DELACK) != 0) { \
		(tp)->t_flags &= ~TF_DELACK; \
		callout_stop(&(tp)->t_delack_ch); \
	} \
} while (/*CONSTCOND*/0)
#endif /* _KERNEL */
| 478 | |
/*
 * Current timestamp for a connection: tcp_now (declared elsewhere)
 * minus the connection's own ts_timebase.
 */
#define	TCP_TIMESTAMP(tp)	(tcp_now - (tp)->ts_timebase)
| 483 | |
| 484 | /* |
| 485 | * Handy way of passing around TCP option info. |
| 486 | */ |
| 487 | struct tcp_opt_info { |
| 488 | int ts_present; |
| 489 | u_int32_t ts_val; |
| 490 | u_int32_t ts_ecr; |
| 491 | u_int16_t maxseg; |
| 492 | }; |
| 493 | |
| 494 | #define TOF_SIGNATURE 0x0040 /* signature option present */ |
| 495 | #define TOF_SIGLEN 0x0080 /* sigature length valid (RFC2385) */ |
| 496 | |
| 497 | /* |
| 498 | * Data for the TCP compressed state engine. |
| 499 | */ |
| 500 | union syn_cache_sa { |
| 501 | struct sockaddr sa; |
| 502 | struct sockaddr_in sin; |
| 503 | #if 1 /*def INET6*/ |
| 504 | struct sockaddr_in6 sin6; |
| 505 | #endif |
| 506 | }; |
| 507 | |
| 508 | struct syn_cache { |
| 509 | TAILQ_ENTRY(syn_cache) sc_bucketq; /* link on bucket list */ |
| 510 | callout_t sc_timer; /* rexmt timer */ |
| 511 | struct route sc_route; |
| 512 | long sc_win; /* advertised window */ |
| 513 | int sc_bucketidx; /* our bucket index */ |
| 514 | u_int32_t sc_hash; |
| 515 | u_int32_t sc_timestamp; /* timestamp from SYN */ |
| 516 | u_int32_t sc_timebase; /* our local timebase */ |
| 517 | union syn_cache_sa sc_src; |
| 518 | union syn_cache_sa sc_dst; |
| 519 | tcp_seq sc_irs; |
| 520 | tcp_seq sc_iss; |
| 521 | u_int sc_rxtcur; /* current rxt timeout */ |
| 522 | u_int sc_rxttot; /* total time spend on queues */ |
| 523 | u_short sc_rxtshift; /* for computing backoff */ |
| 524 | u_short sc_flags; |
| 525 | |
| 526 | #define SCF_UNREACH 0x0001 /* we've had an unreach error */ |
| 527 | #define SCF_TIMESTAMP 0x0002 /* peer will do timestamps */ |
| 528 | #define SCF_DEAD 0x0004 /* this entry to be released */ |
| 529 | #define SCF_SACK_PERMIT 0x0008 /* peer will do SACK */ |
| 530 | #define SCF_ECN_PERMIT 0x0010 /* peer will do ECN */ |
| 531 | #define SCF_SIGNATURE 0x40 /* send MD5 digests */ |
| 532 | |
| 533 | struct mbuf *sc_ipopts; /* IP options */ |
| 534 | u_int16_t sc_peermaxseg; |
| 535 | u_int16_t sc_ourmaxseg; |
| 536 | u_int8_t sc_request_r_scale : 4, |
| 537 | sc_requested_s_scale : 4; |
| 538 | |
| 539 | struct tcpcb *sc_tp; /* tcb for listening socket */ |
| 540 | LIST_ENTRY(syn_cache) sc_tpq; /* list of entries by same tp */ |
| 541 | }; |
| 542 | |
| 543 | struct syn_cache_head { |
| 544 | TAILQ_HEAD(, syn_cache) sch_bucket; /* bucket entries */ |
| 545 | u_short sch_length; /* # entries in bucket */ |
| 546 | }; |
| 547 | |
/*
 * Conversions from a pcb or socket pointer to the attached tcpcb.
 * With INET6, sototcpcb() chooses the inpcb or in6pcb path depending on
 * whether the socket's protocol domain family is AF_INET.
 */
#define	intotcpcb(ip)	((struct tcpcb *)(ip)->inp_ppcb)
#ifdef INET6
#define	in6totcpcb(ip)	((struct tcpcb *)(ip)->in6p_ppcb)
#define	sototcpcb(so)	(((so)->so_proto->pr_domain->dom_family == AF_INET) \
				? intotcpcb(sotoinpcb(so)) \
				: in6totcpcb(sotoin6pcb(so)))
#else
#define	sototcpcb(so)	(intotcpcb(sotoinpcb(so)))
#endif
| 559 | |
/*
 * RTO calculation is discussed in RFC2988; these comments assume
 * familiarity with that document.
 *
 * The smoothed round-trip time and the estimated variance are kept as
 * fixed point numbers.  Historically, srtt was scaled by TCP_RTT_SHIFT
 * bits and rttvar by TCP_RTTVAR_SHIFT bits.  Because those values
 * coincide with the alpha and beta parameters suggested for RTO
 * calculation (1/8 for srtt, 1/4 for rttvar), computing 1/8 of the new
 * value and converting it to the fixed-point representation took zero
 * instructions.  The storage representations no longer coincide with
 * the alpha/beta shifts, however; more fractional bits are present.
 *
 * srtt is stored in units of 1/32 slow ticks, or 1/64 s.  (The
 * assumption that a slow tick is 500 ms should not be present in the
 * code.)
 *
 * rttvar is stored in units of 1/16 slow ticks, or 1/32 s.  There may
 * be some confusion about this in the code.
 *
 * For historical reasons, these scales are also used in smoothing the
 * average (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed).
 * This results in alpha of 0.125 and beta of 0.25, following RFC2988
 * section 2.3.
 *
 * XXX Change SHIFT values to LGWEIGHT and REP_SHIFT, and adjust
 * the code to use the correct ones.
 */
#define	TCP_RTT_SHIFT		3	/* shift for srtt; 3 bits frac. */
#define	TCP_RTTVAR_SHIFT	2	/* multiplier for rttvar; 2 bits */
| 591 | |
/*
 * Compute the TCP retransmission timer, following RFC2988.
 * The result is in slow timeout ticks.
 *
 * Section 2.2 requires the RTO value to be
 *	srtt + max(G, 4*RTTVAR)
 * where G is the clock granularity.
 *
 * The following has not necessarily been updated for the new storage
 * representation:
 *
 * Because of the way we do the smoothing, srtt and rttvar
 * will each average +1/2 tick of bias.  When we compute
 * the retransmit timer, we want 1/2 tick of rounding and
 * 1 extra tick because of +-1/2 tick uncertainty in the
 * firing of the timer.  The bias will give us exactly the
 * 1.5 tick we need.  But, because the bias is
 * statistical, we have to test that we don't drop below
 * the minimum feasible timer (which is 2 ticks).
 * This macro assumes that the value of 1<<TCP_RTTVAR_SHIFT
 * is the same as the multiplier for rttvar.
 *
 * XXX This macro appears to be wrong: it should check rttvar*4 in
 * ticks and use 1 instead if rttvar*4 rounds to 0.  It also appears
 * to treat srtt as being in the old storage representation, which
 * results in a factor of 4 extra.
 */
#define	TCP_REXMTVAL(tp) \
	((((tp)->t_srtt >> TCP_RTT_SHIFT) + (tp)->t_rttvar) >> 2)
| 621 | |
/*
 * Compute the initial window for slow start: iw segments of segsz
 * bytes, capped at the larger of two segments and tcp_init_win_max[iw].
 */
#define	TCP_INITIAL_WINDOW(iw, segsz) \
	min((iw) * (segsz), max(2 * (segsz), tcp_init_win_max[(iw)]))
| 627 | |
| 628 | /* |
| 629 | * TCP statistics. |
| 630 | * Each counter is an unsigned 64-bit value. |
| 631 | * |
| 632 | * Many of these should be kept per connection, but that's inconvenient |
| 633 | * at the moment. |
| 634 | */ |
| 635 | #define TCP_STAT_CONNATTEMPT 0 /* connections initiated */ |
| 636 | #define TCP_STAT_ACCEPTS 1 /* connections accepted */ |
| 637 | #define TCP_STAT_CONNECTS 2 /* connections established */ |
| 638 | #define TCP_STAT_DROPS 3 /* connections dropped */ |
| 639 | #define TCP_STAT_CONNDROPS 4 /* embryonic connections dropped */ |
| 640 | #define TCP_STAT_CLOSED 5 /* conn. closed (includes drops) */ |
| 641 | #define TCP_STAT_SEGSTIMED 6 /* segs where we tried to get rtt */ |
| 642 | #define TCP_STAT_RTTUPDATED 7 /* times we succeeded */ |
| 643 | #define TCP_STAT_DELACK 8 /* delayed ACKs sent */ |
| 644 | #define TCP_STAT_TIMEOUTDROP 9 /* conn. dropped in rxmt timeout */ |
| 645 | #define TCP_STAT_REXMTTIMEO 10 /* retransmit timeouts */ |
| 646 | #define TCP_STAT_PERSISTTIMEO 11 /* persist timeouts */ |
| 647 | #define TCP_STAT_KEEPTIMEO 12 /* keepalive timeouts */ |
| 648 | #define TCP_STAT_KEEPPROBE 13 /* keepalive probes sent */ |
| 649 | #define TCP_STAT_KEEPDROPS 14 /* connections dropped in keepalive */ |
| 650 | #define TCP_STAT_PERSISTDROPS 15 /* connections dropped in persist */ |
| 651 | #define TCP_STAT_CONNSDRAINED 16 /* connections drained due to memory |
| 652 | shortage */ |
| 653 | #define TCP_STAT_PMTUBLACKHOLE 17 /* PMTUD blackhole detected */ |
| 654 | #define TCP_STAT_SNDTOTAL 18 /* total packets sent */ |
| 655 | #define TCP_STAT_SNDPACK 19 /* data packets sent */ |
| 656 | #define TCP_STAT_SNDBYTE 20 /* data bytes sent */ |
| 657 | #define TCP_STAT_SNDREXMITPACK 21 /* data packets retransmitted */ |
| 658 | #define TCP_STAT_SNDREXMITBYTE 22 /* data bytes retransmitted */ |
| 659 | #define TCP_STAT_SNDACKS 23 /* ACK-only packets sent */ |
| 660 | #define TCP_STAT_SNDPROBE 24 /* window probes sent */ |
| 661 | #define TCP_STAT_SNDURG 25 /* packets sent with URG only */ |
| 662 | #define TCP_STAT_SNDWINUP 26 /* window update-only packets sent */ |
| 663 | #define TCP_STAT_SNDCTRL 27 /* control (SYN|FIN|RST) packets sent */ |
| 664 | #define TCP_STAT_RCVTOTAL 28 /* total packets received */ |
| 665 | #define TCP_STAT_RCVPACK 29 /* packets received in sequence */ |
| 666 | #define TCP_STAT_RCVBYTE 30 /* bytes received in sequence */ |
| 667 | #define TCP_STAT_RCVBADSUM 31 /* packets received with cksum errs */ |
| 668 | #define TCP_STAT_RCVBADOFF 32 /* packets received with bad offset */ |
| 669 | #define TCP_STAT_RCVMEMDROP 33 /* packets dropped for lack of memory */ |
| 670 | #define TCP_STAT_RCVSHORT 34 /* packets received too short */ |
| 671 | #define TCP_STAT_RCVDUPPACK 35 /* duplicate-only packets received */ |
| 672 | #define TCP_STAT_RCVDUPBYTE 36 /* duplicate-only bytes received */ |
| 673 | #define TCP_STAT_RCVPARTDUPPACK 37 /* packets with some duplicate data */ |
| 674 | #define TCP_STAT_RCVPARTDUPBYTE 38 /* dup. bytes in part-dup. packets */ |
| 675 | #define TCP_STAT_RCVOOPACK 39 /* out-of-order packets received */ |
| 676 | #define TCP_STAT_RCVOOBYTE 40 /* out-of-order bytes received */ |
| 677 | #define TCP_STAT_RCVPACKAFTERWIN 41 /* packets with data after window */ |
| 678 | #define TCP_STAT_RCVBYTEAFTERWIN 42 /* bytes received after window */ |
| 679 | #define TCP_STAT_RCVAFTERCLOSE 43 /* packets received after "close" */ |
| 680 | #define TCP_STAT_RCVWINPROBE 44 /* rcvd window probe packets */ |
| 681 | #define TCP_STAT_RCVDUPACK 45 /* rcvd duplicate ACKs */ |
| 682 | #define TCP_STAT_RCVACKTOOMUCH 46 /* rcvd ACKs for unsent data */ |
| 683 | #define TCP_STAT_RCVACKPACK 47 /* rcvd ACK packets */ |
| 684 | #define TCP_STAT_RCVACKBYTE 48 /* bytes ACKed by rcvd ACKs */ |
| 685 | #define TCP_STAT_RCVWINUPD 49 /* rcvd window update packets */ |
| 686 | #define TCP_STAT_PAWSDROP 50 /* segments dropped due to PAWS */ |
| 687 | #define TCP_STAT_PREDACK 51 /* times hdr predict OK for ACKs */ |
| 688 | #define TCP_STAT_PREDDAT 52 /* times hdr predict OK for data pkts */ |
| 689 | #define TCP_STAT_PCBHASHMISS 53 /* input packets missing PCB hash */ |
| 690 | #define TCP_STAT_NOPORT 54 /* no socket on port */ |
| 691 | #define TCP_STAT_BADSYN 55 /* received ACK for which we have |
| 692 | no SYN in compressed state */ |
| 693 | #define TCP_STAT_DELAYED_FREE 56 /* delayed pool_put() of tcpcb */ |
| 694 | #define TCP_STAT_SC_ADDED 57 /* # of sc entries added */ |
| 695 | #define TCP_STAT_SC_COMPLETED 58 /* # of sc connections completed */ |
| 696 | #define TCP_STAT_SC_TIMED_OUT 59 /* # of sc entries timed out */ |
| 697 | #define TCP_STAT_SC_OVERFLOWED 60 /* # of sc drops due to overflow */ |
| 698 | #define TCP_STAT_SC_RESET 61 /* # of sc drops due to RST */ |
| 699 | #define TCP_STAT_SC_UNREACH 62 /* # of sc drops due to ICMP unreach */ |
| 700 | #define TCP_STAT_SC_BUCKETOVERFLOW 63 /* # of sc drops due to bucket ovflow */ |
| 701 | #define TCP_STAT_SC_ABORTED 64 /* # of sc entries aborted (no mem) */ |
| 702 | #define TCP_STAT_SC_DUPESYN 65 /* # of duplicate SYNs received */ |
| 703 | #define TCP_STAT_SC_DROPPED 66 /* # of SYNs dropped (no route/mem) */ |
| 704 | #define TCP_STAT_SC_COLLISIONS 67 /* # of sc hash collisions */ |
| 705 | #define TCP_STAT_SC_RETRANSMITTED 68 /* # of sc retransmissions */ |
| 706 | #define TCP_STAT_SC_DELAYED_FREE 69 /* # of delayed pool_put()s */ |
| 707 | #define TCP_STAT_SELFQUENCH 70 /* # of ENOBUFS we get on output */ |
| 708 | #define TCP_STAT_BADSIG 71 /* # of drops due to bad signature */ |
| 709 | #define TCP_STAT_GOODSIG 72 /* # of packets with good signature */ |
| 710 | #define TCP_STAT_ECN_SHS 73 /* # of successful ECN handshakes */ |
| 711 | #define TCP_STAT_ECN_CE 74 /* # of packets with CE bit */ |
| 712 | #define TCP_STAT_ECN_ECT 75 /* # of packets with ECT(0) bit */ |
| 713 | |
| 714 | #define TCP_NSTATS 76 /* number of TCP_STAT_* slots: one more than the highest index above (75); bump it when adding a counter */ |
| 715 | |
| 716 | /* |
| 717 | * Names for TCP sysctl objects. Numbers 7 and 23 are obsoleted: their definitions are compiled out under #if 0 and the numbers are not reused. |
| 718 | */ |
| 719 | #define TCPCTL_RFC1323 1 /* RFC1323 timestamps/scaling */ |
| 720 | #define TCPCTL_SENDSPACE 2 /* default send buffer */ |
| 721 | #define TCPCTL_RECVSPACE 3 /* default recv buffer */ |
| 722 | #define TCPCTL_MSSDFLT 4 /* default seg size */ |
| 723 | #define TCPCTL_SYN_CACHE_LIMIT 5 /* max size of comp. state engine */ |
| 724 | #define TCPCTL_SYN_BUCKET_LIMIT 6 /* max size of hash bucket */ |
| 725 | #if 0 /*obsoleted*/ |
| 726 | #define TCPCTL_SYN_CACHE_INTER 7 /* interval of comp. state timer */ |
| 727 | #endif |
| 728 | #define TCPCTL_INIT_WIN 8 /* initial window */ |
| 729 | #define TCPCTL_MSS_IFMTU 9 /* mss from interface, not in_maxmtu */ |
| 730 | #define TCPCTL_SACK 10 /* RFC2018 selective acknowledgement */ |
| 731 | #define TCPCTL_WSCALE 11 /* RFC1323 window scaling */ |
| 732 | #define TCPCTL_TSTAMP 12 /* RFC1323 timestamps */ |
| 733 | #define TCPCTL_COMPAT_42 13 /* 4.2BSD TCP bug work-arounds */ |
| 734 | #define TCPCTL_CWM 14 /* Congestion Window Monitoring */ |
| 735 | #define TCPCTL_CWM_BURSTSIZE 15 /* burst size allowed by CWM */ |
| 736 | #define TCPCTL_ACK_ON_PUSH 16 /* ACK immediately on PUSH */ |
| 737 | #define TCPCTL_KEEPIDLE 17 /* keepalive idle time */ |
| 738 | #define TCPCTL_KEEPINTVL 18 /* keepalive probe interval */ |
| 739 | #define TCPCTL_KEEPCNT 19 /* keepalive count */ |
| 740 | #define TCPCTL_SLOWHZ 20 /* PR_SLOWHZ (read-only) */ |
| 741 | #define TCPCTL_NEWRENO 21 /* NewReno Congestion Control */ |
| 742 | #define TCPCTL_LOG_REFUSED 22 /* Log refused connections */ |
| 743 | #if 0 /*obsoleted*/ |
| 744 | #define TCPCTL_RSTRATELIMIT 23 /* RST rate limit */ |
| 745 | #endif |
| 746 | #define TCPCTL_RSTPPSLIMIT 24 /* RST pps limit */ |
| 747 | #define TCPCTL_DELACK_TICKS 25 /* # ticks to delay ACK */ |
| 748 | #define TCPCTL_INIT_WIN_LOCAL 26 /* initial window for local nets */ |
| 749 | #define TCPCTL_IDENT 27 /* rfc 931 identd */ |
| 750 | #define TCPCTL_ACKDROPRATELIMIT 28 /* SYN/RST -> ACK rate limit */ |
| 751 | #define TCPCTL_LOOPBACKCKSUM 29 /* do TCP checksum on loopback */ |
| 752 | #define TCPCTL_STATS 30 /* TCP statistics */ |
| 753 | #define TCPCTL_DEBUG 31 /* TCP debug sockets */ |
| 754 | #define TCPCTL_DEBX 32 /* # of tcp debug sockets */ |
| 755 | #define TCPCTL_DROP 33 /* drop tcp connection */ |
| 756 | #define TCPCTL_MSL 34 /* Max Segment Life */ |
| 757 | #define TCPCTL_MAXID 35 /* one more than the highest TCPCTL_* number above (34) */ |
| 758 | /* Initializer list of { name, CTLTYPE_* } pairs, one per TCPCTL_* number in ascending order (35 entries, matching TCPCTL_MAXID); { 0, 0 } fills a number that has no name. */ |
| 759 | #define TCPCTL_NAMES { \ |
| 760 | { 0, 0 }, /* 0: no TCPCTL_* number above is 0 */ \ |
| 761 | { "rfc1323", CTLTYPE_INT }, \ |
| 762 | { "sendspace", CTLTYPE_INT }, \ |
| 763 | { "recvspace", CTLTYPE_INT }, \ |
| 764 | { "mssdflt", CTLTYPE_INT }, \ |
| 765 | { "syn_cache_limit", CTLTYPE_INT }, \ |
| 766 | { "syn_bucket_limit", CTLTYPE_INT }, \ |
| 767 | { 0, 0 }, /* 7: obsoleted TCPCTL_SYN_CACHE_INTER */\ |
| 768 | { "init_win", CTLTYPE_INT }, \ |
| 769 | { "mss_ifmtu", CTLTYPE_INT }, \ |
| 770 | { "sack", CTLTYPE_INT }, \ |
| 771 | { "win_scale", CTLTYPE_INT }, \ |
| 772 | { "timestamps", CTLTYPE_INT }, \ |
| 773 | { "compat_42", CTLTYPE_INT }, \ |
| 774 | { "cwm", CTLTYPE_INT }, \ |
| 775 | { "cwm_burstsize", CTLTYPE_INT }, \ |
| 776 | { "ack_on_push", CTLTYPE_INT }, \ |
| 777 | { "keepidle", CTLTYPE_INT }, \ |
| 778 | { "keepintvl", CTLTYPE_INT }, \ |
| 779 | { "keepcnt", CTLTYPE_INT }, \ |
| 780 | { "slowhz", CTLTYPE_INT }, \ |
| 781 | { 0, 0 }, /* 21: TCPCTL_NEWRENO is still defined but is given no name here */ \ |
| 782 | { "log_refused",CTLTYPE_INT }, \ |
| 783 | { 0, 0 }, /* 23: obsoleted TCPCTL_RSTRATELIMIT */ \ |
| 784 | { "rstppslimit", CTLTYPE_INT }, \ |
| 785 | { "delack_ticks", CTLTYPE_INT }, \ |
| 786 | { "init_win_local", CTLTYPE_INT }, \ |
| 787 | { "ident", CTLTYPE_STRUCT }, \ |
| 788 | { "ackdropppslimit", CTLTYPE_INT }, \ |
| 789 | { "do_loopback_cksum", CTLTYPE_INT }, \ |
| 790 | { "stats", CTLTYPE_STRUCT }, \ |
| 791 | { "debug", CTLTYPE_STRUCT }, \ |
| 792 | { "debx", CTLTYPE_INT }, \ |
| 793 | { "drop", CTLTYPE_STRUCT }, \ |
| 794 | { "msl", CTLTYPE_INT }, \ |
| 795 | } |
| 796 | |
| 797 | #ifdef _KERNEL |
| 798 | |
| 799 | extern struct inpcbtable tcbtable; /* head of queue of active tcpcb's */ |
| 800 | extern const struct pr_usrreqs tcp_usrreqs; |
| 801 | |
| 802 | extern u_int32_t tcp_now; /* for RFC 1323 timestamps */ |
| 803 | extern int tcp_do_rfc1323; /* enabled/disabled? */ |
| 804 | extern int tcp_do_sack; /* SACK enabled/disabled? */ |
| 805 | extern int tcp_do_win_scale; /* RFC1323 window scaling enabled/disabled? */ |
| 806 | extern int tcp_do_timestamps; /* RFC1323 timestamps enabled/disabled? */ |
| 807 | extern int tcp_mssdflt; /* default seg size */ |
| 808 | extern int tcp_minmss; /* minimal seg size */ |
| 809 | extern int tcp_msl; /* max segment life */ |
| 810 | extern int tcp_init_win; /* initial window */ |
| 811 | extern int tcp_init_win_local; /* initial window for local nets */ |
| 812 | extern int tcp_init_win_max[11];/* max sizes for values of tcp_init_win_* */ |
| 813 | extern int tcp_mss_ifmtu; /* take MSS from interface, not in_maxmtu */ |
| 814 | extern int tcp_compat_42; /* work around ancient broken TCP peers */ |
| 815 | extern int tcp_cwm; /* enable Congestion Window Monitoring */ |
| 816 | extern int tcp_cwm_burstsize; /* burst size allowed by CWM */ |
| 817 | extern int tcp_ack_on_push; /* ACK immediately on PUSH */ |
| 818 | extern int tcp_syn_cache_limit; /* max entries for compressed state engine */ |
| 819 | extern int tcp_syn_bucket_limit;/* max entries per hash bucket */ |
| 820 | extern int tcp_log_refused; /* log refused connections */ |
| 821 | extern int tcp_do_ecn; /* TCP ECN enabled/disabled? */ |
| 822 | extern int tcp_ecn_maxretries; /* Max ECN setup retries */ |
| 823 | extern int tcp_do_rfc1948; /* ISS by cryptographic hash */ |
| 824 | extern int tcp_sack_tp_maxholes; /* Max holes per connection. */ |
| 825 | extern int tcp_sack_globalmaxholes; /* Max holes per system. */ |
| 826 | extern int tcp_sack_globalholes; /* Number of holes present. */ |
| 827 | extern int tcp_do_abc; /* RFC3465 ABC enabled/disabled? */ |
| 828 | extern int tcp_abc_aggressive; /* 1: L=2*SMSS 0: L=1*SMSS */ |
| 829 | |
| 830 | extern int tcp_msl_enable; /* enable TIME_WAIT truncation */ |
| 831 | extern int tcp_msl_loop; /* MSL for loopback */ |
| 832 | extern int tcp_msl_local; /* MSL for 'local' */ |
| 833 | extern int tcp_msl_remote; /* MSL otherwise */ |
| 834 | extern int tcp_msl_remote_threshold; /* RTT threshold */ |
| 835 | extern int tcp_rttlocal; /* Use RTT to decide who's 'local' */ |
| 836 | extern int tcp4_vtw_enable; /* NOTE(review): the four *vtw* knobs are not described in this header -- see their definitions */ |
| 837 | extern int tcp6_vtw_enable; |
| 838 | extern int tcp_vtw_was_enabled; |
| 839 | extern int tcp_vtw_entries; |
| 840 | |
| 841 | extern int tcp_rst_ppslim; /* RST pps limit; listed at slot 24 (TCPCTL_RSTPPSLIMIT) of TCPCTL_VARIABLES */ |
| 842 | extern int tcp_ackdrop_ppslim; /* presumably the SYN/RST -> ACK rate limit named "ackdropppslimit" (TCPCTL_ACKDROPRATELIMIT) -- confirm */ |
| 843 | |
| 844 | extern int tcp_syn_cache_size; /* NOTE(review): presumably the number of elements in tcp_syn_cache[] -- confirm */ |
| 845 | extern struct syn_cache_head tcp_syn_cache[]; /* array length is fixed by the definition, not by this declaration */ |
| 846 | extern u_long syn_cache_count; /* NOTE(review): presumably the current number of syn cache entries -- confirm */ |
| 847 | |
| 848 | #ifdef MBUFTRACE /* the struct mowner objects below are declared only when MBUFTRACE is defined */ |
| 849 | extern struct mowner tcp_rx_mowner; |
| 850 | extern struct mowner tcp_tx_mowner; |
| 851 | extern struct mowner tcp_reass_mowner; |
| 852 | extern struct mowner tcp_sock_mowner; |
| 853 | extern struct mowner tcp_sock_rx_mowner; |
| 854 | extern struct mowner tcp_sock_tx_mowner; |
| 855 | extern struct mowner tcp_mowner; |
| 856 | #endif |
| 857 | |
| 858 | extern int tcp_do_autorcvbuf; /* NOTE(review): presumably receive-buffer auto-sizing: enable flag, then increment and maximum below -- confirm */ |
| 859 | extern int tcp_autorcvbuf_inc; |
| 860 | extern int tcp_autorcvbuf_max; |
| 861 | extern int tcp_do_autosndbuf; /* NOTE(review): presumably the send-buffer counterpart of the three variables above -- confirm */ |
| 862 | extern int tcp_autosndbuf_inc; |
| 863 | extern int tcp_autosndbuf_max; |
| 864 | /* Initializer list with one entry per TCPCTL_* number, in the same order as TCPCTL_NAMES; { 0 } marks a number with no int variable behind it. NOTE(review): the entry type is declared elsewhere; the fields look like { valid, is-constant, int pointer, constant value } -- confirm. The list stops at 28; later numbers have no entry. */ |
| 865 | #define TCPCTL_VARIABLES { \ |
| 866 | { 0 }, /* 0: no TCPCTL_* number is 0 */ \ |
| 867 | { 1, 0, &tcp_do_rfc1323 }, \ |
| 868 | { 1, 0, &tcp_sendspace }, \ |
| 869 | { 1, 0, &tcp_recvspace }, \ |
| 870 | { 1, 0, &tcp_mssdflt }, \ |
| 871 | { 1, 0, &tcp_syn_cache_limit }, \ |
| 872 | { 1, 0, &tcp_syn_bucket_limit }, \ |
| 873 | { 0 }, /* 7: obsoleted TCPCTL_SYN_CACHE_INTER */ \ |
| 874 | { 1, 0, &tcp_init_win }, \ |
| 875 | { 1, 0, &tcp_mss_ifmtu }, \ |
| 876 | { 1, 0, &tcp_do_sack }, \ |
| 877 | { 1, 0, &tcp_do_win_scale }, \ |
| 878 | { 1, 0, &tcp_do_timestamps }, \ |
| 879 | { 1, 0, &tcp_compat_42 }, \ |
| 880 | { 1, 0, &tcp_cwm }, \ |
| 881 | { 1, 0, &tcp_cwm_burstsize }, \ |
| 882 | { 1, 0, &tcp_ack_on_push }, \ |
| 883 | { 1, 0, &tcp_keepidle }, \ |
| 884 | { 1, 0, &tcp_keepintvl }, \ |
| 885 | { 1, 0, &tcp_keepcnt }, \ |
| 886 | { 1, 1, 0, PR_SLOWHZ }, /* 20: TCPCTL_SLOWHZ, the PR_SLOWHZ constant instead of a variable */ \ |
| 887 | { 0 }, /* 21: TCPCTL_NEWRENO, no variable listed */ \ |
| 888 | { 1, 0, &tcp_log_refused }, \ |
| 889 | { 0 }, /* 23: obsoleted TCPCTL_RSTRATELIMIT */ \ |
| 890 | { 1, 0, &tcp_rst_ppslim }, \ |
| 891 | { 1, 0, &tcp_delack_ticks }, \ |
| 892 | { 1, 0, &tcp_init_win_local }, \ |
| 893 | { 0 }, /* 27: TCPCTL_IDENT is CTLTYPE_STRUCT in TCPCTL_NAMES, so it has no int variable; this placeholder keeps slot 28 aligned */ \ |
| 894 | { 1, 0, &tcp_ackdrop_ppslim }, /* 28: TCPCTL_ACKDROPRATELIMIT */ \ |
| 895 | } |
| 896 | |
| 897 | struct secasvar; /* forward declaration; only pointers to it appear in the prototypes below */ |
| 898 | |
| 899 | void tcp_canceltimers(struct tcpcb *); |
| 900 | struct tcpcb * |
| 901 | tcp_close(struct tcpcb *); |
| 902 | int tcp_isdead(struct tcpcb *); |
| 903 | #ifdef INET6 |
| 904 | void *tcp6_ctlinput(int, const struct sockaddr *, void *); |
| 905 | #endif |
| 906 | void *tcp_ctlinput(int, const struct sockaddr *, void *); |
| 907 | int tcp_ctloutput(int, struct socket *, struct sockopt *); |
| 908 | struct tcpcb * |
| 909 | tcp_disconnect1(struct tcpcb *); |
| 910 | struct tcpcb * |
| 911 | tcp_drop(struct tcpcb *, int); |
| 912 | #ifdef TCP_SIGNATURE /* signature helpers are declared only when TCP_SIGNATURE is defined */ |
| 913 | int tcp_signature_apply(void *, void *, u_int); |
| 914 | struct secasvar *tcp_signature_getsav(struct mbuf *, struct tcphdr *); |
| 915 | int tcp_signature(struct mbuf *, struct tcphdr *, int, struct secasvar *, |
| 916 | char *); |
| 917 | #endif |
| 918 | void tcp_drain(void); |
| 919 | void tcp_drainstub(void); |
| 920 | void tcp_established(struct tcpcb *); |
| 921 | void tcp_init(void); |
| 922 | void tcp_init_common(unsigned); |
| 923 | #ifdef INET6 |
| 924 | int tcp6_input(struct mbuf **, int *, int); |
| 925 | #endif |
| 926 | void tcp_input(struct mbuf *, ...); /* variadic: the arguments after the mbuf are not typed by this prototype */ |
| 927 | u_int tcp_hdrsz(struct tcpcb *); |
| 928 | u_long tcp_mss_to_advertise(const struct ifnet *, int); |
| 929 | void tcp_mss_from_peer(struct tcpcb *, int); |
| 930 | void tcp_tcpcb_template(void); |
| 931 | struct tcpcb * |
| 932 | tcp_newtcpcb(int, void *); |
| 933 | void tcp_notify(struct inpcb *, int); |
| 934 | #ifdef INET6 /* the tcp6_* variants here take struct in6pcb / struct in6_addr and exist only with INET6 */ |
| 935 | void tcp6_notify(struct in6pcb *, int); |
| 936 | #endif |
| 937 | u_int tcp_optlen(struct tcpcb *); |
| 938 | int tcp_output(struct tcpcb *); |
| 939 | void tcp_pulloutofband(struct socket *, |
| 940 | struct tcphdr *, struct mbuf *, int); |
| 941 | void tcp_quench(struct inpcb *, int); |
| 942 | #ifdef INET6 |
| 943 | void tcp6_quench(struct in6pcb *, int); |
| 944 | #endif |
| 945 | void tcp_mtudisc(struct inpcb *, int); |
| 946 | #ifdef INET6 |
| 947 | void tcp6_mtudisc_callback(struct in6_addr *); |
| 948 | #endif |
| 949 | /* struct ipqent allocation interface: one-time init, allocate an entry, release an entry. */ |
| 950 | void tcpipqent_init(void); |
| 951 | struct ipqent *tcpipqent_alloc(void); |
| 952 | void tcpipqent_free(struct ipqent *); |
| 953 | |
| 954 | int tcp_respond(struct tcpcb *, struct mbuf *, struct mbuf *, |
| 955 | struct tcphdr *, tcp_seq, tcp_seq, int); |
| 956 | void tcp_rmx_rtt(struct tcpcb *); |
| 957 | void tcp_setpersist(struct tcpcb *); |
| 958 | #ifdef TCP_SIGNATURE /* declared only when TCP_SIGNATURE is defined */ |
| 959 | int tcp_signature_compute(struct mbuf *, struct tcphdr *, int, int, |
| 960 | int, u_char *, u_int); |
| 961 | #endif |
| 962 | void tcp_slowtimo(void *); |
| 963 | extern callout_t tcp_slowtimo_ch; /* callout handle; NOTE(review): presumably the one that schedules tcp_slowtimo() -- confirm */ |
| 964 | void tcp_fasttimo(void); |
| 965 | struct mbuf * |
| 966 | tcp_template(struct tcpcb *); |
| 967 | void tcp_trace(short, short, struct tcpcb *, struct mbuf *, int); |
| 968 | struct tcpcb * |
| 969 | tcp_usrclosed(struct tcpcb *); |
| 970 | void tcp_usrreq_init(void); |
| 971 | void tcp_xmit_timer(struct tcpcb *, uint32_t); |
| 972 | tcp_seq tcp_new_iss(struct tcpcb *, tcp_seq); /* returns a tcp_seq; NOTE(review): "iss" presumably matches the ISS of the tcp_do_rfc1948 comment -- confirm */ |
| 973 | tcp_seq tcp_new_iss1(void *, void *, u_int16_t, u_int16_t, size_t, |
| 974 | tcp_seq); |
| 975 | /* Selective acknowledgement (SACK, RFC2018) support routines. */ |
| 976 | void tcp_sack_init(void); |
| 977 | void tcp_new_dsack(struct tcpcb *, tcp_seq, u_int32_t); |
| 978 | void tcp_sack_option(struct tcpcb *, const struct tcphdr *, |
| 979 | const u_char *, int); |
| 980 | void tcp_del_sackholes(struct tcpcb *, const struct tcphdr *); |
| 981 | void tcp_free_sackholes(struct tcpcb *); |
| 982 | void tcp_sack_adjust(struct tcpcb *tp); |
| 983 | struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt); |
| 984 | int tcp_sack_numblks(const struct tcpcb *); |
| 985 | #define TCP_SACK_OPTLEN(nblks) ((nblks) * 8 + 2 + 2) /* 8 per block plus 2 + 2 fixed; NOTE(review): presumably bytes of option header and padding -- confirm */ |
| 986 | /* Statistics update routines. NOTE(review): the u_int argument is presumably a TCP_STAT_* index and the uint64_t the amount to add -- confirm. */ |
| 987 | void tcp_statinc(u_int); |
| 988 | void tcp_statadd(u_int, uint64_t); |
| 989 | /* SYN cache routines (the "compressed state engine" of the tcp_syn_cache_limit comment above). */ |
| 990 | int syn_cache_add(struct sockaddr *, struct sockaddr *, |
| 991 | struct tcphdr *, unsigned int, struct socket *, |
| 992 | struct mbuf *, u_char *, int, struct tcp_opt_info *); |
| 993 | void syn_cache_unreach(const struct sockaddr *, const struct sockaddr *, |
| 994 | struct tcphdr *); |
| 995 | struct socket *syn_cache_get(struct sockaddr *, struct sockaddr *, |
| 996 | struct tcphdr *, unsigned int, unsigned int, |
| 997 | struct socket *so, struct mbuf *); |
| 998 | void syn_cache_init(void); |
| 999 | void syn_cache_insert(struct syn_cache *, struct tcpcb *); |
| 1000 | struct syn_cache *syn_cache_lookup(const struct sockaddr *, const struct sockaddr *, |
| 1001 | struct syn_cache_head **); /* the last argument is an out-pointer to a struct syn_cache_head *; NOTE(review): presumably set to the bucket searched -- confirm */ |
| 1002 | void syn_cache_reset(struct sockaddr *, struct sockaddr *, |
| 1003 | struct tcphdr *); |
| 1004 | int syn_cache_respond(struct syn_cache *, struct mbuf *); |
| 1005 | void syn_cache_timer(void *); |
| 1006 | void syn_cache_cleanup(struct tcpcb *); |
| 1007 | /* NOTE(review): the roles of the four int arguments are not visible from this prototype -- see the definition. */ |
| 1008 | int tcp_input_checksum(int, struct mbuf *, const struct tcphdr *, int, int, |
| 1009 | int); |
| 1010 | #endif |
| 1011 | |
| 1012 | #endif /* !_NETINET_TCP_VAR_H_ */ |
| 1013 | |