| 1 | /* $NetBSD: ip_mroute.h,v 1.31 2008/08/07 06:20:14 cegger Exp $ */ |
| 2 | |
| 3 | #ifndef _NETINET_IP_MROUTE_H_ |
| 4 | #define _NETINET_IP_MROUTE_H_ |
| 5 | |
| 6 | /* |
| 7 | * Definitions for IP multicast forwarding. |
| 8 | * |
| 9 | * Written by David Waitzman, BBN Labs, August 1988. |
| 10 | * Modified by Steve Deering, Stanford, February 1989. |
| 11 | * Modified by Ajit Thyagarajan, PARC, August 1993. |
| 12 | * Modified by Ajit Thyagarajan, PARC, August 1994. |
| 13 | * Modified by Ahmed Helmy, SGI, June 1996. |
| 14 | * Modified by Pavlin Radoslavov, ICSI, October 2002. |
| 15 | * |
| 16 | * MROUTING Revision: 1.2 |
| 17 | * and PIM-SMv2 and PIM-DM support, advanced API support, |
| 18 | * bandwidth metering and signaling. |
| 19 | */ |
| 20 | |
| 21 | #include <sys/queue.h> |
| 22 | #include <sys/callout.h> |
| 23 | |
#ifdef _KERNEL
/*
 * Forward declaration only.  The kernel prototypes in this header take
 * pointers to struct sockopt; the full definition comes from
 * <sys/socketvar.h>.
 */
struct sockopt;
#endif /* _KERNEL */
| 27 | |
/*
 * Multicast routing commands passed through setsockopt()/getsockopt().
 *
 * The values are consecutive from 100, except that 108 is not assigned
 * in this header and MRT_PIM is an alias for MRT_ASSERT.
 */
#define	MRT_INIT		100		/* initialize forwarder */
#define	MRT_DONE		101		/* shut down forwarder */
#define	MRT_ADD_VIF		102		/* create virtual interface */
#define	MRT_DEL_VIF		103		/* delete virtual interface */
#define	MRT_ADD_MFC		104		/* insert forwarding cache entry */
#define	MRT_DEL_MFC		105		/* delete forwarding cache entry */
#define	MRT_VERSION		106		/* get kernel version number */
#define	MRT_ASSERT		107		/* enable assert processing */
#define	MRT_PIM			MRT_ASSERT	/* enable PIM processing */
#define	MRT_API_SUPPORT		109		/* supported MRT API */
#define	MRT_API_CONFIG		110		/* config MRT API */
#define	MRT_ADD_BW_UPCALL	111		/* create bandwidth monitor */
#define	MRT_DEL_BW_UPCALL	112		/* delete bandwidth monitor */
| 44 | |
| 45 | |
/*
 * Virtual interface (vif) basics: the number of vifs, the bitmap type
 * that carries one bit per vif, and the type used for a vif index.
 */
#define	MAXVIFS		32

typedef u_int32_t	vifbitmap_t;	/* one bit per virtual interface */
typedef u_int16_t	vifi_t;		/* type of a vif index */
| 52 | |
/*
 * Bit operations on a vif bitmap.  `n' is a vif index (0 .. MAXVIFS - 1)
 * and `m' is the bitmap; the modifying macros need `m' to be an lvalue.
 *
 * The single-bit mask is built from an unsigned constant.  With a plain
 * signed `1', selecting bit 31 shifts into the sign bit of an int, which
 * is undefined behaviour and in practice produces a negative mask that
 * sign-extends whenever it is widened.
 */
#define	VIFM_SET(n, m)		((m) |= (1U << (n)))
#define	VIFM_CLR(n, m)		((m) &= ~(1U << (n)))
#define	VIFM_ISSET(n, m)	((m) & (1U << (n)))
#define	VIFM_SETALL(m)		((m) = 0xffffffff)
#define	VIFM_CLRALL(m)		((m) = 0x00000000)
#define	VIFM_COPY(mfrom, mto)	((mto) = (mfrom))
#define	VIFM_SAME(m1, m2)	((m1) == (m2))
| 60 | |
/* VIFF_* flag bits, as stored in vifc_flags (struct vifctl) and v_flags. */
#define	VIFF_TUNNEL	0x01	/* vif represents a tunnel end-point */
#define	VIFF_SRCRT	0x02	/* tunnel uses IP src routing */
#define	VIFF_REGISTER	0x04	/* used for PIM Register encap/decap */
| 64 | |
| 65 | /* |
| 66 | * Argument structure for MRT_ADD_VIF. |
| 67 | * (MRT_DEL_VIF takes a single vifi_t argument.) |
| 68 | */ |
| 69 | struct vifctl { |
| 70 | vifi_t vifc_vifi; /* the index of the vif to be added */ |
| 71 | u_int8_t vifc_flags; /* VIFF_ flags defined below */ |
| 72 | u_int8_t vifc_threshold; /* min ttl required to forward on vif */ |
| 73 | u_int32_t vifc_rate_limit; /* max rate */ |
| 74 | struct in_addr vifc_lcl_addr;/* local interface address */ |
| 75 | struct in_addr vifc_rmt_addr;/* remote address (tunnels only) */ |
| 76 | }; |
| 77 | |
| 78 | /* |
| 79 | * Argument structure for MRT_ADD_MFC and MRT_DEL_MFC. |
| 80 | * XXX if you change this, make sure to change struct mfcctl2 as well. |
| 81 | */ |
| 82 | struct mfcctl { |
| 83 | struct in_addr mfcc_origin; /* ip origin of mcasts */ |
| 84 | struct in_addr mfcc_mcastgrp; /* multicast group associated */ |
| 85 | vifi_t mfcc_parent; /* incoming vif */ |
| 86 | u_int8_t mfcc_ttls[MAXVIFS]; /* forwarding ttls on vifs */ |
| 87 | }; |
| 88 | |
| 89 | /* |
| 90 | * The new argument structure for MRT_ADD_MFC and MRT_DEL_MFC overlays |
| 91 | * and extends the old struct mfcctl. |
| 92 | */ |
| 93 | struct mfcctl2 { |
| 94 | /* the mfcctl fields */ |
| 95 | struct in_addr mfcc_origin; /* ip origin of mcasts */ |
| 96 | struct in_addr mfcc_mcastgrp; /* multicast group associated*/ |
| 97 | vifi_t mfcc_parent; /* incoming vif */ |
| 98 | u_int8_t mfcc_ttls[MAXVIFS]; /* forwarding ttls on vifs */ |
| 99 | |
| 100 | /* extension fields */ |
| 101 | u_int8_t mfcc_flags[MAXVIFS]; /* the MRT_MFC_FLAGS_* flags */ |
| 102 | struct in_addr mfcc_rp; /* the RP address */ |
| 103 | }; |
/*
 * Advanced-API flags.
 *
 * The MRT_MFC_FLAGS_* bits are also the values stored in the
 * mfcc_flags field.  MRT_MFC_RP and MRT_MFC_BW_UPCALL occupy bits 8
 * and 9, outside the range of a u_int8_t mfcc_flags[] entry.
 */
#define	MRT_MFC_FLAGS_DISABLE_WRONGVIF	(1 << 0)  /* disable WRONGVIF signals */
#define	MRT_MFC_FLAGS_BORDER_VIF	(1 << 1)  /* border vif */
#define	MRT_MFC_RP			(1 << 8)  /* enable RP address */
#define	MRT_MFC_BW_UPCALL		(1 << 9)  /* enable bw upcalls */

/* Every per-vif MFC flag. */
#define	MRT_MFC_FLAGS_ALL						\
	(MRT_MFC_FLAGS_DISABLE_WRONGVIF | MRT_MFC_FLAGS_BORDER_VIF)

/* Every advanced-API flag. */
#define	MRT_API_FLAGS_ALL						\
	(MRT_MFC_FLAGS_ALL | MRT_MFC_RP | MRT_MFC_BW_UPCALL)
| 119 | |
/*
 * Structure for installing or delivering an upcall if the measured
 * bandwidth is above or below a threshold.
 *
 * User programs (e.g. daemons) may need to know when the bandwidth
 * used by some data flow rises above or falls below a threshold.  This
 * interface lets userland specify that threshold (in bytes and/or
 * packets) together with the measurement interval.  A flow is the set
 * of all packets with the same source and destination IP address.
 * At the moment the code is only used for multicast destinations,
 * but nothing prevents its use for unicast.
 *
 * The measurement interval cannot be shorter than some Tmin
 * (currently 3 seconds).  The threshold is set in packets and/or
 * bytes per interval.
 *
 * Measurement works as follows:
 *
 * For ">=" measurements:
 *	The first packet marks the start of a measurement interval.
 *	During the interval packets and bytes are counted; once the
 *	threshold is passed an upcall is delivered and the measurement
 *	is done.  The first packet after the end of the interval
 *	resets the counts and restarts the measurement.
 *
 * For "<=" measurements:
 *	A timer is started to fire at the end of the interval, and
 *	packets and bytes are counted for each incoming packet.  When
 *	the timer fires the counts are compared with the threshold, an
 *	upcall is scheduled if they are below it, and the measurement
 *	is restarted (timer rescheduled, counters zeroed).
 */

/*
 * One bandwidth quantity: a time value plus packet and byte counts.
 * Used both for thresholds and for measured values.
 */
struct bw_data {
	struct timeval	b_time;		/* time component */
	u_int64_t	b_packets;	/* packet count */
	u_int64_t	b_bytes;	/* byte count */
};
| 157 | |
| 158 | struct bw_upcall { |
| 159 | struct in_addr bu_src; /* source address */ |
| 160 | struct in_addr bu_dst; /* destination address */ |
| 161 | u_int32_t bu_flags; /* misc flags (see below) */ |
| 162 | #define BW_UPCALL_UNIT_PACKETS (1 << 0) /* threshold (in packets) */ |
| 163 | #define BW_UPCALL_UNIT_BYTES (1 << 1) /* threshold (in bytes) */ |
| 164 | #define BW_UPCALL_GEQ (1 << 2) /* upcall if bw >= threshold */ |
| 165 | #define BW_UPCALL_LEQ (1 << 3) /* upcall if bw <= threshold */ |
| 166 | #define BW_UPCALL_DELETE_ALL (1 << 4) /* delete all upcalls for s,d*/ |
| 167 | struct bw_data bu_threshold; /* the bw threshold */ |
| 168 | struct bw_data bu_measured; /* the measured bw */ |
| 169 | }; |
| 170 | |
/* Upper bound on the number of upcalls delivered together. */
#define	BW_UPCALLS_MAX				128

/*
 * Smallest threshold time interval accepted for a bandwidth
 * measurement, split into seconds and microseconds.
 */
#define	BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC	3
#define	BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC	0
| 176 | |
| 177 | /* |
| 178 | * Argument structure used by mrouted to get src-grp pkt counts. |
| 179 | */ |
| 180 | struct sioc_sg_req { |
| 181 | struct in_addr src; |
| 182 | struct in_addr grp; |
| 183 | u_long pktcnt; |
| 184 | u_long bytecnt; |
| 185 | u_long wrong_if; |
| 186 | }; |
| 187 | |
| 188 | /* |
| 189 | * Argument structure used by mrouted to get vif pkt counts. |
| 190 | */ |
| 191 | struct sioc_vif_req { |
| 192 | vifi_t vifi; /* vif number */ |
| 193 | u_long icount; /* input packet count on vif */ |
| 194 | u_long ocount; /* output packet count on vif */ |
| 195 | u_long ibytes; /* input byte count on vif */ |
| 196 | u_long obytes; /* output byte count on vif */ |
| 197 | }; |
| 198 | |
| 199 | |
| 200 | /* |
| 201 | * The kernel's multicast routing statistics. |
| 202 | */ |
| 203 | struct mrtstat { |
| 204 | u_long mrts_mfc_lookups; /* # forw. cache hash table hits */ |
| 205 | u_long mrts_mfc_misses; /* # forw. cache hash table misses */ |
| 206 | u_long mrts_upcalls; /* # calls to mrouted */ |
| 207 | u_long mrts_no_route; /* no route for packet's origin */ |
| 208 | u_long mrts_bad_tunnel; /* malformed tunnel options */ |
| 209 | u_long mrts_cant_tunnel; /* no room for tunnel options */ |
| 210 | u_long mrts_wrong_if; /* arrived on wrong interface */ |
| 211 | u_long mrts_upq_ovflw; /* upcall Q overflow */ |
| 212 | u_long mrts_cache_cleanups; /* # entries with no upcalls */ |
| 213 | u_long mrts_drop_sel; /* pkts dropped selectively */ |
| 214 | u_long mrts_q_overflow; /* pkts dropped - Q overflow */ |
| 215 | u_long mrts_pkt2large; /* pkts dropped - size > BKT SIZE */ |
| 216 | u_long mrts_upq_sockfull; /* upcalls dropped - socket full */ |
| 217 | }; |
| 218 | |
| 219 | |
| 220 | #ifdef _KERNEL |
| 221 | |
| 222 | /* |
| 223 | * The kernel's virtual-interface structure. |
| 224 | */ |
| 225 | struct encaptab; |
| 226 | struct vif { |
| 227 | struct mbuf *tbf_q, **tbf_t; /* packet queue */ |
| 228 | struct timeval tbf_last_pkt_t; /* arr. time of last pkt */ |
| 229 | u_int32_t tbf_n_tok; /* no of tokens in bucket */ |
| 230 | u_int32_t tbf_q_len; /* length of queue at this vif */ |
| 231 | u_int32_t tbf_max_q_len; /* max. queue length */ |
| 232 | |
| 233 | u_int8_t v_flags; /* VIFF_ flags defined above */ |
| 234 | u_int8_t v_threshold; /* min ttl required to forward on vif */ |
| 235 | u_int32_t v_rate_limit; /* max rate */ |
| 236 | struct in_addr v_lcl_addr; /* local interface address */ |
| 237 | struct in_addr v_rmt_addr; /* remote address (tunnels only) */ |
| 238 | struct ifnet *v_ifp; /* pointer to interface */ |
| 239 | u_long v_pkt_in; /* # pkts in on interface */ |
| 240 | u_long v_pkt_out; /* # pkts out on interface */ |
| 241 | u_long v_bytes_in; /* # bytes in on interface */ |
| 242 | u_long v_bytes_out; /* # bytes out on interface */ |
| 243 | struct route v_route; /* cached route if this is a tunnel */ |
| 244 | callout_t v_repq_ch; /* for tbf_reprocess_q() */ |
| 245 | #ifdef RSVP_ISI |
| 246 | int v_rsvp_on; /* # RSVP listening on this vif */ |
| 247 | struct socket *v_rsvpd; /* # RSVPD daemon */ |
| 248 | #endif /* RSVP_ISI */ |
| 249 | const struct encaptab *v_encap_cookie; |
| 250 | }; |
| 251 | |
| 252 | /* |
| 253 | * The kernel's multicast forwarding cache entry structure. |
| 254 | * (A field for the type of service (mfc_tos) is to be added |
| 255 | * at a future point.) |
| 256 | */ |
| 257 | struct mfc { |
| 258 | LIST_ENTRY(mfc) mfc_hash; |
| 259 | struct in_addr mfc_origin; /* ip origin of mcasts */ |
| 260 | struct in_addr mfc_mcastgrp; /* multicast group associated */ |
| 261 | vifi_t mfc_parent; /* incoming vif */ |
| 262 | u_int8_t mfc_ttls[MAXVIFS]; /* forwarding ttls on vifs */ |
| 263 | u_long mfc_pkt_cnt; /* pkt count for src-grp */ |
| 264 | u_long mfc_byte_cnt; /* byte count for src-grp */ |
| 265 | u_long mfc_wrong_if; /* wrong if for src-grp */ |
| 266 | int mfc_expire; /* time to clean entry up */ |
| 267 | struct timeval mfc_last_assert; /* last time I sent an assert */ |
| 268 | struct rtdetq *mfc_stall; /* pkts waiting for route */ |
| 269 | u_int8_t mfc_flags[MAXVIFS]; /* the MRT_MFC_FLAGS_* flags */ |
| 270 | struct in_addr mfc_rp; /* the RP address */ |
| 271 | struct bw_meter *mfc_bw_meter; /* list of bandwidth meters */ |
| 272 | }; |
| 273 | |
/*
 * Message passed from the kernel up to the multicast router.
 * (The layout is conveniently similar to that of an IP packet.)
 */
struct igmpmsg {
	u_int32_t	unused1;
	u_int32_t	unused2;
	u_int8_t	im_msgtype;	/* what type of message */
#define	IGMPMSG_NOCACHE		1	/* no MFC in the kernel */
#define	IGMPMSG_WRONGVIF	2	/* packet came from wrong interface */
#define	IGMPMSG_WHOLEPKT	3	/* PIM pkt for user level encap. */
#define	IGMPMSG_BW_UPCALL	4	/* BW monitoring upcall */
	u_int8_t	im_mbz;		/* must be zero */
	u_int8_t	im_vif;		/* vif rec'd on */
	u_int8_t	unused3;
	struct in_addr	im_src;
	struct in_addr	im_dst;
} __packed;
| 291 | |
/*
 * Packet information held while an upcall is being made.  Entries are
 * chained through `next'.
 */
struct rtdetq {
	struct mbuf	*m;		/* a copy of the packet */
	struct ifnet	*ifp;		/* interface pkt came in on */
#ifdef UPCALL_TIMING
	struct timeval	t;		/* timestamp */
#endif /* UPCALL_TIMING */
	struct rtdetq	*next;		/* next entry in the chain */
};
| 303 | |
/* Forwarding cache table size and upcall queue limit. */
#define	MFCTBLSIZ	256
#define	MAX_UPQ		4	/* max. no of pkts in upcall Q */

/*
 * Limits used by the token bucket filter code.
 */
#define	MAX_BKT_SIZE	10000	/* 10K bytes size */
#define	MAXQSIZE	10	/* max. no of pkts in token queue */
| 312 | |
| 313 | /* |
| 314 | * Structure for measuring the bandwidth and sending an upcall if the |
| 315 | * measured bandwidth is above or below a threshold. |
| 316 | */ |
| 317 | struct bw_meter { |
| 318 | struct bw_meter *bm_mfc_next; /* next bw meter (same mfc) */ |
| 319 | struct bw_meter *bm_time_next; /* next bw meter (same time) */ |
| 320 | uint32_t bm_time_hash; /* the time hash value */ |
| 321 | struct mfc *bm_mfc; /* the corresponding mfc */ |
| 322 | uint32_t bm_flags; /* misc flags (see below) */ |
| 323 | #define BW_METER_UNIT_PACKETS (1 << 0) /* threshold (in packets) */ |
| 324 | #define BW_METER_UNIT_BYTES (1 << 1) /* threshold (in bytes) */ |
| 325 | #define BW_METER_GEQ (1 << 2) /* upcall if bw >= threshold */ |
| 326 | #define BW_METER_LEQ (1 << 3) /* upcall if bw <= threshold */ |
| 327 | #define BW_METER_USER_FLAGS (BW_METER_UNIT_PACKETS | \ |
| 328 | BW_METER_UNIT_BYTES | \ |
| 329 | BW_METER_GEQ | \ |
| 330 | BW_METER_LEQ) |
| 331 | |
| 332 | #define BW_METER_UPCALL_DELIVERED (1 << 24) /* upcall was delivered */ |
| 333 | |
| 334 | struct bw_data bm_threshold; /* the upcall threshold */ |
| 335 | struct bw_data bm_measured; /* the measured bw */ |
| 336 | struct timeval bm_start_time; /* abs. time */ |
| 337 | }; |
| 338 | |
| 339 | int ip_mrouter_set(struct socket *, struct sockopt *); |
| 340 | int ip_mrouter_get(struct socket *, struct sockopt *); |
| 341 | int mrt_ioctl(struct socket *, u_long, void *); |
| 342 | int ip_mrouter_done(void); |
| 343 | void ip_mrouter_detach(struct ifnet *); |
| 344 | void reset_vif(struct vif *); |
| 345 | #ifdef RSVP_ISI |
| 346 | int ip_mforward(struct mbuf *, struct ifnet *, struct ip_moptions *); |
| 347 | int legal_vif_num(int); |
| 348 | int ip_rsvp_vif_init(struct socket *, struct mbuf *); |
| 349 | int ip_rsvp_vif_done(struct socket *, struct mbuf *); |
| 350 | void ip_rsvp_force_done(struct socket *); |
| 351 | void rsvp_input(struct mbuf *, int, int); |
| 352 | #else |
| 353 | int ip_mforward(struct mbuf *, struct ifnet *); |
| 354 | #endif |
| 355 | |
| 356 | #endif /* _KERNEL */ |
| 357 | |
| 358 | #endif /* !_NETINET_IP_MROUTE_H_ */ |
| 359 | |