/* * Mach Operating System * Copyright (c) 1993-1989 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie Mellon * the rights to redistribute these changes. */ /* * Author: David B. Golub, Carnegie Mellon University * Date: 3/98 * * Network IO. * * Packet filter code taken from vaxif/enet.c written * CMU and Stanford. */ /* * Note: don't depend on anything in this file. * It may change a lot real soon. -cmaeda 11 June 1993 */ #include #include #include #include /* spl definitions */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if MACH_TTD #include #endif /* MACH_TTD */ #if MACH_TTD int kttd_async_counter= 0; #endif /* MACH_TTD */ /* * Packet Buffer Management * * This module manages a private pool of kmsg buffers. */ /* * List of net kmsgs queued to be sent to users. * Messages can be high priority or low priority. * The network thread processes high priority messages first. */ decl_simple_lock_data(,net_queue_lock) boolean_t net_thread_awake = FALSE; struct ipc_kmsg_queue net_queue_high; int net_queue_high_size = 0; int net_queue_high_max = 0; /* for debugging */ struct ipc_kmsg_queue net_queue_low; int net_queue_low_size = 0; int net_queue_low_max = 0; /* for debugging */ /* * List of net kmsgs that can be touched at interrupt level. * If it is empty, we will also steal low priority messages. */ decl_simple_lock_data(,net_queue_free_lock) struct ipc_kmsg_queue net_queue_free; int net_queue_free_size = 0; /* on free list */ int net_queue_free_max = 0; /* for debugging */ /* * This value is critical to network performance. * At least this many buffers should be sitting in net_queue_free. * If this is set too small, we will drop network packets. * Even a low drop rate (<1%) can cause severe network throughput problems. * We add one to net_queue_free_min for every filter. */ int net_queue_free_min = 3; int net_queue_free_hits = 0; /* for debugging */ int net_queue_free_steals = 0; /* for debugging */ int net_queue_free_misses = 0; /* for debugging */ int net_kmsg_send_high_hits = 0; /* for debugging */ int net_kmsg_send_low_hits = 0; /* for debugging */ int net_kmsg_send_high_misses = 0; /* for debugging */ int net_kmsg_send_low_misses = 0; /* for debugging */ int net_thread_awaken = 0; /* for debugging */ int net_ast_taken = 0; /* for debugging */ decl_simple_lock_data(,net_kmsg_total_lock) int net_kmsg_total = 0; /* total allocated */ int net_kmsg_max; /* initialized below */ vm_size_t net_kmsg_size; /* initialized below */ /* * We want more buffers when there aren't enough in the free queue * and the low priority queue. 
However, we don't want to allocate * more than net_kmsg_max. */ #define net_kmsg_want_more() \ (((net_queue_free_size + net_queue_low_size) < net_queue_free_min) && \ (net_kmsg_total < net_kmsg_max)) ipc_kmsg_t net_kmsg_get(void) { ipc_kmsg_t kmsg; spl_t s; /* * First check the list of free buffers. */ s = splimp(); simple_lock(&net_queue_free_lock); kmsg = ipc_kmsg_queue_first(&net_queue_free); if (kmsg != IKM_NULL) { ipc_kmsg_rmqueue_first_macro(&net_queue_free, kmsg); net_queue_free_size--; net_queue_free_hits++; } simple_unlock(&net_queue_free_lock); if (kmsg == IKM_NULL) { /* * Try to steal from the low priority queue. */ simple_lock(&net_queue_lock); kmsg = ipc_kmsg_queue_first(&net_queue_low); if (kmsg != IKM_NULL) { ipc_kmsg_rmqueue_first_macro(&net_queue_low, kmsg); net_queue_low_size--; net_queue_free_steals++; } simple_unlock(&net_queue_lock); } if (kmsg == IKM_NULL) net_queue_free_misses++; (void) splx(s); if (net_kmsg_want_more() || (kmsg == IKM_NULL)) { boolean_t awake; s = splimp(); simple_lock(&net_queue_lock); awake = net_thread_awake; net_thread_awake = TRUE; simple_unlock(&net_queue_lock); (void) splx(s); if (!awake) thread_wakeup((event_t) &net_thread_awake); } return kmsg; } void net_kmsg_put(const ipc_kmsg_t kmsg) { spl_t s; s = splimp(); simple_lock(&net_queue_free_lock); ipc_kmsg_enqueue_macro(&net_queue_free, kmsg); if (++net_queue_free_size > net_queue_free_max) net_queue_free_max = net_queue_free_size; simple_unlock(&net_queue_free_lock); (void) splx(s); } void net_kmsg_collect(void) { ipc_kmsg_t kmsg; spl_t s; s = splimp(); simple_lock(&net_queue_free_lock); while (net_queue_free_size > net_queue_free_min) { kmsg = ipc_kmsg_dequeue(&net_queue_free); net_queue_free_size--; simple_unlock(&net_queue_free_lock); (void) splx(s); net_kmsg_free(kmsg); simple_lock(&net_kmsg_total_lock); net_kmsg_total--; simple_unlock(&net_kmsg_total_lock); s = splimp(); simple_lock(&net_queue_free_lock); } simple_unlock(&net_queue_free_lock); (void) splx(s); } void net_kmsg_more(void) { ipc_kmsg_t kmsg; /* * Replenish net kmsg pool if low. We don't have the locks * necessary to look at these variables, but that's OK because * misread values aren't critical. The danger in this code is * that while we allocate buffers, interrupts are happening * which take buffers out of the free list. If we are not * careful, we will sit in the loop and allocate a zillion * buffers while a burst of packets arrives. So we count * buffers in the low priority queue as available, because * net_kmsg_get will make use of them, and we cap the total * number of buffers we are willing to allocate. */ while (net_kmsg_want_more()) { simple_lock(&net_kmsg_total_lock); net_kmsg_total++; simple_unlock(&net_kmsg_total_lock); kmsg = net_kmsg_alloc(); net_kmsg_put(kmsg); } } /* * Packet Filter Data Structures * * Each network interface has a set of packet filters * that are run on incoming packets. * * Each packet filter may represent a single network * session or multiple network sessions. For example, * all application level TCP sessions would be represented * by a single packet filter data structure. * * If a packet filter has a single session, we use a * struct net_rcv_port to represent it. If the packet * filter represents multiple sessions, we use a * struct net_hash_header to represent it. */ /* * Each interface has a write port and a set of read ports. * Each read port has one or more filters to determine what packets * should go to that port. */ /* * Receive port for net, with packet filter. 
* This data structure by itself represents a packet * filter for a single session. */ struct net_rcv_port { queue_chain_t input; /* list of input open_descriptors */ queue_chain_t output; /* list of output open_descriptors */ ipc_port_t rcv_port; /* port to send packet to */ int rcv_qlimit; /* port's qlimit */ int rcv_count; /* number of packets received */ int priority; /* priority for filter */ filter_t *filter_end; /* pointer to end of filter */ filter_t filter[NET_MAX_FILTER]; /* filter operations */ }; struct kmem_cache net_rcv_cache; /* cache of net_rcv_port structs */ #define NET_HASH_SIZE 256 #define N_NET_HASH 4 #define N_NET_HASH_KEYS 4 /* * A single hash entry. */ struct net_hash_entry { queue_chain_t chain; /* list of entries with same hval */ #define he_next chain.next #define he_prev chain.prev ipc_port_t rcv_port; /* destination port */ int rcv_qlimit; /* qlimit for the port */ unsigned int keys[N_NET_HASH_KEYS]; }; struct kmem_cache net_hash_entry_cache; /* * This structure represents a packet filter with multiple sessions. * * For example, all application level TCP sessions might be * represented by one of these structures. It looks like a * net_rcv_port struct so that both types can live on the * same packet filter queues. */ struct net_hash_header { struct net_rcv_port rcv; int n_keys; /* zero if not used */ int ref_count; /* reference count */ net_hash_entry_t table[NET_HASH_SIZE]; } filter_hash_header[N_NET_HASH]; decl_simple_lock_data(,net_hash_header_lock) #define HASH_ITERATE(head, elt) (elt) = (net_hash_entry_t) (head); do { #define HASH_ITERATE_END(head, elt) \ (elt) = (net_hash_entry_t) queue_next((queue_entry_t) (elt)); \ } while ((elt) != (head)); #define FILTER_ITERATE(if_port_list, fp, nextfp, chain) \ for ((fp) = (net_rcv_port_t) queue_first(if_port_list); \ !queue_end(if_port_list, (queue_entry_t)(fp)); \ (fp) = (nextfp)) { \ (nextfp) = (net_rcv_port_t) queue_next(chain); #define FILTER_ITERATE_END } /* entry_p must be net_rcv_port_t or net_hash_entry_t */ #define ENQUEUE_DEAD(dead, entry_p, chain) { \ (entry_p)->chain.next = (queue_entry_t) (dead); \ (dead) = (queue_entry_t)(entry_p); \ } /* * ethernet_priority: * * This function properly belongs in the ethernet interfaces; * it should not be called by this module. (We get packet * priorities as an argument to net_filter.) It is here * to avoid massive code duplication. * * Returns TRUE for high-priority packets. */ boolean_t ethernet_priority(kmsg) const ipc_kmsg_t kmsg; { unsigned char *addr = (unsigned char *) net_kmsg(kmsg)->header; /* * A simplistic check for broadcast packets. */ if ((addr[0] == 0xff) && (addr[1] == 0xff) && (addr[2] == 0xff) && (addr[3] == 0xff) && (addr[4] == 0xff) && (addr[5] == 0xff)) return FALSE; else return TRUE; } mach_msg_type_t header_type = { MACH_MSG_TYPE_BYTE, 8, NET_HDW_HDR_MAX, TRUE, FALSE, FALSE, 0 }; mach_msg_type_t packet_type = { MACH_MSG_TYPE_BYTE, /* name */ 8, /* size */ 0, /* number */ TRUE, /* inline */ FALSE, /* longform */ FALSE /* deallocate */ }; /* * net_deliver: * * Called and returns holding net_queue_lock, at splimp. * Dequeues a message and delivers it at spl0. * Returns FALSE if no messages. */ boolean_t net_deliver(boolean_t nonblocking) { ipc_kmsg_t kmsg; boolean_t high_priority; struct ipc_kmsg_queue send_list; /* * Pick up a pending network message and deliver it. * Deliver high priority messages before low priority. 
*/ if ((kmsg = ipc_kmsg_dequeue(&net_queue_high)) != IKM_NULL) { net_queue_high_size--; high_priority = TRUE; } else if ((kmsg = ipc_kmsg_dequeue(&net_queue_low)) != IKM_NULL) { net_queue_low_size--; high_priority = FALSE; } else return FALSE; simple_unlock(&net_queue_lock); (void) spl0(); /* * Run the packet through the filters, * getting back a queue of packets to send. */ net_filter(kmsg, &send_list); if (!nonblocking) { /* * There is a danger of running out of available buffers * because they all get moved into the high priority queue * or a port queue. In particular, we might need to * allocate more buffers as we pull (previously available) * buffers out of the low priority queue. But we can only * allocate if we are allowed to block. */ net_kmsg_more(); } while ((kmsg = ipc_kmsg_dequeue(&send_list)) != IKM_NULL) { int count; /* * Fill in the rest of the kmsg. */ count = net_kmsg(kmsg)->net_rcv_msg_packet_count; ikm_init_special(kmsg, IKM_SIZE_NETWORK); kmsg->ikm_header.msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND, 0); /* remember message sizes must be rounded up */ kmsg->ikm_header.msgh_size = (((mach_msg_size_t) (sizeof(struct net_rcv_msg) - sizeof net_kmsg(kmsg)->sent - NET_RCV_MAX + count)) + 3) &~ 3; kmsg->ikm_header.msgh_local_port = MACH_PORT_NULL; kmsg->ikm_header.msgh_kind = MACH_MSGH_KIND_NORMAL; kmsg->ikm_header.msgh_id = NET_RCV_MSG_ID; net_kmsg(kmsg)->header_type = header_type; net_kmsg(kmsg)->packet_type = packet_type; net_kmsg(kmsg)->net_rcv_msg_packet_count = count; /* * Send the packet to the destination port. Drop it * if the destination port is over its backlog. */ if (ipc_mqueue_send(kmsg, MACH_SEND_TIMEOUT, 0) == MACH_MSG_SUCCESS) { if (high_priority) net_kmsg_send_high_hits++; else net_kmsg_send_low_hits++; /* the receiver is responsible for the message now */ } else { if (high_priority) net_kmsg_send_high_misses++; else net_kmsg_send_low_misses++; ipc_kmsg_destroy(kmsg); } } (void) splimp(); simple_lock(&net_queue_lock); return TRUE; } /* * We want to deliver packets using ASTs, so we can avoid the * thread_wakeup/thread_block needed to get to the network * thread. However, we can't allocate memory in the AST handler, * because memory allocation might block. Hence we have the * network thread to allocate memory. The network thread also * delivers packets, so it can be allocating and delivering for a * burst. net_thread_awake is protected by net_queue_lock * (instead of net_queue_free_lock) so that net_packet and * net_ast can safely determine if the network thread is running. * This prevents a race that might leave a packet sitting without * being delivered. It is possible for net_kmsg_get to think * the network thread is awake, and so avoid a wakeup, and then * have the network thread sleep without allocating. The next * net_kmsg_get will do a wakeup. */ void net_ast(void) { spl_t s; net_ast_taken++; /* * If the network thread is awake, then we would * rather deliver messages from it, because * it can also allocate memory. */ s = splimp(); simple_lock(&net_queue_lock); while (!net_thread_awake && net_deliver(TRUE)) continue; /* * Prevent an unnecessary AST. Either the network * thread will deliver the messages, or there are * no messages left to deliver. */ simple_unlock(&net_queue_lock); (void) splsched(); ast_off(cpu_number(), AST_NETWORK); (void) splx(s); } void __attribute__ ((noreturn)) net_thread_continue(void) { for (;;) { spl_t s; net_thread_awaken++; /* * First get more buffers. 
 */
        net_kmsg_more();

        s = splimp();
        simple_lock(&net_queue_lock);
        while (net_deliver(FALSE))
            continue;
        net_thread_awake = FALSE;
        assert_wait(&net_thread_awake, FALSE);
        simple_unlock(&net_queue_lock);
        (void) splx(s);
        counter(c_net_thread_block++);
        thread_block(net_thread_continue);
    }
}

void net_thread(void)
{
    spl_t s;

    /*
     * We should be very high priority.
     */
    thread_set_own_priority(0);

    /*
     * We sleep initially, so that we don't allocate any buffers
     * unless the network is really in use and they are needed.
     */
    s = splimp();
    simple_lock(&net_queue_lock);
    net_thread_awake = FALSE;
    assert_wait(&net_thread_awake, FALSE);
    simple_unlock(&net_queue_lock);
    (void) splx(s);
    counter(c_net_thread_block++);
    thread_block(net_thread_continue);
    net_thread_continue();
    /*NOTREACHED*/
}

void reorder_queue(
    queue_t first,
    queue_t last)
{
    queue_entry_t prev, next;

    prev = first->prev;
    next = last->next;

    prev->next = last;
    next->prev = first;

    last->prev = prev;
    last->next = first;

    first->next = next;
    first->prev = last;
}

/*
 * Incoming packet.  Header has already been moved to proper place.
 * We are already at splimp.
 */
void net_packet(
    struct ifnet *ifp,
    ipc_kmsg_t kmsg,
    unsigned int count,
    boolean_t priority)
{
    boolean_t awake;

#if MACH_TTD
    /*
     * Do a quick check to see if it is a kernel TTD packet.
     *
     * Only check if KernelTTD is enabled, i.e. the current
     * device driver supports TTD, and the bootp succeeded.
     */
    if (kttd_enabled && kttd_handle_async(kmsg)) {
        /*
         * Packet was a valid ttd packet and
         * doesn't need to be passed up to filter.
         * The ttd code put the used kmsg buffer
         * back onto the free list.
         */
        if (kttd_debug)
            printf("**%x**", kttd_async_counter++);
        return;
    }
#endif /* MACH_TTD */

    kmsg->ikm_header.msgh_remote_port = (mach_port_t) ifp;
    net_kmsg(kmsg)->net_rcv_msg_packet_count = count;

    simple_lock(&net_queue_lock);
    if (priority) {
        ipc_kmsg_enqueue(&net_queue_high, kmsg);
        if (++net_queue_high_size > net_queue_high_max)
            net_queue_high_max = net_queue_high_size;
    } else {
        ipc_kmsg_enqueue(&net_queue_low, kmsg);
        if (++net_queue_low_size > net_queue_low_max)
            net_queue_low_max = net_queue_low_size;
    }

    /*
     * If the network thread is awake, then we don't
     * need to take an AST, because the thread will
     * deliver the packet.
     */
    awake = net_thread_awake;
    simple_unlock(&net_queue_lock);

    if (!awake) {
        spl_t s = splsched();
        ast_on(cpu_number(), AST_NETWORK);
        (void) splx(s);
    }
}

int net_filter_queue_reorder = 0;    /* non-zero to enable reordering */

/*
 * Run a packet through the filters, returning a list of messages.
 * We are *not* called at interrupt level.
 */
void net_filter(kmsg, send_list)
    const ipc_kmsg_t kmsg;
    ipc_kmsg_queue_t send_list;
{
    struct ifnet *ifp;
    net_rcv_port_t infp, nextfp;
    ipc_kmsg_t new_kmsg;
    net_hash_entry_t entp, *hash_headp;
    ipc_port_t dest;
    queue_entry_t dead_infp = (queue_entry_t) 0;
    queue_entry_t dead_entp = (queue_entry_t) 0;
    unsigned int ret_count;
    queue_head_t *if_port_list;

    int count = net_kmsg(kmsg)->net_rcv_msg_packet_count;

    ifp = (struct ifnet *) kmsg->ikm_header.msgh_remote_port;

    ipc_kmsg_queue_init(send_list);

    if (net_kmsg(kmsg)->sent)
        if_port_list = &ifp->if_snd_port_list;
    else
        if_port_list = &ifp->if_rcv_port_list;

    /*
     * Unfortunately we can't allocate or deallocate memory
     * while holding these locks.  And we can't drop the locks
     * while examining the filter lists.
     * Both locks are held in case a filter is removed from both
     * queues.
     */
    simple_lock(&ifp->if_rcv_port_list_lock);
    simple_lock(&ifp->if_snd_port_list_lock);

    FILTER_ITERATE(if_port_list, infp, nextfp, net_kmsg(kmsg)->sent ?
&infp->output : &infp->input) { entp = (net_hash_entry_t) 0; if ((infp->filter[0] & NETF_TYPE_MASK) == NETF_BPF) { ret_count = bpf_do_filter(infp, net_kmsg(kmsg)->packet + sizeof(struct packet_header), count - sizeof(struct packet_header), net_kmsg(kmsg)->header, ifp->if_header_size, &hash_headp, &entp); if (entp == (net_hash_entry_t) 0) dest = infp->rcv_port; else dest = entp->rcv_port; if (ret_count) ret_count += sizeof(struct packet_header); } else { ret_count = net_do_filter(infp, net_kmsg(kmsg)->packet, count, net_kmsg(kmsg)->header); if (ret_count) ret_count = count; dest = infp->rcv_port; } if (ret_count) { /* * Make a send right for the destination. */ dest = ipc_port_copy_send(dest); if (!IP_VALID(dest)) { /* * This filter is dead. We remove it from the * filter list and set it aside for deallocation. */ if (entp == (net_hash_entry_t) 0) { if (infp->filter[0] & NETF_IN) queue_remove(&ifp->if_rcv_port_list, infp, net_rcv_port_t, input); if (infp->filter[0] & NETF_OUT) queue_remove(&ifp->if_snd_port_list, infp, net_rcv_port_t, output); /* Use input only for queues of dead filters. */ ENQUEUE_DEAD(dead_infp, infp, input); continue; } else { hash_ent_remove (ifp, (net_hash_header_t)infp, FALSE, /* no longer used */ hash_headp, entp, &dead_entp); continue; } } /* * Deliver copy of packet to this channel. */ if (ipc_kmsg_queue_empty(send_list)) { /* * Only receiver, so far */ new_kmsg = kmsg; } else { /* * Other receivers - must allocate message and copy. */ new_kmsg = net_kmsg_get(); if (new_kmsg == IKM_NULL) { ipc_port_release_send(dest); break; } memcpy( net_kmsg(new_kmsg)->packet, net_kmsg(kmsg)->packet, ret_count); memcpy( net_kmsg(new_kmsg)->header, net_kmsg(kmsg)->header, NET_HDW_HDR_MAX); } net_kmsg(new_kmsg)->net_rcv_msg_packet_count = ret_count; new_kmsg->ikm_header.msgh_remote_port = (mach_port_t) dest; ipc_kmsg_enqueue(send_list, new_kmsg); { net_rcv_port_t prevfp; int rcount = ++infp->rcv_count; /* * See if ordering of filters is wrong */ if (infp->priority >= NET_HI_PRI) { #define REORDER_PRIO(chain) \ prevfp = (net_rcv_port_t) queue_prev(&infp->chain); \ /* \ * If infp is not the first element on the queue, \ * and the previous element is at equal priority \ * but has a lower count, then promote infp to \ * be in front of prevfp. \ */ \ if ((queue_t)prevfp != if_port_list && \ infp->priority == prevfp->priority) { \ /* \ * Threshold difference to prevent thrashing \ */ \ if (net_filter_queue_reorder \ && (100 + prevfp->rcv_count < rcount)) \ reorder_queue(&prevfp->chain, &infp->chain);\ } REORDER_PRIO(input); REORDER_PRIO(output); /* * High-priority filter -> no more deliveries */ break; } } } } FILTER_ITERATE_END simple_unlock(&ifp->if_snd_port_list_lock); simple_unlock(&ifp->if_rcv_port_list_lock); /* * Deallocate dead filters. */ if (dead_infp != 0) net_free_dead_infp(dead_infp); if (dead_entp != 0) net_free_dead_entp(dead_entp); if (ipc_kmsg_queue_empty(send_list)) { /* Not sent - recycle */ net_kmsg_put(kmsg); } } boolean_t net_do_filter(infp, data, data_count, header) net_rcv_port_t infp; const char * data; unsigned int data_count; const char * header; { int stack[NET_FILTER_STACK_DEPTH+1]; int *sp; filter_t *fp, *fpe; unsigned int op, arg; /* * The filter accesses the header and data * as unsigned short words. 
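 *
 * An illustrative sketch of a program in this little stack language
 * (the header offset and the literal are hypothetical, not taken from
 * any real client).  Each filter word carries a push-style argument in
 * its low bits and an operator in its high bits; NETF_PUSHLIT takes its
 * literal from the following word, and the packet is accepted if the
 * value left on top of the stack is non-zero when the program ends:
 *
 *	filter[0] = NETF_IN;			(flags word, skipped here)
 *	filter[1] = NETF_PUSHHDR + 6;		(push hardware-header word 6;
 *						 for Ethernet, the type field)
 *	filter[2] = NETF_PUSHLIT | NETF_EQ;	(push literal, compare ==)
 *	filter[3] = 0x0806;			(literal; byte order is that of
 *						 the raw header, so a real
 *						 client may need htons())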
*/ data_count /= sizeof(unsigned short); #define data_word ((unsigned short *)data) #define header_word ((unsigned short *)header) sp = &stack[NET_FILTER_STACK_DEPTH]; fp = &infp->filter[1]; /* filter[0] used for flags */ fpe = infp->filter_end; *sp = TRUE; while (fp < fpe) { arg = *fp++; op = NETF_OP(arg); arg = NETF_ARG(arg); switch (arg) { case NETF_NOPUSH: arg = *sp++; break; case NETF_PUSHZERO: arg = 0; break; case NETF_PUSHLIT: arg = *fp++; break; case NETF_PUSHIND: arg = *sp++; if (arg >= data_count) return FALSE; arg = data_word[arg]; break; case NETF_PUSHHDRIND: arg = *sp++; if (arg >= NET_HDW_HDR_MAX/sizeof(unsigned short)) return FALSE; arg = header_word[arg]; break; default: if (arg >= NETF_PUSHSTK) { arg = sp[arg - NETF_PUSHSTK]; } else if (arg >= NETF_PUSHHDR) { arg = header_word[arg - NETF_PUSHHDR]; } else { arg -= NETF_PUSHWORD; if (arg >= data_count) return FALSE; arg = data_word[arg]; } break; } switch (op) { case NETF_OP(NETF_NOP): *--sp = arg; break; case NETF_OP(NETF_AND): *sp &= arg; break; case NETF_OP(NETF_OR): *sp |= arg; break; case NETF_OP(NETF_XOR): *sp ^= arg; break; case NETF_OP(NETF_EQ): *sp = (*sp == arg); break; case NETF_OP(NETF_NEQ): *sp = (*sp != arg); break; case NETF_OP(NETF_LT): *sp = (*sp < arg); break; case NETF_OP(NETF_LE): *sp = (*sp <= arg); break; case NETF_OP(NETF_GT): *sp = (*sp > arg); break; case NETF_OP(NETF_GE): *sp = (*sp >= arg); break; case NETF_OP(NETF_COR): if (*sp++ == arg) return (TRUE); break; case NETF_OP(NETF_CAND): if (*sp++ != arg) return (FALSE); break; case NETF_OP(NETF_CNOR): if (*sp++ == arg) return (FALSE); break; case NETF_OP(NETF_CNAND): if (*sp++ != arg) return (TRUE); break; case NETF_OP(NETF_LSH): *sp <<= arg; break; case NETF_OP(NETF_RSH): *sp >>= arg; break; case NETF_OP(NETF_ADD): *sp += arg; break; case NETF_OP(NETF_SUB): *sp -= arg; break; } } return ((*sp) ? TRUE : FALSE); #undef data_word #undef header_word } /* * Check filter for invalid operations or stack over/under-flow. */ boolean_t parse_net_filter( filter_t *filter, unsigned int count) { int sp; filter_t *fpe = &filter[count]; filter_t op, arg; /* * count is at least 1, and filter[0] is used for flags. */ filter++; sp = NET_FILTER_STACK_DEPTH; for (; filter < fpe; filter++) { op = NETF_OP(*filter); arg = NETF_ARG(*filter); switch (arg) { case NETF_NOPUSH: break; case NETF_PUSHZERO: sp--; break; case NETF_PUSHLIT: filter++; if (filter >= fpe) return (FALSE); /* literal value not in filter */ sp--; break; case NETF_PUSHIND: case NETF_PUSHHDRIND: break; default: if (arg >= NETF_PUSHSTK) { if (arg - NETF_PUSHSTK + sp > NET_FILTER_STACK_DEPTH) return FALSE; } else if (arg >= NETF_PUSHHDR) { if (arg - NETF_PUSHHDR >= NET_HDW_HDR_MAX/sizeof(unsigned short)) return FALSE; } /* else... cannot check for packet bounds without packet */ sp--; break; } if (sp < 2) { return (FALSE); /* stack overflow */ } if (op == NETF_OP(NETF_NOP)) continue; /* * all non-NOP operators are binary. */ if (sp > NET_MAX_FILTER-2) return (FALSE); sp++; switch (op) { case NETF_OP(NETF_AND): case NETF_OP(NETF_OR): case NETF_OP(NETF_XOR): case NETF_OP(NETF_EQ): case NETF_OP(NETF_NEQ): case NETF_OP(NETF_LT): case NETF_OP(NETF_LE): case NETF_OP(NETF_GT): case NETF_OP(NETF_GE): case NETF_OP(NETF_COR): case NETF_OP(NETF_CAND): case NETF_OP(NETF_CNOR): case NETF_OP(NETF_CNAND): case NETF_OP(NETF_LSH): case NETF_OP(NETF_RSH): case NETF_OP(NETF_ADD): case NETF_OP(NETF_SUB): break; default: return (FALSE); } } return (TRUE); } /* * Set a filter for a network interface. 
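 * An illustrative sketch of the BPF flavor accepted here (the
 * instruction offsets and the 0x0806 literal are hypothetical, chosen
 * only to show the layout): the filter array is overlaid on an array of
 * struct bpf_insn; insn[0].code carries the flags word and the program
 * proper starts at insn[1].  Offsets smaller than the hardware header
 * length refer to the header, larger ones to the packet data (see
 * bpf_do_filter below).
 *
 *	struct bpf_insn prog[5];
 *	prog[0].code = NETF_BPF | NETF_IN;	(flags slot, skipped by the engine)
 *	prog[1].code = BPF_LD|BPF_H|BPF_ABS;	prog[1].k = 12;
 *	prog[2].code = BPF_JMP|BPF_JEQ|BPF_K;	prog[2].k = 0x0806;
 *	prog[2].jt = 0;	prog[2].jf = 1;
 *	prog[3].code = BPF_RET|BPF_K;		prog[3].k = NET_RCV_MAX;  (accept)
 *	prog[4].code = BPF_RET|BPF_K;		prog[4].k = 0;		  (reject)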
* * We are given a naked send right for the rcv_port. * If we are successful, we must consume that right. */ io_return_t net_set_filter( struct ifnet *ifp, ipc_port_t rcv_port, int priority, filter_t *filter, unsigned int filter_count) { int filter_bytes; bpf_insn_t match; net_rcv_port_t infp, my_infp; net_rcv_port_t nextfp; net_hash_header_t hhp; net_hash_entry_t entp; net_hash_entry_t *head, nextentp; queue_entry_t dead_infp, dead_entp; int i; int ret, is_new_infp; io_return_t rval; boolean_t in, out; /* Initialize hash_entp to NULL to quiet GCC * warning about uninitialized variable. hash_entp is only * used when match != 0; in that case it is properly initialized * by kmem_cache_alloc(). */ net_hash_entry_t hash_entp = NULL; /* * Check the filter syntax. */ filter_bytes = CSPF_BYTES(filter_count); match = (bpf_insn_t) 0; if (filter_count == 0) { return (D_INVALID_OPERATION); } else if (!((filter[0] & NETF_IN) || (filter[0] & NETF_OUT))) { return (D_INVALID_OPERATION); /* NETF_IN or NETF_OUT required */ } else if ((filter[0] & NETF_TYPE_MASK) == NETF_BPF) { ret = bpf_validate((bpf_insn_t)filter, filter_bytes, &match); if (!ret) return (D_INVALID_OPERATION); } else if ((filter[0] & NETF_TYPE_MASK) == 0) { if (!parse_net_filter(filter, filter_count)) return (D_INVALID_OPERATION); } else { return (D_INVALID_OPERATION); } rval = D_SUCCESS; /* default return value */ dead_infp = dead_entp = 0; if (match == (bpf_insn_t) 0) { /* * If there is no match instruction, we allocate * a normal packet filter structure. */ my_infp = (net_rcv_port_t) kmem_cache_alloc(&net_rcv_cache); my_infp->rcv_port = rcv_port; is_new_infp = TRUE; } else { /* * If there is a match instruction, we assume there will be * multiple sessions with a common substructure and allocate * a hash table to deal with them. */ my_infp = 0; hash_entp = (net_hash_entry_t) kmem_cache_alloc(&net_hash_entry_cache); is_new_infp = FALSE; } /* * Look for an existing filter on the same reply port. * Look for filters with dead ports (for GC). * Look for a filter with the same code except KEY insns. */ void check_filter_list(queue_head_t *if_port_list) { FILTER_ITERATE(if_port_list, infp, nextfp, (if_port_list == &ifp->if_rcv_port_list) ? &infp->input : &infp->output) { if (infp->rcv_port == MACH_PORT_NULL) { if (match != 0 && infp->priority == priority && my_infp == 0 && (infp->filter_end - infp->filter) == filter_count && bpf_eq((bpf_insn_t)infp->filter, (bpf_insn_t)filter, filter_bytes)) my_infp = infp; for (i = 0; i < NET_HASH_SIZE; i++) { head = &((net_hash_header_t) infp)->table[i]; if (*head == 0) continue; /* * Check each hash entry to make sure the * destination port is still valid. Remove * any invalid entries. */ entp = *head; do { nextentp = (net_hash_entry_t) entp->he_next; /* checked without ip_lock(entp->rcv_port) */ if (entp->rcv_port == rcv_port || !IP_VALID(entp->rcv_port) || !ip_active(entp->rcv_port)) { ret = hash_ent_remove (ifp, (net_hash_header_t)infp, (my_infp == infp), head, entp, &dead_entp); if (ret) goto hash_loop_end; } entp = nextentp; /* While test checks head since hash_ent_remove might modify it. 
*/ } while (*head != 0 && entp != *head); } hash_loop_end: ; } else if (infp->rcv_port == rcv_port || !IP_VALID(infp->rcv_port) || !ip_active(infp->rcv_port)) { /* Remove the old filter from lists */ if (infp->filter[0] & NETF_IN) queue_remove(&ifp->if_rcv_port_list, infp, net_rcv_port_t, input); if (infp->filter[0] & NETF_OUT) queue_remove(&ifp->if_snd_port_list, infp, net_rcv_port_t, output); ENQUEUE_DEAD(dead_infp, infp, input); } } FILTER_ITERATE_END } in = (filter[0] & NETF_IN) != 0; out = (filter[0] & NETF_OUT) != 0; simple_lock(&ifp->if_rcv_port_list_lock); simple_lock(&ifp->if_snd_port_list_lock); if (in) check_filter_list(&ifp->if_rcv_port_list); if (out) check_filter_list(&ifp->if_snd_port_list); if (my_infp == 0) { /* Allocate a dummy infp */ simple_lock(&net_hash_header_lock); for (i = 0; i < N_NET_HASH; i++) { if (filter_hash_header[i].n_keys == 0) break; } if (i == N_NET_HASH) { simple_unlock(&net_hash_header_lock); simple_unlock(&ifp->if_snd_port_list_lock); simple_unlock(&ifp->if_rcv_port_list_lock); ipc_port_release_send(rcv_port); if (match != 0) kmem_cache_free(&net_hash_entry_cache, (vm_offset_t)hash_entp); rval = D_NO_MEMORY; goto clean_and_return; } hhp = &filter_hash_header[i]; hhp->n_keys = match->jt; simple_unlock(&net_hash_header_lock); hhp->ref_count = 0; for (i = 0; i < NET_HASH_SIZE; i++) hhp->table[i] = 0; my_infp = (net_rcv_port_t)hhp; my_infp->rcv_port = MACH_PORT_NULL; /* indication of dummy */ is_new_infp = TRUE; } if (is_new_infp) { my_infp->priority = priority; my_infp->rcv_count = 0; /* Copy filter program. */ memcpy (my_infp->filter, filter, filter_bytes); my_infp->filter_end = (filter_t *)((char *)my_infp->filter + filter_bytes); if (match == 0) { my_infp->rcv_qlimit = net_add_q_info(rcv_port); } else { my_infp->rcv_qlimit = 0; } /* Insert my_infp according to priority */ if (in) { queue_iterate(&ifp->if_rcv_port_list, infp, net_rcv_port_t, input) if (priority > infp->priority) break; queue_enter(&ifp->if_rcv_port_list, my_infp, net_rcv_port_t, input); } if (out) { queue_iterate(&ifp->if_snd_port_list, infp, net_rcv_port_t, output) if (priority > infp->priority) break; queue_enter(&ifp->if_snd_port_list, my_infp, net_rcv_port_t, output); } } if (match != 0) { /* Insert to hash list */ net_hash_entry_t *p; hash_entp->rcv_port = rcv_port; for (i = 0; i < match->jt; i++) /* match->jt is n_keys */ hash_entp->keys[i] = match[i+1].k; p = &((net_hash_header_t)my_infp)-> table[bpf_hash(match->jt, hash_entp->keys)]; /* Not checking for the same key values */ if (*p == 0) { queue_init (&hash_entp->chain); *p = hash_entp; } else { enqueue_tail(&(*p)->chain, &hash_entp->chain); } ((net_hash_header_t)my_infp)->ref_count++; hash_entp->rcv_qlimit = net_add_q_info(rcv_port); } simple_unlock(&ifp->if_snd_port_list_lock); simple_unlock(&ifp->if_rcv_port_list_lock); clean_and_return: /* No locks are held at this point. 
*/ if (dead_infp != 0) net_free_dead_infp(dead_infp); if (dead_entp != 0) net_free_dead_entp(dead_entp); return (rval); } /* * Other network operations */ io_return_t net_getstat( struct ifnet *ifp, dev_flavor_t flavor, dev_status_t status, /* pointer to OUT array */ mach_msg_type_number_t *count) /* OUT */ { switch (flavor) { case NET_STATUS: { struct net_status *ns = (struct net_status *)status; if (*count < NET_STATUS_COUNT) return (D_INVALID_OPERATION); ns->min_packet_size = ifp->if_header_size; ns->max_packet_size = ifp->if_header_size + ifp->if_mtu; ns->header_format = ifp->if_header_format; ns->header_size = ifp->if_header_size; ns->address_size = ifp->if_address_size; ns->flags = ifp->if_flags; ns->mapped_size = 0; *count = NET_STATUS_COUNT; break; } case NET_ADDRESS: { int addr_byte_count; int addr_int_count; int i; addr_byte_count = ifp->if_address_size; addr_int_count = (addr_byte_count + (sizeof(int)-1)) / sizeof(int); if (*count < addr_int_count) { /* XXX debug hack. */ printf ("net_getstat: count: %d, addr_int_count: %d\n", *count, addr_int_count); return (D_INVALID_OPERATION); } memcpy(status, ifp->if_address, addr_byte_count); if (addr_byte_count < addr_int_count * sizeof(int)) memset((char *)status + addr_byte_count, 0, (addr_int_count * sizeof(int) - addr_byte_count)); for (i = 0; i < addr_int_count; i++) { int word; word = status[i]; status[i] = htonl(word); } *count = addr_int_count; break; } default: return (D_INVALID_OPERATION); } return (D_SUCCESS); } io_return_t net_write( struct ifnet *ifp, int (*start)(), io_req_t ior) { spl_t s; kern_return_t rc; boolean_t wait; /* * Reject the write if the interface is down. */ if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) return (D_DEVICE_DOWN); /* * Reject the write if the packet is too large or too small. */ if (ior->io_count < ifp->if_header_size || ior->io_count > ifp->if_header_size + ifp->if_mtu) return (D_INVALID_SIZE); /* * Wire down the memory. */ rc = device_write_get(ior, &wait); if (rc != KERN_SUCCESS) return (rc); /* * Network interfaces can't cope with VM continuations. * If wait is set, just panic. */ if (wait) { panic("net_write: VM continuation"); } /* * Queue the packet on the output queue, and * start the device. */ s = splimp(); IF_ENQUEUE(&ifp->if_snd, ior); (*start)(ifp->if_unit); splx(s); return (D_IO_QUEUED); } /* * Initialize the whole package. */ void net_io_init(void) { vm_size_t size; size = sizeof(struct net_rcv_port); kmem_cache_init(&net_rcv_cache, "net_rcv_port", size, 0, NULL, 0); size = sizeof(struct net_hash_entry); kmem_cache_init(&net_hash_entry_cache, "net_hash_entry", size, 0, NULL, 0); size = ikm_plus_overhead(sizeof(struct net_rcv_msg)); net_kmsg_size = round_page(size); /* * net_kmsg_max caps the number of buffers * we are willing to allocate. By default, * we allow for net_queue_free_min plus * the queue limit for each filter. * (Added as the filters are added.) */ simple_lock_init(&net_kmsg_total_lock); if (net_kmsg_max == 0) net_kmsg_max = net_queue_free_min; simple_lock_init(&net_queue_free_lock); ipc_kmsg_queue_init(&net_queue_free); simple_lock_init(&net_queue_lock); ipc_kmsg_queue_init(&net_queue_high); ipc_kmsg_queue_init(&net_queue_low); simple_lock_init(&net_hash_header_lock); } /* ======== BPF: Berkeley Packet Filter ======== */ /*- * Copyright (c) 1990-1991 The Regents of the University of California. * All rights reserved. 
* * This code is derived from the Stanford/CMU enet packet filter, * (net/enet.c) distributed as part of 4.3BSD, and code contributed * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence * Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)bpf.c 7.5 (Berkeley) 7/15/91 */ #if defined(sparc) || defined(mips) || defined(ibm032) || defined(alpha) #define BPF_ALIGN #endif #ifndef BPF_ALIGN #define EXTRACT_SHORT(p) ((u_short)ntohs(*(u_short *)p)) #define EXTRACT_LONG(p) (ntohl(*(u_long *)p)) #else #define EXTRACT_SHORT(p)\ ((u_short)\ ((u_short)*((u_char *)p+0)<<8|\ (u_short)*((u_char *)p+1)<<0)) #define EXTRACT_LONG(p)\ ((u_long)*((u_char *)p+0)<<24|\ (u_long)*((u_char *)p+1)<<16|\ (u_long)*((u_char *)p+2)<<8|\ (u_long)*((u_char *)p+3)<<0) #endif /* * Execute the filter program starting at pc on the packet p * wirelen is the length of the original packet * buflen is the amount of data present */ int bpf_do_filter( net_rcv_port_t infp, char * p, /* packet data */ unsigned int wirelen, /* data_count (in bytes) */ char * header, unsigned int hlen, /* header len (in bytes) */ net_hash_entry_t **hash_headpp, net_hash_entry_t *entpp) /* out */ { bpf_insn_t pc, pc_end; unsigned int buflen; unsigned int A, X; int k; unsigned int mem[BPF_MEMWORDS]; /* Generic pointer to either HEADER or P according to the specified offset. */ char *data = NULL; pc = ((bpf_insn_t) infp->filter) + 1; /* filter[0].code is (NETF_BPF | flags) */ pc_end = (bpf_insn_t)infp->filter_end; buflen = NET_RCV_MAX; *entpp = 0; /* default */ A = 0; X = 0; for (; pc < pc_end; ++pc) { switch (pc->code) { default: #ifdef KERNEL return 0; #else abort(); #endif case BPF_RET|BPF_K: if (infp->rcv_port == MACH_PORT_NULL && *entpp == 0) { return 0; } return ((u_int)pc->k <= wirelen) ? pc->k : wirelen; case BPF_RET|BPF_A: if (infp->rcv_port == MACH_PORT_NULL && *entpp == 0) { return 0; } return ((u_int)A <= wirelen) ? A : wirelen; case BPF_RET|BPF_MATCH_IMM: if (bpf_match ((net_hash_header_t)infp, pc->jt, mem, hash_headpp, entpp)) { return ((u_int)pc->k <= wirelen) ? 
pc->k : wirelen; } return 0; case BPF_LD|BPF_W|BPF_ABS: k = pc->k; load_word: if ((u_int)k + sizeof(int) <= hlen) data = header; else if ((u_int)k + sizeof(int) <= buflen) { k -= hlen; data = p; } else return 0; #ifdef BPF_ALIGN if (((int)(data + k) & 3) != 0) A = EXTRACT_LONG(&data[k]); else #endif A = ntohl(*(int *)(data + k)); continue; case BPF_LD|BPF_H|BPF_ABS: k = pc->k; load_half: if ((u_int)k + sizeof(short) <= hlen) data = header; else if ((u_int)k + sizeof(short) <= buflen) { k -= hlen; data = p; } else return 0; A = EXTRACT_SHORT(&data[k]); continue; case BPF_LD|BPF_B|BPF_ABS: k = pc->k; load_byte: if ((u_int)k < hlen) data = header; else if ((u_int)k < buflen) { data = p; k -= hlen; } else return 0; A = data[k]; continue; case BPF_LD|BPF_W|BPF_LEN: A = wirelen; continue; case BPF_LDX|BPF_W|BPF_LEN: X = wirelen; continue; case BPF_LD|BPF_W|BPF_IND: k = X + pc->k; goto load_word; case BPF_LD|BPF_H|BPF_IND: k = X + pc->k; goto load_half; case BPF_LD|BPF_B|BPF_IND: k = X + pc->k; goto load_byte; case BPF_LDX|BPF_MSH|BPF_B: k = pc->k; if (k < hlen) data = header; else if (k < buflen) { data = p; k -= hlen; } else return 0; X = (data[k] & 0xf) << 2; continue; case BPF_LD|BPF_IMM: A = pc->k; continue; case BPF_LDX|BPF_IMM: X = pc->k; continue; case BPF_LD|BPF_MEM: A = mem[pc->k]; continue; case BPF_LDX|BPF_MEM: X = mem[pc->k]; continue; case BPF_ST: mem[pc->k] = A; continue; case BPF_STX: mem[pc->k] = X; continue; case BPF_JMP|BPF_JA: pc += pc->k; continue; case BPF_JMP|BPF_JGT|BPF_K: pc += (A > pc->k) ? pc->jt : pc->jf; continue; case BPF_JMP|BPF_JGE|BPF_K: pc += (A >= pc->k) ? pc->jt : pc->jf; continue; case BPF_JMP|BPF_JEQ|BPF_K: pc += (A == pc->k) ? pc->jt : pc->jf; continue; case BPF_JMP|BPF_JSET|BPF_K: pc += (A & pc->k) ? pc->jt : pc->jf; continue; case BPF_JMP|BPF_JGT|BPF_X: pc += (A > X) ? pc->jt : pc->jf; continue; case BPF_JMP|BPF_JGE|BPF_X: pc += (A >= X) ? pc->jt : pc->jf; continue; case BPF_JMP|BPF_JEQ|BPF_X: pc += (A == X) ? pc->jt : pc->jf; continue; case BPF_JMP|BPF_JSET|BPF_X: pc += (A & X) ? pc->jt : pc->jf; continue; case BPF_ALU|BPF_ADD|BPF_X: A += X; continue; case BPF_ALU|BPF_SUB|BPF_X: A -= X; continue; case BPF_ALU|BPF_MUL|BPF_X: A *= X; continue; case BPF_ALU|BPF_DIV|BPF_X: if (X == 0) return 0; A /= X; continue; case BPF_ALU|BPF_AND|BPF_X: A &= X; continue; case BPF_ALU|BPF_OR|BPF_X: A |= X; continue; case BPF_ALU|BPF_LSH|BPF_X: A <<= X; continue; case BPF_ALU|BPF_RSH|BPF_X: A >>= X; continue; case BPF_ALU|BPF_ADD|BPF_K: A += pc->k; continue; case BPF_ALU|BPF_SUB|BPF_K: A -= pc->k; continue; case BPF_ALU|BPF_MUL|BPF_K: A *= pc->k; continue; case BPF_ALU|BPF_DIV|BPF_K: A /= pc->k; continue; case BPF_ALU|BPF_AND|BPF_K: A &= pc->k; continue; case BPF_ALU|BPF_OR|BPF_K: A |= pc->k; continue; case BPF_ALU|BPF_LSH|BPF_K: A <<= pc->k; continue; case BPF_ALU|BPF_RSH|BPF_K: A >>= pc->k; continue; case BPF_ALU|BPF_NEG: A = -A; continue; case BPF_MISC|BPF_TAX: X = A; continue; case BPF_MISC|BPF_TXA: A = X; continue; } } return 0; } /* * Return 1 if the 'f' is a valid filter program without a MATCH * instruction. Return 2 if it is a valid filter program with a MATCH * instruction. Otherwise, return 0. * The constraints are that each jump be forward and to a valid * code. The code must terminate with either an accept or reject. * 'valid' is an array for use by the routine (it must be at least * 'len' bytes long). * * The kernel needs to be able to verify an application's filter code. * Otherwise, a bogus program could easily crash the system. 
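 *
 * A sketch of the MATCH convention checked here (the key value shown is
 * hypothetical): a program stores the packet fields it demultiplexes on
 * into the BPF memory words with BPF_ST, then executes
 * BPF_RET|BPF_MATCH_IMM, whose jt field gives the number of keys (at
 * most N_NET_HASH_KEYS) and whose k field gives the amount of data to
 * deliver.  It must be followed immediately by jt instructions of code
 * BPF_MISC|BPF_KEY whose k fields hold the session's expected key
 * values, and the program must still end with a BPF_RET-class
 * instruction:
 *
 *	...					(compute a key into A)
 *	{ BPF_ST, 0, 0, 0 },			(mem[0] = A)
 *	{ BPF_RET|BPF_MATCH_IMM, 1, 0, NET_RCV_MAX },
 *	{ BPF_MISC|BPF_KEY, 0, 0, 6000 },	(this session's key)
 *	{ BPF_RET|BPF_K, 0, 0, 0 }		(terminator)
 *
 * At delivery time bpf_do_filter hands the memory words to bpf_match,
 * which hashes them and compares them with the values registered by
 * net_set_filter.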
 */
int bpf_validate(
    bpf_insn_t f,
    int bytes,
    bpf_insn_t *match)
{
    int i, j, len;
    bpf_insn_t p;

    len = BPF_BYTES2LEN(bytes);

    /*
     * f[0].code is already checked to be (NETF_BPF | flags).
     * So skip f[0].
     */
    for (i = 1; i < len; ++i) {
        /*
         * Check that jumps are forward, and within
         * the code block.
         */
        p = &f[i];
        if (BPF_CLASS(p->code) == BPF_JMP) {
            int from = i + 1;

            if (BPF_OP(p->code) == BPF_JA) {
                if (from + p->k >= len)
                    return 0;
            }
            else if (from + p->jt >= len || from + p->jf >= len)
                return 0;
        }
        /*
         * Check that memory operations use valid addresses.
         */
        if ((BPF_CLASS(p->code) == BPF_ST ||
             (BPF_CLASS(p->code) == BPF_LD &&
              (p->code & 0xe0) == BPF_MEM)) &&
            (p->k >= BPF_MEMWORDS || p->k < 0))
            return 0;
        /*
         * Check for constant division by 0.
         */
        if (p->code == (BPF_ALU|BPF_DIV|BPF_K) && p->k == 0)
            return 0;
        /*
         * Check for match instruction.
         * Only one match instruction per filter is allowed.
         */
        if (p->code == (BPF_RET|BPF_MATCH_IMM)) {
            if (*match != 0 || p->jt == 0 || p->jt > N_NET_HASH_KEYS)
                return 0;
            i += p->jt;    /* skip keys */
            if (i + 1 > len)
                return 0;

            for (j = 1; j <= p->jt; j++) {
                if (p[j].code != (BPF_MISC|BPF_KEY))
                    return 0;
            }

            *match = p;
        }
    }
    if (BPF_CLASS(f[len - 1].code) == BPF_RET)
        return ((*match == 0) ? 1 : 2);
    else
        return 0;
}

int bpf_eq(
    bpf_insn_t f1,
    bpf_insn_t f2,
    int bytes)
{
    int count;

    count = BPF_BYTES2LEN(bytes);
    for (; count--; f1++, f2++) {
        if (!BPF_INSN_EQ(f1, f2)) {
            if (f1->code == (BPF_MISC|BPF_KEY) &&
                f2->code == (BPF_MISC|BPF_KEY))
                continue;
            return FALSE;
        }
    }
    return TRUE;
}

unsigned int bpf_hash (n, keys)
    int n;
    const unsigned int *keys;
{
    unsigned int hval = 0;

    while (n--) {
        hval += *keys++;
    }
    return (hval % NET_HASH_SIZE);
}

int bpf_match (hash, n_keys, keys, hash_headpp, entpp)
    net_hash_header_t hash;
    int n_keys;
    const unsigned int *keys;
    net_hash_entry_t **hash_headpp, *entpp;
{
    net_hash_entry_t head, entp;
    int i;

    if (n_keys != hash->n_keys)
        return FALSE;

    *hash_headpp = &hash->table[bpf_hash(n_keys, keys)];
    head = **hash_headpp;
    if (head == 0)
        return FALSE;

    HASH_ITERATE (head, entp) {
        for (i = 0; i < n_keys; i++) {
            if (keys[i] != entp->keys[i])
                break;
        }
        if (i == n_keys) {
            *entpp = entp;
            return TRUE;
        }
    }
    HASH_ITERATE_END (head, entp)
    return FALSE;
}

/*
 * Removes a hash entry (ENTP) from its queue (HEAD).
 * If the reference count of the filter (HP) becomes zero and it is not
 * USED, HP is removed from the corresponding port lists and is freed.
 */
int hash_ent_remove(
    struct ifnet *ifp,
    net_hash_header_t hp,
    int used,
    net_hash_entry_t *head,
    net_hash_entry_t entp,
    queue_entry_t *dead_p)
{
    hp->ref_count--;

    if (*head == entp) {
        if (queue_empty((queue_t) entp)) {
            *head = 0;
            ENQUEUE_DEAD(*dead_p, entp, chain);
            if (hp->ref_count == 0 && !used) {
                if (((net_rcv_port_t)hp)->filter[0] & NETF_IN)
                    queue_remove(&ifp->if_rcv_port_list,
                                 (net_rcv_port_t)hp, net_rcv_port_t, input);
                if (((net_rcv_port_t)hp)->filter[0] & NETF_OUT)
                    queue_remove(&ifp->if_snd_port_list,
                                 (net_rcv_port_t)hp, net_rcv_port_t, output);
                hp->n_keys = 0;
                return TRUE;
            }
            return FALSE;
        } else {
            *head = (net_hash_entry_t)queue_next((queue_t) entp);
        }
    }

    remqueue((queue_t)*head, (queue_entry_t)entp);
    ENQUEUE_DEAD(*dead_p, entp, chain);
    return FALSE;
}

int net_add_q_info(ipc_port_t rcv_port)
{
    mach_port_msgcount_t qlimit = 0;

    /*
     * We use a new port, so increase net_queue_free_min
     * and net_kmsg_max to allow for more queued messages.
*/ if (IP_VALID(rcv_port)) { ip_lock(rcv_port); if (ip_active(rcv_port)) qlimit = rcv_port->ip_qlimit; ip_unlock(rcv_port); } simple_lock(&net_kmsg_total_lock); net_queue_free_min++; net_kmsg_max += qlimit + 1; simple_unlock(&net_kmsg_total_lock); return (int)qlimit; } void net_del_q_info(int qlimit) { simple_lock(&net_kmsg_total_lock); net_queue_free_min--; net_kmsg_max -= qlimit + 1; simple_unlock(&net_kmsg_total_lock); } /* * net_free_dead_infp (dead_infp) * queue_entry_t dead_infp; list of dead net_rcv_port_t. * * Deallocates dead net_rcv_port_t. * No locks should be held when called. */ void net_free_dead_infp(queue_entry_t dead_infp) { net_rcv_port_t infp, nextfp; for (infp = (net_rcv_port_t) dead_infp; infp != 0; infp = nextfp) { nextfp = (net_rcv_port_t) queue_next(&infp->input); ipc_port_release_send(infp->rcv_port); net_del_q_info(infp->rcv_qlimit); kmem_cache_free(&net_rcv_cache, (vm_offset_t) infp); } } /* * net_free_dead_entp (dead_entp) * queue_entry_t dead_entp; list of dead net_hash_entry_t. * * Deallocates dead net_hash_entry_t. * No locks should be held when called. */ void net_free_dead_entp(queue_entry_t dead_entp) { net_hash_entry_t entp, nextentp; for (entp = (net_hash_entry_t)dead_entp; entp != 0; entp = nextentp) { nextentp = (net_hash_entry_t) queue_next(&entp->chain); ipc_port_release_send(entp->rcv_port); net_del_q_info(entp->rcv_qlimit); kmem_cache_free(&net_hash_entry_cache, (vm_offset_t) entp); } }