BSD 4_3_Reno release
[unix-history] / usr / src / sys / netinet / in_pcb.c
index c79eea0..cd1ff78 100644 (file)
-/* in_pcb.c 4.13 81/12/11 */
-
-#include "../h/param.h"
-#include "../h/systm.h"
-#include "../h/dir.h"
-#include "../h/user.h"
-#include "../h/mbuf.h"
-#include "../h/socket.h"
-#include "../h/socketvar.h"
-#include "../net/in.h"
-#include "../net/in_systm.h"
-#include "../net/if.h"
-#include "../net/in_pcb.h"
-
 /*
 /*
- * Routines to manage internet protocol control blocks.
- *
- * At PRU_ATTACH time a protocol control block is allocated in
- * in_pcballoc() and inserted on a doubly-linked list of such blocks
- * for the protocol.  A port address is either requested (and verified
- * to not be in use) or assigned at this time.  We also allocate
- * space in the socket sockbuf structures here, although this is
- * not a clearly correct place to put this function.
+ * Copyright (c) 1982, 1986 Regents of the University of California.
+ * All rights reserved.
  *
  *
- * A connectionless protocol will have its protocol control block
- * removed at PRU_DETACH time, when the socket will be freed (freeing
- * the space reserved) and the block will be removed from the list of
- * blocks for its protocol.
+ * Redistribution is only permitted until one year after the first shipment
+ * of 4.4BSD by the Regents.  Otherwise, redistribution and use in source and
+ * binary forms are permitted provided that: (1) source distributions retain
+ * this entire copyright notice and comment, and (2) distributions including
+ * binaries display the following acknowledgement:  ``This product includes
+ * software developed by the University of California, Berkeley and its
+ * contributors'' in the documentation or other materials provided with the
+ * distribution and in all advertising materials mentioning features or use
+ * of this software.  Neither the name of the University nor the names of
+ * its contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  *
  *
- * A connection-based protocol may be connected to a remote peer at
- * PRU_CONNECT time through the routine in_pcbconnect().  In the normal
- * case a PRU_DISCONNECT occurs causing a in_pcbdisconnect().
- * It is also possible that higher-level routines will opt out of the
- * relationship with the connection before the connection shut down
- * is complete.  This often occurs in protocols like TCP where we must
- * hold on to the protocol control block for a unreasonably long time
- * after the connection is used up to avoid races in later connection
- * establishment.  To handle this we allow higher-level routines to
- * disassociate themselves from the socket, marking it SS_USERGONE while
- * the disconnect is in progress.  We notice that this has happened
- * when the disconnect is complete, and perform the PRU_DETACH operation,
- * freeing the socket.
- *
- * TODO:
- *     use hashing
+ *     @(#)in_pcb.c    7.13 (Berkeley) 6/28/90
  */
  */
+
+#include "param.h"
+#include "systm.h"
+#include "user.h"
+#include "malloc.h"
+#include "mbuf.h"
+#include "socket.h"
+#include "socketvar.h"
+#include "ioctl.h"
+#include "../net/if.h"
+#include "../net/route.h"
+#include "in.h"
+#include "in_systm.h"
+#include "ip.h"
+#include "in_pcb.h"
+#include "in_var.h"
+#include "protosw.h"
+
 struct in_addr zeroin_addr;
 
 struct in_addr zeroin_addr;
 
-/*
- * Allocate a protocol control block, space
- * for send and receive data, and local host information.
- * Return error.  If no error make socket point at pcb.
- */
-in_pcbattach(so, head, sndcc, rcvcc, sin)
+in_pcballoc(so, head)
        struct socket *so;
        struct inpcb *head;
        struct socket *so;
        struct inpcb *head;
-       int sndcc, rcvcc;
-       struct sockaddr_in *sin;
 {
        struct mbuf *m;
        register struct inpcb *inp;
 {
        struct mbuf *m;
        register struct inpcb *inp;
-       struct ifnet *ifp;
-       u_short lport;
-
-COUNT(IN_PCBATTACH);
-       if (sin) {
-               if (sin->sin_family != AF_INET)
-                       return (EAFNOSUPPORT);
-               ifp = if_ifwithaddr(sin->sin_addr);
-               if (ifp == 0)
-                       return (EADDRNOTAVAIL);
-               lport = sin->sin_port;
-               if (lport &&
-                   in_pcblookup(head, zeroin_addr, 0, sin->sin_addr, lport))
-                       return (EADDRINUSE);
-       } else {
-               ifp = if_ifwithaddr(ifnet->if_addr);
-               lport = 0;
-       }
-       m = m_getclr(0);
-       if (m == 0)
+
+       m = m_getclr(M_DONTWAIT, MT_PCB);
+       if (m == NULL)
                return (ENOBUFS);
                return (ENOBUFS);
-       if (sbreserve(&so->so_snd, sndcc) == 0)
-               goto bad;
-       if (sbreserve(&so->so_rcv, rcvcc) == 0)
-               goto bad2;
        inp = mtod(m, struct inpcb *);
        inp->inp_head = head;
        inp = mtod(m, struct inpcb *);
        inp->inp_head = head;
-       inp->inp_laddr = ifp->if_addr;
+       inp->inp_socket = so;
+       insque(inp, head);
+       so->so_pcb = (caddr_t)inp;
+       return (0);
+}
+       
+in_pcbbind(inp, nam)
+       register struct inpcb *inp;
+       struct mbuf *nam;
+{
+       register struct socket *so = inp->inp_socket;
+       register struct inpcb *head = inp->inp_head;
+       register struct sockaddr_in *sin;
+       u_short lport = 0;
+
+       if (in_ifaddr == 0)
+               return (EADDRNOTAVAIL);
+       if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
+               return (EINVAL);
+       if (nam == 0)
+               goto noname;
+       sin = mtod(nam, struct sockaddr_in *);
+       if (nam->m_len != sizeof (*sin))
+               return (EINVAL);
+       if (sin->sin_addr.s_addr != INADDR_ANY) {
+               int tport = sin->sin_port;
+
+               sin->sin_port = 0;              /* yech... */
+               if (ifa_ifwithaddr((struct sockaddr *)sin) == 0)
+                       return (EADDRNOTAVAIL);
+               sin->sin_port = tport;
+       }
+       lport = sin->sin_port;
+       if (lport) {
+               u_short aport = ntohs(lport);
+               int wild = 0;
+
+               /* GROSS */
+               if (aport < IPPORT_RESERVED && u.u_uid != 0)
+                       return (EACCES);
+               /* even GROSSER, but this is the Internet */
+               if ((so->so_options & SO_REUSEADDR) == 0 &&
+                   ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
+                    (so->so_options & SO_ACCEPTCONN) == 0))
+                       wild = INPLOOKUP_WILDCARD;
+               if (in_pcblookup(head,
+                   zeroin_addr, 0, sin->sin_addr, lport, wild))
+                       return (EADDRINUSE);
+       }
+       inp->inp_laddr = sin->sin_addr;
+noname:
        if (lport == 0)
                do {
        if (lport == 0)
                do {
-                       if (head->inp_lport++ < 1024)
-                               head->inp_lport = 1024;
+                       if (head->inp_lport++ < IPPORT_RESERVED ||
+                           head->inp_lport > IPPORT_USERRESERVED)
+                               head->inp_lport = IPPORT_RESERVED;
                        lport = htons(head->inp_lport);
                        lport = htons(head->inp_lport);
-               } while (in_pcblookup(head, zeroin_addr, 0, inp->inp_laddr, lport));
+               } while (in_pcblookup(head,
+                           zeroin_addr, 0, inp->inp_laddr, lport, 0));
        inp->inp_lport = lport;
        inp->inp_lport = lport;
-       inp->inp_socket = so;
-       insque(inp, head);
-       so->so_pcb = (caddr_t)inp;
-       sin = (struct sockaddr_in *)&so->so_addr;
-       sin->sin_family = AF_INET;
-       sin->sin_addr = inp->inp_laddr;
-       sin->sin_port = inp->inp_lport;
        return (0);
        return (0);
-bad2:
-       sbrelease(&so->so_snd);
-bad:
-       (void) m_free(m);
-       return (ENOBUFS);
 }
 
 }
 
-in_pcbconnect(inp, sin)
-       struct inpcb *inp;
-       struct sockaddr_in *sin;
+/*
+ * Connect from a socket to a specified address.
+ * Both address and port must be specified in argument sin.
+ * If don't have a local address for this socket yet,
+ * then pick one.
+ */
+in_pcbconnect(inp, nam)
+       register struct inpcb *inp;
+       struct mbuf *nam;
 {
 {
-       struct inpcb *xp;
+       struct in_ifaddr *ia;
+       struct sockaddr_in *ifaddr;
+       register struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
 
 
-COUNT(IN_PCBCONNECT);
+       if (nam->m_len != sizeof (*sin))
+               return (EINVAL);
        if (sin->sin_family != AF_INET)
                return (EAFNOSUPPORT);
        if (sin->sin_family != AF_INET)
                return (EAFNOSUPPORT);
-       if (sin->sin_addr.s_addr == 0 || sin->sin_port == 0)
+       if (sin->sin_port == 0)
                return (EADDRNOTAVAIL);
                return (EADDRNOTAVAIL);
-       xp = in_pcblookup(inp->inp_head, sin->sin_addr, sin->sin_port, inp->inp_laddr, inp->inp_lport);
-       if (xp->inp_faddr.s_addr)
+       if (in_ifaddr) {
+               /*
+                * If the destination address is INADDR_ANY,
+                * use the primary local address.
+                * If the supplied address is INADDR_BROADCAST,
+                * and the primary interface supports broadcast,
+                * choose the broadcast address for that interface.
+                */
+#define        satosin(sa)     ((struct sockaddr_in *)(sa))
+               if (sin->sin_addr.s_addr == INADDR_ANY)
+                   sin->sin_addr = IA_SIN(in_ifaddr)->sin_addr;
+               else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST &&
+                 (in_ifaddr->ia_ifp->if_flags & IFF_BROADCAST))
+                   sin->sin_addr = satosin(&in_ifaddr->ia_broadaddr)->sin_addr;
+       }
+       if (inp->inp_laddr.s_addr == INADDR_ANY) {
+               register struct route *ro;
+               struct ifnet *ifp;
+
+               ia = (struct in_ifaddr *)0;
+               /* 
+                * If route is known or can be allocated now,
+                * our src addr is taken from the i/f, else punt.
+                */
+               ro = &inp->inp_route;
+               if (ro->ro_rt &&
+                   (satosin(&ro->ro_dst)->sin_addr.s_addr !=
+                       sin->sin_addr.s_addr || 
+                   inp->inp_socket->so_options & SO_DONTROUTE)) {
+                       RTFREE(ro->ro_rt);
+                       ro->ro_rt = (struct rtentry *)0;
+               }
+               if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
+                   (ro->ro_rt == (struct rtentry *)0 ||
+                   ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
+                       /* No route yet, so try to acquire one */
+                       ro->ro_dst.sa_family = AF_INET;
+                       ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
+                       ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
+                               sin->sin_addr;
+                       rtalloc(ro);
+               }
+               /*
+                * If we found a route, use the address
+                * corresponding to the outgoing interface
+                * unless it is the loopback (in case a route
+                * to our address on another net goes to loopback).
+                */
+               if (ro->ro_rt && (ifp = ro->ro_rt->rt_ifp) &&
+                   (ifp->if_flags & IFF_LOOPBACK) == 0)
+                       for (ia = in_ifaddr; ia; ia = ia->ia_next)
+                               if (ia->ia_ifp == ifp)
+                                       break;
+               if (ia == 0) {
+                       int fport = sin->sin_port;
+
+                       sin->sin_port = 0;
+                       ia = (struct in_ifaddr *)
+                           ifa_ifwithdstaddr((struct sockaddr *)sin);
+                       sin->sin_port = fport;
+                       if (ia == 0)
+                               ia = in_iaonnetof(in_netof(sin->sin_addr));
+                       if (ia == 0)
+                               ia = in_ifaddr;
+                       if (ia == 0)
+                               return (EADDRNOTAVAIL);
+               }
+               ifaddr = (struct sockaddr_in *)&ia->ia_addr;
+       }
+       if (in_pcblookup(inp->inp_head,
+           sin->sin_addr,
+           sin->sin_port,
+           inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
+           inp->inp_lport,
+           0))
                return (EADDRINUSE);
                return (EADDRINUSE);
+       if (inp->inp_laddr.s_addr == INADDR_ANY) {
+               if (inp->inp_lport == 0)
+                       (void)in_pcbbind(inp, (struct mbuf *)0);
+               inp->inp_laddr = ifaddr->sin_addr;
+       }
        inp->inp_faddr = sin->sin_addr;
        inp->inp_fport = sin->sin_port;
        return (0);
        inp->inp_faddr = sin->sin_addr;
        inp->inp_fport = sin->sin_port;
        return (0);
@@ -132,9 +224,9 @@ in_pcbdisconnect(inp)
        struct inpcb *inp;
 {
 
        struct inpcb *inp;
 {
 
-COUNT(IN_PCBDISCONNECT);
-       inp->inp_faddr.s_addr = 0;
-       if (inp->inp_socket->so_state & SS_USERGONE)
+       inp->inp_faddr.s_addr = INADDR_ANY;
+       inp->inp_fport = 0;
+       if (inp->inp_socket->so_state & SS_NOFDREF)
                in_pcbdetach(inp);
 }
 
                in_pcbdetach(inp);
 }
 
@@ -145,36 +237,191 @@ in_pcbdetach(inp)
 
        so->so_pcb = 0;
        sofree(so);
 
        so->so_pcb = 0;
        sofree(so);
+       if (inp->inp_options)
+               (void)m_free(inp->inp_options);
+       if (inp->inp_route.ro_rt)
+               rtfree(inp->inp_route.ro_rt);
        remque(inp);
        (void) m_free(dtom(inp));
 }
 
        remque(inp);
        (void) m_free(dtom(inp));
 }
 
+in_setsockaddr(inp, nam)       /* write inp's local address and port into nam as a sockaddr_in */
+       register struct inpcb *inp;     /* pcb whose inp_laddr and inp_lport are copied out */
+       struct mbuf *nam;               /* mbuf whose data area and m_len are overwritten */
+{
+       register struct sockaddr_in *sin;
+       
+       nam->m_len = sizeof (*sin);     /* the mbuf now holds exactly one sockaddr_in */
+       sin = mtod(nam, struct sockaddr_in *);
+       bzero((caddr_t)sin, sizeof (*sin));     /* clear every field before filling some in */
+       sin->sin_family = AF_INET;
+       sin->sin_len = sizeof(*sin);
+       sin->sin_port = inp->inp_lport; /* copied as stored in the pcb, no byte swapping here */
+       sin->sin_addr = inp->inp_laddr;
+}
+
+in_setpeeraddr(inp, nam)       /* write inp's foreign address and port into nam as a sockaddr_in */
+       struct inpcb *inp;              /* pcb whose inp_faddr and inp_fport are copied out */
+       struct mbuf *nam;               /* mbuf whose data area and m_len are overwritten */
+{
+       register struct sockaddr_in *sin;
+       
+       nam->m_len = sizeof (*sin);     /* the mbuf now holds exactly one sockaddr_in */
+       sin = mtod(nam, struct sockaddr_in *);
+       bzero((caddr_t)sin, sizeof (*sin));     /* clear every field before filling some in */
+       sin->sin_family = AF_INET;
+       sin->sin_len = sizeof(*sin);
+       sin->sin_port = inp->inp_fport; /* copied as stored in the pcb, no byte swapping here */
+       sin->sin_addr = inp->inp_faddr;
+}
+
+/*
+ * Pass some notification to all connections of a protocol
+ * associated with address dst.  The local address and/or port numbers
+ * may be specified to limit the search.  The "usual action" will be
+ * taken, depending on the ctlinput cmd.  The caller must filter any
+ * cmds that are uninteresting (e.g., no error in the map).
+ * Call the protocol specific routine (if any) to report
+ * any errors for each matching socket.
+ *
+ * head is the protocol's pcb list head.  dst must be an AF_INET
+ * sockaddr carrying a non-wildcard address.  fport, laddr and lport
+ * narrow the match when nonzero.  cmd selects the inetctlerrmap[]
+ * entry that is passed, with each matching pcb, to notify; a null
+ * notify means matching pcbs are found but nothing is called.
+ * The request is silently ignored when cmd is out of range, dst is
+ * not AF_INET, or the destination address is INADDR_ANY.
+ *
+ * Must be called at splnet.
+ */
+in_pcbnotify(head, dst, fport, laddr, lport, cmd, notify)
+       struct inpcb *head;
+       struct sockaddr *dst;
+       u_short fport, lport;
+       struct in_addr laddr;
+       int cmd, (*notify)();
+{
+       register struct inpcb *inp, *oinp;
+       struct in_addr faddr;
+       int errno;
+       int in_rtchange();
+       extern u_char inetctlerrmap[];
+
+       /*
+        * cmd is used below as an index into inetctlerrmap[], whose
+        * valid indices are 0 .. PRC_NCMDS-1, so PRC_NCMDS itself has
+        * to be rejected along with anything larger.
+        */
+       if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET)
+               return;
+       faddr = ((struct sockaddr_in *)dst)->sin_addr;
+       if (faddr.s_addr == INADDR_ANY)
+               return;
+
+       /*
+        * Redirects go to all references to the destination,
+        * and use in_rtchange to invalidate the route cache.
+        * Dead host indications: notify all references to the destination.
+        * Otherwise, if we have knowledge of the local port and address,
+        * deliver only to that socket.
+        */
+       if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) {
+               /* Zeroed selectors match every pcb for this destination. */
+               fport = 0;
+               lport = 0;
+               laddr.s_addr = 0;
+               if (cmd != PRC_HOSTDEAD)
+                       notify = in_rtchange;
+       }
+       errno = inetctlerrmap[cmd];
+       for (inp = head->inp_next; inp != head;) {
+               if (inp->inp_faddr.s_addr != faddr.s_addr ||
+                   inp->inp_socket == 0 ||
+                   (lport && inp->inp_lport != lport) ||
+                   (laddr.s_addr && inp->inp_laddr.s_addr != laddr.s_addr) ||
+                   (fport && inp->inp_fport != fport)) {
+                       inp = inp->inp_next;
+                       continue;
+               }
+               /*
+                * Step to the next pcb before the callback runs, so the
+                * walk does not touch oinp afterwards (presumably because
+                * notify may take oinp off the list).
+                */
+               oinp = inp;
+               inp = inp->inp_next;
+               if (notify)
+                       (*notify)(oinp, errno);
+       }
+}
+
 /*
 /*
- * Look for a control block to accept a segment.
- * First choice is an exact address match.
- * Second choice is a match of local address, with
- * unspecified foreign address.
+ * Check for alternatives when higher level complains
+ * about service problems.  For now, invalidate cached
+ * routing information.  If the route was created dynamically
+ * (by a redirect), time to try a default gateway again.
  */
  */
+in_losing(inp)                 /* give up inp's cached route, if it has one */
+       struct inpcb *inp;
+{
+       register struct rtentry *rt;
+
+       if ((rt = inp->inp_route.ro_rt)) {      /* no cached route: nothing to do */
+               rt_missmsg(RTM_LOSING, &inp->inp_route.ro_dst,
+                           rt->rt_gateway, (struct sockaddr *)rt_mask(rt),
+                           (struct sockaddr *)0, rt->rt_flags, 0);
+               if (rt->rt_flags & RTF_DYNAMIC) /* dynamic route: also ask for its deletion */
+                       (void) rtrequest(RTM_DELETE, rt_key(rt),
+                               rt->rt_gateway, rt_mask(rt), rt->rt_flags, 
+                               (struct rtentry **)0);
+               inp->inp_route.ro_rt = 0;       /* pcb forgets the route before it is released */
+               rtfree(rt);
+               /*
+                * A new route can be allocated
+                * the next time output is attempted.
+                */
+       }
+}
+
+/*
+ * After a routing change, flush old routing
+ * and allocate a (hopefully) better one.
+ *
+ * Only the flush happens here: the pcb's cached route, if any, is
+ * handed to rtfree() and the cached pointer is cleared.
+ * in_pcbnotify() installs this routine as its notify callback for
+ * redirect commands and calls it with a second (error) argument
+ * that is not declared here.
+ */
+in_rtchange(inp)
+       register struct inpcb *inp;
+{
+       if (inp->inp_route.ro_rt) {
+               rtfree(inp->inp_route.ro_rt);
+               inp->inp_route.ro_rt = 0;
+               /*
+                * A new route can be allocated the next time
+                * output is attempted.
+                */
+       }
+}
+
 struct inpcb *
 struct inpcb *
-in_pcblookup(head, faddr, fport, laddr, lport)
+in_pcblookup(head, faddr, fport, laddr, lport, flags)
        struct inpcb *head;
        struct in_addr faddr, laddr;
        u_short fport, lport;
        struct inpcb *head;
        struct in_addr faddr, laddr;
        u_short fport, lport;
+       int flags;
 {
 {
-       register struct inpcb *inp;
-       struct inpcb *match = 0;
+       register struct inpcb *inp, *match = 0;
+       int matchwild = 3, wildcard;
 
        for (inp = head->inp_next; inp != head; inp = inp->inp_next) {
 
        for (inp = head->inp_next; inp != head; inp = inp->inp_next) {
-               if (inp->inp_laddr.s_addr != laddr.s_addr ||
-                   inp->inp_lport != lport)
+               if (inp->inp_lport != lport)
                        continue;
                        continue;
-               if (inp->inp_faddr.s_addr == 0) {
-                       match = inp;
+               wildcard = 0;
+               if (inp->inp_laddr.s_addr != INADDR_ANY) {
+                       if (laddr.s_addr == INADDR_ANY)
+                               wildcard++;
+                       else if (inp->inp_laddr.s_addr != laddr.s_addr)
+                               continue;
+               } else {
+                       if (laddr.s_addr != INADDR_ANY)
+                               wildcard++;
+               }
+               if (inp->inp_faddr.s_addr != INADDR_ANY) {
+                       if (faddr.s_addr == INADDR_ANY)
+                               wildcard++;
+                       else if (inp->inp_faddr.s_addr != faddr.s_addr ||
+                           inp->inp_fport != fport)
+                               continue;
+               } else {
+                       if (faddr.s_addr != INADDR_ANY)
+                               wildcard++;
+               }
+               if (wildcard && (flags & INPLOOKUP_WILDCARD) == 0)
                        continue;
                        continue;
+               if (wildcard < matchwild) {
+                       match = inp;
+                       matchwild = wildcard;
+                       if (matchwild == 0)
+                               break;
                }
                }
-               if (inp->inp_faddr.s_addr == faddr.s_addr &&
-                   inp->inp_fport == fport)
-                       return (inp);
        }
        return (match);
 }
        }
        return (match);
 }