BSD 4_3_Reno release
[unix-history] / usr / src / sys / netinet / in_pcb.c
index 209c610..cd1ff78 100644 (file)
-/*     in_pcb.c        4.25    82/04/10        */
-
-#include "../h/param.h"
-#include "../h/systm.h"
-#include "../h/dir.h"
-#include "../h/user.h"
-#include "../h/mbuf.h"
-#include "../h/socket.h"
-#include "../h/socketvar.h"
-#include "../net/in.h"
-#include "../net/in_systm.h"
-#include "../net/if.h"
-#include "../net/route.h"
-#include "../net/in_pcb.h"
-#include "../h/protosw.h"
-
 /*
 /*
- * Routines to manage internet protocol control blocks.
+ * Copyright (c) 1982, 1986 Regents of the University of California.
+ * All rights reserved.
  *
  *
- * At PRU_ATTACH time a protocol control block is allocated in
- * in_pcballoc() and inserted on a doubly-linked list of such blocks
- * for the protocol.  A port address is either requested (and verified
- * to not be in use) or assigned at this time.  We also allocate
- * space in the socket sockbuf structures here, although this is
- * not a clearly correct place to put this function.
+ * Redistribution is only permitted until one year after the first shipment
+ * of 4.4BSD by the Regents.  Otherwise, redistribution and use in source and
+ * binary forms are permitted provided that: (1) source distributions retain
+ * this entire copyright notice and comment, and (2) distributions including
+ * binaries display the following acknowledgement:  ``This product includes
+ * software developed by the University of California, Berkeley and its
+ * contributors'' in the documentation or other materials provided with the
+ * distribution and in all advertising materials mentioning features or use
+ * of this software.  Neither the name of the University nor the names of
+ * its contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  *
  *
- * A connectionless protocol will have its protocol control block
- * removed at PRU_DETACH time, when the socket will be freed (freeing
- * the space reserved) and the block will be removed from the list of
- * blocks for its protocol.
- *
- * A connection-based protocol may be connected to a remote peer at
- * PRU_CONNECT time through the routine in_pcbconnect().  In the normal
- * case a PRU_DISCONNECT occurs causing a in_pcbdisconnect().
- * It is also possible that higher-level routines will opt out of the
- * relationship with the connection before the connection shut down
- * is complete.  This often occurs in protocols like TCP where we must
- * hold on to the protocol control block for a unreasonably long time
- * after the connection is used up to avoid races in later connection
- * establishment.  To handle this we allow higher-level routines to
- * disassociate themselves from the socket, marking it SS_USERGONE while
- * the disconnect is in progress.  We notice that this has happened
- * when the disconnect is complete, and perform the PRU_DETACH operation,
- * freeing the socket.
- *
- * TODO:
- *     use hashing
+ *     @(#)in_pcb.c    7.13 (Berkeley) 6/28/90
  */
  */
+
+#include "param.h"
+#include "systm.h"
+#include "user.h"
+#include "malloc.h"
+#include "mbuf.h"
+#include "socket.h"
+#include "socketvar.h"
+#include "ioctl.h"
+#include "../net/if.h"
+#include "../net/route.h"
+#include "in.h"
+#include "in_systm.h"
+#include "ip.h"
+#include "in_pcb.h"
+#include "in_var.h"
+#include "protosw.h"
+
 struct in_addr zeroin_addr;
 
 struct in_addr zeroin_addr;
 
-/*
- * Allocate a protocol control block, space
- * for send and receive data, and local host information.
- * Return error.  If no error make socket point at pcb.
- */
-in_pcbattach(so, head, sndcc, rcvcc, sin)
+in_pcballoc(so, head)
        struct socket *so;
        struct inpcb *head;
        struct socket *so;
        struct inpcb *head;
-       int sndcc, rcvcc;
-       struct sockaddr_in *sin;
 {
        struct mbuf *m;
        register struct inpcb *inp;
 {
        struct mbuf *m;
        register struct inpcb *inp;
-       u_short lport = 0;
 
 
-COUNT(IN_PCBATTACH);
-       if (ifnet == 0)
-               return (EADDRNOTAVAIL);
-       if (sin) {
-               if (sin->sin_family != AF_INET)
-                       return (EAFNOSUPPORT);
-               if (sin->sin_addr.s_addr) {
-                       int tport = sin->sin_port;
-
-                       sin->sin_port = 0;              /* yech... */
-                       if (if_ifwithaddr((struct sockaddr *)sin) == 0)
-                               return (EADDRNOTAVAIL);
-                       sin->sin_port = tport;
-               }
-               lport = sin->sin_port;
-               if (lport) {
-                       u_short aport = lport;
-                       int wild = 0;
-#if vax
-                       aport = htons(aport);
-#endif
-                       /* GROSS */
-                       if (aport < IPPORT_RESERVED && u.u_uid != 0)
-                               return (EPERM);
-                       if ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
-                           (so->so_options & SO_ACCEPTCONN) == 0)
-                               wild = INPLOOKUP_WILDCARD;
-                       if (in_pcblookup(head,
-                           zeroin_addr, 0, sin->sin_addr, lport, wild))
-                               return (EADDRINUSE);
-               }
-       }
-       m = m_getclr(M_DONTWAIT);
-       if (m == 0)
+       m = m_getclr(M_DONTWAIT, MT_PCB);
+       if (m == NULL)
                return (ENOBUFS);
                return (ENOBUFS);
-       if (sbreserve(&so->so_snd, sndcc) == 0)
-               goto bad;
-       if (sbreserve(&so->so_rcv, rcvcc) == 0)
-               goto bad2;
        inp = mtod(m, struct inpcb *);
        inp->inp_head = head;
        inp = mtod(m, struct inpcb *);
        inp->inp_head = head;
-       if (sin)
-               inp->inp_laddr = sin->sin_addr;
+       inp->inp_socket = so;
+       insque(inp, head);
+       so->so_pcb = (caddr_t)inp;
+       return (0);
+}
+       
+in_pcbbind(inp, nam)
+       register struct inpcb *inp;
+       struct mbuf *nam;
+{
+       register struct socket *so = inp->inp_socket;
+       register struct inpcb *head = inp->inp_head;
+       register struct sockaddr_in *sin;
+       u_short lport = 0;
+
+       if (in_ifaddr == 0)
+               return (EADDRNOTAVAIL);
+       if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
+               return (EINVAL);
+       if (nam == 0)
+               goto noname;
+       sin = mtod(nam, struct sockaddr_in *);
+       if (nam->m_len != sizeof (*sin))
+               return (EINVAL);
+       if (sin->sin_addr.s_addr != INADDR_ANY) {
+               int tport = sin->sin_port;
+
+               sin->sin_port = 0;              /* yech... */
+               if (ifa_ifwithaddr((struct sockaddr *)sin) == 0)
+                       return (EADDRNOTAVAIL);
+               sin->sin_port = tport;
+       }
+       lport = sin->sin_port;
+       if (lport) {
+               u_short aport = ntohs(lport);
+               int wild = 0;
+
+               /* GROSS */
+               if (aport < IPPORT_RESERVED && u.u_uid != 0)
+                       return (EACCES);
+               /* even GROSSER, but this is the Internet */
+               if ((so->so_options & SO_REUSEADDR) == 0 &&
+                   ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
+                    (so->so_options & SO_ACCEPTCONN) == 0))
+                       wild = INPLOOKUP_WILDCARD;
+               if (in_pcblookup(head,
+                   zeroin_addr, 0, sin->sin_addr, lport, wild))
+                       return (EADDRINUSE);
+       }
+       inp->inp_laddr = sin->sin_addr;
+noname:
        if (lport == 0)
                do {
        if (lport == 0)
                do {
-                       if (head->inp_lport++ < IPPORT_RESERVED)
+                       if (head->inp_lport++ < IPPORT_RESERVED ||
+                           head->inp_lport > IPPORT_USERRESERVED)
                                head->inp_lport = IPPORT_RESERVED;
                        lport = htons(head->inp_lport);
                } while (in_pcblookup(head,
                            zeroin_addr, 0, inp->inp_laddr, lport, 0));
        inp->inp_lport = lport;
                                head->inp_lport = IPPORT_RESERVED;
                        lport = htons(head->inp_lport);
                } while (in_pcblookup(head,
                            zeroin_addr, 0, inp->inp_laddr, lport, 0));
        inp->inp_lport = lport;
-       inp->inp_socket = so;
-       insque(inp, head);
-       so->so_pcb = (caddr_t)inp;
        return (0);
        return (0);
-bad2:
-       sbrelease(&so->so_snd);
-bad:
-       (void) m_free(m);
-       return (ENOBUFS);
 }
 
 /*
 }
 
 /*
@@ -131,26 +120,88 @@ bad:
  * If don't have a local address for this socket yet,
  * then pick one.
  */
  * If don't have a local address for this socket yet,
  * then pick one.
  */
-in_pcbconnect(inp, sin)
-       struct inpcb *inp;
-       struct sockaddr_in *sin;
+in_pcbconnect(inp, nam)
+       register struct inpcb *inp;
+       struct mbuf *nam;
 {
 {
-       struct ifnet *ifp;
+       struct in_ifaddr *ia;
        struct sockaddr_in *ifaddr;
        struct sockaddr_in *ifaddr;
+       register struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
 
 
-COUNT(IN_PCBCONNECT);
+       if (nam->m_len != sizeof (*sin))
+               return (EINVAL);
        if (sin->sin_family != AF_INET)
                return (EAFNOSUPPORT);
        if (sin->sin_family != AF_INET)
                return (EAFNOSUPPORT);
-       if (sin->sin_addr.s_addr == 0 || sin->sin_port == 0)
+       if (sin->sin_port == 0)
                return (EADDRNOTAVAIL);
                return (EADDRNOTAVAIL);
-       if (inp->inp_laddr.s_addr == 0) {
-               ifp = if_ifonnetof(sin->sin_addr.s_net);
-               if (ifp == 0) {
-                       ifp = if_ifwithaf(AF_INET);
-                       if (ifp == 0)
-                               return (EADDRNOTAVAIL);         /* XXX */
+       if (in_ifaddr) {
+               /*
+                * If the destination address is INADDR_ANY,
+                * use the primary local address.
+                * If the supplied address is INADDR_BROADCAST,
+                * and the primary interface supports broadcast,
+                * choose the broadcast address for that interface.
+                */
+#define        satosin(sa)     ((struct sockaddr_in *)(sa))
+               if (sin->sin_addr.s_addr == INADDR_ANY)
+                   sin->sin_addr = IA_SIN(in_ifaddr)->sin_addr;
+               else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST &&
+                 (in_ifaddr->ia_ifp->if_flags & IFF_BROADCAST))
+                   sin->sin_addr = satosin(&in_ifaddr->ia_broadaddr)->sin_addr;
+       }
+       if (inp->inp_laddr.s_addr == INADDR_ANY) {
+               register struct route *ro;
+               struct ifnet *ifp;
+
+               ia = (struct in_ifaddr *)0;
+               /* 
+                * If route is known or can be allocated now,
+                * our src addr is taken from the i/f, else punt.
+                */
+               ro = &inp->inp_route;
+               if (ro->ro_rt &&
+                   (satosin(&ro->ro_dst)->sin_addr.s_addr !=
+                       sin->sin_addr.s_addr || 
+                   inp->inp_socket->so_options & SO_DONTROUTE)) {
+                       RTFREE(ro->ro_rt);
+                       ro->ro_rt = (struct rtentry *)0;
                }
                }
-               ifaddr = (struct sockaddr_in *)&ifp->if_addr;
+               if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
+                   (ro->ro_rt == (struct rtentry *)0 ||
+                   ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
+                       /* No route yet, so try to acquire one */
+                       ro->ro_dst.sa_family = AF_INET;
+                       ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
+                       ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
+                               sin->sin_addr;
+                       rtalloc(ro);
+               }
+               /*
+                * If we found a route, use the address
+                * corresponding to the outgoing interface
+                * unless it is the loopback (in case a route
+                * to our address on another net goes to loopback).
+                */
+               if (ro->ro_rt && (ifp = ro->ro_rt->rt_ifp) &&
+                   (ifp->if_flags & IFF_LOOPBACK) == 0)
+                       for (ia = in_ifaddr; ia; ia = ia->ia_next)
+                               if (ia->ia_ifp == ifp)
+                                       break;
+               if (ia == 0) {
+                       int fport = sin->sin_port;
+
+                       sin->sin_port = 0;
+                       ia = (struct in_ifaddr *)
+                           ifa_ifwithdstaddr((struct sockaddr *)sin);
+                       sin->sin_port = fport;
+                       if (ia == 0)
+                               ia = in_iaonnetof(in_netof(sin->sin_addr));
+                       if (ia == 0)
+                               ia = in_ifaddr;
+                       if (ia == 0)
+                               return (EADDRNOTAVAIL);
+               }
+               ifaddr = (struct sockaddr_in *)&ia->ia_addr;
        }
        if (in_pcblookup(inp->inp_head,
            sin->sin_addr,
        }
        if (in_pcblookup(inp->inp_head,
            sin->sin_addr,
@@ -159,8 +210,11 @@ COUNT(IN_PCBCONNECT);
            inp->inp_lport,
            0))
                return (EADDRINUSE);
            inp->inp_lport,
            0))
                return (EADDRINUSE);
-       if (inp->inp_laddr.s_addr == 0)
+       if (inp->inp_laddr.s_addr == INADDR_ANY) {
+               if (inp->inp_lport == 0)
+                       (void)in_pcbbind(inp, (struct mbuf *)0);
                inp->inp_laddr = ifaddr->sin_addr;
                inp->inp_laddr = ifaddr->sin_addr;
+       }
        inp->inp_faddr = sin->sin_addr;
        inp->inp_fport = sin->sin_port;
        return (0);
        inp->inp_faddr = sin->sin_addr;
        inp->inp_fport = sin->sin_port;
        return (0);
@@ -170,10 +224,9 @@ in_pcbdisconnect(inp)
        struct inpcb *inp;
 {
 
        struct inpcb *inp;
 {
 
-COUNT(IN_PCBDISCONNECT);
-       inp->inp_faddr.s_addr = 0;
+       inp->inp_faddr.s_addr = INADDR_ANY;
        inp->inp_fport = 0;
        inp->inp_fport = 0;
-       if (inp->inp_socket->so_state & SS_USERGONE)
+       if (inp->inp_socket->so_state & SS_NOFDREF)
                in_pcbdetach(inp);
 }
 
                in_pcbdetach(inp);
 }
 
@@ -184,27 +237,150 @@ in_pcbdetach(inp)
 
        so->so_pcb = 0;
        sofree(so);
 
        so->so_pcb = 0;
        sofree(so);
+       if (inp->inp_options)
+               (void)m_free(inp->inp_options);
        if (inp->inp_route.ro_rt)
                rtfree(inp->inp_route.ro_rt);
        remque(inp);
        (void) m_free(dtom(inp));
 }
 
        if (inp->inp_route.ro_rt)
                rtfree(inp->inp_route.ro_rt);
        remque(inp);
        (void) m_free(dtom(inp));
 }
 
-in_setsockaddr(sin, inp)
-       register struct sockaddr_in *sin;
+in_setsockaddr(inp, nam)
        register struct inpcb *inp;
        register struct inpcb *inp;
+       struct mbuf *nam;
 {
 {
-       if (sin == 0 || inp == 0)
-               panic("setsockaddr_in");
+       register struct sockaddr_in *sin;
+       
+       nam->m_len = sizeof (*sin);
+       sin = mtod(nam, struct sockaddr_in *);
        bzero((caddr_t)sin, sizeof (*sin));
        sin->sin_family = AF_INET;
        bzero((caddr_t)sin, sizeof (*sin));
        sin->sin_family = AF_INET;
+       sin->sin_len = sizeof(*sin);
        sin->sin_port = inp->inp_lport;
        sin->sin_addr = inp->inp_laddr;
 }
 
        sin->sin_port = inp->inp_lport;
        sin->sin_addr = inp->inp_laddr;
 }
 
+in_setpeeraddr(inp, nam)
+       struct inpcb *inp;
+       struct mbuf *nam;
+{
+       register struct sockaddr_in *sin;
+       
+       nam->m_len = sizeof (*sin);
+       sin = mtod(nam, struct sockaddr_in *);
+       bzero((caddr_t)sin, sizeof (*sin));
+       sin->sin_family = AF_INET;
+       sin->sin_len = sizeof(*sin);
+       sin->sin_port = inp->inp_fport;
+       sin->sin_addr = inp->inp_faddr;
+}
+
+/*
+ * Pass some notification to all connections of a protocol
+ * associated with address dst.  The local address and/or port numbers
+ * may be specified to limit the search.  The "usual action" will be
+ * taken, depending on the ctlinput cmd.  The caller must filter any
+ * cmds that are uninteresting (e.g., no error in the map).
+ * Call the protocol specific routine (if any) to report
+ * any errors for each matching socket.
+ *
+ * Must be called at splnet.
+ */
+in_pcbnotify(head, dst, fport, laddr, lport, cmd, notify)
+       struct inpcb *head;
+       struct sockaddr *dst;
+       u_short fport, lport;
+       struct in_addr laddr;
+       int cmd, (*notify)();
+{
+       register struct inpcb *inp, *oinp;
+       struct in_addr faddr;
+       int errno;
+       int in_rtchange();
+       extern u_char inetctlerrmap[];
+
+       if ((unsigned)cmd > PRC_NCMDS || dst->sa_family != AF_INET)
+               return;
+       faddr = ((struct sockaddr_in *)dst)->sin_addr;
+       if (faddr.s_addr == INADDR_ANY)
+               return;
+
+       /*
+        * Redirects go to all references to the destination,
+        * and use in_rtchange to invalidate the route cache.
+        * Dead host indications: notify all references to the destination.
+        * Otherwise, if we have knowledge of the local port and address,
+        * deliver only to that socket.
+        */
+       if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) {
+               fport = 0;
+               lport = 0;
+               laddr.s_addr = 0;
+               if (cmd != PRC_HOSTDEAD)
+                       notify = in_rtchange;
+       }
+       errno = inetctlerrmap[cmd];
+       for (inp = head->inp_next; inp != head;) {
+               if (inp->inp_faddr.s_addr != faddr.s_addr ||
+                   inp->inp_socket == 0 ||
+                   (lport && inp->inp_lport != lport) ||
+                   (laddr.s_addr && inp->inp_laddr.s_addr != laddr.s_addr) ||
+                   (fport && inp->inp_fport != fport)) {
+                       inp = inp->inp_next;
+                       continue;
+               }
+               oinp = inp;
+               inp = inp->inp_next;
+               if (notify)
+                       (*notify)(oinp, errno);
+       }
+}
+
+/*
+ * Check for alternatives when higher level complains
+ * about service problems.  For now, invalidate cached
+ * routing information.  If the route was created dynamically
+ * (by a redirect), time to try a default gateway again.
+ */
+in_losing(inp)
+       struct inpcb *inp;
+{
+       register struct rtentry *rt;
+
+       if ((rt = inp->inp_route.ro_rt)) {
+               rt_missmsg(RTM_LOSING, &inp->inp_route.ro_dst,
+                           rt->rt_gateway, (struct sockaddr *)rt_mask(rt),
+                           (struct sockaddr *)0, rt->rt_flags, 0);
+               if (rt->rt_flags & RTF_DYNAMIC)
+                       (void) rtrequest(RTM_DELETE, rt_key(rt),
+                               rt->rt_gateway, rt_mask(rt), rt->rt_flags, 
+                               (struct rtentry **)0);
+               inp->inp_route.ro_rt = 0;
+               rtfree(rt);
+               /*
+                * A new route can be allocated
+                * the next time output is attempted.
+                */
+       }
+}
+
 /*
 /*
- * SHOULD ALLOW MATCH ON MULTI-HOMING ONLY
+ * After a routing change, flush old routing
+ * and allocate a (hopefully) better one.
  */
  */
+in_rtchange(inp)
+       register struct inpcb *inp;
+{
+       if (inp->inp_route.ro_rt) {
+               rtfree(inp->inp_route.ro_rt);
+               inp->inp_route.ro_rt = 0;
+               /*
+                * A new route can be allocated the next time
+                * output is attempted.
+                */
+       }
+}
+
 struct inpcb *
 in_pcblookup(head, faddr, fport, laddr, lport, flags)
        struct inpcb *head;
 struct inpcb *
 in_pcblookup(head, faddr, fport, laddr, lport, flags)
        struct inpcb *head;
@@ -219,23 +395,23 @@ in_pcblookup(head, faddr, fport, laddr, lport, flags)
                if (inp->inp_lport != lport)
                        continue;
                wildcard = 0;
                if (inp->inp_lport != lport)
                        continue;
                wildcard = 0;
-               if (inp->inp_laddr.s_addr != 0) {
-                       if (laddr.s_addr == 0)
+               if (inp->inp_laddr.s_addr != INADDR_ANY) {
+                       if (laddr.s_addr == INADDR_ANY)
                                wildcard++;
                        else if (inp->inp_laddr.s_addr != laddr.s_addr)
                                continue;
                } else {
                                wildcard++;
                        else if (inp->inp_laddr.s_addr != laddr.s_addr)
                                continue;
                } else {
-                       if (laddr.s_addr != 0)
+                       if (laddr.s_addr != INADDR_ANY)
                                wildcard++;
                }
                                wildcard++;
                }
-               if (inp->inp_faddr.s_addr != 0) {
-                       if (faddr.s_addr == 0)
+               if (inp->inp_faddr.s_addr != INADDR_ANY) {
+                       if (faddr.s_addr == INADDR_ANY)
                                wildcard++;
                        else if (inp->inp_faddr.s_addr != faddr.s_addr ||
                            inp->inp_fport != fport)
                                continue;
                } else {
                                wildcard++;
                        else if (inp->inp_faddr.s_addr != faddr.s_addr ||
                            inp->inp_fport != fport)
                                continue;
                } else {
-                       if (faddr.s_addr != 0)
+                       if (faddr.s_addr != INADDR_ANY)
                                wildcard++;
                }
                if (wildcard && (flags & INPLOOKUP_WILDCARD) == 0)
                                wildcard++;
                }
                if (wildcard && (flags & INPLOOKUP_WILDCARD) == 0)