* %sccs.include.redist.c%
* @(#)uipc_usrreq.c 7.40 (Berkeley) %G%
#include <sys/filedesc.h>
#include <sys/socketvar.h>
* Unix communications domain.
* TODO:
*	rethink name space problems
*	need a proper out-of-band mechanism
/*
 * Default socket address.  Elsewhere in this file its size is stored
 * in nam->m_len and the structure itself is copied into the name mbuf.
 * The initializer fills the first two members with the structure's own
 * size and AF_UNIX (presumably the length and family fields -- confirm
 * against the struct sockaddr definition in use).
 */
struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };

/*
 * Prototype for fake inode numbers: the current value is handed to a
 * pcb with a post-increment (unp->unp_ino = unp_ino++) and that pcb
 * value is then reported as st_ino.
 */
ino_t unp_ino;
uipc_usrreq(so
, req
, m
, nam
, control
)
struct mbuf
*m
, *nam
, *control
;
struct unpcb
*unp
= sotounpcb(so
);
register struct socket
*so2
;
struct proc
*p
= curproc
; /* XXX */
if (req
!= PRU_SEND
&& control
&& control
->m_len
) {
if (unp
== 0 && req
!= PRU_ATTACH
) {
error
= unp_bind(unp
, nam
, p
);
error
= unp_connect(so
, nam
, p
);
error
= unp_connect2(so
, (struct socket
*)nam
);
* Pass back name of connected socket,
* if it was bound and we are still connected
* (our peer may have closed already!).
if (unp
->unp_conn
&& unp
->unp_conn
->unp_addr
) {
nam
->m_len
= unp
->unp_conn
->unp_addr
->m_len
;
bcopy(mtod(unp
->unp_conn
->unp_addr
, caddr_t
),
mtod(nam
, caddr_t
), (unsigned)nam
->m_len
);
nam
->m_len
= sizeof(sun_noname
);
*(mtod(nam
, struct sockaddr
*)) = sun_noname
;
#define rcv (&so->so_rcv)
#define snd (&so2->so_snd)
so2
= unp
->unp_conn
->unp_socket
;
* Adjust backpressure on sender
* and wakeup any waiting to write.
snd
->sb_mbmax
+= unp
->unp_mbcnt
- rcv
->sb_mbcnt
;
unp
->unp_mbcnt
= rcv
->sb_mbcnt
;
snd
->sb_hiwat
+= unp
->unp_cc
- rcv
->sb_cc
;
unp
->unp_cc
= rcv
->sb_cc
;
if (control
&& (error
= unp_internalize(control
, p
)))
error
= unp_connect(so
, nam
, p
);
if (unp
->unp_conn
== 0) {
so2
= unp
->unp_conn
->unp_socket
;
from
= mtod(unp
->unp_addr
, struct sockaddr
*);
if (sbappendaddr(&so2
->so_rcv
, from
, m
, control
)) {
#define rcv (&so2->so_rcv)
#define snd (&so->so_snd)
if (so
->so_state
& SS_CANTSENDMORE
) {
so2
= unp
->unp_conn
->unp_socket
;
* Send to paired receive port, and then reduce
* send buffer hiwater marks to maintain backpressure.
if (sbappendcontrol(rcv
, m
, control
))
rcv
->sb_mbcnt
- unp
->unp_conn
->unp_mbcnt
;
unp
->unp_conn
->unp_mbcnt
= rcv
->sb_mbcnt
;
snd
->sb_hiwat
-= rcv
->sb_cc
- unp
->unp_conn
->unp_cc
;
unp
->unp_conn
->unp_cc
= rcv
->sb_cc
;
unp_drop(unp
, ECONNABORTED
);
((struct stat
*) m
)->st_blksize
= so
->so_snd
.sb_hiwat
;
if (so
->so_type
== SOCK_STREAM
&& unp
->unp_conn
!= 0) {
so2
= unp
->unp_conn
->unp_socket
;
((struct stat
*) m
)->st_blksize
+= so2
->so_rcv
.sb_cc
;
((struct stat
*) m
)->st_dev
= NODEV
;
unp
->unp_ino
= unp_ino
++;
((struct stat
*) m
)->st_ino
= unp
->unp_ino
;
nam
->m_len
= unp
->unp_addr
->m_len
;
bcopy(mtod(unp
->unp_addr
, caddr_t
),
mtod(nam
, caddr_t
), (unsigned)nam
->m_len
);
if (unp
->unp_conn
&& unp
->unp_conn
->unp_addr
) {
nam
->m_len
= unp
->unp_conn
->unp_addr
->m_len
;
bcopy(mtod(unp
->unp_conn
->unp_addr
, caddr_t
),
mtod(nam
, caddr_t
), (unsigned)nam
->m_len
);
* Both send and receive buffers are allocated PIPSIZ bytes of buffering
* for stream sockets, although the total for sender and receiver is
* actually only PIPSIZ.
* Datagram sockets really use the sendspace as the maximum datagram size,
* and don't really want to reserve the sendspace. Their recvspace should
* be large enough for at least one max-size datagram plus address.
/*
 * Default buffer reservations handed to soreserve() when a socket has
 * no send or receive high-water mark yet.  One call uses the unpst_
 * pair and the other the unpdg_ pair (stream vs. datagram, going by the
 * names -- the code that selects between them is not visible here).
 */
u_long unpst_sendspace = PIPSIZ;
u_long unpst_recvspace = PIPSIZ;
u_long unpdg_sendspace = 2 * 1024;	/* really max datagram size */
u_long unpdg_recvspace = 4 * 1024;

int unp_rights;			/* file descriptors in flight */
register struct unpcb
*unp
;
if (so
->so_snd
.sb_hiwat
== 0 || so
->so_rcv
.sb_hiwat
== 0) {
error
= soreserve(so
, unpst_sendspace
, unpst_recvspace
);
error
= soreserve(so
, unpdg_sendspace
, unpdg_recvspace
);
m
= m_getclr(M_DONTWAIT
, MT_PCB
);
unp
= mtod(m
, struct unpcb
*);
so
->so_pcb
= (caddr_t
)unp
;
register struct unpcb
*unp
;
unp
->unp_vnode
->v_socket
= 0;
unp_drop(unp
->unp_refs
, ECONNRESET
);
soisdisconnected(unp
->unp_socket
);
unp
->unp_socket
->so_pcb
= 0;
(void) m_free(dtom(unp
));
* Normally the receive buffer is flushed later,
* in sofree, but if our receive buffer holds references
* to descriptors that are now garbage, we will dispose
* of those descriptor references after the garbage collector
* gets them (resulting in a "panic: closef: count < 0").
sorflush(unp
->unp_socket
);
struct sockaddr_un
*soun
= mtod(nam
, struct sockaddr_un
*);
register struct vnode
*vp
;
NDINIT(&nd
, CREATE
, FOLLOW
| LOCKPARENT
, UIO_SYSSPACE
,
if (unp
->unp_vnode
!= NULL
)
if (nam
->m_len
== MLEN
) {
if (*(mtod(nam
, caddr_t
) + nam
->m_len
- 1) != 0)
*(mtod(nam
, caddr_t
) + nam
->m_len
) = 0;
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
VOP_ABORTOP(nd
.ni_dvp
, &nd
.ni_cnd
);
LEASE_CHECK(nd
.ni_dvp
, p
, p
->p_ucred
, LEASE_WRITE
);
if (error
= VOP_CREATE(nd
.ni_dvp
, &nd
.ni_vp
, &nd
.ni_cnd
, &vattr
))
vp
->v_socket
= unp
->unp_socket
;
unp
->unp_addr
= m_copy(nam
, 0, (int)M_COPYALL
);
register struct sockaddr_un
*soun
= mtod(nam
, struct sockaddr_un
*);
register struct vnode
*vp
;
register struct socket
*so2
, *so3
;
struct unpcb
*unp2
, *unp3
;
NDINIT(&nd
, LOOKUP
, FOLLOW
| LOCKLEAF
, UIO_SYSSPACE
, soun
->sun_path
, p
);
if (nam
->m_data
+ nam
->m_len
== &nam
->m_dat
[MLEN
]) { /* XXX */
if (*(mtod(nam
, caddr_t
) + nam
->m_len
- 1) != 0)
*(mtod(nam
, caddr_t
) + nam
->m_len
) = 0;
if (vp
->v_type
!= VSOCK
) {
if (error
= VOP_ACCESS(vp
, VWRITE
, p
->p_ucred
, p
))
if (so
->so_type
!= so2
->so_type
) {
if (so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) {
if ((so2
->so_options
& SO_ACCEPTCONN
) == 0 ||
(so3
= sonewconn(so2
, 0)) == 0) {
m_copy(unp2
->unp_addr
, 0, (int)M_COPYALL
);
error
= unp_connect2(so
, so2
);
register struct socket
*so
;
register struct socket
*so2
;
register struct unpcb
*unp
= sotounpcb(so
);
register struct unpcb
*unp2
;
if (so2
->so_type
!= so
->so_type
)
unp
->unp_nextref
= unp2
->unp_refs
;
register struct unpcb
*unp2
= unp
->unp_conn
;
switch (unp
->unp_socket
->so_type
) {
if (unp2
->unp_refs
== unp
)
unp2
->unp_refs
= unp
->unp_nextref
;
if (unp2
->unp_nextref
== unp
)
unp2
= unp2
->unp_nextref
;
unp2
->unp_nextref
= unp
->unp_nextref
;
unp
->unp_socket
->so_state
&= ~SS_ISCONNECTED
;
soisdisconnected(unp
->unp_socket
);
soisdisconnected(unp2
->unp_socket
);
if (unp
->unp_socket
->so_type
== SOCK_STREAM
&& unp
->unp_conn
&&
(so
= unp
->unp_conn
->unp_socket
))
struct socket
*so
= unp
->unp_socket
;
so
->so_pcb
= (caddr_t
) 0;
(void) m_free(dtom(unp
));
struct proc
*p
= curproc
; /* XXX */
register struct cmsghdr
*cm
= mtod(rights
, struct cmsghdr
*);
register struct file
**rp
= (struct file
**)(cm
+ 1);
register struct file
*fp
;
int newfds
= (cm
->cmsg_len
- sizeof(*cm
)) / sizeof (int);
if (!fdavail(p
, newfds
)) {
for (i
= 0; i
< newfds
; i
++) {
for (i
= 0; i
< newfds
; i
++) {
panic("unp_externalize");
p
->p_fd
->fd_ofiles
[f
] = fp
;
unp_internalize(control
, p
)
struct filedesc
*fdp
= p
->p_fd
;
register struct cmsghdr
*cm
= mtod(control
, struct cmsghdr
*);
register struct file
**rp
;
register struct file
*fp
;
if (cm
->cmsg_type
!= SCM_RIGHTS
|| cm
->cmsg_level
!= SOL_SOCKET
||
cm
->cmsg_len
!= control
->m_len
)
oldfds
= (cm
->cmsg_len
- sizeof (*cm
)) / sizeof (int);
rp
= (struct file
**)(cm
+ 1);
for (i
= 0; i
< oldfds
; i
++) {
if ((unsigned)fd
>= fdp
->fd_nfiles
||
fdp
->fd_ofiles
[fd
] == NULL
)
rp
= (struct file
**)(cm
+ 1);
for (i
= 0; i
< oldfds
; i
++) {
fp
= fdp
->fd_ofiles
[*(int *)rp
];
/*
 * Garbage-collector state.  According to the description of the
 * collector in this file, a re-entered unp_gc returns at once when a
 * previous instance has already set unp_gcing.  The use of unp_defer
 * is not visible in this excerpt.
 */
int unp_defer;
int unp_gcing;

/* Domain that a socket's so_proto->pr_domain is compared against. */
extern struct domain unixdomain;
register struct file
*fp
, *nextfp
;
register struct socket
*so
;
struct file
**extra_ref
, **fpp
;
for (fp
= filehead
; fp
; fp
= fp
->f_filef
)
fp
->f_flag
&= ~(FMARK
|FDEFER
);
for (fp
= filehead
; fp
; fp
= fp
->f_filef
) {
if (fp
->f_flag
& FDEFER
) {
if (fp
->f_count
== fp
->f_msgcount
)
if (fp
->f_type
!= DTYPE_SOCKET
||
(so
= (struct socket
*)fp
->f_data
) == 0)
if (so
->so_proto
->pr_domain
!= &unixdomain
||
(so
->so_proto
->pr_flags
&PR_RIGHTS
) == 0)
if (so
->so_rcv
.sb_flags
& SB_LOCK
) {
* This is problematical; it's not clear
* we need to wait for the sockbuf to be
* unlocked (on a uniprocessor, at least),
* and it's also not clear what to do
* if sbwait returns an error due to receipt
* of a signal. If sbwait does return
* an error, we'll go into an infinite
* loop. Delete all of this for now.
(void) sbwait(&so
->so_rcv
);
unp_scan(so
->so_rcv
.sb_mb
, unp_mark
);
* We grab an extra reference to each of the file table entries
* that are not otherwise accessible and then free the rights
* that are stored in messages on them.
* The bug in the original code is a little tricky, so I'll describe
* what's wrong with it here.
* It is incorrect to simply unp_discard each entry for f_msgcount
* times -- consider the case of sockets A and B that contain
* references to each other. On a last close of some other socket,
* we trigger a gc since the number of outstanding rights (unp_rights)
* is non-zero. If during the sweep phase the gc code unp_discards,
* we end up doing a (full) closef on the descriptor. A closef on A
* results in the following chain. Closef calls soo_close, which
* calls soclose. Soclose calls first (through the switch
* uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
* returns because the previous instance had set unp_gcing, and
* we return all the way back to soclose, which marks the socket
* with SS_NOFDREF, and then calls sofree. Sofree calls sorflush
* to free up the rights that are queued in messages on the socket A,
* i.e., the reference on B. The sorflush calls via the dom_dispose
* switch unp_dispose, which unp_scans with unp_discard. This second
* instance of unp_discard just calls closef on B.
* Well, a similar chain occurs on B, resulting in a sorflush on B,
* which results in another closef on A. Unfortunately, A is already
* being closed, and the descriptor has already been marked with
* SS_NOFDREF, and soclose panics at this point.
* Here, we first take an extra reference to each inaccessible
* descriptor. Then, we call sorflush ourself, since we know
* it is a Unix domain socket anyhow. After we destroy all the
* rights carried in messages, we do a last closef to get rid
* of our extra reference. This is the last close, and the
* unp_detach etc will shut down the socket.
* 91/09/19, bsy@cs.cmu.edu
extra_ref
= malloc(nfiles
* sizeof(struct file
*), M_FILE
, M_WAITOK
);
for (nunref
= 0, fp
= filehead
, fpp
= extra_ref
; fp
; fp
= nextfp
) {
if (fp
->f_count
== fp
->f_msgcount
&& !(fp
->f_flag
& FMARK
)) {
for (i
= nunref
, fpp
= extra_ref
; --i
>= 0; ++fpp
)
sorflush((struct socket
*)(*fpp
)->f_data
);
for (i
= nunref
, fpp
= extra_ref
; --i
>= 0; ++fpp
)
free((caddr_t
)extra_ref
, M_FILE
);
unp_scan(m
, unp_discard
);
register struct mbuf
*m0
;
register struct file
**rp
;
register struct cmsghdr
*cm
;
for (m
= m0
; m
; m
= m
->m_next
)
if (m
->m_type
== MT_CONTROL
&&
m
->m_len
>= sizeof(*cm
)) {
cm
= mtod(m
, struct cmsghdr
*);
if (cm
->cmsg_level
!= SOL_SOCKET
||
cm
->cmsg_type
!= SCM_RIGHTS
)
qfds
= (cm
->cmsg_len
- sizeof *cm
)
/ sizeof (struct file
*);
rp
= (struct file
**)(cm
+ 1);
for (i
= 0; i
< qfds
; i
++)
break; /* XXX, but saves time */
fp
->f_flag
|= (FMARK
|FDEFER
);
(void) closef(fp
, (struct proc
*)NULL
);