use inb and outb from locore
[unix-history] / usr / src / sys / kern / uipc_usrreq.c
CommitLineData
da7c5cc6 1/*
96e6449f 2 *
dbf0c423 3 * %sccs.include.redist.c%
f1ff9444 4 *
389e86b2 5 * @(#)uipc_usrreq.c 7.40 (Berkeley) %G%
da7c5cc6 6 */
d6213d15 7
38a01dbe
KB
8#include <sys/param.h>
9#include <sys/systm.h>
10#include <sys/proc.h>
11#include <sys/filedesc.h>
12#include <sys/domain.h>
13#include <sys/protosw.h>
14#include <sys/socket.h>
15#include <sys/socketvar.h>
16#include <sys/unpcb.h>
17#include <sys/un.h>
18#include <sys/namei.h>
19#include <sys/vnode.h>
20#include <sys/file.h>
21#include <sys/stat.h>
22#include <sys/mbuf.h>
d6213d15
BJ
23
24/*
25 * Unix communications domain.
88a7a62a
SL
26 *
27 * TODO:
28 * SEQPACKET, RDM
8ecea7bc 29 * rethink name space problems
88a7a62a 30 * need a proper out-of-band
d6213d15 31 */
a2aebb63 32struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
0293c204 33ino_t unp_ino; /* prototype for fake inode numbers */
d6213d15
BJ
34
35/*ARGSUSED*/
0293c204 36uipc_usrreq(so, req, m, nam, control)
d6213d15
BJ
37 struct socket *so;
38 int req;
0293c204 39 struct mbuf *m, *nam, *control;
d6213d15
BJ
40{
41 struct unpcb *unp = sotounpcb(so);
42 register struct socket *so2;
d83a7d17 43 register int error = 0;
a1a7e02b 44 struct proc *p = curproc; /* XXX */
d6213d15 45
50b20c78
MK
46 if (req == PRU_CONTROL)
47 return (EOPNOTSUPP);
0293c204 48 if (req != PRU_SEND && control && control->m_len) {
88a7a62a
SL
49 error = EOPNOTSUPP;
50 goto release;
51 }
52 if (unp == 0 && req != PRU_ATTACH) {
53 error = EINVAL;
54 goto release;
55 }
d6213d15
BJ
56 switch (req) {
57
58 case PRU_ATTACH:
59 if (unp) {
4f083fd7 60 error = EISCONN;
d6213d15
BJ
61 break;
62 }
fca5570f 63 error = unp_attach(so);
d6213d15
BJ
64 break;
65
66 case PRU_DETACH:
67 unp_detach(unp);
68 break;
69
4f083fd7 70 case PRU_BIND:
a1a7e02b 71 error = unp_bind(unp, nam, p);
4f083fd7
SL
72 break;
73
74 case PRU_LISTEN:
f1ff9444 75 if (unp->unp_vnode == 0)
4f083fd7
SL
76 error = EINVAL;
77 break;
78
d6213d15 79 case PRU_CONNECT:
a1a7e02b 80 error = unp_connect(so, nam, p);
d6213d15
BJ
81 break;
82
88a7a62a 83 case PRU_CONNECT2:
d97afdcc 84 error = unp_connect2(so, (struct socket *)nam);
88a7a62a
SL
85 break;
86
d6213d15
BJ
87 case PRU_DISCONNECT:
88 unp_disconnect(unp);
89 break;
90
4f083fd7 91 case PRU_ACCEPT:
4aa1832d
MK
92 /*
93 * Pass back name of connected socket,
94 * if it was bound and we are still connected
95 * (our peer may have closed already!).
96 */
97 if (unp->unp_conn && unp->unp_conn->unp_addr) {
6c0894dd
MK
98 nam->m_len = unp->unp_conn->unp_addr->m_len;
99 bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
100 mtod(nam, caddr_t), (unsigned)nam->m_len);
101 } else {
102 nam->m_len = sizeof(sun_noname);
103 *(mtod(nam, struct sockaddr *)) = sun_noname;
104 }
d6213d15
BJ
105 break;
106
107 case PRU_SHUTDOWN:
108 socantsendmore(so);
5b09b219 109 unp_shutdown(unp);
d6213d15 110 break;
d6213d15
BJ
111
112 case PRU_RCVD:
113 switch (so->so_type) {
114
115 case SOCK_DGRAM:
116 panic("uipc 1");
5fe6f9d1 117 /*NOTREACHED*/
d6213d15 118
5fe6f9d1 119 case SOCK_STREAM:
d6213d15
BJ
120#define rcv (&so->so_rcv)
121#define snd (&so2->so_snd)
122 if (unp->unp_conn == 0)
123 break;
124 so2 = unp->unp_conn->unp_socket;
125 /*
6c0894dd 126 * Adjust backpressure on sender
d6213d15
BJ
127 * and wakeup any waiting to write.
128 */
6c0894dd
MK
129 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
130 unp->unp_mbcnt = rcv->sb_mbcnt;
131 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
132 unp->unp_cc = rcv->sb_cc;
9c40f33e 133 sowwakeup(so2);
d6213d15
BJ
134#undef snd
135#undef rcv
d6213d15
BJ
136 break;
137
138 default:
139 panic("uipc 2");
140 }
141 break;
142
143 case PRU_SEND:
a1a7e02b 144 if (control && (error = unp_internalize(control, p)))
d83a7d17 145 break;
d6213d15
BJ
146 switch (so->so_type) {
147
6c0894dd
MK
148 case SOCK_DGRAM: {
149 struct sockaddr *from;
150
fca5570f 151 if (nam) {
d6213d15
BJ
152 if (unp->unp_conn) {
153 error = EISCONN;
154 break;
155 }
a1a7e02b 156 error = unp_connect(so, nam, p);
d6213d15
BJ
157 if (error)
158 break;
159 } else {
160 if (unp->unp_conn == 0) {
161 error = ENOTCONN;
162 break;
163 }
164 }
165 so2 = unp->unp_conn->unp_socket;
6c0894dd
MK
166 if (unp->unp_addr)
167 from = mtod(unp->unp_addr, struct sockaddr *);
168 else
169 from = &sun_noname;
d83a7d17 170 if (sbappendaddr(&so2->so_rcv, from, m, control)) {
6c0894dd
MK
171 sorwakeup(so2);
172 m = 0;
d83a7d17 173 control = 0;
6c0894dd
MK
174 } else
175 error = ENOBUFS;
fca5570f 176 if (nam)
4f083fd7 177 unp_disconnect(unp);
d6213d15 178 break;
6c0894dd 179 }
d6213d15
BJ
180
181 case SOCK_STREAM:
182#define rcv (&so2->so_rcv)
183#define snd (&so->so_snd)
7068721c
MK
184 if (so->so_state & SS_CANTSENDMORE) {
185 error = EPIPE;
186 break;
187 }
d6213d15
BJ
188 if (unp->unp_conn == 0)
189 panic("uipc 3");
190 so2 = unp->unp_conn->unp_socket;
191 /*
6c0894dd
MK
192 * Send to paired receive port, and then reduce
193 * send buffer hiwater marks to maintain backpressure.
d6213d15
BJ
194 * Wake up readers.
195 */
d83a7d17 196 if (control) {
5b09b219
MK
197 if (sbappendcontrol(rcv, m, control))
198 control = 0;
d83a7d17 199 } else
6c0894dd
MK
200 sbappend(rcv, m);
201 snd->sb_mbmax -=
202 rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
203 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
204 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
205 unp->unp_conn->unp_cc = rcv->sb_cc;
9c40f33e
MK
206 sorwakeup(so2);
207 m = 0;
d6213d15
BJ
208#undef snd
209#undef rcv
210 break;
211
212 default:
213 panic("uipc 4");
214 }
215 break;
216
217 case PRU_ABORT:
218 unp_drop(unp, ECONNABORTED);
219 break;
220
bfd4e82c
MK
221 case PRU_SENSE:
222 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
223 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
224 so2 = unp->unp_conn->unp_socket;
225 ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
226 }
0abd16a9 227 ((struct stat *) m)->st_dev = NODEV;
0293c204
KS
228 if (unp->unp_ino == 0)
229 unp->unp_ino = unp_ino++;
230 ((struct stat *) m)->st_ino = unp->unp_ino;
bfd4e82c 231 return (0);
d6213d15
BJ
232
233 case PRU_RCVOOB:
913b2f19 234 return (EOPNOTSUPP);
d6213d15
BJ
235
236 case PRU_SENDOOB:
9c40f33e 237 error = EOPNOTSUPP;
d6213d15
BJ
238 break;
239
240 case PRU_SOCKADDR:
e14f40ae
MK
241 if (unp->unp_addr) {
242 nam->m_len = unp->unp_addr->m_len;
243 bcopy(mtod(unp->unp_addr, caddr_t),
244 mtod(nam, caddr_t), (unsigned)nam->m_len);
245 } else
246 nam->m_len = 0;
d6213d15
BJ
247 break;
248
a7343092 249 case PRU_PEERADDR:
bbd1716e
MK
250 if (unp->unp_conn && unp->unp_conn->unp_addr) {
251 nam->m_len = unp->unp_conn->unp_addr->m_len;
252 bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
336a1154 253 mtod(nam, caddr_t), (unsigned)nam->m_len);
e14f40ae
MK
254 } else
255 nam->m_len = 0;
a7343092
SL
256 break;
257
d6213d15
BJ
258 case PRU_SLOWTIMO:
259 break;
260
261 default:
262 panic("piusrreq");
263 }
88a7a62a 264release:
d83a7d17
MK
265 if (control)
266 m_freem(control);
88a7a62a
SL
267 if (m)
268 m_freem(m);
e14b8185 269 return (error);
d6213d15
BJ
270}
271
bfd4e82c 272/*
6c0894dd
MK
273 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
274 * for stream sockets, although the total for sender and receiver is
275 * actually only PIPSIZ.
bfd4e82c
MK
276 * Datagram sockets really use the sendspace as the maximum datagram size,
277 * and don't really want to reserve the sendspace. Their recvspace should
278 * be large enough for at least one max-size datagram plus address.
279 */
280#define PIPSIZ 4096
4f5156ea
MK
281u_long unpst_sendspace = PIPSIZ;
282u_long unpst_recvspace = PIPSIZ;
283u_long unpdg_sendspace = 2*1024; /* really max datagram size */
284u_long unpdg_recvspace = 4*1024;
d6213d15 285
6c0894dd
MK
286int unp_rights; /* file descriptors in flight */
287
4f083fd7 288unp_attach(so)
d6213d15 289 struct socket *so;
d6213d15 290{
4f083fd7 291 register struct mbuf *m;
d6213d15 292 register struct unpcb *unp;
d6213d15
BJ
293 int error;
294
4f5156ea
MK
295 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
296 switch (so->so_type) {
bfd4e82c 297
4f5156ea
MK
298 case SOCK_STREAM:
299 error = soreserve(so, unpst_sendspace, unpst_recvspace);
300 break;
bfd4e82c 301
4f5156ea
MK
302 case SOCK_DGRAM:
303 error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
304 break;
224df21f
KM
305
306 default:
307 panic("unp_attach");
4f5156ea
MK
308 }
309 if (error)
310 return (error);
bfd4e82c 311 }
cce93e4b 312 m = m_getclr(M_DONTWAIT, MT_PCB);
5fe6f9d1
SL
313 if (m == NULL)
314 return (ENOBUFS);
d6213d15
BJ
315 unp = mtod(m, struct unpcb *);
316 so->so_pcb = (caddr_t)unp;
317 unp->unp_socket = so;
d6213d15 318 return (0);
d6213d15
BJ
319}
320
d6213d15 321unp_detach(unp)
4f083fd7 322 register struct unpcb *unp;
d6213d15
BJ
323{
324
f1ff9444
KM
325 if (unp->unp_vnode) {
326 unp->unp_vnode->v_socket = 0;
327 vrele(unp->unp_vnode);
328 unp->unp_vnode = 0;
d6213d15
BJ
329 }
330 if (unp->unp_conn)
331 unp_disconnect(unp);
332 while (unp->unp_refs)
333 unp_drop(unp->unp_refs, ECONNRESET);
334 soisdisconnected(unp->unp_socket);
335 unp->unp_socket->so_pcb = 0;
6c0894dd 336 m_freem(unp->unp_addr);
4f083fd7 337 (void) m_free(dtom(unp));
389e86b2
KM
338 if (unp_rights) {
339 /*
340 * Normally the receive buffer is flushed later,
341 * in sofree, but if our receive buffer holds references
342 * to descriptors that are now garbage, we will dispose
343 * of those descriptor references after the garbage collector
344 * gets them (resulting in a "panic: closef: count < 0").
345 */
346 sorflush(unp->unp_socket);
6c0894dd 347 unp_gc();
389e86b2 348 }
d6213d15
BJ
349}
350
a1a7e02b 351unp_bind(unp, nam, p)
d6213d15 352 struct unpcb *unp;
4f083fd7 353 struct mbuf *nam;
a1a7e02b 354 struct proc *p;
d6213d15 355{
4f083fd7 356 struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
f1ff9444 357 register struct vnode *vp;
f1ff9444 358 struct vattr vattr;
4f083fd7 359 int error;
8429d022 360 struct nameidata nd;
d6213d15 361
56b1696d
KM
362 NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
363 soun->sun_path, p);
e0d8abb8 364 if (unp->unp_vnode != NULL)
88a7a62a 365 return (EINVAL);
61375223
MK
366 if (nam->m_len == MLEN) {
367 if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
368 return (EINVAL);
369 } else
370 *(mtod(nam, caddr_t) + nam->m_len) = 0;
88a7a62a 371/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
56b1696d 372 if (error = namei(&nd))
f3bb1ae7 373 return (error);
56b1696d 374 vp = nd.ni_vp;
f1ff9444 375 if (vp != NULL) {
56b1696d
KM
376 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
377 if (nd.ni_dvp == vp)
378 vrele(nd.ni_dvp);
39d703d6 379 else
56b1696d 380 vput(nd.ni_dvp);
66955caf 381 vrele(vp);
f1ff9444 382 return (EADDRINUSE);
f3bb1ae7 383 }
3ee1461b 384 VATTR_NULL(&vattr);
f1ff9444
KM
385 vattr.va_type = VSOCK;
386 vattr.va_mode = 0777;
56b1696d
KM
387 LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
388 if (error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr))
d6213d15 389 return (error);
56b1696d 390 vp = nd.ni_vp;
f1ff9444
KM
391 vp->v_socket = unp->unp_socket;
392 unp->unp_vnode = vp;
6c0894dd 393 unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL);
c4ec2128 394 VOP_UNLOCK(vp);
d6213d15
BJ
395 return (0);
396}
397
/*
 * Connect a socket to the Unix-domain socket named by nam.
 *
 * Looks the path up, checks that it is a socket vnode the process may
 * write, and that the socket behind it has the same type.  For
 * connection-oriented protocols a new socket is spawned from the
 * listening one and inherits its bound address; the actual pairing is
 * done by unp_connect2.  Returns 0 or an errno value.
 */
unp_connect(so, nam, p)
	struct socket *so;
	struct mbuf *nam;
	struct proc *p;
{
	register struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
	register struct vnode *vp;
	register struct socket *peer, *child;
	struct unpcb *listen_pcb, *child_pcb;
	int error;
	struct nameidata nd;

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);

	/* NUL terminate the path, or insist it already is if no room. */
	if (nam->m_data + nam->m_len != &nam->m_dat[MLEN])	/* XXX */
		*(mtod(nam, caddr_t) + nam->m_len) = 0;
	else if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
		return (EMSGSIZE);

	error = namei(&nd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto out;
	}
	error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
	if (error)
		goto out;
	peer = vp->v_socket;
	if (peer == 0) {
		error = ECONNREFUSED;
		goto out;
	}
	if (so->so_type != peer->so_type) {
		error = EPROTOTYPE;
		goto out;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* Peer must be listening and able to spawn a new socket. */
		if ((peer->so_options & SO_ACCEPTCONN) == 0 ||
		    (child = sonewconn(peer, 0)) == 0) {
			error = ECONNREFUSED;
			goto out;
		}
		listen_pcb = sotounpcb(peer);
		child_pcb = sotounpcb(child);
		if (listen_pcb->unp_addr)
			child_pcb->unp_addr =
			    m_copy(listen_pcb->unp_addr, 0, (int)M_COPYALL);
		peer = child;
	}
	error = unp_connect2(so, peer);
out:
	/* The lookup returned the vnode locked; unlock and release it. */
	vput(vp);
	return (error);
}
452
d97afdcc 453unp_connect2(so, so2)
88a7a62a 454 register struct socket *so;
88a7a62a
SL
455 register struct socket *so2;
456{
457 register struct unpcb *unp = sotounpcb(so);
458 register struct unpcb *unp2;
459
460 if (so2->so_type != so->so_type)
461 return (EPROTOTYPE);
ccffacbd
SL
462 unp2 = sotounpcb(so2);
463 unp->unp_conn = unp2;
d6213d15
BJ
464 switch (so->so_type) {
465
466 case SOCK_DGRAM:
d6213d15
BJ
467 unp->unp_nextref = unp2->unp_refs;
468 unp2->unp_refs = unp;
9c40f33e 469 soisconnected(so);
d6213d15
BJ
470 break;
471
472 case SOCK_STREAM:
4f083fd7 473 unp2->unp_conn = unp;
ccffacbd 474 soisconnected(so);
0293c204 475 soisconnected(so2);
d6213d15
BJ
476 break;
477
478 default:
88a7a62a 479 panic("unp_connect2");
d6213d15 480 }
d6213d15 481 return (0);
d6213d15 482}
4f083fd7
SL
483
484unp_disconnect(unp)
485 struct unpcb *unp;
486{
487 register struct unpcb *unp2 = unp->unp_conn;
488
489 if (unp2 == 0)
490 return;
491 unp->unp_conn = 0;
4f083fd7
SL
492 switch (unp->unp_socket->so_type) {
493
494 case SOCK_DGRAM:
495 if (unp2->unp_refs == unp)
496 unp2->unp_refs = unp->unp_nextref;
497 else {
498 unp2 = unp2->unp_refs;
499 for (;;) {
500 if (unp2 == 0)
501 panic("unp_disconnect");
502 if (unp2->unp_nextref == unp)
503 break;
504 unp2 = unp2->unp_nextref;
505 }
506 unp2->unp_nextref = unp->unp_nextref;
507 }
508 unp->unp_nextref = 0;
c5935bca 509 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
4f083fd7
SL
510 break;
511
512 case SOCK_STREAM:
ccffacbd 513 soisdisconnected(unp->unp_socket);
4f083fd7
SL
514 unp2->unp_conn = 0;
515 soisdisconnected(unp2->unp_socket);
4f083fd7
SL
516 break;
517 }
518}
519
#ifdef notdef
/*
 * Abort is just a detach.  Compiled out.
 */
unp_abort(unp)
	struct unpcb *unp;
{
	unp_detach(unp);
}
#endif
4f083fd7 528
5b09b219 529unp_shutdown(unp)
4f083fd7
SL
530 struct unpcb *unp;
531{
5b09b219 532 struct socket *so;
4f083fd7 533
5b09b219
MK
534 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
535 (so = unp->unp_conn->unp_socket))
536 socantrcvmore(so);
4f083fd7
SL
537}
538
539unp_drop(unp, errno)
540 struct unpcb *unp;
541 int errno;
542{
d12671e0 543 struct socket *so = unp->unp_socket;
4f083fd7 544
d12671e0 545 so->so_error = errno;
4f083fd7 546 unp_disconnect(unp);
d12671e0
MK
547 if (so->so_head) {
548 so->so_pcb = (caddr_t) 0;
6c0894dd 549 m_freem(unp->unp_addr);
d12671e0
MK
550 (void) m_free(dtom(unp));
551 sofree(so);
552 }
4f083fd7
SL
553}
554
#ifdef notdef
/*
 * Drain routine: nothing to do.  Compiled out.
 */
unp_drain()
{
}
#endif
561
562unp_externalize(rights)
563 struct mbuf *rights;
564{
8429d022 565 struct proc *p = curproc; /* XXX */
88a7a62a 566 register int i;
0293c204
KS
567 register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
568 register struct file **rp = (struct file **)(cm + 1);
88a7a62a 569 register struct file *fp;
0293c204 570 int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int);
88a7a62a
SL
571 int f;
572
58073841 573 if (!fdavail(p, newfds)) {
88a7a62a
SL
574 for (i = 0; i < newfds; i++) {
575 fp = *rp;
576 unp_discard(fp);
577 *rp++ = 0;
578 }
579 return (EMSGSIZE);
580 }
581 for (i = 0; i < newfds; i++) {
8429d022 582 if (fdalloc(p, 0, &f))
88a7a62a
SL
583 panic("unp_externalize");
584 fp = *rp;
a52e940f 585 p->p_fd->fd_ofiles[f] = fp;
88a7a62a 586 fp->f_msgcount--;
6c0894dd 587 unp_rights--;
bb3b4851 588 *(int *)rp++ = f;
88a7a62a
SL
589 }
590 return (0);
591}
592
a1a7e02b 593unp_internalize(control, p)
d83a7d17 594 struct mbuf *control;
a1a7e02b 595 struct proc *p;
88a7a62a 596{
a1a7e02b 597 struct filedesc *fdp = p->p_fd;
d83a7d17 598 register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
88a7a62a 599 register struct file **rp;
88a7a62a 600 register struct file *fp;
d83a7d17
MK
601 register int i, fd;
602 int oldfds;
88a7a62a 603
d83a7d17
MK
604 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
605 cm->cmsg_len != control->m_len)
0293c204
KS
606 return (EINVAL);
607 oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
0293c204 608 rp = (struct file **)(cm + 1);
c4ec2128
KM
609 for (i = 0; i < oldfds; i++) {
610 fd = *(int *)rp++;
a52e940f
MK
611 if ((unsigned)fd >= fdp->fd_nfiles ||
612 fdp->fd_ofiles[fd] == NULL)
88a7a62a 613 return (EBADF);
c4ec2128 614 }
0293c204 615 rp = (struct file **)(cm + 1);
44eb2da3 616 for (i = 0; i < oldfds; i++) {
a52e940f 617 fp = fdp->fd_ofiles[*(int *)rp];
88a7a62a
SL
618 *rp++ = fp;
619 fp->f_count++;
620 fp->f_msgcount++;
6c0894dd 621 unp_rights++;
88a7a62a
SL
622 }
623 return (0);
624}
625
int	unp_defer;		/* files flagged FDEFER, still to be scanned */
int	unp_gcing;		/* set while unp_gc is running */
int	unp_mark();
extern	struct domain unixdomain;
88a7a62a
SL
629
630unp_gc()
631{
902505c0 632 register struct file *fp, *nextfp;
88a7a62a 633 register struct socket *so;
cb9bf523
KM
634 struct file **extra_ref, **fpp;
635 int nunref, i;
88a7a62a
SL
636
637 if (unp_gcing)
638 return;
639 unp_gcing = 1;
640restart:
641 unp_defer = 0;
d7a29601 642 for (fp = filehead; fp; fp = fp->f_filef)
88a7a62a
SL
643 fp->f_flag &= ~(FMARK|FDEFER);
644 do {
d7a29601 645 for (fp = filehead; fp; fp = fp->f_filef) {
88a7a62a
SL
646 if (fp->f_count == 0)
647 continue;
648 if (fp->f_flag & FDEFER) {
649 fp->f_flag &= ~FDEFER;
650 unp_defer--;
651 } else {
652 if (fp->f_flag & FMARK)
653 continue;
654 if (fp->f_count == fp->f_msgcount)
655 continue;
656 fp->f_flag |= FMARK;
657 }
aa5945aa
MK
658 if (fp->f_type != DTYPE_SOCKET ||
659 (so = (struct socket *)fp->f_data) == 0)
88a7a62a 660 continue;
261a8548 661 if (so->so_proto->pr_domain != &unixdomain ||
c5935bca 662 (so->so_proto->pr_flags&PR_RIGHTS) == 0)
88a7a62a 663 continue;
5b09b219 664#ifdef notdef
88a7a62a 665 if (so->so_rcv.sb_flags & SB_LOCK) {
5b09b219
MK
666 /*
667 * This is problematical; it's not clear
668 * we need to wait for the sockbuf to be
669 * unlocked (on a uniprocessor, at least),
670 * and it's also not clear what to do
671 * if sbwait returns an error due to receipt
672 * of a signal. If sbwait does return
673 * an error, we'll go into an infinite
674 * loop. Delete all of this for now.
675 */
676 (void) sbwait(&so->so_rcv);
88a7a62a
SL
677 goto restart;
678 }
5b09b219 679#endif
88a7a62a
SL
680 unp_scan(so->so_rcv.sb_mb, unp_mark);
681 }
682 } while (unp_defer);
cb9bf523
KM
683 /*
684 * We grab an extra reference to each of the file table entries
685 * that are not otherwise accessible and then free the rights
686 * that are stored in messages on them.
687 *
688 * The bug in the orginal code is a little tricky, so I'll describe
689 * what's wrong with it here.
690 *
691 * It is incorrect to simply unp_discard each entry for f_msgcount
692 * times -- consider the case of sockets A and B that contain
693 * references to each other. On a last close of some other socket,
694 * we trigger a gc since the number of outstanding rights (unp_rights)
695 * is non-zero. If during the sweep phase the gc code un_discards,
696 * we end up doing a (full) closef on the descriptor. A closef on A
697 * results in the following chain. Closef calls soo_close, which
698 * calls soclose. Soclose calls first (through the switch
699 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
700 * returns because the previous instance had set unp_gcing, and
701 * we return all the way back to soclose, which marks the socket
702 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush
703 * to free up the rights that are queued in messages on the socket A,
704 * i.e., the reference on B. The sorflush calls via the dom_dispose
705 * switch unp_dispose, which unp_scans with unp_discard. This second
706 * instance of unp_discard just calls closef on B.
707 *
708 * Well, a similar chain occurs on B, resulting in a sorflush on B,
709 * which results in another closef on A. Unfortunately, A is already
710 * being closed, and the descriptor has already been marked with
711 * SS_NOFDREF, and soclose panics at this point.
712 *
713 * Here, we first take an extra reference to each inaccessible
714 * descriptor. Then, we call sorflush ourself, since we know
715 * it is a Unix domain socket anyhow. After we destroy all the
716 * rights carried in messages, we do a last closef to get rid
717 * of our extra reference. This is the last close, and the
718 * unp_detach etc will shut down the socket.
719 *
720 * 91/09/19, bsy@cs.cmu.edu
721 */
722 extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
723 for (nunref = 0, fp = filehead, fpp = extra_ref; fp; fp = nextfp) {
902505c0 724 nextfp = fp->f_filef;
88a7a62a
SL
725 if (fp->f_count == 0)
726 continue;
cb9bf523
KM
727 if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
728 *fpp++ = fp;
729 nunref++;
730 fp->f_count++;
731 }
88a7a62a 732 }
cb9bf523
KM
733 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
734 sorflush((struct socket *)(*fpp)->f_data);
735 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
736 closef(*fpp);
737 free((caddr_t)extra_ref, M_FILE);
88a7a62a
SL
738 unp_gcing = 0;
739}
740
261a8548
MK
/*
 * Discard every file reference carried in the given mbuf chain.
 * A null chain is ignored.
 */
unp_dispose(m)
	struct mbuf *m;
{
	int unp_discard();

	if (m == 0)
		return;
	unp_scan(m, unp_discard);
}
749
750unp_scan(m0, op)
751 register struct mbuf *m0;
88a7a62a
SL
752 int (*op)();
753{
261a8548 754 register struct mbuf *m;
88a7a62a 755 register struct file **rp;
d83a7d17 756 register struct cmsghdr *cm;
88a7a62a 757 register int i;
148703ec 758 int qfds;
261a8548
MK
759
760 while (m0) {
761 for (m = m0; m; m = m->m_next)
d83a7d17
MK
762 if (m->m_type == MT_CONTROL &&
763 m->m_len >= sizeof(*cm)) {
0293c204 764 cm = mtod(m, struct cmsghdr *);
d83a7d17
MK
765 if (cm->cmsg_level != SOL_SOCKET ||
766 cm->cmsg_type != SCM_RIGHTS)
767 continue;
0293c204
KS
768 qfds = (cm->cmsg_len - sizeof *cm)
769 / sizeof (struct file *);
770 rp = (struct file **)(cm + 1);
261a8548
MK
771 for (i = 0; i < qfds; i++)
772 (*op)(*rp++);
773 break; /* XXX, but saves time */
774 }
148703ec 775 m0 = m0->m_act;
88a7a62a 776 }
88a7a62a
SL
777}
778
779unp_mark(fp)
780 struct file *fp;
781{
782
783 if (fp->f_flag & FMARK)
784 return;
785 unp_defer++;
786 fp->f_flag |= (FMARK|FDEFER);
787}
788
789unp_discard(fp)
790 struct file *fp;
791{
792
793 fp->f_msgcount--;
6c0894dd 794 unp_rights--;
84096e8e 795 (void) closef(fp, (struct proc *)NULL);
88a7a62a 796}