Initial import, 0.1 + pk 0.2.4-B1
[unix-history] / sys / nfs / nfs_vfsops.c
CommitLineData
15637ed4
RG
1/*
2 * Copyright (c) 1989 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)nfs_vfsops.c 7.31 (Berkeley) 5/6/91
37 */
38
39#include "param.h"
40#include "conf.h"
41#include "ioctl.h"
42#include "signal.h"
43#include "proc.h"
44#include "namei.h"
45#include "vnode.h"
46#include "mount.h"
47#include "buf.h"
48#include "mbuf.h"
49#include "socket.h"
50#include "systm.h"
51
52#include "../net/if.h"
53#include "../net/route.h"
54#include "../netinet/in.h"
55
56#include "nfsv2.h"
57#include "nfsnode.h"
58#include "nfsmount.h"
59#include "nfs.h"
60#include "xdr_subs.h"
61#include "nfsm_subs.h"
62#include "nfsdiskless.h"
63
64/*
65 * nfs vfs operations.
66 */
67struct vfsops nfs_vfsops = {
68 nfs_mount,
69 nfs_start,
70 nfs_unmount,
71 nfs_root,
72 nfs_quotactl,
73 nfs_statfs,
74 nfs_sync,
75 nfs_fhtovp,
76 nfs_vptofh,
77 nfs_init,
78};
79
80static u_char nfs_mntid;
81extern u_long nfs_procids[NFS_NPROCS];
82extern u_long nfs_prog, nfs_vers;
83struct nfs_diskless nfs_diskless;
84void nfs_disconnect();
85
86#define TRUE 1
87#define FALSE 0
88
89/*
90 * nfs statfs call
91 */
92nfs_statfs(mp, sbp, p)
93 struct mount *mp;
94 register struct statfs *sbp;
95 struct proc *p;
96{
97 register struct vnode *vp;
98 register struct nfsv2_statfs *sfp;
99 register caddr_t cp;
100 register long t1;
101 caddr_t bpos, dpos, cp2;
102 u_long xid;
103 int error = 0;
104 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
105 struct nfsmount *nmp;
106 struct ucred *cred;
107 struct nfsnode *np;
108
109 nmp = VFSTONFS(mp);
110 if (error = nfs_nget(mp, &nmp->nm_fh, &np))
111 return (error);
112 vp = NFSTOV(np);
113 nfsstats.rpccnt[NFSPROC_STATFS]++;
114 cred = crget();
115 cred->cr_ngroups = 1;
116 nfsm_reqhead(nfs_procids[NFSPROC_STATFS], cred, NFSX_FH);
117 nfsm_fhtom(vp);
118 nfsm_request(vp, NFSPROC_STATFS, p, 0);
119 nfsm_disect(sfp, struct nfsv2_statfs *, NFSX_STATFS);
120 sbp->f_type = MOUNT_NFS;
121 sbp->f_flags = nmp->nm_flag;
122 sbp->f_bsize = fxdr_unsigned(long, sfp->sf_tsize);
123 sbp->f_fsize = fxdr_unsigned(long, sfp->sf_bsize);
124 sbp->f_blocks = fxdr_unsigned(long, sfp->sf_blocks);
125 sbp->f_bfree = fxdr_unsigned(long, sfp->sf_bfree);
126 sbp->f_bavail = fxdr_unsigned(long, sfp->sf_bavail);
127 sbp->f_files = 0;
128 sbp->f_ffree = 0;
129 if (sbp != &mp->mnt_stat) {
130 bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
131 bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
132 }
133 nfsm_reqdone;
134 nfs_nput(vp);
135 crfree(cred);
136 return (error);
137}
138
139/*
140 * Mount a remote root fs via. nfs. This depends on the info in the
141 * nfs_diskless structure that has been filled in properly by some primary
142 * bootstrap.
143 * It goes something like this:
144 * - do enough of "ifconfig" by calling ifioctl() so that the system
145 * can talk to the server
146 * - If nfs_diskless.mygateway is filled in, use that address as
147 * a default gateway.
148 * (This is done the 4.3 way with rtioctl() and should be changed)
149 * - hand craft the swap nfs vnode hanging off a fake mount point
150 * - build the rootfs mount point and call mountnfs() to do the rest.
151 */
152nfs_mountroot()
153{
154 register struct mount *mp;
155 register struct mbuf *m;
156 struct socket *so;
157 struct vnode *vp;
158 int error;
159
160 /*
161 * Do enough of ifconfig(8) so that critical net interface can
162 * talk to the server.
163 */
164 if (socreate(nfs_diskless.myif.ifra_addr.sa_family, &so, SOCK_DGRAM, 0))
165 panic("nfs ifconf");
166 if (ifioctl(so, SIOCAIFADDR, &nfs_diskless.myif))
167 panic("nfs ifconf2");
168 soclose(so);
169
170 /*
171 * If the gateway field is filled in, set it as the default route.
172 */
173#ifdef COMPAT_43
174 if (nfs_diskless.mygateway.sa_family == AF_INET) {
175 struct ortentry rt;
176 struct sockaddr_in *sin;
177
178 sin = (struct sockaddr_in *) &rt.rt_dst;
179 sin->sin_len = sizeof (struct sockaddr_in);
180 sin->sin_family = AF_INET;
181 sin->sin_addr.s_addr = 0; /* default */
182 bcopy((caddr_t)&nfs_diskless.mygateway, (caddr_t)&rt.rt_gateway,
183 sizeof (struct sockaddr_in));
184 rt.rt_flags = (RTF_UP | RTF_GATEWAY);
185 if (rtioctl(SIOCADDRT, (caddr_t)&rt))
186 panic("nfs root route");
187 }
188#endif /* COMPAT_43 */
189
190 /*
191 * If swapping to an nfs node (indicated by swdevt[0].sw_dev == NODEV):
192 * Create a fake mount point just for the swap vnode so that the
193 * swap file can be on a different server from the rootfs.
194 */
195 if (swdevt[0].sw_dev == NODEV) {
196 mp = (struct mount *)malloc((u_long)sizeof(struct mount),
197 M_MOUNT, M_NOWAIT);
198 if (mp == NULL)
199 panic("nfs root mount");
200 mp->mnt_op = &nfs_vfsops;
201 mp->mnt_flag = 0;
202 mp->mnt_exroot = 0;
203 mp->mnt_mounth = NULLVP;
204
205 /*
206 * Set up the diskless nfs_args for the swap mount point
207 * and then call mountnfs() to mount it.
208 * Since the swap file is not the root dir of a file system,
209 * hack it to a regular file.
210 */
211 nfs_diskless.swap_args.fh = (nfsv2fh_t *)nfs_diskless.swap_fh;
212 MGET(m, MT_SONAME, M_DONTWAIT);
213 if (m == NULL)
214 panic("nfs root mbuf");
215 bcopy((caddr_t)&nfs_diskless.swap_saddr, mtod(m, caddr_t),
216 nfs_diskless.swap_saddr.sa_len);
217 m->m_len = nfs_diskless.swap_saddr.sa_len;
218 if (mountnfs(&nfs_diskless.swap_args, mp, m, "/swap",
219 nfs_diskless.swap_hostnam, &vp))
220 panic("nfs swap");
221 vp->v_type = VREG;
222 vp->v_flag = 0;
223 swapdev_vp = vp;
224 VREF(vp);
225 swdevt[0].sw_vp = vp;
226 }
227
228 /*
229 * Create the rootfs mount point.
230 */
231 mp = (struct mount *)malloc((u_long)sizeof(struct mount),
232 M_MOUNT, M_NOWAIT);
233 if (mp == NULL)
234 panic("nfs root mount2");
235 mp->mnt_op = &nfs_vfsops;
236 mp->mnt_flag = MNT_RDONLY;
237 mp->mnt_exroot = 0;
238 mp->mnt_mounth = NULLVP;
239
240 /*
241 * Set up the root fs args and call mountnfs() to do the rest.
242 */
243 nfs_diskless.root_args.fh = (nfsv2fh_t *)nfs_diskless.root_fh;
244 MGET(m, MT_SONAME, M_DONTWAIT);
245 if (m == NULL)
246 panic("nfs root mbuf2");
247 bcopy((caddr_t)&nfs_diskless.root_saddr, mtod(m, caddr_t),
248 nfs_diskless.root_saddr.sa_len);
249 m->m_len = nfs_diskless.root_saddr.sa_len;
250 if (mountnfs(&nfs_diskless.root_args, mp, m, "/",
251 nfs_diskless.root_hostnam, &vp))
252 panic("nfs root");
253 if (vfs_lock(mp))
254 panic("nfs root2");
255 rootfs = mp;
256 mp->mnt_next = mp;
257 mp->mnt_prev = mp;
258 mp->mnt_vnodecovered = NULLVP;
259 vfs_unlock(mp);
260 rootvp = vp;
261 inittodr((time_t)0); /* There is no time in the nfs fsstat so ?? */
262 return (0);
263}
264
265/*
266 * VFS Operations.
267 *
268 * mount system call
269 * It seems a bit dumb to copyinstr() the host and path here and then
270 * bcopy() them in mountnfs(), but I wanted to detect errors before
271 * doing the sockargs() call because sockargs() allocates an mbuf and
272 * an error after that means that I have to release the mbuf.
273 */
274/* ARGSUSED */
275nfs_mount(mp, path, data, ndp, p)
276 struct mount *mp;
277 char *path;
278 caddr_t data;
279 struct nameidata *ndp;
280 struct proc *p;
281{
282 int error;
283 struct nfs_args args;
284 struct mbuf *nam;
285 struct vnode *vp;
286 char pth[MNAMELEN], hst[MNAMELEN];
287 u_int len;
288 nfsv2fh_t nfh;
289
290 if (mp->mnt_flag & MNT_UPDATE)
291 return (0);
292 if (error = copyin(data, (caddr_t)&args, sizeof (struct nfs_args)))
293 return (error);
294 if (error = copyin((caddr_t)args.fh, (caddr_t)&nfh, sizeof (nfsv2fh_t)))
295 return (error);
296 if (error = copyinstr(path, pth, MNAMELEN-1, &len))
297 return (error);
298 bzero(&pth[len], MNAMELEN - len);
299 if (error = copyinstr(args.hostname, hst, MNAMELEN-1, &len))
300 return (error);
301 bzero(&hst[len], MNAMELEN - len);
302 /* sockargs() call must be after above copyin() calls */
303 if (error = sockargs(&nam, (caddr_t)args.addr,
304 sizeof (struct sockaddr), MT_SONAME))
305 return (error);
306 args.fh = &nfh;
307 error = mountnfs(&args, mp, nam, pth, hst, &vp);
308 return (error);
309}
310
311/*
312 * Common code for mount and mountroot
313 */
314mountnfs(argp, mp, nam, pth, hst, vpp)
315 register struct nfs_args *argp;
316 register struct mount *mp;
317 struct mbuf *nam;
318 char *pth, *hst;
319 struct vnode **vpp;
320{
321 register struct nfsmount *nmp;
322 struct proc *p = curproc; /* XXX */
323 struct nfsnode *np;
324 int error;
325 fsid_t tfsid;
326
327 MALLOC(nmp, struct nfsmount *, sizeof *nmp, M_NFSMNT, M_WAITOK);
328 bzero((caddr_t)nmp, sizeof *nmp);
329 mp->mnt_data = (qaddr_t)nmp;
330 /*
331 * Generate a unique nfs mount id. The problem is that a dev number
332 * is not unique across multiple systems. The techique is as follows:
333 * 1) Set to nblkdev,0 which will never be used otherwise
334 * 2) Generate a first guess as nblkdev,nfs_mntid where nfs_mntid is
335 * NOT 0
336 * 3) Loop searching the mount list for another one with same id
337 * If a match, increment val[0] and try again
338 * NB: I increment val[0] { a long } instead of nfs_mntid { a u_char }
339 * so that nfs is not limited to 255 mount points
340 * Incrementing the high order bits does no real harm, since it
341 * simply makes the major dev number tick up. The upper bound is
342 * set to major dev 127 to avoid any sign extention problems
343 */
344 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev, 0);
345 mp->mnt_stat.f_fsid.val[1] = MOUNT_NFS;
346 if (++nfs_mntid == 0)
347 ++nfs_mntid;
348 tfsid.val[0] = makedev(nblkdev, nfs_mntid);
349 tfsid.val[1] = MOUNT_NFS;
350 while (rootfs && getvfs(&tfsid)) {
351 tfsid.val[0]++;
352 nfs_mntid++;
353 }
354 if (major(tfsid.val[0]) > 127) {
355 error = ENOENT;
356 goto bad;
357 }
358 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
359 nmp->nm_mountp = mp;
360 nmp->nm_flag = argp->flags;
361 nmp->nm_rto = NFS_TIMEO;
362 nmp->nm_rtt = -1;
363 nmp->nm_rttvar = nmp->nm_rto << 1;
364 nmp->nm_retry = NFS_RETRANS;
365 nmp->nm_wsize = NFS_WSIZE;
366 nmp->nm_rsize = NFS_RSIZE;
367 bcopy((caddr_t)argp->fh, (caddr_t)&nmp->nm_fh, sizeof(nfsv2fh_t));
368 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
369 bcopy(pth, mp->mnt_stat.f_mntonname, MNAMELEN);
370 nmp->nm_nam = nam;
371
372 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
373 nmp->nm_rto = argp->timeo;
374 /* NFS timeouts are specified in 1/10 sec. */
375 nmp->nm_rto = (nmp->nm_rto * 10) / NFS_HZ;
376 if (nmp->nm_rto < NFS_MINTIMEO)
377 nmp->nm_rto = NFS_MINTIMEO;
378 else if (nmp->nm_rto > NFS_MAXTIMEO)
379 nmp->nm_rto = NFS_MAXTIMEO;
380 nmp->nm_rttvar = nmp->nm_rto << 1;
381 }
382
383 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
384 nmp->nm_retry = argp->retrans;
385 if (nmp->nm_retry > NFS_MAXREXMIT)
386 nmp->nm_retry = NFS_MAXREXMIT;
387 }
388
389 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
390 nmp->nm_wsize = argp->wsize;
391 /* Round down to multiple of blocksize */
392 nmp->nm_wsize &= ~0x1ff;
393 if (nmp->nm_wsize <= 0)
394 nmp->nm_wsize = 512;
395 else if (nmp->nm_wsize > NFS_MAXDATA)
396 nmp->nm_wsize = NFS_MAXDATA;
397 }
398 if (nmp->nm_wsize > MAXBSIZE)
399 nmp->nm_wsize = MAXBSIZE;
400
401 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
402 nmp->nm_rsize = argp->rsize;
403 /* Round down to multiple of blocksize */
404 nmp->nm_rsize &= ~0x1ff;
405 if (nmp->nm_rsize <= 0)
406 nmp->nm_rsize = 512;
407 else if (nmp->nm_rsize > NFS_MAXDATA)
408 nmp->nm_rsize = NFS_MAXDATA;
409 }
410 if (nmp->nm_rsize > MAXBSIZE)
411 nmp->nm_rsize = MAXBSIZE;
412 /* Set up the sockets and per-host congestion */
413 nmp->nm_sotype = argp->sotype;
414 nmp->nm_soproto = argp->proto;
415 if (error = nfs_connect(nmp))
416 goto bad;
417
418 if (error = nfs_statfs(mp, &mp->mnt_stat, p))
419 goto bad;
420 /*
421 * A reference count is needed on the nfsnode representing the
422 * remote root. If this object is not persistent, then backward
423 * traversals of the mount point (i.e. "..") will not work if
424 * the nfsnode gets flushed out of the cache. Ufs does not have
425 * this problem, because one can identify root inodes by their
426 * number == ROOTINO (2).
427 */
428 if (error = nfs_nget(mp, &nmp->nm_fh, &np))
429 goto bad;
430 /*
431 * Unlock it, but keep the reference count.
432 */
433 nfs_unlock(NFSTOV(np));
434 *vpp = NFSTOV(np);
435
436 return (0);
437bad:
438 nfs_disconnect(nmp);
439 FREE(nmp, M_NFSMNT);
440 m_freem(nam);
441 return (error);
442}
443
444/*
445 * unmount system call
446 */
447nfs_unmount(mp, mntflags, p)
448 struct mount *mp;
449 int mntflags;
450 struct proc *p;
451{
452 register struct nfsmount *nmp;
453 struct nfsnode *np;
454 struct vnode *vp;
455 int error, flags = 0;
456 extern int doforce;
457
458 if (mntflags & MNT_FORCE) {
459 if (!doforce || mp == rootfs)
460 return (EINVAL);
461 flags |= FORCECLOSE;
462 }
463 nmp = VFSTONFS(mp);
464 /*
465 * Clear out the buffer cache
466 */
467 mntflushbuf(mp, 0);
468 if (mntinvalbuf(mp))
469 return (EBUSY);
470 /*
471 * Goes something like this..
472 * - Check for activity on the root vnode (other than ourselves).
473 * - Call vflush() to clear out vnodes for this file system,
474 * except for the root vnode.
475 * - Decrement reference on the vnode representing remote root.
476 * - Close the socket
477 * - Free up the data structures
478 */
479 /*
480 * We need to decrement the ref. count on the nfsnode representing
481 * the remote root. See comment in mountnfs(). The VFS unmount()
482 * has done vput on this vnode, otherwise we would get deadlock!
483 */
484 if (error = nfs_nget(mp, &nmp->nm_fh, &np))
485 return(error);
486 vp = NFSTOV(np);
487 if (vp->v_usecount > 2) {
488 vput(vp);
489 return (EBUSY);
490 }
491 if (error = vflush(mp, vp, flags)) {
492 vput(vp);
493 return (error);
494 }
495 /*
496 * Get rid of two reference counts, and unlock it on the second.
497 */
498 vrele(vp);
499 vput(vp);
500 nfs_disconnect(nmp);
501 m_freem(nmp->nm_nam);
502 free((caddr_t)nmp, M_NFSMNT);
503 return (0);
504}
505
506/*
507 * Return root of a filesystem
508 */
509nfs_root(mp, vpp)
510 struct mount *mp;
511 struct vnode **vpp;
512{
513 register struct vnode *vp;
514 struct nfsmount *nmp;
515 struct nfsnode *np;
516 int error;
517
518 nmp = VFSTONFS(mp);
519 if (error = nfs_nget(mp, &nmp->nm_fh, &np))
520 return (error);
521 vp = NFSTOV(np);
522 vp->v_type = VDIR;
523 vp->v_flag = VROOT;
524 *vpp = vp;
525 return (0);
526}
527
528extern int syncprt;
529
530/*
531 * Flush out the buffer cache
532 */
533/* ARGSUSED */
534nfs_sync(mp, waitfor)
535 struct mount *mp;
536 int waitfor;
537{
538 if (syncprt)
539 bufstats();
540 /*
541 * Force stale buffer cache information to be flushed.
542 */
543 mntflushbuf(mp, waitfor == MNT_WAIT ? B_SYNC : 0);
544 return (0);
545}
546
/*
 * File handle to vnode pointer.
 * At this point, this should never happen; every call is answered
 * with EINVAL and none of the arguments is looked at.
 */
/* ARGSUSED */
int
nfs_fhtovp(mp, fhp, vpp)
	struct mount *mp;
	struct fid *fhp;
	struct vnode **vpp;
{
	return EINVAL;
}
559
/*
 * Vnode pointer to File handle.
 * Should never happen either; every call is answered with EINVAL and
 * neither argument is looked at.
 */
/* ARGSUSED */
int
nfs_vptofh(vp, fhp)
	struct vnode *vp;
	struct fid *fhp;
{
	return EINVAL;
}
571
/*
 * Vfs start routine.
 * A no-op: the arguments are ignored and 0 is returned.
 */
/* ARGSUSED */
int
nfs_start(mp, flags, p)
	struct mount *mp;
	int flags;
	struct proc *p;
{
	return 0;
}
584
585/*
586 * Do operations associated with quotas, not supported
587 */
588nfs_quotactl(mp, cmd, uid, arg, p)
589 struct mount *mp;
590 int cmd;
591 uid_t uid;
592 caddr_t arg;
593 struct proc *p;
594{
595#ifdef lint
596 mp = mp; cmd = cmd; uid = uid; arg = arg;
597#endif /* lint */
598 return (EOPNOTSUPP);
599}