pass V_SAVE to vinvalbuf
[unix-history] / usr / src / sys / miscfs / nullfs / null_vnops.c
CommitLineData
7279c039 1/*
1446b03c
KB
2 * Copyright (c) 1992, 1993
3 * The Regents of the University of California. All rights reserved.
7279c039 4 *
7936eee0
JH
5 * This code is derived from software contributed to Berkeley by
6 * John Heidemann of the UCLA Ficus project.
7279c039
JH
7 *
8 * %sccs.include.redist.c%
9 *
1446b03c 10 * @(#)null_vnops.c 8.1 (Berkeley) %G%
7279c039 11 *
11cda5c1
JH
12 * Ancestors:
13 * @(#)lofs_vnops.c 1.2 (Berkeley) 6/18/92
14 * $Id: lofs_vnops.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
15 * ...and...
16 * @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
7279c039
JH
17 */
18
19/*
11cda5c1
JH
20 * Null Layer
21 *
7936eee0
JH
22 * (See mount_null(8) for more information.)
23 *
11cda5c1
JH
24 * The null layer duplicates a portion of the file system
25 * name space under a new name. In this respect, it is
26 * similar to the loopback file system. It differs from
27 * the loopback fs in two respects: it is implemented using
7936eee0 28 * a stackable layers technique, and its "null-node"s stack above
11cda5c1
JH
29 * all lower-layer vnodes, not just over directory vnodes.
30 *
7936eee0
JH
31 * The null layer has two purposes. First, it serves as a demonstration
32 * of layering by providing a layer which does nothing. (It actually
33 * does everything the loopback file system does, which is slightly
34 * more than nothing.) Second, the null layer can serve as a prototype
35 * layer. Since it provides all necessary layer framework,
36 * new file system layers can be created very easily by starting
37 * with a null layer.
38 *
39 * The remainder of this man page examines the null layer as a basis
40 * for constructing new layers.
41 *
42 *
43 * INSTANTIATING NEW NULL LAYERS
44 *
45 * New null layers are created with mount_null(8).
46 * Mount_null(8) takes two arguments, the pathname
47 * of the lower vfs (target-pn) and the pathname where the null
48 * layer will appear in the namespace (alias-pn). After
49 * the null layer is put into place, the contents
50 * of target-pn subtree will be aliased under alias-pn.
51 *
52 *
53 * OPERATION OF A NULL LAYER
54 *
11cda5c1
JH
55 * The null layer is the minimum file system layer,
56 * simply bypassing all possible operations to the lower layer
7936eee0
JH
57 * for processing there. The majority of its activity centers
58 * on the bypass routine, through which nearly all vnode operations
59 * pass.
11cda5c1 60 *
7936eee0
JH
61 * The bypass routine accepts arbitrary vnode operations for
62 * handling by the lower layer. It begins by examining vnode
63 * operation arguments and replacing any null-nodes by their
64 * lower-layer equivalents. It then invokes the operation
65 * on the lower layer. Finally, it replaces the null-nodes
66 * in the arguments and, if a vnode is returned by the operation,
67 * stacks a null-node on top of the returned vnode.
68 *
69 * Although bypass handles most operations,
70 * vop_getattr, _inactive, _reclaim, and _print are not bypassed.
71 * Vop_getattr must change the fsid being returned.
72 * Vop_inactive and vop_reclaim are not bypassed so that
11cda5c1 73 * they can handle freeing null-layer specific data.
7936eee0
JH
74 * Vop_print is not bypassed to avoid excessive debugging
75 * information.
11cda5c1 76 *
c8b2716f 77 *
7936eee0 78 * INSTANTIATING VNODE STACKS
c8b2716f 79 *
7936eee0
JH
80 * Mounting associates the null layer with a lower layer,
81 * effectively stacking two VFSes. Vnode stacks are instead
82 * created on demand as files are accessed.
c8b2716f 83 *
7936eee0
JH
84 * The initial mount creates a single vnode stack for the
85 * root of the new null layer. All other vnode stacks
86 * are created as a result of vnode operations on
87 * this or other null vnode stacks.
c8b2716f 88 *
7936eee0
JH
89 * New vnode stacks come into existence as a result of
90 * an operation which returns a vnode.
91 * The bypass routine stacks a null-node above the new
92 * vnode before returning it to the caller.
93 *
94 * For example, imagine mounting a null layer with
95 * "mount_null /usr/include /dev/layer/null".
3fa2b388 96 * Changing directory to /dev/layer/null will assign
7936eee0
JH
97 * the root null-node (which was created when the null layer was mounted).
98 * Now consider opening "sys". A vop_lookup would be
99 * done on the root null-node. This operation would bypass through
100 * to the lower layer which would return a vnode representing
101 * the UFS "sys". Null_bypass then builds a null-node
102 * aliasing the UFS "sys" and returns this to the caller.
103 * Later operations on the null-node "sys" will repeat this
104 * process when constructing other vnode stacks.
105 *
106 *
107 * CREATING OTHER FILE SYSTEM LAYERS
c8b2716f
JH
108 *
109 * One of the easiest ways to construct new file system layers is to make
110 * a copy of the null layer, rename all files and variables, and
111 * then begin modifying the copy. Sed can be used to easily rename
112 * all variables.
113 *
7936eee0
JH
114 * The umap layer is an example of a layer descended from the
115 * null layer.
116 *
117 *
118 * INVOKING OPERATIONS ON LOWER LAYERS
119 *
120 * There are two techniques to invoke operations on a lower layer
121 * when the operation cannot be completely bypassed. Each method
122 * is appropriate in different situations. In both cases,
123 * it is the responsibility of the aliasing layer to make
124 * the operation arguments "correct" for the lower layer
125 * by mapping any vnode arguments to the lower layer.
126 *
127 * The first approach is to call the aliasing layer's bypass routine.
128 * This method is most suitable when you wish to invoke the operation
129 * currently being handled on the lower layer. It has the advantage
3fa2b388 130 * that the bypass routine already must do argument mapping.
7936eee0
JH
131 * An example of this is null_getattr in the null layer.
132 *
133 * A second approach is to directly invoke vnode operations on
134 * the lower layer with the VOP_OPERATIONNAME interface.
135 * The advantage of this method is that it is easy to invoke
136 * arbitrary operations on the lower layer. The disadvantage
137 * is that vnode arguments must be manually mapped.
138 *
7279c039
JH
139 */
140
141#include <sys/param.h>
142#include <sys/systm.h>
143#include <sys/proc.h>
144#include <sys/time.h>
145#include <sys/types.h>
146#include <sys/vnode.h>
147#include <sys/mount.h>
148#include <sys/namei.h>
149#include <sys/malloc.h>
150#include <sys/buf.h>
3fa2b388 151#include <miscfs/nullfs/null.h>
7279c039 152
7279c039 153
11cda5c1 154int null_bug_bypass = 0; /* for debugging: when nonzero, null_bypass printf's the name of each operation it handles */
7279c039
JH
155
156/*
11cda5c1
JH
157 * This is the 10-Apr-92 bypass routine.
158 * This version has been optimized for speed, throwing away some
159 * safety checks. It should still always work, but it's not as
160 * robust to programmer errors.
161 * Define SAFETY to include some error checking code.
162 *
163 * In general, we map all vnodes going down and unmap them on the way back.
164 * As an exception to this, vnodes can be marked "unmapped" by setting
165 * the Nth bit in operation's vdesc_flags.
166 *
167 * Also, some BSD vnode operations have the side effect of vrele'ing
168 * their arguments. With stacking, the reference counts are held
169 * by the upper node, not the lower one, so we must handle these
170 * side-effects here. This is not of concern in Sun-derived systems
171 * since there are no such side-effects.
172 *
173 * This makes the following assumptions:
174 * - only one returned vpp
175 * - no INOUT vpp's (Sun's vop_open has one of these)
176 * - the vnode operation vector of the first vnode should be used
177 * to determine what implementation of the op should be invoked
178 * - all mapped vnodes are of our vnode-type (NEEDSWORK:
179 * problems on rmdir'ing mount points and renaming?)
180 */
181int
182null_bypass(ap)
3fa2b388
KM
183 struct vop_generic_args /* {
184 struct vnodeop_desc *a_desc;
185 <other random data follows, presumably>
186 } */ *ap;
11cda5c1 187{
c8b2716f
JH
188 extern int (**null_vnodeop_p)(); /* not extern, really "forward" */
189 register struct vnode **this_vp_p;
7279c039 190 int error;
11cda5c1
JH
191 struct vnode *old_vps[VDESC_MAX_VPS];
192 struct vnode **vps_p[VDESC_MAX_VPS];
193 struct vnode ***vppp;
194 struct vnodeop_desc *descp = ap->a_desc;
c8b2716f 195 int reles, i;
7279c039 196
11cda5c1
JH
197 if (null_bug_bypass)
198 printf ("null_bypass: %s\n", descp->vdesc_name);
7279c039 199
11cda5c1 200#ifdef SAFETY
7279c039 201 /*
11cda5c1 202 * We require at least one vp.
7279c039 203 */
30969417
JH
204 if (descp->vdesc_vp_offsets == NULL ||
205 descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
11cda5c1 206 panic ("null_bypass: no vp's in map.\n");
7279c039
JH
207#endif
208
209 /*
11cda5c1
JH
210 * Map the vnodes going in.
211 * Later, we'll invoke the operation based on
212 * the first mapped vnode's operation vector.
7279c039 213 */
c8b2716f 214 reles = descp->vdesc_flags;
30969417
JH
215 for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
216 if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
11cda5c1 217 break; /* bail out at end of list */
11cda5c1
JH
218 vps_p[i] = this_vp_p =
219 VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
c8b2716f
JH
220 /*
221 * We're not guaranteed that any but the first vnode
222 * are of our type. Check for and don't map any
7936eee0 223 * that aren't. (We must always map first vp or vclean fails.)
c8b2716f 224 */
72d8e8b4 225 if (i && (*this_vp_p)->v_op != null_vnodeop_p) {
c8b2716f
JH
226 old_vps[i] = NULL;
227 } else {
228 old_vps[i] = *this_vp_p;
229 *(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
30969417
JH
230 /*
231 * XXX - Several operations have the side effect
232 * of vrele'ing their vp's. We must account for
233 * that. (This should go away in the future.)
234 */
c8b2716f
JH
235 if (reles & 1)
236 VREF(*this_vp_p);
30969417 237 }
11cda5c1 238
30969417 239 }
7279c039
JH
240
241 /*
11cda5c1
JH
242 * Call the operation on the lower layer
243 * with the modified argument structure.
7279c039 244 */
11cda5c1 245 error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
7279c039
JH
246
247 /*
11cda5c1
JH
248 * Maintain the illusion of call-by-value
249 * by restoring vnodes in the argument structure
250 * to their original value.
7279c039 251 */
c8b2716f 252 reles = descp->vdesc_flags;
30969417
JH
253 for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
254 if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
11cda5c1 255 break; /* bail out at end of list */
c8b2716f
JH
256 if (old_vps[i]) {
257 *(vps_p[i]) = old_vps[i];
258 if (reles & 1)
259 vrele(*(vps_p[i]));
30969417
JH
260 }
261 }
11cda5c1 262
7279c039 263 /*
30969417
JH
264 * Map the possible out-going vpp
265 * (Assumes that the lower layer always returns
266 * a VREF'ed vpp unless it gets an error.)
7279c039 267 */
11cda5c1
JH
268 if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
269 !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
270 !error) {
30969417
JH
271 /*
272 * XXX - even though some ops have vpp returned vp's,
273 * several ops actually vrele this before returning.
274 * We must avoid these ops.
7936eee0 275 * (This should go away when these ops are regularized.)
30969417 276 */
7936eee0
JH
277 if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
278 goto out;
30969417 279 vppp = VOPARG_OFFSETTO(struct vnode***,
11cda5c1 280 descp->vdesc_vpp_offset,ap);
c8b2716f 281 error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
30969417 282 }
7279c039 283
7936eee0 284 out:
7279c039
JH
285 return (error);
286}
287
11cda5c1 288
7279c039 289/*
7936eee0 290 * We handle getattr only to change the fsid.
7279c039 291 */
11cda5c1
JH
292int
293null_getattr(ap)
3fa2b388
KM
294 struct vop_getattr_args /* {
295 struct vnode *a_vp;
296 struct vattr *a_vap;
297 struct ucred *a_cred;
298 struct proc *a_p;
299 } */ *ap;
7279c039 300{
7279c039 301 int error;
30969417 302 if (error = null_bypass(ap))
3fa2b388 303 return (error);
11cda5c1
JH
304 /* Requires that arguments be restored. */
305 ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
3fa2b388 306 return (0);
7279c039
JH
307}
308
11cda5c1 309
/*
 * null_inactive -- deliberately a no-op; the operation is NOT bypassed.
 *
 * The lower vnode is not vrele'd here; that is put off until
 * null_reclaim, so that until then our null_node stays in the
 * cache and can be reused.
 *
 * NEEDSWORK: Someday, consider inactive'ing the lowervp and then
 * trying to reactivate it with capabilities (v_id) like they do in
 * the name lookup cache code.  That's too much work for now.
 *
 * Always returns 0.
 */
int
null_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	return (0);
}
330
30969417 331int
3fa2b388
KM
332null_reclaim(ap)
333 struct vop_reclaim_args /* {
334 struct vnode *a_vp;
335 } */ *ap;
7279c039 336{
30969417
JH
337 struct vnode *vp = ap->a_vp;
338 struct null_node *xp = VTONULL(vp);
339 struct vnode *lowervp = xp->null_lowervp;
340
30969417 341 /*
7936eee0 342 * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
30969417
JH
343 * so we can't call VOPs on ourself.
344 */
345 /* After this assignment, this node will not be re-used. */
30969417
JH
346 xp->null_lowervp = NULL;
347 remque(xp);
348 FREE(vp->v_data, M_TEMP);
349 vp->v_data = NULL;
350 vrele (lowervp);
3fa2b388 351 return (0);
7279c039
JH
352}
353
7936eee0 354
30969417 355int
3fa2b388
KM
356null_print(ap)
357 struct vop_print_args /* {
358 struct vnode *a_vp;
359 } */ *ap;
7279c039 360{
7936eee0
JH
361 register struct vnode *vp = ap->a_vp;
362 printf ("\ttag VT_NULLFS, vp=%x, lowervp=%x\n", vp, NULLVPTOLOWERVP(vp));
3fa2b388 363 return (0);
7279c039
JH
364}
365
7936eee0
JH
366
367/*
368 * XXX - vop_strategy must be hand coded because it has no
369 * vnode in its arguments.
370 * This goes away with a merged VM/buffer cache.
371 */
30969417 372int
3fa2b388
KM
373null_strategy(ap)
374 struct vop_strategy_args /* {
375 struct buf *a_bp;
376 } */ *ap;
7279c039 377{
7936eee0 378 struct buf *bp = ap->a_bp;
7279c039 379 int error;
11cda5c1 380 struct vnode *savedvp;
7279c039 381
7936eee0
JH
382 savedvp = bp->b_vp;
383 bp->b_vp = NULLVPTOLOWERVP(bp->b_vp);
7279c039 384
7936eee0 385 error = VOP_STRATEGY(bp);
7279c039 386
7936eee0 387 bp->b_vp = savedvp;
7279c039 388
3fa2b388 389 return (error);
7279c039
JH
390}
391
11cda5c1 392
7936eee0
JH
393/*
394 * XXX - like vop_strategy, vop_bwrite must be hand coded because it has no
395 * vnode in its arguments.
396 * This goes away with a merged VM/buffer cache.
397 */
11cda5c1 398int
3fa2b388
KM
399null_bwrite(ap)
400 struct vop_bwrite_args /* {
401 struct buf *a_bp;
402 } */ *ap;
7279c039 403{
7936eee0
JH
404 struct buf *bp = ap->a_bp;
405 int error;
406 struct vnode *savedvp;
7279c039 407
7936eee0
JH
408 savedvp = bp->b_vp;
409 bp->b_vp = NULLVPTOLOWERVP(bp->b_vp);
72d8e8b4 410
7936eee0
JH
411 error = VOP_BWRITE(bp);
412
413 bp->b_vp = savedvp;
414
3fa2b388 415 return (error);
72d8e8b4 416}
7936eee0 417
7279c039 418/*
11cda5c1 419 * Global vfs data structures
7279c039 420 */
11cda5c1 421int (**null_vnodeop_p)();
c8b2716f 422struct vnodeopv_entry_desc null_vnodeop_entries[] = {
11cda5c1 423 { &vop_default_desc, null_bypass },
7279c039 424
11cda5c1
JH
425 { &vop_getattr_desc, null_getattr },
426 { &vop_inactive_desc, null_inactive },
427 { &vop_reclaim_desc, null_reclaim },
428 { &vop_print_desc, null_print },
7279c039 429
11cda5c1 430 { &vop_strategy_desc, null_strategy },
7936eee0 431 { &vop_bwrite_desc, null_bwrite },
7279c039 432
7279c039
JH
433 { (struct vnodeop_desc*)NULL, (int(*)())NULL }
434};
c8b2716f
JH
435struct vnodeopv_desc null_vnodeop_opv_desc =
436 { &null_vnodeop_p, null_vnodeop_entries };