Commit | Line | Data |
---|---|---|
7279c039 | 1 | /* |
1446b03c KB |
2 | * Copyright (c) 1992, 1993 |
3 | * The Regents of the University of California. All rights reserved. | |
7279c039 | 4 | * |
7936eee0 JH |
5 | * This code is derived from software contributed to Berkeley by |
6 | * John Heidemann of the UCLA Ficus project. | |
7279c039 JH |
7 | * |
8 | * %sccs.include.redist.c% | |
9 | * | |
1446b03c | 10 | * @(#)null_vnops.c 8.1 (Berkeley) %G% |
7279c039 | 11 | * |
11cda5c1 JH |
12 | * Ancestors: |
13 | * @(#)lofs_vnops.c 1.2 (Berkeley) 6/18/92 | |
14 | * $Id: lofs_vnops.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp $ | |
15 | * ...and... | |
16 | * @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project | |
7279c039 JH |
17 | */ |
18 | ||
19 | /* | |
11cda5c1 JH |
20 | * Null Layer |
21 | * | |
7936eee0 JH |
22 | * (See mount_null(8) for more information.) |
23 | * | |
11cda5c1 JH |
24 | * The null layer duplicates a portion of the file system |
25 | * name space under a new name. In this respect, it is | |
26 | * similar to the loopback file system. It differs from | |
27 | * the loopback fs in two respects: it is implemented using | |
7936eee0 | 28 | * a stackable layers technique, and its "null-node"s stack above |
11cda5c1 JH |
29 | * all lower-layer vnodes, not just over directory vnodes. |
30 | * | |
7936eee0 JH |
31 | * The null layer has two purposes. First, it serves as a demonstration |
32 | * of layering by providing a layer which does nothing. (It actually | |
33 | * does everything the loopback file system does, which is slightly | |
34 | * more than nothing.) Second, the null layer can serve as a prototype | |
35 | * layer. Since it provides all necessary layer framework, | |
36 | * new file system layers can be created very easily by starting | |
37 | * with a null layer. | |
38 | * | |
39 | * The remainder of this man page examines the null layer as a basis | |
40 | * for constructing new layers. | |
41 | * | |
42 | * | |
43 | * INSTANTIATING NEW NULL LAYERS | |
44 | * | |
45 | * New null layers are created with mount_null(8). | |
46 | * Mount_null(8) takes two arguments, the pathname | |
47 | * of the lower vfs (target-pn) and the pathname where the null | |
48 | * layer will appear in the namespace (alias-pn). After | |
49 | * the null layer is put into place, the contents | |
50 | * of target-pn subtree will be aliased under alias-pn. | |
51 | * | |
52 | * | |
53 | * OPERATION OF A NULL LAYER | |
54 | * | |
11cda5c1 JH |
55 | * The null layer is the minimum file system layer, |
56 | * simply bypassing all possible operations to the lower layer | |
7936eee0 JH |
57 | * for processing there. The majority of its activity centers |
58 | * on the bypass routine, through which nearly all vnode operations | |
59 | * pass. | |
11cda5c1 | 60 | * |
7936eee0 JH |
61 | * The bypass routine accepts arbitrary vnode operations for |
62 | * handling by the lower layer. It begins by examining vnode | |
63 | * operation arguments and replacing any null-nodes by their | |
64 | * lower-layer equivalents. It then invokes the operation | |
65 | * on the lower layer. Finally, it replaces the null-nodes | |
66 | * in the arguments and, if a vnode is returned by the operation, | |
67 | * stacks a null-node on top of the returned vnode. | |
68 | * | |
69 | * Although bypass handles most operations, | |
70 | * vop_getattr, _inactive, _reclaim, and _print are not bypassed. | |
71 | * Vop_getattr must change the fsid being returned. | |
72 | * Vop_inactive and vop_reclaim are not bypassed so that | |
11cda5c1 | 73 | * they can handle freeing null-layer specific data. |
7936eee0 JH |
74 | * Vop_print is not bypassed to avoid excessive debugging |
75 | * information. | |
11cda5c1 | 76 | * |
c8b2716f | 77 | * |
7936eee0 | 78 | * INSTANTIATING VNODE STACKS |
c8b2716f | 79 | * |
7936eee0 JH |
80 | * Mounting associates the null layer with a lower layer, |
81 | * in effect stacking two VFSes. Vnode stacks are instead | |
82 | * created on demand as files are accessed. | |
c8b2716f | 83 | * |
7936eee0 JH |
84 | * The initial mount creates a single vnode stack for the |
85 | * root of the new null layer. All other vnode stacks | |
86 | * are created as a result of vnode operations on | |
87 | * this or other null vnode stacks. | |
c8b2716f | 88 | * |
7936eee0 JH |
89 | * New vnode stacks come into existence as a result of |
90 | * an operation which returns a vnode. | |
91 | * The bypass routine stacks a null-node above the new | |
92 | * vnode before returning it to the caller. | |
93 | * | |
94 | * For example, imagine mounting a null layer with | |
95 | * "mount_null /usr/include /dev/layer/null". | |
3fa2b388 | 96 | * Changing directory to /dev/layer/null will assign |
7936eee0 JH |
97 | * the root null-node (which was created when the null layer was mounted). |
98 | * Now consider opening "sys". A vop_lookup would be | |
99 | * done on the root null-node. This operation would bypass through | |
100 | * to the lower layer which would return a vnode representing | |
101 | * the UFS "sys". Null_bypass then builds a null-node | |
102 | * aliasing the UFS "sys" and returns this to the caller. | |
103 | * Later operations on the null-node "sys" will repeat this | |
104 | * process when constructing other vnode stacks. | |
105 | * | |
106 | * | |
107 | * CREATING OTHER FILE SYSTEM LAYERS | |
c8b2716f JH |
108 | * |
109 | * One of the easiest ways to construct new file system layers is to make | |
110 | * a copy of the null layer, rename all files and variables, and | |
111 | * then begin modifying the copy. Sed can be used to easily rename | |
112 | * all variables. | |
113 | * | |
7936eee0 JH |
114 | * The umap layer is an example of a layer descended from the |
115 | * null layer. | |
116 | * | |
117 | * | |
118 | * INVOKING OPERATIONS ON LOWER LAYERS | |
119 | * | |
120 | * There are two techniques to invoke operations on a lower layer | |
121 | * when the operation cannot be completely bypassed. Each method | |
122 | * is appropriate in different situations. In both cases, | |
123 | * it is the responsibility of the aliasing layer to make | |
124 | * the operation arguments "correct" for the lower layer | |
125 | * by mapping any vnode arguments to the lower layer. | |
126 | * | |
127 | * The first approach is to call the aliasing layer's bypass routine. | |
128 | * This method is most suitable when you wish to invoke the operation | |
129 | * currently being handled on the lower layer. It has the advantage | |
3fa2b388 | 130 | * that the bypass routine already must do argument mapping. |
7936eee0 JH |
131 | * An example of this is null_getattr in the null layer. |
132 | * | |
133 | * A second approach is to directly invoke vnode operations on | |
134 | * the lower layer with the VOP_OPERATIONNAME interface. | |
135 | * The advantage of this method is that it is easy to invoke | |
136 | * arbitrary operations on the lower layer. The disadvantage | |
137 | * is that vnode arguments must be manually mapped. | |
138 | * | |
7279c039 JH |
139 | */ |
140 | ||
141 | #include <sys/param.h> | |
142 | #include <sys/systm.h> | |
143 | #include <sys/proc.h> | |
144 | #include <sys/time.h> | |
145 | #include <sys/types.h> | |
146 | #include <sys/vnode.h> | |
147 | #include <sys/mount.h> | |
148 | #include <sys/namei.h> | |
149 | #include <sys/malloc.h> | |
150 | #include <sys/buf.h> | |
3fa2b388 | 151 | #include <miscfs/nullfs/null.h> |
7279c039 | 152 | |
7279c039 | 153 | |
11cda5c1 | 154 | int null_bug_bypass = 0; /* for debugging: when nonzero, null_bypass printf's the name of each operation it handles */ |
7279c039 JH |
155 | |
156 | /* | |
11cda5c1 JH |
157 | * This is the 10-Apr-92 bypass routine. |
158 | * This version has been optimized for speed, throwing away some | |
159 | * safety checks. It should still always work, but it's not as | |
160 | * robust to programmer errors. | |
161 | * Define SAFETY to include some error checking code. | |
162 | * | |
163 | * In general, we map all vnodes going down and unmap them on the way back. | |
164 | * As an exception to this, vnodes can be marked "unmapped" by setting | |
165 | * the Nth bit in operation's vdesc_flags. | |
166 | * | |
167 | * Also, some BSD vnode operations have the side effect of vrele'ing | |
168 | * their arguments. With stacking, the reference counts are held | |
169 | * by the upper node, not the lower one, so we must handle these | |
170 | * side-effects here. This is not of concern in Sun-derived systems | |
171 | * since there are no such side-effects. | |
172 | * | |
173 | * This makes the following assumptions: | |
174 | * - only one returned vpp | |
175 | * - no INOUT vpp's (Sun's vop_open has one of these) | |
176 | * - the vnode operation vector of the first vnode should be used | |
177 | * to determine what implementation of the op should be invoked | |
178 | * - all mapped vnodes are of our vnode-type (NEEDSWORK: | |
179 | * problems on rmdir'ing mount points and renaming?) | |
180 | */ | |
181 | int | |
182 | null_bypass(ap) | |
3fa2b388 KM |
183 | struct vop_generic_args /* { |
184 | struct vnodeop_desc *a_desc; | |
185 | <other random data follows, presumably> | |
186 | } */ *ap; | |
11cda5c1 | 187 | { |
c8b2716f JH |
188 | extern int (**null_vnodeop_p)(); /* not extern, really "forward" */ |
189 | register struct vnode **this_vp_p; | |
7279c039 | 190 | int error; |
11cda5c1 JH |
191 | struct vnode *old_vps[VDESC_MAX_VPS]; |
192 | struct vnode **vps_p[VDESC_MAX_VPS]; | |
193 | struct vnode ***vppp; | |
194 | struct vnodeop_desc *descp = ap->a_desc; | |
c8b2716f | 195 | int reles, i; |
7279c039 | 196 | |
11cda5c1 JH |
197 | if (null_bug_bypass) |
198 | printf ("null_bypass: %s\n", descp->vdesc_name); | |
7279c039 | 199 | |
11cda5c1 | 200 | #ifdef SAFETY |
7279c039 | 201 | /* |
11cda5c1 | 202 | * We require at least one vp. |
7279c039 | 203 | */ |
30969417 JH |
204 | if (descp->vdesc_vp_offsets == NULL || |
205 | descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET) | |
11cda5c1 | 206 | panic ("null_bypass: no vp's in map.\n"); |
7279c039 JH |
207 | #endif |
208 | ||
209 | /* | |
11cda5c1 JH |
210 | * Map the vnodes going in. |
211 | * Later, we'll invoke the operation based on | |
212 | * the first mapped vnode's operation vector. | |
7279c039 | 213 | */ |
c8b2716f | 214 | reles = descp->vdesc_flags; |
30969417 JH |
215 | for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) { |
216 | if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET) | |
11cda5c1 | 217 | break; /* bail out at end of list */ |
11cda5c1 JH |
218 | vps_p[i] = this_vp_p = |
219 | VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap); | |
c8b2716f JH |
220 | /* |
221 | * We're not guaranteed that any but the first vnode | |
222 | * are of our type. Check for and don't map any | |
7936eee0 | 223 | * that aren't. (We must always map first vp or vclean fails.) |
c8b2716f | 224 | */ |
72d8e8b4 | 225 | if (i && (*this_vp_p)->v_op != null_vnodeop_p) { |
c8b2716f JH |
226 | old_vps[i] = NULL; |
227 | } else { | |
228 | old_vps[i] = *this_vp_p; | |
229 | *(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p); | |
30969417 JH |
230 | /* |
231 | * XXX - Several operations have the side effect | |
232 | * of vrele'ing their vp's. We must account for | |
233 | * that. (This should go away in the future.) | |
234 | */ | |
c8b2716f JH |
235 | if (reles & 1) |
236 | VREF(*this_vp_p); | |
30969417 | 237 | } |
11cda5c1 | 238 | |
30969417 | 239 | } |
7279c039 JH |
240 | |
241 | /* | |
11cda5c1 JH |
242 | * Call the operation on the lower layer |
243 | * with the modified argument structure. | |
7279c039 | 244 | */ |
11cda5c1 | 245 | error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap); |
7279c039 JH |
246 | |
247 | /* | |
11cda5c1 JH |
248 | * Maintain the illusion of call-by-value |
249 | * by restoring vnodes in the argument structure | |
250 | * to their original value. | |
7279c039 | 251 | */ |
c8b2716f | 252 | reles = descp->vdesc_flags; |
30969417 JH |
253 | for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) { |
254 | if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET) | |
11cda5c1 | 255 | break; /* bail out at end of list */ |
c8b2716f JH |
256 | if (old_vps[i]) { |
257 | *(vps_p[i]) = old_vps[i]; | |
258 | if (reles & 1) | |
259 | vrele(*(vps_p[i])); | |
30969417 JH |
260 | } |
261 | } | |
11cda5c1 | 262 | |
7279c039 | 263 | /* |
30969417 JH |
264 | * Map the possible out-going vpp |
265 | * (Assumes that the lower layer always returns | |
266 | * a VREF'ed vpp unless it gets an error.) | |
7279c039 | 267 | */ |
11cda5c1 JH |
268 | if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET && |
269 | !(descp->vdesc_flags & VDESC_NOMAP_VPP) && | |
270 | !error) { | |
30969417 JH |
271 | /* |
272 | * XXX - even though some ops have vpp returned vp's, | |
273 | * several ops actually vrele this before returning. | |
274 | * We must avoid these ops. | |
7936eee0 | 275 | * (This should go away when these ops are regularized.) |
30969417 | 276 | */ |
7936eee0 JH |
277 | if (descp->vdesc_flags & VDESC_VPP_WILLRELE) |
278 | goto out; | |
30969417 | 279 | vppp = VOPARG_OFFSETTO(struct vnode***, |
11cda5c1 | 280 | descp->vdesc_vpp_offset,ap); |
c8b2716f | 281 | error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp); |
30969417 | 282 | } |
7279c039 | 283 | |
7936eee0 | 284 | out: |
7279c039 JH |
285 | return (error); |
286 | } | |
287 | ||
11cda5c1 | 288 | |
7279c039 | 289 | /* |
7936eee0 | 290 | * We handle getattr only to change the fsid. |
7279c039 | 291 | */ |
11cda5c1 JH |
292 | int |
293 | null_getattr(ap) | |
3fa2b388 KM |
294 | struct vop_getattr_args /* { |
295 | struct vnode *a_vp; | |
296 | struct vattr *a_vap; | |
297 | struct ucred *a_cred; | |
298 | struct proc *a_p; | |
299 | } */ *ap; | |
7279c039 | 300 | { |
7279c039 | 301 | int error; |
30969417 | 302 | if (error = null_bypass(ap)) |
3fa2b388 | 303 | return (error); |
11cda5c1 JH |
304 | /* Requires that arguments be restored. */ |
305 | ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; | |
3fa2b388 | 306 | return (0); |
7279c039 JH |
307 | } |
308 | ||
11cda5c1 | 309 | |
/*
 * null_inactive -- deliberately a no-op; always returns 0.
 *
 * This operation is _not_ bypassed to the lower layer.  The lower
 * vnode is only vrele'd at reclaim time, so that until then our
 * null_node stays in the cache and can be reused.
 *
 * NEEDSWORK: Someday, consider inactive'ing the lowervp and then
 * trying to reactivate it with capabilities (v_id), like they do
 * in the name lookup cache code.  That's too much work for now.
 */
int
null_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (0);
}
330 | ||
30969417 | 331 | int |
3fa2b388 KM |
332 | null_reclaim(ap) |
333 | struct vop_reclaim_args /* { | |
334 | struct vnode *a_vp; | |
335 | } */ *ap; | |
7279c039 | 336 | { |
30969417 JH |
337 | struct vnode *vp = ap->a_vp; |
338 | struct null_node *xp = VTONULL(vp); | |
339 | struct vnode *lowervp = xp->null_lowervp; | |
340 | ||
30969417 | 341 | /* |
7936eee0 | 342 | * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p, |
30969417 JH |
343 | * so we can't call VOPs on ourself. |
344 | */ | |
345 | /* After this assignment, this node will not be re-used. */ | |
30969417 JH |
346 | xp->null_lowervp = NULL; |
347 | remque(xp); | |
348 | FREE(vp->v_data, M_TEMP); | |
349 | vp->v_data = NULL; | |
350 | vrele (lowervp); | |
3fa2b388 | 351 | return (0); |
7279c039 JH |
352 | } |
353 | ||
7936eee0 | 354 | |
30969417 | 355 | int |
3fa2b388 KM |
356 | null_print(ap) |
357 | struct vop_print_args /* { | |
358 | struct vnode *a_vp; | |
359 | } */ *ap; | |
7279c039 | 360 | { |
7936eee0 JH |
361 | register struct vnode *vp = ap->a_vp; |
362 | printf ("\ttag VT_NULLFS, vp=%x, lowervp=%x\n", vp, NULLVPTOLOWERVP(vp)); | |
3fa2b388 | 363 | return (0); |
7279c039 JH |
364 | } |
365 | ||
7936eee0 JH |
366 | |
367 | /* | |
368 | * XXX - vop_strategy must be hand coded because it has no | |
369 | * vnode in its arguments. | |
370 | * This goes away with a merged VM/buffer cache. | |
371 | */ | |
30969417 | 372 | int |
3fa2b388 KM |
373 | null_strategy(ap) |
374 | struct vop_strategy_args /* { | |
375 | struct buf *a_bp; | |
376 | } */ *ap; | |
7279c039 | 377 | { |
7936eee0 | 378 | struct buf *bp = ap->a_bp; |
7279c039 | 379 | int error; |
11cda5c1 | 380 | struct vnode *savedvp; |
7279c039 | 381 | |
7936eee0 JH |
382 | savedvp = bp->b_vp; |
383 | bp->b_vp = NULLVPTOLOWERVP(bp->b_vp); | |
7279c039 | 384 | |
7936eee0 | 385 | error = VOP_STRATEGY(bp); |
7279c039 | 386 | |
7936eee0 | 387 | bp->b_vp = savedvp; |
7279c039 | 388 | |
3fa2b388 | 389 | return (error); |
7279c039 JH |
390 | } |
391 | ||
11cda5c1 | 392 | |
7936eee0 JH |
393 | /* |
394 | * XXX - like vop_strategy, vop_bwrite must be hand coded because it has no | |
395 | * vnode in its arguments. | |
396 | * This goes away with a merged VM/buffer cache. | |
397 | */ | |
11cda5c1 | 398 | int |
3fa2b388 KM |
399 | null_bwrite(ap) |
400 | struct vop_bwrite_args /* { | |
401 | struct buf *a_bp; | |
402 | } */ *ap; | |
7279c039 | 403 | { |
7936eee0 JH |
404 | struct buf *bp = ap->a_bp; |
405 | int error; | |
406 | struct vnode *savedvp; | |
7279c039 | 407 | |
7936eee0 JH |
408 | savedvp = bp->b_vp; |
409 | bp->b_vp = NULLVPTOLOWERVP(bp->b_vp); | |
72d8e8b4 | 410 | |
7936eee0 JH |
411 | error = VOP_BWRITE(bp); |
412 | ||
413 | bp->b_vp = savedvp; | |
414 | ||
3fa2b388 | 415 | return (error); |
72d8e8b4 | 416 | } |
7936eee0 | 417 | |
7279c039 | 418 | /* |
11cda5c1 | 419 | * Global vfs data structures: the null layer's operation vector pointer (null_bypass compares v_op against it), the table pairing operation descriptors with this file's handlers, and the descriptor that ties the two together. |
7279c039 | 420 | */ |
11cda5c1 | 421 | int (**null_vnodeop_p)(); |
c8b2716f | 422 | struct vnodeopv_entry_desc null_vnodeop_entries[] = { |
11cda5c1 | 423 | { &vop_default_desc, null_bypass }, /* default entry: the generic bypass routine */ |
7279c039 | 424 | 
11cda5c1 JH |
425 | { &vop_getattr_desc, null_getattr }, |
426 | { &vop_inactive_desc, null_inactive }, | |
427 | { &vop_reclaim_desc, null_reclaim }, | |
428 | { &vop_print_desc, null_print }, | |
7279c039 | 429 | 
11cda5c1 | 430 | { &vop_strategy_desc, null_strategy }, |
7936eee0 | 431 | { &vop_bwrite_desc, null_bwrite }, |
7279c039 | 432 | 
7279c039 JH |
433 | { (struct vnodeop_desc*)NULL, (int(*)())NULL } /* NULL/NULL pair ends the table */ |
434 | }; | |
c8b2716f JH |
435 | struct vnodeopv_desc null_vnodeop_opv_desc = |
436 | { &null_vnodeop_p, null_vnodeop_entries };