/*
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vnode_pager.c	7.16 (Berkeley) %G%
 */

/*
 * Page to/from files (vnodes).
 *
 * TODO:
 *	pageouts
 *	fix credential use (uses current process credentials now)
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/mount.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vnode_pager.h>

queue_head_t	vnode_pager_list;	/* list of managed vnodes */

#ifdef DEBUG
int	vpagerdebug = 0x00;
#define	VDB_FOLLOW	0x01
#define	VDB_INIT	0x02
#define	VDB_IO		0x04
#define	VDB_FAIL	0x08
#define	VDB_ALLOC	0x10
#define	VDB_SIZE	0x20
#endif

static vm_pager_t	vnode_pager_alloc __P((caddr_t, vm_size_t, vm_prot_t));
static void		vnode_pager_dealloc __P((vm_pager_t));
static int		vnode_pager_getpage
			    __P((vm_pager_t, vm_page_t, boolean_t));
static boolean_t	vnode_pager_haspage __P((vm_pager_t, vm_offset_t));
static void		vnode_pager_init __P((void));
static int		vnode_pager_io
			    __P((vn_pager_t, vm_page_t, enum uio_rw));
static boolean_t	vnode_pager_putpage
			    __P((vm_pager_t, vm_page_t, boolean_t));

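/*
 * Pager operations vector for vnode-backed objects.
 */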
struct pagerops vnodepagerops = {
	vnode_pager_init,
	vnode_pager_alloc,
	vnode_pager_dealloc,
	vnode_pager_getpage,
	vnode_pager_putpage,
	vnode_pager_haspage
};

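/*
 * Initialize the list of managed vnode pagers.
 */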
static void
vnode_pager_init()
{
#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_init()\n");
#endif
	queue_init(&vnode_pager_list);
}

/*
 * Allocate (or lookup) pager for a vnode.
 * Handle is a vnode pointer.
 */
static vm_pager_t
vnode_pager_alloc(handle, size, prot)
	caddr_t handle;
	vm_size_t size;
	vm_prot_t prot;
{
	register vm_pager_t pager;
	register vn_pager_t vnp;
	vm_object_t object;
	struct vattr vattr;
	struct vnode *vp;
	struct proc *p = curproc;	/* XXX */

#ifdef DEBUG
	if (vpagerdebug & (VDB_FOLLOW|VDB_ALLOC))
		printf("vnode_pager_alloc(%x, %x, %x)\n", handle, size, prot);
#endif
	/*
	 * Pageout to vnode, no can do yet.
	 */
	if (handle == NULL)
		return(NULL);

	/*
	 * Vnodes keep a pointer to any associated pager so no need to
	 * lookup with vm_pager_lookup.
	 */
	vp = (struct vnode *)handle;
	pager = (vm_pager_t)vp->v_vmdata;
	if (pager == NULL) {
		/*
		 * Allocate pager structures
		 */
		pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
		if (pager == NULL)
			return(NULL);
		vnp = (vn_pager_t)malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK);
		if (vnp == NULL) {
			free((caddr_t)pager, M_VMPAGER);
			return(NULL);
		}
		/*
		 * And an object of the appropriate size
		 */
		if (VOP_GETATTR(vp, &vattr, p->p_ucred, p) == 0) {
			object = vm_object_allocate(round_page(vattr.va_size));
			vm_object_enter(object, pager);
			vm_object_setpager(object, pager, 0, TRUE);
		} else {
			free((caddr_t)vnp, M_VMPGDATA);
			free((caddr_t)pager, M_VMPAGER);
			return(NULL);
		}
		/*
		 * Hold a reference to the vnode and initialize pager data.
		 */
		VREF(vp);
		vnp->vnp_flags = 0;
		vnp->vnp_vp = vp;
		vnp->vnp_size = vattr.va_size;
		queue_enter(&vnode_pager_list, pager, vm_pager_t, pg_list);
		pager->pg_handle = handle;
		pager->pg_type = PG_VNODE;
		pager->pg_ops = &vnodepagerops;
		pager->pg_data = (caddr_t)vnp;
		vp->v_vmdata = (caddr_t)pager;
	} else {
		/*
		 * vm_object_lookup() will remove the object from the
		 * cache if found and also gain a reference to the object.
		 */
		object = vm_object_lookup(pager);
#ifdef DEBUG
		vnp = (vn_pager_t)pager->pg_data;
#endif
	}
#ifdef DEBUG
	if (vpagerdebug & VDB_ALLOC)
		printf("vnode_pager_setup: vp %x sz %x pager %x object %x\n",
		       vp, vnp->vnp_size, pager, object);
#endif
	return(pager);
}

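/*
 * Tear down a vnode pager: drop the vnode reference, remove the
 * pager from the managed list, and free the pager structures.
 */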
static void
vnode_pager_dealloc(pager)
	vm_pager_t pager;
{
	register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
	register struct vnode *vp;
	struct proc *p = curproc;	/* XXX */

#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_dealloc(%x)\n", pager);
#endif
	if (vp = vnp->vnp_vp) {
		vp->v_vmdata = NULL;
		vp->v_flag &= ~VTEXT;
#if 0
		/* can hang if done at reboot on NFS FS */
		(void) VOP_FSYNC(vp, p->p_ucred, p);
#endif
		vrele(vp);
	}
	queue_remove(&vnode_pager_list, pager, vm_pager_t, pg_list);
	free((caddr_t)vnp, M_VMPGDATA);
	free((caddr_t)pager, M_VMPAGER);
}

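/*
 * Fill the given page with data read from the backing vnode.
 */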
static int
vnode_pager_getpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{

#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_getpage(%x, %x)\n", pager, m);
#endif
	return(vnode_pager_io((vn_pager_t)pager->pg_data, m, UIO_READ));
}

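/*
 * Write the given page back to the backing vnode.
 */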
static boolean_t
vnode_pager_putpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	int err;

#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_putpage(%x, %x)\n", pager, m);
#endif
	if (pager == NULL)
		return (FALSE);				/* ??? */
	err = vnode_pager_io((vn_pager_t)pager->pg_data, m, UIO_WRITE);
	if (err == VM_PAGER_OK) {
		m->flags |= PG_CLEAN;			/* XXX - wrong place */
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));	/* XXX - wrong place */
	}
	return(err);
}

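/*
 * Check whether the backing vnode has data allocated for the page
 * at the given offset.
 */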
static boolean_t
vnode_pager_haspage(pager, offset)
	vm_pager_t pager;
	vm_offset_t offset;
{
	register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
	daddr_t bn;
	int err;

#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_haspage(%x, %x)\n", pager, offset);
#endif

	/*
	 * Offset beyond end of file, do not have the page
	 */
	if (offset >= vnp->vnp_size) {
#ifdef DEBUG
		if (vpagerdebug & (VDB_FAIL|VDB_SIZE))
			printf("vnode_pager_haspage: pg %x, off %x, size %x\n",
			       pager, offset, vnp->vnp_size);
#endif
		return(FALSE);
	}

	/*
	 * Read the index to find the disk block to read
	 * from.  If there is no block, report that we don't
	 * have this data.
	 *
	 * Assumes that the vnode has whole page or nothing.
	 */
	err = VOP_BMAP(vnp->vnp_vp,
		       offset / vnp->vnp_vp->v_mount->mnt_stat.f_iosize,
		       (struct vnode **)0, &bn, NULL);
	if (err) {
#ifdef DEBUG
		if (vpagerdebug & VDB_FAIL)
			printf("vnode_pager_haspage: BMAP err %d, pg %x, off %x\n",
			       err, pager, offset);
#endif
		return(TRUE);
	}
	return((long)bn < 0 ? FALSE : TRUE);
}

/*
 * (XXX)
 * Lets the VM system know about a change in size for a file.
 * If this vnode is mapped into some address space (i.e. we have a pager
 * for it) we adjust our own internal size and flush any cached pages in
 * the associated object that are affected by the size change.
 *
 * Note: this routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 */
void
vnode_pager_setsize(vp, nsize)
	struct vnode *vp;
	u_long nsize;
{
	register vn_pager_t vnp;
	register vm_object_t object;
	vm_pager_t pager;

	/*
	 * Not a mapped vnode
	 */
	if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL)
		return;
	/*
	 * Hasn't changed size
	 */
	pager = (vm_pager_t)vp->v_vmdata;
	vnp = (vn_pager_t)pager->pg_data;
	if (nsize == vnp->vnp_size)
		return;
	/*
	 * No object.
	 * This can happen during object termination since
	 * vm_object_page_clean is called after the object
	 * has been removed from the hash table, and clean
	 * may cause vnode write operations which can wind
	 * up back here.
	 */
	object = vm_object_lookup(pager);
	if (object == NULL)
		return;

#ifdef DEBUG
	if (vpagerdebug & (VDB_FOLLOW|VDB_SIZE))
		printf("vnode_pager_setsize: vp %x obj %x osz %d nsz %d\n",
		       vp, object, vnp->vnp_size, nsize);
#endif
	/*
	 * File has shrunk.
	 * Toss any cached pages beyond the new EOF.
	 */
	if (nsize < vnp->vnp_size) {
		vm_object_lock(object);
		vm_object_page_remove(object,
				      (vm_offset_t)nsize, vnp->vnp_size);
		vm_object_unlock(object);
	}
	vnp->vnp_size = (vm_offset_t)nsize;
	vm_object_deallocate(object);
}

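/*
 * Uncache all objects backed by vnodes on the given mount point
 * (or on any mount point if mp is NULL).
 */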
void
vnode_pager_umount(mp)
	register struct mount *mp;
{
	register vm_pager_t pager, npager;
	struct vnode *vp;

	pager = (vm_pager_t) queue_first(&vnode_pager_list);
	while (!queue_end(&vnode_pager_list, (queue_entry_t)pager)) {
		/*
		 * Save the next pointer now since uncaching may
		 * terminate the object and render pager invalid
		 */
		vp = ((vn_pager_t)pager->pg_data)->vnp_vp;
		npager = (vm_pager_t) queue_next(&pager->pg_list);
		if (mp == (struct mount *)0 || vp->v_mount == mp)
			(void) vnode_pager_uncache(vp);
		pager = npager;
	}
}

/*
 * Remove vnode associated object from the object cache.
 *
 * Note: this routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 */
boolean_t
vnode_pager_uncache(vp)
	register struct vnode *vp;
{
	register vm_object_t object;
	boolean_t uncached, locked;
	vm_pager_t pager;

	/*
	 * Not a mapped vnode
	 */
	pager = (vm_pager_t)vp->v_vmdata;
	if (pager == NULL)
		return (TRUE);
	/*
	 * Unlock the vnode if it is currently locked.
	 * We do this since uncaching the object may result
	 * in its destruction which may initiate paging
	 * activity which may necessitate locking the vnode.
	 */
	locked = VOP_ISLOCKED(vp);
	if (locked)
		VOP_UNLOCK(vp);
	/*
	 * Must use vm_object_lookup() as it actually removes
	 * the object from the cache list.
	 */
	object = vm_object_lookup(pager);
	if (object) {
		uncached = (object->ref_count <= 1);
		pager_cache(object, FALSE);
	} else
		uncached = TRUE;
	if (locked)
		VOP_LOCK(vp);
	return(uncached);
}

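/*
 * Common read/write routine: map the page into kernel virtual
 * memory and transfer the data with VOP_READ/VOP_WRITE using the
 * current process credentials.
 */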
static int
vnode_pager_io(vnp, m, rw)
	register vn_pager_t vnp;
	vm_page_t m;
	enum uio_rw rw;
{
	struct uio auio;
	struct iovec aiov;
	vm_offset_t kva, foff;
	int error, size;
	struct proc *p = curproc;	/* XXX */

#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_io(%x, %x, %c): vnode %x\n",
		       vnp, m, rw == UIO_READ ? 'R' : 'W', vnp->vnp_vp);
#endif
	foff = m->offset + m->object->paging_offset;
	/*
	 * Return failure if beyond current EOF
	 */
	if (foff >= vnp->vnp_size) {
#ifdef DEBUG
		if (vpagerdebug & VDB_SIZE)
			printf("vnode_pager_io: vp %x, off %d size %d\n",
			       vnp->vnp_vp, foff, vnp->vnp_size);
#endif
		return(VM_PAGER_BAD);
	}
	if (foff + PAGE_SIZE > vnp->vnp_size)
		size = vnp->vnp_size - foff;
	else
		size = PAGE_SIZE;
	/*
	 * Allocate a kernel virtual address and initialize so that
	 * we can use VOP_READ/WRITE routines.
	 */
	kva = vm_pager_map_page(m);
	aiov.iov_base = (caddr_t)kva;
	aiov.iov_len = size;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = foff;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = rw;
	auio.uio_resid = size;
	auio.uio_procp = (struct proc *)0;
#ifdef DEBUG
	if (vpagerdebug & VDB_IO)
		printf("vnode_pager_io: vp %x kva %x foff %x size %x",
		       vnp->vnp_vp, kva, foff, size);
#endif
	if (rw == UIO_READ)
		error = VOP_READ(vnp->vnp_vp, &auio, 0, p->p_ucred);
	else
		error = VOP_WRITE(vnp->vnp_vp, &auio, 0, p->p_ucred);
#ifdef DEBUG
	if (vpagerdebug & VDB_IO) {
		if (error || auio.uio_resid)
			printf(" returns error %x, resid %x",
			       error, auio.uio_resid);
		printf("\n");
	}
#endif
	if (!error) {
		register int count = size - auio.uio_resid;

		if (count == 0)
			error = EINVAL;
		else if (count != PAGE_SIZE && rw == UIO_READ)
			bzero((void *)(kva + count), PAGE_SIZE - count);
	}
	vm_pager_unmap_page(kva);
	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
}