/*
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1993 John S. Dyson
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
 * $Id: vnode_pager.c,v 1.10 1994/01/31 04:22:01 davidg Exp $
 */

/*
 * Page to/from files (vnodes).
 *
 * TODO:
 *	pageouts
 *	fix credential use (uses current process credentials now)
 */

/*
 * MODIFICATIONS:
 * John S. Dyson  08 Dec 93
 *
 * This file, in conjunction with some vm_fault mods, eliminates the
 * performance advantage of using the buffer cache and minimizes memory
 * copies.
 *
 * 1) Supports multiple-block reads
 * 2) Bypasses the buffer cache for reads
 *
 * TODO:
 *
 * 1) Totally bypass the buffer cache for reads
 *    (currently it will still sometimes use the buffer cache for reads)
 * 2) Bypass the buffer cache for writes
 *    (the code does not support it, but the mods are simple)
 */

#include "param.h"
#include "proc.h"
#include "malloc.h"
#include "vnode.h"
#include "uio.h"
#include "mount.h"

#include "vm_param.h"
#include "vm.h"
#include "lock.h"
#include "queue.h"
#include "vm_prot.h"
#include "vm_object.h"
#include "vm_page.h"
#include "vnode_pager.h"
#include "vm_map.h"
#include "vm_pageout.h"
#include "buf.h"
#include "specdev.h"

struct pagerops vnodepagerops = {
	vnode_pager_init,
	vnode_pager_alloc,
	vnode_pager_dealloc,
	vnode_pager_getpage,
	vnode_pager_getmulti,
	vnode_pager_putpage,
	vnode_pager_haspage
};
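
/*
 * Illustrative sketch: the VM system dispatches through this ops table
 * rather than calling the vnode pager directly.  Assuming the
 * contemporary BSD pagerops member names (pgo_getpage et al. -- an
 * assumption, check vm_pager.h), a generic get request reduces to
 * roughly:
 *
 *	(*pager->pg_ops->pgo_getpage)(pager, m, sync);
 *
 * which is how vnode_pager_getpage() below gets invoked.
 */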

static int vnode_pager_io(vn_pager_t vnp, vm_page_t *m, int count, int reqpage,
	enum uio_rw rw);
struct buf *getpbuf();
void relpbuf(struct buf *bp);

extern vm_map_t pager_map;

queue_head_t	vnode_pager_list;	/* list of managed vnodes */

#ifdef DEBUG
int	vpagerdebug = 0x00;
#define	VDB_FOLLOW	0x01
#define	VDB_INIT	0x02
#define	VDB_IO		0x04
#define	VDB_FAIL	0x08
#define	VDB_ALLOC	0x10
#define	VDB_SIZE	0x20
#endif

void
vnode_pager_init()
{
#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_init()\n");
#endif
	queue_init(&vnode_pager_list);
}

/*
 * Allocate (or lookup) pager for a vnode.
 * Handle is a vnode pointer.
 */
vm_pager_t
vnode_pager_alloc(handle, size, prot, offset)
	caddr_t handle;
	vm_size_t size;
	vm_prot_t prot;
	vm_offset_t offset;
{
	register vm_pager_t pager;
	register vn_pager_t vnp;
	vm_object_t object;
	struct vattr vattr;
	struct vnode *vp;
	struct proc *p = curproc;	/* XXX */

#ifdef DEBUG
	if (vpagerdebug & (VDB_FOLLOW|VDB_ALLOC))
		printf("vnode_pager_alloc(%x, %x, %x)\n", handle, size, prot);
#endif
	/*
	 * Pageout to vnode, no can do yet.
	 */
	if (handle == NULL)
		return(NULL);

	/*
	 * Vnodes keep a pointer to any associated pager so no need to
	 * lookup with vm_pager_lookup.
	 */
	vp = (struct vnode *)handle;
	pager = (vm_pager_t)vp->v_vmdata;
	if (pager == NULL) {
		/*
		 * Allocate pager structures
		 */
		pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
		if (pager == NULL)
			return(NULL);
		vnp = (vn_pager_t)malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK);
		if (vnp == NULL) {
			free((caddr_t)pager, M_VMPAGER);
			return(NULL);
		}
		/*
		 * And an object of the appropriate size
		 */
		if (VOP_GETATTR(vp, &vattr, p->p_ucred, p) == 0) {
			object = vm_object_allocate(round_page(vattr.va_size));
			vm_object_enter(object, pager);
			vm_object_setpager(object, pager, 0, TRUE);
		} else {
			free((caddr_t)vnp, M_VMPGDATA);
			free((caddr_t)pager, M_VMPAGER);
			return(NULL);
		}
		/*
		 * Hold a reference to the vnode and initialize pager data.
		 */
		VREF(vp);
		vnp->vnp_flags = 0;
		vnp->vnp_vp = vp;
		vnp->vnp_size = vattr.va_size;
		queue_enter(&vnode_pager_list, pager, vm_pager_t, pg_list);
		pager->pg_handle = handle;
		pager->pg_type = PG_VNODE;
		pager->pg_ops = &vnodepagerops;
		pager->pg_data = (caddr_t)vnp;
		vp->v_vmdata = (caddr_t)pager;
	} else {
		/*
		 * vm_object_lookup() will remove the object from the
		 * cache if found and also gain a reference to the object.
		 */
		object = vm_object_lookup(pager);
#ifdef DEBUG
		vnp = (vn_pager_t)pager->pg_data;
#endif
	}
#ifdef DEBUG
	if (vpagerdebug & VDB_ALLOC)
		printf("vnode_pager_alloc: vp %x sz %x pager %x object %x\n",
		       vp, vnp->vnp_size, pager, object);
#endif
	return(pager);
}
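
/*
 * Usage sketch (hypothetical call site, disabled): a mapping request
 * typically reaches vnode_pager_alloc() through the generic allocation
 * wrapper.  The wrapper name vm_pager_allocate() and its argument order
 * follow the contemporary VM code and are assumptions here.
 */
#if 0
/* hypothetical caller, for illustration only */
static vm_pager_t
example_map_vnode(vp, size, prot, foff)
	struct vnode *vp;
	vm_size_t size;
	vm_prot_t prot;
	vm_offset_t foff;
{
	/* looks up an existing pager for vp, or creates one */
	return vm_pager_allocate(PG_VNODE, (caddr_t)vp, size, prot, foff);
}
#endif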

void
vnode_pager_dealloc(pager)
	vm_pager_t pager;
{
	register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
	register struct vnode *vp;
	struct proc *p = curproc;	/* XXX */

#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_dealloc(%x)\n", pager);
#endif
	if (vp = vnp->vnp_vp) {
		vp->v_vmdata = NULL;
		vp->v_flag &= ~VTEXT;
#if 0
		/* can hang if done at reboot on NFS FS */
		(void) VOP_FSYNC(vp, p->p_ucred, p);
#endif
		vrele(vp);
	}
	queue_remove(&vnode_pager_list, pager, vm_pager_t, pg_list);
	free((caddr_t)vnp, M_VMPGDATA);
	free((caddr_t)pager, M_VMPAGER);
}

int
vnode_pager_getmulti(pager, m, count, reqpage, sync)
	vm_pager_t pager;
	vm_page_t *m;
	int count;
	int reqpage;
	boolean_t sync;
{

	return vnode_pager_io((vn_pager_t) pager->pg_data, m, count, reqpage, UIO_READ);
}

int
vnode_pager_getpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{

	int err;
	vm_page_t marray[1];
#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_getpage(%x, %x)\n", pager, m);
#endif
	if (pager == NULL)
		return FALSE;
	marray[0] = m;

	return vnode_pager_io((vn_pager_t)pager->pg_data, marray, 1, 0, UIO_READ);
}

boolean_t
vnode_pager_putpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	int err;
	vm_page_t marray[1];

#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_putpage(%x, %x)\n", pager, m);
#endif
	if (pager == NULL)
		return FALSE;
	marray[0] = m;
	err = vnode_pager_io((vn_pager_t)pager->pg_data, marray, 1, 0, UIO_WRITE);
	return err;
}

boolean_t
vnode_pager_haspage(pager, offset)
	vm_pager_t pager;
	vm_offset_t offset;
{
	register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
	daddr_t bn;
	int err;

#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_haspage(%x, %x)\n", pager, offset);
#endif

	/*
	 * Offset beyond end of file, do not have the page
	 */
	if (offset >= vnp->vnp_size) {
#ifdef DEBUG
		if (vpagerdebug & (VDB_FAIL|VDB_SIZE))
			printf("vnode_pager_haspage: pg %x, off %x, size %x\n",
			       pager, offset, vnp->vnp_size);
#endif
		return(FALSE);
	}

	/*
	 * Read the index to find the disk block to read
	 * from.  If there is no block, report that we don't
	 * have this data.
	 *
	 * Assumes that the vnode has the whole page or nothing.
	 */
	err = VOP_BMAP(vnp->vnp_vp,
		       offset / vnp->vnp_vp->v_mount->mnt_stat.f_bsize,
		       (struct vnode **)0, &bn);
	if (err) {
#ifdef DEBUG
		if (vpagerdebug & VDB_FAIL)
			printf("vnode_pager_haspage: BMAP err %d, pg %x, off %x\n",
			       err, pager, offset);
#endif
		return(TRUE);
	}
	return((long)bn < 0 ? FALSE : TRUE);
}
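
/*
 * Worked example (illustrative): with an 8192-byte filesystem block
 * size, a query at offset 20480 probes logical block 20480 / 8192 = 2.
 * If VOP_BMAP maps block 2 to a real disk block the page is reported
 * present; a hole (bn < 0) reports FALSE.  A BMAP error is reported as
 * TRUE, so the caller will attempt the read and catch the error there.
 */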

/*
 * (XXX)
 * Lets the VM system know about a change in size for a file.
 * If this vnode is mapped into some address space (i.e. we have a pager
 * for it) we adjust our own internal size and flush any cached pages in
 * the associated object that are affected by the size change.
 *
 * Note: this routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 */
void
vnode_pager_setsize(vp, nsize)
	struct vnode *vp;
	u_long nsize;
{
	register vn_pager_t vnp;
	register vm_object_t object;
	vm_pager_t pager;

	/*
	 * Not a mapped vnode
	 */
	if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL)
		return;
	/*
	 * Hasn't changed size
	 */
	pager = (vm_pager_t)vp->v_vmdata;
	vnp = (vn_pager_t)pager->pg_data;
	if (nsize == vnp->vnp_size)
		return;
	/*
	 * No object.
	 * This can happen during object termination since
	 * vm_object_page_clean is called after the object
	 * has been removed from the hash table, and clean
	 * may cause vnode write operations which can wind
	 * up back here.
	 */
	object = vm_object_lookup(pager);
	if (object == NULL)
		return;

#ifdef DEBUG
	if (vpagerdebug & (VDB_FOLLOW|VDB_SIZE))
		printf("vnode_pager_setsize: vp %x obj %x osz %d nsz %d\n",
		       vp, object, vnp->vnp_size, nsize);
#endif
	/*
	 * File has shrunk.
	 * Toss any cached pages beyond the new EOF.
	 */
	nsize = round_page(nsize);
	if (nsize < vnp->vnp_size) {
		vm_object_lock(object);
		vm_object_page_remove(object,
				      (vm_offset_t)nsize, vnp->vnp_size);
		vm_object_unlock(object);
	}
	vnp->vnp_size = (vm_offset_t)nsize;
	vm_object_deallocate(object);
}
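
/*
 * Worked example (illustrative, assuming NBPG == 4096): shrinking a
 * 20000-byte mapped file to 5000 bytes rounds nsize up to 8192, pages
 * covering [8192, 20000) are removed from the object, and vnp_size
 * becomes 8192.  Rounding keeps the partially valid final page
 * resident rather than tossing data short of the new EOF.
 */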

void
vnode_pager_umount(mp)
	register struct mount *mp;
{
	register vm_pager_t pager, npager;
	struct vnode *vp;

	pager = (vm_pager_t) queue_first(&vnode_pager_list);
	while (!queue_end(&vnode_pager_list, (queue_entry_t)pager)) {
		/*
		 * Save the next pointer now since uncaching may
		 * terminate the object and render pager invalid
		 */
		vp = ((vn_pager_t)pager->pg_data)->vnp_vp;
		npager = (vm_pager_t) queue_next(&pager->pg_list);
		if (mp == (struct mount *)0 || vp->v_mount == mp)
			(void) vnode_pager_uncache(vp);
		pager = npager;
	}
}

/*
 * Remove vnode associated object from the object cache.
 *
 * Note: this routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 */
boolean_t
vnode_pager_uncache(vp)
	register struct vnode *vp;
{
	register vm_object_t object;
	boolean_t uncached, locked;
	vm_pager_t pager;

	/*
	 * Not a mapped vnode
	 */
	pager = (vm_pager_t)vp->v_vmdata;
	if (pager == NULL)
		return (TRUE);
	/*
	 * Unlock the vnode if it is currently locked.
	 * We do this since uncaching the object may result
	 * in its destruction which may initiate paging
	 * activity which may necessitate locking the vnode.
	 */
	locked = VOP_ISLOCKED(vp);
	if (locked)
		VOP_UNLOCK(vp);
	/*
	 * Must use vm_object_lookup() as it actually removes
	 * the object from the cache list.
	 */
	object = vm_object_lookup(pager);
	if (object) {
		uncached = (object->ref_count <= 1);
		pager_cache(object, FALSE);
	} else
		uncached = TRUE;
	if (locked)
		VOP_LOCK(vp);
	return(uncached);
}

void
vnode_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * calculate the linear (byte) disk address of specified virtual
 * file address
 */
vm_offset_t
vnode_pager_addr(vp, address)
	struct vnode *vp;
	vm_offset_t address;
{
	int rtaddress;
	int bsize;
	vm_offset_t block;
	struct vnode *rtvp;
	int err;
	int vblock, voffset;

	bsize = vp->v_mount->mnt_stat.f_bsize;
	vblock = address / bsize;
	voffset = address % bsize;

	err = VOP_BMAP(vp, vblock, &rtvp, &block);

	rtaddress = block * DEV_BSIZE + voffset;

	return rtaddress;
}
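
/*
 * Worked example (illustrative, with DEV_BSIZE == 512): for an
 * 8192-byte filesystem block size and file address 20480,
 * vblock = 20480 / 8192 = 2 and voffset = 20480 % 8192 = 4096.
 * If VOP_BMAP maps logical block 2 to disk block 1000, the linear
 * byte address is 1000 * 512 + 4096 = 516096.
 */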

/*
 * interrupt routine for I/O completion
 */
void
vnode_pager_iodone(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	wakeup((caddr_t)bp);
}
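
/*
 * Note: the direct-I/O path below sets B_CALL in b_flags, which makes
 * biodone() call this routine at interrupt time instead of doing the
 * normal buffer release, so all it must do is mark the buffer done and
 * wake the thread sleeping in vnode_pager_io().
 */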

/*
 * vnode_pager_io:
 *	Perform read or write operation for vnode_paging
 *
 *	args:
 *		vnp -- pointer to vnode pager data structure
 *			containing size and vnode pointer, etc
 *
 *		m -- pointer to array of vm_page_t entries to
 *			do I/O to.  It is not necessary to fill any
 *			pages except for the reqpage entry.  If a
 *			page is not filled, it needs to be removed
 *			from its object...
 *
 *		count -- number of pages for I/O
 *
 *		reqpage -- fault requested page for I/O
 *			(index into vm_page_t entries above)
 *
 *		rw -- UIO_READ or UIO_WRITE
 *
 *	NOTICE!!!! direct writes look as though they are close to being
 *		implemented.  They are not really, and several things need
 *		to be done to make them work (subtle things).  Hack at
 *		your own risk (direct writes are scary).
 *
 *	ANOTHER NOTICE!!!!
 *		we currently only support direct I/O to filesystems whose
 *		contiguously allocated blocksize is at least a vm page.
 *		changes will be made in the future to support more flexibility.
 */
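
/*
 * Contract example (illustrative): for a fault on the middle page of a
 * three-page cluster, count == 3 and reqpage == 1.  Only m[1] must be
 * valid on return; m[0] and m[2] are opportunistic read-behind/ahead
 * and are released via vnode_pager_freepage() if they turn out to be
 * discontiguous on disk, beyond EOF, or otherwise not worth the I/O.
 */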

int
vnode_pager_io(vnp, m, count, reqpage, rw)
	register vn_pager_t vnp;
	vm_page_t *m;
	int count, reqpage;
	enum uio_rw rw;
{
	int i, j;
	struct uio auio;
	struct iovec aiov;
	vm_offset_t kva, foff;
	int size;
	struct proc *p = curproc;	/* XXX */
	vm_object_t object;
	vm_offset_t paging_offset;
	struct vnode *dp, *vp;
	vm_offset_t mapsize;
	int bsize;
	int errtype = 0;	/* 0 is file type otherwise vm type */
	int error = 0;

	object = m[reqpage]->object;	/* all vm_page_t items are in same object */
	paging_offset = object->paging_offset;

	/*
	 * get the UNDERLYING device for the file
	 */
	vp = vnp->vnp_vp;
	bsize = vp->v_mount->mnt_stat.f_bsize;
	VOP_BMAP(vp, 0, &dp, 0);

	/*
	 * trim off unnecessary pages
	 */
	for (i = reqpage - 1; i >= 0; --i) {
		if (m[i]->object != object) {
			for (j = 0; j <= i; j++)
				vnode_pager_freepage(m[j]);
			for (j = i + 1; j < count; j++) {
				m[j - (i + 1)] = m[j];
			}
			count -= i + 1;
			reqpage -= i + 1;
			break;
		}
	}
	for (i = reqpage + 1; i < count; i++) {
		if ((m[i]->object != object) ||
		    (m[i]->offset + paging_offset >= vnp->vnp_size)) {
			for (j = i; j < count; j++)
				vnode_pager_freepage(m[j]);
			count = i;
			break;
		}
	}

	/*
	 * we only do direct I/O if the file is on a local
	 * BLOCK device and currently if it is a read operation only.
	 */

	kva = 0;
	mapsize = 0;
	if (rw == UIO_READ && dp->v_type == VBLK &&
	    vp->v_mount->mnt_stat.f_type == MOUNT_UFS) {
		/*
		 * we do not block waiting for a kva; if none is
		 * available we default to the conservative (VFS)
		 * read path below
		 */
		kva = kmem_alloc_pageable(pager_map, (mapsize = count*NBPG));
	}

	if (!kva) {
		/*
		 * here on I/O through VFS
		 */
		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
				m[i] = 0;
			}
		}
		m[0] = m[reqpage];
		foff = m[0]->offset + paging_offset;
		reqpage = 0;
		count = 1;
		/*
		 * Return failure if beyond current EOF
		 */
		if (foff >= vnp->vnp_size) {
			errtype = 1;
			error = VM_PAGER_BAD;
		} else {
			if (foff + NBPG > vnp->vnp_size)
				size = vnp->vnp_size - foff;
			else
				size = NBPG;
			/*
			 * Allocate a kernel virtual address and initialize so
			 * that we can use VOP_READ/WRITE routines.
			 */
			kva = vm_pager_map_page(m[0]);
			aiov.iov_base = (caddr_t)kva;
			aiov.iov_len = size;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = foff;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = rw;
			auio.uio_resid = size;
			auio.uio_procp = (struct proc *)0;
			if (rw == UIO_READ) {
				error = VOP_READ(vp, &auio, IO_PAGER, p->p_ucred);
			} else {
				error = VOP_WRITE(vp, &auio, IO_PAGER, p->p_ucred);
			}
			if (!error) {
				register int count = size - auio.uio_resid;

				if (count == 0)
					error = EINVAL;
				else if (count != NBPG && rw == UIO_READ)
					bzero((caddr_t)kva + count, NBPG - count);
			}
			vm_pager_unmap_page(kva);
		}
	} else {

		/*
		 * here on direct device I/O
		 */
		int first = 0, last = count;
		int reqaddr, firstaddr;
		int block, offset;

		struct buf *bp;
		int s;
		int failflag;

		foff = m[reqpage]->offset + paging_offset;

		/*
		 * This pathetic hack gets data from the buffer cache, if it's
		 * there.  I believe that this is not really necessary, and the
		 * same ends can be gotten by defaulting to the normal vfs read
		 * behavior, but this might be more efficient, because it will
		 * NOT invoke read-aheads, and one of the purposes of this code
		 * is to bypass the buffer cache and keep from flushing it by
		 * reading in a program.
		 */
		/*
		 * calculate logical block and offset
		 */
		block = foff / bsize;
		offset = foff % bsize;
		s = splbio();

		/*
		 * if we have a buffer in core, then try to use it
		 */
		while (bp = incore(vp, block)) {
			int amount;

			/*
			 * wait until the buffer is avail or gone
			 */
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				tsleep((caddr_t)bp, PVM, "vnwblk", 0);
				continue;
			}

			amount = NBPG;
			if ((foff + amount) > vnp->vnp_size)
				amount = vnp->vnp_size - foff;

			/*
			 * make sure that this page is in the buffer
			 */
			if ((amount > 0) && (offset + amount) <= bp->b_bcount) {
				bp->b_flags |= B_BUSY;
				splx(s);

				/*
				 * map the requested page
				 */
				pmap_enter(vm_map_pmap(pager_map),
					   kva, VM_PAGE_TO_PHYS(m[reqpage]),
					   VM_PROT_DEFAULT, TRUE);
				/*
				 * copy the data from the buffer
				 */
				bcopy(bp->b_un.b_addr + offset, (caddr_t)kva, amount);
				if (amount < NBPG) {
					bzero((caddr_t)kva + amount, NBPG - amount);
				}
				/*
				 * unmap the page and free the kva
				 */
				pmap_remove(vm_map_pmap(pager_map), kva, kva + NBPG);
				kmem_free_wakeup(pager_map, kva, mapsize);
				/*
				 * release the buffer back to the block subsystem
				 */
				bp->b_flags &= ~B_BUSY;
				wakeup((caddr_t)bp);
				/*
				 * we did not have to do any work to get the requested
				 * page, the read behind/ahead does not justify a read
				 */
				for (i = 0; i < count; i++) {
					if (i != reqpage) {
						vnode_pager_freepage(m[i]);
						m[i] = 0;
					}
				}
				/*
				 * sorry for the goto
				 */
				goto finishup;
			}
			/*
			 * buffer is nowhere to be found, read from the disk
			 */
			break;
		}

		foff = m[reqpage]->offset + paging_offset;
		reqaddr = vnode_pager_addr(vp, foff);
		/*
		 * Make sure that our I/O request is contiguous.
		 * Scan backward and stop for the first discontiguous
		 * entry or stop for a page being in buffer cache.
		 */
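		/*
		 * Worked example (illustrative, NBPG == 4096): if the
		 * requested page maps to disk byte address 516096, the page
		 * at reqpage - 1 is kept only if it maps to exactly
		 * 516096 - 4096 = 512000 and is not already in the buffer
		 * cache; a neighbor at, say, 409600 ends the scan.
		 */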
		failflag = 0;
		for (i = reqpage - 1; i >= 0; --i) {
			int myaddr;
			if (failflag ||
			    incore(vp, (foff + (i - reqpage) * NBPG) / bsize) ||
			    (myaddr = vnode_pager_addr(vp, m[i]->offset + paging_offset))
			    != reqaddr + (i - reqpage) * NBPG) {
				vnode_pager_freepage(m[i]);
				m[i] = 0;
				if (first == 0)
					first = i + 1;
				failflag = 1;
			}
		}

		/*
		 * Scan forward and stop for the first discontiguous
		 * entry or stop for a page being in buffer cache.
		 */
		failflag = 0;
		for (i = reqpage + 1; i < count; i++) {
			int myaddr;
			if (failflag ||
			    incore(vp, (foff + (i - reqpage) * NBPG) / bsize) ||
			    (myaddr = vnode_pager_addr(vp, m[i]->offset + paging_offset))
			    != reqaddr + (i - reqpage) * NBPG) {
				vnode_pager_freepage(m[i]);
				m[i] = 0;
				if (last == count)
					last = i;
				failflag = 1;
			}
		}

		/*
		 * the first and last page have been calculated now, move input
		 * pages to be zero based...
		 */
		count = last;
		if (first != 0) {
			for (i = first; i < count; i++) {
				m[i - first] = m[i];
			}
			count -= first;
			reqpage -= first;
		}

		/*
		 * calculate the file virtual address for the transfer
		 */
		foff = m[0]->offset + paging_offset;
		/*
		 * and get the disk physical address (in bytes)
		 */
		firstaddr = vnode_pager_addr(vp, foff);

		/*
		 * calculate the size of the transfer
		 */
		if ((m[count - 1]->offset + paging_offset) + NBPG > vnp->vnp_size)
			size = vnp->vnp_size - foff;
		else
			size = count * NBPG;

		/*
		 * and map the pages to be read into the kva
		 */
		for (i = 0; i < count; i++)
			pmap_enter(vm_map_pmap(pager_map),
				   kva + NBPG * i, VM_PAGE_TO_PHYS(m[i]),
				   VM_PROT_DEFAULT, TRUE);
		VHOLD(vp);
		bp = getpbuf();

		/* build a minimal buffer header */
		bzero((caddr_t)bp, sizeof(struct buf));
		bp->b_flags = B_BUSY | B_READ | B_CALL;
		bp->b_iodone = vnode_pager_iodone;
		/* B_PHYS is not set, but it is nice to fill this in */
		bp->b_proc = &proc0;
		bp->b_un.b_addr = (caddr_t) kva;
		bp->b_blkno = firstaddr / DEV_BSIZE;
		bp->b_vp = dp;

		/* Should be a BLOCK or character DEVICE if we get here */
		bp->b_dev = dp->v_rdev;
		bp->b_bcount = NBPG * count;

		/* do the input */
		VOP_STRATEGY(bp);

		/* we definitely need to be at splbio here */

		while ((bp->b_flags & B_DONE) == 0) {
			tsleep((caddr_t)bp, PVM, "vnread", 0);
		}
		splx(s);
		if ((bp->b_flags & B_ERROR) != 0)
			error = EIO;

		if (!error) {
			if (size != count * NBPG)
				bzero((caddr_t)kva + size, NBPG * count - size);
		}
		HOLDRELE(vp);

		pmap_remove(vm_map_pmap(pager_map), kva, kva + NBPG * count);
		kmem_free_wakeup(pager_map, kva, mapsize);

		/*
		 * free the buffer header back to the swap buffer pool
		 */
		relpbuf(bp);

	}

finishup:
	if (rw == UIO_READ)
		for (i = 0; i < count; i++) {
			/*
			 * we don't mess with pages that have already been
			 * deallocated....
			 */
			if (!m[i])
				continue;
			pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
			m[i]->flags |= PG_CLEAN;
			m[i]->flags &= ~PG_LAUNDRY;
			if (i != reqpage) {
				/*
				 * whether or not to leave the page activated
				 * is up in the air, but we should put the page
				 * on a page queue somewhere.  (it already is in
				 * the object).
				 * Result: It appears that empirical results show
				 * that deactivating pages is best.
				 */
				/*
				 * just in case someone was asking for this
				 * page we now tell them that it is ok to use
				 */
				if (!error) {
					vm_page_deactivate(m[i]);
					PAGE_WAKEUP(m[i]);
					m[i]->flags &= ~PG_FAKE;
				} else {
					vnode_pager_freepage(m[i]);
				}
			}
		}
	if (!error && rw == UIO_WRITE) {
		pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage]));
		m[reqpage]->flags |= PG_CLEAN;
		m[reqpage]->flags &= ~PG_LAUNDRY;
	}
	if (error) {
		printf("vnode pager error: %d\n", error);
	}
	if (errtype)
		return error;
	return (error ? VM_PAGER_FAIL : VM_PAGER_OK);
}