/*
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1993 John S. Dyson
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
 *	$Id: vnode_pager.c,v 1.10 1994/01/31 04:22:01 davidg Exp $
 */

/*
 * Page to/from files (vnodes).
 *
 * TODO:
 *	pageouts
 *	fix credential use (uses current process credentials now)
 */

/*
 * MODIFICATIONS:
 * John S. Dyson  08 Dec 93
 *
 * This file, in conjunction with some vm_fault mods, eliminates the
 * performance advantage of using the buffer cache and minimizes memory
 * copies.
 *
 * 1) Supports multiple-block reads
 * 2) Bypasses buffer cache for reads
 *
 * TODO:
 *
 * 1) Totally bypass buffer cache for reads
 *    (Currently will still sometimes use buffer cache for reads)
 * 2) Bypass buffer cache for writes
 *    (Code does not support it, but mods are simple)
 */

#include "param.h"
#include "proc.h"
#include "malloc.h"
#include "vnode.h"
#include "uio.h"
#include "mount.h"

#include "vm_param.h"
#include "vm.h"
#include "lock.h"
#include "queue.h"
#include "vm_prot.h"
#include "vm_object.h"
#include "vm_page.h"
#include "vnode_pager.h"
#include "vm_map.h"
#include "vm_pageout.h"
#include "buf.h"
#include "specdev.h"

struct pagerops vnodepagerops = {
	vnode_pager_init,
	vnode_pager_alloc,
	vnode_pager_dealloc,
	vnode_pager_getpage,
	vnode_pager_getmulti,
	vnode_pager_putpage,
	vnode_pager_haspage
};

static int vnode_pager_io(vn_pager_t vnp, vm_page_t *m, int count, int reqpage,
	enum uio_rw rw);
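/*
 * getpbuf()/relpbuf() borrow and return buffer headers from the swap
 * buffer pool; they are used for the raw device reads done below
 * (see relpbuf() after the VOP_STRATEGY() call).
 */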
struct buf *getpbuf();
void relpbuf(struct buf *bp);

extern vm_map_t pager_map;

queue_head_t vnode_pager_list;	/* list of managed vnodes */

#ifdef DEBUG
int	vpagerdebug = 0x00;
#define	VDB_FOLLOW	0x01
#define	VDB_INIT	0x02
#define	VDB_IO		0x04
#define	VDB_FAIL	0x08
#define	VDB_ALLOC	0x10
#define	VDB_SIZE	0x20
#endif

void
vnode_pager_init()
{
#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_init()\n");
#endif
	queue_init(&vnode_pager_list);
}

/*
 * Allocate (or lookup) pager for a vnode.
 * Handle is a vnode pointer.
 */
vm_pager_t
vnode_pager_alloc(handle, size, prot, offset)
	caddr_t handle;
	vm_size_t size;
	vm_prot_t prot;
	vm_offset_t offset;
{
	register vm_pager_t pager;
	register vn_pager_t vnp;
	vm_object_t object;
	struct vattr vattr;
	struct vnode *vp;
	struct proc *p = curproc;	/* XXX */

#ifdef DEBUG
	if (vpagerdebug & (VDB_FOLLOW|VDB_ALLOC))
		printf("vnode_pager_alloc(%x, %x, %x)\n", handle, size, prot);
#endif
	/*
	 * Pageout to vnode, no can do yet.
	 */
	if (handle == NULL)
		return(NULL);

	/*
	 * Vnodes keep a pointer to any associated pager so no need to
	 * lookup with vm_pager_lookup.
	 */
	vp = (struct vnode *)handle;
	pager = (vm_pager_t)vp->v_vmdata;
	if (pager == NULL) {
		/*
		 * Allocate pager structures
		 */
		pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
		if (pager == NULL)
			return(NULL);
		vnp = (vn_pager_t)malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK);
		if (vnp == NULL) {
			free((caddr_t)pager, M_VMPAGER);
			return(NULL);
		}
		/*
		 * And an object of the appropriate size
		 */
		if (VOP_GETATTR(vp, &vattr, p->p_ucred, p) == 0) {
			object = vm_object_allocate(round_page(vattr.va_size));
			vm_object_enter(object, pager);
			vm_object_setpager(object, pager, 0, TRUE);
		} else {
			free((caddr_t)vnp, M_VMPGDATA);
			free((caddr_t)pager, M_VMPAGER);
			return(NULL);
		}
		/*
		 * Hold a reference to the vnode and initialize pager data.
		 */
		VREF(vp);
		vnp->vnp_flags = 0;
		vnp->vnp_vp = vp;
		vnp->vnp_size = vattr.va_size;
		queue_enter(&vnode_pager_list, pager, vm_pager_t, pg_list);
		pager->pg_handle = handle;
		pager->pg_type = PG_VNODE;
		pager->pg_ops = &vnodepagerops;
		pager->pg_data = (caddr_t)vnp;
		vp->v_vmdata = (caddr_t)pager;
	} else {
		/*
		 * vm_object_lookup() will remove the object from the
		 * cache if found and also gain a reference to the object.
		 */
		object = vm_object_lookup(pager);
#ifdef DEBUG
		vnp = (vn_pager_t)pager->pg_data;
#endif
	}
#ifdef DEBUG
	if (vpagerdebug & VDB_ALLOC)
		printf("vnode_pager_setup: vp %x sz %x pager %x object %x\n",
		       vp, vnp->vnp_size, pager, object);
#endif
	return(pager);
}
216 | ||
217 | void | |
218 | vnode_pager_dealloc(pager) | |
219 | vm_pager_t pager; | |
220 | { | |
221 | register vn_pager_t vnp = (vn_pager_t)pager->pg_data; | |
222 | register struct vnode *vp; | |
223 | struct proc *p = curproc; /* XXX */ | |
224 | ||
225 | #ifdef DEBUG | |
226 | if (vpagerdebug & VDB_FOLLOW) | |
227 | printf("vnode_pager_dealloc(%x)\n", pager); | |
228 | #endif | |
229 | if (vp = vnp->vnp_vp) { | |
230 | vp->v_vmdata = NULL; | |
231 | vp->v_flag &= ~VTEXT; | |
232 | #if 0 | |
233 | /* can hang if done at reboot on NFS FS */ | |
234 | (void) VOP_FSYNC(vp, p->p_ucred, p); | |
235 | #endif | |
236 | vrele(vp); | |
237 | } | |
238 | queue_remove(&vnode_pager_list, pager, vm_pager_t, pg_list); | |
239 | free((caddr_t)vnp, M_VMPGDATA); | |
240 | free((caddr_t)pager, M_VMPAGER); | |
241 | } | |
242 | ||
int
vnode_pager_getmulti(pager, m, count, reqpage, sync)
	vm_pager_t pager;
	vm_page_t *m;
	int count;
	int reqpage;
	boolean_t sync;
{

	return vnode_pager_io((vn_pager_t) pager->pg_data, m, count, reqpage, UIO_READ);
}


int
vnode_pager_getpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{

	int err;
	vm_page_t marray[1];
#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_getpage(%x, %x)\n", pager, m);
#endif
	if (pager == NULL)
		return FALSE;
	marray[0] = m;

	return vnode_pager_io((vn_pager_t)pager->pg_data, marray, 1, 0, UIO_READ);
}

boolean_t
vnode_pager_putpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	int err;
	vm_page_t marray[1];

#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_putpage(%x, %x)\n", pager, m);
#endif
	if (pager == NULL)
		return FALSE;
	marray[0] = m;
	err = vnode_pager_io((vn_pager_t)pager->pg_data, marray, 1, 0, UIO_WRITE);
	return err;
}

boolean_t
vnode_pager_haspage(pager, offset)
	vm_pager_t pager;
	vm_offset_t offset;
{
	register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
	daddr_t bn;
	int err;

#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_haspage(%x, %x)\n", pager, offset);
#endif

	/*
	 * Offset beyond end of file, do not have the page
	 */
	if (offset >= vnp->vnp_size) {
#ifdef DEBUG
		if (vpagerdebug & (VDB_FAIL|VDB_SIZE))
			printf("vnode_pager_haspage: pg %x, off %x, size %x\n",
			       pager, offset, vnp->vnp_size);
#endif
		return(FALSE);
	}

	/*
	 * Read the index to find the disk block to read
	 * from.  If there is no block, report that we don't
	 * have this data.
	 *
	 * Assumes that the vnode has a whole page or nothing.
	 */
	err = VOP_BMAP(vnp->vnp_vp,
		       offset / vnp->vnp_vp->v_mount->mnt_stat.f_bsize,
		       (struct vnode **)0, &bn);
	if (err) {
#ifdef DEBUG
		if (vpagerdebug & VDB_FAIL)
			printf("vnode_pager_haspage: BMAP err %d, pg %x, off %x\n",
			       err, pager, offset);
#endif
		return(TRUE);
	}
	return((long)bn < 0 ? FALSE : TRUE);
}

/*
 * (XXX)
 * Lets the VM system know about a change in size for a file.
 * If this vnode is mapped into some address space (i.e. we have a pager
 * for it) we adjust our own internal size and flush any cached pages in
 * the associated object that are affected by the size change.
 *
 * Note: this routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 */
void
vnode_pager_setsize(vp, nsize)
	struct vnode *vp;
	u_long nsize;
{
	register vn_pager_t vnp;
	register vm_object_t object;
	vm_pager_t pager;

	/*
	 * Not a mapped vnode
	 */
	if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL)
		return;
	/*
	 * Hasn't changed size
	 */
	pager = (vm_pager_t)vp->v_vmdata;
	vnp = (vn_pager_t)pager->pg_data;
	if (nsize == vnp->vnp_size)
		return;
	/*
	 * No object.
	 * This can happen during object termination since
	 * vm_object_page_clean is called after the object
	 * has been removed from the hash table, and clean
	 * may cause vnode write operations which can wind
	 * up back here.
	 */
	object = vm_object_lookup(pager);
	if (object == NULL)
		return;

#ifdef DEBUG
	if (vpagerdebug & (VDB_FOLLOW|VDB_SIZE))
		printf("vnode_pager_setsize: vp %x obj %x osz %d nsz %d\n",
		       vp, object, vnp->vnp_size, nsize);
#endif
	/*
	 * File has shrunk.
	 * Toss any cached pages beyond the new EOF.
	 */
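	/*
	 * Round the new size up to a page boundary first, so the page
	 * that straddles the new EOF is kept rather than tossed.
	 */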
	nsize = round_page(nsize);
	if (nsize < vnp->vnp_size) {
		vm_object_lock(object);
		vm_object_page_remove(object,
			(vm_offset_t)nsize, vnp->vnp_size);
		vm_object_unlock(object);
	}
	vnp->vnp_size = (vm_offset_t)nsize;
	vm_object_deallocate(object);
}

void
vnode_pager_umount(mp)
	register struct mount *mp;
{
	register vm_pager_t pager, npager;
	struct vnode *vp;

	pager = (vm_pager_t) queue_first(&vnode_pager_list);
	while (!queue_end(&vnode_pager_list, (queue_entry_t)pager)) {
		/*
		 * Save the next pointer now since uncaching may
		 * terminate the object and render pager invalid
		 */
		vp = ((vn_pager_t)pager->pg_data)->vnp_vp;
		npager = (vm_pager_t) queue_next(&pager->pg_list);
		if (mp == (struct mount *)0 || vp->v_mount == mp)
			(void) vnode_pager_uncache(vp);
		pager = npager;
	}
}

/*
 * Remove vnode associated object from the object cache.
 *
 * Note: this routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 */
boolean_t
vnode_pager_uncache(vp)
	register struct vnode *vp;
{
	register vm_object_t object;
	boolean_t uncached, locked;
	vm_pager_t pager;

	/*
	 * Not a mapped vnode
	 */
	pager = (vm_pager_t)vp->v_vmdata;
	if (pager == NULL)
		return (TRUE);
	/*
	 * Unlock the vnode if it is currently locked.
	 * We do this since uncaching the object may result
	 * in its destruction which may initiate paging
	 * activity which may necessitate locking the vnode.
	 */
	locked = VOP_ISLOCKED(vp);
	if (locked)
		VOP_UNLOCK(vp);
	/*
	 * Must use vm_object_lookup() as it actually removes
	 * the object from the cache list.
	 */
	object = vm_object_lookup(pager);
	if (object) {
		uncached = (object->ref_count <= 1);
		pager_cache(object, FALSE);
	} else
		uncached = TRUE;
	if (locked)
		VOP_LOCK(vp);
	return(uncached);
}


void
vnode_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * calculate the linear (byte) disk address of specified virtual
 * file address
 */
vm_offset_t
vnode_pager_addr(vp, address)
	struct vnode *vp;
	vm_offset_t address;
{
	int rtaddress;
	int bsize;
	vm_offset_t block;
	struct vnode *rtvp;
	int err;
	int vblock, voffset;

	bsize = vp->v_mount->mnt_stat.f_bsize;
	vblock = address / bsize;
	voffset = address % bsize;

	err = VOP_BMAP(vp, vblock, &rtvp, &block);

	rtaddress = block * DEV_BSIZE + voffset;

	return rtaddress;
}

/*
 * interrupt routine for I/O completion
 */
void
vnode_pager_iodone(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	wakeup((caddr_t)bp);
}

/*
 * vnode_pager_io:
 *	Perform read or write operation for vnode_paging
 *
 *	args:
 *		vnp -- pointer to vnode pager data structure
 *			containing size and vnode pointer, etc
 *
 *		m -- pointer to array of vm_page_t entries to
 *			do I/O to.  It is not necessary to fill any
 *			pages except for the reqpage entry.  If a
 *			page is not filled, it needs to be removed
 *			from its object...
 *
 *		count -- number of pages for I/O
 *
 *		reqpage -- fault requested page for I/O
 *			(index into vm_page_t entries above)
 *
 *		rw -- UIO_READ or UIO_WRITE
 *
 *	NOTICE!!!! direct writes look like they are close to being
 *		implemented.  They are not really; several subtle things
 *		still need to be done to make them work.  Hack at
 *		your own risk (direct writes are scary).
 *
 *	ANOTHER NOTICE!!!!
 *		we currently only support direct I/O to filesystems whose
 *		contiguously allocated blocksize is at least a vm page.
 *		changes will be made in the future to support more flexibility.
 */

int
vnode_pager_io(vnp, m, count, reqpage, rw)
	register vn_pager_t vnp;
	vm_page_t *m;
	int count, reqpage;
	enum uio_rw rw;
{
	int i, j;
	struct uio auio;
	struct iovec aiov;
	vm_offset_t kva, foff;
	int size;
	struct proc *p = curproc;	/* XXX */
	vm_object_t object;
	vm_offset_t paging_offset;
	struct vnode *dp, *vp;
	vm_offset_t mapsize;
	int bsize;
	int errtype = 0;	/* 0 is file type otherwise vm type */
	int error = 0;

	object = m[reqpage]->object;	/* all vm_page_t items are in same object */
	paging_offset = object->paging_offset;

	/*
	 * get the UNDERLYING device for the file
	 */
	vp = vnp->vnp_vp;
	bsize = vp->v_mount->mnt_stat.f_bsize;
	VOP_BMAP(vp, 0, &dp, 0);

	/*
	 * trim off unnecessary pages
	 */
	for (i = reqpage - 1; i >= 0; --i) {
		if (m[i]->object != object) {
			for (j = 0; j <= i; j++)
				vnode_pager_freepage(m[j]);
			for (j = i + 1; j < count; j++) {
				m[j - (i + 1)] = m[j];
			}
			count -= i + 1;
			reqpage -= i + 1;
			break;
		}
	}
	for (i = reqpage + 1; i < count; i++) {
		if ((m[i]->object != object) ||
		    (m[i]->offset + paging_offset >= vnp->vnp_size)) {
			for (j = i; j < count; j++)
				vnode_pager_freepage(m[j]);
			count = i;
			break;
		}
	}

	/*
	 * we only do direct I/O if the file is on a local
	 * BLOCK device and currently if it is a read operation only.
	 */

	kva = 0;
	mapsize = 0;
	if (rw == UIO_READ && dp->v_type == VBLK &&
	    vp->v_mount->mnt_stat.f_type == MOUNT_UFS) {
		/*
		 * we do not block waiting for a kva; if none is
		 * available we simply get 0 and stay conservative
		 */
		kva = kmem_alloc_pageable(pager_map, (mapsize = count*NBPG));
	}

	if (!kva) {
		/*
		 * here on I/O through VFS
		 */
		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
				m[i] = 0;
			}
		}
		m[0] = m[reqpage];
		foff = m[0]->offset + paging_offset;
		reqpage = 0;
		count = 1;
		/*
		 * Return failure if beyond current EOF
		 */
		if (foff >= vnp->vnp_size) {
			errtype = 1;
			error = VM_PAGER_BAD;
		} else {
			if (foff + NBPG > vnp->vnp_size)
				size = vnp->vnp_size - foff;
			else
				size = NBPG;
			/*
			 * Allocate a kernel virtual address and initialize so that
			 * we can use VOP_READ/WRITE routines.
			 */
			kva = vm_pager_map_page(m[0]);
			aiov.iov_base = (caddr_t)kva;
			aiov.iov_len = size;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = foff;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = rw;
			auio.uio_resid = size;
			auio.uio_procp = (struct proc *)0;
			if (rw == UIO_READ) {
				error = VOP_READ(vp, &auio, IO_PAGER, p->p_ucred);
			} else {
				error = VOP_WRITE(vp, &auio, IO_PAGER, p->p_ucred);
			}
			if (!error) {
				register int count = size - auio.uio_resid;

				if (count == 0)
					error = EINVAL;
				else if (count != NBPG && rw == UIO_READ)
					bzero((caddr_t)kva + count, NBPG - count);
			}
			vm_pager_unmap_page(kva);
		}
	} else {
676 | ||
677 | /* | |
678 | * here on direct device I/O | |
679 | */ | |
680 | int first=0, last=count; | |
681 | int reqaddr, firstaddr; | |
682 | int block, offset; | |
683 | ||
684 | struct buf *bp; | |
685 | int s; | |
686 | int failflag; | |
687 | ||
688 | foff = m[reqpage]->offset + paging_offset; | |
689 | ||
690 | /* | |
691 | * This pathetic hack gets data from the buffer cache, if it's there. | |
692 | * I believe that this is not really necessary, and the ends can | |
693 | * be gotten by defaulting to the normal vfs read behavior, but this | |
694 | * might be more efficient, because the will NOT invoke read-aheads | |
695 | * and one of the purposes of this code is to bypass the buffer | |
696 | * cache and keep from flushing it by reading in a program. | |
697 | */ | |
698 | /* | |
699 | * calculate logical block and offset | |
700 | */ | |
701 | block = foff / bsize; | |
702 | offset = foff % bsize; | |
703 | s = splbio(); | |
704 | ||
		/*
		 * if we have a buffer in core, then try to use it
		 */
		while (bp = incore(vp, block)) {
			int amount;

			/*
			 * wait until the buffer is available or gone
			 */
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				tsleep((caddr_t)bp, PVM, "vnwblk", 0);
				continue;
			}

			amount = NBPG;
			if ((foff + amount) > vnp->vnp_size)
				amount = vnp->vnp_size - foff;

			/*
			 * make sure that this page is in the buffer
			 */
			if ((amount > 0) && (offset + amount) <= bp->b_bcount) {
				bp->b_flags |= B_BUSY;
				splx(s);

				/*
				 * map the requested page
				 */
				pmap_enter(vm_map_pmap(pager_map),
					kva, VM_PAGE_TO_PHYS(m[reqpage]),
					VM_PROT_DEFAULT, TRUE);
				/*
				 * copy the data from the buffer
				 */
				bcopy(bp->b_un.b_addr + offset, (caddr_t)kva, amount);
				if (amount < NBPG) {
					bzero((caddr_t)kva + amount, NBPG - amount);
				}
				/*
				 * unmap the page and free the kva
				 */
				pmap_remove(vm_map_pmap(pager_map), kva, kva + NBPG);
				kmem_free_wakeup(pager_map, kva, mapsize);
				/*
				 * release the buffer back to the block subsystem
				 */
				bp->b_flags &= ~B_BUSY;
				wakeup((caddr_t)bp);
				/*
				 * we did not have to do any work to get the requested
				 * page, so the read-behind/read-ahead does not justify
				 * a disk read
				 */
				for (i = 0; i < count; i++) {
					if (i != reqpage) {
						vnode_pager_freepage(m[i]);
						m[i] = 0;
					}
				}
				/*
				 * sorry for the goto
				 */
				goto finishup;
			}
			/*
			 * buffer is nowhere to be found, read from the disk
			 */
			break;
		}

		foff = m[reqpage]->offset + paging_offset;
		reqaddr = vnode_pager_addr(vp, foff);
		/*
		 * Make sure that our I/O request is contiguous.
		 * Scan backward and stop for the first discontiguous
		 * entry or stop for a page being in buffer cache.
		 */
		failflag = 0;
		for (i = reqpage - 1; i >= 0; --i) {
			int myaddr;
			if (failflag ||
			    incore(vp, (foff + (i - reqpage) * NBPG) / bsize) ||
			    (myaddr = vnode_pager_addr(vp, m[i]->offset + paging_offset))
				!= reqaddr + (i - reqpage) * NBPG) {
				vnode_pager_freepage(m[i]);
				m[i] = 0;
				if (first == 0)
					first = i + 1;
				failflag = 1;
			}
		}

		/*
		 * Scan forward and stop for the first discontiguous
		 * entry or stop for a page being in buffer cache.
		 */
		failflag = 0;
		for (i = reqpage + 1; i < count; i++) {
			int myaddr;
			if (failflag ||
			    incore(vp, (foff + (i - reqpage) * NBPG) / bsize) ||
			    (myaddr = vnode_pager_addr(vp, m[i]->offset + paging_offset))
				!= reqaddr + (i - reqpage) * NBPG) {
				vnode_pager_freepage(m[i]);
				m[i] = 0;
				if (last == count)
					last = i;
				failflag = 1;
			}
		}

		/*
		 * the first and last page have now been calculated; move the
		 * input pages to be zero-based...
		 */
		count = last;
		if (first != 0) {
			for (i = first; i < count; i++) {
				m[i - first] = m[i];
			}
			count -= first;
			reqpage -= first;
		}


		/*
		 * calculate the file virtual address for the transfer
		 */
		foff = m[0]->offset + paging_offset;
		/*
		 * and get the disk physical address (in bytes)
		 */
		firstaddr = vnode_pager_addr(vp, foff);

		/*
		 * calculate the size of the transfer
		 */
		if ((m[count - 1]->offset + paging_offset) + NBPG > vnp->vnp_size)
			size = vnp->vnp_size - foff;
		else
			size = count * NBPG;


		/*
		 * and map the pages to be read into the kva
		 */
		for (i = 0; i < count; i++)
			pmap_enter(vm_map_pmap(pager_map),
				kva + NBPG * i, VM_PAGE_TO_PHYS(m[i]),
				VM_PROT_DEFAULT, TRUE);
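		/*
		 * hold the vnode for the duration of the raw transfer
		 * (released by the HOLDRELE() after the I/O completes)
		 */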
		VHOLD(vp);
		bp = getpbuf();

		/* build a minimal buffer header */
		bzero((caddr_t)bp, sizeof(struct buf));
		bp->b_flags = B_BUSY | B_READ | B_CALL;
		bp->b_iodone = vnode_pager_iodone;
		/* B_PHYS is not set, but it is nice to fill this in */
		bp->b_proc = &proc0;
		bp->b_un.b_addr = (caddr_t) kva;
		bp->b_blkno = firstaddr / DEV_BSIZE;
		bp->b_vp = dp;

		/* Should be a BLOCK or character DEVICE if we get here */
		bp->b_dev = dp->v_rdev;
		bp->b_bcount = NBPG * count;

		/* do the input */
		VOP_STRATEGY(bp);

		/* we definitely need to be at splbio here */

		while ((bp->b_flags & B_DONE) == 0) {
			tsleep((caddr_t)bp, PVM, "vnread", 0);
		}
		splx(s);
		if ((bp->b_flags & B_ERROR) != 0)
			error = EIO;

		if (!error) {
			if (size != count * NBPG)
				bzero((caddr_t)kva + size, NBPG * count - size);
		}
		HOLDRELE(vp);

		pmap_remove(vm_map_pmap(pager_map), kva, kva + NBPG * count);
		kmem_free_wakeup(pager_map, kva, mapsize);

		/*
		 * free the buffer header back to the swap buffer pool
		 */
		relpbuf(bp);

	}

finishup:
	if (rw == UIO_READ)
		for (i = 0; i < count; i++) {
			/*
			 * we don't mess with pages that have already been
			 * deallocated....
			 */
			if (!m[i])
				continue;
			pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
			m[i]->flags |= PG_CLEAN;
			m[i]->flags &= ~PG_LAUNDRY;
			if (i != reqpage) {
				/*
				 * whether or not to leave the page activated
				 * is up in the air, but we should put the page
				 * on a page queue somewhere. (it already is in
				 * the object).
				 * Result: It appears that empirical results show
				 * that deactivating pages is best.
				 */
				/*
				 * just in case someone was asking for this
				 * page we now tell them that it is ok to use
				 */
				if (!error) {
					vm_page_deactivate(m[i]);
					PAGE_WAKEUP(m[i]);
					m[i]->flags &= ~PG_FAKE;
				} else {
					vnode_pager_freepage(m[i]);
				}
			}
		}
	if (!error && rw == UIO_WRITE) {
		pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage]));
		m[reqpage]->flags |= PG_CLEAN;
		m[reqpage]->flags &= ~PG_LAUNDRY;
	}
	if (error) {
		printf("vnode pager error: %d\n", error);
	}
	if (errtype)
		return error;
	return (error ? VM_PAGER_FAIL : VM_PAGER_OK);
}