Commit | Line | Data |
---|---|---|
15637ed4 RG |
1 | /* |
2 | * Copyright (c) 1990 University of Utah. | |
3 | * Copyright (c) 1991 The Regents of the University of California. | |
4 | * All rights reserved. | |
55768178 | 5 | * Copyright (c) 1993 John S. Dyson |
15637ed4 RG |
6 | * |
7 | * This code is derived from software contributed to Berkeley by | |
8 | * the Systems Programming Group of the University of Utah Computer | |
9 | * Science Department. | |
10 | * | |
11 | * Redistribution and use in source and binary forms, with or without | |
12 | * modification, are permitted provided that the following conditions | |
13 | * are met: | |
14 | * 1. Redistributions of source code must retain the above copyright | |
15 | * notice, this list of conditions and the following disclaimer. | |
16 | * 2. Redistributions in binary form must reproduce the above copyright | |
17 | * notice, this list of conditions and the following disclaimer in the | |
18 | * documentation and/or other materials provided with the distribution. | |
19 | * 3. All advertising materials mentioning features or use of this software | |
20 | * must display the following acknowledgement: | |
21 | * This product includes software developed by the University of | |
22 | * California, Berkeley and its contributors. | |
23 | * 4. Neither the name of the University nor the names of its contributors | |
24 | * may be used to endorse or promote products derived from this software | |
25 | * without specific prior written permission. | |
26 | * | |
27 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
28 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
29 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
30 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
31 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
32 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
33 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
34 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
35 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
36 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
37 | * SUCH DAMAGE. | |
38 | * | |
1284e777 | 39 | * from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91 |
41aefbec | 40 | * $Id: vnode_pager.c,v 1.12 1994/03/07 11:39:18 davidg Exp $ |
15637ed4 RG |
41 | */ |
42 | ||
43 | /* | |
44 | * Page to/from files (vnodes). | |
45 | * | |
46 | * TODO: | |
47 | * pageouts | |
48 | * fix credential use (uses current process credentials now) | |
49 | */ | |
15637ed4 | 50 | |
55768178 DG |
51 | /* |
52 | * MODIFICATIONS: | |
53 | * John S. Dyson 08 Dec 93 | |
54 | * | |
55 | * This file in conjunction with some vm_fault mods, eliminate the performance | |
56 | * advantage for using the buffer cache and minimize memory copies. | |
57 | * | |
58 | * 1) Supports multiple - block reads | |
59 | * 2) Bypasses buffer cache for reads | |
60 | * | |
61 | * TODO: | |
62 | * | |
63 | * 1) Totally bypass buffer cache for reads | |
64 | * (Currently will still sometimes use buffer cache for reads) | |
65 | * 2) Bypass buffer cache for writes | |
66 | * (Code does not support it, but mods are simple) | |
67 | */ | |
68 | ||
15637ed4 RG |
69 | #include "param.h" |
70 | #include "proc.h" | |
71 | #include "malloc.h" | |
72 | #include "vnode.h" | |
73 | #include "uio.h" | |
74 | #include "mount.h" | |
75 | ||
76 | #include "vm_param.h" | |
55768178 | 77 | #include "vm.h" |
15637ed4 RG |
78 | #include "lock.h" |
79 | #include "queue.h" | |
80 | #include "vm_prot.h" | |
81 | #include "vm_object.h" | |
82 | #include "vm_page.h" | |
83 | #include "vnode_pager.h" | |
55768178 DG |
84 | #include "vm_map.h" |
85 | #include "vm_pageout.h" | |
86 | #include "buf.h" | |
87 | #include "specdev.h" | |
15637ed4 | 88 | |
bbc3f849 GW |
/*
 * Operations vector handed to the generic pager layer for
 * vnode-backed objects.  Slot order matches struct pagerops.
 */
struct pagerops vnodepagerops = {
	vnode_pager_init,
	vnode_pager_alloc,
	vnode_pager_dealloc,
	vnode_pager_getpage,
	vnode_pager_getmulti,
	vnode_pager_putpage,
	0,		/* multi-page put not provided — presumably the putmulti slot; TODO confirm against struct pagerops */
	vnode_pager_haspage
};
99 | ||
55768178 DG |
100 | static int vnode_pager_io(vn_pager_t vnp, vm_page_t *m, int count, int reqpage, |
101 | enum uio_rw rw); | |
102 | struct buf * getpbuf() ; | |
103 | void relpbuf(struct buf *bp) ; | |
104 | ||
105 | extern vm_map_t pager_map; | |
fde1aeb2 | 106 | |
15637ed4 RG |
107 | queue_head_t vnode_pager_list; /* list of managed vnodes */ |
108 | ||
109 | #ifdef DEBUG | |
110 | int vpagerdebug = 0x00; | |
111 | #define VDB_FOLLOW 0x01 | |
112 | #define VDB_INIT 0x02 | |
113 | #define VDB_IO 0x04 | |
114 | #define VDB_FAIL 0x08 | |
115 | #define VDB_ALLOC 0x10 | |
116 | #define VDB_SIZE 0x20 | |
117 | #endif | |
118 | ||
/*
 * One-time initialization for the vnode pager class: set up the
 * global list of managed vnode pagers (vnode_pager_list).
 */
void
vnode_pager_init()
{
#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_init()\n");
#endif
	queue_init(&vnode_pager_list);
}
128 | ||
129 | /* | |
130 | * Allocate (or lookup) pager for a vnode. | |
131 | * Handle is a vnode pointer. | |
132 | */ | |
vm_pager_t
vnode_pager_alloc(handle, size, prot, offset)
	caddr_t handle;		/* really a struct vnode *; NULL = anonymous request */
	vm_size_t size;
	vm_prot_t prot;
	vm_offset_t offset;	/* unused */
{
	register vm_pager_t pager;
	register vn_pager_t vnp;
	vm_object_t object;
	struct vattr vattr;
	struct vnode *vp;
	struct proc *p = curproc;	/* XXX */

#ifdef DEBUG
	if (vpagerdebug & (VDB_FOLLOW|VDB_ALLOC))
		printf("vnode_pager_alloc(%x, %x, %x)\n", handle, size, prot);
#endif
	/*
	 * Pageout to vnode, no can do yet.
	 * (An anonymous — handle-less — pager cannot be built here.)
	 */
	if (handle == NULL)
		return(NULL);

	/*
	 * Vnodes keep a pointer to any associated pager so no need to
	 * lookup with vm_pager_lookup.
	 */
	vp = (struct vnode *)handle;
	pager = (vm_pager_t)vp->v_vmdata;
	if (pager == NULL) {
		/*
		 * Allocate pager structures
		 */
		pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
		if (pager == NULL)
			return(NULL);
		vnp = (vn_pager_t)malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK);
		if (vnp == NULL) {
			free((caddr_t)pager, M_VMPAGER);
			return(NULL);
		}
		/*
		 * And an object of the appropriate size, taken from the
		 * file's current attributes rounded up to a whole page.
		 */
		if (VOP_GETATTR(vp, &vattr, p->p_ucred, p) == 0) {
			object = vm_object_allocate(round_page(vattr.va_size));
			vm_object_enter(object, pager);
			vm_object_setpager(object, pager, 0, TRUE);
		} else {
			/* attribute fetch failed: undo both allocations */
			free((caddr_t)vnp, M_VMPGDATA);
			free((caddr_t)pager, M_VMPAGER);
			return(NULL);
		}
		/*
		 * Hold a reference to the vnode and initialize pager data.
		 * (The matching vrele() is in vnode_pager_dealloc().)
		 */
		VREF(vp);
		vnp->vnp_flags = 0;
		vnp->vnp_vp = vp;
		vnp->vnp_size = vattr.va_size;
		queue_enter(&vnode_pager_list, pager, vm_pager_t, pg_list);
		pager->pg_handle = handle;
		pager->pg_type = PG_VNODE;
		pager->pg_ops = &vnodepagerops;
		pager->pg_data = (caddr_t)vnp;
		vp->v_vmdata = (caddr_t)pager;
	} else {
		/*
		 * vm_object_lookup() will remove the object from the
		 * cache if found and also gain a reference to the object.
		 */
		object = vm_object_lookup(pager);
#ifdef DEBUG
		vnp = (vn_pager_t)pager->pg_data;
#endif
	}
#ifdef DEBUG
	if (vpagerdebug & VDB_ALLOC)
		printf("vnode_pager_setup: vp %x sz %x pager %x object %x\n",
		       vp, vnp->vnp_size, pager, object);
#endif
	return(pager);
}
217 | ||
218 | void | |
219 | vnode_pager_dealloc(pager) | |
220 | vm_pager_t pager; | |
221 | { | |
222 | register vn_pager_t vnp = (vn_pager_t)pager->pg_data; | |
223 | register struct vnode *vp; | |
224 | struct proc *p = curproc; /* XXX */ | |
225 | ||
226 | #ifdef DEBUG | |
227 | if (vpagerdebug & VDB_FOLLOW) | |
228 | printf("vnode_pager_dealloc(%x)\n", pager); | |
229 | #endif | |
230 | if (vp = vnp->vnp_vp) { | |
231 | vp->v_vmdata = NULL; | |
232 | vp->v_flag &= ~VTEXT; | |
233 | #if 0 | |
234 | /* can hang if done at reboot on NFS FS */ | |
235 | (void) VOP_FSYNC(vp, p->p_ucred, p); | |
236 | #endif | |
237 | vrele(vp); | |
238 | } | |
239 | queue_remove(&vnode_pager_list, pager, vm_pager_t, pg_list); | |
240 | free((caddr_t)vnp, M_VMPGDATA); | |
241 | free((caddr_t)pager, M_VMPAGER); | |
242 | } | |
243 | ||
55768178 DG |
244 | int |
245 | vnode_pager_getmulti(pager, m, count, reqpage, sync) | |
246 | vm_pager_t pager; | |
247 | vm_page_t *m; | |
248 | int count; | |
249 | int reqpage; | |
250 | boolean_t sync; | |
251 | { | |
252 | ||
253 | return vnode_pager_io((vn_pager_t) pager->pg_data, m, count, reqpage, UIO_READ); | |
254 | } | |
255 | ||
256 | ||
4c45483e | 257 | int |
15637ed4 RG |
258 | vnode_pager_getpage(pager, m, sync) |
259 | vm_pager_t pager; | |
260 | vm_page_t m; | |
261 | boolean_t sync; | |
262 | { | |
263 | ||
55768178 DG |
264 | int err; |
265 | vm_page_t marray[1]; | |
15637ed4 RG |
266 | #ifdef DEBUG |
267 | if (vpagerdebug & VDB_FOLLOW) | |
268 | printf("vnode_pager_getpage(%x, %x)\n", pager, m); | |
269 | #endif | |
55768178 DG |
270 | if (pager == NULL) |
271 | return FALSE; | |
272 | marray[0] = m; | |
273 | ||
274 | return vnode_pager_io((vn_pager_t)pager->pg_data, marray, 1, 0, UIO_READ); | |
15637ed4 RG |
275 | } |
276 | ||
277 | boolean_t | |
278 | vnode_pager_putpage(pager, m, sync) | |
279 | vm_pager_t pager; | |
280 | vm_page_t m; | |
281 | boolean_t sync; | |
282 | { | |
283 | int err; | |
55768178 | 284 | vm_page_t marray[1]; |
15637ed4 RG |
285 | |
286 | #ifdef DEBUG | |
287 | if (vpagerdebug & VDB_FOLLOW) | |
288 | printf("vnode_pager_putpage(%x, %x)\n", pager, m); | |
289 | #endif | |
290 | if (pager == NULL) | |
55768178 DG |
291 | return FALSE; |
292 | marray[0] = m; | |
293 | err = vnode_pager_io((vn_pager_t)pager->pg_data, marray, 1, 0, UIO_WRITE); | |
294 | return err; | |
15637ed4 RG |
295 | } |
296 | ||
boolean_t
vnode_pager_haspage(pager, offset)
	vm_pager_t pager;
	vm_offset_t offset;
{
	register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
	daddr_t bn;
	int err;

#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_haspage(%x, %x)\n", pager, offset);
#endif

	/*
	 * Offset beyond end of file, do not have the page
	 */
	if (offset >= vnp->vnp_size) {
#ifdef DEBUG
		if (vpagerdebug & (VDB_FAIL|VDB_SIZE))
			printf("vnode_pager_haspage: pg %x, off %x, size %x\n",
			       pager, offset, vnp->vnp_size);
#endif
		return(FALSE);
	}

	/*
	 * Read the index to find the disk block to read
	 * from. If there is no block, report that we don't
	 * have this data.
	 *
	 * Assumes that the vnode has whole page or nothing.
	 */
	err = VOP_BMAP(vnp->vnp_vp,
		       offset / vnp->vnp_vp->v_mount->mnt_stat.f_bsize,
		       (struct vnode **)0, &bn);
	if (err) {
#ifdef DEBUG
		if (vpagerdebug & VDB_FAIL)
			printf("vnode_pager_haspage: BMAP err %d, pg %x, off %x\n",
			       err, pager, offset);
#endif
		/*
		 * NOTE(review): BMAP failure answers TRUE — presumably a
		 * conservative choice so the subsequent pager I/O reports
		 * the real error; confirm against callers.
		 */
		return(TRUE);
	}
	/* a negative block number marks a hole: no backing store */
	return((long)bn < 0 ? FALSE : TRUE);
}
343 | ||
344 | /* | |
345 | * (XXX) | |
346 | * Lets the VM system know about a change in size for a file. | |
347 | * If this vnode is mapped into some address space (i.e. we have a pager | |
348 | * for it) we adjust our own internal size and flush any cached pages in | |
349 | * the associated object that are affected by the size change. | |
350 | * | |
351 | * Note: this routine may be invoked as a result of a pager put | |
352 | * operation (possibly at object termination time), so we must be careful. | |
353 | */ | |
void
vnode_pager_setsize(vp, nsize)
	struct vnode *vp;
	u_long nsize;	/* new file size in bytes */
{
	register vn_pager_t vnp;
	register vm_object_t object;
	vm_pager_t pager;

	/*
	 * Not a mapped vnode
	 */
	if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL)
		return;
	/*
	 * Hasn't changed size
	 */
	pager = (vm_pager_t)vp->v_vmdata;
	vnp = (vn_pager_t)pager->pg_data;
	if (nsize == vnp->vnp_size)
		return;
	/*
	 * No object.
	 * This can happen during object termination since
	 * vm_object_page_clean is called after the object
	 * has been removed from the hash table, and clean
	 * may cause vnode write operations which can wind
	 * up back here.
	 */
	object = vm_object_lookup(pager);
	if (object == NULL)
		return;

#ifdef DEBUG
	if (vpagerdebug & (VDB_FOLLOW|VDB_SIZE))
		printf("vnode_pager_setsize: vp %x obj %x osz %d nsz %d\n",
		       vp, object, vnp->vnp_size, nsize);
#endif
	/*
	 * File has shrunk.
	 * Toss any cached pages beyond the new EOF.
	 * The size is page-rounded first, so a partially valid final
	 * page is kept; note vnp_size is recorded as the ROUNDED size
	 * from here on.
	 */
	nsize = round_page(nsize);
	if (nsize < vnp->vnp_size) {
		vm_object_lock(object);
		vm_object_page_remove(object,
				      (vm_offset_t)nsize, vnp->vnp_size);
		vm_object_unlock(object);
	}
	vnp->vnp_size = (vm_offset_t)nsize;
	/* drop the reference gained by vm_object_lookup() */
	vm_object_deallocate(object);
}
406 | ||
4c45483e | 407 | void |
15637ed4 RG |
408 | vnode_pager_umount(mp) |
409 | register struct mount *mp; | |
410 | { | |
411 | register vm_pager_t pager, npager; | |
412 | struct vnode *vp; | |
413 | ||
414 | pager = (vm_pager_t) queue_first(&vnode_pager_list); | |
415 | while (!queue_end(&vnode_pager_list, (queue_entry_t)pager)) { | |
416 | /* | |
417 | * Save the next pointer now since uncaching may | |
418 | * terminate the object and render pager invalid | |
419 | */ | |
420 | vp = ((vn_pager_t)pager->pg_data)->vnp_vp; | |
421 | npager = (vm_pager_t) queue_next(&pager->pg_list); | |
422 | if (mp == (struct mount *)0 || vp->v_mount == mp) | |
423 | (void) vnode_pager_uncache(vp); | |
424 | pager = npager; | |
425 | } | |
426 | } | |
427 | ||
428 | /* | |
429 | * Remove vnode associated object from the object cache. | |
430 | * | |
431 | * Note: this routine may be invoked as a result of a pager put | |
432 | * operation (possibly at object termination time), so we must be careful. | |
433 | */ | |
boolean_t
vnode_pager_uncache(vp)
	register struct vnode *vp;
{
	register vm_object_t object;
	boolean_t uncached, locked;
	vm_pager_t pager;

	/*
	 * Not a mapped vnode
	 */
	pager = (vm_pager_t)vp->v_vmdata;
	if (pager == NULL)
		return (TRUE);
	/*
	 * Unlock the vnode if it is currently locked.
	 * We do this since uncaching the object may result
	 * in its destruction which may initiate paging
	 * activity which may necessitate locking the vnode.
	 */
	locked = VOP_ISLOCKED(vp);
	if (locked)
		VOP_UNLOCK(vp);
	/*
	 * Must use vm_object_lookup() as it actually removes
	 * the object from the cache list.
	 */
	object = vm_object_lookup(pager);
	if (object) {
		/* "uncached" iff the cache held the only reference */
		uncached = (object->ref_count <= 1);
		pager_cache(object, FALSE);
	} else
		uncached = TRUE;
	if (locked)
		VOP_LOCK(vp);	/* restore the caller's lock state */
	return(uncached);
}
471 | ||
55768178 DG |
472 | |
/*
 * Release a page that was handed in for I/O but will not be filled:
 * wake any thread sleeping on the page first, then free it.
 */
void
vnode_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}
480 | ||
481 | /* | |
482 | * calculate the linear (byte) disk address of specified virtual | |
483 | * file address | |
484 | */ | |
vm_offset_t
vnode_pager_addr(vp, address)
	struct vnode *vp;
	vm_offset_t address;	/* byte offset within the file */
{
	int rtaddress;
	int bsize;
	vm_offset_t block;
	struct vnode *rtvp;
	int err;
	int vblock, voffset;

	/* split the file offset into logical block + offset within block */
	bsize = vp->v_mount->mnt_stat.f_bsize;
	vblock = address / bsize;
	voffset = address % bsize;

	err = VOP_BMAP(vp,vblock,&rtvp,&block);

	/*
	 * NOTE(review): "err" and a hole (negative block) are not checked;
	 * on BMAP failure the returned address is meaningless.  Callers
	 * appear to assume the mapping succeeds here — confirm.
	 */
	/* device blocks are DEV_BSIZE bytes; add back the in-block offset */
	rtaddress = block * DEV_BSIZE + voffset;

	return rtaddress;
}
507 | ||
508 | /* | |
509 | * interrupt routine for I/O completion | |
510 | */ | |
void
vnode_pager_iodone(bp)
	struct buf *bp;
{
	/* flag completion, then wake the thread sleeping on bp in vnode_pager_io() */
	bp->b_flags |= B_DONE;
	wakeup((caddr_t)bp);
}
518 | ||
519 | /* | |
520 | * vnode_pager_io: | |
521 | * Perform read or write operation for vnode_paging | |
522 | * | |
523 | * args: | |
524 | * vnp -- pointer to vnode pager data structure | |
525 | * containing size and vnode pointer, etc | |
526 | * | |
527 | * m -- pointer to array of vm_page_t entries to | |
528 | * do I/O to. It is not necessary to fill any | |
529 | * pages except for the reqpage entry. If a | |
530 | * page is not filled, it needs to be removed | |
531 | * from its object... | |
532 | * | |
533 | * count -- number of pages for I/O | |
534 | * | |
535 | * reqpage -- fault requested page for I/O | |
536 | * (index into vm_page_t entries above) | |
537 | * | |
538 | * rw -- UIO_READ or UIO_WRITE | |
539 | * | |
540 | * NOTICE!!!! direct writes look like that they are close to being | |
541 | * implemented. They are not really, several things need | |
542 | * to be done to make it work (subtile things.) Hack at | |
543 | * your own risk (direct writes are scarey). | |
ce619eaa DG |
544 | * |
545 | * ANOTHER NOTICE!!!! | |
546 | * we currently only support direct I/O to filesystems whose | |
547 | * contiguously allocated blocksize is at least a vm page. | |
548 | * changes will be made in the future to support more flexibility. | |
55768178 DG |
549 | */ |
550 | ||
int
vnode_pager_io(vnp, m, count, reqpage, rw)
	register vn_pager_t vnp;
	vm_page_t *m;
	int count, reqpage;
	enum uio_rw rw;
{
	int i,j;
	struct uio auio;
	struct iovec aiov;
	vm_offset_t kva, foff;
	int size;
	struct proc *p = curproc;	/* XXX */
	vm_object_t object;
	vm_offset_t paging_offset;
	struct vnode *dp, *vp;
	vm_offset_t mapsize;
	int bsize;
	int errtype=0;	/* 0 is file type otherwise vm type */
	int error = 0;
	int trimmed;	/* NOTE(review): never used — candidate for removal */

	object = m[reqpage]->object;	/* all vm_page_t items are in same object */
	paging_offset = object->paging_offset;

	/*
	 * get the UNDERLYING device for the file
	 */
	vp = vnp->vnp_vp;
	bsize = vp->v_mount->mnt_stat.f_bsize;
	VOP_BMAP(vp, 0, &dp, 0);

	/*
	 * we only do direct I/O if the file is on a local
	 * BLOCK device and currently if it is a read operation only.
	 * kva == 0 afterwards selects the plain VFS path below.
	 */

	kva = 0;
	mapsize = 0;
	if (rw == UIO_READ && dp->v_type == VBLK &&
		vp->v_mount->mnt_stat.f_type == MOUNT_UFS) {
		/*
		 * we do not block for a kva, notice we default to a kva
		 * conservative behavior
		 */
		kva = kmem_alloc_pageable(pager_map, (mapsize = count*NBPG));
		if( !kva) {
			/*
			 * No kva available: shrink the request down to the
			 * one page the caller needs and map just that.
			 * NOTE(review): mapsize keeps the ORIGINAL
			 * count*NBPG here even though count becomes 1, and
			 * the later kmem_free_wakeup(pager_map, kva, mapsize)
			 * frees a kva obtained from vm_pager_map_page() —
			 * verify this size/allocator pairing is intended.
			 */
			for (i = 0; i < count; i++) {
				if (i != reqpage) {
					vnode_pager_freepage(m[i]);
					m[i] = 0;
				}
			}
			m[0] = m[reqpage];
			kva = vm_pager_map_page(m[0]);
			reqpage = 0;
			count = 1;
		}
	}

	if (!kva) {
		/*
		 * here on I/O through VFS: drop everything but the
		 * requested page and read/write it via VOP_READ/VOP_WRITE.
		 */
		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
				m[i] = 0;
			}
		}
		m[0] = m[reqpage];
		foff = m[0]->offset + paging_offset;
		reqpage = 0;
		count = 1;
		/*
		 * Return failure if beyond current EOF
		 */
		if (foff >= vnp->vnp_size) {
			errtype = 1;	/* vm-type error: raw VM_PAGER_BAD is returned */
			error = VM_PAGER_BAD;
		} else {
			/* clip the transfer to EOF */
			if (foff + NBPG > vnp->vnp_size)
				size = vnp->vnp_size - foff;
			else
				size = NBPG;
			/*
			 * Allocate a kernel virtual address and initialize so
			 * that we can use VOP_READ/WRITE routines.
			 */
			kva = vm_pager_map_page(m[0]);
			aiov.iov_base = (caddr_t)kva;
			aiov.iov_len = size;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = foff;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = rw;
			auio.uio_resid = size;
			auio.uio_procp = (struct proc *)0;
			if (rw == UIO_READ) {
				error = VOP_READ(vp, &auio, IO_PAGER, p->p_ucred);
			} else {
				error = VOP_WRITE(vp, &auio, IO_PAGER, p->p_ucred);
			}
			if (!error) {
				register int count = size - auio.uio_resid;

				if (count == 0)
					error = EINVAL;	/* nothing transferred */
				else if (count != NBPG && rw == UIO_READ)
					/* zero the tail of a short read */
					bzero((caddr_t)kva + count, NBPG - count);
			}
			vm_pager_unmap_page(kva);
		}
	} else {

		/*
		 * here on direct device I/O
		 */
		int first=0, last=count;
		int reqaddr, firstaddr;
		int block, offset;

		struct buf *bp;
		int s;
		int failflag;

		foff = m[reqpage]->offset + paging_offset;

		/*
		 * This pathetic hack gets data from the buffer cache, if
		 * it's there.  I believe that this is not really necessary,
		 * and the ends can be gotten by defaulting to the normal vfs
		 * read behavior, but this might be more efficient, because
		 * the will NOT invoke read-aheads and one of the purposes of
		 * this code is to bypass the buffer cache and keep from
		 * flushing it by reading in a program.
		 */
		/*
		 * calculate logical block and offset
		 */
		block = foff / bsize;
		offset = foff % bsize;
		s = splbio();	/* block I/O interrupts for the cache probe */

		/*
		 * if we have a buffer in core, then try to use it
		 */
		while (bp = incore(vp, block)) {
			int amount;

			/*
			 * wait until the buffer is avail or gone
			 */
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				tsleep ((caddr_t)bp, PVM, "vnwblk", 0);
				continue;	/* re-probe: it may be gone */
			}

			amount = NBPG;
			if ((foff + amount) > vnp->vnp_size)
				amount = vnp->vnp_size - foff;

			/*
			 * make sure that this page is in the buffer
			 */
			if ((amount > 0) && (offset + amount) <= bp->b_bcount) {
				bp->b_flags |= B_BUSY;
				splx(s);

				/*
				 * map the requested page
				 */
				pmap_enter(vm_map_pmap(pager_map),
					kva, VM_PAGE_TO_PHYS(m[reqpage]),
					VM_PROT_DEFAULT, TRUE);
				/*
				 * copy the data from the buffer
				 */
				bcopy(bp->b_un.b_addr + offset, (caddr_t)kva, amount);
				if (amount < NBPG) {
					bzero((caddr_t)kva + amount, NBPG - amount);
				}
				/*
				 * unmap the page and free the kva
				 */
				pmap_remove(vm_map_pmap(pager_map), kva, kva + NBPG);
				kmem_free_wakeup(pager_map, kva, mapsize);
				/*
				 * release the buffer back to the block subsystem
				 */
				bp->b_flags &= ~B_BUSY;
				wakeup((caddr_t)bp);
				/*
				 * we did not have to do any work to get the
				 * requested page, the read behind/ahead does
				 * not justify a read
				 */
				for (i = 0; i < count; i++) {
					if (i != reqpage) {
						vnode_pager_freepage(m[i]);
						m[i] = 0;
					}
				}
				/*
				 * sorry for the goto
				 */
				goto finishup;
			}
			/*
			 * buffer is nowhere to be found, read from the disk
			 */
			break;
		}

		/* still at splbio() here — held across the device I/O below */
		foff = m[reqpage]->offset + paging_offset;
		reqaddr = vnode_pager_addr(vp, foff);
		/*
		 * Make sure that our I/O request is contiguous.
		 * Scan backward and stop for the first discontiguous
		 * entry or stop for a page being in buffer cache.
		 */
		failflag = 0;
		for (i = reqpage - 1; i >= 0; --i) {
			int myaddr;
			if (failflag ||
			    incore(vp, (foff + (i - reqpage) * NBPG) / bsize) ||
			    (myaddr = vnode_pager_addr(vp, m[i]->offset + paging_offset))
			    != reqaddr + (i - reqpage) * NBPG) {
				vnode_pager_freepage(m[i]);
				m[i] = 0;
				if (first == 0)
					first = i + 1;
				failflag = 1;	/* everything before i fails too */
			}
		}

		/*
		 * Scan forward and stop for the first discontiguous
		 * entry or stop for a page being in buffer cache.
		 */
		failflag = 0;
		for (i = reqpage + 1; i < count; i++) {
			int myaddr;
			if (failflag ||
			    incore(vp, (foff + (i - reqpage) * NBPG) / bsize) ||
			    (myaddr = vnode_pager_addr(vp, m[i]->offset + paging_offset))
			    != reqaddr + (i - reqpage) * NBPG) {
				vnode_pager_freepage(m[i]);
				m[i] = 0;
				if (last == count)
					last = i;
				failflag = 1;
			}
		}

		/*
		 * the first and last page have been calculated now, move
		 * input pages to be zero based...
		 */
		count = last;
		if (first != 0) {
			for (i = first; i < count; i++) {
				m[i - first] = m[i];
			}
			count -= first;
			reqpage -= first;
		}


		/*
		 * calculate the file virtual address for the transfer
		 */
		foff = m[0]->offset + paging_offset;
		/*
		 * and get the disk physical address (in bytes)
		 */
		firstaddr = vnode_pager_addr(vp, foff);

		/*
		 * calculate the size of the transfer, clipped to EOF
		 */
		if ((m[count - 1]->offset + paging_offset) + NBPG > vnp->vnp_size)
			size = vnp->vnp_size - foff;
		else
			size = count * NBPG;


		/*
		 * and map the pages to be read into the kva
		 */
		for (i = 0; i < count; i++)
			pmap_enter(vm_map_pmap(pager_map),
				kva + NBPG * i, VM_PAGE_TO_PHYS(m[i]),
				VM_PROT_DEFAULT, TRUE);
		VHOLD(vp);	/* keep the vnode alive during the transfer */
		bp = getpbuf();

		/* build a minimal buffer header */
		bzero((caddr_t)bp, sizeof(struct buf));
		bp->b_flags = B_BUSY | B_READ | B_CALL;
		bp->b_iodone = vnode_pager_iodone;	/* wakes the tsleep below */
		/* B_PHYS is not set, but it is nice to fill this in */
		bp->b_proc = &proc0;
		bp->b_un.b_addr = (caddr_t) kva;
		bp->b_blkno = firstaddr / DEV_BSIZE;
		bp->b_vp = dp;	/* note: the UNDERLYING device vnode, not vp */

		/* Should be a BLOCK or character DEVICE if we get here */
		bp->b_dev = dp->v_rdev;
		bp->b_bcount = NBPG * count;

		/* do the input */
		VOP_STRATEGY(bp);

		/* we definitely need to be at splbio here */

		while ((bp->b_flags & B_DONE) == 0) {
			tsleep((caddr_t)bp, PVM, "vnread", 0);
		}
		splx(s);
		if ((bp->b_flags & B_ERROR) != 0)
			error = EIO;

		if (!error) {
			/* zero any portion of the last page(s) past EOF */
			if (size != count * NBPG)
				bzero((caddr_t)kva + size, NBPG * count - size);
		}
		HOLDRELE(vp);	/* matches VHOLD() above */

		pmap_remove(vm_map_pmap(pager_map), kva, kva + NBPG * count);
		kmem_free_wakeup(pager_map, kva, mapsize);

		/*
		 * free the buffer header back to the swap buffer pool
		 */
		relpbuf(bp);

	}

finishup:
	if (rw == UIO_READ)
		for (i = 0; i < count; i++) {
			/*
			 * we dont mess with pages that have been already
			 * deallocated....
			 */
			if (!m[i])
				continue;
			/* pages arrive clean from disk */
			pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
			m[i]->flags |= PG_CLEAN;
			m[i]->flags &= ~PG_LAUNDRY;
			if (i != reqpage) {
				/*
				 * whether or not to leave the page activated
				 * is up in the air, but we should put the page
				 * on a page queue somewhere. (it already is in
				 * the object).
				 * Result: It appears that emperical results
				 * show that deactivating pages is best.
				 */
				/*
				 * just in case someone was asking for this
				 * page we now tell them that it is ok to use
				 */
				if (!error) {
					vm_page_deactivate(m[i]);
					PAGE_WAKEUP(m[i]);
					m[i]->flags &= ~PG_FAKE;
				} else {
					/* failed read-ahead page: toss it */
					vnode_pager_freepage(m[i]);
				}
			}
		}
	if (!error && rw == UIO_WRITE) {
		/* successful pageout: the requested page is now clean */
		pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage]));
		m[reqpage]->flags |= PG_CLEAN;
		m[reqpage]->flags &= ~PG_LAUNDRY;
	}
	if (error) {
		printf("vnode pager error: %d\n", error);
	}
	/* errtype != 0: "error" already holds a VM_PAGER_* code */
	if (errtype)
		return error;
	return (error ? VM_PAGER_FAIL : VM_PAGER_OK);
}