Commit | Line | Data |
---|---|---|
15637ed4 RG |
1 | /* |
2 | * Copyright (c) 1990 University of Utah. | |
3 | * Copyright (c) 1991 The Regents of the University of California. | |
4 | * All rights reserved. | |
5 | * | |
6 | * This code is derived from software contributed to Berkeley by | |
7 | * the Systems Programming Group of the University of Utah Computer | |
8 | * Science Department. | |
9 | * | |
10 | * Redistribution and use in source and binary forms, with or without | |
11 | * modification, are permitted provided that the following conditions | |
12 | * are met: | |
13 | * 1. Redistributions of source code must retain the above copyright | |
14 | * notice, this list of conditions and the following disclaimer. | |
15 | * 2. Redistributions in binary form must reproduce the above copyright | |
16 | * notice, this list of conditions and the following disclaimer in the | |
17 | * documentation and/or other materials provided with the distribution. | |
18 | * 3. All advertising materials mentioning features or use of this software | |
19 | * must display the following acknowledgement: | |
20 | * This product includes software developed by the University of | |
21 | * California, Berkeley and its contributors. | |
22 | * 4. Neither the name of the University nor the names of its contributors | |
23 | * may be used to endorse or promote products derived from this software | |
24 | * without specific prior written permission. | |
25 | * | |
26 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
27 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
28 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
29 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
30 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
31 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
32 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
33 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
34 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
35 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
36 | * SUCH DAMAGE. | |
37 | * | |
55768178 DG |
38 | * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ |
39 | * from: @(#)swap_pager.c 7.4 (Berkeley) 5/7/91 | |
40 | * | |
0eefc4c9 | 41 | * $Id: swap_pager.c,v 1.17 1994/02/10 08:08:36 davidg Exp $ |
15637ed4 RG |
42 | */ |
43 | ||
736d20f2 | 44 | /* |
55768178 | 45 | * Mostly rewritten by John Dyson with help from David Greenman, 12-Jan-1994 |
736d20f2 DG |
46 | */ |
47 | ||
15637ed4 RG |
48 | #include "param.h" |
49 | #include "proc.h" | |
50 | #include "buf.h" | |
55768178 | 51 | #include "kernel.h" |
15637ed4 RG |
52 | #include "systm.h" |
53 | #include "specdev.h" | |
54 | #include "vnode.h" | |
55 | #include "malloc.h" | |
55768178 | 56 | #include "queue.h" |
15637ed4 RG |
57 | #include "rlist.h" |
58 | ||
736d20f2 DG |
59 | #include "vm_param.h" |
60 | #include "queue.h" | |
61 | #include "lock.h" | |
14591e5e | 62 | #include "vm.h" |
736d20f2 DG |
63 | #include "vm_prot.h" |
64 | #include "vm_object.h" | |
15637ed4 RG |
65 | #include "vm_page.h" |
66 | #include "vm_pageout.h" | |
67 | #include "swap_pager.h" | |
736d20f2 | 68 | #include "vm_map.h" |
15637ed4 | 69 | |
55768178 DG |
70 | #ifndef NPENDINGIO |
71 | #define NPENDINGIO 96 | |
72 | #endif | |
bbc3f849 | 73 | |
736d20f2 DG |
74 | extern int nswbuf; |
75 | int nswiodone; | |
76 | extern int vm_pageout_rate_limit; | |
77 | static int cleandone; | |
55768178 | 78 | extern int hz; |
736d20f2 DG |
79 | int swap_pager_full; |
80 | extern vm_map_t pager_map; | |
55768178 | 81 | extern int vm_pageout_pages_needed; |
736d20f2 | 82 | |
15637ed4 RG |
83 | struct swpagerclean { |
84 | queue_head_t spc_list; | |
85 | int spc_flags; | |
86 | struct buf *spc_bp; | |
87 | sw_pager_t spc_swp; | |
88 | vm_offset_t spc_kva; | |
89 | vm_page_t spc_m; | |
736d20f2 DG |
90 | } swcleanlist [NPENDINGIO] ; |
91 | ||
15637ed4 | 92 | typedef struct swpagerclean *swp_clean_t; |
55768178 | 93 | |
736d20f2 | 94 | extern vm_map_t kernel_map; |
15637ed4 RG |
95 | |
96 | /* spc_flags values */ | |
736d20f2 | 97 | #define SPC_ERROR 0x01 |
15637ed4 | 98 | |
736d20f2 | 99 | #define SWB_EMPTY (-1) |
15637ed4 | 100 | |
736d20f2 | 101 | queue_head_t swap_pager_done; /* list of compileted page cleans */ |
15637ed4 RG |
102 | queue_head_t swap_pager_inuse; /* list of pending page cleans */ |
103 | queue_head_t swap_pager_free; /* list of free pager clean structs */ | |
104 | queue_head_t swap_pager_list; /* list of "named" anon regions */ | |
55768178 DG |
105 | queue_head_t swap_pager_un_list; /* list of "unnamed" anon pagers */ |
106 | #define SWAP_FREE_NEEDED 0x1 /* need a swap block */ | |
107 | int swap_pager_needflags; | |
108 | ||
109 | static queue_head_t *swp_qs[]={ | |
110 | &swap_pager_list, &swap_pager_un_list, (queue_head_t *) 0 | |
111 | }; | |
112 | ||
113 | struct pagerops swappagerops = { | |
114 | swap_pager_init, | |
115 | swap_pager_alloc, | |
116 | swap_pager_dealloc, | |
117 | swap_pager_getpage, | |
118 | swap_pager_getmulti, | |
119 | swap_pager_putpage, | |
120 | swap_pager_haspage | |
121 | }; | |
122 | ||
123 | extern int nswbuf; | |
15637ed4 | 124 | |
736d20f2 | 125 | int npendingio = NPENDINGIO; |
736d20f2 DG |
126 | int pendingiowait; |
127 | int require_swap_init; | |
55768178 | 128 | void swap_pager_finish(); |
736d20f2 | 129 | int dmmin, dmmax; |
55768178 DG |
130 | extern int vm_page_count; |
131 | ||
132 | struct buf * getpbuf() ; | |
133 | void relpbuf(struct buf *bp) ; | |
736d20f2 | 134 | |
15637ed4 RG |
135 | void |
136 | swap_pager_init() | |
137 | { | |
736d20f2 | 138 | register int i; |
55768178 | 139 | extern int dmmin, dmmax; |
15637ed4 | 140 | |
15637ed4 RG |
141 | dfltpagerops = &swappagerops; |
142 | queue_init(&swap_pager_list); | |
55768178 | 143 | queue_init(&swap_pager_un_list); |
15637ed4 RG |
144 | |
145 | /* | |
146 | * Initialize clean lists | |
147 | */ | |
148 | queue_init(&swap_pager_inuse); | |
736d20f2 | 149 | queue_init(&swap_pager_done); |
15637ed4 | 150 | queue_init(&swap_pager_free); |
736d20f2 DG |
151 | |
152 | require_swap_init = 1; | |
153 | ||
15637ed4 RG |
154 | /* |
155 | * Calculate the swap allocation constants. | |
156 | */ | |
15637ed4 | 157 | |
736d20f2 | 158 | dmmin = CLBYTES/DEV_BSIZE; |
55768178 | 159 | dmmax = btodb(SWB_NPAGES*NBPG)*2; |
736d20f2 | 160 | |
15637ed4 RG |
161 | } |
162 | ||
/*
 * Allocate a pager structure and associated resources.
 * Note that if we are called from the pageout daemon (handle == NULL)
 * we should not wait for memory as it could result in deadlock.
 * Returns NULL on allocation failure or when swap is known full.
 */
vm_pager_t
swap_pager_alloc(handle, size, prot, offset)
	caddr_t handle;
	register vm_size_t size;
	vm_prot_t prot;
	vm_offset_t offset;
{
	register vm_pager_t pager;
	register sw_pager_t swp;
	int waitok;
	int i,j;

	/* one-time construction of the pageout-clean entry pool */
	if (require_swap_init) {
		register swp_clean_t spc;
		struct buf *bp;
		/*
		 * kva's are allocated here so that we dont need to keep
		 * doing kmem_alloc pageables at runtime
		 */
		for (i = 0, spc = swcleanlist; i < npendingio ; i++, spc++) {
			spc->spc_kva = kmem_alloc_pageable(pager_map, NBPG);
			if (!spc->spc_kva) {
				/* pool is simply smaller if kva runs out */
				break;
			}
			spc->spc_bp = malloc(sizeof( *bp), M_TEMP,
				M_NOWAIT);
			if (!spc->spc_bp) {
				kmem_free_wakeup(pager_map, spc->spc_kva, NBPG);
				break;
			}
			spc->spc_flags = 0;
			queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
		}
		require_swap_init = 0;
	}

	/*
	 * If this is a "named" anonymous region, look it up and
	 * return the appropriate pager if it exists.
	 */
	if (handle) {
		pager = vm_pager_lookup(&swap_pager_list, handle);
		if (pager != NULL) {
			/*
			 * Use vm_object_lookup to gain a reference
			 * to the object and also to remove from the
			 * object cache.
			 */
			if (vm_object_lookup(pager) == NULL)
				panic("swap_pager_alloc: bad object");
			return(pager);
		}
	}

	/* don't create new pagers while swap space is exhausted */
	if (swap_pager_full)
		return(NULL);

	/*
	 * Pager doesn't exist, allocate swap management resources
	 * and initialize.
	 */
	waitok = handle ? M_WAITOK : M_NOWAIT;
	pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
	if (pager == NULL)
		return(NULL);
	swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
	if (swp == NULL) {
		free((caddr_t)pager, M_VMPAGER);
		return(NULL);
	}
	size = round_page(size);
	swp->sw_osize = size;
	/* number of SWB_NPAGES-page block descriptors, rounded up */
	swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * NBPG) - 1) / btodb(SWB_NPAGES*NBPG);
	swp->sw_blocks = (sw_blk_t)
		malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
		       M_VMPGDATA, waitok);
	if (swp->sw_blocks == NULL) {
		free((caddr_t)swp, M_VMPGDATA);
		free((caddr_t)pager, M_VMPAGER);
		return(NULL);
	}
	bzero((caddr_t)swp->sw_blocks,
	      swp->sw_nblocks * sizeof(*swp->sw_blocks));

	/* mark every swap-block slot as unallocated */
	for (i = 0; i < swp->sw_nblocks; i++) {
		for (j = 0; j < SWB_NPAGES; j++)
			swp->sw_blocks[i].swb_block[j] = SWB_EMPTY;
	}

	swp->sw_poip = 0;
	if (handle) {
		vm_object_t object;

		swp->sw_flags = SW_NAMED;
		queue_enter(&swap_pager_list, pager, vm_pager_t, pg_list);
		/*
		 * Consistent with other pagers: return with object
		 * referenced.  Can't do this with handle == NULL
		 * since it might be the pageout daemon calling.
		 */
		object = vm_object_allocate(size);
		vm_object_enter(object, pager);
		vm_object_setpager(object, pager, 0, FALSE);
	} else {
		swp->sw_flags = 0;
		queue_init(&pager->pg_list);
		queue_enter(&swap_pager_un_list, pager, vm_pager_t, pg_list);
	}
	pager->pg_handle = handle;
	pager->pg_ops = &swappagerops;
	pager->pg_type = PG_SWAP;
	pager->pg_data = (caddr_t)swp;

	return(pager);
}
283 | ||
736d20f2 | 284 | /* |
55768178 DG |
285 | * returns disk block associated with pager and offset |
286 | * additionally, as a side effect returns a flag indicating | |
287 | * if the block has been written | |
736d20f2 | 288 | */ |
55768178 | 289 | |
736d20f2 DG |
290 | static int * |
291 | swap_pager_diskaddr(swp, offset, valid) | |
292 | sw_pager_t swp; | |
293 | vm_offset_t offset; | |
294 | int *valid; | |
295 | { | |
296 | register sw_blk_t swb; | |
297 | int ix; | |
298 | ||
55768178 | 299 | if (valid) |
736d20f2 DG |
300 | *valid = 0; |
301 | ix = offset / (SWB_NPAGES*NBPG); | |
302 | if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { | |
303 | return(FALSE); | |
304 | } | |
305 | swb = &swp->sw_blocks[ix]; | |
306 | ix = (offset % (SWB_NPAGES*NBPG)) / NBPG; | |
55768178 | 307 | if (valid) |
736d20f2 DG |
308 | *valid = swb->swb_valid & (1<<ix); |
309 | return &swb->swb_block[ix]; | |
310 | } | |
311 | ||
55768178 DG |
312 | /* |
313 | * Utility routine to set the valid (written) bit for | |
314 | * a block associated with a pager and offset | |
315 | */ | |
736d20f2 | 316 | static void |
55768178 | 317 | swap_pager_setvalid(swp, offset, valid) |
736d20f2 DG |
318 | sw_pager_t swp; |
319 | vm_offset_t offset; | |
55768178 | 320 | int valid; |
736d20f2 DG |
321 | { |
322 | register sw_blk_t swb; | |
323 | int ix; | |
324 | ||
325 | ix = offset / (SWB_NPAGES*NBPG); | |
326 | if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) | |
327 | return; | |
328 | ||
329 | swb = &swp->sw_blocks[ix]; | |
330 | ix = (offset % (SWB_NPAGES*NBPG)) / NBPG; | |
55768178 DG |
331 | if (valid) |
332 | swb->swb_valid |= (1 << ix); | |
333 | else | |
334 | swb->swb_valid &= ~(1 << ix); | |
736d20f2 DG |
335 | return; |
336 | } | |
337 | ||
338 | /* | |
339 | * this routine frees swap blocks from a specified pager | |
340 | */ | |
341 | void | |
55768178 DG |
342 | swap_pager_freespace(pager, start, size) |
343 | vm_pager_t pager; | |
344 | vm_offset_t start; | |
345 | vm_offset_t size; | |
346 | { | |
736d20f2 DG |
347 | sw_pager_t swp = (sw_pager_t) pager->pg_data; |
348 | vm_offset_t i; | |
349 | int s; | |
350 | ||
351 | s = splbio(); | |
55768178 DG |
352 | for (i = start; i < round_page(start + size - 1); i += NBPG) { |
353 | int *addr = swap_pager_diskaddr(swp, i, 0); | |
354 | if (addr && *addr != SWB_EMPTY) { | |
736d20f2 DG |
355 | rlist_free(&swapmap, *addr, *addr + btodb(NBPG) - 1); |
356 | *addr = SWB_EMPTY; | |
55768178 | 357 | swap_pager_full = 0; |
736d20f2 DG |
358 | } |
359 | } | |
360 | splx(s); | |
361 | } | |
362 | ||
55768178 DG |
/*
 * swap_pager_reclaim frees up over-allocated space from all pagers
 * this eliminates internal fragmentation due to allocation of space
 * for segments that are never swapped to. It has been written so that
 * it does not block until the rlist_free operation occurs; it keeps
 * the queues consistent.
 */

/*
 * Maximum number of blocks (pages) to reclaim per pass
 */
#define MAXRECLAIM 256

void
swap_pager_reclaim()
{
	vm_pager_t p;
	sw_pager_t swp;
	int i, j, k;
	int s;
	int reclaimcount;
	static int reclaims[MAXRECLAIM];	/* blocks detached, freed after scan */
	static int in_reclaim;			/* single-threading flag/sleep channel */

	/*
	 * allow only one process to be in the swap_pager_reclaim subroutine
	 */
	s = splbio();
	if (in_reclaim) {
		/* another process is reclaiming; wait for it and return */
		tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0);
		splx(s);
		return;
	}
	in_reclaim = 1;
	reclaimcount = 0;

	/* for each pager queue */
	for (k = 0; swp_qs[k]; k++) {

		p = (vm_pager_t) queue_first(swp_qs[k]);
		while (reclaimcount < MAXRECLAIM &&
			!queue_end(swp_qs[k], (queue_entry_t) p)) {

			/*
			 * see if any blocks associated with a pager has been
			 * allocated but not used (written)
			 */
			swp = (sw_pager_t) p->pg_data;
			for (i = 0; i < swp->sw_nblocks; i++) {
				sw_blk_t swb = &swp->sw_blocks[i];
				for (j = 0; j < SWB_NPAGES; j++) {
					/* allocated but never written: detach it */
					if (swb->swb_block[j] != SWB_EMPTY &&
						(swb->swb_valid & (1 << j)) == 0) {
						reclaims[reclaimcount++] = swb->swb_block[j];
						swb->swb_block[j] = SWB_EMPTY;
						if (reclaimcount >= MAXRECLAIM)
							goto rfinished;
					}
				}
			}
			p = (vm_pager_t) queue_next(&p->pg_list);
		}
	}

rfinished:

	/*
	 * free the blocks that have been added to the reclaim list
	 * (deferred so the queue scan above never blocks in rlist_free)
	 */
	for (i = 0; i < reclaimcount; i++) {
		rlist_free(&swapmap, reclaims[i], reclaims[i] + btodb(NBPG) - 1);
		wakeup((caddr_t) &in_reclaim);
		swap_pager_full = 0;
	}

	splx(s);
	in_reclaim = 0;
	wakeup((caddr_t) &in_reclaim);
}
442 | ||
443 | ||
/*
 * swap_pager_copy copies blocks from one pager to another and
 * destroys the source pager.
 *
 * The destination range [dstoffset, dstoffset + dst size) receives the
 * source blocks starting at (offset + srcoffset); source blocks outside
 * the transferred window are released back to the swap map.
 */

void
swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset)
	vm_pager_t srcpager;
	vm_offset_t srcoffset;
	vm_pager_t dstpager;
	vm_offset_t dstoffset;
	vm_offset_t offset;
{
	sw_pager_t srcswp, dstswp;
	vm_offset_t i;
	int s;

	srcswp = (sw_pager_t) srcpager->pg_data;
	dstswp = (sw_pager_t) dstpager->pg_data;

	/*
	 * remove the source pager from the swap_pager internal queue
	 * so lookups fail while we may block below
	 */
	s = splbio();
	if (srcswp->sw_flags & SW_NAMED) {
		queue_remove(&swap_pager_list, srcpager, vm_pager_t, pg_list);
		srcswp->sw_flags &= ~SW_NAMED;
	} else {
		queue_remove(&swap_pager_un_list, srcpager, vm_pager_t, pg_list);
	}

	/* wait for the source pager's pageouts-in-progress to drain */
	while (srcswp->sw_poip) {
		tsleep((caddr_t)srcswp, PVM, "spgout", 0);
	}
	splx(s);

	/*
	 * clean all of the pages that are currently active and finished
	 */
	(void) swap_pager_clean(NULL, B_WRITE);

	s = splbio();
	/*
	 * clear source block before destination object
	 * (release allocated space)
	 */
	for (i = 0; i < offset + srcoffset; i += NBPG) {
		int *addr = swap_pager_diskaddr(srcswp, i, 0);
		if (addr && *addr != SWB_EMPTY) {
			rlist_free(&swapmap, *addr, *addr + btodb(NBPG) - 1);
			*addr = SWB_EMPTY;
			swap_pager_full = 0;
		}
	}
	/*
	 * transfer source to destination
	 */
	for (i = 0; i < dstswp->sw_osize; i += NBPG) {
		int srcvalid, dstvalid;
		int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset,
			&srcvalid);
		int *dstaddrp;
		/*
		 * see if the source has space allocated
		 */
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			/*
			 * if the source is valid and the dest has no space, then
			 * copy the allocation from the source to the dest.
			 */
			if (srcvalid) {
				dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid);
				/*
				 * if the dest already has a valid block, deallocate the
				 * source block without copying.
				 */
				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
					rlist_free(&swapmap, *dstaddrp, *dstaddrp + btodb(NBPG) - 1);
					*dstaddrp = SWB_EMPTY;
					swap_pager_full = 0;
				}
				/* move the block: dest takes ownership, mark it valid */
				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
					*dstaddrp = *srcaddrp;
					*srcaddrp = SWB_EMPTY;
					swap_pager_setvalid(dstswp, i + dstoffset, 1);
				}
			}
			/*
			 * if the source is not empty at this point, then deallocate the space.
			 */
			if (*srcaddrp != SWB_EMPTY) {
				rlist_free(&swapmap, *srcaddrp, *srcaddrp + btodb(NBPG) - 1);
				*srcaddrp = SWB_EMPTY;
				swap_pager_full = 0;
			}
		}
	}

	/*
	 * deallocate the rest of the source object
	 */
	for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += NBPG) {
		int *srcaddrp = swap_pager_diskaddr(srcswp, i, 0);
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			rlist_free(&swapmap, *srcaddrp, *srcaddrp + btodb(NBPG) - 1);
			*srcaddrp = SWB_EMPTY;
			swap_pager_full = 0;
		}
	}

	splx(s);

	/* tear down the (now empty) source pager */
	free((caddr_t)srcswp->sw_blocks, M_VMPGDATA);
	srcswp->sw_blocks = 0;
	free((caddr_t)srcswp, M_VMPGDATA);
	srcpager->pg_data = 0;
	free((caddr_t)srcpager, M_VMPAGER);

	return;
}
564 | ||
565 | ||
15637ed4 RG |
/*
 * swap_pager_dealloc tears down a swap pager: unlinks it from the
 * named/unnamed pager queues, waits for pageouts-in-progress to
 * complete, returns all of its swap blocks to the swap map, and
 * frees the pager and its management structures.
 */
void
swap_pager_dealloc(pager)
	vm_pager_t pager;
{
	register int i,j;
	register sw_blk_t bp;
	register sw_pager_t swp;
	int s;

	/*
	 * Remove from list right away so lookups will fail if we
	 * block for pageout completion.
	 */
	s = splbio();
	swp = (sw_pager_t) pager->pg_data;
	if (swp->sw_flags & SW_NAMED) {
		queue_remove(&swap_pager_list, pager, vm_pager_t, pg_list);
		swp->sw_flags &= ~SW_NAMED;
	} else {
		queue_remove(&swap_pager_un_list, pager, vm_pager_t, pg_list);
	}
	/*
	 * Wait for all pageouts to finish and remove
	 * all entries from cleaning list.
	 */
	while (swp->sw_poip) {
		tsleep((caddr_t)swp, PVM, "swpout", 0);
	}
	splx(s);

	(void) swap_pager_clean(NULL, B_WRITE);

	/*
	 * Free left over swap blocks
	 */
	s = splbio();
	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) {
		for (j = 0; j < SWB_NPAGES; j++)
			if (bp->swb_block[j] != SWB_EMPTY) {
				rlist_free(&swapmap, (unsigned)bp->swb_block[j],
					(unsigned)bp->swb_block[j] + btodb(NBPG) - 1);
				bp->swb_block[j] = SWB_EMPTY;
				swap_pager_full = 0;
			}
	}
	splx(s);

	/*
	 * Free swap management resources
	 */
	free((caddr_t)swp->sw_blocks, M_VMPGDATA);
	swp->sw_blocks = 0;
	free((caddr_t)swp, M_VMPGDATA);
	pager->pg_data = 0;
	free((caddr_t)pager, M_VMPAGER);
}
624 | ||
ce619eaa DG |
625 | /* |
626 | * swap_pager_getmulti can get multiple pages. | |
627 | */ | |
736d20f2 DG |
628 | int |
629 | swap_pager_getmulti(pager, m, count, reqpage, sync) | |
630 | vm_pager_t pager; | |
631 | vm_page_t *m; | |
632 | int count; | |
633 | int reqpage; | |
634 | boolean_t sync; | |
635 | { | |
55768178 | 636 | return swap_pager_io((sw_pager_t) pager->pg_data, m, count, reqpage, B_READ); |
15637ed4 RG |
637 | } |
638 | ||
ce619eaa DG |
639 | /* |
640 | * swap_pager_getpage gets individual pages | |
641 | */ | |
4c45483e | 642 | int |
15637ed4 RG |
643 | swap_pager_getpage(pager, m, sync) |
644 | vm_pager_t pager; | |
645 | vm_page_t m; | |
646 | boolean_t sync; | |
647 | { | |
736d20f2 DG |
648 | vm_page_t marray[1]; |
649 | ||
650 | marray[0] = m; | |
651 | return swap_pager_io((sw_pager_t)pager->pg_data, marray, 1, 0, B_READ); | |
15637ed4 RG |
652 | } |
653 | ||
ce619eaa DG |
654 | /* |
655 | * swap_pager_putpage writes individual pages | |
656 | */ | |
4c45483e | 657 | int |
15637ed4 RG |
658 | swap_pager_putpage(pager, m, sync) |
659 | vm_pager_t pager; | |
660 | vm_page_t m; | |
661 | boolean_t sync; | |
662 | { | |
663 | int flags; | |
736d20f2 DG |
664 | vm_page_t marray[1]; |
665 | ||
15637ed4 | 666 | |
15637ed4 RG |
667 | if (pager == NULL) { |
668 | (void) swap_pager_clean(NULL, B_WRITE); | |
736d20f2 | 669 | return VM_PAGER_OK; |
15637ed4 | 670 | } |
736d20f2 DG |
671 | |
672 | marray[0] = m; | |
15637ed4 RG |
673 | flags = B_WRITE; |
674 | if (!sync) | |
675 | flags |= B_ASYNC; | |
736d20f2 | 676 | return(swap_pager_io((sw_pager_t)pager->pg_data, marray, 1, 0, flags)); |
15637ed4 RG |
677 | } |
678 | ||
736d20f2 | 679 | static inline int |
55768178 | 680 | swap_pager_block_index(swp, offset) |
736d20f2 DG |
681 | sw_pager_t swp; |
682 | vm_offset_t offset; | |
683 | { | |
55768178 | 684 | return (offset / (SWB_NPAGES*NBPG)); |
736d20f2 DG |
685 | } |
686 | ||
687 | static inline int | |
55768178 | 688 | swap_pager_block_offset(swp, offset) |
736d20f2 DG |
689 | sw_pager_t swp; |
690 | vm_offset_t offset; | |
691 | { | |
55768178 | 692 | return (offset % (SWB_NPAGES*NBPG)); |
736d20f2 DG |
693 | } |
694 | ||
ce619eaa DG |
695 | /* |
696 | * _swap_pager_haspage returns TRUE if the pager has data that has | |
697 | * been written out. | |
698 | */ | |
736d20f2 DG |
699 | static boolean_t |
700 | _swap_pager_haspage(swp, offset) | |
701 | sw_pager_t swp; | |
15637ed4 RG |
702 | vm_offset_t offset; |
703 | { | |
15637ed4 RG |
704 | register sw_blk_t swb; |
705 | int ix; | |
706 | ||
736d20f2 | 707 | ix = offset / (SWB_NPAGES*NBPG); |
15637ed4 | 708 | if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { |
15637ed4 RG |
709 | return(FALSE); |
710 | } | |
711 | swb = &swp->sw_blocks[ix]; | |
736d20f2 | 712 | ix = (offset % (SWB_NPAGES*NBPG)) / NBPG; |
55768178 DG |
713 | if (swb->swb_block[ix] != SWB_EMPTY) { |
714 | if (swb->swb_valid & (1 << ix)) | |
736d20f2 DG |
715 | return TRUE; |
716 | } | |
717 | ||
15637ed4 RG |
718 | return(FALSE); |
719 | } | |
720 | ||
ce619eaa DG |
721 | /* |
722 | * swap_pager_haspage is the externally accessible version of | |
723 | * _swap_pager_haspage above. this routine takes a vm_pager_t | |
724 | * for an argument instead of sw_pager_t. | |
725 | */ | |
736d20f2 DG |
726 | boolean_t |
727 | swap_pager_haspage(pager, offset) | |
728 | vm_pager_t pager; | |
729 | vm_offset_t offset; | |
730 | { | |
55768178 | 731 | return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset); |
736d20f2 DG |
732 | } |
733 | ||
ce619eaa DG |
/*
 * swap_pager_freepage is a convenience routine that clears the busy
 * bit and deallocates a page.
 */
static void
swap_pager_freepage(m)
	vm_page_t m;
{
	/* wake anyone sleeping on the busy page before releasing it */
	PAGE_WAKEUP(m);
	vm_page_free(m);
}
745 | ||
ce619eaa DG |
746 | /* |
747 | * swap_pager_ridpages is a convienience routine that deallocates all | |
748 | * but the required page. this is usually used in error returns that | |
749 | * need to invalidate the "extra" readahead pages. | |
750 | */ | |
736d20f2 | 751 | static void |
55768178 DG |
752 | swap_pager_ridpages(m, count, reqpage) |
753 | vm_page_t *m; | |
754 | int count; | |
755 | int reqpage; | |
756 | { | |
736d20f2 | 757 | int i; |
55768178 DG |
758 | int s; |
759 | ||
55768178 DG |
760 | for (i = 0; i < count; i++) |
761 | if (i != reqpage) | |
762 | swap_pager_freepage(m[i]); | |
736d20f2 DG |
763 | } |
764 | ||
765 | int swapwritecount=0; | |
766 | ||
ce619eaa DG |
767 | /* |
768 | * swap_pager_iodone1 is the completion routine for both reads and async writes | |
769 | */ | |
736d20f2 | 770 | void |
55768178 DG |
771 | swap_pager_iodone1(bp) |
772 | struct buf *bp; | |
773 | { | |
736d20f2 DG |
774 | bp->b_flags |= B_DONE; |
775 | bp->b_flags &= ~B_ASYNC; | |
776 | wakeup((caddr_t)bp); | |
55768178 | 777 | if ((bp->b_flags & B_READ) == 0) |
736d20f2 DG |
778 | vwakeup(bp); |
779 | } | |
15637ed4 RG |
780 | /* |
781 | * Scaled down version of swap(). | |
15637ed4 RG |
782 | * BOGUS: lower level IO routines expect a KVA so we have to map our |
783 | * provided physical page into the KVA to keep them happy. | |
784 | */ | |
4c45483e | 785 | int |
736d20f2 | 786 | swap_pager_io(swp, m, count, reqpage, flags) |
15637ed4 | 787 | register sw_pager_t swp; |
736d20f2 DG |
788 | vm_page_t *m; |
789 | int count, reqpage; | |
15637ed4 RG |
790 | int flags; |
791 | { | |
792 | register struct buf *bp; | |
793 | register sw_blk_t swb; | |
794 | register int s; | |
55768178 | 795 | int i, ix; |
15637ed4 RG |
796 | boolean_t rv; |
797 | vm_offset_t kva, off; | |
798 | swp_clean_t spc; | |
736d20f2 DG |
799 | int cluster; |
800 | vm_offset_t paging_offset; | |
801 | vm_object_t object; | |
55768178 | 802 | int reqaddr, mydskregion; |
736d20f2 | 803 | extern int dmmin, dmmax; |
15637ed4 | 804 | |
736d20f2 DG |
805 | spc = NULL; |
806 | ||
736d20f2 DG |
807 | object = m[reqpage]->object; |
808 | paging_offset = object->paging_offset; | |
809 | /* | |
810 | * First determine if the page exists in the pager if this is | |
811 | * a sync read. This quickly handles cases where we are | |
812 | * following shadow chains looking for the top level object | |
813 | * with the page. | |
814 | */ | |
815 | off = m[reqpage]->offset + paging_offset; | |
55768178 | 816 | ix = swap_pager_block_index(swp, off); |
736d20f2 DG |
817 | if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { |
818 | /* printf("swap pager: out of range\n"); */ | |
55768178 | 819 | swap_pager_ridpages(m, count, reqpage); |
736d20f2 DG |
820 | return(VM_PAGER_FAIL); |
821 | } | |
822 | ||
823 | ||
824 | swb = &swp->sw_blocks[ix]; | |
825 | off = swap_pager_block_offset(swp, off) / NBPG; | |
736d20f2 DG |
826 | reqaddr = swb->swb_block[off]; |
827 | ||
828 | /* make sure that our I/O request is contiguous */ | |
55768178 DG |
829 | if (flags & B_READ) { |
830 | int first = 0, last = count; | |
736d20f2 DG |
831 | int failed = 0; |
832 | int reqdskregion = reqaddr / dmmax; | |
55768178 DG |
833 | int valid; |
834 | ||
835 | if (reqaddr == SWB_EMPTY || | |
836 | (swb->swb_valid & (1 << off)) == 0) { | |
837 | swap_pager_ridpages(m, count, reqpage); | |
838 | return(VM_PAGER_FAIL); | |
839 | } | |
840 | ||
841 | /* | |
842 | * search backwards for the first contiguous page to transfer | |
843 | */ | |
844 | for (i = reqpage - 1; i >= 0; --i) { | |
845 | int *tmpaddr = swap_pager_diskaddr(swp, | |
846 | m[i]->offset + paging_offset,&valid); | |
847 | if (tmpaddr == 0 || failed || !valid || | |
848 | *tmpaddr != reqaddr + btodb((i - reqpage) * NBPG)) { | |
736d20f2 DG |
849 | failed = 1; |
850 | swap_pager_freepage(m[i]); | |
851 | m[i] = 0; | |
55768178 | 852 | if (first == 0) |
736d20f2 DG |
853 | first = i + 1; |
854 | } else { | |
855 | mydskregion = *tmpaddr / dmmax; | |
55768178 | 856 | if (mydskregion != reqdskregion) { |
736d20f2 DG |
857 | failed = 1; |
858 | swap_pager_freepage(m[i]); | |
859 | m[i] = 0; | |
860 | first = i + 1; | |
861 | } | |
862 | } | |
863 | } | |
55768178 DG |
864 | /* |
865 | * search forwards for the last contiguous page to transfer | |
866 | */ | |
736d20f2 | 867 | failed = 0; |
55768178 | 868 | for (i = reqpage + 1; i < count; i++) { |
0eefc4c9 | 869 | int *tmpaddr = swap_pager_diskaddr(swp, m[i]->offset + paging_offset,&valid); |
55768178 DG |
870 | if (tmpaddr == 0 || failed || !valid || |
871 | *tmpaddr != reqaddr + btodb((i - reqpage) * NBPG) ) { | |
736d20f2 DG |
872 | failed = 1; |
873 | swap_pager_freepage(m[i]); | |
874 | m[i] = 0; | |
55768178 | 875 | if (last == count) |
736d20f2 DG |
876 | last = i; |
877 | } else { | |
878 | mydskregion = *tmpaddr / dmmax; | |
55768178 | 879 | if (mydskregion != reqdskregion) { |
736d20f2 DG |
880 | failed = 1; |
881 | swap_pager_freepage(m[i]); | |
882 | m[i] = 0; | |
55768178 | 883 | if (last == count) |
736d20f2 DG |
884 | last = i; |
885 | } | |
886 | } | |
887 | } | |
888 | count = last; | |
55768178 DG |
889 | if (first != 0) { |
890 | for (i = first; i < count; i++) { | |
891 | m[i - first] = m[i]; | |
736d20f2 DG |
892 | } |
893 | count -= first; | |
894 | reqpage -= first; | |
895 | } | |
896 | } | |
ce619eaa DG |
897 | |
898 | /* | |
899 | * at this point: | |
900 | * "m" is a pointer to the array of vm_page_t for paging I/O | |
901 | * "count" is the number of vm_page_t entries represented by "m" | |
902 | * "object" is the vm_object_t for I/O | |
903 | * "reqpage" is the index into "m" for the page actually faulted | |
904 | */ | |
736d20f2 | 905 | |
15637ed4 RG |
906 | /* |
907 | * For reads (pageins) and synchronous writes, we clean up | |
908 | * all completed async pageouts. | |
909 | */ | |
910 | if ((flags & B_ASYNC) == 0) { | |
736d20f2 | 911 | swap_pager_clean(NULL, flags); |
15637ed4 RG |
912 | } |
913 | /* | |
914 | * For async writes (pageouts), we cleanup completed pageouts so | |
915 | * that all available resources are freed. Also tells us if this | |
916 | * page is already being cleaned. If it is, or no resources | |
917 | * are available, we try again later. | |
918 | */ | |
736d20f2 | 919 | else if (swap_pager_clean(m[reqpage], B_WRITE)) { |
55768178 DG |
920 | swap_pager_ridpages(m, count, reqpage); |
921 | return VM_PAGER_TRYAGAIN; | |
15637ed4 RG |
922 | } |
923 | ||
736d20f2 DG |
924 | spc = NULL; /* we might not use an spc data structure */ |
925 | kva = 0; | |
926 | ||
15637ed4 | 927 | /* |
55768178 DG |
928 | * we allocate a new kva for transfers > 1 page |
929 | * but for transfers == 1 page, the swap_pager_free list contains | |
ce619eaa | 930 | * entries that have pre-allocated kva's (for efficiency). |
15637ed4 | 931 | */ |
55768178 DG |
932 | if ((flags & B_READ) && count > 1) { |
933 | kva = kmem_alloc_pageable(pager_map, count*NBPG); | |
934 | } | |
935 | ||
ce619eaa | 936 | |
55768178 | 937 | if (!kva) { |
ce619eaa DG |
938 | /* |
939 | * if a kva has not been allocated, we can only do a one page transfer, | |
940 | * so we free the other pages that might have been allocated by vm_fault. | |
941 | */ | |
55768178 DG |
942 | for (i = 0; i < count; i++) { |
943 | if (i != reqpage) { | |
944 | swap_pager_freepage(m[i]); | |
736d20f2 DG |
945 | m[i] = 0; |
946 | } | |
15637ed4 | 947 | } |
736d20f2 DG |
948 | count = 1; |
949 | m[0] = m[reqpage]; | |
950 | reqpage = 0; | |
55768178 DG |
951 | /* |
952 | * get a swap pager clean data structure, block until we get it | |
953 | */ | |
954 | if (queue_empty(&swap_pager_free)) { | |
a200ca2b DG |
955 | /* |
956 | if ((flags & (B_ASYNC|B_READ)) == B_ASYNC) | |
957 | return VM_PAGER_TRYAGAIN; | |
958 | */ | |
55768178 | 959 | s = splbio(); |
92c70781 DG |
960 | if( curproc == pageproc) |
961 | (void) swap_pager_clean(NULL, B_WRITE); | |
962 | else | |
963 | wakeup((caddr_t) &vm_pages_needed); | |
55768178 DG |
964 | while (queue_empty(&swap_pager_free)) { |
965 | swap_pager_needflags |= SWAP_FREE_NEEDED; | |
966 | tsleep((caddr_t)&swap_pager_free, | |
967 | PVM, "swpfre", 0); | |
92c70781 DG |
968 | if (curproc == pageproc) |
969 | (void) swap_pager_clean(NULL, B_WRITE); | |
970 | else | |
971 | wakeup((caddr_t) &vm_pages_needed); | |
55768178 DG |
972 | } |
973 | splx(s); | |
974 | } | |
975 | queue_remove_first(&swap_pager_free, spc, swp_clean_t, spc_list); | |
736d20f2 | 976 | kva = spc->spc_kva; |
15637ed4 | 977 | } |
736d20f2 | 978 | |
15637ed4 RG |
979 | |
980 | /* | |
736d20f2 | 981 | * Determine swap block and allocate as necessary. |
55768178 DG |
982 | * We try to get SWB_NPAGES first, but then we punt and try |
983 | * to get one page. If that fails, we look at the allocation | |
984 | * data structures to find unused but allocated pages in other | |
985 | * pagers allocations. | |
15637ed4 | 986 | */ |
736d20f2 DG |
987 | if (reqaddr == SWB_EMPTY) { |
988 | int blk; | |
55768178 DG |
989 | int tries; |
990 | int ntoget; | |
991 | ||
992 | tries = 0; | |
993 | s = splbio(); | |
ce619eaa DG |
994 | /* |
995 | * if any other pages have been allocated in this block, we | |
996 | * only try to get one page. | |
997 | */ | |
55768178 DG |
998 | for (i = 0; i < SWB_NPAGES; i++) { |
999 | if (swb->swb_block[i] != SWB_EMPTY) | |
736d20f2 | 1000 | break; |
55768178 DG |
1001 | } |
1002 | ||
1003 | ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1; | |
1004 | retrygetspace: | |
1005 | if (ntoget == SWB_NPAGES && | |
1006 | rlist_alloc(&swapmap, btodb(ntoget * NBPG),&blk)) { | |
1007 | for (i = 0; i < ntoget; i++) | |
1008 | swb->swb_block[i] = blk + btodb(NBPG) * i; | |
1009 | } else if (!rlist_alloc(&swapmap, btodb(NBPG), &swb->swb_block[off])) { | |
ce619eaa DG |
1010 | /* |
1011 | * if the allocation has failed, we try to reclaim space and | |
1012 | * retry. | |
1013 | */ | |
55768178 DG |
1014 | if (++tries == 1) { |
1015 | swap_pager_reclaim(); | |
1016 | goto retrygetspace; | |
1017 | } | |
ce619eaa DG |
1018 | /* |
1019 | * here on swap space full. | |
1020 | */ | |
55768178 | 1021 | if (spc) |
736d20f2 | 1022 | queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); |
55768178 | 1023 | if (swap_pager_full == 0) |
736d20f2 DG |
1024 | printf("swap_pager: out of swap space !!!\n"); |
1025 | swap_pager_full = 1; | |
55768178 DG |
1026 | swap_pager_ridpages(m, count, reqpage); |
1027 | splx(s); | |
1028 | return(VM_PAGER_TRYAGAIN); | |
736d20f2 | 1029 | } |
55768178 | 1030 | splx(s); |
736d20f2 DG |
1031 | swap_pager_full = 0; |
1032 | } | |
15637ed4 | 1033 | |
ce619eaa DG |
1034 | /* |
1035 | * map our page(s) into kva for I/O | |
1036 | */ | |
55768178 DG |
1037 | for (i = 0; i < count; i++) { |
1038 | pmap_enter(vm_map_pmap(pager_map), kva + NBPG * i, | |
736d20f2 DG |
1039 | VM_PAGE_TO_PHYS(m[i]), VM_PROT_ALL, TRUE); |
1040 | } | |
1041 | ||
ce619eaa DG |
1042 | |
1043 | /* | |
1044 | * get the base I/O offset into the swap file | |
1045 | */ | |
55768178 | 1046 | off = swap_pager_block_offset(swp, m[0]->offset + paging_offset) / NBPG; |
736d20f2 | 1047 | |
55768178 DG |
1048 | #ifdef DEBUG |
1049 | if (flags & B_READ && count > 1) | |
736d20f2 DG |
1050 | printf("obj: 0x%x off: 0x%x poff: 0x%x off: 0x%x, sz: %d blk: %d op: %s\n", |
1051 | object, m[0]->offset, paging_offset, off, count, swb->swb_block[off], flags&B_READ?"r":"w"); | |
55768178 | 1052 | #endif |
736d20f2 DG |
1053 | |
1054 | s = splbio(); | |
15637ed4 RG |
1055 | /* |
1056 | * Get a swap buffer header and perform the IO | |
1057 | */ | |
b7ae9810 DG |
1058 | if (spc) { |
1059 | bp = spc->spc_bp; | |
1060 | bzero(bp, sizeof *bp); | |
1061 | bp->b_spc = spc; | |
1062 | } else { | |
1063 | bp = getpbuf(); | |
1064 | } | |
15637ed4 RG |
1065 | bp->b_flags = B_BUSY | (flags & B_READ); |
1066 | bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ | |
597dcc29 | 1067 | bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; |
736d20f2 DG |
1068 | bp->b_un.b_addr = (caddr_t) kva; |
1069 | bp->b_blkno = swb->swb_block[off]; | |
15637ed4 RG |
1070 | VHOLD(swapdev_vp); |
1071 | bp->b_vp = swapdev_vp; | |
1072 | if (swapdev_vp->v_type == VBLK) | |
1073 | bp->b_dev = swapdev_vp->v_rdev; | |
736d20f2 | 1074 | bp->b_bcount = NBPG*count; |
15637ed4 RG |
1075 | if ((bp->b_flags & B_READ) == 0) |
1076 | swapdev_vp->v_numoutput++; | |
1077 | ||
1078 | /* | |
1079 | * If this is an async write we set up additional buffer fields | |
1080 | * and place a "cleaning" entry on the inuse queue. | |
1081 | */ | |
1082 | if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) { | |
736d20f2 | 1083 | spc->spc_flags = 0; |
15637ed4 | 1084 | spc->spc_swp = swp; |
736d20f2 | 1085 | spc->spc_m = m[reqpage]; |
ce619eaa DG |
1086 | /* |
1087 | * the completion routine for async writes | |
1088 | */ | |
15637ed4 RG |
1089 | bp->b_flags |= B_CALL; |
1090 | bp->b_iodone = swap_pager_iodone; | |
597dcc29 DG |
1091 | bp->b_dirtyoff = 0; |
1092 | bp->b_dirtyend = bp->b_bcount; | |
15637ed4 RG |
1093 | swp->sw_poip++; |
1094 | queue_enter(&swap_pager_inuse, spc, swp_clean_t, spc_list); | |
ce619eaa DG |
1095 | /* |
1096 | * we remember that we have used a block for paging. | |
1097 | */ | |
736d20f2 DG |
1098 | swb->swb_valid |= (1 << off); |
1099 | } else { | |
ce619eaa DG |
1100 | /* |
1101 | * here for sync write or any read | |
1102 | */ | |
55768178 | 1103 | if ((flags & B_READ) == 0) { |
ce619eaa DG |
1104 | /* |
1105 | * if we are writing, we remember that we have | |
1106 | * actually used a block for paging. | |
1107 | */ | |
736d20f2 | 1108 | swb->swb_valid |= (1 << off); |
55768178 DG |
1109 | swp->sw_poip++; |
1110 | } else { | |
1111 | swp->sw_piip++; | |
1112 | } | |
ce619eaa DG |
1113 | /* |
1114 | * the completion routine for reads and sync writes | |
1115 | */ | |
736d20f2 DG |
1116 | bp->b_flags |= B_CALL; |
1117 | bp->b_iodone = swap_pager_iodone1; | |
15637ed4 | 1118 | } |
ce619eaa DG |
1119 | /* |
1120 | * perform the I/O | |
1121 | */ | |
15637ed4 | 1122 | VOP_STRATEGY(bp); |
736d20f2 | 1123 | if ((flags & (B_READ|B_ASYNC)) == B_ASYNC ) { |
55768178 | 1124 | if ((bp->b_flags & B_DONE) == B_DONE) { |
736d20f2 DG |
1125 | swap_pager_clean(NULL, flags); |
1126 | } | |
1127 | splx(s); | |
15637ed4 RG |
1128 | return(VM_PAGER_PEND); |
1129 | } | |
ce619eaa DG |
1130 | |
1131 | /* | |
1132 | * wait for the sync I/O to complete | |
1133 | */ | |
15637ed4 | 1134 | while ((bp->b_flags & B_DONE) == 0) { |
736d20f2 DG |
1135 | tsleep((caddr_t)bp, PVM, (flags & B_READ)?"swread":"swwrt", 0); |
1136 | } | |
15637ed4 | 1137 | rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK; |
736d20f2 | 1138 | bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE); |
736d20f2 | 1139 | |
55768178 DG |
1140 | if (bp->b_flags & B_READ) { |
1141 | --swp->sw_piip; | |
1142 | if (swp->sw_piip == 0) | |
1143 | wakeup((caddr_t) swp); | |
1144 | } else { | |
1145 | --swp->sw_poip; | |
1146 | if (swp->sw_poip == 0) | |
1147 | wakeup((caddr_t) swp); | |
1148 | } | |
1149 | ||
736d20f2 DG |
1150 | if (bp->b_vp) |
1151 | brelvp(bp); | |
1152 | ||
ce619eaa DG |
1153 | /* |
1154 | * release the physical I/O buffer | |
1155 | */ | |
b7ae9810 DG |
1156 | if (!spc) |
1157 | relpbuf(bp); | |
55768178 | 1158 | |
736d20f2 DG |
1159 | splx(s); |
1160 | ||
ce619eaa DG |
1161 | /* |
1162 | * remove the mapping for kernel virtual | |
1163 | */ | |
55768178 | 1164 | pmap_remove(vm_map_pmap(pager_map), kva, kva + count * NBPG); |
736d20f2 | 1165 | |
ce619eaa DG |
1166 | /* |
1167 | * if we have written the page, then indicate that the page | |
1168 | * is clean. | |
1169 | */ | |
15637ed4 | 1170 | if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) { |
736d20f2 DG |
1171 | m[reqpage]->flags |= PG_CLEAN; |
1172 | pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage])); | |
ce619eaa DG |
1173 | /* |
1174 | * optimization, if a page has been read during the | |
1175 | * pageout process, we activate it. | |
1176 | */ | |
a200ca2b DG |
1177 | if ( (m[reqpage]->flags & PG_ACTIVE) == 0 && |
1178 | pmap_is_referenced(VM_PAGE_TO_PHYS(m[reqpage]))) | |
55768178 | 1179 | vm_page_activate(m[reqpage]); |
15637ed4 | 1180 | } |
736d20f2 | 1181 | |
55768178 | 1182 | if (spc) { |
ce619eaa DG |
1183 | /* |
1184 | * if we have used an spc, we need to free it. | |
1185 | */ | |
736d20f2 DG |
1186 | queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); |
1187 | } else { | |
55768178 | 1188 | for (i = 0; i < count; i++) { |
736d20f2 | 1189 | pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); |
736d20f2 DG |
1190 | m[i]->flags |= PG_CLEAN; |
1191 | m[i]->flags &= ~PG_LAUNDRY; | |
55768178 | 1192 | if (i != reqpage) { |
736d20f2 DG |
1193 | /* |
1194 | * whether or not to leave the page activated | |
1195 | * is up in the air, but we should put the page | |
1196 | * on a page queue somewhere. (it already is in | |
1197 | * the object). | |
a200ca2b DG |
1198 | * After some emperical results, it is best |
1199 | * to deactivate the readahead pages. | |
736d20f2 | 1200 | */ |
a200ca2b | 1201 | vm_page_deactivate(m[i]); |
55768178 | 1202 | |
736d20f2 DG |
1203 | /* |
1204 | * just in case someone was asking for this | |
1205 | * page we now tell them that it is ok to use | |
1206 | */ | |
55768178 | 1207 | m[i]->flags &= ~PG_FAKE; |
736d20f2 DG |
1208 | PAGE_WAKEUP(m[i]); |
1209 | } | |
1210 | } | |
1211 | /* | |
1212 | * and free the kernel virtual addresses | |
1213 | */ | |
55768178 | 1214 | kmem_free_wakeup(pager_map, kva, count * NBPG); |
736d20f2 | 1215 | } |
15637ed4 RG |
1216 | return(rv); |
1217 | } | |
1218 | ||
/*
 * swap_pager_clean:
 *	Reap completed asynchronous pageout operations from the
 *	swap_pager_done queue.  For each finished operation the kernel
 *	virtual mapping used for the transfer is removed, per-page
 *	completion is performed by swap_pager_finish(), and the spc
 *	clean-structure is recycled onto swap_pager_free.
 *
 *	m, rw:	nominally a page/direction of interest, but the code
 *		shown here never examines either parameter --
 *		NOTE(review): confirm against callers whether they are
 *		intentionally vestigial.
 *
 *	Returns TRUE only when "tspc" is non-NULL at exit; in the code
 *	visible here tspc is only ever set to NULL, so the function
 *	effectively always returns FALSE -- NOTE(review): verify.
 */
boolean_t
swap_pager_clean(m, rw)
	vm_page_t m;
	int rw;
{
	register swp_clean_t spc, tspc;
	register int s;

	tspc = NULL;
	/* fast path: nothing has completed, nothing to reap */
	if (queue_empty(&swap_pager_done))
		return FALSE;
	for (;;) {
		s = splbio();
		/*
		 * Look up and removal from done list must be done
		 * at splbio() to avoid conflicts with swap_pager_iodone.
		 */
		spc = (swp_clean_t) queue_first(&swap_pager_done);
		while (!queue_end(&swap_pager_done, (queue_entry_t)spc)) {
			/* tear down the transfer kva and complete the page */
			pmap_remove(vm_map_pmap(pager_map), spc->spc_kva, ((vm_offset_t) spc->spc_kva) + NBPG);
			swap_pager_finish(spc);
			queue_remove(&swap_pager_done, spc, swp_clean_t, spc_list);
			/*
			 * the "loop" body always processes the head entry and
			 * jumps out; one entry is handled per outer pass
			 */
			goto doclean;
		}

		/*
		 * No operations done, thats all we can do for now.
		 */

		splx(s);
		break;

		/*
		 * The desired page was found to be busy earlier in
		 * the scan but has since completed.
		 */
doclean:
		if (tspc && tspc == spc) {
			tspc = NULL;
		}
		/* recycle the clean-structure onto the free list */
		spc->spc_flags = 0;
		queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
		++cleandone;
		splx(s);
	}

	return(tspc ? TRUE : FALSE);
}
1267 | ||
/*
 * swap_pager_finish:
 *	Per-page completion of a swap pageout.  Decrements the owning
 *	object's paging-in-progress count (waking sleepers on the object
 *	when it reaches zero), marks the page clean on success or
 *	re-marks it for laundering on error, wakes any process waiting
 *	on the page, and -- when free memory is critically short -- frees
 *	the page outright.  Called from swap_pager_clean() while at
 *	splbio() -- NOTE(review): the splbio context is inferred from the
 *	caller shown in this file; confirm.
 */
void
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_page_t m = spc->spc_m;
	vm_object_t object = m->object;
	extern int vm_pageout_free_min;

	/* last outstanding pageout for the object: wake anyone sleeping on it */
	if (--object->paging_in_progress == 0)
		thread_wakeup((int) object);

	/*
	 * If no error mark as clean and inform the pmap system.
	 * If error, mark as dirty so we will try again.
	 * (XXX could get stuck doing this, should give up after awhile)
	 */
	if (spc->spc_flags & SPC_ERROR) {
		printf("swap_pager_finish: clean of page %x failed\n",
			VM_PAGE_TO_PHYS(m));
		m->flags |= PG_LAUNDRY;
	} else {
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
		m->flags |= PG_CLEAN;
	}

	/*
	 * if a page has been read during pageout, then
	 * we activate the page.
	 */
	if ((m->flags & PG_ACTIVE) == 0 &&
		pmap_is_referenced(VM_PAGE_TO_PHYS(m)))
		vm_page_activate(m);

	/*
	 * we wakeup any processes that are waiting on
	 * this page.
	 */
	PAGE_WAKEUP(m);
	/*
	 * if we need memory desperately, then free it now
	 */
	if (vm_page_free_count < vm_page_free_reserved &&
		(m->flags & PG_CLEAN) && m->wire_count == 0) {
		/* revoke all mappings before freeing the clean, unwired page */
		pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
		vm_page_free(m);
	}
	/* one fewer completed-but-unreaped swap I/O */
	--nswiodone;

	return;
}
1318 | ||
55768178 DG |
1319 | /* |
1320 | * swap_pager_iodone | |
1321 | */ | |
4c45483e | 1322 | void |
15637ed4 RG |
1323 | swap_pager_iodone(bp) |
1324 | register struct buf *bp; | |
1325 | { | |
1326 | register swp_clean_t spc; | |
1327 | daddr_t blk; | |
55768178 DG |
1328 | int s; |
1329 | ||
1330 | s = splbio(); | |
736d20f2 DG |
1331 | spc = (swp_clean_t) bp->b_spc; |
1332 | queue_remove(&swap_pager_inuse, spc, swp_clean_t, spc_list); | |
1333 | queue_enter(&swap_pager_done, spc, swp_clean_t, spc_list); | |
15637ed4 RG |
1334 | if (bp->b_flags & B_ERROR) { |
1335 | spc->spc_flags |= SPC_ERROR; | |
736d20f2 DG |
1336 | printf("error %d blkno %d sz %d ", |
1337 | bp->b_error, bp->b_blkno, bp->b_bcount); | |
15637ed4 | 1338 | } |
15637ed4 | 1339 | |
55768178 | 1340 | if ((bp->b_flags & B_READ) == 0) |
736d20f2 | 1341 | vwakeup(bp); |
15637ed4 | 1342 | |
736d20f2 DG |
1343 | bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_ASYNC); |
1344 | if (bp->b_vp) { | |
1345 | brelvp(bp); | |
1346 | } | |
55768178 | 1347 | |
736d20f2 | 1348 | nswiodone++; |
55768178 DG |
1349 | if (--spc->spc_swp->sw_poip == 0) { |
1350 | wakeup((caddr_t)spc->spc_swp); | |
1351 | } | |
1352 | ||
1353 | if ((swap_pager_needflags & SWAP_FREE_NEEDED) || | |
1354 | queue_empty(&swap_pager_inuse)) { | |
1355 | swap_pager_needflags &= ~SWAP_FREE_NEEDED; | |
1356 | wakeup((caddr_t)&swap_pager_free); | |
92c70781 | 1357 | wakeup((caddr_t)&vm_pages_needed); |
55768178 | 1358 | } |
736d20f2 | 1359 | |
55768178 DG |
1360 | if (vm_pageout_pages_needed) { |
1361 | wakeup((caddr_t)&vm_pageout_pages_needed); | |
736d20f2 DG |
1362 | } |
1363 | ||
55768178 DG |
1364 | if (queue_empty(&swap_pager_inuse) || |
1365 | (vm_page_free_count < vm_page_free_min && | |
1366 | nswiodone + vm_page_free_count >= vm_page_free_min) ) { | |
1367 | wakeup((caddr_t)&vm_pages_needed); | |
15637ed4 | 1368 | } |
55768178 | 1369 | splx(s); |
15637ed4 | 1370 | } |
55768178 DG |
1371 | |
1372 | /* | |
1373 | * allocate a physical buffer | |
1374 | */ | |
1375 | struct buf * | |
1376 | getpbuf() { | |
1377 | int s; | |
1378 | struct buf *bp; | |
1379 | ||
1380 | s = splbio(); | |
1381 | /* get a bp from the swap buffer header pool */ | |
1382 | while (bswlist.av_forw == NULL) { | |
1383 | bswlist.b_flags |= B_WANTED; | |
1384 | tsleep((caddr_t)&bswlist, PVM, "wswbuf", 0); | |
1385 | } | |
1386 | bp = bswlist.av_forw; | |
1387 | bswlist.av_forw = bp->av_forw; | |
1388 | ||
1389 | splx(s); | |
1390 | ||
1391 | bzero(bp, sizeof *bp); | |
1392 | return bp; | |
1393 | } | |
1394 | ||
1395 | /* | |
1396 | * release a physical buffer | |
1397 | */ | |
1398 | void | |
1399 | relpbuf(bp) | |
1400 | struct buf *bp; | |
1401 | { | |
1402 | int s; | |
1403 | ||
1404 | s = splbio(); | |
1405 | bp->av_forw = bswlist.av_forw; | |
1406 | bswlist.av_forw = bp; | |
1407 | if (bswlist.b_flags & B_WANTED) { | |
1408 | bswlist.b_flags &= ~B_WANTED; | |
1409 | wakeup((caddr_t)&bswlist); | |
1410 | } | |
1411 | splx(s); | |
1412 | } | |
1413 | ||
a200ca2b DG |
1414 | /* |
1415 | * return true if any swap control structures can be allocated | |
1416 | */ | |
1417 | int | |
1418 | swap_pager_ready() { | |
1419 | if( queue_empty( &swap_pager_free)) | |
1420 | return 0; | |
1421 | else | |
1422 | return 1; | |
1423 | } |