Commit | Line | Data |
---|---|---|
da7c5cc6 KM |
1 | /* |
2 | * Copyright (c) 1982 Regents of the University of California. | |
3 | * All rights reserved. The Berkeley software License Agreement | |
4 | * specifies the terms and conditions for redistribution. | |
5 | * | |
d95fc990 | 6 | * @(#)kern_physio.c 6.9 (Berkeley) %G% |
da7c5cc6 | 7 | */ |
961945a8 SL |
8 | |
9 | #include "../machine/pte.h" | |
663dbc72 | 10 | |
94368568 JB |
11 | #include "param.h" |
12 | #include "systm.h" | |
13 | #include "dir.h" | |
14 | #include "user.h" | |
15 | #include "buf.h" | |
16 | #include "conf.h" | |
17 | #include "proc.h" | |
18 | #include "seg.h" | |
19 | #include "vm.h" | |
20 | #include "trace.h" | |
21 | #include "map.h" | |
22 | #include "uio.h" | |
663dbc72 | 23 | |
663dbc72 BJ |
24 | /* |
25 | * Swap IO headers - | |
26 | * They contain the necessary information for the swap I/O. | |
27 | * At any given time, a swap header can be in three | |
28 | * different lists. When free it is in the free list, | |
29 | * when allocated and the I/O queued, it is on the swap | |
30 | * device list, and finally, if the operation was a dirty | |
31 | * page push, when the I/O completes, it is inserted | |
32 | * in a list of cleaned pages to be processed by the pageout daemon. | |
33 | */ | |
/* Base of the swap I/O headers; swap() uses (bp - swbuf) as a header's index. */
4c05b581 | 34 | struct buf *swbuf; |
663dbc72 | 35 | |
663dbc72 BJ |
36 | /* |
37 | * swap I/O - | |
38 | * | |
39 | * If the flag indicates a dirty page push initiated | |
40 | * by the pageout daemon, we map the page into the i th | |
41 | * virtual page of process 2 (the daemon itself) where i is | |
42 | * the index of the swap header that has been allocated. | |
43 | * We simply initialize the header and queue the I/O but | |
44 | * do not wait for completion. When the I/O completes, | |
45 | * iodone() will link the header to a list of cleaned | |
46 | * pages to be processed by the pageout daemon. | |
47 | */ | |
48 | swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent) | |
49 | struct proc *p; | |
50 | swblk_t dblkno; | |
51 | caddr_t addr; | |
39d536e6 | 52 | int nbytes, rdflg, flag; |
663dbc72 | 53 | dev_t dev; |
39d536e6 | 54 | u_int pfcent; |
663dbc72 BJ |
55 | { |
56 | register struct buf *bp; | |
e438ed8e | 57 | register u_int c; |
663dbc72 BJ |
58 | int p2dp; |
59 | register struct pte *dpte, *vpte; | |
530d0032 | 60 | int s; |
d668d9ba | 61 | extern swdone(); |
699e2902 | 62 | int error = 0; |
663dbc72 | 63 | |
d95fc990 | 64 | s = splbio(); |
663dbc72 BJ |
65 | while (bswlist.av_forw == NULL) { |
66 | bswlist.b_flags |= B_WANTED; | |
67 | sleep((caddr_t)&bswlist, PSWP+1); | |
68 | } | |
69 | bp = bswlist.av_forw; | |
70 | bswlist.av_forw = bp->av_forw; | |
530d0032 | 71 | splx(s); |
663dbc72 BJ |
72 | |
73 | bp->b_flags = B_BUSY | B_PHYS | rdflg | flag; | |
74 | if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0) | |
75 | if (rdflg == B_READ) | |
76 | sum.v_pswpin += btoc(nbytes); | |
77 | else | |
78 | sum.v_pswpout += btoc(nbytes); | |
79 | bp->b_proc = p; | |
80 | if (flag & B_DIRTY) { | |
81 | p2dp = ((bp - swbuf) * CLSIZE) * KLMAX; | |
82 | dpte = dptopte(&proc[2], p2dp); | |
83 | vpte = vtopte(p, btop(addr)); | |
84 | for (c = 0; c < nbytes; c += NBPG) { | |
85 | if (vpte->pg_pfnum == 0 || vpte->pg_fod) | |
86 | panic("swap bad pte"); | |
87 | *dpte++ = *vpte++; | |
88 | } | |
d668d9ba SL |
89 | bp->b_un.b_addr = (caddr_t)ctob(dptov(&proc[2], p2dp)); |
90 | bp->b_flags |= B_CALL; | |
91 | bp->b_iodone = swdone; | |
92 | bp->b_pfcent = pfcent; | |
663dbc72 BJ |
93 | } else |
94 | bp->b_un.b_addr = addr; | |
95 | while (nbytes > 0) { | |
e438ed8e BJ |
96 | bp->b_bcount = nbytes; |
97 | minphys(bp); | |
98 | c = bp->b_bcount; | |
663dbc72 BJ |
99 | bp->b_blkno = dblkno; |
100 | bp->b_dev = dev; | |
53f9ca20 BJ |
101 | #ifdef TRACE |
102 | trace(TR_SWAPIO, dev, bp->b_blkno); | |
103 | #endif | |
ca1f746a | 104 | physstrat(bp, bdevsw[major(dev)].d_strategy, PSWP); |
663dbc72 BJ |
105 | if (flag & B_DIRTY) { |
106 | if (c < nbytes) | |
107 | panic("big push"); | |
699e2902 | 108 | return (error); |
663dbc72 | 109 | } |
663dbc72 BJ |
110 | bp->b_un.b_addr += c; |
111 | bp->b_flags &= ~B_DONE; | |
112 | if (bp->b_flags & B_ERROR) { | |
113 | if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE) | |
114 | panic("hard IO err in swap"); | |
d03b3d84 | 115 | swkill(p, "swap: read error from swap device"); |
699e2902 | 116 | error = EIO; |
663dbc72 BJ |
117 | } |
118 | nbytes -= c; | |
919fe934 | 119 | dblkno += btodb(c); |
663dbc72 | 120 | } |
d95fc990 | 121 | s = splbio(); |
663dbc72 BJ |
122 | bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY); |
123 | bp->av_forw = bswlist.av_forw; | |
124 | bswlist.av_forw = bp; | |
125 | if (bswlist.b_flags & B_WANTED) { | |
126 | bswlist.b_flags &= ~B_WANTED; | |
127 | wakeup((caddr_t)&bswlist); | |
128 | wakeup((caddr_t)&proc[2]); | |
129 | } | |
530d0032 | 130 | splx(s); |
699e2902 | 131 | return (error); |
663dbc72 BJ |
132 | } |
133 | ||
d668d9ba SL |
134 | /* |
135 | * Put a buffer on the clean list after I/O is done. | |
136 | * Called from biodone. | |
137 | */ | |
138 | swdone(bp) | |
139 | register struct buf *bp; | |
140 | { | |
141 | register int s; | |
142 | ||
143 | if (bp->b_flags & B_ERROR) | |
144 | panic("IO err in push"); | |
d95fc990 | 145 | s = splbio(); |
d668d9ba SL |
146 | bp->av_forw = bclnlist; |
147 | cnt.v_pgout++; | |
148 | cnt.v_pgpgout += bp->b_bcount / NBPG; | |
149 | bclnlist = bp; | |
150 | if (bswlist.b_flags & B_WANTED) | |
151 | wakeup((caddr_t)&proc[2]); | |
152 | splx(s); | |
153 | } | |
154 | ||
663dbc72 BJ |
155 | /* |
156 | * If rout == 0 then killed on swap error, else | |
157 | * rout is the name of the routine where we ran out of | |
158 | * swap space. | |
159 | */ | |
160 | swkill(p, rout) | |
161 | struct proc *p; | |
162 | char *rout; | |
163 | { | |
164 | ||
7cd10076 JB |
165 | printf("pid %d: %s\n", p->p_pid, rout); |
166 | uprintf("sorry, pid %d was killed in %s\n", p->p_pid, rout); | |
663dbc72 BJ |
167 | /* |
168 | * To be sure no looping (e.g. in vmsched trying to | |
169 | * swap out) mark process locked in core (as though | |
170 | * done by user) after killing it so noone will try | |
171 | * to swap it out. | |
172 | */ | |
a30d2e97 | 173 | psignal(p, SIGKILL); |
663dbc72 BJ |
174 | p->p_flag |= SULOCK; |
175 | } | |
176 | ||
663dbc72 BJ |
177 | /* |
178 | * Raw I/O. The arguments are | |
179 | * The strategy routine for the device | |
180 | * A buffer, which will always be a special buffer | |
181 | * header owned exclusively by the device for this purpose | |
182 | * The device number | |
183 | * Read/write flag | |
184 | * Essentially all the work is computing physical addresses and | |
185 | * validating them. | |
186 | * If the user has the proper access privilidges, the process is | |
187 | * marked 'delayed unlock' and the pages involved in the I/O are | |
188 | * faulted and locked. After the completion of the I/O, the above pages | |
189 | * are unlocked. | |
190 | */ | |
d6d7360b BJ |
191 | physio(strat, bp, dev, rw, mincnt, uio) |
192 | int (*strat)(); | |
193 | register struct buf *bp; | |
194 | dev_t dev; | |
195 | int rw; | |
196 | unsigned (*mincnt)(); | |
197 | struct uio *uio; | |
663dbc72 | 198 | { |
a196746e | 199 | register struct iovec *iov; |
663dbc72 BJ |
200 | register int c; |
201 | char *a; | |
d6d7360b | 202 | int s, error = 0; |
663dbc72 | 203 | |
d6d7360b | 204 | nextiov: |
406ddcbe | 205 | if (uio->uio_iovcnt == 0) |
d6d7360b | 206 | return (0); |
a196746e | 207 | iov = uio->uio_iov; |
406ddcbe | 208 | if (useracc(iov->iov_base,(u_int)iov->iov_len,rw==B_READ?B_WRITE:B_READ) == NULL) |
d6d7360b | 209 | return (EFAULT); |
d95fc990 | 210 | s = splbio(); |
663dbc72 BJ |
211 | while (bp->b_flags&B_BUSY) { |
212 | bp->b_flags |= B_WANTED; | |
213 | sleep((caddr_t)bp, PRIBIO+1); | |
214 | } | |
ef3b3d5a | 215 | splx(s); |
663dbc72 BJ |
216 | bp->b_error = 0; |
217 | bp->b_proc = u.u_procp; | |
d6d7360b BJ |
218 | bp->b_un.b_addr = iov->iov_base; |
219 | while (iov->iov_len > 0) { | |
663dbc72 BJ |
220 | bp->b_flags = B_BUSY | B_PHYS | rw; |
221 | bp->b_dev = dev; | |
919fe934 | 222 | bp->b_blkno = btodb(uio->uio_offset); |
d6d7360b | 223 | bp->b_bcount = iov->iov_len; |
663dbc72 BJ |
224 | (*mincnt)(bp); |
225 | c = bp->b_bcount; | |
226 | u.u_procp->p_flag |= SPHYSIO; | |
227 | vslock(a = bp->b_un.b_addr, c); | |
e438ed8e | 228 | physstrat(bp, strat, PRIBIO); |
d95fc990 | 229 | (void) splbio(); |
663dbc72 BJ |
230 | vsunlock(a, c, rw); |
231 | u.u_procp->p_flag &= ~SPHYSIO; | |
232 | if (bp->b_flags&B_WANTED) | |
233 | wakeup((caddr_t)bp); | |
530d0032 | 234 | splx(s); |
d6d7360b | 235 | c -= bp->b_resid; |
663dbc72 | 236 | bp->b_un.b_addr += c; |
d6d7360b BJ |
237 | iov->iov_len -= c; |
238 | uio->uio_resid -= c; | |
239 | uio->uio_offset += c; | |
961945a8 | 240 | /* temp kludge for tape drives */ |
35a494b8 | 241 | if (bp->b_resid || (bp->b_flags&B_ERROR)) |
52a593fa | 242 | break; |
663dbc72 BJ |
243 | } |
244 | bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS); | |
d6d7360b | 245 | error = geterror(bp); |
961945a8 SL |
246 | /* temp kludge for tape drives */ |
247 | if (bp->b_resid || error) | |
d6d7360b | 248 | return (error); |
d6d7360b BJ |
249 | uio->uio_iov++; |
250 | uio->uio_iovcnt--; | |
251 | goto nextiov; | |
663dbc72 BJ |
252 | } |
253 | ||
35a494b8 SL |
254 | #define MAXPHYS (63 * 1024) |
255 | ||
663dbc72 BJ |
256 | unsigned |
257 | minphys(bp) | |
d6d7360b | 258 | struct buf *bp; |
663dbc72 BJ |
259 | { |
260 | ||
35a494b8 SL |
261 | if (bp->b_bcount > MAXPHYS) |
262 | bp->b_bcount = MAXPHYS; | |
663dbc72 | 263 | } |