Commit | Line | Data |
---|---|---|
da7c5cc6 | 1 | /* |
0880b18e | 2 | * Copyright (c) 1982, 1986 Regents of the University of California. |
da7c5cc6 KM |
3 | * All rights reserved. The Berkeley software License Agreement |
4 | * specifies the terms and conditions for redistribution. | |
5 | * | |
d293217c | 6 | * @(#)kern_physio.c 7.15 (Berkeley) %G% |
da7c5cc6 | 7 | */ |
961945a8 | 8 | |
94368568 JB |
9 | #include "param.h" |
10 | #include "systm.h" | |
94368568 JB |
11 | #include "user.h" |
12 | #include "buf.h" | |
13 | #include "conf.h" | |
14 | #include "proc.h" | |
15 | #include "seg.h" | |
16 | #include "vm.h" | |
17 | #include "trace.h" | |
18 | #include "map.h" | |
c4ec2128 | 19 | #include "vnode.h" |
0f93ba7b | 20 | #include "specdev.h" |
d301d150 KM |
21 | |
22 | #include "machine/pte.h" | |
ec67a3ce MK |
23 | #ifdef SECSIZE |
24 | #include "file.h" | |
25 | #include "ioctl.h" | |
26 | #include "disklabel.h" | |
27 | #endif SECSIZE | |
663dbc72 | 28 | |
/*
 * Swap IO headers -
 * They contain the necessary information for the swap I/O.
 * At any given time, a swap header can be in three
 * different lists. When free it is in the free list,
 * when allocated and the I/O queued, it is on the swap
 * device list, and finally, if the operation was a dirty
 * page push, when the I/O completes, it is inserted
 * in a list of cleaned pages to be processed by the pageout daemon.
 */
/*
 * Base of the swap headers; swap() computes a header's index
 * as (bp - swbuf).  Storage is set up outside this file.
 */
struct buf *swbuf;
663dbc72 | 40 | |
663dbc72 BJ |
41 | /* |
42 | * swap I/O - | |
43 | * | |
44 | * If the flag indicates a dirty page push initiated | |
45 | * by the pageout daemon, we map the page into the i th | |
46 | * virtual page of process 2 (the daemon itself) where i is | |
47 | * the index of the swap header that has been allocated. | |
48 | * We simply initialize the header and queue the I/O but | |
49 | * do not wait for completion. When the I/O completes, | |
ec67a3ce | 50 | * biodone() will link the header to a list of cleaned |
663dbc72 BJ |
51 | * pages to be processed by the pageout daemon. |
52 | */ | |
c4ec2128 | 53 | swap(p, dblkno, addr, nbytes, rdflg, flag, vp, pfcent) |
663dbc72 BJ |
54 | struct proc *p; |
55 | swblk_t dblkno; | |
56 | caddr_t addr; | |
39d536e6 | 57 | int nbytes, rdflg, flag; |
c4ec2128 | 58 | struct vnode *vp; |
39d536e6 | 59 | u_int pfcent; |
663dbc72 BJ |
60 | { |
61 | register struct buf *bp; | |
663dbc72 | 62 | register struct pte *dpte, *vpte; |
c5648f55 KB |
63 | register u_int c; |
64 | int p2dp, s, error = 0; | |
65 | struct buf *getswbuf(); | |
66 | int swdone(); | |
663dbc72 | 67 | |
c5648f55 | 68 | bp = getswbuf(PSWP+1); |
663dbc72 | 69 | bp->b_flags = B_BUSY | B_PHYS | rdflg | flag; |
ec67a3ce MK |
70 | #ifdef SECSIZE |
71 | bp->b_blksize = DEV_BSIZE; | |
72 | #endif SECSIZE | |
663dbc72 BJ |
73 | if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0) |
74 | if (rdflg == B_READ) | |
75 | sum.v_pswpin += btoc(nbytes); | |
76 | else | |
77 | sum.v_pswpout += btoc(nbytes); | |
78 | bp->b_proc = p; | |
79 | if (flag & B_DIRTY) { | |
80 | p2dp = ((bp - swbuf) * CLSIZE) * KLMAX; | |
81 | dpte = dptopte(&proc[2], p2dp); | |
82 | vpte = vtopte(p, btop(addr)); | |
83 | for (c = 0; c < nbytes; c += NBPG) { | |
84 | if (vpte->pg_pfnum == 0 || vpte->pg_fod) | |
85 | panic("swap bad pte"); | |
86 | *dpte++ = *vpte++; | |
87 | } | |
d668d9ba SL |
88 | bp->b_un.b_addr = (caddr_t)ctob(dptov(&proc[2], p2dp)); |
89 | bp->b_flags |= B_CALL; | |
90 | bp->b_iodone = swdone; | |
91 | bp->b_pfcent = pfcent; | |
663dbc72 BJ |
92 | } else |
93 | bp->b_un.b_addr = addr; | |
94 | while (nbytes > 0) { | |
c4ec2128 | 95 | bp->b_blkno = dblkno; |
343a57bd KM |
96 | if (bp->b_vp) |
97 | brelvp(bp); | |
5dccc1f9 | 98 | VHOLD(vp); |
343a57bd KM |
99 | bp->b_vp = vp; |
100 | bp->b_dev = vp->v_rdev; | |
e438ed8e | 101 | bp->b_bcount = nbytes; |
26bd0870 KM |
102 | if ((bp->b_flags & B_READ) == 0) |
103 | vp->v_numoutput++; | |
e438ed8e BJ |
104 | minphys(bp); |
105 | c = bp->b_bcount; | |
53f9ca20 | 106 | #ifdef TRACE |
c4ec2128 | 107 | trace(TR_SWAPIO, vp, bp->b_blkno); |
cd682858 | 108 | #endif |
d293217c | 109 | #if defined(hp300) || defined(i386) |
cd682858 | 110 | vmapbuf(bp); |
53f9ca20 | 111 | #endif |
c4ec2128 | 112 | VOP_STRATEGY(bp); |
c5648f55 | 113 | /* pageout daemon doesn't wait for pushed pages */ |
663dbc72 BJ |
114 | if (flag & B_DIRTY) { |
115 | if (c < nbytes) | |
116 | panic("big push"); | |
ec67a3ce | 117 | return (0); |
663dbc72 | 118 | } |
d293217c | 119 | #if defined(hp300) || defined(i386) |
cd682858 KM |
120 | vunmapbuf(bp); |
121 | #endif | |
663dbc72 BJ |
122 | bp->b_un.b_addr += c; |
123 | bp->b_flags &= ~B_DONE; | |
124 | if (bp->b_flags & B_ERROR) { | |
125 | if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE) | |
126 | panic("hard IO err in swap"); | |
d03b3d84 | 127 | swkill(p, "swap: read error from swap device"); |
699e2902 | 128 | error = EIO; |
663dbc72 BJ |
129 | } |
130 | nbytes -= c; | |
ec67a3ce MK |
131 | #ifdef SECSIZE |
132 | if (flag & B_PGIN && nbytes > 0) | |
133 | panic("big pgin"); | |
134 | #endif SECSIZE | |
919fe934 | 135 | dblkno += btodb(c); |
663dbc72 | 136 | } |
663dbc72 | 137 | bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY); |
c5648f55 | 138 | freeswbuf(bp); |
699e2902 | 139 | return (error); |
663dbc72 BJ |
140 | } |
141 | ||
d668d9ba SL |
142 | /* |
143 | * Put a buffer on the clean list after I/O is done. | |
144 | * Called from biodone. | |
145 | */ | |
146 | swdone(bp) | |
147 | register struct buf *bp; | |
148 | { | |
149 | register int s; | |
150 | ||
151 | if (bp->b_flags & B_ERROR) | |
152 | panic("IO err in push"); | |
d95fc990 | 153 | s = splbio(); |
d668d9ba SL |
154 | bp->av_forw = bclnlist; |
155 | cnt.v_pgout++; | |
156 | cnt.v_pgpgout += bp->b_bcount / NBPG; | |
157 | bclnlist = bp; | |
158 | if (bswlist.b_flags & B_WANTED) | |
159 | wakeup((caddr_t)&proc[2]); | |
d293217c | 160 | #if defined(hp300) || defined(i386) |
cd682858 KM |
161 | vunmapbuf(bp); |
162 | #endif | |
d668d9ba SL |
163 | splx(s); |
164 | } | |
165 | ||
663dbc72 BJ |
166 | /* |
167 | * If rout == 0 then killed on swap error, else | |
168 | * rout is the name of the routine where we ran out of | |
169 | * swap space. | |
170 | */ | |
171 | swkill(p, rout) | |
172 | struct proc *p; | |
173 | char *rout; | |
174 | { | |
175 | ||
7cd10076 JB |
176 | printf("pid %d: %s\n", p->p_pid, rout); |
177 | uprintf("sorry, pid %d was killed in %s\n", p->p_pid, rout); | |
663dbc72 BJ |
178 | /* |
179 | * To be sure no looping (e.g. in vmsched trying to | |
180 | * swap out) mark process locked in core (as though | |
181 | * done by user) after killing it so noone will try | |
182 | * to swap it out. | |
183 | */ | |
a30d2e97 | 184 | psignal(p, SIGKILL); |
663dbc72 BJ |
185 | p->p_flag |= SULOCK; |
186 | } | |
187 | ||
663dbc72 BJ |
188 | /* |
189 | * Raw I/O. The arguments are | |
190 | * The strategy routine for the device | |
c5648f55 KB |
191 | * A buffer, which will either be a special buffer header owned |
192 | * exclusively by the device for this purpose, or NULL, | |
193 | * indicating that we should use a swap buffer | |
663dbc72 BJ |
194 | * The device number |
195 | * Read/write flag | |
196 | * Essentially all the work is computing physical addresses and | |
197 | * validating them. | |
198 | * If the user has the proper access privilidges, the process is | |
199 | * marked 'delayed unlock' and the pages involved in the I/O are | |
200 | * faulted and locked. After the completion of the I/O, the above pages | |
201 | * are unlocked. | |
202 | */ | |
d6d7360b BJ |
203 | physio(strat, bp, dev, rw, mincnt, uio) |
204 | int (*strat)(); | |
205 | register struct buf *bp; | |
206 | dev_t dev; | |
207 | int rw; | |
c5648f55 | 208 | u_int (*mincnt)(); |
d6d7360b | 209 | struct uio *uio; |
663dbc72 | 210 | { |
a196746e | 211 | register struct iovec *iov; |
58c3cad7 | 212 | register int requested, done; |
663dbc72 | 213 | char *a; |
c5648f55 KB |
214 | int s, allocbuf = 0, error = 0; |
215 | struct buf *getswbuf(); | |
ec67a3ce MK |
216 | #ifdef SECSIZE |
217 | int bsize; | |
218 | struct partinfo dpart; | |
219 | #endif SECSIZE | |
663dbc72 | 220 | |
ec67a3ce MK |
221 | #ifdef SECSIZE |
222 | if ((unsigned)major(dev) < nchrdev && | |
223 | (*cdevsw[major(dev)].d_ioctl)(dev, DIOCGPART, (caddr_t)&dpart, | |
224 | FREAD) == 0) | |
225 | bsize = dpart.disklab->d_secsize; | |
226 | else | |
227 | bsize = DEV_BSIZE; | |
228 | #endif SECSIZE | |
229 | for (;;) { | |
230 | if (uio->uio_iovcnt == 0) | |
231 | return (0); | |
232 | iov = uio->uio_iov; | |
233 | if (useracc(iov->iov_base, (u_int)iov->iov_len, | |
234 | rw==B_READ? B_WRITE : B_READ) == NULL) | |
235 | return (EFAULT); | |
236 | s = splbio(); | |
237 | while (bp->b_flags&B_BUSY) { | |
238 | bp->b_flags |= B_WANTED; | |
239 | sleep((caddr_t)bp, PRIBIO+1); | |
240 | } | |
c5648f55 KB |
241 | if (!allocbuf) { /* only if sharing caller's buffer */ |
242 | s = splbio(); | |
243 | while (bp->b_flags&B_BUSY) { | |
244 | bp->b_flags |= B_WANTED; | |
245 | sleep((caddr_t)bp, PRIBIO+1); | |
246 | } | |
247 | splx(s); | |
248 | } | |
ec67a3ce MK |
249 | bp->b_error = 0; |
250 | bp->b_proc = u.u_procp; | |
251 | #ifdef SECSIZE | |
252 | bp->b_blksize = bsize; | |
253 | #endif SECSIZE | |
254 | bp->b_un.b_addr = iov->iov_base; | |
255 | while (iov->iov_len > 0) { | |
256 | bp->b_flags = B_BUSY | B_PHYS | rw; | |
257 | bp->b_dev = dev; | |
258 | #ifdef SECSIZE | |
259 | bp->b_blkno = uio->uio_offset / bsize; | |
260 | #else SECSIZE | |
261 | bp->b_blkno = btodb(uio->uio_offset); | |
262 | #endif SECSIZE | |
263 | bp->b_bcount = iov->iov_len; | |
264 | (*mincnt)(bp); | |
265 | c = bp->b_bcount; | |
266 | u.u_procp->p_flag |= SPHYSIO; | |
267 | vslock(a = bp->b_un.b_addr, c); | |
268 | physstrat(bp, strat, PRIBIO); | |
269 | (void) splbio(); | |
270 | vsunlock(a, c, rw); | |
271 | u.u_procp->p_flag &= ~SPHYSIO; | |
272 | if (bp->b_flags&B_WANTED) | |
273 | wakeup((caddr_t)bp); | |
274 | splx(s); | |
275 | c -= bp->b_resid; | |
276 | bp->b_un.b_addr += c; | |
277 | iov->iov_len -= c; | |
278 | uio->uio_resid -= c; | |
279 | uio->uio_offset += c; | |
280 | /* temp kludge for tape drives */ | |
281 | if (bp->b_resid || (bp->b_flags&B_ERROR)) | |
282 | break; | |
283 | } | |
284 | bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS); | |
285 | error = geterror(bp); | |
ec67a3ce MK |
286 | if (bp->b_resid || error) |
287 | return (error); | |
288 | uio->uio_iov++; | |
289 | uio->uio_iovcnt--; | |
663dbc72 | 290 | } |
cd682858 KM |
291 | #if defined(hp300) |
292 | DCIU(); | |
293 | #endif | |
c5648f55 KB |
294 | if (allocbuf) |
295 | freeswbuf(bp); | |
296 | return (error); | |
663dbc72 BJ |
297 | } |
298 | ||
c5648f55 | 299 | u_int |
663dbc72 | 300 | minphys(bp) |
d6d7360b | 301 | struct buf *bp; |
663dbc72 | 302 | { |
35a494b8 SL |
303 | if (bp->b_bcount > MAXPHYS) |
304 | bp->b_bcount = MAXPHYS; | |
663dbc72 | 305 | } |
c5648f55 KB |
306 | |
307 | static | |
308 | struct buf * | |
309 | getswbuf(prio) | |
310 | int prio; | |
311 | { | |
312 | int s; | |
313 | struct buf *bp; | |
314 | ||
315 | s = splbio(); | |
316 | while (bswlist.av_forw == NULL) { | |
317 | bswlist.b_flags |= B_WANTED; | |
318 | sleep((caddr_t)&bswlist, prio); | |
319 | } | |
320 | bp = bswlist.av_forw; | |
321 | bswlist.av_forw = bp->av_forw; | |
322 | splx(s); | |
323 | return (bp); | |
324 | } | |
325 | ||
326 | static | |
327 | freeswbuf(bp) | |
328 | struct buf *bp; | |
329 | { | |
330 | int s; | |
331 | ||
332 | s = splbio(); | |
333 | bp->av_forw = bswlist.av_forw; | |
334 | bswlist.av_forw = bp; | |
343a57bd KM |
335 | if (bp->b_vp) |
336 | brelvp(bp); | |
c5648f55 KB |
337 | if (bswlist.b_flags & B_WANTED) { |
338 | bswlist.b_flags &= ~B_WANTED; | |
339 | wakeup((caddr_t)&bswlist); | |
340 | wakeup((caddr_t)&proc[2]); | |
341 | } | |
342 | splx(s); | |
343 | } | |
344 | ||
345 | rawread(dev, uio) | |
346 | dev_t dev; | |
347 | struct uio *uio; | |
348 | { | |
349 | return (physio(cdevsw[major(dev)].d_strategy, (struct buf *)NULL, | |
350 | dev, B_READ, minphys, uio)); | |
351 | } | |
352 | ||
353 | rawwrite(dev, uio) | |
354 | dev_t dev; | |
355 | struct uio *uio; | |
356 | { | |
357 | return (physio(cdevsw[major(dev)].d_strategy, (struct buf *)NULL, | |
358 | dev, B_WRITE, minphys, uio)); | |
359 | } |