[unix-history] /usr/src/sys/kern/kern_physio.c
/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)kern_physio.c	7.3 (Berkeley) %G%
 */

#include "../machine/pte.h"

#include "param.h"
#include "systm.h"
#include "dir.h"
#include "user.h"
#include "buf.h"
#include "conf.h"
#include "proc.h"
#include "seg.h"
#include "vm.h"
#include "trace.h"
#include "map.h"
#include "uio.h"
#ifdef SECSIZE
#include "file.h"
#include "ioctl.h"
#include "disklabel.h"
#endif SECSIZE

/*
 * Swap IO headers -
 * They contain the necessary information for the swap I/O.
 * At any given time, a swap header can be in three
 * different lists. When free it is in the free list,
 * when allocated and the I/O queued, it is on the swap
 * device list, and finally, if the operation was a dirty
 * page push, when the I/O completes, it is inserted
 * in a list of cleaned pages to be processed by the pageout daemon.
 */
struct buf *swbuf;

/*
 * swap I/O -
 *
 * If the flag indicates a dirty page push initiated
 * by the pageout daemon, we map the page into the i'th
 * virtual page of process 2 (the daemon itself) where i is
 * the index of the swap header that has been allocated.
 * We simply initialize the header and queue the I/O but
 * do not wait for completion. When the I/O completes,
 * biodone() will link the header to a list of cleaned
 * pages to be processed by the pageout daemon.
 */
swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent)
	struct proc *p;
	swblk_t dblkno;
	caddr_t addr;
	int nbytes, rdflg, flag;
	dev_t dev;
	u_int pfcent;
{
	register struct buf *bp;
	register u_int c;
	int p2dp;
	register struct pte *dpte, *vpte;
	int s;
	extern swdone();
	int error = 0;

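	/*
	 * Take a swap I/O header off the free list headed by bswlist,
	 * sleeping if none is currently free; splbio() keeps disk
	 * interrupts from changing the list in the meantime.
	 */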
	s = splbio();
	while (bswlist.av_forw == NULL) {
		bswlist.b_flags |= B_WANTED;
		sleep((caddr_t)&bswlist, PSWP+1);
	}
	bp = bswlist.av_forw;
	bswlist.av_forw = bp->av_forw;
	splx(s);

	bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
#ifdef SECSIZE
	bp->b_blksize = DEV_BSIZE;
#endif SECSIZE
	if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
		if (rdflg == B_READ)
			sum.v_pswpin += btoc(nbytes);
		else
			sum.v_pswpout += btoc(nbytes);
	bp->b_proc = p;
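	/*
	 * For a dirty page push, copy the page table entries for the
	 * pages being pushed into the slot of the pageout daemon
	 * (proc[2]) reserved for this swap header, and arrange for
	 * swdone() to be called from biodone() when the write finishes.
	 */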
	if (flag & B_DIRTY) {
		p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
		dpte = dptopte(&proc[2], p2dp);
		vpte = vtopte(p, btop(addr));
		for (c = 0; c < nbytes; c += NBPG) {
			if (vpte->pg_pfnum == 0 || vpte->pg_fod)
				panic("swap bad pte");
			*dpte++ = *vpte++;
		}
		bp->b_un.b_addr = (caddr_t)ctob(dptov(&proc[2], p2dp));
		bp->b_flags |= B_CALL;
		bp->b_iodone = swdone;
		bp->b_pfcent = pfcent;
	} else
		bp->b_un.b_addr = addr;
	while (nbytes > 0) {
		bp->b_bcount = nbytes;
		minphys(bp);
		c = bp->b_bcount;
		bp->b_blkno = dblkno;
		bp->b_dev = dev;
#ifdef TRACE
		trace(TR_SWAPIO, dev, bp->b_blkno);
#endif
		physstrat(bp, bdevsw[major(dev)].d_strategy, PSWP);
		if (flag & B_DIRTY) {
			if (c < nbytes)
				panic("big push");
			return (0);
		}
		bp->b_un.b_addr += c;
		bp->b_flags &= ~B_DONE;
		if (bp->b_flags & B_ERROR) {
			if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
				panic("hard IO err in swap");
			swkill(p, "swap: read error from swap device");
			error = EIO;
		}
		nbytes -= c;
#ifdef SECSIZE
		if (flag & B_PGIN && nbytes > 0)
			panic("big pgin");
#endif SECSIZE
		dblkno += btodb(c);
	}
	s = splbio();
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->av_forw = bswlist.av_forw;
	bswlist.av_forw = bp;
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bswlist);
		wakeup((caddr_t)&proc[2]);
	}
	splx(s);
	return (error);
}

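/*
 * Usage sketch (editor's illustration, not part of the original file):
 * a caller that wants to read npages pages back in from swap block
 * dblkno of the swap device into virtual address addr of process p
 * would issue something like the call below.  The names npages,
 * dblkno, addr and swapdev stand in for whatever the caller actually
 * has at hand.
 *
 *	error = swap(p, dblkno, addr, ctob(npages), B_READ, 0, swapdev, 0);
 */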
/*
 * Put a buffer on the clean list after I/O is done.
 * Called from biodone.
 */
swdone(bp)
	register struct buf *bp;
{
	register int s;

	if (bp->b_flags & B_ERROR)
		panic("IO err in push");
	s = splbio();
	bp->av_forw = bclnlist;
	cnt.v_pgout++;
	cnt.v_pgpgout += bp->b_bcount / NBPG;
	bclnlist = bp;
	if (bswlist.b_flags & B_WANTED)
		wakeup((caddr_t)&proc[2]);
	splx(s);
}

/*
 * If rout == 0 then killed on swap error, else
 * rout is the name of the routine where we ran out of
 * swap space.
 */
swkill(p, rout)
	struct proc *p;
	char *rout;
{

	printf("pid %d: %s\n", p->p_pid, rout);
	uprintf("sorry, pid %d was killed in %s\n", p->p_pid, rout);
	/*
	 * To be sure no looping (e.g. in vmsched trying to
	 * swap out) mark process locked in core (as though
	 * done by user) after killing it so no one will try
	 * to swap it out.
	 */
	psignal(p, SIGKILL);
	p->p_flag |= SULOCK;
}

/*
 * Raw I/O. The arguments are
 *	The strategy routine for the device
 *	A buffer, which will always be a special buffer
 *	  header owned exclusively by the device for this purpose
 *	The device number
 *	Read/write flag
 * Essentially all the work is computing physical addresses and
 * validating them.
 * If the user has the proper access privileges, the process is
 * marked 'delayed unlock' and the pages involved in the I/O are
 * faulted and locked. After the completion of the I/O, the above pages
 * are unlocked.
 */
physio(strat, bp, dev, rw, mincnt, uio)
	int (*strat)();
	register struct buf *bp;
	dev_t dev;
	int rw;
	unsigned (*mincnt)();
	struct uio *uio;
{
	register struct iovec *iov;
	register int c;
	char *a;
	int s, error = 0;
#ifdef SECSIZE
	int bsize;
	struct partinfo dpart;
#endif SECSIZE

#ifdef SECSIZE
	if ((unsigned)major(dev) < nchrdev &&
	    (*cdevsw[major(dev)].d_ioctl)(dev, DIOCGPART, (caddr_t)&dpart,
	    FREAD) == 0)
		bsize = dpart.disklab->d_secsize;
	else
		bsize = DEV_BSIZE;
#endif SECSIZE
	for (;;) {
		if (uio->uio_iovcnt == 0)
			return (0);
		iov = uio->uio_iov;
		if (useracc(iov->iov_base, (u_int)iov->iov_len,
		    rw==B_READ? B_WRITE : B_READ) == NULL)
			return (EFAULT);
		s = splbio();
		while (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
		}
		splx(s);
		bp->b_error = 0;
		bp->b_proc = u.u_procp;
#ifdef SECSIZE
		bp->b_blksize = bsize;
#endif SECSIZE
		bp->b_un.b_addr = iov->iov_base;
		while (iov->iov_len > 0) {
			bp->b_flags = B_BUSY | B_PHYS | rw;
			bp->b_dev = dev;
#ifdef SECSIZE
			bp->b_blkno = uio->uio_offset / bsize;
#else SECSIZE
			bp->b_blkno = btodb(uio->uio_offset);
#endif SECSIZE
			bp->b_bcount = iov->iov_len;
			(*mincnt)(bp);
			c = bp->b_bcount;
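			/*
			 * Lock the user pages behind the transfer in
			 * core, start the I/O through the device
			 * strategy routine (physstrat() waits for it
			 * to complete), then unlock the pages again.
			 */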
			u.u_procp->p_flag |= SPHYSIO;
			vslock(a = bp->b_un.b_addr, c);
			physstrat(bp, strat, PRIBIO);
			(void) splbio();
			vsunlock(a, c, rw);
			u.u_procp->p_flag &= ~SPHYSIO;
			if (bp->b_flags&B_WANTED)
				wakeup((caddr_t)bp);
			splx(s);
			c -= bp->b_resid;
			bp->b_un.b_addr += c;
			iov->iov_len -= c;
			uio->uio_resid -= c;
			uio->uio_offset += c;
			/* temp kludge for tape drives */
			if (bp->b_resid || (bp->b_flags&B_ERROR))
				break;
		}
		bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
		error = geterror(bp);
		/* temp kludge for tape drives */
		if (bp->b_resid || error)
			return (error);
		uio->uio_iov++;
		uio->uio_iovcnt--;
	}
}

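/*
 * Usage sketch (editor's illustration, not part of the original file):
 * a character-device driver normally exports physio() as its raw read
 * and write entry points, passing its strategy routine, a private raw
 * buffer header and minphys() (or its own count-limiting routine).
 * The names xxread, xxstrategy and rxxbuf below are hypothetical.
 *
 *	xxread(dev, uio)
 *		dev_t dev;
 *		struct uio *uio;
 *	{
 *
 *		return (physio(xxstrategy, &rxxbuf, dev, B_READ,
 *		    minphys, uio));
 *	}
 */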
unsigned
minphys(bp)
	struct buf *bp;
{

	if (bp->b_bcount > MAXPHYS)
		bp->b_bcount = MAXPHYS;
}
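/*
 * Usage sketch (editor's illustration, not part of the original file):
 * a driver whose controller cannot handle MAXPHYS-sized transfers
 * would pass its own count-limiting routine to physio() instead of
 * minphys().  The name xxminphys and the 60 kilobyte limit below are
 * hypothetical.
 *
 *	unsigned
 *	xxminphys(bp)
 *		struct buf *bp;
 *	{
 *
 *		if (bp->b_bcount > 60 * 1024)
 *			bp->b_bcount = 60 * 1024;
 *	}
 */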