Commit | Line | Data |
---|---|---|
45338fc4 CL |
1 | .\" Copyright (c) 1983, 1991 Regents of the University of California. |
2 | .\" All rights reserved. | |
8a35f543 | 3 | .\" |
45338fc4 | 4 | .\" %sccs.include.redist.roff% |
a5ca12a6 | 5 | .\" |
45338fc4 CL |
6 | .\" @(#)fs.5 6.4 (Berkeley) %G% |
7 | .\" | |
8 | .Dd | |
9 | .Dt FS 5 | |
10 | .Os BSD 4.2 | |
11 | .Sh NAME | |
12 | .Nm fs , | |
13 | .Nm inode | |
14 | .Nd format of file system volume | |
15 | .Sh SYNOPSIS | |
16 | .Fd #include <sys/types.h> | |
17 | .Fd #include <ufs/fs.h> | |
18 | .Fd #include <ufs/inode.h> | |
19 | .Sh DESCRIPTION | |
20 | The files | |
21 | .Aq Pa fs.h | |
22 | and | |
23 | .Aq Pa inode.h | |
24 | declare several structures, defined variables and macros | |
25 | which are used to create and manage the underlying format of | |
26 | file system objects on random access devices (disks). | |
27 | .Pp | |
28 | The block size and number of blocks which | |
29 | comprise a file system are parameters of the file system. | |
30 | Sectors beginning at | |
31 | .Dv BBLOCK | |
32 | and continuing for | |
33 | .Dv BBSIZE | |
34 | are used | |
35 | for a disklabel and for some hardware primary | |
36 | and secondary bootstrapping programs. | |
37 | .Pp | |
38 | The actual file system begins at sector | |
39 | .Dv SBLOCK | |
40 | with the | |
41 | .Em super-block | |
42 | that is of size | |
43 | .Dv SBSIZE . | |
44 | The following structure describes the super-block and is | |
45 | from the file | |
46 | .Aq Pa ufs/fs.h : | |
47 | .Bd -literal | |
e0a407ff KM |
48 | #define FS_MAGIC 0x011954 |
49 | struct fs { | |
50 | struct fs *fs_link; /* linked list of file systems */ | |
51 | struct fs *fs_rlink; /* used for incore super blocks */ | |
52 | daddr_t fs_sblkno; /* addr of super-block in filesys */ | |
53 | daddr_t fs_cblkno; /* offset of cyl-block in filesys */ | |
54 | daddr_t fs_iblkno; /* offset of inode-blocks in filesys */ | |
55 | daddr_t fs_dblkno; /* offset of first data after cg */ | |
56 | long fs_cgoffset; /* cylinder group offset in cylinder */ | |
57 | long fs_cgmask; /* used to calc mod fs_ntrak */ | |
58 | time_t fs_time; /* last time written */ | |
59 | long fs_size; /* number of blocks in fs */ | |
60 | long fs_dsize; /* number of data blocks in fs */ | |
61 | long fs_ncg; /* number of cylinder groups */ | |
62 | long fs_bsize; /* size of basic blocks in fs */ | |
63 | long fs_fsize; /* size of frag blocks in fs */ | |
64 | long fs_frag; /* number of frags in a block in fs */ | |
a5ca12a6 | 65 | /* these are configuration parameters */ |
e0a407ff KM |
66 | long fs_minfree; /* minimum percentage of free blocks */ |
67 | long fs_rotdelay; /* num of ms for optimal next block */ | |
68 | long fs_rps; /* disk revolutions per second */ | |
a5ca12a6 | 69 | /* these fields can be computed from the others */ |
e0a407ff KM |
70 | long fs_bmask; /* ``blkoff'' calc of blk offsets */ |
71 | long fs_fmask; /* ``fragoff'' calc of frag offsets */ | |
72 | long fs_bshift; /* ``lblkno'' calc of logical blkno */ | |
73 | long fs_fshift; /* ``numfrags'' calc number of frags */ | |
a5ca12a6 | 74 | /* these are configuration parameters */ |
e0a407ff KM |
75 | long fs_maxcontig; /* max number of contiguous blks */ |
76 | long fs_maxbpg; /* max number of blks per cyl group */ | |
a5ca12a6 | 77 | /* these fields can be computed from the others */ |
e0a407ff KM |
78 | long fs_fragshift; /* block to frag shift */ |
79 | long fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ | |
80 | long fs_sbsize; /* actual size of super block */ | |
81 | long fs_csmask; /* csum block offset */ | |
82 | long fs_csshift; /* csum block number */ | |
83 | long fs_nindir; /* value of NINDIR */ | |
84 | long fs_inopb; /* value of INOPB */ | |
85 | long fs_nspf; /* value of NSPF */ | |
86 | /* yet another configuration parameter */ | |
87 | long fs_optim; /* optimization preference, see below */ | |
88 | /* these fields are derived from the hardware */ | |
89 | long fs_npsect; /* # sectors/track including spares */ | |
90 | long fs_interleave; /* hardware sector interleave */ | |
91 | long fs_trackskew; /* sector 0 skew, per track */ | |
92 | long fs_headswitch; /* head switch time, usec */ | |
93 | long fs_trkseek; /* track-to-track seek, usec */ | |
a5ca12a6 | 94 | /* sizes determined by number of cylinder groups and their sizes */ |
e0a407ff KM |
95 | daddr_t fs_csaddr; /* blk addr of cyl grp summary area */ |
96 | long fs_cssize; /* size of cyl grp summary area */ | |
97 | long fs_cgsize; /* cylinder group size */ | |
98 | /* these fields are derived from the hardware */ | |
99 | long fs_ntrak; /* tracks per cylinder */ | |
100 | long fs_nsect; /* sectors per track */ | |
101 | long fs_spc; /* sectors per cylinder */ | |
a5ca12a6 | 102 | /* this comes from the disk driver partitioning */ |
e0a407ff | 103 | long fs_ncyl; /* cylinders in file system */ |
a5ca12a6 | 104 | /* these fields can be computed from the others */ |
e0a407ff KM |
105 | long fs_cpg; /* cylinders per group */ |
106 | long fs_ipg; /* inodes per group */ | |
107 | long fs_fpg; /* blocks per group * fs_frag */ | |
a5ca12a6 KM |
108 | /* this data must be re-computed after crashes */ |
109 | struct csum fs_cstotal; /* cylinder summary information */ | |
110 | /* these fields are cleared at mount time */ | |
e0a407ff KM |
111 | char fs_fmod; /* super block modified flag */ |
112 | char fs_clean; /* file system is clean flag */ | |
113 | char fs_ronly; /* mounted read-only flag */ | |
114 | char fs_flags; /* currently unused flag */ | |
a5ca12a6 KM |
115 | char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ |
116 | /* these fields retain the current block allocation info */ | |
e0a407ff KM |
117 | long fs_cgrotor; /* last cg searched */ |
118 | struct csum *fs_csp[MAXCSBUFS]; /* list of fs_cs info buffers */ | |
119 | long fs_cpc; /* cyl per cycle in postbl */ | |
120 | short fs_opostbl[16][8]; /* old rotation block list head */ | |
121 | long fs_sparecon[56]; /* reserved for future constants */ | |
122 | quad fs_qbmask; /* ~fs_bmask - for use with quad size */ | |
123 | quad fs_qfmask; /* ~fs_fmask - for use with quad size */ | |
45338fc4 | 124 | long fs_postblformat; /* format of positional layout tables */ |
e0a407ff KM |
125 | long fs_nrpos; /* number of rotational positions */ |
126 | long fs_postbloff; /* (short) rotation block list head */ | |
127 | long fs_rotbloff; /* (u_char) blocks for each rotation */ | |
128 | long fs_magic; /* magic number */ | |
129 | u_char fs_space[1]; /* list of blocks for each rotation */ | |
a5ca12a6 KM |
130 | /* actually longer */ |
131 | }; | |
45338fc4 CL |
132 | .Ed |
133 | .Pp | |
a5ca12a6 KM |
134 | Each disk drive contains some number of file systems. |
135 | A file system consists of a number of cylinder groups. | |
136 | Each cylinder group has inodes and data. | |
45338fc4 | 137 | .Pp |
a5ca12a6 KM |
138 | A file system is described by its super-block, which in turn |
139 | describes the cylinder groups. The super-block is critical | |
140 | data and is replicated in each cylinder group to protect against | |
141 | catastrophic loss. This is done at file system creation | |
142 | time and the critical | |
143 | super-block data does not change, so the copies need not be | |
144 | referenced further unless disaster strikes. | |
45338fc4 | 145 | .Pp |
a5ca12a6 | 146 | Addresses stored in inodes are capable of addressing fragments |
45338fc4 CL |
147 | of `blocks'. File system blocks of at most size |
148 | .Dv MAXBSIZE | |
149 | can | |
a5ca12a6 | 150 | be optionally broken into 2, 4, or 8 pieces, each of which is |
45338fc4 CL |
151 | addressable; these pieces may be |
152 | .Dv DEV_BSIZE , | |
153 | or some multiple of | |
154 | a | |
155 | .Dv DEV_BSIZE | |
156 | unit. | |
157 | .Pp | |
a5ca12a6 KM |
158 | Large files consist of exclusively large data blocks. To avoid |
159 | undue wasted disk space, the last data block of a small file is | |
160 | allocated as only as many fragments of a large block as are | |
161 | necessary. The file system format retains only a single pointer | |
162 | to such a fragment, which is a piece of a single large block that | |
163 | has been divided. The size of such a fragment is determinable from | |
45338fc4 CL |
164 | information in the inode, using the |
165 | .Fn blksize fs ip lbn | |
166 | macro. | |
167 | .Pp | |
a5ca12a6 KM |
168 | The file system records space availability at the fragment level; |
169 | to determine block availability, aligned fragments are examined. | |
45338fc4 | 170 | .Pp |
a5ca12a6 KM |
171 | The root inode is the root of the file system. |
172 | Inode 0 can't be used for normal purposes and | |
173 | historically bad blocks were linked to inode 1, | |
174 | thus the root inode is 2 (inode 1 is no longer used for | |
175 | this purpose, however numerous dump tapes make this | |
176 | assumption, so we are stuck with it). | |
45338fc4 CL |
177 | .Pp |
178 | The | |
179 | .Fa fs_minfree | |
180 | element gives the minimum acceptable percentage of file system | |
9de6431b | 181 | blocks that may be free. If the freelist drops below this level |
e0a407ff | 182 | only the super-user may continue to allocate blocks. |
45338fc4 CL |
183 | The |
184 | .Fa fs_minfree | |
185 | element | |
e0a407ff | 186 | may be set to 0 if no reserve of free blocks is deemed necessary, |
a5ca12a6 KM |
187 | however severe performance degradations will be observed if the |
188 | file system is run at greater than 90% full; thus the default | |
189 | value of | |
45338fc4 | 190 | .Fa fs_minfree |
a5ca12a6 | 191 | is 10%. |
45338fc4 | 192 | .Pp |
a5ca12a6 KM |
193 | Empirically the best trade-off between block fragmentation and |
194 | overall disk utilization at a loading of 90% comes with a | |
e0a407ff | 195 | fragmentation of 8, thus the default fragment size is an eighth |
a5ca12a6 | 196 | of the block size. |
45338fc4 CL |
197 | .Pp |
198 | The element | |
199 | .Fa fs_optim | |
9de6431b KM |
200 | specifies whether the file system should try to minimize the time spent |
201 | allocating blocks, or if it should attempt to minimize the space | |
202 | fragmentation on the disk. | |
203 | If the value of fs_minfree (see above) is less than 10%, | |
204 | then the file system defaults to optimizing for space to avoid | |
205 | running out of full sized blocks. | |
206 | If the value of fs_minfree is greater than or equal to 10%, | |
207 | fragmentation is unlikely to be problematical, and | |
208 | the file system defaults to optimizing for time. | |
45338fc4 CL |
209 | .Pp |
210 | .Em Cylinder group related limits : | |
a5ca12a6 KM |
211 | Each cylinder keeps track of the availability of blocks at different |
212 | rotational positions, so that sequential blocks can be laid out | |
e0a407ff KM |
213 | with minimum rotational latency. With the default of 8 distinguished |
214 | rotational positions, the resolution of the | |
a5ca12a6 | 215 | summary information is 2ms for a typical 3600 rpm drive. |
45338fc4 CL |
216 | .Pp |
217 | The element | |
218 | .Fa fs_rotdelay | |
a5ca12a6 | 219 | gives the minimum number of milliseconds to initiate |
45338fc4 CL |
220 | another disk transfer on the same cylinder. |
221 | It is used in determining the rotationally optimal | |
222 | layout for disk blocks within a file; | |
223 | the default value for | |
224 | .Fa fs_rotdelay | |
a5ca12a6 | 225 | is 2ms. |
45338fc4 | 226 | .Pp |
a5ca12a6 | 227 | Each file system has a statically allocated number of inodes. |
45338fc4 CL |
228 | An inode is allocated for each |
229 | .Dv NBPI | |
230 | bytes of disk space. | |
a5ca12a6 | 231 | The inode allocation strategy is extremely conservative. |
45338fc4 CL |
232 | .Pp |
233 | .Dv MINBSIZE | |
234 | is the smallest allowable block size. | |
235 | With a | |
236 | .Dv MINBSIZE | |
237 | of 4096 | |
a5ca12a6 KM |
238 | it is possible to create files of size |
239 | 2^32 with only two levels of indirection. | |
45338fc4 CL |
240 | .Dv MINBSIZE |
241 | must be big enough to hold a cylinder group block, | |
242 | thus changes to | |
243 | .Pq Fa struct cg | |
244 | must keep its size within | |
245 | .Dv MINBSIZE . | |
246 | Note that super-blocks are never more than size | |
247 | .Dv SBSIZE . | |
248 | .Pp | |
9de6431b | 249 | The path name on which the file system is mounted is maintained in |
45338fc4 CL |
250 | .Fa fs_fsmnt . |
251 | .Dv MAXMNTLEN | |
252 | defines the amount of space allocated in | |
253 | the super-block for this name. | |
a5ca12a6 | 254 | The limit on the amount of summary information per file system |
45338fc4 CL |
255 | is defined by |
256 | .Dv MAXCSBUFS . | |
e0a407ff | 257 | For a 4096 byte block size, it is currently parameterized for a |
a5ca12a6 | 258 | maximum of two million cylinders. |
45338fc4 | 259 | .Pp |
a5ca12a6 KM |
260 | Per cylinder group information is summarized in blocks allocated |
261 | from the first cylinder group's data blocks. | |
262 | These blocks are read in from | |
45338fc4 | 263 | .Fa fs_csaddr |
a5ca12a6 | 264 | (size |
45338fc4 CL |
265 | .Fa fs_cssize ) |
266 | in addition to the super-block. | |
267 | .Pp | |
268 | .Sy N.B.: | |
269 | .Xr sizeof Pq Fa struct csum | |
270 | must be a power of two in order for | |
271 | the | |
272 | .Fn fs_cs | |
273 | macro to work. | |
274 | .Pp | |
275 | The | |
276 | .Em "Super-block for a file system" : | |
e0a407ff | 277 | The size of the rotational layout tables |
45338fc4 CL |
278 | is limited by the fact that the super-block is of size |
279 | .Dv SBSIZE . | |
a5ca12a6 | 280 | The size of these tables is |
45338fc4 | 281 | .Em inversely |
a5ca12a6 KM |
282 | proportional to the block |
283 | size of the file system. The size of the tables is | |
284 | increased when sector sizes are not powers of two, | |
285 | as this increases the number of cylinders | |
45338fc4 CL |
286 | included before the rotational pattern repeats |
287 | .Pq Fa fs_cpc . | |
a5ca12a6 | 288 | The size of the rotational layout |
45338fc4 CL |
289 | tables is derived from the number of bytes remaining in |
290 | .Pq Fa struct fs . | |
291 | .Pp | |
e0a407ff KM |
292 | The number of blocks of data per cylinder group |
293 | is limited because cylinder groups are at most one block. | |
294 | The inode and free block tables | |
295 | must fit into a single block after deducting space for | |
45338fc4 CL |
296 | the cylinder group structure |
297 | .Pq Fa struct cg . | |
298 | .Pp | |
299 | The | |
300 | .Em Inode : | |
a5ca12a6 | 301 | The inode is the focus of all file activity in the |
45338fc4 CL |
302 | .Tn UNIX |
303 | file system. | |
304 | There is a unique inode allocated | |
a5ca12a6 KM |
305 | for each active file, |
306 | each current directory, each mounted-on file, | |
307 | text file, and the root. | |
308 | An inode is `named' by its device/i-number pair. | |
309 | For further information, see the include file | |
45338fc4 CL |
310 | .Aq Pa ufs/inode.h . |
311 | .Sh HISTORY | |
312 | A super-block structure named filsys appeared in | |
313 | .At v6 . | |
314 | The file system described in this manual appeared | |
315 | in | |
316 | .Bx 4.2 . |