Commit | Line | Data |
---|---|---|
a5ca12a6 KM |
1 | .\" Copyright (c) 1983 Regents of the University of California. |
2 | .\" All rights reserved. The Berkeley software License Agreement | |
3 | .\" specifies the terms and conditions for redistribution. | |
8a35f543 | 4 | .\" |
ca67e7b4 | 5 | .\" @(#)fs.5 6.3 (Berkeley) 5/2/88 |
a5ca12a6 | 6 | .\" |
ca67e7b4 | 7 | .TH FS 5 "May 2, 1988" |
a5ca12a6 | 8 | .UC 5 |
8a35f543 | 9 | .SH NAME |
a5ca12a6 | 10 | fs, inode \- format of file system volume |
8a35f543 KM |
11 | .SH SYNOPSIS |
12 | .B #include <sys/types.h> | |
13 | .br | |
a5ca12a6 | 14 | .B #include <sys/fs.h> |
8a35f543 | 15 | .br |
a5ca12a6 | 16 | .B #include <sys/inode.h> |
8a35f543 | 17 | .SH DESCRIPTION |
a5ca12a6 | 18 | Every file system storage volume (disk, nine-track tape, for instance) |
8a35f543 | 19 | has a common format for certain vital information. |
a5ca12a6 KM |
20 | Every such volume is divided into a certain number of blocks. |
21 | The block size is a parameter of the file system. | |
9de6431b | 22 | Sectors beginning at BBLOCK and continuing for BBSIZE are used to |
e0a407ff KM |
23 | contain a label and, for some hardware, |
24 | primary and secondary bootstrapping programs. | |
8a35f543 | 25 | .PP |
9de6431b KM |
26 | The actual file system begins at sector SBLOCK with the |
27 | .I "super block" | |
28 | that is of size SBSIZE. | |
8a35f543 | 29 | The layout of the super block as defined by the include file |
a5ca12a6 | 30 | .RI < sys/fs.h > |
8a35f543 KM |
31 | is: |
32 | .PP | |
33 | .nf | |
e0a407ff KM |
34 | .ta \w'\ \ \ \ 'u +\w'daddr_t\ \ 'u +\w'fs_fsmnt[MAXMNTLEN];\ \ 'u |
35 | #define FS_MAGIC 0x011954 | |
36 | struct fs { | |
37 | struct fs *fs_link; /* linked list of file systems */ | |
38 | struct fs *fs_rlink; /* used for incore super blocks */ | |
39 | daddr_t fs_sblkno; /* addr of super-block in filesys */ | |
40 | daddr_t fs_cblkno; /* offset of cyl-block in filesys */ | |
41 | daddr_t fs_iblkno; /* offset of inode-blocks in filesys */ | |
42 | daddr_t fs_dblkno; /* offset of first data after cg */ | |
43 | long fs_cgoffset; /* cylinder group offset in cylinder */ | |
44 | long fs_cgmask; /* used to calc mod fs_ntrak */ | |
45 | time_t fs_time; /* last time written */ | |
46 | long fs_size; /* number of blocks in fs */ | |
47 | long fs_dsize; /* number of data blocks in fs */ | |
48 | long fs_ncg; /* number of cylinder groups */ | |
49 | long fs_bsize; /* size of basic blocks in fs */ | |
50 | long fs_fsize; /* size of frag blocks in fs */ | |
51 | long fs_frag; /* number of frags in a block in fs */ | |
a5ca12a6 | 52 | /* these are configuration parameters */ |
e0a407ff KM |
53 | long fs_minfree; /* minimum percentage of free blocks */ |
54 | long fs_rotdelay; /* num of ms for optimal next block */ | |
55 | long fs_rps; /* disk revolutions per second */ | |
a5ca12a6 | 56 | /* these fields can be computed from the others */ |
e0a407ff KM |
57 | long fs_bmask; /* ``blkoff'' calc of blk offsets */ |
58 | long fs_fmask; /* ``fragoff'' calc of frag offsets */ | |
59 | long fs_bshift; /* ``lblkno'' calc of logical blkno */ | |
60 | long fs_fshift; /* ``numfrags'' calc number of frags */ | |
a5ca12a6 | 61 | /* these are configuration parameters */ |
e0a407ff KM |
62 | long fs_maxcontig; /* max number of contiguous blks */ |
63 | long fs_maxbpg; /* max number of blks per cyl group */ | |
a5ca12a6 | 64 | /* these fields can be computed from the others */ |
e0a407ff KM |
65 | long fs_fragshift; /* block to frag shift */ |
66 | long fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ | |
67 | long fs_sbsize; /* actual size of super block */ | |
68 | long fs_csmask; /* csum block offset */ | |
69 | long fs_csshift; /* csum block number */ | |
70 | long fs_nindir; /* value of NINDIR */ | |
71 | long fs_inopb; /* value of INOPB */ | |
72 | long fs_nspf; /* value of NSPF */ | |
73 | /* yet another configuration parameter */ | |
74 | long fs_optim; /* optimization preference, see below */ | |
75 | /* these fields are derived from the hardware */ | |
76 | long fs_npsect; /* # sectors/track including spares */ | |
77 | long fs_interleave; /* hardware sector interleave */ | |
78 | long fs_trackskew; /* sector 0 skew, per track */ | |
79 | long fs_headswitch; /* head switch time, usec */ | |
80 | long fs_trkseek; /* track-to-track seek, usec */ | |
a5ca12a6 | 81 | /* sizes determined by number of cylinder groups and their sizes */ |
e0a407ff KM |
82 | daddr_t fs_csaddr; /* blk addr of cyl grp summary area */ |
83 | long fs_cssize; /* size of cyl grp summary area */ | |
84 | long fs_cgsize; /* cylinder group size */ | |
85 | /* these fields are derived from the hardware */ | |
86 | long fs_ntrak; /* tracks per cylinder */ | |
87 | long fs_nsect; /* sectors per track */ | |
88 | long fs_spc; /* sectors per cylinder */ | |
a5ca12a6 | 89 | /* this comes from the disk driver partitioning */ |
e0a407ff | 90 | long fs_ncyl; /* cylinders in file system */ |
a5ca12a6 | 91 | /* these fields can be computed from the others */ |
e0a407ff KM |
92 | long fs_cpg; /* cylinders per group */ |
93 | long fs_ipg; /* inodes per group */ | |
94 | long fs_fpg; /* blocks per group * fs_frag */ | |
a5ca12a6 KM |
95 | /* this data must be re-computed after crashes */ |
96 | struct csum fs_cstotal; /* cylinder summary information */ | |
97 | /* these fields are cleared at mount time */ | |
e0a407ff KM |
98 | char fs_fmod; /* super block modified flag */ |
99 | char fs_clean; /* file system is clean flag */ | |
100 | char fs_ronly; /* mounted read-only flag */ | |
101 | char fs_flags; /* currently unused flag */ | |
a5ca12a6 KM |
102 | char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ |
103 | /* these fields retain the current block allocation info */ | |
e0a407ff KM |
104 | long fs_cgrotor; /* last cg searched */ |
105 | struct csum *fs_csp[MAXCSBUFS]; /* list of fs_cs info buffers */ | |
106 | long fs_cpc; /* cyl per cycle in postbl */ | |
107 | short fs_opostbl[16][8]; /* old rotation block list head */ | |
108 | long fs_sparecon[56]; /* reserved for future constants */ | |
109 | quad fs_qbmask; /* ~fs_bmask - for use with quad size */ | |
110 | quad fs_qfmask; /* ~fs_fmask - for use with quad size */ | |
111 | long fs_postblformat; /* format of positional layout tables */ | |
112 | long fs_nrpos; /* number of rotational positions */ | |
113 | long fs_postbloff; /* (short) rotation block list head */ | |
114 | long fs_rotbloff; /* (u_char) blocks for each rotation */ | |
115 | long fs_magic; /* magic number */ | |
116 | u_char fs_space[1]; /* list of blocks for each rotation */ | |
a5ca12a6 KM |
117 | /* actually longer */ |
118 | }; | |
8a35f543 | 119 | .fi |
a5ca12a6 KM |
120 | .LP |
121 | Each disk drive contains some number of file systems. | |
122 | A file system consists of a number of cylinder groups. | |
123 | Each cylinder group has inodes and data. | |
124 | .LP | |
125 | A file system is described by its super-block, which in turn | |
126 | describes the cylinder groups. The super-block is critical | |
127 | data and is replicated in each cylinder group to protect against | |
128 | catastrophic loss. This is done at file system creation | |
129 | time and the critical | |
130 | super-block data does not change, so the copies need not be | |
131 | referenced further unless disaster strikes. | |
132 | .LP | |
133 | Addresses stored in inodes are capable of addressing fragments | |
134 | of `blocks'. File system blocks of at most size MAXBSIZE can | |
135 | be optionally broken into 2, 4, or 8 pieces, each of which is | |
136 | addressable; these pieces may be DEV_BSIZE, or some multiple of | |
137 | a DEV_BSIZE unit. | |
138 | .LP | |
139 | Large files consist of exclusively large data blocks. To avoid | |
140 | undue wasted disk space, the last data block of a small file is | |
141 | allocated as only as many fragments of a large block as are | |
142 | necessary. The file system format retains only a single pointer | |
143 | to such a fragment, which is a piece of a single large block that | |
144 | has been divided. The size of such a fragment is determinable from | |
145 | information in the inode, using the ``blksize(fs, ip, lbn)'' macro. | |
146 | .LP | |
147 | The file system records space availability at the fragment level; | |
148 | to determine block availability, aligned fragments are examined. | |
149 | .LP | |
150 | The root inode is the root of the file system. | |
151 | Inode 0 can't be used for normal purposes and | |
152 | historically bad blocks were linked to inode 1, | |
153 | thus the root inode is 2 (inode 1 is no longer used for | |
154 | this purpose; however, numerous dump tapes make this | |
155 | assumption, so we are stuck with it). | |
a5ca12a6 KM |
156 | .LP |
157 | .I fs_minfree | |
158 | gives the minimum acceptable percentage of file system | |
9de6431b | 159 | blocks that may be free. If the freelist drops below this level |
e0a407ff KM |
160 | only the super-user may continue to allocate blocks. |
161 | .I Fs_minfree | |
162 | may be set to 0 if no reserve of free blocks is deemed necessary; | |
a5ca12a6 KM |
163 | however, severe performance degradations will be observed if the |
164 | file system is run at greater than 90% full; thus the default | |
165 | value of | |
166 | .I fs_minfree | |
167 | is 10%. | |
168 | .LP | |
169 | Empirically the best trade-off between block fragmentation and | |
170 | overall disk utilization at a loading of 90% comes with a | |
e0a407ff | 171 | fragmentation of 8, thus the default fragment size is an eighth |
a5ca12a6 KM |
172 | of the block size. |
173 | .LP | |
9de6431b KM |
174 | .I fs_optim |
175 | specifies whether the file system should try to minimize the time spent | |
176 | allocating blocks, or if it should attempt to minimize the space | |
177 | fragmentation on the disk. | |
178 | If the value of fs_minfree (see above) is less than 10%, | |
179 | then the file system defaults to optimizing for space to avoid | |
180 | running out of full sized blocks. | |
181 | If the value of fs_minfree is greater than or equal to 10%, | |
182 | fragmentation is unlikely to be problematical, and | |
183 | the file system defaults to optimizing for time. | |
184 | .LP | |
a5ca12a6 KM |
185 | .I Cylinder group related |
186 | .IR limits : | |
187 | Each cylinder keeps track of the availability of blocks at different | |
188 | rotational positions, so that sequential blocks can be laid out | |
e0a407ff KM |
189 | with minimum rotational latency. With the default of 8 distinguished |
190 | rotational positions, the resolution of the | |
a5ca12a6 KM |
191 | summary information is 2ms for a typical 3600 rpm drive. |
192 | .LP | |
193 | .I fs_rotdelay | |
194 | gives the minimum number of milliseconds to initiate | |
195 | another disk transfer on the same cylinder. It is used in | |
196 | determining the rotationally optimal layout for disk blocks | |
197 | within a file; the default value for | |
198 | .I fs_rotdelay | |
199 | is 2ms. | |
200 | .LP | |
201 | Each file system has a statically allocated number of inodes. | |
202 | An inode is allocated for each NBPI bytes of disk space. | |
203 | The inode allocation strategy is extremely conservative. | |
204 | .LP | |
a5ca12a6 KM |
205 | MINBSIZE is the smallest allowable block size. |
206 | With a MINBSIZE of 4096 | |
207 | it is possible to create files of size | |
208 | 2^32 with only two levels of indirection. | |
209 | MINBSIZE must be big enough to hold a cylinder group block, | |
210 | thus changes to (struct cg) must keep its size within MINBSIZE. | |
a5ca12a6 KM |
211 | Note that super blocks are never more than size SBSIZE. |
212 | .LP | |
9de6431b | 213 | The path name on which the file system is mounted is maintained in |
a5ca12a6 KM |
214 | .IR fs_fsmnt . |
215 | MAXMNTLEN defines the amount of space allocated in | |
216 | the super block for this name. | |
217 | The limit on the amount of summary information per file system | |
e0a407ff KM |
218 | is defined by MAXCSBUFS. |
219 | For a 4096 byte block size, it is currently parameterized for a | |
a5ca12a6 KM |
220 | maximum of two million cylinders. |
221 | .LP | |
222 | Per cylinder group information is summarized in blocks allocated | |
223 | from the first cylinder group's data blocks. | |
224 | These blocks are read in from | |
225 | .I fs_csaddr | |
226 | (size | |
227 | .IR fs_cssize ) | |
228 | in addition to the super block. | |
229 | .LP | |
230 | .B N.B.: | |
231 | sizeof (struct csum) must be a power of two in order for | |
232 | the ``fs_cs'' macro to work. | |
233 | .LP | |
234 | .I Super block for a file | |
235 | .IR system : | |
e0a407ff | 236 | The size of the rotational layout tables |
a5ca12a6 KM |
237 | is limited by the fact that the super block is of size SBSIZE. |
238 | The size of these tables is | |
239 | .B inversely | |
240 | proportional to the block | |
241 | size of the file system. The size of the tables is | |
242 | increased when sector sizes are not powers of two, | |
243 | as this increases the number of cylinders | |
244 | included before the rotational pattern repeats | |
245 | .RI ( fs_cpc ). | |
246 | The size of the rotational layout | |
247 | tables is derived from the number of bytes remaining in (struct fs). | |
248 | .LP | |
e0a407ff KM |
249 | The number of blocks of data per cylinder group |
250 | is limited because cylinder groups are at most one block. | |
251 | The inode and free block tables | |
252 | must fit into a single block after deducting space for | |
253 | the cylinder group structure (struct cg). | |
a5ca12a6 KM |
254 | .LP |
255 | .IR Inode : | |
256 | The inode is the focus of all file activity in the | |
257 | UNIX file system. There is a unique inode allocated | |
258 | for each active file, | |
259 | each current directory, each mounted-on file, | |
260 | text file, and the root. | |
261 | An inode is `named' by its device/i-number pair. | |
262 | For further information, see the include file | |
263 | .RI < sys/inode.h >. |