Commit | Line | Data |
---|---|---|
a5ca12a6 KM |
1 | .\" Copyright (c) 1983 Regents of the University of California. |
2 | .\" All rights reserved. The Berkeley software License Agreement | |
3 | .\" specifies the terms and conditions for redistribution. | |
8a35f543 | 4 | .\" |
f64d08c0 | 5 | .\" @(#)fs.5 6.1 (Berkeley) %G% |
a5ca12a6 | 6 | .\" |
f64d08c0 | 7 | .TH FS 5 "" |
a5ca12a6 | 8 | .UC 5 |
8a35f543 | 9 | .SH NAME |
a5ca12a6 | 10 | fs, inode \- format of file system volume |
8a35f543 KM |
11 | .SH SYNOPSIS |
12 | .B #include <sys/types.h> | |
13 | .br | |
a5ca12a6 | 14 | .B #include <sys/fs.h> |
8a35f543 | 15 | .br |
a5ca12a6 | 16 | .B #include <sys/inode.h> |
8a35f543 | 17 | .SH DESCRIPTION |
a5ca12a6 | 18 | Every file system storage volume (disk or nine-track tape, for instance) |
8a35f543 | 19 | has a common format for certain vital information. |
a5ca12a6 KM |
20 | Every such volume is divided into a certain number of blocks. |
21 | The block size is a parameter of the file system. | |
22 | Sectors 0 to 15 on a file system are used to contain primary | |
23 | and secondary bootstrapping programs. | |
8a35f543 | 24 | .PP |
a5ca12a6 | 25 | The actual file system begins at sector 16 with the |
8a35f543 KM |
26 | .I "super block." |
27 | The layout of the super block as defined by the include file | |
a5ca12a6 | 28 | .RI < sys/fs.h > |
8a35f543 KM |
29 | is: |
30 | .PP | |
31 | .nf | |
a5ca12a6 KM |
32 | #define FS_MAGIC 0x011954 |
33 | struct fs { | |
34 | struct fs *fs_link; /* linked list of file systems */ | |
35 | struct fs *fs_rlink; /* used for incore super blocks */ | |
36 | daddr_t fs_sblkno; /* addr of super-block in filesys */ | |
37 | daddr_t fs_cblkno; /* offset of cyl-block in filesys */ | |
38 | daddr_t fs_iblkno; /* offset of inode-blocks in filesys */ | |
39 | daddr_t fs_dblkno; /* offset of first data after cg */ | |
40 | long fs_cgoffset; /* cylinder group offset in cylinder */ | |
41 | long fs_cgmask; /* used to calc mod fs_ntrak */ | |
42 | time_t fs_time; /* last time written */ | |
43 | long fs_size; /* number of blocks in fs */ | |
44 | long fs_dsize; /* number of data blocks in fs */ | |
45 | long fs_ncg; /* number of cylinder groups */ | |
46 | long fs_bsize; /* size of basic blocks in fs */ | |
47 | long fs_fsize; /* size of frag blocks in fs */ | |
48 | long fs_frag; /* number of frags in a block in fs */ | |
49 | /* these are configuration parameters */ | |
50 | long fs_minfree; /* minimum percentage of free blocks */ | |
51 | long fs_rotdelay; /* num of ms for optimal next block */ | |
52 | long fs_rps; /* disk revolutions per second */ | |
53 | /* these fields can be computed from the others */ | |
54 | long fs_bmask; /* ``blkoff'' calc of blk offsets */ | |
55 | long fs_fmask; /* ``fragoff'' calc of frag offsets */ | |
56 | long fs_bshift; /* ``lblkno'' calc of logical blkno */ | |
57 | long fs_fshift; /* ``numfrags'' calc number of frags */ | |
58 | /* these are configuration parameters */ | |
59 | long fs_maxcontig; /* max number of contiguous blks */ | |
60 | long fs_maxbpg; /* max number of blks per cyl group */ | |
61 | /* these fields can be computed from the others */ | |
62 | long fs_fragshift; /* block to frag shift */ | |
63 | long fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ | |
64 | long fs_sbsize; /* actual size of super block */ | |
65 | long fs_csmask; /* csum block offset */ | |
66 | long fs_csshift; /* csum block number */ | |
67 | long fs_nindir; /* value of NINDIR */ | |
68 | long fs_inopb; /* value of INOPB */ | |
69 | long fs_nspf; /* value of NSPF */ | |
70 | long fs_sparecon[6]; /* reserved for future constants */ | |
71 | /* sizes determined by number of cylinder groups and their sizes */ | |
72 | daddr_t fs_csaddr; /* blk addr of cyl grp summary area */ | |
73 | long fs_cssize; /* size of cyl grp summary area */ | |
74 | long fs_cgsize; /* cylinder group size */ | |
75 | /* these fields should be derived from the hardware */ | |
76 | long fs_ntrak; /* tracks per cylinder */ | |
77 | long fs_nsect; /* sectors per track */ | |
78 | long fs_spc; /* sectors per cylinder */ | |
79 | /* this comes from the disk driver partitioning */ | |
80 | long fs_ncyl; /* cylinders in file system */ | |
81 | /* these fields can be computed from the others */ | |
82 | long fs_cpg; /* cylinders per group */ | |
83 | long fs_ipg; /* inodes per group */ | |
84 | long fs_fpg; /* blocks per group * fs_frag */ | |
85 | /* this data must be re-computed after crashes */ | |
86 | struct csum fs_cstotal; /* cylinder summary information */ | |
87 | /* these fields are cleared at mount time */ | |
88 | char fs_fmod; /* super block modified flag */ | |
89 | char fs_clean; /* file system is clean flag */ | |
90 | char fs_ronly; /* mounted read-only flag */ | |
91 | char fs_flags; /* currently unused flag */ | |
92 | char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ | |
93 | /* these fields retain the current block allocation info */ | |
94 | long fs_cgrotor; /* last cg searched */ | |
95 | struct csum *fs_csp[MAXCSBUFS];/* list of fs_cs info buffers */ | |
96 | long fs_cpc; /* cyl per cycle in postbl */ | |
97 | short fs_postbl[MAXCPG][NRPOS];/* head of blocks for each rotation */ | |
98 | long fs_magic; /* magic number */ | |
99 | u_char fs_rotbl[1]; /* list of blocks for each rotation */ | |
100 | /* actually longer */ | |
101 | }; | |
8a35f543 | 102 | .fi |
a5ca12a6 KM |
103 | .LP |
104 | Each disk drive contains some number of file systems. | |
105 | A file system consists of a number of cylinder groups. | |
106 | Each cylinder group has inodes and data. | |
107 | .LP | |
108 | A file system is described by its super-block, which in turn | |
109 | describes the cylinder groups. The super-block is critical | |
110 | data and is replicated in each cylinder group to protect against | |
111 | catastrophic loss. This is done at file system creation | |
112 | time and the critical | |
113 | super-block data does not change, so the copies need not be | |
114 | referenced further unless disaster strikes. | |
115 | .LP | |
116 | Addresses stored in inodes are capable of addressing fragments | |
117 | of `blocks'. File system blocks of at most size MAXBSIZE can | |
118 | be optionally broken into 2, 4, or 8 pieces, each of which is | |
119 | addressable; these pieces may be DEV_BSIZE, or some multiple of | |
120 | a DEV_BSIZE unit. | |
121 | .LP | |
122 | Large files consist of exclusively large data blocks. To avoid | |
123 | undue wasted disk space, the last data block of a small file is | |
124 | allocated as only as many fragments of a large block as are | |
125 | necessary. The file system format retains only a single pointer | |
126 | to such a fragment, which is a piece of a single large block that | |
127 | has been divided. The size of such a fragment is determinable from | |
128 | information in the inode, using the ``blksize(fs, ip, lbn)'' macro. | |
129 | .LP | |
130 | The file system records space availability at the fragment level; | |
131 | to determine block availability, aligned fragments are examined. | |
132 | .LP | |
133 | The root inode is the root of the file system. | |
134 | Inode 0 can't be used for normal purposes and | |
135 | historically bad blocks were linked to inode 1, | |
136 | thus the root inode is 2 (inode 1 is no longer used for | |
137 | this purpose; however, numerous dump tapes make this | |
138 | assumption, so we are stuck with it). | |
8a35f543 | 139 | The |
a5ca12a6 KM |
140 | .I lost+found |
141 | directory is given the next available | |
142 | inode when it is initially created by | |
143 | .IR mkfs . | |
144 | .LP | |
145 | .I fs_minfree | |
146 | gives the minimum acceptable percentage of file system | |
147 | blocks which may be free. If the freelist drops below this level | |
148 | only the super-user may continue to allocate blocks. This may | |
149 | be set to 0 if no reserve of free blocks is deemed necessary, | |
150 | however severe performance degradations will be observed if the | |
151 | file system is run at greater than 90% full; thus the default | |
152 | value of | |
153 | .I fs_minfree | |
154 | is 10%. | |
155 | .LP | |
156 | Empirically the best trade-off between block fragmentation and | |
157 | overall disk utilization at a loading of 90% comes with a | |
158 | fragmentation of 4, thus the default fragment size is a fourth | |
159 | of the block size. | |
160 | .LP | |
161 | .I Cylinder group related | |
162 | .IR limits : | |
163 | Each cylinder keeps track of the availability of blocks at different | |
164 | rotational positions, so that sequential blocks can be laid out | |
165 | with minimum rotational latency. NRPOS is the number of rotational | |
166 | positions which are distinguished. With NRPOS set to 8, the resolution of the | |
167 | summary information is 2ms for a typical 3600 rpm drive. | |
168 | .LP | |
169 | .I fs_rotdelay | |
170 | gives the minimum number of milliseconds to initiate | |
171 | another disk transfer on the same cylinder. It is used in | |
172 | determining the rotationally optimal layout for disk blocks | |
173 | within a file; the default value for | |
174 | .I fs_rotdelay | |
175 | is 2ms. | |
176 | .LP | |
177 | Each file system has a statically allocated number of inodes. | |
178 | An inode is allocated for each NBPI bytes of disk space. | |
179 | The inode allocation strategy is extremely conservative. | |
180 | .LP | |
181 | MAXIPG bounds the number of inodes per cylinder group, and | |
182 | is needed only to keep the structure simpler by having | |
183 | only a single variable size element (the free bit map). | |
184 | .LP | |
185 | .B N.B.: | |
186 | MAXIPG must be a multiple of INOPB(fs). | |
187 | .LP | |
188 | MINBSIZE is the smallest allowable block size. | |
189 | With a MINBSIZE of 4096 | |
190 | it is possible to create files of size | |
191 | 2^32 with only two levels of indirection. | |
192 | MINBSIZE must be big enough to hold a cylinder group block, | |
193 | thus changes to (struct cg) must keep its size within MINBSIZE. | |
194 | MAXCPG is limited only to dimension an array in (struct cg); | |
195 | it can be made larger as long as that structure's size remains | |
196 | within the bounds dictated by MINBSIZE. | |
197 | Note that super blocks are never more than size SBSIZE. | |
198 | .LP | |
199 | The path name on which the file system is mounted is maintained | |
200 | in | |
201 | .IR fs_fsmnt . | |
202 | MAXMNTLEN defines the amount of space allocated in | |
203 | the super block for this name. | |
204 | The limit on the amount of summary information per file system | |
205 | is defined by MAXCSBUFS. It is currently parameterized for a | |
206 | maximum of two million cylinders. | |
207 | .LP | |
208 | Per cylinder group information is summarized in blocks allocated | |
209 | from the first cylinder group's data blocks. | |
210 | These blocks are read in from | |
211 | .I fs_csaddr | |
212 | (size | |
213 | .IR fs_cssize ) | |
214 | in addition to the super block. | |
215 | .LP | |
216 | .B N.B.: | |
217 | sizeof (struct csum) must be a power of two in order for | |
218 | the ``fs_cs'' macro to work. | |
219 | .LP | |
220 | .I Super block for a file | |
221 | .IR system : | |
222 | MAXBPC bounds the size of the rotational layout tables and | |
223 | is limited by the fact that the super block is of size SBSIZE. | |
224 | The size of these tables is | |
225 | .B inversely | |
226 | proportional to the block | |
227 | size of the file system. The size of the tables is | |
228 | increased when sector sizes are not powers of two, | |
229 | as this increases the number of cylinders | |
230 | included before the rotational pattern repeats | |
231 | .RI ( fs_cpc ). | |
232 | The size of the rotational layout | |
233 | tables is derived from the number of bytes remaining in (struct fs). | |
234 | .LP | |
235 | MAXBPG bounds the number of blocks of data per cylinder group, | |
236 | and is limited by the fact that cylinder groups are at most one block. | |
237 | The size of the free block table | |
238 | is derived from the size of blocks and the number | |
239 | of remaining bytes in the cylinder group structure (struct cg). | |
240 | .LP | |
241 | .IR Inode : | |
242 | The inode is the focus of all file activity in the | |
243 | UNIX file system. There is a unique inode allocated | |
244 | for each active file, | |
245 | each current directory, each mounted-on file, | |
246 | text file, and the root. | |
247 | An inode is `named' by its device/i-number pair. | |
248 | For further information, see the include file | |
249 | .RI < sys/inode.h >. |