| 1 | .\" Copyright (c) 1983, 1991 Regents of the University of California. |
| 2 | .\" All rights reserved. |
| 3 | .\" |
| 4 | .\" %sccs.include.redist.roff% |
| 5 | .\" |
| 6 | .\" @(#)fs.5 6.4 (Berkeley) %G% |
| 7 | .\" |
| 8 | .Dd |
| 9 | .Dt FS 5 |
| 10 | .Os BSD 4.2 |
| 11 | .Sh NAME |
| 12 | .Nm fs , |
| 13 | .Nm inode |
| 14 | .Nd format of file system volume |
| 15 | .Sh SYNOPSIS |
| 16 | .Fd #include <sys/types.h> |
| 17 | .Fd #include <ufs/fs.h> |
| 18 | .Fd #include <ufs/inode.h> |
| 19 | .Sh DESCRIPTION |
| 20 | The files |
| 21 | .Aq Pa fs.h |
| 22 | and |
| 23 | .Aq Pa inode.h |
| 24 | declare several structures, defined variables and macros |
| 25 | which are used to create and manage the underlying format of |
| 26 | file system objects on random access devices (disks). |
| 27 | .Pp |
| 28 | The block size and number of blocks which |
| 29 | comprise a file system are parameters of the file system. |
| 30 | Sectors beginning at |
| 31 | .Dv BBLOCK |
| 32 | and continuing for |
| 33 | .Dv BBSIZE |
| 34 | are used |
| 35 | for a disklabel and for some hardware primary |
| 36 | and secondary bootstrapping programs. |
| 37 | .Pp |
| 38 | The actual file system begins at sector |
| 39 | .Dv SBLOCK |
| 40 | with the |
| 41 | .Em super-block |
| 42 | that is of size |
| 43 | .Dv SBSIZE . |
| 44 | The following structure describes the super-block and is |
| 45 | from the file |
| 46 | .Aq Pa ufs/fs.h : |
| 47 | .Bd -literal |
| 48 | #define FS_MAGIC 0x011954 |
| 49 | struct fs { |
| 50 | struct fs *fs_link; /* linked list of file systems */ |
| 51 | struct fs *fs_rlink; /* used for incore super blocks */ |
| 52 | daddr_t fs_sblkno; /* addr of super-block in filesys */ |
| 53 | daddr_t fs_cblkno; /* offset of cyl-block in filesys */ |
| 54 | daddr_t fs_iblkno; /* offset of inode-blocks in filesys */ |
| 55 | daddr_t fs_dblkno; /* offset of first data after cg */ |
| 56 | long fs_cgoffset; /* cylinder group offset in cylinder */ |
| 57 | long fs_cgmask; /* used to calc mod fs_ntrak */ |
| 58 | time_t fs_time; /* last time written */ |
| 59 | long fs_size; /* number of blocks in fs */ |
| 60 | long fs_dsize; /* number of data blocks in fs */ |
| 61 | long fs_ncg; /* number of cylinder groups */ |
| 62 | long fs_bsize; /* size of basic blocks in fs */ |
| 63 | long fs_fsize; /* size of frag blocks in fs */ |
| 64 | long fs_frag; /* number of frags in a block in fs */ |
| 65 | /* these are configuration parameters */ |
| 66 | long fs_minfree; /* minimum percentage of free blocks */ |
| 67 | long fs_rotdelay; /* num of ms for optimal next block */ |
| 68 | long fs_rps; /* disk revolutions per second */ |
| 69 | /* these fields can be computed from the others */ |
| 70 | long fs_bmask; /* ``blkoff'' calc of blk offsets */ |
| 71 | long fs_fmask; /* ``fragoff'' calc of frag offsets */ |
| 72 | long fs_bshift; /* ``lblkno'' calc of logical blkno */ |
| 73 | long fs_fshift; /* ``numfrags'' calc number of frags */ |
| 74 | /* these are configuration parameters */ |
| 75 | long fs_maxcontig; /* max number of contiguous blks */ |
| 76 | long fs_maxbpg; /* max number of blks per cyl group */ |
| 77 | /* these fields can be computed from the others */ |
| 78 | long fs_fragshift; /* block to frag shift */ |
| 79 | long fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ |
| 80 | long fs_sbsize; /* actual size of super block */ |
| 81 | long fs_csmask; /* csum block offset */ |
| 82 | long fs_csshift; /* csum block number */ |
| 83 | long fs_nindir; /* value of NINDIR */ |
| 84 | long fs_inopb; /* value of INOPB */ |
| 85 | long fs_nspf; /* value of NSPF */ |
| 86 | /* yet another configuration parameter */ |
| 87 | long fs_optim; /* optimization preference, see below */ |
| 88 | /* these fields are derived from the hardware */ |
| 89 | long fs_npsect; /* # sectors/track including spares */ |
| 90 | long fs_interleave; /* hardware sector interleave */ |
| 91 | long fs_trackskew; /* sector 0 skew, per track */ |
| 92 | long fs_headswitch; /* head switch time, usec */ |
| 93 | long fs_trkseek; /* track-to-track seek, usec */ |
| 94 | /* sizes determined by number of cylinder groups and their sizes */ |
| 95 | daddr_t fs_csaddr; /* blk addr of cyl grp summary area */ |
| 96 | long fs_cssize; /* size of cyl grp summary area */ |
| 97 | long fs_cgsize; /* cylinder group size */ |
| 98 | /* these fields are derived from the hardware */ |
| 99 | long fs_ntrak; /* tracks per cylinder */ |
| 100 | long fs_nsect; /* sectors per track */ |
| 101 | long fs_spc; /* sectors per cylinder */ |
| 102 | /* this comes from the disk driver partitioning */ |
| 103 | long fs_ncyl; /* cylinders in file system */ |
| 104 | /* these fields can be computed from the others */ |
| 105 | long fs_cpg; /* cylinders per group */ |
| 106 | long fs_ipg; /* inodes per group */ |
| 107 | long fs_fpg; /* blocks per group * fs_frag */ |
| 108 | /* this data must be re-computed after crashes */ |
| 109 | struct csum fs_cstotal; /* cylinder summary information */ |
| 110 | /* these fields are cleared at mount time */ |
| 111 | char fs_fmod; /* super block modified flag */ |
| 112 | char fs_clean; /* file system is clean flag */ |
| 113 | char fs_ronly; /* mounted read-only flag */ |
| 114 | char fs_flags; /* currently unused flag */ |
| 115 | char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ |
| 116 | /* these fields retain the current block allocation info */ |
| 117 | long fs_cgrotor; /* last cg searched */ |
| 118 | struct csum *fs_csp[MAXCSBUFS]; /* list of fs_cs info buffers */ |
| 119 | long fs_cpc; /* cyl per cycle in postbl */ |
| 120 | short fs_opostbl[16][8]; /* old rotation block list head */ |
| 121 | long fs_sparecon[56]; /* reserved for future constants */ |
| 122 | quad fs_qbmask; /* ~fs_bmask - for use with quad size */ |
| 123 | quad fs_qfmask; /* ~fs_fmask - for use with quad size */ |
| 124 | long fs_postblformat; /* format of positional layout tables */ |
| 125 | long fs_nrpos; /* number of rotational positions */ |
| 126 | long fs_postbloff; /* (short) rotation block list head */ |
| 127 | long fs_rotbloff; /* (u_char) blocks for each rotation */ |
| 128 | long fs_magic; /* magic number */ |
| 129 | u_char fs_space[1]; /* list of blocks for each rotation */ |
| 130 | /* actually longer */ |
| 131 | }; |
| 132 | .Ed |
| 133 | .Pp |
| 134 | Each disk drive contains some number of file systems. |
| 135 | A file system consists of a number of cylinder groups. |
| 136 | Each cylinder group has inodes and data. |
| 137 | .Pp |
| 138 | A file system is described by its super-block, which in turn |
| 139 | describes the cylinder groups. The super-block is critical |
| 140 | data and is replicated in each cylinder group to protect against |
| 141 | catastrophic loss. This is done at file system creation |
| 142 | time and the critical |
| 143 | super-block data does not change, so the copies need not be |
| 144 | referenced further unless disaster strikes. |
| 145 | .Pp |
| 146 | Addresses stored in inodes are capable of addressing fragments |
| 147 | of `blocks'. File system blocks of at most size |
| 148 | .Dv MAXBSIZE |
| 149 | can |
| 150 | be optionally broken into 2, 4, or 8 pieces, each of which is |
| 151 | addressable; these pieces may be |
| 152 | .Dv DEV_BSIZE , |
| 153 | or some multiple of |
| 154 | a |
| 155 | .Dv DEV_BSIZE |
| 156 | unit. |
| 157 | .Pp |
| 158 | Large files consist of exclusively large data blocks. To avoid |
| 159 | undue wasted disk space, the last data block of a small file is |
| 160 | allocated as only as many fragments of a large block as are |
| 161 | necessary. The file system format retains only a single pointer |
| 162 | to such a fragment, which is a piece of a single large block that |
| 163 | has been divided. The size of such a fragment is determinable from |
| 164 | information in the inode, using the |
| 165 | .Fn blksize fs ip lbn |
| 166 | macro. |
| 167 | .Pp |
| 168 | The file system records space availability at the fragment level; |
| 169 | to determine block availability, aligned fragments are examined. |
| 170 | .Pp |
| 171 | The root inode is the root of the file system. |
| 172 | Inode 0 can't be used for normal purposes and |
| 173 | historically bad blocks were linked to inode 1, |
| 174 | thus the root inode is 2 (inode 1 is no longer used for |
| 175 | this purpose, however numerous dump tapes make this |
| 176 | assumption, so we are stuck with it). |
| 177 | .Pp |
| 178 | The |
| 179 | .Fa fs_minfree |
| 180 | element gives the minimum acceptable percentage of file system |
| 181 | blocks that may be free. If the freelist drops below this level |
| 182 | only the super-user may continue to allocate blocks. |
| 183 | The |
| 184 | .Fa fs_minfree |
| 185 | element |
| 186 | may be set to 0 if no reserve of free blocks is deemed necessary, |
| 187 | however severe performance degradations will be observed if the |
| 188 | file system is run at greater than 90% full; thus the default |
| 189 | value of |
| 190 | .Fa fs_minfree |
| 191 | is 10%. |
| 192 | .Pp |
| 193 | Empirically the best trade-off between block fragmentation and |
| 194 | overall disk utilization at a loading of 90% comes with a |
| 195 | fragmentation of 8, thus the default fragment size is an eighth |
| 196 | of the block size. |
| 197 | .Pp |
| 198 | The element |
| 199 | .Fa fs_optim |
| 200 | specifies whether the file system should try to minimize the time spent |
| 201 | allocating blocks, or if it should attempt to minimize the space |
| 202 | fragmentation on the disk. |
| 203 | If the value of fs_minfree (see above) is less than 10%, |
| 204 | then the file system defaults to optimizing for space to avoid |
| 205 | running out of full sized blocks. |
| 206 | If the value of fs_minfree is greater than or equal to 10%, |
| 207 | fragmentation is unlikely to be problematical, and |
| 208 | the file system defaults to optimizing for time. |
| 209 | .Pp |
| 210 | .Em Cylinder group related limits : |
| 211 | Each cylinder keeps track of the availability of blocks at different |
| 212 | rotational positions, so that sequential blocks can be laid out |
| 213 | with minimum rotational latency. With the default of 8 distinguished |
| 214 | rotational positions, the resolution of the |
| 215 | summary information is 2ms for a typical 3600 rpm drive. |
| 216 | .Pp |
| 217 | The element |
| 218 | .Fa fs_rotdelay |
| 219 | gives the minimum number of milliseconds to initiate |
| 220 | another disk transfer on the same cylinder. |
| 221 | It is used in determining the rotationally optimal |
| 222 | layout for disk blocks within a file; |
| 223 | the default value for |
| 224 | .Fa fs_rotdelay |
| 225 | is 2ms. |
| 226 | .Pp |
| 227 | Each file system has a statically allocated number of inodes. |
| 228 | An inode is allocated for each |
| 229 | .Dv NBPI |
| 230 | bytes of disk space. |
| 231 | The inode allocation strategy is extremely conservative. |
| 232 | .Pp |
| 233 | .Dv MINBSIZE |
| 234 | is the smallest allowable block size. |
| 235 | With a |
| 236 | .Dv MINBSIZE |
| 237 | of 4096 |
| 238 | it is possible to create files of size |
| 239 | 2^32 with only two levels of indirection. |
| 240 | .Dv MINBSIZE |
| 241 | must be big enough to hold a cylinder group block, |
| 242 | thus changes to |
| 243 | .Pq Fa struct cg |
| 244 | must keep its size within |
| 245 | .Dv MINBSIZE . |
| 246 | Note that super-blocks are never more than size |
| 247 | .Dv SBSIZE . |
| 248 | .Pp |
| 249 | The path name on which the file system is mounted is maintained in |
| 250 | .Fa fs_fsmnt . |
| 251 | .Dv MAXMNTLEN |
| 252 | defines the amount of space allocated in |
| 253 | the super-block for this name. |
| 254 | The limit on the amount of summary information per file system |
| 255 | is defined by |
| 256 | .Dv MAXCSBUFS . |
| 257 | For a 4096 byte block size, it is currently parameterized for a |
| 258 | maximum of two million cylinders. |
| 259 | .Pp |
| 260 | Per cylinder group information is summarized in blocks allocated |
| 261 | from the first cylinder group's data blocks. |
| 262 | These blocks are read in from |
| 263 | .Fa fs_csaddr |
| 264 | (size |
| 265 | .Fa fs_cssize ) |
| 266 | in addition to the super-block. |
| 267 | .Pp |
| 268 | .Sy N.B.: |
| 269 | .Fn sizeof "struct csum" |
| 270 | must be a power of two in order for |
| 271 | the |
| 272 | .Fn fs_cs |
| 273 | macro to work. |
| 274 | .Pp |
| 275 | The |
| 276 | .Em "Super-block for a file system" : |
| 277 | The size of the rotational layout tables |
| 278 | is limited by the fact that the super-block is of size |
| 279 | .Dv SBSIZE . |
| 280 | The size of these tables is |
| 281 | .Em inversely |
| 282 | proportional to the block |
| 283 | size of the file system. The size of the tables is |
| 284 | increased when sector sizes are not powers of two, |
| 285 | as this increases the number of cylinders |
| 286 | included before the rotational pattern repeats |
| 287 | .Pq Fa fs_cpc . |
| 288 | The size of the rotational layout |
| 289 | tables is derived from the number of bytes remaining in |
| 290 | .Pq Fa struct fs . |
| 291 | .Pp |
| 292 | The number of blocks of data per cylinder group |
| 293 | is limited because cylinder groups are at most one block. |
| 294 | The inode and free block tables |
| 295 | must fit into a single block after deducting space for |
| 296 | the cylinder group structure |
| 297 | .Pq Fa struct cg . |
| 298 | .Pp |
| 299 | The |
| 300 | .Em Inode : |
| 301 | The inode is the focus of all file activity in the |
| 302 | .Tn UNIX |
| 303 | file system. |
| 304 | There is a unique inode allocated |
| 305 | for each active file, |
| 306 | each current directory, each mounted-on file, |
| 307 | text file, and the root. |
| 308 | An inode is `named' by its device/i-number pair. |
| 309 | For further information, see the include file |
| 310 | .Aq Pa ufs/inode.h . |
| 311 | .Sh HISTORY |
| 312 | A super-block structure named filsys appeared in |
| 313 | .At v6 . |
| 314 | The file system described in this manual appeared |
| 315 | in |
| 316 | .Bx 4.2 . |