BSD 4_4_Lite2 release
[unix-history] / usr / src / libexec / lfs_cleanerd / cleanerd.c
/*-
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef lint
static char copyright[] =
"@(#) Copyright (c) 1992, 1993\n\
The Regents of the University of California. All rights reserved.\n";
#endif /* not lint */
#ifndef lint
static char sccsid[] = "@(#)cleanerd.c 8.5 (Berkeley) 6/10/95";
#endif /* not lint */
#include <sys/param.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <ufs/ufs/dinode.h>
#include <ufs/lfs/lfs.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "clean.h"
char *special = "cleanerd";
int do_small = 0;
int do_mmap = 0;
int stat_report = 0;
struct cleaner_stats {
double util_tot;
double util_sos;
int blocks_read;
int blocks_written;
int segs_cleaned;
int segs_empty;
int segs_error;
} cleaner_stats;
struct seglist {
int sl_id; /* segment number */
int sl_cost; /* cleaning cost */
char sl_bytes; /* bytes in segment */
};
struct tossstruct {
struct lfs *lfs;
int seg;
};
#define CLEAN_BYTES 0x1
/* function prototypes for system calls; not sure where they should go */
int lfs_segwait __P((fsid_t *, struct timeval *));
int lfs_segclean __P((fsid_t *, u_long));
int lfs_bmapv __P((fsid_t *, BLOCK_INFO *, int));
int lfs_markv __P((fsid_t *, BLOCK_INFO *, int));
/* function prototypes */
int bi_tossold __P((const void *, const void *, const void *));
int choose_segments __P((FS_INFO *, struct seglist *,
int (*)(FS_INFO *, SEGUSE *)));
void clean_fs __P((FS_INFO *, int (*)(FS_INFO *, SEGUSE *), int, long));
int clean_loop __P((FS_INFO *, int, long));
int clean_segment __P((FS_INFO *, int));
int cost_benefit __P((FS_INFO *, SEGUSE *));
int cost_compare __P((const void *, const void *));
void sig_report __P((int));
/*
* Cleaning Cost Functions:
*
* These return the cost of cleaning a segment. The higher the cost value
* the better it is to clean the segment, so empty segments have the highest
* cost. (It is probably better to think of this as a priority value
* instead).
*
* This is the cost-benefit policy simulated and described in Rosenblum's
* 1991 SOSP paper.
*/
int
cost_benefit(fsp, su)
FS_INFO *fsp; /* file system information */
SEGUSE *su;
{
struct lfs *lfsp;
struct timeval t;
int age;
int live;
gettimeofday(&t, NULL);
live = su->su_nbytes;
age = t.tv_sec < su->su_lastmod ? 0 : t.tv_sec - su->su_lastmod;
lfsp = &fsp->fi_lfs;
if (live == 0)
return (t.tv_sec * lblkno(lfsp, seg_size(lfsp)));
else {
/*
* from lfsSegUsage.c (Mendel's code).
* priority calculation is done using INTEGER arithmetic.
* sizes are in BLOCKS (that is why we use lblkno below).
* age is in seconds.
*
* priority = ((seg_size - live) * age) / (seg_size + live)
*/
#ifdef VERBOSE
if (live < 0 || live > seg_size(lfsp)) {
err(0, "Bad segusage count: %d", live);
live = 0;
}
#endif
return (lblkno(lfsp, seg_size(lfsp) - live) * age)
/ lblkno(lfsp, seg_size(lfsp) + live);
}
}
int
main(argc, argv)
int argc;
char *argv[];
{
FS_INFO *fsp;
struct statfs *lstatfsp; /* file system stats */
struct timeval timeout; /* sleep timeout */
fsid_t fsid;
long clean_opts; /* cleaning options */
int i, nodaemon, segs_per_clean;
int opt, cmd_err;
char *fs_name; /* name of filesystem to clean */
extern int optind;
cmd_err = nodaemon = 0;
clean_opts = 0;
segs_per_clean = 1;
while ((opt = getopt(argc, argv, "bdmn:r:s")) != EOF) {
switch (opt) {
case 'b': /*
* Use live bytes to determine
* how many segs to clean.
*/
clean_opts |= CLEAN_BYTES;
break;
case 'd': /* Debug mode. */
nodaemon = 1;
break;
case 'm': /* Use mmap instead of read/write */
do_mmap = 1;
break;
case 'n': /* How many segs to clean at once */
segs_per_clean = atoi(optarg);
break;
case 'r': /* Report every stat_report segments */
stat_report = atoi(optarg);
break;
case 's': /* small writes */
do_small = 1;
break;
default:
++cmd_err;
}
}
argc -= optind;
argv += optind;
if (cmd_err || (argc != 1))
err(1, "usage: lfs_cleanerd [-smd] fs_name");
fs_name = argv[0];
signal(SIGINT, sig_report);
signal(SIGUSR1, sig_report);
signal(SIGUSR2, sig_report);
if (fs_getmntinfo(&lstatfsp, fs_name, "lfs") == 0) {
/* didn't find the filesystem */
err(1, "lfs_cleanerd: filesystem %s isn't an LFS!", fs_name);
}
if (!nodaemon) /* should we become a daemon, chdir to / & close fd's */
if (daemon(0, 0) == -1)
err(1, "lfs_cleanerd: couldn't become a daemon!");
timeout.tv_sec = 5*60; /* five minutes */
timeout.tv_usec = 0;
fsid.val[0] = 0;
fsid.val[1] = 0;
for (fsp = get_fs_info(lstatfsp, do_mmap); ;
reread_fs_info(fsp, do_mmap)) {
/*
* clean the filesystem, and, if it needed cleaning
* (i.e. it returned nonzero) try it again
* to make sure that some nasty process hasn't just
* filled the disk system up.
*/
if (clean_loop(fsp, segs_per_clean, clean_opts))
continue;
#ifdef VERBOSE
(void)printf("Cleaner going to sleep.\n");
#endif
if (lfs_segwait(&fsid, &timeout) < 0)
err(0, "lfs_segwait: returned error\n");
#ifdef VERBOSE
(void)printf("Cleaner waking up.\n");
#endif
}
}
/* return the number of segments cleaned */
int
clean_loop(fsp, nsegs, options)
FS_INFO *fsp; /* file system information */
int nsegs;
long options;
{
double loadavg[MAXLOADS];
time_t now;
u_long max_free_segs;
u_long db_per_seg;
/*
* Compute the maximum possible number of free segments, given the
* number of free blocks.
*/
db_per_seg = fsbtodb(&fsp->fi_lfs, fsp->fi_lfs.lfs_ssize);
max_free_segs = fsp->fi_lfs.lfs_bfree / db_per_seg;
/*
* We will clean if there are not enough free blocks or total clean
* space is less than BUSY_LIM % of possible clean space.
*/
now = time((time_t *)NULL);
#ifdef VERBOSE
printf("db_er_seg = %d max_free_segs = %d, bfree = %d avail = %d ",
db_per_seg, max_free_segs, fsp->fi_lfs.lfs_bfree,
fsp->fi_lfs.lfs_avail);
printf("clean = %d\n", fsp->fi_cip->clean);
#endif
if ((fsp->fi_lfs.lfs_bfree - fsp->fi_lfs.lfs_avail > db_per_seg &&
fsp->fi_lfs.lfs_avail < db_per_seg) ||
(fsp->fi_cip->clean < max_free_segs &&
(fsp->fi_cip->clean <= MIN_SEGS(&fsp->fi_lfs) ||
fsp->fi_cip->clean < max_free_segs * BUSY_LIM))) {
printf("Cleaner Running at %s (%d of %d segments available)\n",
ctime(&now), fsp->fi_cip->clean, max_free_segs);
clean_fs(fsp, cost_benefit, nsegs, options);
return (1);
} else {
/*
* We will also clean if the system is reasonably idle and
* the total clean space is less then IDLE_LIM % of possible
* clean space.
*/
if (getloadavg(loadavg, MAXLOADS) == -1) {
perror("getloadavg: failed\n");
return (-1);
}
if (loadavg[ONE_MIN] == 0.0 && loadavg[FIVE_MIN] &&
fsp->fi_cip->clean < max_free_segs * IDLE_LIM) {
clean_fs(fsp, cost_benefit, nsegs, options);
printf("Cleaner Running at %s (system idle)\n",
ctime(&now));
return (1);
}
}
printf("Cleaner Not Running at %s\n", ctime(&now));
return (0);
}
void
clean_fs(fsp, cost_func, nsegs, options)
FS_INFO *fsp; /* file system information */
int (*cost_func) __P((FS_INFO *, SEGUSE *));
int nsegs;
long options;
{
struct seglist *segs, *sp;
int to_clean, cleaned_bytes;
int i;
if ((segs =
malloc(fsp->fi_lfs.lfs_nseg * sizeof(struct seglist))) == NULL) {
err(0, "malloc failed");
return;
}
i = choose_segments(fsp, segs, cost_func);
#ifdef VERBOSE
printf("clean_fs: found %d segments to clean in file system %s\n",
i, fsp->fi_statfsp->f_mntonname);
fflush(stdout);
#endif
if (i) {
/* Check which cleaning algorithm to use. */
if (options & CLEAN_BYTES) {
cleaned_bytes = 0;
to_clean = nsegs <<
(fsp->fi_lfs.lfs_segshift + fsp->fi_lfs.lfs_bshift);
for (sp = segs; i && cleaned_bytes < to_clean;
i--, ++sp) {
if (clean_segment(fsp, sp->sl_id) < 0)
perror("clean_segment failed");
else if (lfs_segclean(&fsp->fi_statfsp->f_fsid,
sp->sl_id) < 0)
perror("lfs_segclean failed");
printf("Cleaned segment %d (%d bytes)\n",
sp->sl_id, sp->sl_bytes);
cleaned_bytes += sp->sl_bytes;
}
} else
for (i = MIN(i, nsegs), sp = segs; i-- ; ++sp) {
if (clean_segment(fsp, sp->sl_id) < 0)
perror("clean_segment failed");
else if (lfs_segclean(&fsp->fi_statfsp->f_fsid,
sp->sl_id) < 0)
perror("lfs_segclean failed");
printf("Completed cleaning segment %d\n", sp->sl_id);
}
}
free(segs);
}
/*
* Segment with the highest priority get sorted to the beginning of the
* list. This sort assumes that empty segments always have a higher
* cost/benefit than any utilized segment.
*/
int
cost_compare(a, b)
const void *a;
const void *b;
{
return (((struct seglist *)b)->sl_cost -
((struct seglist *)a)->sl_cost);
}
/*
* Returns the number of segments to be cleaned with the elements of seglist
* filled in.
*/
int
choose_segments(fsp, seglist, cost_func)
FS_INFO *fsp;
struct seglist *seglist;
int (*cost_func) __P((FS_INFO *, SEGUSE *));
{
struct lfs *lfsp;
struct seglist *sp;
SEGUSE *sup;
int i, nsegs;
lfsp = &fsp->fi_lfs;
#ifdef VERBOSE
(void)printf("Entering choose_segments\n");
#endif
dump_super(lfsp);
dump_cleaner_info(fsp->fi_cip);
for (sp = seglist, i = 0; i < lfsp->lfs_nseg; ++i) {
sup = SEGUSE_ENTRY(lfsp, fsp->fi_segusep, i);
PRINT_SEGUSE(sup, i);
if (!(sup->su_flags & SEGUSE_DIRTY) ||
sup->su_flags & SEGUSE_ACTIVE)
continue;
#ifdef VERBOSE
(void)printf("\tchoosing segment %d\n", i);
#endif
sp->sl_cost = (*cost_func)(fsp, sup);
sp->sl_id = i;
sp->sl_bytes = sup->su_nbytes;
++sp;
}
nsegs = sp - seglist;
qsort(seglist, nsegs, sizeof(struct seglist), cost_compare);
#ifdef VERBOSE
(void)printf("Returning %d segments\n", nsegs);
#endif
return (nsegs);
}
int
clean_segment(fsp, id)
FS_INFO *fsp; /* file system information */
int id; /* segment number */
{
BLOCK_INFO *block_array, *bp;
SEGUSE *sp;
struct lfs *lfsp;
struct tossstruct t;
caddr_t seg_buf;
double util;
int num_blocks, maxblocks, clean_blocks;
lfsp = &fsp->fi_lfs;
sp = SEGUSE_ENTRY(lfsp, fsp->fi_segusep, id);
#ifdef VERBOSE
(void)printf("cleaning segment %d: contains %lu bytes\n", id,
sp->su_nbytes);
fflush(stdout);
#endif
/* XXX could add debugging to verify that segment is really empty */
if (sp->su_nbytes == sp->su_nsums * LFS_SUMMARY_SIZE) {
++cleaner_stats.segs_empty;
return (0);
}
/* map the segment into a buffer */
if (mmap_segment(fsp, id, &seg_buf, do_mmap) < 0) {
err(0, "mmap_segment failed");
++cleaner_stats.segs_error;
return (-1);
}
/* get a list of blocks that are contained by the segment */
if (lfs_segmapv(fsp, id, seg_buf, &block_array, &num_blocks) < 0) {
err(0, "clean_segment: lfs_segmapv failed");
++cleaner_stats.segs_error;
return (-1);
}
cleaner_stats.blocks_read += fsp->fi_lfs.lfs_ssize;
#ifdef VERBOSE
(void)printf("lfs_segmapv returned %d blocks\n", num_blocks);
fflush(stdout);
#endif
/* get the current disk address of blocks contained by the segment */
if (lfs_bmapv(&fsp->fi_statfsp->f_fsid, block_array, num_blocks) < 0) {
perror("clean_segment: lfs_bmapv failed\n");
++cleaner_stats.segs_error;
return -1;
}
/* Now toss any blocks not in the current segment */
t.lfs = lfsp;
t.seg = id;
toss(block_array, &num_blocks, sizeof(BLOCK_INFO), bi_tossold, &t);
/* Check if last element should be tossed */
if (num_blocks && bi_tossold(&t, block_array + num_blocks - 1, NULL))
--num_blocks;
#ifdef VERBOSE
{
BLOCK_INFO *_bip;
u_long *lp;
int i;
(void)printf("after bmapv still have %d blocks\n", num_blocks);
fflush(stdout);
if (num_blocks)
printf("BLOCK INFOS\n");
for (_bip = block_array, i=0; i < num_blocks; ++_bip, ++i) {
PRINT_BINFO(_bip);
lp = (u_long *)_bip->bi_bp;
}
}
#endif
++cleaner_stats.segs_cleaned;
cleaner_stats.blocks_written += num_blocks;
util = ((double)num_blocks / fsp->fi_lfs.lfs_ssize);
cleaner_stats.util_tot += util;
cleaner_stats.util_sos += util * util;
if (do_small)
maxblocks = MAXPHYS / fsp->fi_lfs.lfs_bsize - 1;
else
maxblocks = num_blocks;
for (bp = block_array; num_blocks > 0; bp += clean_blocks) {
clean_blocks = maxblocks < num_blocks ? maxblocks : num_blocks;
if (lfs_markv(&fsp->fi_statfsp->f_fsid,
bp, clean_blocks) < 0) {
err(0, "clean_segment: lfs_markv failed");
++cleaner_stats.segs_error;
return (-1);
}
num_blocks -= clean_blocks;
}
free(block_array);
munmap_segment(fsp, seg_buf, do_mmap);
if (stat_report && cleaner_stats.segs_cleaned % stat_report == 0)
sig_report(SIGUSR1);
return (0);
}
int
bi_tossold(client, a, b)
const void *client;
const void *a;
const void *b;
{
const struct tossstruct *t;
t = (struct tossstruct *)client;
return (((BLOCK_INFO *)a)->bi_daddr == LFS_UNUSED_DADDR ||
datosn(t->lfs, ((BLOCK_INFO *)a)->bi_daddr) != t->seg);
}
void
sig_report(sig)
int sig;
{
double avg;
printf("lfs_cleanerd:\t%s%d\n\t\t%s%d\n\t\t%s%d\n\t\t%s%d\n\t\t%s%d\n",
"blocks_read ", cleaner_stats.blocks_read,
"blocks_written ", cleaner_stats.blocks_written,
"segs_cleaned ", cleaner_stats.segs_cleaned,
"segs_empty ", cleaner_stats.segs_empty,
"seg_error ", cleaner_stats.segs_error);
printf("\t\t%s%5.2f\n\t\t%s%5.2f\n",
"util_tot ", cleaner_stats.util_tot,
"util_sos ", cleaner_stats.util_sos);
printf("\t\tavg util: %4.2f std dev: %9.6f\n",
avg = cleaner_stats.util_tot / cleaner_stats.segs_cleaned,
cleaner_stats.util_sos / cleaner_stats.segs_cleaned - avg * avg);
if (sig == SIGUSR2) {
cleaner_stats.blocks_read = 0;
cleaner_stats.blocks_written = 0;
cleaner_stats.segs_cleaned = 0;
cleaner_stats.segs_empty = 0;
cleaner_stats.segs_error = 0;
cleaner_stats.util_tot = 0.0;
cleaner_stats.util_sos = 0.0;
}
if (sig == SIGINT)
exit(0);
}