static char sccsid
[] = "@(#)indent.c 4.1 (Berkeley) %G%";
This is the main program of the indent program. Indent will take a C
program source and reformat it into a semi-reasonable form.
The routine lexi scans tokens and passes them back one at a time to the
main routine. The subroutine parse takes care of much of the work of
figuring indentation level.
2) Enter a monster switch statement on the code returned by lexi. If
the indentation level for the line yet to be printed should be
changed, set the variable ind_level. If the indentation level for
the following line should be changed, set the variable i_l_follow.
November 1976 D A Willcox of CAC Initial coding
12/9/76 D A Willcox of CAC Fixed defaults for decl_com_ind
to be 8 less than com_ind if
left justifying declarations
12/9/76 D A Willcox of CAC Fixed processing of nested
1/7/77 D A Willcox of CAC Added check for overwrite of
Added code to handle -br and -bl
#include "indent_globs.h";
#include "indent_codes.h";
/* #define dolog 1 /* if this define is removed, then the code to
produce a log file will be removed */
struct templ
{ /* this is a template for the list of
char *str
; /* pointer to string which is a valid
int code
; /* code to be used in switch for processing
{ /* warning - because of the way that this
table is scanned, if one entry is an
initial substring of another, then the
longer entry should occur first */
"-d", 13, /* unindented comment placement */
"-bc", 10, /* break after comma in decl */
"-nbc", 9, /* don't break after comma */
"-br", 14, /* put brace on right of stmt */
"-bl", 15, /* put brace on left by itself */
"-st", 16, /* use the standard input and output
char *in_name
= "Standard Input";
/* will always point to name of input file
char *out_name
= "Standard Output";
/* will always point to name of output file
int dec_ind
; /* current indentation for declarations */
int di_stack
[20]; /* a stack of structure indentation levels
int flushed_nl
; /* used when buffering up comments to
remember that a newline was passed over
int force_nl
; /* when true, code must be broken */
int hd_type
; /* used to store type of stmt for if (...),
register int i
; /* local loop counter */
int in_or_st
; /* Will be true iff there has been a
declarator (e.g. int or char) and no
left paren since the last semicolon.
When true, a { is starting a structure
definition or an initialization list */
register int j
; /* local loop counter */
int scase
; /* set to true when we see a case, so we
will know what to do with the following
int sp_sw
; /* when true, we are in the expression of
if(...), while(...), etc. */
int squest
; /* when this is positive, we have seen a ?
without the matching : in a <c>?<s>:<s>
register char *t_ptr
; /* used for copying tokens */
int type_code
; /* the type of token, returned by lexi */
int want_blank
; /* set to true when the following token
should be prefixed by a blank. (Said
prefixing is ignored in some cases.) */
#ifdef dolog /* include declarations needed for log */
int log_fid
; /* fid of log file */
struct logtmpl
{ /* structure of a log entry */
int tvec
[2]; /* time of execution */
char inp
; /* input fid */
char outp
; /* output fid */
int nout
; /* # output lines */
int ncom
; /* # comments */
int wcom
; /* # lines w/ comments */
int wcode
; /* # lines w/code */
char mc
; /* max line size */
char ci
; /* comment indentation */
char inds
; /* indent size */
char dci
; /* decl comment indentation */
char ljus
; /* left just */
char lvcom
; /* leave commas */
char unin
; /* unindented comment indentation */
char uid
; /* the user id */
char bropt
; /* btype_2 */
/*-----------------------------------------------*\
\*-----------------------------------------------*/
combuf
[0] = codebuf
[0] = labbuf
[0] = ' ';
/* set up code, label, and comment buffers */
combuf
[1] = codebuf
[1] = labbuf
[1] = '\0';
s_lab
= e_lab
= labbuf
+ 1;
s_code
= e_code
= codebuf
+ 1;
s_com
= e_com
= combuf
+ 1;
buf_ptr
= buf_end
= in_buffer
;
had_eof
= in_decl
= decl_on_line
= break_comma
= false;
sp_sw
= force_nl
= false;
di_stack
[dec_nest
= 0] = 0;
want_blank
= in_stmt
= ind_stmt
= false;
/*--------------------------------------------------*\
\*--------------------------------------------------*/
max_col
= d_max_col
; /* set up some default values */
decl_com_ind
= 0; /* if this is not set to some positive
value by an arg, we will set this equal
unindent_displace
= d_unindent
;
leave_comma
= d_leave_comma
;
for (i
= 1; i
< argc
; ++i
) {
/* look thru args (if any) for changes to defaults */
if (argv
[i
][0] != '-') {/* no flag on parameter */
if (input
< 0) { /* we must have the input file */
in_name
= argv
[i
]; /* remember name of input
input
= open (in_name
, 0);
if (input
< 0) { /* check for open error */
printf ("Can't open %s\n", argv
[i
]);
if (output
< 0) { /* we have the output file */
out_name
= argv
[i
]; /* remember name of output file */
if (cmp (in_name
, out_name
) == 0) { /* attempt to
printf ("Input and output files must be different\n");
output
= creat (out_name
, 0644);
if (output
< 0) { /* check for create error */
printf ("Can't create %s\n", argv
[i
]);
printf ("Unknown parameter: %s\n", argv
[i
]);
printf ("Usage: indent file [ outfile ] [ options ]\n");
com_ind
= 2; /* don't put normal comments before column
if (decl_com_ind
<= 0) /* if not specified by user, set this */
decl_com_ind
= ljust_decl
? (com_ind
<= 10 ? 2 : com_ind
- 8) : com_ind
;
fill_buffer (); /* get first batch of stuff into input
\f/*-----------------------------------------------------
\*----------------------------------------------------*/
while (1) { /* this is the main loop. it will go until
type_code
= lexi (); /* lexi reads one token. The actual
characters read are stored in "token".
lexi returns a code indicating the type
* The following code moves everything following an if (), while (),
* else, etc. up to the start of the following stmt to a buffer. This
* allows proper handling of both kinds of brace placement.
while (search_brace
) { /* if we scanned an if(), while(), etc., we
might need to copy stuff into a buffer
*//* we must loop, copying stuff into save_com, until we find the
start of the stmt which follows the if, or whatever */
break; /* form feeds and newlines found here will
case lbrace
: /* this is a brace that starts the compound
/* ignore buffering if a comment wasn't stored up */
/* we either want to put the brace right after the if
/* go to common code to get out of this loop */
default: /* it is the start of a normal statement */
/* if we flushed a newline, make sure it is
/* ignore buffering if comment wasn't saved up */
/* if we should insert a nl here, put it into the
/* this will be re-increased when the nl is read from
if (verbose
&& !flushed_nl
)
/* print error msg if the line was not
printf ("%d: Line broken\n", line_no
);
for (t_ptr
= token
; *t_ptr
; ++t_ptr
)
/* copy token into temp buffer */
/* stop looking for start of stmt */
/* save current input buffer */
/* fix so that subsequent calls to lexi will take tokens
/* add trailing blank, just in case */
case comment
: /* we have a comment, so we must copy it
/* if this is the first comment, we must set up the
save_com
[0] = save_com
[1] = ' ';
/* add newline between comments */
/* copy in start of comment */
for (;;) { /* loop until we get to the end of the
if (*sc_end
++ == '*' && *buf_ptr
== '/')
/* we are at end of comment */
if (sc_end
>= &(save_com
[sc_size
])) {
/* check for temp buffer overflow */
printf ("%d: Internal buffer overflow.\n",
printf ("Move big comment from right after if,\
if (++buf_ptr
>= buf_end
)/* get past / in buffer */
if (type_code
!= 0)/* we must make this check, just in case
there was an unexpected EOF */
} /* end of while (search_brace) */
if (type_code
== 0) { /* we got eof */
if (s_lab
!= e_lab
|| s_code
!= e_code
|| s_com
!= e_com
)/* must dump end of line */
if (i_l_follow
!= 0)/* check for balanced braces */
printf ("%d too few }'s\n", i_l_follow
);
#ifdef dolog /* only include this stuff if we want to
log_fid
= open ("/mnt/net/willcox/indent/indent_log", 1);
/* point to end of log */
/* set up the log entry */
logent
.wcode
= code_lines
;
logent
.dci
= decl_com_ind
;
logent
.ljus
= ljust_decl
;
logent
.lvcom
= leave_comma
;
logent
.unin
= unindent_displace
;
write (log_fid
, &logent
, sizeof logent
);
printf ("There were %d output lines and %d comments\n",
printf ("(Lines with comments)/(Lines with code): %6.3f\n",
(1.0 * com_lines
) / code_lines
);
(type_code
!= comment
) &&
(type_code
!= newline
) &&
(type_code
!= form_feed
)) {
(type_code
!= semicolon
) &&
)) { /* we should force a broken line here */
if (verbose
&& !flushed_nl
)
printf ("%d: Line broken\n", line_no
);
/* don't insert blank at line start */
in_stmt
= true; /* turn on flag which causes an extra level
of indentation. this is turned off by a
/* the turkey has embedded a comment in a line. fix it */
for (t_ptr
= s_com
; *t_ptr
; ++t_ptr
)
*e_code
= '\0';/* null terminate code sect */
if (type_code
!= comment
)
/* preserve force_nl thru a comment */
/* cancel forced newline after newline, form feed, etc */
/*----------------------------------------------------*\
| do switch on type of token scanned
\*----------------------------------------------------*/
switch (type_code
) { /* now, decide what to do with the token */
case form_feed
: /* found a form feed in line */
use_ff
= true; /* a form feed is treated much like a
++line_no
; /* keep track of input line number */
case lparen
: /* got a ( or [ */
++p_l_follow
; /* count parens to make Healy happy */
if (want_blank
&& *token
!= '[')
/* don't put space in front of square
while ((e_code
- s_code
) < dec_ind
)
if (in_or_st
&& *token
== '(') {
/* this is a kluge to make sure that declarations will be
aligned right if proc decl has an explicit type on it,
/* I said this was a kluge... */
/* turn off flag for structure decl or initialization */
case rparen
: /* got a ) or ] */
printf ("%d: Extra %c\n", line_no
, *token
);
if (e_code
== s_code
)/* if the paren starts the line */
paren_level
= p_l_follow
;
if (sp_sw
&& (p_l_follow
== 0)) {
/* check for end of if (...), or some such */
/* must force newline after if */
/* inform lexi that a following operator is unary */
/* don't use stmt continuation indentation */
/* let parser worry about if, or whatever */
/* this should insure that constructs such as main(){... and
int[]{... have their braces put in the right place */
case unary_op
: /* this could be any unary operation */
if (in_decl
) { /* if this is a unary op in a *//*
declaration, we should indent this token
for (i
= 0; token
[i
]; ++i
);
/* find length of token */
while ((e_code
- s_code
) < (dec_ind
- i
))
for (t_ptr
= token
; *t_ptr
; ++t_ptr
)
/* move the token to buffer */
case binary_op
: /* any binary operation */
for (t_ptr
= token
; *t_ptr
; ++t_ptr
)
case postop
: /* got a trailing ++ or -- */
case question
: /* got a ? */
squest
++; /* this will be used when a later colon
appears so we can distinguish the
case casestmt
: /* got word 'case' or 'default' */
scase
= true; /* so we can process the later colon
for (t_ptr
= token
; *t_ptr
; ++t_ptr
)
case colon
: /* got a ':' */
/* it is part of the <c>?<n>: <n> construct */
/* seeing a label does not imply we are in a stmt */
for (t_ptr
= s_code
; *t_ptr
; ++t_ptr
)
/* turn everything so far into a label */
force_nl
= pcase
= scase
;
/* pcase will be used by dump_line to decide how to indent the
label. force_nl will force a case n: to be on a line by
case semicolon
: /* got a ';' */
/* we are not in an initialization or structure declaration */
scase
= false; /* these will only need resetting in a
if (in_decl
&& s_code
== e_code
)
/* align this in a declaration */
while ((e_code
- s_code
) < (dec_ind
- 1))
in_decl
= (dec_nest
> 0);
/* if we were in a first level structure declaration, we
if ((!sp_sw
|| hd_type
!= forstmt
) && p_l_follow
> 0) {
/* This should be true iff there were unbalanced parens in
the stmt. It is a bit complicated, because the
semicolon might be in a for stmt */
printf ("%d: Unbalanced parens\n", line_no
);
/* this is a check for a if, while, etc. with
/* don't lose the if, or whatever */
in_stmt
= (p_l_follow
> 0);
/* we are no longer in the middle of a stmt */
if (!sp_sw
) { /* if not if for (;;) */
/* let parser know about end of stmt */
/* force newline after an end of stmt */
case lbrace
: /* got a { */
/* force other stuff on same line as { onto new line */
if (s_code
!= e_code
&& !btype_2
) {
/* bracket is not alone on line */
printf ("%d: Line broken\n", line_no
);
/* check for preceding unbalanced parens */
printf ("%d: Unbalanced parens\n", line_no
);
/* check for unclosed if, for, etc. */
/* don't put extra indentation on line with '{' */
if (in_decl
&& in_or_st
) {
/* this is either a structure declaration or an init */
di_stack
[dec_nest
++] = dec_ind
;
/* we can't be in the middle of a declaration, so don't do
special indentation of comments */
parse (lbrace
);/* let parser know about this */
if (want_blank
)/* put a blank before { if { is not at
case rbrace
: /* got a } */
/* check for unclosed if, for, else. */
printf ("%d: Unbalanced parens\n", line_no
);
/* } must be first on line */
printf ("%d: Line broken\n", line_no
);
in_stmt
= ind_stmt
= false;
/* we are in multi-level structure declaration */
dec_ind
= di_stack
[--dec_nest
];
parse (rbrace
);/* let parser know about this */
case swstmt
: /* got keyword "switch" */
/* keep this for when we have seen the expression */
goto copy_id
; /* go move the token into buffer */
case sp_paren
: /* token is if, while, for */
sp_sw
= true; /* the interesting stuff is done after the
hd_type
= (*token
== 'i' ? ifstmt
:
(*token
== 'w' ? whilestmt
: forstmt
));
/* remember the type of header for later use by parser */
goto copy_id
; /* copy the token into line */
case sp_nparen
: /* got else, do */
/* make sure this starts a line */
printf ("%d: Line broken\n", line_no
);
/* also, following stuff must go onto new line */
parse (*token
== 'e' ? elselit
: dolit
);
/* pass token on to parser */
goto copy_id
; /* move the token into line */
case decl
: /* we have a declaration type (int,
parse (decl
); /* let parser worry about indentation */
/* this might be a structure or initialization declaration */
in_decl
= decl_on_line
= true;
for (i
= 0; token
[i
++];);
/* get length of token */
dec_ind
= ((e_code
- s_code
+ i
) / ind_size
+ 1) * ind_size
;
/* this will tell us how far to indent subsequent identifiers
case ident
: /* got an identifier or constant */
if (in_decl
) { /* if we are in a declaration, we must
while ((e_code
- s_code
) < dec_ind
)
if (sp_sw
&& p_l_follow
== 0) {
/* check for if expr w/o parens *//* this will make
JRM's absurd "for ever" statements work */
for (t_ptr
= token
; *t_ptr
; ++t_ptr
)
case period
: /* treat a period kind of like a binary
/* move the period into line */
/* don't put a blank after a period */
want_blank
= (s_code
!= e_code
);
/* only put blank after comma if comma does not start the line
if (in_decl
) /* align these in a declaration */
while ((e_code
- s_code
) < (dec_ind
- 1))
if (break_comma
&& p_l_follow
== 0 && !leave_comma
)
case preesc
: /* got the character '#' */
/* true iff the '#' was not at start of the line */
printf ("%d: What is this # doing here?\n", line_no
);
/* treat it as a binary operator */
*e_lab
++ = '#';/* move whole line to 'label' buffer */
while (*buf_ptr
!= '\n') {
if (*e_lab
++ == '/' && *buf_ptr
== '*') {
/* check for comment on preprocessor line */
/* skip back over slash */
while (*e_lab
== '\t' || *e_lab
== ' ')
/* strip off trailing blanks and tabs */
/* null terminate the line */
if (++buf_ptr
>= buf_end
)
/* space past start of comment */
/* don't let pr_comment think that this comment starts
/* treat this as a declaration for comment placement
/* go process the comment */
*e_lab
= '\0'; /* null terminate line */
break; /* subsequent processing of the newline
character will cause the line to be
case comment
: /* we have gotten a /* this is a biggie */
} /* end of big switch stmt */
*e_code
= '\0'; /* make sure code section is null
} /* end of main while (1) loop */
* copy input file to backup file
* if in_name is /blah/blah/blah/file, then backup file
* then make the backup file the input and original
/* construct file name .Bfile */
for (p
= in_name
; *p
; p
++);/* skip to end of string */
while (p
> in_name
&& *p
!= '/')/* find last '/' */
sprintf (bakfile
, ".B%s", p
);
/* copy in_name to backup file */
bakchn
= creat (bakfile
, 0600);
printf ("can't create backup file \"%s\"\n", bakfile
);
while (n
= read (input
, buff
, 512))
/* re-open backup file as the input file */
input
= open (bakfile
, 0);
printf ("can't re-open backup file\n");
/* now the original input file will be the output */
output
= creat (in_name
, 0644);
printf ("can't create %s\n", in_name
);
for (j
= 0; options
[j
].str
!= 0; ++j
) {
/* look thru list of possible options */
if (eqin (options
[j
].str
, arg
)) {
break; /* get out of for loop */
if (options
[j
].str
== 0) { /* illegal arg given */
printf ("Unknown parameter: %s\n", arg
);
switch (options
[j
].code
) {
max_col
= atoi (&arg
[2]);
com_ind
= atoi (&arg
[2]);
ind_size
= atoi (&arg
[2]);
case 4: /* have -cdnnn */
decl_com_ind
= atoi (&arg
[3]);
unindent_displace
= atoi (&arg
[2]);
* GETPRO - get profile file
* profile file is max 127 characters
char *name
, /* profile file name, as in '.indent.pro'
*buf
; /* will receive contents of .pro file */
strcat (file
, getenv ("HOME"));
n
= read (chn
, buf
, 127);
buf
[n
--] = 0; /* null terminate line */
* strip off arguments in a string:
* p is address of a character pointer
* nxtarg returns pointer to front of first arg
* arg is null terminated.
* p is reset to after arg for subsequent calls
while (*f
&& (*f
== ' ' || *f
== '\t'))
while (*b
&& (*b
!= ' ' && *b
!= '\t'))
if (getpro (".indent.pro", line
) < 0)
if(verbose
) printf ("profile: %s\n", b
);
while (*(f
= nxtarg (&b
)))