* Copyright (c) 1980 Regents of the University of California.
* Copyright (c) 1976 Board of Trustees of the University of Illinois.
* Redistribution and use in source and binary forms are permitted
* provided that the above copyright notice and this paragraph are
* duplicated in all such forms and that any documentation,
* advertising materials, and other materials related to such
* distribution and use acknowledge that the software was developed
* by the University of California, Berkeley and the University
* of Illinois, Urbana. The name of either
* University may not be used to endorse or promote products derived
* from this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
"@(#) Copyright (c) 1980 Regents of the University of California.\n\
Copyright (c) 1976 Board of Trustees of the University of Illinois.\n\
static char sccsid
[] = "@(#)indent.c 5.8 (Berkeley) %G%";
This is the main program of the indent program. Indent will take a C
program source and reformat it into a semi-reasonable form.
The routine lexi scans tokens and passes them back one at a time to the
main routine. The subroutine parse takes care of much of the work of
figuring indentation level.
2) Enter a monster switch statement on the code returned by lexi. If
the indentation level for the line yet to be printed should be
changed, set the variable ps.ind_level. If the indentation level for
the following line should be changed, set the variable ps.i_l_follow.
#include "indent_globs.h"
#include "indent_codes.h"
char *in_name
= "Standard Input"; /* will always point to name of
char *out_name
= "Standard Output"; /* will always point to
extern int found_err
; /* if any error occurred */
int dec_ind
; /* current indentation for declarations */
int di_stack
[20]; /* a stack of structure indentation levels */
int flushed_nl
; /* used when buffering up comments to
* remember that a newline was passed over */
int force_nl
; /* when true, code must be broken */
int hd_type
; /* used to store type of stmt for if
* (...), for (...), etc */
register int i
; /* local loop counter */
register int j
; /* local loop counter */
int scase
; /* set to true when we see a case, so we
* will know what to do with the following
int sp_sw
; /* when true, we are in the expression of
* if(...), while(...), etc. */
int squest
; /* when this is positive, we have seen a ?
* without the matching : in a <c>?<s>:<s>
register char *t_ptr
; /* used for copying tokens */
int type_code
; /* the type of token, returned by lexi */
int last_else
= 0; /* true iff last keyword was an else */
/*-----------------------------------------------*\
\*-----------------------------------------------*/
ps
.p_stack
[0] = stmt
; /* this is the parser's stack */
ps
.last_nl
= true; /* this is true if the last thing scanned
ps
.last_token
= semicolon
;
combuf
[0] = codebuf
[0] = labbuf
[0] = ' '; /* set up code, label, and
combuf
[1] = codebuf
[1] = labbuf
[1] = '\0';
s_lab
= e_lab
= labbuf
+ 1;
s_code
= e_code
= codebuf
+ 1;
s_com
= e_com
= combuf
+ 1;
buf_ptr
= buf_end
= in_buffer
;
had_eof
= ps
.in_decl
= ps
.decl_on_line
= break_comma
= false;
sp_sw
= force_nl
= false;
di_stack
[ps
.dec_nest
= 0] = 0;
ps
.want_blank
= ps
.in_stmt
= ps
.ind_stmt
= false;
scase
= ps
.pcase
= false;
/*--------------------------------------------------*\
\*--------------------------------------------------*/
* Unfortunately, we must look for -npro here because the profiles
* are read before the command line arguments.
for (i
= 1; i
< argc
; ++i
)
if (strcmp(argv
[i
], "-npro") == 0)
input
= 0; /* cancel -st if it was in the profiles, */
output
= 0; /* as it doesn't make any sense there. */
for (i
= 1; i
< argc
; ++i
) {
* look thru args (if any) for changes to defaults
if (argv
[i
][0] != '-') {/* no flag on parameter */
if (input
== 0) { /* we must have the input file */
in_name
= argv
[i
]; /* remember name of input file */
input
= fopen(in_name
, "r");
if (input
== 0) { /* check for open error */
fprintf(stderr
, "indent: can't open %s\n", argv
[i
]);
} else if (output
== 0) { /* we have the output file */
out_name
= argv
[i
]; /* remember name of output file */
if (strcmp(in_name
, out_name
) == 0) { /* attempt to overwrite
fprintf(stderr
, "indent: input and output files must be different\n");
output
= fopen(out_name
, "w");
if (output
== 0) { /* check for create error */
fprintf(stderr
, "indent: can't create %s\n", argv
[i
]);
fprintf(stderr
, "indent: unknown parameter: %s\n", argv
[i
]);
printf("Usage: indent file [ outfile ] [ options ]\n");
* Adjust parameters that are out of range, or set defaults if
* no values were specified.
ps
.com_ind
= 2; /* dont put normal comments before column
if (block_comment_max_col
<= 0)
block_comment_max_col
= max_col
;
if (ps
.decl_com_ind
<= 0) /* if not specified by user, set this */
ps
.decl_com_ind
= ps
.ljust_decl
? ps
.com_ind
- 8 : ps
.com_ind
;
if (ps
.decl_com_ind
<= 1)
if (continuation_indent
== 0)
continuation_indent
= ps
.ind_size
;
fill_buffer(); /* get first batch of stuff into input
register char *p
= buf_ptr
;
col
= ((col
- 1) & ~7) + 9;
ps
.ind_level
= ps
.i_l_follow
= col
/ ps
.ind_size
;
register char *p
= in_name
,
fprintf(output
, ".Fn \"%s\"\n", beg
);
while (1) { /* this is the main loop. it will go
type_code
= lexi(); /* lexi reads one token. The actual
* characters read are stored in "token".
* lexi returns a code indicating the type
is_procname
= ps
.procname
[0];
* The following code moves everything following an if (), while
* (), else, etc. up to the start of the following stmt to a
* buffer. This allows proper handling of both kinds of brace
while (ps
.search_brace
) { /* if we scanned an if(), while(),
* etc., we might need to copy
* stuff into a buffer we must
* loop, copying stuff into
* save_com, until we find the
* start of the stmt which follows
break; /* form feeds and newlines found here will
case lbrace
: /* this is a brace that starts the
if (sc_end
== 0) { /* ignore buffering if a comment
save_com
[0] = '{'; /* we either want to put
goto sw_buffer
; /* go to common code to get out of
case comment
: /* we have a comment, so we must copy it
if (sc_end
== 0) { /* if this is the first
* comment, we must set up
save_com
[0] = save_com
[1] = ' ';
*sc_end
++ = '\n'; /* add newline between
*sc_end
++ = '/'; /* copy in start of
for (;;) { /* loop until we get to the end of
if (*sc_end
++ == '*' && *buf_ptr
== '/')
break; /* we are at end of comment */
if (sc_end
>= &(save_com
[sc_size
])) { /* check for temp buffer
diag(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever.");
*sc_end
++ = '/'; /* add ending slash */
if (++buf_ptr
>= buf_end
) /* get past / in buffer */
default: /* it is the start of a normal statement */
if (flushed_nl
) /* if we flushed a newline, make
if (type_code
== sp_paren
&& *token
== 'i'
&& last_else
&& ps
.else_if
|| type_code
== sp_nparen
&& *token
== 'e'
&& e_code
!= s_code
&& e_code
[-1] == '}')
if (sc_end
== 0) { /* ignore buffering if comment
if (force_nl
) { /* if we should insert a nl here,
* put it into the buffer */
--line_no
; /* this will be re-increased when
* the nl is read from the buffer */
if (verbose
&& !flushed_nl
) /* print error msg if
for (t_ptr
= token
; *t_ptr
; ++t_ptr
)
*sc_end
++ = *t_ptr
; /* copy token into temp
ps
.search_brace
= false; /* stop looking for start
bp_save
= buf_ptr
; /* save current input buffer */
buf_ptr
= save_com
; /* fix so that subsequent calls to
* lexi will take tokens out of
*sc_end
++ = ' '; /* add trailing blank, just in
if (type_code
!= 0) /* we must make this check, just in case
* there was an unexpected EOF */
type_code
= lexi(); /* read another token */
is_procname
= ps
.procname
[0];
} /* end of while (ps.search_brace) */
if (type_code
== 0) { /* we got eof */
if (s_lab
!= e_lab
|| s_code
!= e_code
|| s_com
!= e_com
) /* must dump end of line */
if (ps
.tos
> 1) /* check for balanced braces */
diag(1, "Stuff missing from end of file.");
printf("There were %d output lines and %d comments\n",
ps
.out_lines
, ps
.out_coms
);
printf("(Lines with comments)/(Lines with code): %6.3f\n",
(1.0 * ps
.com_lines
) / code_lines
);
exit(ps
.tos
> 1 || found_err
);
(type_code
!= comment
) &&
(type_code
!= newline
) &&
(type_code
!= form_feed
)) {
(type_code
!= semicolon
) &&
)) { /* we should force a broken line here */
if (verbose
&& !flushed_nl
)
ps
.want_blank
= false; /* dont insert blank at line start */
ps
.in_stmt
= true; /* turn on flag which causes an extra
* level of indentation. this is turned
if (s_com
!= e_com
) { /* the turkey has embedded a
* comment in a line. fix it */
for (t_ptr
= s_com
; *t_ptr
; ++t_ptr
)
*e_code
= '\0'; /* null terminate code sect */
} else if (type_code
!= comment
) /* preserve force_nl thru
* cancel forced newline after newline, form feed, etc
/*----------------------------------------------------*\
| do switch on type of token scanned
\*----------------------------------------------------*/
switch (type_code
) { /* now, decide what to do with the token */
case form_feed
: /* found a form feed in line */
ps
.use_ff
= true; /* a form feed is treated much
if (ps
.last_token
!= comma
|| ps
.p_l_follow
> 0
|| !ps
.leave_comma
|| !break_comma
|| s_com
!= e_com
) {
++line_no
; /* keep track of input line number */
case lparen
: /* got a '(' or '[' */
++ps
.p_l_follow
;/* count parens to make Healy happy */
if (ps
.want_blank
&& *token
!= '[' &&
(ps
.last_token
!= ident
|| proc_calls_space
|| (ps
.its_a_keyword
&& !ps
.sizeof_keyword
)))
if (ps
.in_decl
&& !ps
.block_init
)
if (troff
&& !ps
.dumped_decl_indent
) {
ps
.dumped_decl_indent
= 1;
sprintf(e_code
, "\\c\n.Du %dp+\200p \"%s\"\n", dec_ind
* 7, token
);
e_code
+= strlen(e_code
);
while ((e_code
- s_code
) < dec_ind
)
ps
.paren_indents
[ps
.p_l_follow
- 1] = e_code
- s_code
;
if (ps
.in_or_st
&& *token
== '(') {
* this is a kluge to make sure that declarations will
* be aligned right if proc decl has an explicit type
* on it, i.e. "int a(x) {..."
parse(semicolon
); /* I said this was a kluge... */
ps
.in_or_st
= false; /* turn off flag for
if (ps
.sizeof_keyword
) ps
.sizeof_mask
|= 1<<ps
.p_l_follow
;
case rparen
: /* got a ')' or ']' */
if (ps
.cast_mask
& (1 << ps
.p_l_follow
) & ~ps
.sizeof_mask
) {
ps
.cast_mask
&= (1 << ps
.p_l_follow
) - 1;
ps
.sizeof_mask
&= (1 << ps
.p_l_follow
) - 1;
if (--ps
.p_l_follow
< 0) {
diag(0, "Extra %c", *token
);
if (e_code
== s_code
) /* if the paren starts the line */
ps
.paren_level
= ps
.p_l_follow
; /* then indent it */
if (sp_sw
&& (ps
.p_l_follow
== 0)) { /* check for end of if
force_nl
= true; /* must force newline after if */
ps
.last_u_d
= true; /* inform lexi that a following
ps
.in_stmt
= false; /* dont use stmt continuation
parse(hd_type
); /* let parser worry about if, or
ps
.search_brace
= btype_2
; /* this should ensure that
* braces put in the right
case unary_op
: /* this could be any unary operation */
if (troff
&& !ps
.dumped_decl_indent
&& ps
.in_decl
) {
sprintf(e_code
, "\\c\n.Du %dp+\200p \"%s\"\n", dec_ind
* 7, token
);
ps
.dumped_decl_indent
= 1;
e_code
+= strlen(e_code
);
if (ps
.in_decl
&& !ps
.block_init
) { /* if this is a unary op
for (i
= 0; token
[i
]; ++i
); /* find length of token */
while ((e_code
- s_code
) < (dec_ind
- i
))
*e_code
++ = ' '; /* pad it */
if (troff
&& token
[0] == '-' && token
[1] == '>')
for (t_ptr
= res
; *t_ptr
; ++t_ptr
)
case binary_op
: /* any binary operation */
for (t_ptr
= res
; *t_ptr
; ++t_ptr
)
*e_code
++ = *t_ptr
; /* move the operator */
case postop
: /* got a trailing ++ or -- */
case question
: /* got a ? */
squest
++; /* this will be used when a later colon
* appears so we can distinguish the
* <c>?<n>:<n> construct */
case casestmt
: /* got word 'case' or 'default' */
scase
= true; /* so we can process the later colon
case colon
: /* got a ':' */
if (squest
> 0) { /* it is part of the <c>?<n>: <n>
ps
.in_stmt
= false; /* seeing a label does not imply
for (t_ptr
= s_code
; *t_ptr
; ++t_ptr
)
*e_lab
++ = *t_ptr
; /* turn everything so far into a
force_nl
= ps
.pcase
= scase
; /* ps.pcase will be used
case semicolon
: /* got a ';' */
ps
.in_or_st
= false; /* we are not in an initialization
* or structure declaration */
scase
= false; /* these will only need resetting in a
if (ps
.last_token
== rparen
)
ps
.in_parameter_declaration
= 0;
if (ps
.in_decl
&& s_code
== e_code
&& !ps
.block_init
)
while ((e_code
- s_code
) < (dec_ind
- 1))
ps
.in_decl
= (ps
.dec_nest
> 0); /* if we were in a first
if ((!sp_sw
|| hd_type
!= forstmt
) && ps
.p_l_follow
> 0) {
* This should be true iff there were unbalanced
* parens in the stmt. It is a bit complicated,
* because the semicolon might be in a for stmt
diag(1, "Unbalanced parens");
if (sp_sw
) {/* this is a check for an if, while, etc.
* with unbalanced parens */
parse(hd_type
); /* dont lose the if, or whatever */
ps
.in_stmt
= (ps
.p_l_follow
> 0); /* we are no longer in
* the middle of a stmt */
if (!sp_sw
) { /* if not if for (;;) */
parse(semicolon
); /* let parser know about end of
force_nl
= true; /* force newline after an end of
case lbrace
: /* got a '{' */
ps
.in_stmt
= false; /* dont indent the {} */
force_nl
= true; /* force other stuff on same line
* as '{' onto new line */
if (s_code
!= e_code
&& !ps
.block_init
) {
} else if (ps
.in_parameter_declaration
&& !ps
.in_or_st
) {
if (ps
.in_parameter_declaration
)
prefix_blankline_requested
= 0;
if (ps
.p_l_follow
> 0) { /* check for preceding
diag(1, "Unbalanced parens");
if (sp_sw
) {/* check for unclosed if, for, etc. */
ps
.ind_level
= ps
.i_l_follow
;
ps
.ind_stmt
= false; /* dont put extra
if (ps
.in_decl
&& ps
.in_or_st
) { /* this is either a
di_stack
[ps
.dec_nest
++] = dec_ind
;
ps
.decl_on_line
= false; /* we cant be in the
* declaration, so dont do
ps
.in_parameter_declaration
= 0;
parse(lbrace
); /* let parser know about this */
if (ps
.want_blank
) /* put a blank before '{' if '{'
* is not at start of line */
case rbrace
: /* got a '}' */
if (ps
.p_l_follow
) { /* check for unclosed if, for,
diag(1, "Unbalanced parens");
if (s_code
!= e_code
&& !ps
.block_init
) { /* '}' must be first on
ps
.in_stmt
= ps
.ind_stmt
= false;
if (ps
.dec_nest
> 0) { /* we are in multi-level structure
dec_ind
= di_stack
[--ps
.dec_nest
];
if (ps
.dec_nest
== 0 && !ps
.in_parameter_declaration
)
prefix_blankline_requested
= 0;
parse(rbrace
); /* let parser know about this */
ps
.search_brace
= cuddle_else
&& ps
.p_stack
[ps
.tos
] == ifhead
&& ps
.il
[ps
.tos
] >= ps
.ind_level
;
if (ps
.tos
<= 1 && blanklines_after_procs
&& ps
.dec_nest
<= 0)
postfix_blankline_requested
= 1;
case swstmt
: /* got keyword "switch" */
hd_type
= swstmt
; /* keep this for when we have seen
goto copy_id
; /* go move the token into buffer */
case sp_paren
: /* token is if, while, for */
sp_sw
= true; /* the interesting stuff is done after the
* expression is scanned */
hd_type
= (*token
== 'i' ? ifstmt
:
(*token
== 'w' ? whilestmt
: forstmt
));
* remember the type of header for later use by parser
goto copy_id
; /* copy the token into line */
case sp_nparen
: /* got else, do */
if (e_code
!= s_code
&& (!cuddle_else
|| e_code
[-1] != '}')) {
dump_line(); /* make sure this starts a line */
force_nl
= true; /* also, following stuff must go
if (e_code
!= s_code
) { /* make sure this starts a
force_nl
= true; /* also, following stuff must go
goto copy_id
; /* move the token into line */
case decl
: /* we have a declaration type (int,
parse(decl
); /* let parser worry about indentation */
if (ps
.last_token
== rparen
&& ps
.tos
<= 1)
ps
.in_parameter_declaration
= 1;
if (ps
.in_parameter_declaration
&& ps
.indent_parameters
&& ps
.dec_nest
== 0) {
ps
.ind_level
= ps
.i_l_follow
= 1;
ps
.in_or_st
= true; /* this might be a structure or
* initialization declaration */
ps
.in_decl
= ps
.decl_on_line
= true;
if ( /* !ps.in_or_st && */ ps
.dec_nest
<= 0)
prefix_blankline_requested
= 0;
for (i
= 0; token
[i
++];); /* get length of token */
* dec_ind = e_code - s_code + (ps.decl_indent>i ?
dec_ind
= ps
.decl_indent
> 0 ? ps
.decl_indent
: i
;
case ident
: /* got an identifier or constant */
if (ps
.in_decl
) { /* if we are in a declaration, we
* must indent identifier */
if (is_procname
== 0 || !procnames_start_line
) {
if (troff
&& !ps
.dumped_decl_indent
) {
sprintf(e_code
, "\\c\n.De %dp+\200p\n", dec_ind
* 7);
ps
.dumped_decl_indent
= 1;
e_code
+= strlen(e_code
);
while ((e_code
- s_code
) < dec_ind
)
if (dec_ind
&& s_code
!= e_code
)
} else if (sp_sw
&& ps
.p_l_follow
== 0) {
if (troff
&& ps
.its_a_keyword
) {
for (t_ptr
= token
; *t_ptr
; ++t_ptr
)
if (troff
&& ps
.its_a_keyword
) {
case period
: /* treat a period kind of like a binary
*e_code
++ = '.';/* move the period into line */
ps
.want_blank
= false; /* dont put a blank after a period */
ps
.want_blank
= (s_code
!= e_code
); /* only put blank after
if (ps
.in_decl
&& is_procname
== 0 && !ps
.block_init
)
while ((e_code
- s_code
) < (dec_ind
- 1))
if (ps
.p_l_follow
== 0) {
if (break_comma
&& !ps
.leave_comma
)
case preesc
: /* got the character '#' */
*e_lab
++ = '#'; /* move whole line to 'label' buffer */
while (*buf_ptr
!= '\n' || in_comment
) {
if (*buf_ptr
== '*' && !in_comment
&& !quote
) {
if (*buf_ptr
== '/' && in_comment
) {
while (e_lab
> s_lab
&& (e_lab
[-1] == ' ' || e_lab
[-1] == '\t'))
if (e_lab
== com_end
&& bp_save
== 0) { /* comment on
if (sc_end
== 0) /* if this is the first
* comment, we must set up
*sc_end
++ = '\n'; /* add newline between
bcopy(com_start
, sc_end
, com_end
- com_start
);
sc_end
+= com_end
- com_start
;
while (e_lab
> s_lab
&& (e_lab
[-1] == ' ' || e_lab
[-1] == '\t'))
bp_save
= buf_ptr
; /* save current input
buf_ptr
= save_com
; /* fix so that subsequent
* calls to lexi will take
* tokens out of save_com */
*sc_end
++ = ' '; /* add trailing blank,
*e_lab
= '\0'; /* null terminate line */
if (strncmp(s_lab
, "#if", 3) == 0)
if (ifdef_level
< sizeof state_stack
/ sizeof state_stack
[0]) {
match_state
[ifdef_level
].tos
= -1;
state_stack
[ifdef_level
++] = ps
;
diag(1, "#if stack overflow");
else if (strncmp(s_lab
, "#else", 5) == 0)
diag(1, "Unmatched #else");
match_state
[ifdef_level
- 1] = ps
;
ps
= state_stack
[ifdef_level
- 1];
} else if (strncmp(s_lab
, "#endif", 6) == 0)
diag(1, "Unmatched #endif");
* This match needs to be more intelligent before
if (match_state
[ifdef_level
].tos
>= 0
&& bcmp(&ps
, &match_state
[ifdef_level
], sizeof ps
))
diag(0, "Syntactically inconsistant #ifdef alternatives.");
break; /* subsequent processing of the newline
* character will cause the line to be
case comment
: /* we have gotten a /* this is a biggie */
if (flushed_nl
) { /* we should force a broken line
ps
.want_blank
= false; /* dont insert blank at
} /* end of big switch stmt */
*e_code
= '\0'; /* make sure code section is null
if (type_code
!= comment
&& type_code
!= newline
&& type_code
!= preesc
)
ps
.last_token
= type_code
;
} /* end of main while (1) loop */
* copy input file to backup file. If in_name is /blah/blah/blah/file, then
* backup file will be "file.BAK". Then make the backup file the input and
* original input file the output.
if ((p
= rindex(in_name
, '/')) != NULL
)
sprintf(bakfile
, "%s.BAK", p
);
/* copy in_name to backup file */
bakchn
= creat(bakfile
, 0600);
fprintf(stderr
, "indent: can't create backup file \"%s\"\n", bakfile
);
while ((n
= read(fileno(input
), buff
, sizeof buff
)) > 0)
if (write(bakchn
, buff
, n
) != n
) {
fprintf(stderr
, "indent: error writing backup file \"%s\"\n",
fprintf(stderr
, "indent: error reading input file \"%s\"\n", in_name
);
/* re-open backup file as the input file */
input
= fopen(bakfile
, "r");
fprintf(stderr
, "indent: can't re-open backup file\n");
/* now the original input file will be the output */
output
= fopen(in_name
, "w");
fprintf(stderr
, "indent: can't create %s\n", in_name
);