* Copyright (c) 1992 Keith Muller.
* Copyright (c) 1992 The Regents of the University of California.
* This code is derived from software contributed to Berkeley by
* Keith Muller of the University of California, San Diego.
* %sccs.include.redist.c%
static char sccsid
[] = "@(#)pat_rep.c 1.2 (Berkeley) %G%";
* routines to handle pattern matching, name modification (regular expression
* substitution and interactive renames), and destination name modification for
* copy (-rw). Both file name and link names are adjusted as required in these
#define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */

/*
 * File-scope anchors for the two lists kept by this file: the file
 * pattern match list and the replacement string list. Each list keeps
 * both a head and a tail pointer; all four start out NULL.
 */
static PATTERN *pathead = NULL; /* file pattern match list head */
static PATTERN *pattail = NULL; /* file pattern match list tail */
static REPLACE *rephead = NULL; /* replacement string list head */
static REPLACE *reptail = NULL; /* replacement string list tail */
/*
 * Prototypes for the file-local helpers.
 *
 * NOTE(review): resub is declared twice below with different parameter
 * lists (one taking a regexp *, one taking a regex_t * plus a
 * regmatch_t *). Presumably only one of the two is meant to be active
 * in any given build; the selecting conditional is not visible here --
 * confirm before relying on either form.
 */
static int rep_name __P((char *, int *, int));
static int tty_rename __P((register ARCHD *));
static int fix_path __P((char *, int *, char *, int));
static int resub __P((regexp *, char *, char *, register char *));
static int resub __P((regex_t *, regmatch_t *, char *, char *, char *));
* parses the -s replacement string; compiles the regular expression
* and stores the compiled value and its replacement string together in
* replacement string list. Input to this function is of the form:
* The first char in the string specifies the delimiter used by this
* replacement string. "Old" is a regular expression in "ed" format which
* is compiled by regcomp() and is applied to filenames. "new" is the
* substitution string; p and g are options flags for printing and global
* replacement (over the single filename)
* 0 if a proper replacement string and regular expression was added to
* the list of replacement patterns; -1 otherwise.
rep_add(register char *str
)
* throw out the bad parameters
if ((str
== NULL
) || (*str
== '\0')) {
warn(1, "Empty replacement string");
* first character in the string specifies what the delimiter is for
if ((pt1
= strchr(str
+1, *str
)) == NULL
) {
warn(1, "Invalid replacement string %s", str
);
* allocate space for the node that handles this replacement pattern
* and split out the regular expression and try to compile it
if ((rep
= (REPLACE
*)malloc(sizeof(REPLACE
))) == NULL
) {
warn(1, "Unable to allocate memory for replacement string");
if ((rep
->rcmp
= regcomp(str
+1)) == NULL
) {
if ((res
= regcomp(&(rep
->rcmp
), str
+1, 0)) != 0) {
regerror(res
, &(rep
->rcmp
), rebuf
, sizeof(rebuf
));
warn(1, "%s while compiling regular expression %s", rebuf
, str
);
* put the delimiter back in case we need an error message and
* locate the delimiter at the end of the replacement string
* we then point the node at the new substitution string
if ((pt2
= strchr(pt1
, *str
)) == NULL
) {
(void)free((char *)rep
->rcmp
);
warn(1, "Invalid replacement string %s", str
);
(void)free((char *)rep
->rcmp
);
warn(1, "Invalid replacement string option %s", str
);
* all done, link it in at the end
* add a pattern match to the pattern match list. Pattern matches are used
* to select which archive members are extracted. (They appear as
* arguments to pax in the list and read modes). If no patterns are
* supplied to pax, all members in the archive will be selected (and the
* pattern match list is empty).
* 0 if the pattern was added to the list, -1 otherwise
if ((str
== NULL
) || (*str
== '\0')) {
warn(1, "Empty pattern string");
* allocate space for the pattern and store the pattern. the pattern is
* part of argv so do not bother to copy it, just point at it. Add the
* node to the end of the pattern list
if ((pt
= (PATTERN
*)malloc(sizeof(PATTERN
))) == NULL
) {
warn(1, "Unable to allocate memory for pattern string");
* complain if any of the user supplied patterns did not result in a match to
* a selected archive member.
* walk down the list checking the flags to make sure MTCH was set,
for (pt
= pathead
; pt
!= NULL
; pt
= pt
->fow
) {
warn(1, "WARNING! These patterns were not matched:");
(void)fprintf(stderr
, "%s\n", pt
->pstr
);
* the archive member which matches a pattern was selected. Mark the
* pattern as having selected an archive member. arcn->pat points at the
* pattern that was matched. arcn->pat is set in pat_match()
* NOTE: When the -c option is used, we are called when there was no match
* by pat_match() (that means we did match before the inverted sense of
* the logic). Now this seems really strange at first, but with -c we
* need to keep track of those patterns that cause an archive member to NOT
* be selected (it found an archive member with a specified pattern)
* 0 if the pattern pointed at by arcn->pat was tagged as creating a
pat_sel(register ARCHD
*arcn
)
* if no patterns just return
if ((pathead
== NULL
) || ((pt
= arcn
->pat
) == NULL
))
* when we are NOT limited to a single match per pattern mark the
if (dflag
|| (arcn
->type
!= PAX_DIR
))
* ok we matched a directory and we are allowing
* subtree matches. We add this as a DIR_MTCH pattern
* so all its children will match. Note we know that
* when successful, pat_add() puts the pattern at the
* tail (yup a kludge). In the code below will make
if ((pat_add(arcn
->name
) < 0) || ((pt
= pattail
) == NULL
))
* we reach this point only when we allow a single selected match per
* pattern, or we have to add a DIR_MATCH pattern. if the pattern
* matched a directory and we do not have -d (dflag) we are done
* with this pattern. We may also be handed a file in the subtree of a
* directory. in that case when we are operating with -d, this pattern
* was already selected and we are done
if (!dflag
&& (arcn
->type
== PAX_DIR
)) {
* we are allowing subtree matches at directories, mark the
* node as a directory match so pat_match() will only match
* children of this directory (we replace the pattern with the
* directory name to enforce this subtree only match)
* pat_match() looks for DIR_MTCH to determine what comparison
* technique to use when it checks for a pattern match
if ((pt
->pstr
= strdup(arcn
->name
)) == NULL
) {
warn(1, "Pattern select out of memory");
pt
->plen
= strlen(pt
->pstr
);
* strip off any trailing /, this should really never happen
if (*(pt
->pstr
+ len
) == '/') {
*(pt
->pstr
+ len
) = '\0';
pt
->flgs
|= DIR_MTCH
| MTCH
;
* it is not a directory, we are then done with this pattern, so we
* delete it from the list, as it can never be used for another match
* Seems kind of strange to do for a -c, but the pax spec is really
* vague on the interaction of -c -n and -d. We assume that when -c
* and the pattern rejects a member (i.e. it matched it) it is done.
* In effect we place the order of the flags as having -c last.
while ((pt
!= NULL
) && (pt
!= arcn
->pat
)) {
* should never happen....
warn(1, "Pattern list inconsistant");
* see if this archive member matches any supplied pattern, if a match
* is found, arcn->pat is set to point at the potential pattern. Later if
* this archive member is "selected" we process and mark the pattern as
* one which matched a selected archive member (see pat_sel())
* 0 if this archive member should be processed, 1 if it should be
* skipped and -1 if we are done with all patterns (and pax should quit
* looking for more members)
pat_match(register ARCHD
*arcn
)
* if there are no more patterns and we have -n (and not -c) we are
* done. otherwise with no patterns to match, matches all
* have to search down the list one at a time looking for a match.
* check for a file name match unless we have DIR_MTCH set in
* this pattern then we want a prefix match
if (pt
->flgs
& DIR_MTCH
) {
* this pattern was matched before to a directory
* as we must have -n set for this (but not -d). We can
* only match CHILDREN of that directory so we must use
if ((strncmp(pt
->pstr
, arcn
->name
, pt
->plen
) == 0) &&
(arcn
->name
[pt
->plen
] == '/'))
} else if (fnmatch(pt
->pstr
, arcn
->name
, 0) == 0)
* return the result, remember that cflag (-c) inverts the sense of a
* we had a match, now when we invert the sense (-c) we reject this
* member. However we have to tag the pattern as being successful, (in a
* match, not in selecting an archive member) so we call pat_sel() here.
* modify a selected file name. first attempt to apply replacement string
* expressions, then apply interactive file rename. We apply replacement
* string expressions to both filenames and file links (if we didn't the
* links would point to the wrong place, and we would never be able to
* move an archive that has a file link in it). When we rename files
* interactively, we store that mapping (old name to user input name) so
* if we spot any file links to the old file name in the future, we will
* know exactly how to fix the file link.
* 0 continue to process file, 1 skip this file, -1 pax is finished
mod_name(register ARCHD
*arcn
)
* IMPORTANT: We have a problem. what do we do with symlinks?
* Modifying a hard link name makes sense, as we know the file it
* points at should have been seen already in the archive (and if it
* wasn't seen because of a read error or a bad archive, we lose
* anyway). But there are no such requirements for symlinks. On one
* hand the symlink that refers to a file in the archive will have to
* be modified so it will still work at its new location in the
* file system. On the other hand a symlink that points elsewhere (and
* should continue to do so) should not be modified. There is clearly
* no perfect solution here. So we handle them like hardlinks. Clearly
* a replacement made by the interactive rename mapping is very likely
* to be correct since it applies to a single file and is an exact
* match. The regular expression replacements are a little harder to
* justify though. We claim that the symlink name is only likely
* to be replaced when it points within the file tree being moved and
* in that case it should be modified. what we really need to do is to
* call an oracle here. :)
* we have replacement strings, modify the name and the link
if ((res
= rep_name(arcn
->name
, &(arcn
->nlen
), 1)) != 0)
if (((arcn
->type
== PAX_SLK
) || (arcn
->type
== PAX_HLK
) ||
(arcn
->type
== PAX_HRG
)) &&
((res
= rep_name(arcn
->ln_name
, &(arcn
->ln_nlen
), 0)) != 0))
* perform interactive file rename, then map the link if any
if ((res
= tty_rename(arcn
)) != 0)
if ((arcn
->type
== PAX_SLK
) || (arcn
->type
== PAX_HLK
) ||
sub_name(arcn
->ln_name
, &(arcn
->ln_nlen
));
* Prompt the user for a replacement file name. A "." keeps the old name,
* an empty line skips the file, and an EOF on reading the tty will cause
* pax to stop processing and exit. Otherwise the file name input, replaces
* 0 process this file, 1 skip this file, -1 we need to exit pax
tty_rename(register ARCHD
*arcn
)
char tmpname
[PAXPATHLEN
+2];
* prompt user for the replacement name for a file, keep trying until
* we get some reasonable input. Archives may have more than one file
* on them with the same name (from updates etc). We print verbose info
* on the file so the user knows what is up.
tty_prnt("\nATTENTION: Pax interactive file rename operation.\n");
tty_prnt("Input new name, or a \".\" to keep the old name, ");
tty_prnt("or a \"return\" to skip this file.\n");
if (tty_read(tmpname
, sizeof(tmpname
)) < 0)
if (strcmp(tmpname
, "..") == 0) {
tty_prnt("Try again, illegal file name: ..\n");
if (strlen(tmpname
) > PAXPATHLEN
) {
tty_prnt("Try again, file name too long\n");
* empty file name, skips this file. a "." leaves it alone
if (tmpname
[0] == '\0') {
tty_prnt("Skipping file.\n");
if ((tmpname
[0] == '.') && (tmpname
[1] == '\0')) {
tty_prnt("Processing continues, name unchanged.\n");
* ok the name changed. We may run into links that point at this
* file later. we have to remember where the user sent the file
* in order to repair any links.
tty_prnt("Processing continues, name changed to: %s\n", tmpname
);
res
= add_name(arcn
->name
, arcn
->nlen
, tmpname
);
arcn
->nlen
= l_strncpy(arcn
->name
, tmpname
, PAXPATHLEN
+1);
* fix up the file name and the link name (if any) so this file will land
* in the destination directory (used during copy() -rw).
* 0 if ok, -1 if failure (name too long)
set_dest(register ARCHD
*arcn
, char *dest_dir
, int dir_len
)
set_dest(arcn
, dest_dir
, dir_len
)
if (fix_path(arcn
->name
, &(arcn
->nlen
), dest_dir
, dir_len
) < 0)
* It is really hard to deal with symlinks here, we cannot be sure
* if the name they point to was moved (or will be moved). It is best to
if ((arcn
->type
!= PAX_HLK
) && (arcn
->type
!= PAX_HRG
))
if (fix_path(arcn
->ln_name
, &(arcn
->ln_nlen
), dest_dir
, dir_len
) < 0)
* concatenate dir_name and or_name and store the result in or_name (if
* it fits). This is one ugly function.
* 0 if ok, -1 if the final name is too long
fix_path( char *or_name
, int *or_len
, char *dir_name
, int dir_len
)
fix_path(or_name
, or_len
, dir_name
, dir_len
)
* we shift the or_name to the right enough to tack in the dir_name
* at the front. We make sure we have enough space for it all before
* we start. since dest always ends in a slash, we skip the leading slash of or_name
* if it also starts with one.
if ((len
= dest
- or_name
) > PAXPATHLEN
) {
warn(1, "File name %s/%s, too long", dir_name
, start
);
src
= dir_name
+ dir_len
- 1;
* splice in the destination directory name
* walk down the list of replacement strings applying each one in order.
* when we find one with a successful substitution, we modify the name
* as specified. if required, we print the results. if the resulting name
* is empty, we will skip this archive member. We use the regexp(3)
* routines (regexp() ought to win a prize as having the most cryptic
* library function manual page).
* name is the file name we are going to apply the regular expressions to
* nlen is the length of this name (and is modified to hold the length of
* prnt is a flag that says whether to print the final result.
* 0 if substitution was successful, 1 if we are to skip the file (the name
rep_name(char *name
, int *nlen
, int prnt
)
rep_name(name
, nlen
, prnt
)
regmatch_t pm
[MAXSUBEXP
];
char nname
[PAXPATHLEN
+1]; /* final result of all replacements */
char buf1
[PAXPATHLEN
+1]; /* where we work on the name */
* copy the name into buf1, where we will work on it. We need to keep
* the orig string around so we can print out the result of the final
* replacement. We build up the final result in nname. inpt points at
* the string we apply the regular expression to. prnt is used to
* suppress printing when we handle replacements on the link field
* (the user already saw that substitution go by)
(void)strcpy(buf1
, name
);
endpt
= outpt
+ PAXPATHLEN
;
* try each replacement string in order
* check for a successful substitution, if not go to
* the next pattern, or cleanup if we were global
if (regexec(pt
->rcmp
, inpt
) == 0)
if (regexec(&(pt
->rcmp
), inpt
, MAXSUBEXP
, pm
, 0) != 0)
* ok we found one. We have three parts, the prefix
* which did not match, the section that did and the
* tail (that also did not match). Copy the prefix to
* the final output buffer (watching to make sure we
* do not create a string too long).
rpt
= pt
->rcmp
->startp
[0];
rpt
= inpt
+ pm
[0].rm_so
;
while ((inpt
< rpt
) && (outpt
< endpt
))
* for the second part (which matched the regular
* expression) apply the substitution using the
* replacement string and place it after the prefix in the
* final output. If we have problems, skip it.
if ((res
= resub(pt
->rcmp
,pt
->nstr
,outpt
,endpt
)) < 0) {
if ((res
= resub(&(pt
->rcmp
),pm
,pt
->nstr
,outpt
,endpt
))
warn(1, "Replacement name error %s",
* we set up to look again starting at the first
* character in the tail (of the input string right
* after the last character matched by the regular
* expression (inpt always points at the first char in
* the string to process). If we are not doing a global
* substitution, we will use inpt to copy the tail to
* the final result. Make sure we do not overrun the
inpt
= pt
->rcmp
->endp
[0];
if ((outpt
== endpt
) || (*inpt
== '\0'))
* if the user wants global we keep trying to
* substitute until it fails, then we are done.
} while (pt
->flgs
& GLOB
);
* a successful substitution did NOT occur, try the next one
* we had a substitution, copy the last tail piece (if there is
* room) to the final result
while ((outpt
< endpt
) && (*inpt
!= '\0'))
if ((outpt
== endpt
) && (*inpt
!= '\0')) {
warn(1,"Replacement name too long %s >> %s",
* inform the user of the result if wanted
if (prnt
&& (pt
->flgs
& PRNT
)) {
(void)fprintf(stderr
,"%s >> <empty string>\n",
(void)fprintf(stderr
,"%s >> %s\n", name
, nname
);
* if empty inform the caller this file is to be skipped
* otherwise copy the new name over the orig name and return
*nlen
= l_strncpy(name
, nname
, PAXPATHLEN
+ 1);
* apply the replacement to the matched expression. expand out the old
* style ed(1) subexpression expansion.
* -1 if error, or the number of characters added to the destination.
resub(regexp
*prog
, char *src
, char *dest
, register char *destend
)
resub(prog
, src
, dest
, destend
)
while ((dpt
< destend
) && ((c
= *spt
++) != '\0')) {
else if ((c
== '\\') && (*spt
>= '0') && (*spt
<= '9'))
if ((c
== '\\') && ((*spt
== '\\') || (*spt
== '&')))
if ((prog
->startp
[no
] == NULL
) || (prog
->endp
[no
] == NULL
) ||
((len
= prog
->endp
[no
] - prog
->startp
[no
]) <= 0))
* copy the subexpression to the destination.
* fail if we run out of space or the match string is damaged
if (len
> (destend
- dpt
))
if (l_strncpy(dpt
, prog
->startp
[no
], len
) != len
)
* apply the replacement to the matched expression. expand out the old
* style ed(1) subexpression expansion.
* -1 if error, or the number of characters added to the destination.
resub(regex_t
*rp
, register regmatch_t
*pm
, char *src
, char *dest
,
resub(rp
, pm
, src
, dest
, destend
)
register regmatch_t
*pmpt
;
while ((dpt
< destend
) && ((c
= *spt
++) != '\0')) {
* see if we just have an ordinary replacement character
* or we refer to a subexpression.
} else if ((c
== '\\') && (*spt
>= '0') && (*spt
<= '9')) {
* make sure there is a subexpression as specified
if ((len
= *spt
++ - '0') > subexcnt
)
* Ordinary character, just copy it
if ((c
== '\\') && ((*spt
== '\\') || (*spt
== '&')))
* continue if the subexpression is bogus
if ((pmpt
->rm_so
< 0) || (pmpt
->rm_eo
< 0) ||
((len
= pmpt
->rm_eo
- pmpt
->rm_so
) <= 0))
* copy the subexpression to the destination.
* fail if we run out of space or the match string is damaged
if (len
> (destend
- dpt
))
if (l_strncpy(dpt
, src
+ pmpt
->rm_so
, len
) != len
)