* Copyright (c) 1990 Jan-Simon Pendry
* Copyright (c) 1990 Imperial College of Science, Technology & Medicine
* Copyright (c) 1990 The Regents of the University of California.
* This code is derived from software contributed to Berkeley by
* Jan-Simon Pendry at Imperial College, London.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* @(#)map.c 5.3 (Berkeley) 5/12/91
* $Id: map.c,v 5.2.1.7 91/05/07 22:18:05 jsp Alpha $
* Generation numbers are allocated to every node created
* by amd. When a filehandle is computed and sent to the
* kernel, the generation number makes sure that it is safe
* to reallocate a node slot even when the kernel has a cached
* reference to its old incarnation.
* No garbage collection is done, since it is assumed that
* there is no way that 2^32 generation numbers could ever
* be allocated by a single run of amd - there is simply
* not enough cpu time available.
/* Generation counter for nodes; the first number handed out is 2. */
static unsigned int am_gen = 2;

/* Evaluates to the current generation number, then advances the counter. */
#define new_gen() (am_gen++)
/* Table of node pointers, indexed by map number; starts out unallocated. */
am_node **exported_ap = 0;
/* Number of slots currently allocated in exported_ap. */
int exported_ap_size = 0;
/* Index of the first available free slot. */
int first_free_map = 0;
/* Index of the last slot in use (-1 initially). */
int last_used_map = -1;
/* Id returned by the most recent call to timeout(). */
static int timeout_mp_id;
* This is the default attributes field which
* is copied into every new node to be created.
* The individual filesystem fs_init() routines
* patch the copy to represent the particular
* details for the relevant filesystem type
static struct fattr gen_fattr
= {
NFSMODE_LNK
| 0777, /* mode */
static int exported_ap_realloc_map
P((int nsize
));
static int exported_ap_realloc_map(nsize
)
* If a second realloc occasionally causes Amd to die,
* then include this check.
if (exported_ap_size
!= 0) /* XXX */
* this shouldn't happen, but...
if (nsize
< 0 || nsize
== exported_ap_size
)
exported_ap
= (am_node
**) xrealloc((voidp
) exported_ap
, nsize
* sizeof(am_node
*));
if (nsize
> exported_ap_size
)
bzero((char*) (exported_ap
+exported_ap_size
),
(nsize
- exported_ap_size
) * sizeof(am_node
*));
exported_ap_size
= nsize
;
* The root of the mount tree.
* Allocate a new mount slot and create a new node.
* Fills in the map number of the node,
* but leaves everything else uninitialised.
am_node
*exported_ap_alloc(P_void
)
* First check if there are any slots left, realloc if needed
if (first_free_map
>= exported_ap_size
)
if (!exported_ap_realloc_map(exported_ap_size
+ NEXP_AP
))
* Grab the next free slot
mpp
= exported_ap
+ first_free_map
;
mp
= *mpp
= ALLOC(am_node
);
bzero((char *) mp
, sizeof(*mp
));
mp
->am_mapno
= first_free_map
++;
while (first_free_map
< exported_ap_size
&& exported_ap
[first_free_map
])
if (first_free_map
> last_used_map
)
last_used_map
= first_free_map
- 1;
* Shrink exported_ap if reasonable
if (last_used_map
< exported_ap_size
- (NEXP_AP
+ NEXP_AP_MARGIN
))
exported_ap_realloc_map(exported_ap_size
- NEXP_AP
);
/*dlog("alloc_exp: last_used_map = %d, first_free_map = %d\n",
last_used_map, first_free_map);*/
void exported_ap_free
P((am_node
*mp
));
void exported_ap_free(mp
)
* Zero the slot pointer to avoid double frees
exported_ap
[mp
->am_mapno
] = 0;
* Update the free and last_used indices
if (mp
->am_mapno
== last_used_map
)
while (last_used_map
>= 0 && exported_ap
[last_used_map
] == 0)
if (first_free_map
> mp
->am_mapno
)
first_free_map
= mp
->am_mapno
;
/*dlog("free_exp: last_used_map = %d, first_free_map = %d\n",
last_used_map, first_free_map);*/
* Insert mp into the correct place,
* where p_mp is its parent node.
* A new node gets placed as the youngest sibling
* of any other children, and the parent's child
* pointer is adjusted to point to the new child node.
* If this is going in at the root then flag it
* so that it cannot be unmounted by amq.
mp
->am_flags
|= AMF_ROOT
;
mp
->am_osib
= p_mp
->am_child
;
mp
->am_osib
->am_ysib
= mp
;
* Remove am from its place in the mount tree
if (mp
->am_child
&& mp
->am_parent
) {
plog(XLOG_WARNING
, "children of \"%s\" still exist - deleting anyway", mp
->am_path
);
* 2. Update parent's child pointer
if (mp
->am_parent
&& mp
->am_parent
->am_child
== mp
)
mp
->am_parent
->am_child
= mp
->am_osib
;
* 3. Unlink from sibling chain
mp
->am_ysib
->am_osib
= mp
->am_osib
;
mp
->am_osib
->am_ysib
= mp
->am_ysib
;
* Compute a new time to live value for a node.
mp
->am_ttl
= clocktime();
mp
->am_fattr
.atime
.seconds
= mp
->am_ttl
;
mp
->am_ttl
+= mp
->am_timeo
; /* sun's -tl option */
void mk_fattr
P((am_node
*mp
, ftype vntype
));
void mk_fattr(mp
, vntype
)
mp
->am_fattr
.type
= NFDIR
;
mp
->am_fattr
.mode
= NFSMODE_DIR
| 0555;
mp
->am_fattr
.type
= NFLNK
;
mp
->am_fattr
.mode
= NFSMODE_LNK
| 0777;
plog(XLOG_FATAL
, "Unknown fattr type %d - ignored", vntype
);
* Initialise an allocated mount node.
* It is assumed that the mount node was bzero'd
* before getting here so anything that would
* be set to zero isn't done here.
/* mp->am_mapno initialised by exported_ap_alloc */
mp
->am_mnt
= new_mntfs();
mp
->am_name
= strdup(dir
);
mp
->am_path
= strdup(dir
);
mp
->am_attr
.status
= NFS_OK
;
mp
->am_fattr
= gen_fattr
;
mp
->am_fattr
.atime
.seconds
= clocktime();
mp
->am_fattr
.atime
.useconds
= 0;
mp
->am_fattr
.mtime
= mp
->am_fattr
.ctime
= mp
->am_fattr
.atime
;
mp
->am_stats
.s_mtime
= mp
->am_fattr
.atime
.seconds
;
* The node must be already unmounted.
* Convert from file handle to automount node.
am_node
*fh_to_mp3(fhp
, rp
, c_or_d
)
struct am_fh
*fp
= (struct am_fh
*) fhp
;
* Check process id matches
* If it doesn't then it is probably
* from an old kernel cached filehandle
* which is now out of date.
if (fp
->fhh_pid
!= mypid
)
* Make sure the index is valid before
* exported_ap is referenced.
if (fp
->fhh_id
< 0 || fp
->fhh_id
>= exported_ap_size
)
* Get hold of the supposed mount node
ap
= exported_ap
[fp
->fhh_id
];
* If it exists then maybe...
* Check the generation number in the node
* matches the one from the kernel. If not
* then the old node has been timed out and
if (ap
->am_gen
!= fp
->fhh_gen
) {
* If the node is hung then locate a new node
* for it. This implements the replicated filesystem
if (ap
->am_mnt
&& FSRV_ISDOWN(ap
->am_mnt
->mf_server
) && ap
->am_parent
) {
dlog("fh_to_mp3: %s (%s) is hung:- call lookup",
orig_ap
->am_path
, orig_ap
->am_mnt
->mf_info
);
* Update modify time of parent node.
* With any luck the kernel will re-stat
* the child node and get new information.
orig_ap
->am_fattr
.mtime
.seconds
= clocktime();
* Call the parent's lookup routine for an object
* with the same name. This may return -1 in error
* if a mount is in progress. In any case, if no
* mount node is returned the error code is propagated
if (c_or_d
== VLOOK_CREATE
) {
ap
= (*orig_ap
->am_parent
->am_mnt
->mf_ops
->lookuppn
)(orig_ap
->am_parent
,
orig_ap
->am_name
, &error
, c_or_d
);
if (error
< 0 && amd_state
== Finishing
)
* Update last access to original node. This
* avoids timing it out and so sending ESTALE
* XXX - Not sure we need this anymore (jsp, 90/10/6).
* Disallow references to objects being unmounted, unless
* they are automount points.
if (ap
->am_mnt
&& (ap
->am_mnt
->mf_flags
& MFF_UNMOUNTING
) &&
!(ap
->am_flags
& AMF_ROOT
)) {
if (amd_state
== Finishing
)
if (!ap
|| !ap
->am_mnt
) {
* If we are shutting down then it is likely
* that this node has disappeared because of
* a fast timeout. To avoid things thrashing
* just pretend it doesn't exist at all. If
* ESTALE is returned, some NFS clients just
* keep retrying (stupid or what - if it's
* stale now, what's it going to be in 5 minutes?)
if (amd_state
== Finishing
)
return fh_to_mp2(fhp
, &dummy
);
* Convert from automount node to file handle.
struct am_fh
*fp
= (struct am_fh
*) fhp
;
fp
->fhh_id
= mp
->am_mapno
;
* .. and the generation number
fp
->fhh_gen
= mp
->am_gen
;
* .. to make a "unique" triple that will never
* be reallocated except across reboots (which doesn't matter)
* or if we are unlucky enough to be given the same
* pid as a previous amd (very unlikely).
static am_node
*find_ap2
P((char *dir
, am_node
*mp
));
static am_node
*find_ap2(dir
, mp
)
if (strcmp(mp
->am_path
, dir
) == 0)
if ((mp
->am_mnt
->mf_flags
& MFF_MOUNTED
) &&
strcmp(mp
->am_mnt
->mf_mount
, dir
) == 0)
mp2
= find_ap2(dir
, mp
->am_osib
);
return find_ap2(dir
, mp
->am_child
);
* Find the mount node corresponding
* to dir. dir can match either the
* automount path or, if the node is
* mounted, the mount location.
am_node
*find_ap
P((char *dir
));
for (i
= last_used_map
; i
>= 0; --i
) {
am_node
*mp
= exported_ap
[i
];
if (mp
&& (mp
->am_flags
& AMF_ROOT
)) {
mp
= find_ap2(dir
, exported_ap
[i
]);
* Find the mount node corresponding
* to the mntfs structure.
am_node
*find_mf
P((mntfs
*mf
));
for (i
= last_used_map
; i
>= 0; --i
) {
am_node
*mp
= exported_ap
[i
];
if (mp
&& mp
->am_mnt
== mf
)
* Get the filehandle for a particular named directory.
* This is used during the bootstrap to tell the kernel
* the filehandles of the initial automount points.
am_node
*mp
= root_ap(dir
, TRUE
);
* Patch up PID to match main server...
((struct am_fh
*) &nfh
)->fhh_pid
= pid
;
dlog("root_fh substitutes pid %d", pid
);
* Should never get here...
plog(XLOG_ERROR
, "Can't find root filehandle for %s", dir
);
am_node
*root_ap(dir
, path
)
am_node
*mp
= find_ap(dir
);
if (mp
&& mp
->am_parent
== root_node
)
* Timeout all nodes waiting on the given file server.
void map_flush_srvr
P((fserver
*fs
));
for (i
= last_used_map
; i
>= 0; --i
) {
am_node
*mp
= exported_ap
[i
];
if (mp
&& mp
->am_mnt
&& mp
->am_mnt
->mf_server
== fs
) {
plog(XLOG_INFO
, "Flushed %s; dependent on %s", mp
->am_path
, fs
->fs_host
);
mp
->am_ttl
= clocktime();
* Mount a top level automount node
* by calling lookup in the parent
* (root) node which will cause the
* automount node to be automounted.
int mount_auto_node
P((char *dir
, voidp arg
));
int mount_auto_node(dir
, arg
)
(void) afs_ops
.lookuppn((am_node
*) arg
, dir
, &error
, VLOOK_CREATE
);
plog(XLOG_ERROR
, "Could not mount %s: %m", dir
);
* Cause all the top-level mount nodes to be automounted
int mount_exported
P((void));
* Iterate over all the nodes to be started
return root_keyiter((void (*)P((char*,void*))) mount_auto_node
, root_node
);
* Construct top-level node
void make_root_node
P((void));
char *rootmap
= ROOT_MAP
;
root_node
= exported_ap_alloc();
* Allocate a new mounted filesystem
root_mnt
= find_mntfs(&root_ops
, (am_opts
*) 0, "", rootmap
, "", "");
* Replace the initial null reference
free_mntfs(root_node
->am_mnt
);
root_node
->am_mnt
= root_mnt
;
if (root_mnt
->mf_ops
->fs_init
)
(*root_mnt
->mf_ops
->fs_init
)(root_mnt
);
root_mnt
->mf_error
= (*root_mnt
->mf_ops
->mount_fs
)(root_node
);
* Cause all the nodes to be unmounted by timing them out.
void umount_exported(P_void
)
for (i
= last_used_map
; i
>= 0; --i
) {
am_node
*mp
= exported_ap
[i
];
if (mf
->mf_flags
& MFF_UNMOUNTING
) {
* If this node is being unmounted then
* just ignore it. However, this could
* prevent amd from finishing if the
* unmount gets blocked since the am_node
* will never be free'd. am_unmounted needs
* telling about this possibility. - XXX
if (mf
&& !(mf
->mf_ops
->fs_flags
& FS_DIRECTORY
)) {
* When shutting down this had better
* look like a directory, otherwise it
if ((--immediate_abort
< 0 && !(mp
->am_flags
& AMF_ROOT
) && mp
->am_parent
) ||
(mf
->mf_flags
& MFF_RESTART
)) {
* Just throw this node away without
* bothering to unmount it. If the
* server is not known to be up then
* don't discard the mounted on directory
(mf
->mf_server
->fs_flags
& (FSF_DOWN
|FSF_VALID
)) != FSF_VALID
)
mf
->mf_flags
&= ~MFF_MKMNT
;
* Any other node gets forcibly
mp
->am_flags
&= ~AMF_NOTIMEOUT
;
mp
->am_mnt
->mf_flags
&= ~MFF_RSTKEEP
;
static int unmount_node
P((am_node
*mp
));
static int unmount_node(mp
)
if ((mf
->mf_flags
& MFF_ERROR
) || mf
->mf_refc
> 1) {
if (mf
->mf_flags
& MFF_ERROR
)
dlog("No-op unmount of error node %s", mf
->mf_info
);
dlog("Unmounting %s (%s)", mf
->mf_mount
, mf
->mf_info
);
error
= (*mf
->mf_ops
->umount_fs
)(mp
);
dlog("%s: unmount: %m", mf
->mf_mount
);
#ifdef FLUSH_KERNEL_NAME_CACHE
static void flush_kernel_name_cache
P((am_node
*));
static void flush_kernel_name_cache(mp
)
int islink
= (mp
->am_mnt
->mf_fattr
.type
== NFLNK
);
int isdir
= (mp
->am_mnt
->mf_fattr
.type
== NFDIR
);
if (unlink(mp
->am_path
) < 0)
if (rmdir(mp
->am_path
) < 0)
plog(XLOG_WARNING
, "failed to clear \"%s\" from dnlc: %m", mp
->am_path
);
#endif /* FLUSH_KERNEL_NAME_CACHE */
static int unmount_node_wrap
P((voidp vp
));
static int unmount_node_wrap(vp
)
#ifndef FLUSH_KERNEL_NAME_CACHE
return unmount_node((am_node
*) vp
);
#else /* FLUSH_KERNEL_NAME_CACHE */
* This code should just say:
* return unmount_node((am_node *) vp);
* The kernel keeps a cached copy of filehandles,
* and doesn't ever uncache them (apparently). So
* when Amd times out a node the kernel will have a
* stale filehandle. When the kernel next uses the
* filehandle it gets ESTALE.
* Arrange that when a node is removed an unlink or
* rmdir is done on that path so that the kernel
* cache is done. Yes - yuck.
* This can all be removed (and the background
* unmount flag in sfs_ops) if/when the kernel does
* If the unlink or rmdir failed then just log a warning,
* don't fail the unmount. This can occur if the kernel
* client code decides that the object is still referenced
* and should be renamed rather than discarded.
* There is still a race condition here...
* if another process is trying to access the same
* filesystem at the time we get here, then
* it will block, since the MF_UNMOUNTING flag will
* be set. That may, or may not, cause the entire
* system to deadlock. Hmmm...
am_node
*mp
= (am_node
*) vp
;
int isauto
= mp
->am_parent
&& (mp
->am_parent
->am_mnt
->mf_fattr
.type
== NFDIR
);
int error
= unmount_node(mp
);
if (isauto
&& (int)amd_state
< (int)Finishing
)
flush_kernel_name_cache(mp
);
#endif /* FLUSH_KERNEL_NAME_CACHE */
static void free_map_if_success(rc
, term
, closure
)
am_node
*mp
= (am_node
*) closure
;
* Not unmounting any more
mf
->mf_flags
&= ~MFF_UNMOUNTING
;
* If a timeout was deferred because the underlying filesystem
* was busy then arrange for a timeout as soon as possible.
if (mf
->mf_flags
& MFF_WANTTIMO
) {
mf
->mf_flags
&= ~MFF_WANTTIMO
;
plog(XLOG_ERROR
, "unmount for %s got signal %d", mp
->am_path
, term
);
#if defined(DEBUG) && defined(SIGTRAP)
* dbx likes to put a trap on exit().
* Pretend it succeeded for now...
plog(XLOG_STATS
, "\"%s\" on %s still active", mp
->am_path
, mf
->mf_mount
);
plog(XLOG_ERROR
, "%s: unmount: %m", mp
->am_path
);
* Wakeup anything waiting for this mount
static void unmount_mp(mp
)
plog(XLOG_INFO
, "\"%s\" on %s timed out", mp
->am_path
, mp
->am_mnt
->mf_mount
);
if ((mf
->mf_ops
->fs_flags
& FS_UBACKGROUND
) &&
(mf
->mf_flags
& MFF_MOUNTED
)) {
if (mf
->mf_refc
== 1 && !FSRV_ISUP(mf
->mf_server
)) {
* Don't try to unmount from a server that is known to be down
if (!(mf
->mf_flags
& MFF_LOGDOWN
)) {
/* Only log this once, otherwise gets a bit boring */
plog(XLOG_STATS
, "file server %s is down - timeout of \"%s\" ignored", mf
->mf_server
->fs_host
, mp
->am_path
);
mf
->mf_flags
|= MFF_LOGDOWN
;
/* Clear logdown flag - since the server must be up */
mf
->mf_flags
&= ~MFF_LOGDOWN
;
dlog("\"%s\" on %s timed out", mp
->am_path
, mp
->am_mnt
->mf_mount
);
/*dlog("Will background the unmount attempt");*/
* Note that we are unmounting this node
mf
->mf_flags
|= MFF_UNMOUNTING
;
run_task(unmount_node_wrap
, (voidp
) mp
,
free_map_if_success
, (voidp
) mp
);
dlog("unmount attempt backgrounded");
dlog("\"%s\" on %s timed out", mp
->am_path
, mp
->am_mnt
->mf_mount
);
dlog("Trying unmount in foreground");
mf
->mf_flags
|= MFF_UNMOUNTING
;
free_map_if_success(unmount_node(mp
), 0, (voidp
) mp
);
dlog("unmount attempt done");
/*
 * Yield the smaller of two times, where NEVER means "no time set":
 * if either argument equals NEVER the other one is the result.
 * Every use of an argument is parenthesised so that operator
 * expressions (e.g. a | b) expand with the intended grouping.
 * Each argument may still be evaluated more than once, so do not
 * pass expressions with side effects.
 */
#define smallest_t(t1, t2) \
	((t1) != NEVER ? ((t2) != NEVER ? ((t1) < (t2) ? (t1) : (t2)) : (t1)) : (t2))
/* Filesystem flags that make the timeout scan leave a node alone. */
#define IGNORE_FLAGS (MFF_MOUNTING|MFF_UNMOUNTING|MFF_RESTART)
time_t now
= clocktime();
dlog("Timing out automount points...");
for (i
= last_used_map
; i
>= 0; --i
) {
am_node
*mp
= exported_ap
[i
];
* Just continue if nothing mounted, or can't be timed out.
if (!mp
|| (mp
->am_flags
& AMF_NOTIMEOUT
))
* Pick up mounted filesystem
* Don't delete last reference to a restarted filesystem.
if ((mf
->mf_flags
& MFF_RSTKEEP
) && mf
->mf_refc
== 1)
* If there is action on this filesystem then ignore it
if (!(mf
->mf_flags
& IGNORE_FLAGS
)) {
mf
->mf_flags
&= ~MFF_WANTTIMO
;
/*dlog("t is initially @%d, zero in %d secs", t, t - now);*/
* Move the ttl forward to avoid thrashing effects
* on the next call to timeout!
if (mp
->am_timeo_w
< 4 * am_timeo_w
)
mp
->am_timeo_w
+= am_timeo_w
;
mp
->am_ttl
= now
+ mp
->am_timeo_w
;
* If the next ttl is smallest, use that
t
= smallest_t(t
, mp
->am_ttl
);
/*dlog("after ttl t is @%d, zero in %d secs", t, t - now);*/
if (!mp
->am_child
&& mf
->mf_error
>= 0 && expired
)
} else if (mf
->mf_flags
& MFF_UNMOUNTING
) {
mf
->mf_flags
|= MFF_WANTTIMO
;
dlog("No further timeouts");
* Sanity check to avoid runaways.
* Absolutely should never get this but
* if you do without this trap amd will thrash.
plog(XLOG_ERROR
, "Got a zero interval in timeout_mp()!");
* XXX - when shutting down, make things happen faster
if ((int)amd_state
>= (int)Finishing
)
dlog("Next mount timeout in %ds", t
- now
);
timeout_mp_id
= timeout(t
- now
, timeout_mp
, 0);
* Cause timeout_mp to be called soonest
void reschedule_timeout_mp()
untimeout(timeout_mp_id
);
timeout_mp_id
= timeout(0, timeout_mp
, 0);