* Copyright (c) 1993 The Regents of the University of California.
* Copyright (c) 1993 Jan-Simon Pendry
* This code is derived from software contributed to Berkeley by
* %sccs.include.redist.c%
* @(#)procfs_vnops.c 8.2 (Berkeley) %G%
* $Id: procfs_vnops.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
#include <sys/resourcevar.h>
#include <miscfs/procfs/procfs.h>
#include <vm/vm.h> /* for PAGE_SIZE */
* This is a list of the valid names in the
* process-specific sub-directories. It is
* used in procfs_lookup and procfs_readdir
char d_name
[PROCFS_NAMELEN
];
/*
 * N(s) expands to two comma-separated initializer elements: the length of
 * s without its terminating NUL (sizeof(s)-1), followed by s itself.
 * s should be a string literal, as in the table entries that use it;
 * sizeof on a pointer would yield the pointer size, not the string length.
 * The expansion is deliberately left unparenthesized: wrapping it would
 * turn the pair into a single comma expression.
 */
#define N(s) sizeof(s)-1, s
{ N("status"), Pstatus
},
{ N("notepg"), Pnotepg
},
/* number of elements in the procent[] table */
#define Nprocent (sizeof(procent) / sizeof(*procent))
/* convert decimal ascii (string, length) to pid_t */
static pid_t atopid __P((const char *, u_int));
* set things up for doing i/o on
* the pfsnode (vp). (vp) is locked
* on entry, and should be left locked on exit.
* for procfs we don't need to do anything
* in particular for i/o. all that is done
* is to support exclusive open on process
struct vop_open_args
*ap
;
struct pfsnode
*pfs
= VTOPFS(ap
->a_vp
);
if (PFIND(pfs
->pfs_pid
) == 0)
return (ENOENT
); /* was ESRCH, jsp */
if ((pfs
->pfs_flags
& FWRITE
) && (ap
->a_mode
& O_EXCL
) ||
(pfs
->pfs_flags
& O_EXCL
) && (ap
->a_mode
& FWRITE
))
pfs
->pfs_flags
= ap
->a_mode
& (FWRITE
|O_EXCL
);
* close the pfsnode (vp) after doing i/o.
* (vp) is not locked on entry or exit.
* nothing to do for procfs other than undo
* any exclusive open flag (see _open above).
struct vop_close_args
*ap
;
struct pfsnode
*pfs
= VTOPFS(ap
->a_vp
);
if ((ap
->a_fflag
& FWRITE
) && (pfs
->pfs_flags
& O_EXCL
))
pfs
->pfs_flags
&= ~(FWRITE
|O_EXCL
);
* do an ioctl operation on pfsnode (vp).
* (vp) is not locked on entry or exit.
struct vop_ioctl_args
*ap
;
* do block mapping for pfsnode (vp).
* since we don't use the buffer cache
* for procfs this function should never
* be called. in any case, it's not clear
* what part of the kernel ever makes use
* of this function. for sanity, this is the
* usual no-op bmap, although returning
* (EIO) would be a reasonable alternative.
struct vop_bmap_args
*ap
;
* _inactive is called when the pfsnode
* is vrele'd and the reference count goes
* to zero. (vp) will be on the vnode free
* list, so to get it back vget() must be used.
* for procfs, check if the process is still
* alive and if it isn't then just throw away
* the vnode by calling vgone(). this may
* be overkill and a waste of time since the
* chances are that the process will still be
* there and PFIND is not free.
* (vp) is not locked on entry or exit.
struct vop_inactive_args
*ap
;
struct pfsnode
*pfs
= VTOPFS(ap
->a_vp
);
if (PFIND(pfs
->pfs_pid
) == 0)
* _reclaim is called when getnewvnode()
* wants to make use of an entry on the vnode
* free list. at this time the filesystem needs
* to free any private data and remove the node
* from any private lists.
struct vop_reclaim_args
*ap
;
error
= procfs_freevp(ap
->a_vp
);
* _print is used for debugging.
* just print a readable description
struct vop_print_args
*ap
;
struct pfsnode
*pfs
= VTOPFS(ap
->a_vp
);
printf("tag VT_PROCFS, pid %d, mode %x, flags %x\n",
pfs
->pfs_mode
, pfs
->pfs_flags
);
* _abortop is called when operations such as
* rename and create fail. this entry is responsible
* for undoing any side-effects caused by the lookup.
* this will always include freeing the pathname buffer.
struct vop_abortop_args
*ap
;
if ((ap
->a_cnp
->cn_flags
& (HASBUF
| SAVESTART
)) == HASBUF
)
FREE(ap
->a_cnp
->cn_pnbuf
, M_NAMEI
);
* generic entry point for unsupported operations
* Invent attributes for pfsnode (vp) and store
* Directory lengths are returned as zero since
* any real length would require the genuine size
* to be computed, and nothing cares anyway.
* this is relatively minimal for procfs.
struct vop_getattr_args
*ap
;
struct pfsnode
*pfs
= VTOPFS(ap
->a_vp
);
struct vattr
*vap
= ap
->a_vap
;
/* first check the process still exists */
procp
= PFIND(pfs
->pfs_pid
);
/* start by zeroing out the attributes */
/* next do all the common fields */
vap
->va_type
= ap
->a_vp
->v_type
;
vap
->va_mode
= pfs
->pfs_mode
;
vap
->va_fileid
= pfs
->pfs_fileno
;
vap
->va_blocksize
= PAGE_SIZE
;
vap
->va_bytes
= vap
->va_size
= 0;
* Make all times be current TOD.
* It would be possible to get the process start
* time from the p_stat structure, but there's
* no "file creation" time stamp anyway, and the
* p_stat structure is not addressable if u. gets
* swapped out for that process.
microtime(&vap
->va_ctime
);
vap
->va_atime
= vap
->va_mtime
= vap
->va_ctime
;
* now do the object specific fields
* The size could be set from struct reg, but it's hardly
* worth the trouble, and it puts some (potentially) machine
* dependent data into this machine-independent code. If it
* becomes important then this function should break out into
* a per-file stat function in the corresponding .c file.
vap
->va_uid
= procp
->p_ucred
->cr_uid
;
vap
->va_gid
= procp
->p_ucred
->cr_gid
;
vap
->va_bytes
= vap
->va_size
=
ctob(procp
->p_vmspace
->vm_tsize
+
procp
->p_vmspace
->vm_dsize
+
procp
->p_vmspace
->vm_ssize
);
vap
->va_uid
= procp
->p_ucred
->cr_uid
;
vap
->va_gid
= procp
->p_ucred
->cr_gid
;
vap
->va_uid
= procp
->p_ucred
->cr_uid
;
vap
->va_gid
= procp
->p_ucred
->cr_gid
;
struct vop_setattr_args
*ap
;
* just fake out attribute setting
* it's not good to generate an error
* return, otherwise things like creat()
* will fail when they try to set the
* file length to 0. worse, this means
* that echo $note > /proc/$pid/note will fail.
* implement access checking.
* something very similar to this code is duplicated
* throughout the 4bsd kernel and should be moved
* into kern/vfs_subr.c sometime.
* actually, the check for super-user is slightly
* broken since it will allow read access to write-only
* objects. this doesn't cause any particular trouble
* but does mean that the i/o entry points need to check
* that the operation really does make sense.
struct vop_access_args
*ap
;
* If you're the super-user,
if (ap
->a_cred
->cr_uid
== (uid_t
) 0)
if (error
= VOP_GETATTR(ap
->a_vp
, vap
, ap
->a_cred
, ap
->a_p
))
* Access check is based on only one of owner, group, public.
* If not owner, then check group. If not a member of the
* group, then check public access.
if (ap
->a_cred
->cr_uid
!= vap
->va_uid
) {
gp
= ap
->a_cred
->cr_groups
;
for (i
= 0; i
< ap
->a_cred
->cr_ngroups
; i
++, gp
++)
if ((vap
->va_mode
& ap
->a_mode
) == ap
->a_mode
)
* lookup. this is incredibly complicated in the
* general case, however for most pseudo-filesystems
* very little needs to be done.
* unless you want to get a migraine, just make sure your
* filesystem doesn't do any locking of its own. otherwise
* read and inwardly digest ufs_lookup().
struct vop_lookup_args
*ap
;
struct componentname
*cnp
= ap
->a_cnp
;
struct vnode
**vpp
= ap
->a_vpp
;
struct vnode
*dvp
= ap
->a_dvp
;
char *pname
= cnp
->cn_nameptr
;
if (cnp
->cn_namelen
== 1 && *pname
== '.') {
if (cnp
->cn_flags
& ISDOTDOT
)
if (CNEQ(cnp
, "curproc", 7))
pid
= cnp
->cn_proc
->p_pid
;
pid
= atopid(pname
, cnp
->cn_namelen
);
error
= procfs_allocvp(dvp
->v_mount
, &nvp
, pid
, Pproc
);
if (cnp
->cn_flags
& ISDOTDOT
) {
error
= procfs_root(dvp
->v_mount
, vpp
);
procp
= PFIND(pfs
->pfs_pid
);
for (i
= 0; i
< Nprocent
; i
++) {
struct pfsnames
*dp
= &procent
[i
];
if (cnp
->cn_namelen
== dp
->d_namlen
&&
bcmp(pname
, dp
->d_name
, dp
->d_namlen
) == 0) {
pfs_type
= dp
->d_pfstype
;
nvp
= procfs_findtextvp(procp
);
error
= procfs_allocvp(dvp
->v_mount
, &nvp
,
* readdir returns directory entries from pfsnode (vp).
* the strategy here with procfs is to generate a single
* directory entry at a time (struct pfsdent) and then
* copy that out to userland using uiomove. a more efficient,
* though more complex, implementation would try to minimize
* the number of calls to uiomove(). for procfs, this is
* hardly worth the added code complexity.
* this should just be done through read()
struct vop_readdir_args
*ap
;
struct uio
*uio
= ap
->a_uio
;
if (uio
->uio_resid
< UIO_MX
)
if (uio
->uio_offset
& (UIO_MX
-1))
i
= uio
->uio_offset
/ UIO_MX
;
* this is for the process-specific sub-directories.
* all that is needed is to copy out all the entries
* from the procent[] table (top of this file).
while (uio
->uio_resid
>= UIO_MX
) {
dp
->d_fileno
= PROCFS_FILENO(pfs
->pfs_pid
, dt
->d_pfstype
);
dp
->d_namlen
= dt
->d_namlen
;
bcopy(dt
->d_name
, dp
->d_name
, sizeof(dt
->d_name
)-1);
error
= uiomove((caddr_t
) dp
, UIO_MX
, uio
);
* this is for the root of the procfs filesystem
* what is needed is a special entry for "curproc"
* followed by an entry for each process on allproc
#define PROCFS_XFILES 1 /* number of other entries, like "curproc" */
while (p
&& uio
->uio_resid
>= UIO_MX
) {
bzero((char *) dp
, UIO_MX
);
/* ship out entry for "curproc" */
dp
->d_fileno
= PROCFS_FILENO(PID_MAX
+1, Pproc
);
dp
->d_namlen
= sprintf(dp
->d_name
, "curproc");
dp
->d_fileno
= PROCFS_FILENO(p
->p_pid
, Pproc
);
dp
->d_namlen
= sprintf(dp
->d_name
, "%ld", (long) p
->p_pid
);
if (p
== 0 && doingzomb
== 0) {
error
= uiomove((caddr_t
) dp
, UIO_MX
, uio
);
uio
->uio_offset
= i
* UIO_MX
;
* convert decimal ascii to pid_t
* procfs vnode operations.
/*
 * Pointer to a table of int-returning function pointers.  Its address is
 * recorded in procfs_vnodeop_opv_desc alongside procfs_vnodeop_entries.
 */
int (**procfs_vnodeop_p)();
/*
 * Operation table for procfs vnodes.  Each entry pairs a vnode operation
 * descriptor with the procfs routine registered for it.  The first entry
 * maps the default descriptor to vn_default_error, and the list ends with
 * a NULL/NULL pair.
 *
 * The closing "};" of the initializer was missing, which left this array
 * syntactically fused with the definition that follows it; it is restored
 * here.
 */
struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, procfs_lookup },		/* lookup */
	{ &vop_create_desc, procfs_create },		/* create */
	{ &vop_mknod_desc, procfs_mknod },		/* mknod */
	{ &vop_open_desc, procfs_open },		/* open */
	{ &vop_close_desc, procfs_close },		/* close */
	{ &vop_access_desc, procfs_access },		/* access */
	{ &vop_getattr_desc, procfs_getattr },		/* getattr */
	{ &vop_setattr_desc, procfs_setattr },		/* setattr */
	{ &vop_read_desc, procfs_read },		/* read */
	{ &vop_write_desc, procfs_write },		/* write */
	{ &vop_ioctl_desc, procfs_ioctl },		/* ioctl */
	{ &vop_select_desc, procfs_select },		/* select */
	{ &vop_mmap_desc, procfs_mmap },		/* mmap */
	{ &vop_fsync_desc, procfs_fsync },		/* fsync */
	{ &vop_seek_desc, procfs_seek },		/* seek */
	{ &vop_remove_desc, procfs_remove },		/* remove */
	{ &vop_link_desc, procfs_link },		/* link */
	{ &vop_rename_desc, procfs_rename },		/* rename */
	{ &vop_mkdir_desc, procfs_mkdir },		/* mkdir */
	{ &vop_rmdir_desc, procfs_rmdir },		/* rmdir */
	{ &vop_symlink_desc, procfs_symlink },		/* symlink */
	{ &vop_readdir_desc, procfs_readdir },		/* readdir */
	{ &vop_readlink_desc, procfs_readlink },	/* readlink */
	{ &vop_abortop_desc, procfs_abortop },		/* abortop */
	{ &vop_inactive_desc, procfs_inactive },	/* inactive */
	{ &vop_reclaim_desc, procfs_reclaim },		/* reclaim */
	{ &vop_lock_desc, procfs_lock },		/* lock */
	{ &vop_unlock_desc, procfs_unlock },		/* unlock */
	{ &vop_bmap_desc, procfs_bmap },		/* bmap */
	{ &vop_strategy_desc, procfs_strategy },	/* strategy */
	{ &vop_print_desc, procfs_print },		/* print */
	{ &vop_islocked_desc, procfs_islocked },	/* islocked */
	{ &vop_pathconf_desc, procfs_pathconf },	/* pathconf */
	{ &vop_advlock_desc, procfs_advlock },		/* advlock */
	{ &vop_blkatoff_desc, procfs_blkatoff },	/* blkatoff */
	{ &vop_valloc_desc, procfs_valloc },		/* valloc */
	{ &vop_vfree_desc, procfs_vfree },		/* vfree */
	{ &vop_truncate_desc, procfs_truncate },	/* truncate */
	{ &vop_update_desc, procfs_update },		/* update */
	{ (struct vnodeop_desc *)NULL, (int (*)())NULL }
};
/*
 * Descriptor pairing the address of procfs_vnodeop_p with the
 * procfs_vnodeop_entries table.
 */
struct vnodeopv_desc procfs_vnodeop_opv_desc = {
	&procfs_vnodeop_p,
	procfs_vnodeop_entries
};