/*	$NetBSD: procfs_vfsops.c,v 1.120 2024/09/14 01:37:42 pgoyette Exp $	*/

/*
 * Copyright (c) 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)procfs_vfsops.c	8.7 (Berkeley) 5/10/95
 */

/*
 * Copyright (c) 1993 Jan-Simon Pendry
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)procfs_vfsops.c	8.7 (Berkeley) 5/10/95
 */

/*
 * procfs VFS interface
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: procfs_vfsops.c,v 1.120 2024/09/14 01:37:42 pgoyette Exp $");

#if defined(_KERNEL_OPT)
#include "opt_compat_netbsd.h"
#include "opt_sysv_ipc.h"
#include "opt_mqueue.h"
#endif

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/dirent.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fstrans.h>
#include <sys/kauth.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/vnode.h>

#include <miscfs/genfs/genfs.h>

#include <miscfs/procfs/procfs.h>

#include <uvm/uvm_extern.h>			/* for PAGE_SIZE */

MODULE(MODULE_CLASS_VFS, procfs, "ptrace_common"
#if defined(MQUEUE)
				 ",mqueue"
#endif
#if defined(SYSVSHM) || defined(SYSVSEM) || defined(SYSVMSG)
				 ",sysv_ipc"
#endif
);

VFS_PROTOS(procfs);

#define PROCFS_HASHSIZE	256
#define PROCFS_EXEC_HOOK ((void *)1)
#define PROCFS_EXIT_HOOK ((void *)2)

static kauth_listener_t procfs_listener;
static void *procfs_exechook;
static void *procfs_exithook;
LIST_HEAD(hashhead, pfsnode);
static u_long procfs_hashmask;
static struct hashhead *procfs_hashtab;
static kmutex_t procfs_hashlock;

static struct hashhead *
procfs_hashhead(pid_t pid)
{

	return &procfs_hashtab[pid & procfs_hashmask];
}

void
procfs_hashrem(struct pfsnode *pfs)
{

	mutex_enter(&procfs_hashlock);
	LIST_REMOVE(pfs, pfs_hash);
	mutex_exit(&procfs_hashlock);
}

/*
 * VFS Operations.
 *
 * mount system call
 */
/* ARGSUSED */
int
procfs_mount(
    struct mount *mp,
    const char *path,
    void *data,
    size_t *data_len)
{
	struct lwp *l = curlwp;
	struct procfsmount *pmnt;
	struct procfs_args *args = data;
	int error;

	if (args == NULL)
		return EINVAL;

	if (UIO_MX & (UIO_MX-1)) {
		log(LOG_ERR, "procfs: invalid directory entry size");
		return (EINVAL);
	}

	if (mp->mnt_flag & MNT_GETARGS) {
		if (*data_len < sizeof *args)
			return EINVAL;

		pmnt = VFSTOPROC(mp);
		if (pmnt == NULL)
			return EIO;
		args->version = PROCFS_ARGSVERSION;
		args->flags = pmnt->pmnt_flags;
		*data_len = sizeof *args;
		return 0;
	}

	if (mp->mnt_flag & MNT_UPDATE)
		return (EOPNOTSUPP);

	if (*data_len >= sizeof *args && args->version != PROCFS_ARGSVERSION)
		return EINVAL;

	pmnt = kmem_zalloc(sizeof(struct procfsmount), KM_SLEEP);

	mp->mnt_stat.f_namemax = PROCFS_MAXNAMLEN;
	mp->mnt_flag |= MNT_LOCAL;
	mp->mnt_data = pmnt;
	vfs_getnewfsid(mp);

	error = set_statvfs_info(path, UIO_USERSPACE, "procfs", UIO_SYSSPACE,
	    mp->mnt_op->vfs_name, mp, l);
	if (*data_len >= sizeof *args)
		pmnt->pmnt_flags = args->flags;
	else
		pmnt->pmnt_flags = 0;

	mp->mnt_iflag |= IMNT_MPSAFE | IMNT_SHRLOOKUP;
	return error;
}

/*
 * unmount system call
 */
int
procfs_unmount(struct mount *mp, int mntflags)
{
	int error;
	int flags = 0;

	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;

	if ((error = vflush(mp, 0, flags)) != 0)
		return (error);

	kmem_free(mp->mnt_data, sizeof(struct procfsmount));
	mp->mnt_data = NULL;

	return 0;
}

int
procfs_root(struct mount *mp, int lktype, struct vnode **vpp)
{
	int error;

	error = procfs_allocvp(mp, vpp, 0, PFSroot, -1);
	if (error == 0) {
		error = vn_lock(*vpp, lktype);
		if (error != 0) {
			vrele(*vpp);
			*vpp = NULL;
		}
	}

	return error;
}

/* ARGSUSED */
int
procfs_start(struct mount *mp, int flags)
{

	return (0);
}

/*
 * Get file system statistics.
 */
int
procfs_statvfs(struct mount *mp, struct statvfs *sbp)
{

	genfs_statvfs(mp, sbp);

	sbp->f_bsize = PAGE_SIZE;
	sbp->f_frsize = PAGE_SIZE;
	sbp->f_iosize = PAGE_SIZE;
	sbp->f_blocks = 1;
	sbp->f_files = maxproc;					/* approx */
	sbp->f_ffree = maxproc - atomic_load_relaxed(&nprocs);	/* approx */
	sbp->f_favail = maxproc - atomic_load_relaxed(&nprocs);	/* approx */

	return (0);
}

/*ARGSUSED*/
int
procfs_sync(
    struct mount *mp,
    int waitfor,
    kauth_cred_t uc)
{

	return (0);
}

/*ARGSUSED*/
int
procfs_vget(struct mount *mp, ino_t ino, int lktype,
    struct vnode **vpp)
{
	return (EOPNOTSUPP);
}

int
procfs_loadvnode(struct mount *mp, struct vnode *vp,
    const void *key, size_t key_len, const void **new_key)
{
	int error;
	struct pfskey pfskey;
	struct pfsnode *pfs;

	KASSERT(key_len == sizeof(pfskey));
	memcpy(&pfskey, key, key_len);

	pfs = kmem_alloc(sizeof(*pfs), KM_SLEEP);
	pfs->pfs_pid = pfskey.pk_pid;
	pfs->pfs_type = pfskey.pk_type;
	pfs->pfs_fd = pfskey.pk_fd;
	pfs->pfs_vnode = vp;
	pfs->pfs_mount = mp;
	pfs->pfs_flags = 0;
	pfs->pfs_fileno =
	    PROCFS_FILENO(pfs->pfs_pid, pfs->pfs_type, pfs->pfs_fd);
	vp->v_tag = VT_PROCFS;
	vp->v_op = procfs_vnodeop_p;
	vp->v_data = pfs;

	switch (pfs->pfs_type) {
	case PFSroot:	/* /proc = dr-xr-xr-x */
		vp->v_vflag |= VV_ROOT;
		/*FALLTHROUGH*/
	case PFSproc:	/* /proc/N = dr-xr-xr-x */
		pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
		vp->v_type = VDIR;
		break;

	case PFStask:	/* /proc/N/task = dr-xr-xr-x */
		if (pfs->pfs_fd == -1) {
			pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|
			    S_IROTH|S_IXOTH;
			vp->v_type = VDIR;
			break;
		}
		/*FALLTHROUGH*/
	case PFScurproc:	/* /proc/curproc = lr-xr-xr-x */
	case PFSself:	/* /proc/self    = lr-xr-xr-x */
	case PFScwd:	/* /proc/N/cwd = lr-xr-xr-x */
	case PFSchroot:	/* /proc/N/chroot = lr-xr-xr-x */
	case PFSexe:	/* /proc/N/exe = lr-xr-xr-x */
		pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
		vp->v_type = VLNK;
		break;

	case PFSfd:
		if (pfs->pfs_fd == -1) {	/* /proc/N/fd = dr-x------ */
			pfs->pfs_mode = S_IRUSR|S_IXUSR;
			vp->v_type = VDIR;
		} else {	/* /proc/N/fd/M = [ps-]rw------- */
			file_t *fp;
			vnode_t *vxp;
			struct proc *p;

			mutex_enter(&proc_lock);
			p = procfs_proc_find(mp, pfs->pfs_pid);
			mutex_exit(&proc_lock);
			if (p == NULL) {
				error = ENOENT;
				goto bad;
			}
			KASSERT(rw_read_held(&p->p_reflock));
			if ((fp = fd_getfile2(p, pfs->pfs_fd)) == NULL) {
				error = EBADF;
				goto bad;
			}

			pfs->pfs_mode = S_IRUSR|S_IWUSR;
			switch (fp->f_type) {
			case DTYPE_VNODE:
				vxp = fp->f_vnode;

				/*
				 * We make symlinks for directories
				 * to avoid cycles.
				 */
				if (vxp->v_type == VDIR ||
				    procfs_proc_is_linux_compat())
					goto symlink;
				vp->v_type = vxp->v_type;
				break;
			case DTYPE_PIPE:
				vp->v_type = VFIFO;
				break;
			case DTYPE_SOCKET:
				vp->v_type = VSOCK;
				break;
			case DTYPE_KQUEUE:
			case DTYPE_MISC:
			case DTYPE_SEM:
			symlink:
				pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|
				    S_IXGRP|S_IROTH|S_IXOTH;
				vp->v_type = VLNK;
				break;
			default:
				error = EOPNOTSUPP;
				closef(fp);
				goto bad;
			}
			closef(fp);
		}
		break;

	case PFSfile:	/* /proc/N/file = -rw------- */
	case PFSmem:	/* /proc/N/mem = -rw------- */
	case PFSregs:	/* /proc/N/regs = -rw------- */
	case PFSfpregs:	/* /proc/N/fpregs = -rw------- */
		pfs->pfs_mode = S_IRUSR|S_IWUSR;
		vp->v_type = VREG;
		break;

	case PFSnote:	/* /proc/N/note = --w------ */
	case PFSnotepg:	/* /proc/N/notepg = --w------ */
		pfs->pfs_mode = S_IWUSR;
		vp->v_type = VREG;
		break;

	case PFSmap:		/* /proc/N/map = -r-------- */
	case PFSmaps:		/* /proc/N/maps = -r-------- */
	case PFSauxv:		/* /proc/N/auxv = -r-------- */
	case PFSenviron:	/* /proc/N/environ = -r-------- */
		pfs->pfs_mode = S_IRUSR;
		vp->v_type = VREG;
		break;

	case PFSstatus:		/* /proc/N/status = -r--r--r-- */
	case PFSstat:		/* /proc/N/stat = -r--r--r-- */
	case PFScmdline:	/* /proc/N/cmdline = -r--r--r-- */
	case PFSemul:		/* /proc/N/emul = -r--r--r-- */
	case PFSmeminfo:	/* /proc/meminfo = -r--r--r-- */
	case PFScpustat:	/* /proc/stat = -r--r--r-- */
	case PFSdevices:	/* /proc/devices = -r--r--r-- */
	case PFScpuinfo:	/* /proc/cpuinfo = -r--r--r-- */
	case PFSuptime:		/* /proc/uptime = -r--r--r-- */
	case PFSmounts:		/* /proc/mounts = -r--r--r-- */
	case PFSloadavg:	/* /proc/loadavg = -r--r--r-- */
	case PFSstatm:		/* /proc/N/statm = -r--r--r-- */
	case PFSversion:	/* /proc/version = -r--r--r-- */
	case PFSlimit:		/* /proc/N/limit = -r--r--r-- */
	case PFSlimits:		/* /proc/N/limits = -r--r--r-- */
		pfs->pfs_mode = S_IRUSR|S_IRGRP|S_IROTH;
		vp->v_type = VREG;
		break;

	case PFSsys:	/* /proc/sys = dr-xr-xr-x */
	case PFSsysfs:	/* /proc/sys/fs = dr-xr-xr-x */
	case PFSmqueue:	/* /proc/sys/fs/mqueue = dr-xr-xr-x */
	case PFSsysvipc:/* /proc/sysvipc = dr-xr-xr-x */
		if (pfs->pfs_fd == -1) {
			pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|
			    S_IROTH|S_IXOTH;
			vp->v_type = VDIR;
			break;
		}
		/*FALLTHROUGH*/
	case PFSmq_msg_def:	/* /proc/sys/fs/mqueue/msg_default = -r--r--r-- */
	case PFSmq_msg_max:	/* /proc/sys/fs/mqueue/msg_max = -r--r--r-- */
	case PFSmq_siz_def:	/* /proc/sys/fs/mqueue/msgsize_default = -r--r--r-- */
	case PFSmq_siz_max:	/* /proc/sys/fs/mqueue/msgsize_max = -r--r--r-- */
	case PFSmq_qmax:	/* /proc/sys/fs/mqueue/queues_max = -r--r--r-- */
	case PFSsysvipc_msg:	/* /proc/sysvipc/msg = -r--r--r-- */
	case PFSsysvipc_sem:	/* /proc/sysvipc/sem = -r--r--r-- */
	case PFSsysvipc_shm:	/* /proc/sysvipc/shm = -r--r--r-- */
		pfs->pfs_mode = S_IRUSR|S_IRGRP|S_IROTH;
		vp->v_type = VREG;
		break;

#ifdef __HAVE_PROCFS_MACHDEP
	PROCFS_MACHDEP_NODETYPE_CASES
		procfs_machdep_allocvp(vp);
		break;
#endif

	default:
		panic("procfs_allocvp");
	}

	mutex_enter(&procfs_hashlock);
	LIST_INSERT_HEAD(procfs_hashhead(pfs->pfs_pid), pfs, pfs_hash);
	mutex_exit(&procfs_hashlock);

	uvm_vnp_setsize(vp, 0);
	*new_key = &pfs->pfs_key;

	return 0;

bad:
	vp->v_tag =VT_NON;
	vp->v_type = VNON;
	vp->v_op = NULL;
	vp->v_data = NULL;
	kmem_free(pfs, sizeof(*pfs));
	return error;
}

void
procfs_init(void)
{

}

void
procfs_reinit(void)
{

}

void
procfs_done(void)
{

}

extern const struct vnodeopv_desc procfs_vnodeop_opv_desc;

const struct vnodeopv_desc * const procfs_vnodeopv_descs[] = {
	&procfs_vnodeop_opv_desc,
	NULL,
};

struct vfsops procfs_vfsops = {
	.vfs_name = MOUNT_PROCFS,
	.vfs_min_mount_data = sizeof (struct procfs_args),
	.vfs_mount = procfs_mount,
	.vfs_start = procfs_start,
	.vfs_unmount = procfs_unmount,
	.vfs_root = procfs_root,
	.vfs_quotactl = (void *)eopnotsupp,
	.vfs_statvfs = procfs_statvfs,
	.vfs_sync = procfs_sync,
	.vfs_vget = procfs_vget,
	.vfs_loadvnode = procfs_loadvnode,
	.vfs_fhtovp = (void *)eopnotsupp,
	.vfs_vptofh = (void *)eopnotsupp,
	.vfs_init = procfs_init,
	.vfs_reinit = procfs_reinit,
	.vfs_done = procfs_done,
	.vfs_snapshot = (void *)eopnotsupp,
	.vfs_extattrctl = vfs_stdextattrctl,
	.vfs_suspendctl = genfs_suspendctl,
	.vfs_renamelock_enter = genfs_renamelock_enter,
	.vfs_renamelock_exit = genfs_renamelock_exit,
	.vfs_fsync = (void *)eopnotsupp,
	.vfs_opv_descs = procfs_vnodeopv_descs
};

static void
procfs_exechook_cb(struct proc *p, void *arg)
{
	struct hashhead *head;
	struct pfsnode *pfs;
	struct mount *mp;
	struct pfskey key;
	struct vnode *vp;
	int error;

	if (arg == PROCFS_EXEC_HOOK && !(p->p_flag & PK_SUGID))
		return;

	head = procfs_hashhead(p->p_pid);

again:
	mutex_enter(&procfs_hashlock);
	LIST_FOREACH(pfs, head, pfs_hash) {
		if (pfs->pfs_pid != p->p_pid)
			continue;
		mp = pfs->pfs_mount;
		key = pfs->pfs_key;
		vfs_ref(mp);
		mutex_exit(&procfs_hashlock);

		error = vcache_get(mp, &key, sizeof(key), &vp);
		vfs_rele(mp);
		if (error != 0)
			goto again;
		if (vrecycle(vp))
			goto again;
		do {
			error = vfs_suspend(mp, 0);
		} while (error == EINTR || error == ERESTART);
		vgone(vp);
		if (error == 0)
			vfs_resume(mp);
		goto again;
	}
	mutex_exit(&procfs_hashlock);
}

static int
procfs_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    void *arg0, void *arg1, void *arg2, void *arg3)
{
	struct proc *p;
	struct pfsnode *pfs;
	int result;

	result = KAUTH_RESULT_DEFER;
	p = arg0;
	pfs = arg1;

	if (action != KAUTH_PROCESS_PROCFS)
		return result;

	switch (pfs->pfs_type) {
	case PFSregs:
	case PFSfpregs:
	case PFSmem:
		if (kauth_cred_getuid(cred) != kauth_cred_getuid(p->p_cred) ||
		    ISSET(p->p_flag, PK_SUGID))
			break;

		/*FALLTHROUGH*/
	default:
		result = KAUTH_RESULT_ALLOW;
		break;
	}

	return result;
}

SYSCTL_SETUP(procfs_sysctl_setup, "procfs sysctl")
{

	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "procfs",
		       SYSCTL_DESCR("Process file system"),
		       NULL, 0, NULL, 0,
		       CTL_VFS, 12, CTL_EOL);
	/*
	 * XXX the "12" above could be dynamic, thereby eliminating
	 * one more instance of the "number to vfs" mapping problem,
	 * but "12" is the order as taken from sys/mount.h
	 */
}

static int
procfs_modcmd(modcmd_t cmd, void *arg)
{
	int error;

	switch (cmd) {
	case MODULE_CMD_INIT:
		error = vfs_attach(&procfs_vfsops);
		if (error != 0)
			break;

		procfs_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
		    procfs_listener_cb, NULL);

		procfs_exechook = exechook_establish(procfs_exechook_cb,
		    PROCFS_EXEC_HOOK);
		procfs_exithook = exithook_establish(procfs_exechook_cb,
		    PROCFS_EXIT_HOOK);

		mutex_init(&procfs_hashlock, MUTEX_DEFAULT, IPL_NONE);
		procfs_hashtab = hashinit(PROCFS_HASHSIZE, HASH_LIST, true,
		    &procfs_hashmask);

		break;
	case MODULE_CMD_FINI:
		error = vfs_detach(&procfs_vfsops);
		if (error != 0)
			break;
		kauth_unlisten_scope(procfs_listener);
		exechook_disestablish(procfs_exechook);
		exithook_disestablish(procfs_exithook);
		mutex_destroy(&procfs_hashlock);
		hashdone(procfs_hashtab, HASH_LIST, procfs_hashmask);
		break;
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}