freebsd/sys: Import VFS support

Update #4475
This commit is contained in:
Chris Johns 2021-07-22 11:50:13 +10:00
parent e56b5cb135
commit 1739d74f7d
32 changed files with 43844 additions and 0 deletions

View File

@ -0,0 +1,159 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)dead_vnops.c 8.1 (Berkeley) 6/10/93
* $FreeBSD$
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/poll.h>
#include <sys/vnode.h>
/*
 * Prototypes for dead operations on vnodes.
 *
 * Each file-local handler is declared through its vop_*_t function
 * typedef and defined further down in this file; all five are installed
 * in the dead_vnodeops vector.
 */
static vop_lookup_t dead_lookup;
static vop_open_t dead_open;
static vop_getwritemount_t dead_getwritemount;
static vop_rename_t dead_rename;
static vop_unset_text_t dead_unset_text;
/*
 * Operations vector for dead vnodes.
 *
 * vop_default names default_vnodeops as the fallback vector.  The entries
 * handled by functions defined in this file are getwritemount, lookup,
 * open, poll, read, rename, unset_text and write; every other entry listed
 * here uses one of the generic VOP_EBADF, VOP_PANIC or VOP_NULL handlers.
 * dead_poll, dead_read and dead_write are not declared in this file, so
 * their prototypes presumably come from an included header - confirm.
 */
struct vop_vector dead_vnodeops = {
.vop_default = &default_vnodeops,
.vop_access = VOP_EBADF,
.vop_advlock = VOP_EBADF,
.vop_bmap = VOP_EBADF,
.vop_create = VOP_PANIC,
.vop_getattr = VOP_EBADF,
.vop_getwritemount = dead_getwritemount,
.vop_inactive = VOP_NULL,
.vop_ioctl = VOP_EBADF,
.vop_link = VOP_PANIC,
.vop_lookup = dead_lookup,
.vop_mkdir = VOP_PANIC,
.vop_mknod = VOP_PANIC,
.vop_open = dead_open,
.vop_pathconf = VOP_EBADF, /* per pathconf(2) */
.vop_poll = dead_poll,
.vop_read = dead_read,
.vop_readdir = VOP_EBADF,
.vop_readlink = VOP_EBADF,
.vop_reclaim = VOP_NULL,
.vop_remove = VOP_PANIC,
.vop_rename = dead_rename,
.vop_rmdir = VOP_PANIC,
.vop_setattr = VOP_EBADF,
.vop_symlink = VOP_PANIC,
.vop_vptocnp = VOP_EBADF,
.vop_unset_text = dead_unset_text,
.vop_write = dead_write,
};
/*
 * A dead vnode has no mount to write through: clear the caller's mount
 * pointer and report success.
 */
static int
dead_getwritemount(struct vop_getwritemount_args *ap)
{

	*ap->a_mpp = NULL;
	return (0);
}
/*
 * Lookup through a dead vnode never succeeds: the result vnode pointer is
 * cleared and ENOTDIR is returned.
 */
static int
dead_lookup(struct vop_lookup_args *ap)
{

	*(ap->a_vpp) = NULL;
	return (ENOTDIR);
}
/*
 * Opening a dead vnode always fails with ENXIO, as if the device did not
 * exist.  The arguments are not examined.
 */
static int
dead_open(struct vop_open_args *ap)
{

	(void)ap;
	return (ENXIO);
}
/*
 * Read from a dead vnode.  A vnode flagged VV_ISTTY reads as end-of-file
 * (0 is returned and nothing is transferred); any other vnode fails with
 * EIO.
 */
int
dead_read(struct vop_read_args *ap)
{

	if ((ap->a_vp->v_vflag & VV_ISTTY) != 0)
		return (0);	/* tty: report EOF */
	return (EIO);
}
/*
 * Writing to a dead vnode always fails with EIO.  The arguments are not
 * examined.
 */
int
dead_write(struct vop_write_args *ap)
{

	(void)ap;
	return (EIO);
}
/*
 * Poll a dead vnode.  A request containing any event outside
 * POLLSTANDARD yields POLLNVAL.  Otherwise POLLHUP is reported, together
 * with whichever of POLLIN and POLLRDNORM were requested, so that the
 * user finds out the descriptor is gone.
 */
int
dead_poll(struct vop_poll_args *ap)
{

	if ((ap->a_events & ~POLLSTANDARD) != 0)
		return (POLLNVAL);

	return ((ap->a_events & (POLLIN | POLLRDNORM)) | POLLHUP);
}
/*
 * Rename involving a dead vnode always fails with EXDEV.  The arguments
 * are first handed to vop_rename_fail(), which presumably releases the
 * vnodes held for the rename - confirm against its definition.
 */
static int
dead_rename(struct vop_rename_args *ap)
{

	vop_rename_fail(ap);

	return (EXDEV);
}
/*
 * Nothing to undo for a dead vnode: always succeeds without looking at
 * the arguments.
 */
static int
dead_unset_text(struct vop_unset_text_args *ap)
{

	(void)ap;
	return (0);
}

View File

@ -0,0 +1,491 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 2001 Dag-Erling Coïdan Smørgrav
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer
* in this position and unchanged.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_pseudofs.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <fs/pseudofs/pseudofs.h>
#include <fs/pseudofs/pseudofs_internal.h>
/* Malloc type used for the struct pfs_node allocations in this file. */
static MALLOC_DEFINE(M_PFSNODES, "pfs_nodes", "pseudofs nodes");
/* The "pfs" sysctl node under _vfs; declared for other files in pseudofs_internal.h. */
SYSCTL_NODE(_vfs, OID_AUTO, pfs, CTLFLAG_RW, 0,
"pseudofs");
#ifdef PSEUDOFS_TRACE
/* Run-time switch tested by PFS_TRACE() and PFS_RETURN(); settable through the "trace" sysctl. */
int pfs_trace;
SYSCTL_INT(_vfs_pfs, OID_AUTO, trace, CTLFLAG_RW, &pfs_trace, 0,
"enable tracing of pseudofs vnode operations");
#endif
/* Build-time check that the two name-length constants agree. */
#if PFS_FSNAMELEN != MFSNAMELEN
#error "PFS_FSNAMELEN is not equal to MFSNAMELEN"
#endif
/*
 * Allocate and initialize a node
 *
 * The node is zero-filled, then given its name, type, owning pfs_info and
 * an initialized mutex.  With PFS_NOWAIT set in flags the allocation uses
 * M_NOWAIT and NULL is returned if it fails; otherwise M_WAITOK is used.
 * The name must be shorter than PFS_NAMELEN (asserted only).
 */
static struct pfs_node *
pfs_alloc_node_flags(struct pfs_info *pi, const char *name, pfs_type_t type,
    int flags)
{
	struct pfs_node *node;
	int mflags;

	KASSERT(strlen(name) < PFS_NAMELEN,
	    ("%s(): node name is too long", __func__));

	mflags = M_ZERO | ((flags & PFS_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK);
	node = malloc(sizeof(*node), M_PFSNODES, mflags);
	if (node == NULL)
		return (NULL);

	node->pn_info = pi;
	node->pn_type = type;
	strlcpy(node->pn_name, name, sizeof(node->pn_name));
	mtx_init(&node->pn_mutex, "pfs_node", NULL, MTX_DEF | MTX_DUPOK);
	return (node);
}
/*
 * Allocate a node with no flags, i.e. with a waiting (M_WAITOK)
 * allocation inside pfs_alloc_node_flags().
 */
static struct pfs_node *
pfs_alloc_node(struct pfs_info *pi, const char *name, pfs_type_t type)
{
	const int no_flags = 0;

	return (pfs_alloc_node_flags(pi, name, type, no_flags));
}
/*
 * Add a node to a directory
 *
 * Links pn at the head of parent's child list.  pn must not already have
 * a parent, and parent must be a directory-type node (dir, procdir or
 * root) with a non-NULL pn_info; these preconditions are only asserted.
 * If parent carries PFS_PROCDEP, pn inherits that flag.
 */
static void
pfs_add_node(struct pfs_node *parent, struct pfs_node *pn)
{
#ifdef INVARIANTS
struct pfs_node *iter;
#endif
KASSERT(parent != NULL,
("%s(): parent is NULL", __func__));
KASSERT(pn->pn_parent == NULL,
("%s(): node already has a parent", __func__));
KASSERT(parent->pn_info != NULL,
("%s(): parent has no pn_info", __func__));
KASSERT(parent->pn_type == pfstype_dir ||
parent->pn_type == pfstype_procdir ||
parent->pn_type == pfstype_root,
("%s(): parent is not a directory", __func__));
#ifdef INVARIANTS
/*
 * Sanity checks only: a procdir may not sit below another procdir,
 * sibling names must be unique, and a directory may contain at most
 * one procdir.
 */
/* XXX no locking! */
if (pn->pn_type == pfstype_procdir)
for (iter = parent; iter != NULL; iter = iter->pn_parent)
KASSERT(iter->pn_type != pfstype_procdir,
("%s(): nested process directories", __func__));
for (iter = parent->pn_nodes; iter != NULL; iter = iter->pn_next) {
KASSERT(strcmp(pn->pn_name, iter->pn_name) != 0,
("%s(): homonymous siblings", __func__));
if (pn->pn_type == pfstype_procdir)
KASSERT(iter->pn_type != pfstype_procdir,
("%s(): sibling process directories", __func__));
}
#endif
/*
 * pn_parent has to be set before the file number is allocated:
 * pfs_fileno_alloc() reads it for "." and ".." nodes.
 */
pn->pn_parent = parent;
pfs_fileno_alloc(pn);
/* the child list is only modified under the parent's mutex */
pfs_lock(parent);
pn->pn_next = parent->pn_nodes;
if ((parent->pn_flags & PFS_PROCDEP) != 0)
pn->pn_flags |= PFS_PROCDEP;
parent->pn_nodes = pn;
pfs_unlock(parent);
}
/*
 * Detach a node from its parent
 *
 * Unlinks pn from the parent's child list (if it is found there) and
 * clears pn->pn_parent, all under the parent's mutex.  pn must have a
 * parent belonging to the same pfs_info (asserted only).
 */
static void
pfs_detach_node(struct pfs_node *pn)
{
	struct pfs_node *parent = pn->pn_parent;
	struct pfs_node **linkp;

	KASSERT(parent != NULL, ("%s(): node has no parent", __func__));
	KASSERT(parent->pn_info == pn->pn_info,
	    ("%s(): parent has different pn_info", __func__));

	pfs_lock(parent);
	/* walk the links rather than the nodes so the head needs no special case */
	for (linkp = &parent->pn_nodes; *linkp != NULL;
	    linkp = &(*linkp)->pn_next) {
		if (*linkp == pn) {
			*linkp = pn->pn_next;
			break;
		}
	}
	pn->pn_parent = NULL;
	pfs_unlock(parent);
}
/*
 * Add . and .. to a directory
 *
 * Both nodes are allocated before either is linked in, so a failed
 * allocation leaves the directory untouched.  Returns 0 on success or
 * ENOMEM if an allocation fails (only possible with PFS_NOWAIT in flags).
 */
static int
pfs_fixup_dir_flags(struct pfs_node *parent, int flags)
{
	struct pfs_info *pi = parent->pn_info;
	struct pfs_node *self, *up;

	self = pfs_alloc_node_flags(pi, ".", pfstype_this, flags);
	if (self == NULL)
		return (ENOMEM);

	up = pfs_alloc_node_flags(pi, "..", pfstype_parent, flags);
	if (up == NULL) {
		pfs_destroy(self);
		return (ENOMEM);
	}

	pfs_add_node(parent, self);
	pfs_add_node(parent, up);
	return (0);
}
/*
 * Add . and .. to a directory using waiting allocations.  The result of
 * pfs_fixup_dir_flags() is discarded: with flags == 0 the nodes are
 * allocated with M_WAITOK.
 */
static void
pfs_fixup_dir(struct pfs_node *parent)
{

	(void)pfs_fixup_dir_flags(parent, 0);
}
/*
 * Create a directory
 *
 * The new node is a procdir when PFS_PROCDEP is set in flags and a plain
 * dir otherwise.  It is linked under parent and then given its "." and
 * ".." entries.  Returns the new node, or NULL if an allocation fails;
 * in the latter case a directory that was already linked in is destroyed
 * again, which also runs the destroy callback if one was supplied.
 */
struct pfs_node *
pfs_create_dir(struct pfs_node *parent, const char *name,
    pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy,
    int flags)
{
	struct pfs_node *dir;
	pfs_type_t type;

	type = (flags & PFS_PROCDEP) ? pfstype_procdir : pfstype_dir;
	dir = pfs_alloc_node_flags(parent->pn_info, name, type, flags);
	if (dir == NULL)
		return (NULL);

	dir->pn_flags = flags;
	dir->pn_attr = attr;
	dir->pn_vis = vis;
	dir->pn_destroy = destroy;
	pfs_add_node(parent, dir);

	if (pfs_fixup_dir_flags(dir, flags) != 0) {
		pfs_destroy(dir);
		return (NULL);
	}
	return (dir);
}
/*
 * Create a file
 *
 * Allocates a pfstype_file node, records the supplied callbacks and
 * flags, and links it under parent.  Returns the node, or NULL if the
 * allocation fails.
 */
struct pfs_node *
pfs_create_file(struct pfs_node *parent, const char *name, pfs_fill_t fill,
    pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy,
    int flags)
{
	struct pfs_node *file;

	file = pfs_alloc_node_flags(parent->pn_info, name, pfstype_file, flags);
	if (file != NULL) {
		file->pn_flags = flags;
		file->pn_fill = fill;
		file->pn_attr = attr;
		file->pn_vis = vis;
		file->pn_destroy = destroy;
		pfs_add_node(parent, file);
	}
	return (file);
}
/*
 * Create a symlink
 *
 * Same as pfs_create_file(), except that the node type is
 * pfstype_symlink.  Returns the node, or NULL if the allocation fails.
 */
struct pfs_node *
pfs_create_link(struct pfs_node *parent, const char *name, pfs_fill_t fill,
    pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy,
    int flags)
{
	struct pfs_node *link;

	link = pfs_alloc_node_flags(parent->pn_info, name, pfstype_symlink,
	    flags);
	if (link == NULL)
		return (NULL);

	link->pn_flags = flags;
	link->pn_destroy = destroy;
	link->pn_vis = vis;
	link->pn_attr = attr;
	link->pn_fill = fill;

	pfs_add_node(parent, link);
	return (link);
}
/*
 * Locate a node by name
 *
 * Searches parent's direct children for an exact name match while
 * holding parent's mutex.  Returns the matching node or NULL.  The mutex
 * is released before returning, so nothing here keeps the node alive
 * afterwards.
 */
struct pfs_node *
pfs_find_node(struct pfs_node *parent, const char *name)
{
	struct pfs_node *child;

	pfs_lock(parent);
	child = parent->pn_nodes;
	while (child != NULL && strcmp(child->pn_name, name) != 0)
		child = child->pn_next;
	pfs_unlock(parent);

	return (child);
}
/*
 * Destroy a node and all its descendants.
 *
 * A node that still has a parent is first unlinked with
 * pfs_detach_node().  The children of a directory-type node are then
 * unlinked one at a time and destroyed recursively, the node's vnodes are
 * revoked with pfs_purge(), the optional pn_destroy callback is run, and
 * finally the file number, the mutex and the memory are released.
 * Always returns 0.
 *
 * NOTE(review): this comment used to say that the parent's mutex must be
 * held by the caller.  That looks stale: pfs_detach_node() itself calls
 * pfs_lock(parent), and node mutexes are initialized without MTX_RECURSE,
 * so the parent's mutex should NOT be held on entry - confirm.
 */
int
pfs_destroy(struct pfs_node *pn)
{
struct pfs_node *iter;
KASSERT(pn != NULL,
("%s(): node is NULL", __func__));
KASSERT(pn->pn_info != NULL,
("%s(): node has no pn_info", __func__));
if (pn->pn_parent)
pfs_detach_node(pn);
/* destroy children */
if (pn->pn_type == pfstype_dir ||
pn->pn_type == pfstype_procdir ||
pn->pn_type == pfstype_root) {
pfs_lock(pn);
while (pn->pn_nodes != NULL) {
/* pop the first child off the list */
iter = pn->pn_nodes;
pn->pn_nodes = iter->pn_next;
/* with pn_parent cleared, the recursive call skips pfs_detach_node() */
iter->pn_parent = NULL;
/* this node's mutex is not held across the recursive call */
pfs_unlock(pn);
pfs_destroy(iter);
pfs_lock(pn);
}
pfs_unlock(pn);
}
/* revoke vnodes and fileno */
pfs_purge(pn);
/* callback to free any private resources */
if (pn->pn_destroy != NULL)
pn_destroy(pn);
/* destroy the node */
pfs_fileno_free(pn);
mtx_destroy(&pn->pn_mutex);
free(pn, M_PFSNODES);
return (0);
}
/*
 * Mount a pseudofs instance
 *
 * Update mounts are rejected with EOPNOTSUPP.  Otherwise the mount is
 * flagged MNT_LOCAL, tied to pi through mnt_data, given a new filesystem
 * id and a "mounted from" name equal to pi_name, and its statfs fields
 * are filled with fixed values.  Returns 0 on success.
 */
int
pfs_mount(struct pfs_info *pi, struct mount *mp)
{
	struct statfs *st;

	if ((mp->mnt_flag & MNT_UPDATE) != 0)
		return (EOPNOTSUPP);

	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_LOCAL;
	MNT_IUNLOCK(mp);

	mp->mnt_data = pi;
	vfs_getnewfsid(mp);

	st = &mp->mnt_stat;
	vfs_mountedfrom(mp, pi->pi_name);

	/* block and I/O size: one page */
	st->f_bsize = PAGE_SIZE;
	st->f_iosize = PAGE_SIZE;
	/* one block, none of it free or available */
	st->f_blocks = 1;
	st->f_bfree = 0;
	st->f_bavail = 0;
	/* one file, no free file slots */
	st->f_files = 1;
	st->f_ffree = 0;

	return (0);
}
/*
 * Compatibility shim for old mount(2) system call
 *
 * Passes the argument list and flags straight to kernel_mount() and
 * returns its result; the data pointer is not used.
 */
int
pfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
{

	return (kernel_mount(ma, flags));
}
/*
 * Unmount a pseudofs instance
 *
 * Flushes the mount's vnodes with vflush(), requesting FORCECLOSE when
 * MNT_FORCE is set in mntflags, and returns vflush()'s result.
 */
int
pfs_unmount(struct mount *mp, int mntflags)
{
	int flushflags;

	flushflags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;
	return (vflush(mp, 0, flushflags, curthread));
}
/*
 * Return a root vnode
 *
 * Asks the vnode cache for a vnode for the instance's root node (taken
 * from the pfs_info stored in mnt_data), with NO_PID as the pid.  The
 * flags argument is not used.  Returns pfs_vncache_alloc()'s result.
 */
int
pfs_root(struct mount *mp, int flags, struct vnode **vpp)
{
	struct pfs_info *pi = (struct pfs_info *)mp->mnt_data;

	return (pfs_vncache_alloc(mp, vpp, pi->pi_root, NO_PID));
}
/*
 * Return filesystem stats
 *
 * Nothing to do: this is always called with mp->mnt_stat, which
 * pfs_mount() has already filled in.  Always returns 0.
 */
int
pfs_statfs(struct mount *mp, struct statfs *sbp)
{

	(void)mp;
	(void)sbp;
	return (0);
}
/*
 * Initialize a pseudofs instance
 *
 * Sets up the file-number allocator, creates the root directory together
 * with its "." and ".." entries, and then calls the consumer's pi_init
 * callback to build the rest of the hierarchy.
 *
 * Returns 0 on success.  If pi_init fails, its error is returned after
 * everything set up here has been undone: the tree is destroyed, pi_root
 * is cleared and the file-number allocator is torn down again.
 */
int
pfs_init(struct pfs_info *pi, struct vfsconf *vfc)
{
	struct pfs_node *root;
	int error;

	pfs_fileno_init(pi);

	/* set up the root directory */
	root = pfs_alloc_node(pi, "/", pfstype_root);
	pi->pi_root = root;
	pfs_fileno_alloc(root);
	pfs_fixup_dir(root);

	/* construct file hierarchy */
	error = (pi->pi_init)(pi, vfc);
	if (error) {
		/*
		 * Destroy the tree first: pfs_destroy() hands file numbers
		 * back to pi_unrhdr, so the allocator has to outlive it.
		 * Only then release the allocator and its mutex, which
		 * would otherwise be leaked on this path.
		 */
		pfs_destroy(root);
		pi->pi_root = NULL;
		pfs_fileno_uninit(pi);
		return (error);
	}

	if (bootverbose)
		printf("%s registered\n", pi->pi_name);
	return (0);
}
/*
 * Destroy a pseudofs instance
 *
 * Destroys the whole node tree, clears pi_root, tears down the
 * file-number allocator and then calls the consumer's pi_uninit
 * callback, whose result is returned.
 */
int
pfs_uninit(struct pfs_info *pi, struct vfsconf *vfc)
{

	pfs_destroy(pi->pi_root);
	pi->pi_root = NULL;
	pfs_fileno_uninit(pi);

	if (bootverbose)
		printf("%s unregistered\n", pi->pi_name);

	return ((pi->pi_uninit)(pi, vfc));
}
/*
 * Handle load / unload events
 *
 * MOD_LOAD loads the vnode cache; MOD_UNLOAD and MOD_SHUTDOWN unload it.
 * Those events return 0; any other event returns EOPNOTSUPP.  The mod
 * and arg parameters are not used.
 */
static int
pfs_modevent(module_t mod, int evt, void *arg)
{

	if (evt == MOD_LOAD) {
		pfs_vncache_load();
		return (0);
	}
	if (evt == MOD_UNLOAD || evt == MOD_SHUTDOWN) {
		pfs_vncache_unload();
		return (0);
	}
	return (EOPNOTSUPP);
}
/*
 * Module declaration
 *
 * Names the module "pseudofs" and installs pfs_modevent() as its event
 * handler; the third member (presumably the private-data pointer) is
 * NULL.  The module is published as version 1, which is the version the
 * MODULE_DEPEND() emitted by the PSEUDOFS() macro asks for.
 */
static moduledata_t pseudofs_data = {
"pseudofs",
pfs_modevent,
NULL
};
DECLARE_MODULE(pseudofs, pseudofs_data, SI_SUB_EXEC, SI_ORDER_FIRST);
MODULE_VERSION(pseudofs, 1);

View File

@ -0,0 +1,312 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 2001 Dag-Erling Coïdan Smørgrav
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer
* in this position and unchanged.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _PSEUDOFS_H_INCLUDED
#define _PSEUDOFS_H_INCLUDED
#include <sys/jail.h>
/*
 * Opaque structures
 *
 * Forward declarations only: wherever this header uses these types it
 * does so through pointers, so their full definitions are not needed
 * here.
 */
struct mntarg;
struct mount;
struct nameidata;
struct proc;
struct sbuf;
struct statfs;
struct thread;
struct uio;
struct vfsconf;
struct vnode;
/*
 * Limits and constants
 *
 * PFS_NAMELEN   - size of pfs_node.pn_name, terminating NUL included
 *                 (node names must be strictly shorter)
 * PFS_FSNAMELEN - size of pfs_info.pi_name; pseudofs.c fails the build
 *                 unless it equals MFSNAMELEN
 * PFS_DELEN     - offset of dirent.d_name plus PFS_NAMELEN
 */
#define PFS_NAMELEN 128
#define PFS_FSNAMELEN 16 /* equal to MFSNAMELEN */
#define PFS_DELEN (offsetof(struct dirent, d_name) + PFS_NAMELEN)
/*
 * Node types.
 *
 * root, dir and procdir are the directory types: they are the only types
 * pfs_add_node() accepts as a parent.  this and parent are the "." and
 * ".." entries added to every directory.  procdir is what
 * pfs_create_dir() creates when PFS_PROCDEP is requested.  none is the
 * value 0 and is rejected (by assertion) in the file-number code.
 */
typedef enum {
pfstype_none = 0,
pfstype_root,
pfstype_dir,
pfstype_this,
pfstype_parent,
pfstype_file,
pfstype_symlink,
pfstype_procdir
} pfs_type_t;
/*
 * Flags
 *
 * Passed to the pfs_create_*() functions and stored in pfs_node.pn_flags.
 * PFS_PROCDEP makes pfs_create_dir() create a procdir and is inherited by
 * every node added below a node that has it.  PFS_NOWAIT makes node
 * allocation use M_NOWAIT, so creation can fail and return NULL.
 */
#define PFS_RD 0x0001 /* readable */
#define PFS_WR 0x0002 /* writeable */
#define PFS_RDWR (PFS_RD|PFS_WR)
#define PFS_RAWRD 0x0004 /* raw reader */
#define PFS_RAWWR 0x0008 /* raw writer */
#define PFS_RAW (PFS_RAWRD|PFS_RAWWR)
#define PFS_PROCDEP 0x0010 /* process-dependent */
#define PFS_NOWAIT 0x0020 /* allow malloc to fail */
/*
 * Data structures
 *
 * Declared early because the callback signatures below refer to them.
 */
struct pfs_info;
struct pfs_node;
/*
 * Init / uninit callback
 *
 * Stored in pfs_info.pi_init and pi_uninit.  pfs_init() calls pi_init
 * once the root directory exists and treats a non-zero return as
 * failure; pfs_uninit() calls pi_uninit after the tree has been
 * destroyed and returns its result.
 *
 * Note that PFS_INIT_PROTO() already ends in a semicolon.
 */
#define PFS_INIT_ARGS \
struct pfs_info *pi, struct vfsconf *vfc
#define PFS_INIT_ARGNAMES \
pi, vfc
#define PFS_INIT_PROTO(name) \
int name(PFS_INIT_ARGS);
typedef int (*pfs_init_t)(PFS_INIT_ARGS);
/*
 * Filler callback
 * Called with proc held but unlocked
 *
 * Stored in pfs_node.pn_fill by pfs_create_file() and pfs_create_link()
 * and invoked through pn_fill(), which asserts the locking state above
 * whenever a process is supplied.
 */
#define PFS_FILL_ARGS \
struct thread *td, struct proc *p, struct pfs_node *pn, \
struct sbuf *sb, struct uio *uio
#define PFS_FILL_ARGNAMES \
td, p, pn, sb, uio
#define PFS_FILL_PROTO(name) \
int name(PFS_FILL_ARGS);
typedef int (*pfs_fill_t)(PFS_FILL_ARGS);
/*
 * Attribute callback
 * Called with proc locked
 *
 * Stored in pfs_node.pn_attr by the pfs_create_*() functions and invoked
 * through pn_attr(), which asserts that the process lock is owned
 * whenever a process is supplied.
 */
struct vattr;
#define PFS_ATTR_ARGS \
struct thread *td, struct proc *p, struct pfs_node *pn, \
struct vattr *vap
#define PFS_ATTR_ARGNAMES \
td, p, pn, vap
#define PFS_ATTR_PROTO(name) \
int name(PFS_ATTR_ARGS);
typedef int (*pfs_attr_t)(PFS_ATTR_ARGS);
/*
 * Visibility callback
 * Called with proc locked
 *
 * Stored in pfs_node.pn_vis by the pfs_create_*() functions and invoked
 * through pn_vis(), which asserts that a process is supplied and that
 * its lock is owned.
 */
#define PFS_VIS_ARGS \
struct thread *td, struct proc *p, struct pfs_node *pn
#define PFS_VIS_ARGNAMES \
td, p, pn
#define PFS_VIS_PROTO(name) \
int name(PFS_VIS_ARGS);
typedef int (*pfs_vis_t)(PFS_VIS_ARGS);
/*
 * Ioctl callback
 * Called with proc locked
 *
 * Stored in pfs_node.pn_ioctl and invoked through pn_ioctl().  The
 * pfs_create_*() functions do not set it, so it stays NULL unless the
 * consumer assigns it on the node.
 */
#define PFS_IOCTL_ARGS \
struct thread *td, struct proc *p, struct pfs_node *pn, \
unsigned long cmd, void *data
#define PFS_IOCTL_ARGNAMES \
td, p, pn, cmd, data
#define PFS_IOCTL_PROTO(name) \
int name(PFS_IOCTL_ARGS);
typedef int (*pfs_ioctl_t)(PFS_IOCTL_ARGS);
/*
 * Getextattr callback
 * Called with proc locked
 *
 * Stored in pfs_node.pn_getextattr and invoked through pn_getextattr().
 * The pfs_create_*() functions do not set it, so it stays NULL unless
 * the consumer assigns it on the node.
 */
#define PFS_GETEXTATTR_ARGS \
struct thread *td, struct proc *p, struct pfs_node *pn, \
int attrnamespace, const char *name, struct uio *uio, \
size_t *size, struct ucred *cred
#define PFS_GETEXTATTR_ARGNAMES \
td, p, pn, attrnamespace, name, uio, size, cred
#define PFS_GETEXTATTR_PROTO(name) \
int name(PFS_GETEXTATTR_ARGS);
/* must be declared before PFS_GETEXTATTR_ARGS is first expanded, in the typedef */
struct ucred;
typedef int (*pfs_getextattr_t)(PFS_GETEXTATTR_ARGS);
/*
 * Last-close callback
 * Called with proc locked
 *
 * Stored in pfs_node.pn_close and invoked through pn_close().  The
 * pfs_create_*() functions do not set it, so it stays NULL unless the
 * consumer assigns it on the node.
 */
#define PFS_CLOSE_ARGS \
struct thread *td, struct proc *p, struct pfs_node *pn
#define PFS_CLOSE_ARGNAMES \
td, p, pn
#define PFS_CLOSE_PROTO(name) \
int name(PFS_CLOSE_ARGS);
typedef int (*pfs_close_t)(PFS_CLOSE_ARGS);
/*
 * Destroy callback
 *
 * Stored in pfs_node.pn_destroy by the pfs_create_*() functions.  When
 * non-NULL it is run by pfs_destroy(), after the node's children have
 * been destroyed and its vnodes purged, and before the node itself is
 * freed.  pfs_destroy() ignores its return value.
 */
#define PFS_DESTROY_ARGS \
struct pfs_node *pn
#define PFS_DESTROY_ARGNAMES \
pn
#define PFS_DESTROY_PROTO(name) \
int name(PFS_DESTROY_ARGS);
typedef int (*pfs_destroy_t)(PFS_DESTROY_ARGS);
/*
 * pfs_info: describes a pseudofs instance
 *
 * The pi_mutex is only used to avoid using the global subr_unit lock
 * for unrhdr. The rest of struct pfs_info is only modified during
 * vfs_init() and vfs_uninit() of the consumer filesystem.
 *
 * The first three members are initialized positionally by the PSEUDOFS()
 * macro, so their order must not change.
 */
struct pfs_info {
char pi_name[PFS_FSNAMELEN];
pfs_init_t pi_init;
pfs_init_t pi_uninit;
/* members below this line are initialized at run time */
struct pfs_node *pi_root; /* set by pfs_init(), cleared by pfs_uninit() */
struct mtx pi_mutex; /* lock handed to new_unrhdr() */
struct unrhdr *pi_unrhdr; /* file-number allocator */
};
/*
 * pfs_node: describes a node (file or directory) within a pseudofs
 *
 * - Fields marked (o) are protected by the node's own mutex.
 * - Fields marked (p) are protected by the node's parent's mutex.
 * - Remaining fields are not protected by any lock and are assumed to be
 * immutable once the node has been created.
 *
 * To prevent deadlocks, if a node's mutex is to be held at the same time
 * as its parent's (e.g. when adding or removing nodes to a directory),
 * the parent's mutex must always be acquired first. Unfortunately, this
 * is not enforceable by WITNESS.
 *
 * NOTE(review): pn_parent is marked (o), yet pseudofs.c writes it without
 * holding the node's own mutex (pfs_add_node() holds no lock at that
 * point, pfs_detach_node() holds the parent's) - confirm the intended
 * rule.
 */
struct pfs_node {
char pn_name[PFS_NAMELEN];
pfs_type_t pn_type;
int pn_flags;
struct mtx pn_mutex;
void *pn_data; /* (o) */
pfs_fill_t pn_fill;
pfs_ioctl_t pn_ioctl;
pfs_close_t pn_close;
pfs_attr_t pn_attr;
pfs_vis_t pn_vis;
pfs_getextattr_t pn_getextattr;
pfs_destroy_t pn_destroy;
struct pfs_info *pn_info;
u_int32_t pn_fileno; /* (o) */
struct pfs_node *pn_parent; /* (o) */
struct pfs_node *pn_nodes; /* (o) */ /* head of the child list */
struct pfs_node *pn_next; /* (p) */ /* next sibling */
};
/*
 * VFS interface
 *
 * pfs_cmount(), pfs_unmount(), pfs_root() and pfs_statfs() are installed
 * directly in the vfsops built by PSEUDOFS(); pfs_mount(), pfs_init() and
 * pfs_uninit() take the instance's pfs_info and are reached through the
 * per-filesystem wrappers that macro generates.
 */
int pfs_mount (struct pfs_info *pi, struct mount *mp);
int pfs_cmount (struct mntarg *ma, void *data, uint64_t flags);
int pfs_unmount (struct mount *mp, int mntflags);
int pfs_root (struct mount *mp, int flags,
struct vnode **vpp);
int pfs_statfs (struct mount *mp, struct statfs *sbp);
int pfs_init (struct pfs_info *pi, struct vfsconf *vfc);
int pfs_uninit (struct pfs_info *pi, struct vfsconf *vfc);
/*
 * Directory structure construction and manipulation
 *
 * The pfs_create_*() functions return the new node, or NULL if an
 * allocation fails.  pfs_find_node() returns NULL when no child of
 * parent has the given name.  pfs_destroy() always returns 0.
 * pfs_purge() is not defined in pseudofs.c.
 */
struct pfs_node *pfs_create_dir (struct pfs_node *parent, const char *name,
pfs_attr_t attr, pfs_vis_t vis,
pfs_destroy_t destroy, int flags);
struct pfs_node *pfs_create_file(struct pfs_node *parent, const char *name,
pfs_fill_t fill, pfs_attr_t attr,
pfs_vis_t vis, pfs_destroy_t destroy,
int flags);
struct pfs_node *pfs_create_link(struct pfs_node *parent, const char *name,
pfs_fill_t fill, pfs_attr_t attr,
pfs_vis_t vis, pfs_destroy_t destroy,
int flags);
struct pfs_node *pfs_find_node (struct pfs_node *parent, const char *name);
void pfs_purge (struct pfs_node *pn);
int pfs_destroy (struct pfs_node *pn);
/*
 * Now for some initialization magic...
 *
 * PSEUDOFS(name, version, flags) emits everything needed to register a
 * pseudofs-based filesystem called "name":
 *
 * - a static pfs_info whose pi_name is the stringified name and whose
 *   pi_init / pi_uninit are name_init and name_uninit, which the consumer
 *   must provide;
 * - static wrappers _name_mount, _name_init and _name_uninit that pass
 *   that pfs_info to pfs_mount(), pfs_init() and pfs_uninit();
 * - a vfsops table built from those wrappers and the generic pfs_*
 *   functions, registered with VFS_SET() using VFCF_SYNTHETIC | flags;
 * - MODULE_VERSION(name, version) and a dependency on exactly version 1
 *   of the pseudofs module.
 */
#define PSEUDOFS(name, version, flags) \
\
static struct pfs_info name##_info = { \
#name, \
name##_init, \
name##_uninit, \
}; \
\
static int \
_##name##_mount(struct mount *mp) { \
return (pfs_mount(&name##_info, mp)); \
} \
\
static int \
_##name##_init(struct vfsconf *vfc) { \
return (pfs_init(&name##_info, vfc)); \
} \
\
static int \
_##name##_uninit(struct vfsconf *vfc) { \
return (pfs_uninit(&name##_info, vfc)); \
} \
\
static struct vfsops name##_vfsops = { \
.vfs_cmount = pfs_cmount, \
.vfs_init = _##name##_init, \
.vfs_mount = _##name##_mount, \
.vfs_root = pfs_root, \
.vfs_statfs = pfs_statfs, \
.vfs_uninit = _##name##_uninit, \
.vfs_unmount = pfs_unmount, \
}; \
VFS_SET(name##_vfsops, name, VFCF_SYNTHETIC | flags); \
MODULE_VERSION(name, version); \
MODULE_DEPEND(name, pseudofs, 1, 1, 1);
#endif

View File

@ -0,0 +1,159 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 2001 Dag-Erling Coïdan Smørgrav
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer
* in this position and unchanged.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_pseudofs.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <fs/pseudofs/pseudofs.h>
#include <fs/pseudofs/pseudofs_internal.h>
/*
 * Initialize fileno bitmap
 *
 * Sets up pi_mutex and creates the unit-number allocator that hands out
 * file numbers from 3 up to INT_MAX / NO_PID, using pi_mutex as its
 * lock.  Numbers below 3 are never handed out; the root node is always
 * given 2 by pfs_fileno_alloc().
 * NOTE(review): the upper bound presumably leaves room for combining a
 * file number with a pid - confirm against the vnode code.
 */
void
pfs_fileno_init(struct pfs_info *pi)
{
	struct mtx *lock = &pi->pi_mutex;

	mtx_init(lock, "pfs_fileno", NULL, MTX_DEF);
	pi->pi_unrhdr = new_unrhdr(3, INT_MAX / NO_PID, lock);
}
/*
 * Tear down fileno bitmap
 *
 * Undoes pfs_fileno_init(): the allocator is deleted and its pointer
 * cleared before the mutex it was created with is destroyed.
 */
void
pfs_fileno_uninit(struct pfs_info *pi)
{

	delete_unrhdr(pi->pi_unrhdr);
	pi->pi_unrhdr = NULL;

	mtx_destroy(&pi->pi_mutex);
}
/*
 * Allocate a file number
 *
 * The number depends on the node type:
 * - the root is always 2;
 * - dir, file, symlink and procdir nodes get a fresh number from the
 *   instance's allocator (the result of alloc_unr() is stored unchecked);
 * - "." shares its directory's number;
 * - ".." shares the number of the directory's parent, or of the
 *   directory itself when that directory is the root.
 * The node's mutex must not be owned by the caller (asserted).
 */
void
pfs_fileno_alloc(struct pfs_node *pn)
{
	struct pfs_node *parent = pn->pn_parent;

	if (parent != NULL) {
		PFS_TRACE(("%s/%s", parent->pn_name, pn->pn_name));
	} else {
		PFS_TRACE(("%s", pn->pn_name));
	}
	pfs_assert_not_owned(pn);

	switch (pn->pn_type) {
	case pfstype_root:
		/* root must always be 2 */
		pn->pn_fileno = 2;
		break;
	case pfstype_dir:
	case pfstype_file:
	case pfstype_symlink:
	case pfstype_procdir:
		pn->pn_fileno = alloc_unr(pn->pn_info->pi_unrhdr);
		break;
	case pfstype_this:
		KASSERT(parent != NULL,
		    ("%s(): pfstype_this node has no parent", __func__));
		pn->pn_fileno = parent->pn_fileno;
		break;
	case pfstype_parent:
		KASSERT(parent != NULL,
		    ("%s(): pfstype_parent node has no parent", __func__));
		if (parent->pn_type != pfstype_root) {
			KASSERT(parent->pn_parent != NULL,
			    ("%s(): pfstype_parent node has no grandparent", __func__));
			pn->pn_fileno = parent->pn_parent->pn_fileno;
		} else {
			/* the root is its own parent */
			pn->pn_fileno = parent->pn_fileno;
		}
		break;
	case pfstype_none:
		KASSERT(0,
		    ("%s(): pfstype_none node", __func__));
		break;
	}

#if 0
	printf("%s(): %s: ", __func__, pn->pn_info->pi_name);
	if (pn->pn_parent) {
		if (pn->pn_parent->pn_parent) {
			printf("%s/", pn->pn_parent->pn_parent->pn_name);
		}
		printf("%s/", pn->pn_parent->pn_name);
	}
	printf("%s -> %d\n", pn->pn_name, pn->pn_fileno);
#endif
}
/*
 * Release a file number
 *
 * Only dir, file, symlink and procdir nodes own a number taken from the
 * allocator, so only those give one back.  The root's number is fixed and
 * "." / ".." merely borrow another node's number.  The node's mutex must
 * not be owned by the caller (asserted).
 */
void
pfs_fileno_free(struct pfs_node *pn)
{

	pfs_assert_not_owned(pn);

	switch (pn->pn_type) {
	case pfstype_dir:
	case pfstype_file:
	case pfstype_symlink:
	case pfstype_procdir:
		free_unr(pn->pn_info->pi_unrhdr, pn->pn_fileno);
		break;
	case pfstype_root:
		/* not allocated from unrhdr */
	case pfstype_this:
	case pfstype_parent:
		/* ignore these, as they don't "own" their file number */
		break;
	case pfstype_none:
		KASSERT(0,
		    ("pfs_fileno_free() called for pfstype_none node"));
		break;
	}
}

View File

@ -0,0 +1,213 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 2001 Dag-Erling Coïdan Smørgrav
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer
* in this position and unchanged.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _PSEUDOFS_INTERNAL_H_INCLUDED
#define _PSEUDOFS_INTERNAL_H_INCLUDED
/*
 * Sysctl subtree
 *
 * Declaration of the _vfs_pfs sysctl node; the node itself is defined
 * with SYSCTL_NODE() in pseudofs.c.
 */
SYSCTL_DECL(_vfs_pfs);
/*
 * Vnode data
 *
 * Presumably the per-vnode private data tying a vnode to the pseudofs
 * node and pid it represents, chained through pvd_prev / pvd_next -
 * the code using it is outside this header, so confirm there.
 *
 * NOTE(review): pvd_dead is a signed 1-bit bit-field, so it can only hold
 * 0 and one non-zero value (typically -1); test it for truth rather than
 * comparing it with 1.
 */
struct pfs_vdata {
struct pfs_node *pvd_pn;
pid_t pvd_pid;
struct vnode *pvd_vnode;
struct pfs_vdata*pvd_prev, *pvd_next;
int pvd_dead:1;
};
/*
 * Vnode cache
 *
 * pfs_vncache_load() and pfs_vncache_unload() are called from the module
 * event handler in pseudofs.c on load and on unload / shutdown.
 * pfs_vncache_alloc() is what pfs_root() uses to obtain the root vnode.
 */
void pfs_vncache_load (void);
void pfs_vncache_unload (void);
int pfs_vncache_alloc (struct mount *, struct vnode **,
struct pfs_node *, pid_t pid);
int pfs_vncache_free (struct vnode *);
/*
 * File number bitmap
 *
 * Implemented in pseudofs_fileno.c.  init / uninit operate on a whole
 * pseudofs instance, alloc / free on a single node.
 */
void pfs_fileno_init (struct pfs_info *);
void pfs_fileno_uninit (struct pfs_info *);
void pfs_fileno_alloc (struct pfs_node *);
void pfs_fileno_free (struct pfs_node *);
/*
 * Debugging
 *
 * PFS_TRACE(foo) takes a complete, parenthesized printf argument list,
 * e.g. PFS_TRACE(("%s", name)).  With PSEUDOFS_TRACE defined and
 * pfs_trace non-zero it prints the function name and line number,
 * followed by the formatted message and a newline; otherwise it does
 * nothing.
 *
 * PFS_RETURN(err) returns err from the calling function, logging the
 * value first when tracing is enabled.  In that case err is evaluated
 * twice, so it must not have side effects.
 */
#ifdef PSEUDOFS_TRACE
extern int pfs_trace;
#define PFS_TRACE(foo) \
do { \
if (pfs_trace) { \
printf("%s(): line %d: ", __func__, __LINE__); \
printf foo ; \
printf("\n"); \
} \
} while (0)
#define PFS_RETURN(err) \
do { \
if (pfs_trace) { \
printf("%s(): line %d: returning %d\n", \
__func__, __LINE__, err); \
} \
return (err); \
} while (0)
#else
#define PFS_TRACE(foo) \
do { /* nothing */ } while (0)
#define PFS_RETURN(err) \
return (err)
#endif
/*
 * Inline helpers for locking
 */

/* Acquire the node's own mutex. */
static inline void
pfs_lock(struct pfs_node *pn)
{

	mtx_lock(&pn->pn_mutex);
}
/* Release the node's own mutex. */
static inline void
pfs_unlock(struct pfs_node *pn)
{

	mtx_unlock(&pn->pn_mutex);
}
/* Assert that the current thread owns the node's mutex. */
static inline void
pfs_assert_owned(struct pfs_node *pn)
{

	mtx_assert(&pn->pn_mutex, MA_OWNED);
}
/* Assert that the current thread does not own the node's mutex. */
static inline void
pfs_assert_not_owned(struct pfs_node *pn)
{

	mtx_assert(&pn->pn_mutex, MA_NOTOWNED);
}
static inline int
pn_fill(PFS_FILL_ARGS)
{
PFS_TRACE(("%s", pn->pn_name));
KASSERT(pn->pn_fill != NULL, ("%s(): no callback", __func__));
if (p != NULL) {
PROC_LOCK_ASSERT(p, MA_NOTOWNED);
PROC_ASSERT_HELD(p);
}
pfs_assert_not_owned(pn);
return ((pn->pn_fill)(PFS_FILL_ARGNAMES));
}
static inline int
pn_attr(PFS_ATTR_ARGS)
{
PFS_TRACE(("%s", pn->pn_name));
KASSERT(pn->pn_attr != NULL, ("%s(): no callback", __func__));
if (p != NULL)
PROC_LOCK_ASSERT(p, MA_OWNED);
pfs_assert_not_owned(pn);
return ((pn->pn_attr)(PFS_ATTR_ARGNAMES));
}
/*
 * Invoke the node's visibility callback and return its result.
 *
 * Unlike the other wrappers this one requires a process: both the
 * callback and p must be non-NULL (asserted), the process lock must be
 * owned and the node's mutex must not be owned.
 */
static inline int
pn_vis(PFS_VIS_ARGS)
{
	int result;

	PFS_TRACE(("%s", pn->pn_name));
	KASSERT(pn->pn_vis != NULL, ("%s(): no callback", __func__));
	KASSERT(p != NULL, ("%s(): no process", __func__));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	pfs_assert_not_owned(pn);

	result = pn->pn_vis(PFS_VIS_ARGNAMES);
	return (result);
}
static inline int
pn_ioctl(PFS_IOCTL_ARGS)
{
PFS_TRACE(("%s", pn->pn_name));
KASSERT(pn->pn_ioctl != NULL, ("%s(): no callback", __func__));
if (p != NULL)
PROC_LOCK_ASSERT(p, MA_OWNED);
pfs_assert_not_owned(pn);
return ((pn->pn_ioctl)(PFS_IOCTL_ARGNAMES));
}
static inline int
pn_getextattr(PFS_GETEXTATTR_ARGS)
{
PFS_TRACE(("%s", pn->pn_name));
KASSERT(pn->pn_getextattr != NULL, ("%s(): no callback", __func__));
if (p != NULL)
PROC_LOCK_ASSERT(p, MA_OWNED);
pfs_assert_not_owned(pn);
return ((pn->pn_getextattr)(PFS_GETEXTATTR_ARGNAMES));
}
static inline int
pn_close(PFS_CLOSE_ARGS)
{
PFS_TRACE(("%s", pn->pn_name));
KASSERT(pn->pn_close != NULL, ("%s(): no callback", __func__));
if (p != NULL)
PROC_LOCK_ASSERT(p, MA_OWNED);
pfs_assert_not_owned(pn);
return ((pn->pn_close)(PFS_CLOSE_ARGNAMES));
}
static inline int
pn_destroy(PFS_DESTROY_ARGS)
{
PFS_TRACE(("%s", pn->pn_name));
KASSERT(pn->pn_destroy != NULL, ("%s(): no callback", __func__));
pfs_assert_not_owned(pn);
return ((pn->pn_destroy)(PFS_DESTROY_ARGNAMES));
}
#endif

View File

@ -0,0 +1,333 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 2001 Dag-Erling Coïdan Smørgrav
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer
* in this position and unchanged.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_pseudofs.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/eventhandler.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <fs/pseudofs/pseudofs.h>
#include <fs/pseudofs/pseudofs_internal.h>
/* Malloc type used for the struct pfs_vdata entries of the vnode cache. */
static MALLOC_DEFINE(M_PFSVNCACHE, "pfs_vncache", "pseudofs vnode cache");
/* Held while the pfs_vncache list is walked or modified. */
static struct mtx pfs_vncache_mutex;
/* Head of the doubly linked (pvd_next / pvd_prev) list of cache entries. */
static struct pfs_vdata *pfs_vncache;
/* Tag of the process_exit handler registered in pfs_vncache_load(). */
static eventhandler_tag pfs_exit_tag;
static void pfs_exit(void *arg, struct proc *p);
static void pfs_purge_locked(struct pfs_node *pn, bool force);
/* Statistics exported under the vfs.pfs.vncache sysctl node. */
static SYSCTL_NODE(_vfs_pfs, OID_AUTO, vncache, CTLFLAG_RW, 0,
"pseudofs vnode cache");
static int pfs_vncache_entries;
SYSCTL_INT(_vfs_pfs_vncache, OID_AUTO, entries, CTLFLAG_RD,
&pfs_vncache_entries, 0,
"number of entries in the vnode cache");
static int pfs_vncache_maxentries;
SYSCTL_INT(_vfs_pfs_vncache, OID_AUTO, maxentries, CTLFLAG_RD,
&pfs_vncache_maxentries, 0,
"highest number of entries in the vnode cache");
static int pfs_vncache_hits;
SYSCTL_INT(_vfs_pfs_vncache, OID_AUTO, hits, CTLFLAG_RD,
&pfs_vncache_hits, 0,
"number of cache hits since initialization");
static int pfs_vncache_misses;
SYSCTL_INT(_vfs_pfs_vncache, OID_AUTO, misses, CTLFLAG_RD,
&pfs_vncache_misses, 0,
"number of cache misses since initialization");
extern struct vop_vector pfs_vnodeops; /* XXX -> .h file */
/*
* Initialize vnode cache
*
* Sets up the cache mutex and registers pfs_exit() as a process_exit
* event handler; the returned tag is kept in pfs_exit_tag so that
* pfs_vncache_unload() can deregister it.
*/
void
pfs_vncache_load(void)
{
mtx_init(&pfs_vncache_mutex, "pfs_vncache", NULL, MTX_DEF);
pfs_exit_tag = EVENTHANDLER_REGISTER(process_exit, pfs_exit, NULL,
EVENTHANDLER_PRI_ANY);
}
/*
* Tear down vnode cache
*
* Deregisters the process_exit handler, force-purges every cache entry,
* asserts that no entries remain and finally destroys the cache mutex.
*/
void
pfs_vncache_unload(void)
{
EVENTHANDLER_DEREGISTER(process_exit, pfs_exit_tag);
mtx_lock(&pfs_vncache_mutex);
/* pn == NULL with force == true selects every entry. */
pfs_purge_locked(NULL, true);
mtx_unlock(&pfs_vncache_mutex);
KASSERT(pfs_vncache_entries == 0,
("%d vncache entries remaining", pfs_vncache_entries));
mtx_destroy(&pfs_vncache_mutex);
}
/*
* Allocate a vnode
*
* Stores in *vpp a vnode representing node pn for process pid on mount
* mp. An existing cache entry matching (pn, pid, mp) is reused when one
* can be obtained with vget(); otherwise a new vnode is created with
* getnewvnode(), initialised from the node type and linked at the head
* of the cache.
*
* Returns 0 on success, or the error reported by getnewvnode() or
* insmntque(). On both success paths the vnode has been locked with
* LK_EXCLUSIVE (by vget() or vn_lock()) and is not unlocked here.
*/
int
pfs_vncache_alloc(struct mount *mp, struct vnode **vpp,
struct pfs_node *pn, pid_t pid)
{
struct pfs_vdata *pvd, *pvd2;
struct vnode *vp;
int error;
/*
* See if the vnode is in the cache.
* XXX linear search is not very efficient.
*/
retry:
mtx_lock(&pfs_vncache_mutex);
for (pvd = pfs_vncache; pvd; pvd = pvd->pvd_next) {
if (pvd->pvd_pn == pn && pvd->pvd_pid == pid &&
pvd->pvd_vnode->v_mount == mp) {
vp = pvd->pvd_vnode;
/*
* Take the vnode interlock before dropping the cache
* mutex; vget() is passed LK_INTERLOCK accordingly.
*/
VI_LOCK(vp);
mtx_unlock(&pfs_vncache_mutex);
if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, curthread) == 0) {
++pfs_vncache_hits;
*vpp = vp;
/*
* Some callers cache_enter(vp) later, so
* we have to make sure it's not in the
* VFS cache so it doesn't get entered
* twice. A better solution would be to
* make pfs_vncache_alloc() responsible
* for entering the vnode in the VFS
* cache.
*/
cache_purge(vp);
return (0);
}
/* vget() failed: rescan the cache from the start. */
goto retry;
}
}
mtx_unlock(&pfs_vncache_mutex);
/* nope, get a new one */
pvd = malloc(sizeof *pvd, M_PFSVNCACHE, M_WAITOK);
/* Not on the cache list yet; pfs_vncache_free() relies on this. */
pvd->pvd_next = pvd->pvd_prev = NULL;
error = getnewvnode("pseudofs", mp, &pfs_vnodeops, vpp);
if (error) {
free(pvd, M_PFSVNCACHE);
return (error);
}
pvd->pvd_pn = pn;
pvd->pvd_pid = pid;
(*vpp)->v_data = pvd;
/* Derive the vnode type (and the root flag) from the node type. */
switch (pn->pn_type) {
case pfstype_root:
(*vpp)->v_vflag = VV_ROOT;
#if 0
printf("root vnode allocated\n");
#endif
/* fall through */
case pfstype_dir:
case pfstype_this:
case pfstype_parent:
case pfstype_procdir:
(*vpp)->v_type = VDIR;
break;
case pfstype_file:
(*vpp)->v_type = VREG;
break;
case pfstype_symlink:
(*vpp)->v_type = VLNK;
break;
case pfstype_none:
KASSERT(0, ("pfs_vncache_alloc called for null node\n"));
/* no break: continues into the panic below */
default:
panic("%s has unexpected type: %d", pn->pn_name, pn->pn_type);
}
/*
* Propagate flag through to vnode so users know it can change
* if the process changes (i.e. execve)
*/
if ((pn->pn_flags & PFS_PROCDEP) != 0)
(*vpp)->v_vflag |= VV_PROCDEP;
pvd->pvd_vnode = *vpp;
vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
VN_LOCK_AREC(*vpp);
error = insmntque(*vpp, mp);
if (error != 0) {
/*
* NOTE(review): the vnode itself is not released here;
* presumably insmntque() disposes of it on failure -- confirm.
*/
free(pvd, M_PFSVNCACHE);
*vpp = NULLVP;
return (error);
}
retry2:
mtx_lock(&pfs_vncache_mutex);
/*
* Other thread may race with us, creating the entry we are
* going to insert into the cache. Recheck after
* pfs_vncache_mutex is reacquired.
*/
for (pvd2 = pfs_vncache; pvd2; pvd2 = pvd2->pvd_next) {
if (pvd2->pvd_pn == pn && pvd2->pvd_pid == pid &&
pvd2->pvd_vnode->v_mount == mp) {
vp = pvd2->pvd_vnode;
VI_LOCK(vp);
mtx_unlock(&pfs_vncache_mutex);
if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, curthread) == 0) {
++pfs_vncache_hits;
/*
* Lost the race: discard the vnode created above and
* hand back the cached one instead.
* NOTE(review): pvd is not freed explicitly on this
* path; presumably vgone() releases it through the
* vnode's v_data -- confirm.
*/
vgone(*vpp);
vput(*vpp);
*vpp = vp;
cache_purge(vp);
return (0);
}
goto retry2;
}
}
/* No competing entry: account for the miss and link pvd at the head. */
++pfs_vncache_misses;
if (++pfs_vncache_entries > pfs_vncache_maxentries)
pfs_vncache_maxentries = pfs_vncache_entries;
pvd->pvd_prev = NULL;
pvd->pvd_next = pfs_vncache;
if (pvd->pvd_next)
pvd->pvd_next->pvd_prev = pvd;
pfs_vncache = pvd;
mtx_unlock(&pfs_vncache_mutex);
return (0);
}
/*
 * Free a vnode
 *
 * Detaches the pfs_vdata hanging off vp->v_data from the cache list (if
 * it is on the list), releases it and clears vp->v_data.  The entry
 * counter is only decremented when the entry was actually linked, i.e.
 * when it had a predecessor or was the list head.  Always returns 0.
 */
int
pfs_vncache_free(struct vnode *vp)
{
	struct pfs_vdata *entry, *after, *before;
	bool linked;

	mtx_lock(&pfs_vncache_mutex);
	entry = (struct pfs_vdata *)vp->v_data;
	KASSERT(entry != NULL, ("pfs_vncache_free(): no vnode data\n"));
	after = entry->pvd_next;
	before = entry->pvd_prev;

	/* Fix up the back pointer of the successor, if there is one. */
	if (after != NULL)
		after->pvd_prev = before;

	/* Fix up whatever pointed at this entry: predecessor or list head. */
	linked = true;
	if (before != NULL)
		before->pvd_next = after;
	else if (pfs_vncache == entry)
		pfs_vncache = after;
	else
		linked = false;
	if (linked)
		--pfs_vncache_entries;
	mtx_unlock(&pfs_vncache_mutex);

	free(entry, M_PFSVNCACHE);
	vp->v_data = NULL;
	return (0);
}
/*
* Purge the cache of dead entries
*
* An entry is purged when force is set, when it is marked pvd_dead, or
* when pn is non-NULL and the entry belongs to pn. Must be called with
* pfs_vncache_mutex held; the mutex is dropped and reacquired around
* every vgone() call and is held again on return.
*
* This is extremely inefficient due to the fact that vgone() not only
* indirectly modifies the vnode cache, but may also sleep. We can
* neither hold pfs_vncache_mutex across a vgone() call, nor make any
* assumptions about the state of the cache after vgone() returns. In
* consequence, we must start over after every vgone() call, and keep
* trying until we manage to traverse the entire cache.
*
* The only way to improve this situation is to change the data structure
* used to implement the cache.
*/
static void
pfs_purge_locked(struct pfs_node *pn, bool force)
{
struct pfs_vdata *pvd;
struct vnode *vnp;
mtx_assert(&pfs_vncache_mutex, MA_OWNED);
pvd = pfs_vncache;
while (pvd != NULL) {
if (force || pvd->pvd_dead ||
(pn != NULL && pvd->pvd_pn == pn)) {
vnp = pvd->pvd_vnode;
/* Hold the vnode across the window where the mutex is dropped. */
vhold(vnp);
mtx_unlock(&pfs_vncache_mutex);
VOP_LOCK(vnp, LK_EXCLUSIVE);
vgone(vnp);
VOP_UNLOCK(vnp, 0);
mtx_lock(&pfs_vncache_mutex);
vdrop(vnp);
/* The list may have changed arbitrarily: restart at the head. */
pvd = pfs_vncache;
} else {
pvd = pvd->pvd_next;
}
}
}
/*
* Purge the cache entries that belong to node pn, together with any
* entries already marked dead. Locking wrapper around
* pfs_purge_locked() with force disabled.
*/
void
pfs_purge(struct pfs_node *pn)
{
mtx_lock(&pfs_vncache_mutex);
pfs_purge_locked(pn, false);
mtx_unlock(&pfs_vncache_mutex);
}
/*
 * Free all vnodes associated with a defunct process
 *
 * process_exit event handler.  Every cache entry whose pid matches the
 * exiting process is flagged as dead; if at least one entry was flagged,
 * the dead entries are purged.  An empty cache is detected without
 * taking the mutex so that the common case stays cheap.
 */
static void
pfs_exit(void *arg, struct proc *p)
{
	struct pfs_vdata *entry;
	int marked;

	if (pfs_vncache == NULL)
		return;

	marked = 0;
	mtx_lock(&pfs_vncache_mutex);
	entry = pfs_vncache;
	while (entry != NULL) {
		if (entry->pvd_pid == p->p_pid) {
			entry->pvd_dead = 1;
			marked = 1;
		}
		entry = entry->pvd_next;
	}
	if (marked != 0)
		pfs_purge_locked(NULL, false);
	mtx_unlock(&pfs_vncache_mutex);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

1719
freebsd/sys/kern/kern_lock.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,695 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2013 EMC Corp.
* Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
* Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
/*
* Path-compressed radix trie implementation.
*
* The implementation takes into account the following rationale:
* - Size of the nodes should be as small as possible but still big enough
* to avoid a large maximum depth for the trie. This is a balance
* between the necessity to not wire too much physical memory for the nodes
* and the necessity to avoid too much cache pollution during the trie
* operations.
* - There is not a huge bias toward the number of lookup operations over
* the number of insert and remove operations. This basically implies
* that optimizations supposedly helping one operation but hurting the
* other might be carefully evaluated.
* - On average not many nodes are expected to be fully populated, hence
* level compression may just complicate things.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_ddb.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pctrie.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
/* Mask applied by pctrie_slot() to extract a slot number from a key. */
#define PCTRIE_MASK (PCTRIE_COUNT - 1)
/*
* Highest level number: the count of PCTRIE_WIDTH-bit groups needed to
* cover a 64-bit key (rounded up), minus one.
*/
#define PCTRIE_LIMIT (howmany(sizeof(uint64_t) * NBBY, PCTRIE_WIDTH) - 1)
/* Flag bits stored in node pointers. */
#define PCTRIE_ISLEAF 0x1
#define PCTRIE_FLAGS 0x1
#define PCTRIE_PAD PCTRIE_FLAGS
/* Returns one unit associated with specified level. */
#define PCTRIE_UNITLEVEL(lev) \
((uint64_t)1 << ((lev) * PCTRIE_WIDTH))
/*
* Internal (non-leaf) trie node. Entries of pn_child are either NULL,
* a pointer to another pctrie_node, or a value pointer tagged with
* PCTRIE_ISLEAF (see pctrie_addval() and pctrie_isleaf()).
*/
struct pctrie_node {
uint64_t pn_owner; /* Owner of record. */
uint16_t pn_count; /* Valid children. */
uint16_t pn_clev; /* Current level. */
void *pn_child[PCTRIE_COUNT]; /* Child nodes. */
};
/*
 * Allocate a node through the caller-supplied allocator and fill in its
 * owner, child count and level.  Pre-allocation should ensure that the
 * request will always be satisfied; if the allocator nevertheless
 * returns NULL, NULL is passed on to the caller.
 */
static __inline struct pctrie_node *
pctrie_node_get(struct pctrie *ptree, pctrie_alloc_t allocfn, uint64_t owner,
    uint16_t count, uint16_t clevel)
{
	struct pctrie_node *fresh;

	fresh = allocfn(ptree);
	if (fresh != NULL) {
		fresh->pn_clev = clevel;
		fresh->pn_count = count;
		fresh->pn_owner = owner;
	}
	return (fresh);
}
/*
* Free radix node.
*
* Hands the node to the caller-supplied freefn. With INVARIANTS the node
* is first checked to be empty: pn_count must be zero and every child
* slot must be NULL.
*/
static __inline void
pctrie_node_put(struct pctrie *ptree, struct pctrie_node *node,
pctrie_free_t freefn)
{
#ifdef INVARIANTS
int slot;
KASSERT(node->pn_count == 0,
("pctrie_node_put: node %p has %d children", node,
node->pn_count));
for (slot = 0; slot < PCTRIE_COUNT; slot++)
KASSERT(node->pn_child[slot] == NULL,
("pctrie_node_put: node %p has a child", node));
#endif
freefn(ptree, node);
}
/*
* Return the position in the array for a given level.
*
* The key is shifted right by level * PCTRIE_WIDTH bits and masked with
* PCTRIE_MASK. The shift count is not range-checked here, so callers
* must pass a level for which level * PCTRIE_WIDTH is smaller than the
* key width.
*/
static __inline int
pctrie_slot(uint64_t index, uint16_t level)
{
return ((index >> (level * PCTRIE_WIDTH)) & PCTRIE_MASK);
}
/* Trims the key after the specified level. */
static __inline uint64_t
pctrie_trimkey(uint64_t index, uint16_t level)
{
uint64_t ret;
ret = index;
if (level > 0) {
ret >>= level * PCTRIE_WIDTH;
ret <<= level * PCTRIE_WIDTH;
}
return (ret);
}
/*
* Get the root node for a tree.
*
* Returns pt_root cast to a node pointer without stripping any flag
* bits; callers use pctrie_isleaf() to tell whether it is really a
* tagged leaf.
*/
static __inline struct pctrie_node *
pctrie_getroot(struct pctrie *ptree)
{
return ((struct pctrie_node *)ptree->pt_root);
}
/*
* Set the root node for a tree.
*
* Stores the pointer value as-is in pt_root; passing NULL empties the
* tree.
*/
static __inline void
pctrie_setroot(struct pctrie *ptree, struct pctrie_node *node)
{
ptree->pt_root = (uintptr_t)node;
}
/*
* Returns TRUE if the specified node is a leaf and FALSE otherwise.
*
* Only the PCTRIE_ISLEAF tag bit of the pointer value is examined; the
* pointer is never dereferenced, so NULL is accepted and reported as
* not a leaf.
*/
static __inline boolean_t
pctrie_isleaf(struct pctrie_node *node)
{
return (((uintptr_t)node & PCTRIE_ISLEAF) != 0);
}
/*
* Returns the associated val extracted from node.
*
* Clears the PCTRIE_FLAGS bits from the tagged pointer and returns the
* remainder as a pointer to the stored key.
*/
static __inline uint64_t *
pctrie_toval(struct pctrie_node *node)
{
return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS));
}
/*
 * Adds the val as a child of the provided node.
 *
 * The child slot is the one selected by index at level clev; the value
 * pointer is stored tagged with PCTRIE_ISLEAF.  The node's child count
 * is not touched here.
 */
static __inline void
pctrie_addval(struct pctrie_node *node, uint64_t index, uint16_t clev,
    uint64_t *val)
{
	uintptr_t tagged;

	tagged = (uintptr_t)val | PCTRIE_ISLEAF;
	node->pn_child[pctrie_slot(index, clev)] = (void *)tagged;
}
/*
 * Returns the highest level at which two keys differ.
 * It cannot accept 2 equal keys: the downward scan below only stops at a
 * level where the keys disagree.
 */
static __inline uint16_t
pctrie_keydiff(uint64_t index1, uint64_t index2)
{
	uint64_t delta;
	uint16_t lev;

	KASSERT(index1 != index2, ("%s: passing the same key value %jx",
	    __func__, (uintmax_t)index1));

	/* Bits set in delta are exactly the bits where the keys differ. */
	delta = index1 ^ index2;
	lev = PCTRIE_LIMIT;
	while (pctrie_slot(delta, lev) == 0)
		lev--;
	return (lev);
}
/*
 * Returns TRUE if it can be determined that key does not belong to the
 * specified node.  Otherwise, returns FALSE.
 *
 * A node at the top level (pn_clev not below PCTRIE_LIMIT) never rejects
 * a key; its owner field is not consulted.  For any other node the key,
 * trimmed just above the node's level, must match the node's owner.
 */
static __inline boolean_t
pctrie_keybarr(struct pctrie_node *node, uint64_t idx)
{
	if (node->pn_clev >= PCTRIE_LIMIT)
		return (FALSE);
	return (pctrie_trimkey(idx, node->pn_clev + 1) != node->pn_owner);
}
/*
* Internal helper for pctrie_reclaim_allnodes().
* This function is recursive.
*
* Clears every child slot of node, recursing into children that are
* internal nodes, and then frees node itself. Leaf values are only
* unlinked; they are not passed to freefn.
*/
static void
pctrie_reclaim_allnodes_int(struct pctrie *ptree, struct pctrie_node *node,
pctrie_free_t freefn)
{
int slot;
KASSERT(node->pn_count <= PCTRIE_COUNT,
("pctrie_reclaim_allnodes_int: bad count in node %p", node));
/*
* The loop ends once pn_count reaches zero rather than on a slot
* bound, so it relies on pn_count matching the number of non-NULL
* children.
*/
for (slot = 0; node->pn_count != 0; slot++) {
if (node->pn_child[slot] == NULL)
continue;
if (!pctrie_isleaf(node->pn_child[slot]))
pctrie_reclaim_allnodes_int(ptree,
node->pn_child[slot], freefn);
node->pn_child[slot] = NULL;
node->pn_count--;
}
pctrie_node_put(ptree, node, freefn);
}
/*
* pctrie node zone initializer.
*
* Zeroes the child array of the node at mem; the other fields are left
* untouched (pctrie_node_get() sets them). The size and flags arguments
* are unused. Always returns 0.
*/
int
pctrie_zone_init(void *mem, int size __unused, int flags __unused)
{
struct pctrie_node *node;
node = mem;
memset(node->pn_child, 0, sizeof(node->pn_child));
return (0);
}
/*
* Returns the size in bytes of struct pctrie_node, whose definition is
* private to this file.
*/
size_t
pctrie_node_size(void)
{
return (sizeof(struct pctrie_node));
}
/*
* Inserts the key-value pair into the trie.
* Panics if the key already exists.
*
* The key is read from *val. Returns 0 on success, or ENOMEM when a new
* internal node is needed and allocfn fails; in that case the trie has
* not been modified.
*/
int
pctrie_insert(struct pctrie *ptree, uint64_t *val, pctrie_alloc_t allocfn)
{
uint64_t index, newind;
void **parentp;
struct pctrie_node *node, *tmp;
uint64_t *m;
int slot;
uint16_t clev;
index = *val;
/*
* The owner of record for root is not really important because it
* will never be used.
*/
node = pctrie_getroot(ptree);
if (node == NULL) {
/* Empty trie: the root becomes a tagged leaf, no node is needed. */
ptree->pt_root = (uintptr_t)val | PCTRIE_ISLEAF;
return (0);
}
/* parentp tracks the pointer slot that refers to the current node. */
parentp = (void **)&ptree->pt_root;
for (;;) {
if (pctrie_isleaf(node)) {
m = pctrie_toval(node);
if (*m == index)
panic("%s: key %jx is already present",
__func__, (uintmax_t)index);
/*
* Replace the leaf with a new two-child node placed at the
* level where the existing key and the new key diverge.
*/
clev = pctrie_keydiff(*m, index);
tmp = pctrie_node_get(ptree, allocfn,
pctrie_trimkey(index, clev + 1), 2, clev);
if (tmp == NULL)
return (ENOMEM);
*parentp = tmp;
pctrie_addval(tmp, index, clev, val);
pctrie_addval(tmp, *m, clev, m);
return (0);
} else if (pctrie_keybarr(node, index))
break;
slot = pctrie_slot(index, node->pn_clev);
if (node->pn_child[slot] == NULL) {
/* Free slot in an existing node: store the leaf directly. */
node->pn_count++;
pctrie_addval(node, index, node->pn_clev, val);
return (0);
}
parentp = &node->pn_child[slot];
node = node->pn_child[slot];
}
/*
* A new node is needed because the right insertion level is reached.
* Setup the new intermediate node and add the 2 children: the
* new object and the older edge.
*/
newind = node->pn_owner;
clev = pctrie_keydiff(newind, index);
tmp = pctrie_node_get(ptree, allocfn,
pctrie_trimkey(index, clev + 1), 2, clev);
if (tmp == NULL)
return (ENOMEM);
*parentp = tmp;
pctrie_addval(tmp, index, clev, val);
slot = pctrie_slot(newind, clev);
tmp->pn_child[slot] = node;
return (0);
}
/*
 * Returns the value stored at the index.  If the index is not present,
 * NULL is returned.
 *
 * The walk starts at the root and follows, at every internal node, the
 * child slot selected by index.  It ends at the first leaf (which either
 * holds the key or not), at a node that cannot contain the key, or at an
 * empty slot.
 */
uint64_t *
pctrie_lookup(struct pctrie *ptree, uint64_t index)
{
	struct pctrie_node *cur;
	uint64_t *found;

	for (cur = pctrie_getroot(ptree); cur != NULL;
	    cur = cur->pn_child[pctrie_slot(index, cur->pn_clev)]) {
		if (pctrie_isleaf(cur)) {
			found = pctrie_toval(cur);
			return (*found == index ? found : NULL);
		}
		if (pctrie_keybarr(cur, index))
			return (NULL);
	}
	return (NULL);
}
/*
* Look up the nearest entry at a position bigger than or equal to index.
*
* Returns a pointer to the stored key, or NULL when the trie holds no
* key >= index. The search descends along index, scans the remaining
* slots of a node to the right when the exact slot yields nothing, and
* otherwise ascends through an explicit stack of visited nodes, bumping
* index to the start of the next subtree on the way.
*/
uint64_t *
pctrie_lookup_ge(struct pctrie *ptree, uint64_t index)
{
struct pctrie_node *stack[PCTRIE_LIMIT];
uint64_t inc;
uint64_t *m;
struct pctrie_node *child, *node;
#ifdef INVARIANTS
int loops = 0;
#endif
int slot, tos;
node = pctrie_getroot(ptree);
if (node == NULL)
return (NULL);
else if (pctrie_isleaf(node)) {
/* Single-entry trie: the root itself is the only candidate. */
m = pctrie_toval(node);
if (*m >= index)
return (m);
else
return (NULL);
}
tos = 0;
for (;;) {
/*
* If the keys differ before the current bisection node,
* then the search key might rollback to the earliest
* available bisection node or to the smallest key
* in the current node (if the owner is bigger than the
* search key).
*/
if (pctrie_keybarr(node, index)) {
if (index > node->pn_owner) {
/*
* Also entered by "goto ascend" from the bottom of the loop
* when the current node has nothing at or right of the slot.
*/
ascend:
KASSERT(++loops < 1000,
("pctrie_lookup_ge: too many loops"));
/*
* Pop nodes from the stack until either the
* stack is empty or a node that could have a
* matching descendant is found.
*/
do {
if (tos == 0)
return (NULL);
node = stack[--tos];
} while (pctrie_slot(index,
node->pn_clev) == (PCTRIE_COUNT - 1));
/*
* The following computation cannot overflow
* because index's slot at the current level
* is less than PCTRIE_COUNT - 1.
*/
index = pctrie_trimkey(index,
node->pn_clev);
index += PCTRIE_UNITLEVEL(node->pn_clev);
} else
index = node->pn_owner;
KASSERT(!pctrie_keybarr(node, index),
("pctrie_lookup_ge: keybarr failed"));
}
slot = pctrie_slot(index, node->pn_clev);
child = node->pn_child[slot];
if (pctrie_isleaf(child)) {
m = pctrie_toval(child);
if (*m >= index)
return (m);
} else if (child != NULL)
goto descend;
/*
* Look for an available edge or val within the current
* bisection node.
*/
if (slot < (PCTRIE_COUNT - 1)) {
inc = PCTRIE_UNITLEVEL(node->pn_clev);
index = pctrie_trimkey(index, node->pn_clev);
do {
index += inc;
slot++;
child = node->pn_child[slot];
if (pctrie_isleaf(child)) {
m = pctrie_toval(child);
if (*m >= index)
return (m);
} else if (child != NULL)
goto descend;
} while (slot < (PCTRIE_COUNT - 1));
}
KASSERT(child == NULL || pctrie_isleaf(child),
("pctrie_lookup_ge: child is radix node"));
/*
* If a value or edge bigger than the search slot is not found
* in the current node, ascend to the next higher-level node.
*/
goto ascend;
descend:
KASSERT(node->pn_clev > 0,
("pctrie_lookup_ge: pushing leaf's parent"));
KASSERT(tos < PCTRIE_LIMIT,
("pctrie_lookup_ge: stack overflow"));
/* Remember the node so that a later ascend can return to it. */
stack[tos++] = node;
node = child;
}
}
/*
* Look up the nearest entry at a position less than or equal to index.
*
* Returns a pointer to the stored key, or NULL when the trie holds no
* key <= index. Mirror image of pctrie_lookup_ge(): slots are scanned
* towards zero and index is lowered to the end of the previous subtree
* when ascending.
*/
uint64_t *
pctrie_lookup_le(struct pctrie *ptree, uint64_t index)
{
struct pctrie_node *stack[PCTRIE_LIMIT];
uint64_t inc;
uint64_t *m;
struct pctrie_node *child, *node;
#ifdef INVARIANTS
int loops = 0;
#endif
int slot, tos;
node = pctrie_getroot(ptree);
if (node == NULL)
return (NULL);
else if (pctrie_isleaf(node)) {
/* Single-entry trie: the root itself is the only candidate. */
m = pctrie_toval(node);
if (*m <= index)
return (m);
else
return (NULL);
}
tos = 0;
for (;;) {
/*
* If the keys differ before the current bisection node,
* then the search key might rollback to the earliest
* available bisection node or to the largest key
* in the current node (if the owner is smaller than the
* search key).
*/
if (pctrie_keybarr(node, index)) {
if (index > node->pn_owner) {
/*
* One past the last key covered by this node; the index--
* below turns it into the largest key the node can hold.
*/
index = node->pn_owner + PCTRIE_COUNT *
PCTRIE_UNITLEVEL(node->pn_clev);
} else {
/*
* Also entered by "goto ascend" from the bottom of the loop
* when the current node has nothing at or left of the slot.
*/
ascend:
KASSERT(++loops < 1000,
("pctrie_lookup_le: too many loops"));
/*
* Pop nodes from the stack until either the
* stack is empty or a node that could have a
* matching descendant is found.
*/
do {
if (tos == 0)
return (NULL);
node = stack[--tos];
} while (pctrie_slot(index,
node->pn_clev) == 0);
/*
* The following computation cannot overflow
* because index's slot at the current level
* is greater than 0.
*/
index = pctrie_trimkey(index,
node->pn_clev);
}
index--;
KASSERT(!pctrie_keybarr(node, index),
("pctrie_lookup_le: keybarr failed"));
}
slot = pctrie_slot(index, node->pn_clev);
child = node->pn_child[slot];
if (pctrie_isleaf(child)) {
m = pctrie_toval(child);
if (*m <= index)
return (m);
} else if (child != NULL)
goto descend;
/*
* Look for an available edge or value within the current
* bisection node.
*/
if (slot > 0) {
inc = PCTRIE_UNITLEVEL(node->pn_clev);
/* Set all bits below this level so each step lands on a slot's last key. */
index |= inc - 1;
do {
index -= inc;
slot--;
child = node->pn_child[slot];
if (pctrie_isleaf(child)) {
m = pctrie_toval(child);
if (*m <= index)
return (m);
} else if (child != NULL)
goto descend;
} while (slot > 0);
}
KASSERT(child == NULL || pctrie_isleaf(child),
("pctrie_lookup_le: child is radix node"));
/*
* If a value or edge smaller than the search slot is not found
* in the current node, ascend to the next higher-level node.
*/
goto ascend;
descend:
KASSERT(node->pn_clev > 0,
("pctrie_lookup_le: pushing leaf's parent"));
KASSERT(tos < PCTRIE_LIMIT,
("pctrie_lookup_le: stack overflow"));
/* Remember the node so that a later ascend can return to it. */
stack[tos++] = node;
node = child;
}
}
/*
* Remove the specified index from the tree.
* Panics if the key is not present.
*
* Only the trie's internal nodes are managed here: when removing the
* leaf leaves its parent node with a single child, that node is
* replaced by the remaining child and released through freefn. The
* value itself is merely unlinked.
*/
void
pctrie_remove(struct pctrie *ptree, uint64_t index, pctrie_free_t freefn)
{
struct pctrie_node *node, *parent;
uint64_t *m;
int i, slot;
node = pctrie_getroot(ptree);
if (pctrie_isleaf(node)) {
/* The trie holds a single value stored directly in the root. */
m = pctrie_toval(node);
if (*m != index)
panic("%s: invalid key found", __func__);
pctrie_setroot(ptree, NULL);
return;
}
parent = NULL;
for (;;) {
if (node == NULL)
panic("pctrie_remove: impossible to locate the key");
slot = pctrie_slot(index, node->pn_clev);
if (pctrie_isleaf(node->pn_child[slot])) {
m = pctrie_toval(node->pn_child[slot]);
if (*m != index)
panic("%s: invalid key found", __func__);
node->pn_child[slot] = NULL;
node->pn_count--;
/* Two or more children remain: the node stays in place. */
if (node->pn_count > 1)
break;
/* Find the remaining child so it can take the node's place. */
for (i = 0; i < PCTRIE_COUNT; i++)
if (node->pn_child[i] != NULL)
break;
KASSERT(i != PCTRIE_COUNT,
("%s: invalid node configuration", __func__));
if (parent == NULL)
pctrie_setroot(ptree, node->pn_child[i]);
else {
slot = pctrie_slot(index, parent->pn_clev);
KASSERT(parent->pn_child[slot] == node,
("%s: invalid child value", __func__));
parent->pn_child[slot] = node->pn_child[i];
}
/* Empty the node so pctrie_node_put()'s checks are satisfied. */
node->pn_count--;
node->pn_child[i] = NULL;
pctrie_node_put(ptree, node, freefn);
break;
}
parent = node;
node = node->pn_child[slot];
}
}
/*
 * Remove and free all the nodes from the tree.
 * This function is recursive but there is a tight control on it as the
 * maximum depth of the tree is fixed.
 *
 * The root pointer is cleared first; internal nodes are then released
 * through freefn.  A root that is a lone leaf needs no node to be freed.
 */
void
pctrie_reclaim_allnodes(struct pctrie *ptree, pctrie_free_t freefn)
{
	struct pctrie_node *top;

	top = pctrie_getroot(ptree);
	if (top != NULL) {
		pctrie_setroot(ptree, NULL);
		if (!pctrie_isleaf(top))
			pctrie_reclaim_allnodes_int(ptree, top, freefn);
	}
}
#ifdef DDB
/*
* Show details about the given node.
*
* DDB command: treats the supplied address as a struct pctrie_node and
* prints its owner, child count and level, followed by one line per
* non-NULL child slot. For leaf children the untagged value pointer is
* printed as well; for internal children that column is NULL. Does
* nothing when no address was given.
*/
DB_SHOW_COMMAND(pctrienode, db_show_pctrienode)
{
struct pctrie_node *node;
int i;
if (!have_addr)
return;
node = (struct pctrie_node *)addr;
db_printf("node %p, owner %jx, children count %u, level %u:\n",
(void *)node, (uintmax_t)node->pn_owner, node->pn_count,
node->pn_clev);
for (i = 0; i < PCTRIE_COUNT; i++)
if (node->pn_child[i] != NULL)
db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
i, (void *)node->pn_child[i],
pctrie_isleaf(node->pn_child[i]) ?
pctrie_toval(node->pn_child[i]) : NULL,
node->pn_clev);
}
#endif /* DDB */

600
freebsd/sys/kern/vfs_acl.c Normal file
View File

@ -0,0 +1,600 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 1999-2006, 2016-2017 Robert N. M. Watson
* All rights reserved.
*
* This software was developed by Robert Watson for the TrustedBSD Project.
*
* Portions of this software were developed by BAE Systems, the University of
* Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL
* contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent
* Computing (TC) research program.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Developed by the TrustedBSD Project.
*
* ACL system calls and other functions common across different ACL types.
* Type-specific routines go into subr_acl_<type>.c.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/capsicum.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
#include <sys/sysent.h>
#include <sys/acl.h>
#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>
/*
* The old-to-new conversion copies up to OLDACL_MAX_ENTRIES entries into
* a struct acl, so the new format must have room for at least that many.
*/
CTASSERT(ACL_MAX_ENTRIES >= OLDACL_MAX_ENTRIES);
MALLOC_DEFINE(M_ACL, "acl", "Access Control Lists");
/* Path-based workers. */
static int kern___acl_aclcheck_path(struct thread *td, const char *path,
acl_type_t type, struct acl *aclp, int follow);
static int kern___acl_delete_path(struct thread *td, const char *path,
acl_type_t type, int follow);
static int kern___acl_get_path(struct thread *td, const char *path,
acl_type_t type, struct acl *aclp, int follow);
static int kern___acl_set_path(struct thread *td, const char *path,
acl_type_t type, const struct acl *aclp, int follow);
/* Vnode-based workers. */
static int vacl_set_acl(struct thread *td, struct vnode *vp,
acl_type_t type, const struct acl *aclp);
static int vacl_get_acl(struct thread *td, struct vnode *vp,
acl_type_t type, struct acl *aclp);
static int vacl_aclcheck(struct thread *td, struct vnode *vp,
acl_type_t type, const struct acl *aclp);
/*
 * Convert an old-format ACL into the current "struct acl" layout.
 *
 * Returns EINVAL, leaving dest untouched, when the source entry count is
 * negative or larger than OLDACL_MAX_ENTRIES.  Otherwise dest is zeroed,
 * its maximum is set to ACL_MAX_ENTRIES, the tag, id and permission
 * fields of every entry are copied, and 0 is returned.
 */
int
acl_copy_oldacl_into_acl(const struct oldacl *source, struct acl *dest)
{
	int idx;

	if (!(source->acl_cnt >= 0 && source->acl_cnt <= OLDACL_MAX_ENTRIES))
		return (EINVAL);

	bzero(dest, sizeof(*dest));
	dest->acl_cnt = source->acl_cnt;
	dest->acl_maxcnt = ACL_MAX_ENTRIES;

	idx = 0;
	while (idx < dest->acl_cnt) {
		dest->acl_entry[idx].ae_perm = source->acl_entry[idx].ae_perm;
		dest->acl_entry[idx].ae_id = source->acl_entry[idx].ae_id;
		dest->acl_entry[idx].ae_tag = source->acl_entry[idx].ae_tag;
		idx++;
	}
	return (0);
}
/*
 * Convert a current-format ACL into the old "struct oldacl" layout.
 *
 * Returns EINVAL, leaving dest untouched, when the source holds more
 * than OLDACL_MAX_ENTRIES entries.  Otherwise dest is zeroed, the tag,
 * id and permission fields of every entry are copied, and 0 is
 * returned.
 */
int
acl_copy_acl_into_oldacl(const struct acl *source, struct oldacl *dest)
{
	int idx;

	if (source->acl_cnt > OLDACL_MAX_ENTRIES)
		return (EINVAL);

	bzero(dest, sizeof(*dest));
	dest->acl_cnt = source->acl_cnt;

	idx = 0;
	while (idx < dest->acl_cnt) {
		dest->acl_entry[idx].ae_perm = source->acl_entry[idx].ae_perm;
		dest->acl_entry[idx].ae_id = source->acl_entry[idx].ae_id;
		dest->acl_entry[idx].ae_tag = source->acl_entry[idx].ae_tag;
		idx++;
	}
	return (0);
}
/*
 * At one time, "struct ACL" was extended in order to add support for NFSv4
 * ACLs.  Instead of creating compatibility versions of all the ACL-related
 * syscalls, they were left intact.  It's possible to find out what the code
 * calling these syscalls (libc) expects based on the "type" argument - if
 * it's either ACL_TYPE_ACCESS_OLD or ACL_TYPE_DEFAULT_OLD (which previously
 * were known as ACL_TYPE_ACCESS and ACL_TYPE_DEFAULT), then it's the
 * "struct oldacl".  If it's something else, then it's the new "struct acl".
 * In the latter case, the routines below just copyin/copyout the contents.
 * In the former case, they copyin the "struct oldacl" and convert it to the
 * new format.
 */
/*
 * Copy an ACL in from userland into kernel_acl.
 *
 * Returns 0 on success, the copyin() error when the user buffer cannot
 * be read, or EINVAL when the ACL is malformed: an old-format ACL with
 * an out-of-range entry count, or a new-format ACL whose acl_maxcnt is
 * not ACL_MAX_ENTRIES.  On any error the contents of kernel_acl must not
 * be used.
 */
static int
acl_copyin(const void *user_acl, struct acl *kernel_acl, acl_type_t type)
{
	int error;
	struct oldacl old;

	switch (type) {
	case ACL_TYPE_ACCESS_OLD:
	case ACL_TYPE_DEFAULT_OLD:
		error = copyin(user_acl, &old, sizeof(old));
		if (error != 0)
			break;
		/*
		 * The conversion rejects a bad entry count without
		 * touching kernel_acl, so its result must be propagated;
		 * otherwise the caller would go on to use an ACL that was
		 * never filled in.
		 */
		error = acl_copy_oldacl_into_acl(&old, kernel_acl);
		break;
	default:
		error = copyin(user_acl, kernel_acl, sizeof(*kernel_acl));
		/*
		 * Only look at the copied data when the copy succeeded, so
		 * that a fault is reported as such and no unfilled memory
		 * is inspected.
		 */
		if (error != 0)
			break;
		if (kernel_acl->acl_maxcnt != ACL_MAX_ENTRIES)
			error = EINVAL;
		break;
	}
	return (error);
}
/*
 * Copy a kernel ACL out to userland in the layout implied by type.
 *
 * For the two old types the ACL is first converted to a "struct oldacl";
 * a conversion failure is returned without writing to userland.  For
 * any other type the acl_maxcnt field of the user's buffer is read
 * first: EFAULT is returned when it cannot be fetched and EINVAL when it
 * is not ACL_MAX_ENTRIES.  Otherwise the result of copyout() is
 * returned.
 */
static int
acl_copyout(const struct acl *kernel_acl, void *user_acl, acl_type_t type)
{
	struct oldacl old;
	uint32_t am;
	int error;

	if (type == ACL_TYPE_ACCESS_OLD || type == ACL_TYPE_DEFAULT_OLD) {
		error = acl_copy_acl_into_oldacl(kernel_acl, &old);
		if (error == 0)
			error = copyout(&old, user_acl, sizeof(old));
		return (error);
	}

	error = fueword32((char *)user_acl +
	    offsetof(struct acl, acl_maxcnt), &am);
	if (error == -1)
		return (EFAULT);
	if (am != ACL_MAX_ENTRIES)
		return (EINVAL);
	return (copyout(kernel_acl, user_acl, sizeof(*kernel_acl)));
}
/*
 * Convert "old" type - ACL_TYPE_{ACCESS,DEFAULT}_OLD - into its "new"
 * counterpart.  It's required for old (pre-NFSv4 ACLs) libc to work
 * with new kernel.  Fixing 'type' for old binaries with new libc
 * is being done in lib/libc/posix1e/acl_support.c:_acl_type_unold().
 *
 * Any other type value is returned unchanged.
 */
static int
acl_type_unold(int type)
{
	if (type == ACL_TYPE_ACCESS_OLD)
		return (ACL_TYPE_ACCESS);
	if (type == ACL_TYPE_DEFAULT_OLD)
		return (ACL_TYPE_DEFAULT);
	return (type);
}
/*
* These calls wrap the real vnode operations, and are called by the syscall
* code once the syscall has converted the path or file descriptor to a vnode
* (unlocked). The aclp pointer is assumed still to point to userland, so
* this should not be consumed within the kernel except by syscall code.
* Other code should directly invoke VOP_{SET,GET}ACL.
*/
/*
* Given a vnode, set its ACL.
*
* aclp points to userland; it is copied into a kernel buffer with
* acl_copyin() before use. Returns 0 on success or the first error
* from acl_copyin(), vn_start_write(), the MAC check (when MAC is
* configured) or VOP_SETACL(). The kernel buffer is released on every
* path.
*/
static int
vacl_set_acl(struct thread *td, struct vnode *vp, acl_type_t type,
const struct acl *aclp)
{
struct acl *inkernelacl;
struct mount *mp;
int error;
AUDIT_ARG_VALUE(type);
inkernelacl = acl_alloc(M_WAITOK);
error = acl_copyin(aclp, inkernelacl, type);
if (error != 0)
goto out;
error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
if (error != 0)
goto out;
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
AUDIT_ARG_VNODE1(vp);
#ifdef MAC
error = mac_vnode_check_setacl(td->td_ucred, vp, type, inkernelacl);
if (error != 0)
goto out_unlock;
#endif
/* The VOP is given the new-style type; the MAC check above sees the original. */
error = VOP_SETACL(vp, acl_type_unold(type), inkernelacl,
td->td_ucred, td);
#ifdef MAC
out_unlock:
#endif
VOP_UNLOCK(vp, 0);
vn_finished_write(mp);
out:
acl_free(inkernelacl);
return (error);
}
/*
* Given a vnode, get its ACL.
*
* The ACL is fetched with VOP_GETACL() into a zeroed kernel buffer while the
* vnode is exclusively locked, and is copied out to the userland buffer aclp
* only after the vnode has been unlocked. When MAC is compiled in,
* mac_vnode_check_getacl() may reject the request first. Returns 0 on
* success or an errno value.
*/
static int
vacl_get_acl(struct thread *td, struct vnode *vp, acl_type_t type,
struct acl *aclp)
{
struct acl *inkernelacl;
int error;
AUDIT_ARG_VALUE(type);
/* M_ZERO: the buffer is later copied out to userland in full. */
inkernelacl = acl_alloc(M_WAITOK | M_ZERO);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
AUDIT_ARG_VNODE1(vp);
#ifdef MAC
error = mac_vnode_check_getacl(td->td_ucred, vp, type);
if (error != 0)
goto out;
#endif
error = VOP_GETACL(vp, acl_type_unold(type), inkernelacl,
td->td_ucred, td);
#ifdef MAC
/* The label only exists when the MAC check above can jump to it. */
out:
#endif
VOP_UNLOCK(vp, 0);
/* Copy out with the original (possibly old-style) type, vnode unlocked. */
if (error == 0)
error = acl_copyout(inkernelacl, aclp, type);
acl_free(inkernelacl);
return (error);
}
/*
* Given a vnode, delete its ACL.
*
* Deletion is expressed as VOP_SETACL() with a null ACL pointer, issued
* with the vnode exclusively locked and bracketed by
* vn_start_write()/vn_finished_write(). When MAC is compiled in,
* mac_vnode_check_deleteacl() may reject the request first. Returns 0 on
* success or an errno value.
*/
static int
vacl_delete(struct thread *td, struct vnode *vp, acl_type_t type)
{
struct mount *mp;
int error;
AUDIT_ARG_VALUE(type);
error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
if (error != 0)
return (error);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
AUDIT_ARG_VNODE1(vp);
#ifdef MAC
error = mac_vnode_check_deleteacl(td->td_ucred, vp, type);
if (error != 0)
goto out;
#endif
/* A null ACL pointer is how the delete request is passed down. */
error = VOP_SETACL(vp, acl_type_unold(type), 0, td->td_ucred, td);
#ifdef MAC
/* The label only exists when the MAC check above can jump to it. */
out:
#endif
VOP_UNLOCK(vp, 0);
vn_finished_write(mp);
return (error);
}
/*
 * Given a vnode, check whether an ACL is appropriate for it.
 *
 * The userland ACL at aclp is copied into a temporary kernel buffer and
 * handed to VOP_ACLCHECK(); the buffer is freed before returning.
 *
 * XXXRW: No vnode lock held so can't audit vnode state...?
 */
static int
vacl_aclcheck(struct thread *td, struct vnode *vp, acl_type_t type,
    const struct acl *aclp)
{
	struct acl *kacl;
	int error;

	kacl = acl_alloc(M_WAITOK);
	error = acl_copyin(aclp, kacl, type);
	if (error == 0)
		error = VOP_ACLCHECK(vp, acl_type_unold(type), kacl,
		    td->td_ucred, td);
	acl_free(kacl);
	return (error);
}
/*
* syscalls -- convert the path/fd to a vnode, and call vacl_whatever. Don't
* need to lock, as the vacl_ code will get/release any locks required.
*/
/*
* Given a file path, get an ACL for it
*/
int
sys___acl_get_file(struct thread *td, struct __acl_get_file_args *uap)
{
return (kern___acl_get_path(td, uap->path, uap->type, uap->aclp,
FOLLOW));
}
/*
* Given a file path, get an ACL for it; don't follow links.
*/
int
sys___acl_get_link(struct thread *td, struct __acl_get_link_args *uap)
{
return(kern___acl_get_path(td, uap->path, uap->type, uap->aclp,
NOFOLLOW));
}
/*
 * Look up the userspace path (follow is FOLLOW or NOFOLLOW) and fetch the
 * ACL of the resulting vnode into the userland buffer aclp.
 */
static int
kern___acl_get_path(struct thread *td, const char *path, acl_type_t type,
    struct acl *aclp, int follow)
{
	struct nameidata nd;
	int error;

	NDINIT(&nd, LOOKUP, follow | AUDITVNODE1, UIO_USERSPACE, path, td);
	error = namei(&nd);
	if (error != 0)
		return (error);

	error = vacl_get_acl(td, nd.ni_vp, type, aclp);
	NDFREE(&nd, 0);
	return (error);
}
/*
* Given a file path, set an ACL for it.
*/
int
sys___acl_set_file(struct thread *td, struct __acl_set_file_args *uap)
{
return(kern___acl_set_path(td, uap->path, uap->type, uap->aclp,
FOLLOW));
}
/*
* Given a file path, set an ACL for it; don't follow links.
*/
int
sys___acl_set_link(struct thread *td, struct __acl_set_link_args *uap)
{
return(kern___acl_set_path(td, uap->path, uap->type, uap->aclp,
NOFOLLOW));
}
/*
 * Look up the userspace path (follow is FOLLOW or NOFOLLOW) and set the
 * ACL of the resulting vnode from the userland buffer aclp.
 */
static int
kern___acl_set_path(struct thread *td, const char *path,
    acl_type_t type, const struct acl *aclp, int follow)
{
	struct nameidata nd;
	int error;

	NDINIT(&nd, LOOKUP, follow | AUDITVNODE1, UIO_USERSPACE, path, td);
	error = namei(&nd);
	if (error != 0)
		return (error);

	error = vacl_set_acl(td, nd.ni_vp, type, aclp);
	NDFREE(&nd, 0);
	return (error);
}
/*
 * __acl_get_fd(2): fetch the ACL of the file referenced by the descriptor
 * uap->filedes, which must carry the CAP_ACL_GET right.
 */
int
sys___acl_get_fd(struct thread *td, struct __acl_get_fd_args *uap)
{
	cap_rights_t rights;
	struct file *fp;
	int error;

	AUDIT_ARG_FD(uap->filedes);
	error = getvnode(td, uap->filedes,
	    cap_rights_init(&rights, CAP_ACL_GET), &fp);
	if (error != 0)
		return (error);

	error = vacl_get_acl(td, fp->f_vnode, uap->type, uap->aclp);
	fdrop(fp, td);
	return (error);
}
/*
 * __acl_set_fd(2): set the ACL of the file referenced by the descriptor
 * uap->filedes, which must carry the CAP_ACL_SET right.
 */
int
sys___acl_set_fd(struct thread *td, struct __acl_set_fd_args *uap)
{
	cap_rights_t rights;
	struct file *fp;
	int error;

	AUDIT_ARG_FD(uap->filedes);
	error = getvnode(td, uap->filedes,
	    cap_rights_init(&rights, CAP_ACL_SET), &fp);
	if (error != 0)
		return (error);

	error = vacl_set_acl(td, fp->f_vnode, uap->type, uap->aclp);
	fdrop(fp, td);
	return (error);
}
/*
* Given a file path, delete an ACL from it.
*/
int
sys___acl_delete_file(struct thread *td, struct __acl_delete_file_args *uap)
{
return (kern___acl_delete_path(td, uap->path, uap->type, FOLLOW));
}
/*
* Given a file path, delete an ACL from it; don't follow links.
*/
int
sys___acl_delete_link(struct thread *td, struct __acl_delete_link_args *uap)
{
return (kern___acl_delete_path(td, uap->path, uap->type, NOFOLLOW));
}
/*
 * Look up the userspace path (follow is FOLLOW or NOFOLLOW) and delete the
 * ACL of the given type from the resulting vnode.
 */
static int
kern___acl_delete_path(struct thread *td, const char *path,
    acl_type_t type, int follow)
{
	struct nameidata nd;
	int error;

	NDINIT(&nd, LOOKUP, follow, UIO_USERSPACE, path, td);
	error = namei(&nd);
	if (error != 0)
		return (error);

	error = vacl_delete(td, nd.ni_vp, type);
	NDFREE(&nd, 0);
	return (error);
}
/*
 * __acl_delete_fd(2): delete an ACL from the file referenced by the
 * descriptor uap->filedes, which must carry the CAP_ACL_DELETE right.
 */
int
sys___acl_delete_fd(struct thread *td, struct __acl_delete_fd_args *uap)
{
	cap_rights_t rights;
	struct file *fp;
	int error;

	AUDIT_ARG_FD(uap->filedes);
	error = getvnode(td, uap->filedes,
	    cap_rights_init(&rights, CAP_ACL_DELETE), &fp);
	if (error != 0)
		return (error);

	error = vacl_delete(td, fp->f_vnode, uap->type);
	fdrop(fp, td);
	return (error);
}
/*
* Given a file path, check an ACL for it.
*/
int
sys___acl_aclcheck_file(struct thread *td, struct __acl_aclcheck_file_args *uap)
{
return (kern___acl_aclcheck_path(td, uap->path, uap->type, uap->aclp,
FOLLOW));
}
/*
* Given a file path, check an ACL for it; don't follow links.
*/
int
sys___acl_aclcheck_link(struct thread *td, struct __acl_aclcheck_link_args *uap)
{
return (kern___acl_aclcheck_path(td, uap->path, uap->type, uap->aclp,
NOFOLLOW));
}
/*
 * Look up the userspace path (follow is FOLLOW or NOFOLLOW) and check the
 * userland ACL aclp against the resulting vnode.
 */
static int
kern___acl_aclcheck_path(struct thread *td, const char *path, acl_type_t type,
    struct acl *aclp, int follow)
{
	struct nameidata nd;
	int error;

	NDINIT(&nd, LOOKUP, follow, UIO_USERSPACE, path, td);
	error = namei(&nd);
	if (error != 0)
		return (error);

	error = vacl_aclcheck(td, nd.ni_vp, type, aclp);
	NDFREE(&nd, 0);
	return (error);
}
/*
 * __acl_aclcheck_fd(2): check an ACL against the file referenced by the
 * descriptor uap->filedes, which must carry the CAP_ACL_CHECK right.
 */
int
sys___acl_aclcheck_fd(struct thread *td, struct __acl_aclcheck_fd_args *uap)
{
	cap_rights_t rights;
	struct file *fp;
	int error;

	AUDIT_ARG_FD(uap->filedes);
	error = getvnode(td, uap->filedes,
	    cap_rights_init(&rights, CAP_ACL_CHECK), &fp);
	if (error != 0)
		return (error);

	error = vacl_aclcheck(td, fp->f_vnode, uap->type, uap->aclp);
	fdrop(fp, td);
	return (error);
}
/*
 * Allocate a struct acl from the M_ACL malloc type using the given malloc
 * flags and preset its acl_maxcnt to ACL_MAX_ENTRIES.  Returns NULL if the
 * allocation fails.
 */
struct acl *
acl_alloc(int flags)
{
	struct acl *newacl;

	newacl = malloc(sizeof(*newacl), M_ACL, flags);
	if (newacl != NULL)
		newacl->acl_maxcnt = ACL_MAX_ENTRIES;

	return (newacl);
}
/*
 * Release a struct acl obtained from acl_alloc() back to the M_ACL
 * malloc type.
 */
void
acl_free(struct acl *aclp)
{

	free(aclp, M_ACL);
}

2987
freebsd/sys/kern/vfs_aio.c Normal file

File diff suppressed because it is too large Load Diff

5474
freebsd/sys/kern/vfs_bio.c Normal file

File diff suppressed because it is too large Load Diff

2604
freebsd/sys/kern/vfs_cache.c Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,528 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
* to the University of California by American Telephone and Telegraph
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
* the permission of UNIX System Laboratories, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dirent.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/rmlock.h>
#include <sys/refcount.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/vnode.h>
#include <netinet/in.h>
#include <net/radix.h>
/* malloc type used in this file for netcred allocations (export host entries). */
static MALLOC_DEFINE(M_NETADDR, "export_host", "Export host address structure");
/* Forward declarations of the file-local helpers defined below. */
#if defined(INET) || defined(INET6)
static struct radix_node_head *vfs_create_addrlist_af(
struct radix_node_head **prnh, int off);
#endif
static void vfs_free_addrlist(struct netexport *nep);
static int vfs_free_netcred(struct radix_node *rn, void *w);
static void vfs_free_addrlist_af(struct radix_node_head **prnh);
static int vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
struct export_args *argp);
static struct netcred *vfs_export_lookup(struct mount *, struct sockaddr *);
/*
* Network address lookup element
*
* netc_rnodes must stay the first member: the code in this file casts
* between struct radix_node * and struct netcred *.
*/
struct netcred {
struct radix_node netc_rnodes[2]; /* radix tree linkage, passed to rnh_addaddr */
int netc_exflags; /* export flags, copied from export_args ex_flags */
struct ucred *netc_anon; /* credential built from export_args ex_anon */
int netc_numsecflavors; /* copied from export_args ex_numsecflavors */
int netc_secflavors[MAXSECFLAVORS]; /* copied from export_args ex_secflavors */
};
/*
* Network export information
*
* One of these hangs off a mount point via mp->mnt_export. The radix
* heads are created on first use by vfs_hang_addrlist().
*/
struct netexport {
struct netcred ne_defexported; /* Default export */
struct radix_node_head *ne4; /* AF_INET export addresses, or NULL */
struct radix_node_head *ne6; /* AF_INET6 export addresses, or NULL */
};
/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by vfs_export() to set up the lists of export addresses.
 *
 * With argp->ex_addrlen == 0 the default export entry embedded in nep is
 * filled in and MNT_DEFEXPORTED is set on the mount.  Otherwise a netcred
 * is allocated with the address (and optional mask) stored directly behind
 * it and is inserted into the radix tree of the matching address family,
 * which is created on first use.
 *
 * Returns 0 on success or an errno value.  Most failures also record a
 * message with vfs_mount_error().
 */
static int
vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
    struct export_args *argp)
{
	struct netcred *np;
	struct radix_node_head *rnh;
	int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = NULL;
#if defined(INET6) || defined(INET)
	int off;
#endif
	int error;

	/*
	 * XXX: This routine converts from a `struct xucred'
	 * (argp->ex_anon) to a `struct ucred' (np->netc_anon).  This
	 * operation is questionable; for example, what should be done
	 * with fields like cr_uidinfo and cr_prison?  Currently, this
	 * routine does not touch them (leaves them as NULL).
	 */
	if (argp->ex_anon.cr_version != XUCRED_VERSION) {
		vfs_mount_error(mp, "ex_anon.cr_version: %d != %d",
		    argp->ex_anon.cr_version, XUCRED_VERSION);
		return (EINVAL);
	}

	/* No address given: this request sets up the default export. */
	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED) {
			vfs_mount_error(mp,
			    "MNT_DEFEXPORTED already set for mount %p", mp);
			return (EPERM);
		}
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = crget();
		np->netc_anon->cr_uid = argp->ex_anon.cr_uid;
		crsetgroups(np->netc_anon, argp->ex_anon.cr_ngroups,
		    argp->ex_anon.cr_groups);
		np->netc_anon->cr_prison = &prison0;
		prison_hold(np->netc_anon->cr_prison);
		np->netc_numsecflavors = argp->ex_numsecflavors;
		bcopy(argp->ex_secflavors, np->netc_secflavors,
		    sizeof(np->netc_secflavors));
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_DEFEXPORTED;
		MNT_IUNLOCK(mp);
		return (0);
	}

#if MSIZE <= 256
	if (argp->ex_addrlen > MLEN) {
		vfs_mount_error(mp, "ex_addrlen %d is greater than %d",
		    argp->ex_addrlen, MLEN);
		return (EINVAL);
	}
#endif

	/* One allocation: the netcred, then the address, then the mask. */
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK | M_ZERO);
	saddr = (struct sockaddr *) (np + 1);
	if ((error = copyin(argp->ex_addr, saddr, argp->ex_addrlen)))
		goto out;
	if (saddr->sa_family == AF_UNSPEC || saddr->sa_family > AF_MAX) {
		error = EINVAL;
		/* The format consumes one int: supply the offending family. */
		vfs_mount_error(mp, "Invalid saddr->sa_family: %d",
		    saddr->sa_family);
		goto out;
	}
	/* Never trust a user-supplied sa_len beyond what was copied in. */
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}

	/* Pick the per-family radix head, creating it on first use. */
	rnh = NULL;
	switch (saddr->sa_family) {
#ifdef INET
	case AF_INET:
		if ((rnh = nep->ne4) == NULL) {
			off = offsetof(struct sockaddr_in, sin_addr) << 3;
			rnh = vfs_create_addrlist_af(&nep->ne4, off);
		}
		break;
#endif
#ifdef INET6
	case AF_INET6:
		if ((rnh = nep->ne6) == NULL) {
			off = offsetof(struct sockaddr_in6, sin6_addr) << 3;
			rnh = vfs_create_addrlist_af(&nep->ne6, off);
		}
		break;
#endif
	}
	if (rnh == NULL) {
		error = ENOBUFS;
		vfs_mount_error(mp, "%s %s %d",
		    "Unable to initialize radix node head ",
		    "for address family", saddr->sa_family);
		goto out;
	}

	RADIX_NODE_HEAD_LOCK(rnh);
	rn = (*rnh->rnh_addaddr)(saddr, smask, &rnh->rh, np->netc_rnodes);
	RADIX_NODE_HEAD_UNLOCK(rnh);
	if (rn == NULL || np != (struct netcred *)rn) {	/* already exists */
		error = EPERM;
		vfs_mount_error(mp,
		    "netcred already exists for given addr/mask");
		goto out;
	}

	np->netc_exflags = argp->ex_flags;
	np->netc_anon = crget();
	np->netc_anon->cr_uid = argp->ex_anon.cr_uid;
	crsetgroups(np->netc_anon, argp->ex_anon.cr_ngroups,
	    argp->ex_anon.cr_groups);
	np->netc_anon->cr_prison = &prison0;
	prison_hold(np->netc_anon->cr_prison);
	np->netc_numsecflavors = argp->ex_numsecflavors;
	bcopy(argp->ex_secflavors, np->netc_secflavors,
	    sizeof(np->netc_secflavors));
	return (0);

out:
	/* Reached only before the netcred was successfully inserted. */
	free(np, M_NETADDR);
	return (error);
}
/*
 * Helper for vfs_free_addrlist: tree-walk callback that removes one entry
 * from the radix tree whose head is passed in w, drops the entry's
 * anonymous credential if it has one, and frees the entry.
 */
/* ARGSUSED */
static int
vfs_free_netcred(struct radix_node *rn, void *w)
{
	struct radix_node_head *head;
	struct netcred *entry;

	head = (struct radix_node_head *)w;
	(*head->rnh_deladdr)(rn->rn_key, rn->rn_mask, &head->rh);

	entry = (struct netcred *)rn;
	if (entry->netc_anon != NULL)
		crfree(entry->netc_anon);

	free(rn, M_NETADDR);
	return (0);
}
#if defined(INET) || defined(INET6)
/*
 * Create the radix tree head for one address family in *prnh, with off
 * handed to rn_inithead(), and initialise its lock.  Returns the new head,
 * or NULL if rn_inithead() fails.
 */
static struct radix_node_head *
vfs_create_addrlist_af(struct radix_node_head **prnh, int off)
{
	int ok;

	ok = rn_inithead((void **)prnh, off);
	if (ok == 0)
		return (NULL);

	RADIX_NODE_HEAD_LOCK_INIT(*prnh);
	return (*prnh);
}
#endif
/*
 * Tear down the radix tree of one address family: free every entry with
 * vfs_free_netcred(), destroy the head's lock, detach the head, and leave
 * the caller's pointer cleared.
 */
static void
vfs_free_addrlist_af(struct radix_node_head **prnh)
{
	struct radix_node_head *rnh;

	rnh = *prnh;
	RADIX_NODE_HEAD_LOCK(rnh);
	(*rnh->rnh_walktree)(&rnh->rh, vfs_free_netcred, rnh);
	RADIX_NODE_HEAD_UNLOCK(rnh);
	RADIX_NODE_HEAD_DESTROY(rnh);
	rn_detachhead((void **)prnh);
	/*
	 * Clear the caller's pointer, not the local parameter: assigning to
	 * prnh itself has no effect outside this function.
	 */
	*prnh = NULL;
}
/*
 * Free the net address hash lists that are hanging off the mount points:
 * both per-family radix trees, if present, and the credential held by the
 * default export entry.
 */
static void
vfs_free_addrlist(struct netexport *nep)
{

	if (nep->ne4 != NULL)
		vfs_free_addrlist_af(&nep->ne4);
	if (nep->ne6 != NULL)
		vfs_free_addrlist_af(&nep->ne6);

	if (nep->ne_defexported.netc_anon != NULL)
		crfree(nep->ne_defexported.netc_anon);
}
/*
* High level function to manipulate export options on a mount point
* and the passed in netexport.
* Struct export_args *argp is the variable used to twiddle options,
* the structure is described in sys/mount.h
*
* MNT_DELEXPORT in argp->ex_flags tears down the existing export state;
* MNT_EXPORTED adds an export entry, allocating the netexport on first use.
* Both may be given in one call, in which case the delete is done first.
* The work is done with mp->mnt_explock held exclusively. Returns 0 on
* success or an errno value.
*/
int
vfs_export(struct mount *mp, struct export_args *argp)
{
struct netexport *nep;
int error;
/* Note: this early return skips the "export" option cleanup at the end. */
if (argp->ex_numsecflavors < 0
|| argp->ex_numsecflavors >= MAXSECFLAVORS)
return (EINVAL);
error = 0;
lockmgr(&mp->mnt_explock, LK_EXCLUSIVE, NULL);
nep = mp->mnt_export;
if (argp->ex_flags & MNT_DELEXPORT) {
if (nep == NULL) {
error = ENOENT;
goto out;
}
/* Drop the public (WebNFS) export first if this mount holds it. */
if (mp->mnt_flag & MNT_EXPUBLIC) {
vfs_setpublicfs(NULL, NULL, NULL);
MNT_ILOCK(mp);
mp->mnt_flag &= ~MNT_EXPUBLIC;
MNT_IUNLOCK(mp);
}
vfs_free_addrlist(nep);
mp->mnt_export = NULL;
free(nep, M_MOUNT);
/* Forget the freed netexport so an MNT_EXPORTED request below starts fresh. */
nep = NULL;
MNT_ILOCK(mp);
mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
MNT_IUNLOCK(mp);
}
if (argp->ex_flags & MNT_EXPORTED) {
if (nep == NULL) {
nep = malloc(sizeof(struct netexport), M_MOUNT, M_WAITOK | M_ZERO);
mp->mnt_export = nep;
}
if (argp->ex_flags & MNT_EXPUBLIC) {
if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
goto out;
MNT_ILOCK(mp);
mp->mnt_flag |= MNT_EXPUBLIC;
MNT_IUNLOCK(mp);
}
if ((error = vfs_hang_addrlist(mp, nep, argp)))
goto out;
MNT_ILOCK(mp);
mp->mnt_flag |= MNT_EXPORTED;
MNT_IUNLOCK(mp);
}
out:
lockmgr(&mp->mnt_explock, LK_RELEASE, NULL);
/*
* Once we have executed the vfs_export() command, we do
* not want to keep the "export" option around in the
* options list, since that will cause subsequent MNT_UPDATE
* calls to fail. The export information is saved in
* mp->mnt_export, so we can safely delete the "export" mount option
* here.
*/
vfs_deleteopt(mp->mnt_optnew, "export");
vfs_deleteopt(mp->mnt_opt, "export");
return (error);
}
/*
* Set the publicly exported filesystem (WebNFS). Currently, only
* one public filesystem is possible in the spec (RFC 2054 and 2055)
*/
int
vfs_setpublicfs(struct mount *mp, struct netexport *nep,
struct export_args *argp)
{
int error;
struct vnode *rvp;
char *cp;
/*
* mp == NULL -> invalidate the current info, the FS is
* no longer exported. May be called from either vfs_export
* or unmount, so check if it hasn't already been done.
*/
if (mp == NULL) {
if (nfs_pub.np_valid) {
nfs_pub.np_valid = 0;
if (nfs_pub.np_index != NULL) {
free(nfs_pub.np_index, M_TEMP);
nfs_pub.np_index = NULL;
}
}
return (0);
}
/*
* Only one allowed at a time.
*/
if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
return (EBUSY);
/*
* Get real filehandle for root of exported FS.
*/
bzero(&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
if ((error = VFS_ROOT(mp, LK_EXCLUSIVE, &rvp)))
return (error);
if ((error = VOP_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
return (error);
vput(rvp);
/*
* If an indexfile was specified, pull it in.
*/
if (argp->ex_indexfile != NULL) {
if (nfs_pub.np_index == NULL)
nfs_pub.np_index = malloc(MAXNAMLEN + 1, M_TEMP,
M_WAITOK);
error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
MAXNAMLEN, (size_t *)0);
if (!error) {
/*
* Check for illegal filenames.
*/
for (cp = nfs_pub.np_index; *cp; cp++) {
if (*cp == '/') {
error = EINVAL;
break;
}
}
}
if (error) {
free(nfs_pub.np_index, M_TEMP);
nfs_pub.np_index = NULL;
return (error);
}
}
nfs_pub.np_mount = mp;
nfs_pub.np_valid = 1;
return (0);
}
/*
* Used by the filesystems to determine if a given network address
* (passed in 'nam') is present in their exports list, returns a pointer
* to struct netcred so that the filesystem can examine it for
* access rights (read/write/etc).
*
* Returns NULL when the mount has no export state, is not exported, or the
* radix lookup matches a root node. When no address entry matches and
* MNT_DEFEXPORTED is set, the default export entry is returned instead.
*/
static struct netcred *
vfs_export_lookup(struct mount *mp, struct sockaddr *nam)
{
RADIX_NODE_HEAD_RLOCK_TRACKER;
struct netexport *nep;
struct netcred *np = NULL;
struct radix_node_head *rnh;
struct sockaddr *saddr;
nep = mp->mnt_export;
if (nep == NULL)
return (NULL);
if ((mp->mnt_flag & MNT_EXPORTED) == 0)
return (NULL);
/*
* Lookup in the export list
*/
if (nam != NULL) {
saddr = nam;
rnh = NULL;
/* Only AF_INET and AF_INET6 have an export tree; others fall to the default. */
switch (saddr->sa_family) {
case AF_INET:
rnh = nep->ne4;
break;
case AF_INET6:
rnh = nep->ne6;
break;
}
if (rnh != NULL) {
RADIX_NODE_HEAD_RLOCK(rnh);
np = (struct netcred *) (*rnh->rnh_matchaddr)(saddr, &rnh->rh);
RADIX_NODE_HEAD_RUNLOCK(rnh);
/* A match on a root node is rejected outright, without trying the default. */
if (np != NULL && (np->netc_rnodes->rn_flags & RNF_ROOT) != 0)
return (NULL);
}
}
/*
* If no address match, use the default if it exists.
*/
if (np == NULL && (mp->mnt_flag & MNT_DEFEXPORTED) != 0)
return (&nep->ne_defexported);
return (np);
}
/*
 * XXX: This comment comes from the deprecated ufs_check_export()
 * XXX: and may not entirely apply, but lacking something better:
 * This is the generic part of fhtovp called after the underlying
 * filesystem has validated the file handle.
 *
 * Verify that a host should have access to a filesystem.
 *
 * On success returns 0 with the export flags in *extflagsp and a held
 * reference to the anonymous credential (if any) in *credanonp; the
 * security flavor count and array are reported through numsecflavors and
 * secflavors when those pointers are non-NULL.  When nam has no matching
 * export, *credanonp is set to NULL and EACCES is returned.
 */
int
vfs_stdcheckexp(struct mount *mp, struct sockaddr *nam, int *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int **secflavors)
{
	struct netcred *match;

	lockmgr(&mp->mnt_explock, LK_SHARED, NULL);
	match = vfs_export_lookup(mp, nam);
	if (match == NULL) {
		lockmgr(&mp->mnt_explock, LK_RELEASE, NULL);
		*credanonp = NULL;
		return (EACCES);
	}

	*extflagsp = match->netc_exflags;
	*credanonp = match->netc_anon;
	if (*credanonp != NULL)
		crhold(*credanonp);
	if (numsecflavors)
		*numsecflavors = match->netc_numsecflavors;
	if (secflavors)
		*secflavors = match->netc_secflavors;
	lockmgr(&mp->mnt_explock, LK_RELEASE, NULL);

	return (0);
}

View File

@ -0,0 +1,757 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 1999-2001 Robert N. M. Watson
* All rights reserved.
*
* This software was developed by Robert Watson for the TrustedBSD Project.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/fcntl.h>
#include <sys/namei.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/extattr.h>
#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>
/*
* Forward declarations of the file-local path-based helpers. Each takes a
* userspace path plus a follow flag that the callers set to FOLLOW or
* NOFOLLOW.
*/
static int kern_extattr_set_path(struct thread *td, const char *path,
int attrnamespace, const char *attrname, void *data,
size_t nbytes, int follow);
static int kern_extattr_get_path(struct thread *td, const char *path,
int attrnamespace, const char *attrname, void *data,
size_t nbytes, int follow);
static int kern_extattr_delete_path(struct thread *td, const char *path,
int attrnamespace, const char *attrname, int follow);
static int kern_extattr_list_path(struct thread *td, const char *path,
int attrnamespace, void *data, size_t nbytes, int follow);
/*
 * Syscall to push extended attribute configuration information into the VFS.
 * Accepts a path, which it converts to a mountpoint, as well as a command
 * (int cmd), and attribute name and misc data.
 *
 * Currently this is used only by UFS1 extended attributes.
 *
 * uap->path is mandatory; uap->filename and uap->attrname are optional and
 * are passed to VFS_EXTATTRCTL() as NULL when absent.  Returns 0 on success
 * or an errno value.
 */
#ifndef _SYS_SYSPROTO_H_
struct extattrctl_args {
	const char *path;
	int cmd;
	const char *filename;
	int attrnamespace;
	const char *attrname;
};
#endif
int
sys_extattrctl(struct thread *td, struct extattrctl_args *uap)
{
	struct vnode *filename_vp;
	struct nameidata nd;
	struct mount *mp, *mp_writable;
	char attrname[EXTATTR_MAXNAMELEN];
	int error;

	AUDIT_ARG_CMD(uap->cmd);
	AUDIT_ARG_VALUE(uap->attrnamespace);
	/*
	 * uap->attrname is not always defined.  We check again later when we
	 * invoke the VFS call so as to pass in NULL there if needed.
	 */
	if (uap->attrname != NULL) {
		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
		    NULL);
		if (error)
			return (error);
		/*
		 * Audit the name only when one was copied in; otherwise the
		 * on-stack buffer is uninitialised.
		 */
		AUDIT_ARG_TEXT(attrname);
	}

	mp = NULL;
	filename_vp = NULL;
	if (uap->filename != NULL) {
		NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE2,
		    UIO_USERSPACE, uap->filename, td);
		error = namei(&nd);
		if (error)
			return (error);
		/* Keep the vnode reference; it is dropped at "out". */
		filename_vp = nd.ni_vp;
		NDFREE(&nd, NDF_NO_VP_RELE);
	}

	/* uap->path is always defined. */
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1,
	    UIO_USERSPACE, uap->path, td);
	error = namei(&nd);
	if (error)
		goto out;
	mp = nd.ni_vp->v_mount;
	error = vfs_busy(mp, 0);
	if (error) {
		NDFREE(&nd, 0);
		/* Not busied: make sure "out" does not unbusy it. */
		mp = NULL;
		goto out;
	}
	VOP_UNLOCK(nd.ni_vp, 0);
	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
	NDFREE(&nd, NDF_NO_VP_UNLOCK);
	if (error)
		goto out;

	if (filename_vp != NULL) {
		/*
		 * uap->filename is not always defined.  If it is,
		 * grab a vnode lock, which VFS_EXTATTRCTL() will
		 * later release.
		 */
		error = vn_lock(filename_vp, LK_EXCLUSIVE);
		if (error) {
			vn_finished_write(mp_writable);
			goto out;
		}
	}

	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
	    uap->attrname != NULL ? attrname : NULL);

	vn_finished_write(mp_writable);
out:
	if (mp != NULL)
		vfs_unbusy(mp);

	/*
	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd, filename_vp,
	 * so vrele it if it is defined.
	 */
	if (filename_vp != NULL)
		vrele(filename_vp);
	return (error);
}
/*-
* Set a named extended attribute on a file or directory
*
* Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
* kernelspace string pointer "attrname", userspace buffer
* pointer "data", buffer length "nbytes", thread "td".
* Returns: 0 on success, an error number otherwise
* Locks: none
* References: vp must be a valid reference for the duration of the call
*
* The vnode is locked exclusively here around VOP_SETEXTATTR() and unlocked
* again before returning. The number of bytes consumed from "data" is
* stored in td->td_retval[0], except when the MAC check fails.
*/
static int
extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
void *data, size_t nbytes, struct thread *td)
{
struct mount *mp;
struct uio auio;
struct iovec aiov;
ssize_t cnt;
int error;
if (nbytes > IOSIZE_MAX)
return (EINVAL);
error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
if (error)
return (error);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
/* Describe the user buffer as a single-segment write uio. */
aiov.iov_base = data;
aiov.iov_len = nbytes;
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
auio.uio_offset = 0;
auio.uio_resid = nbytes;
auio.uio_rw = UIO_WRITE;
auio.uio_segflg = UIO_USERSPACE;
auio.uio_td = td;
cnt = nbytes;
#ifdef MAC
error = mac_vnode_check_setextattr(td->td_ucred, vp, attrnamespace,
attrname);
if (error)
goto done;
#endif
error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
td->td_ucred, td);
/* Bytes written = bytes requested minus what the VOP left in the uio. */
cnt -= auio.uio_resid;
td->td_retval[0] = cnt;
#ifdef MAC
/* The label only exists when the MAC check above can jump to it. */
done:
#endif
VOP_UNLOCK(vp, 0);
vn_finished_write(mp);
return (error);
}
#ifndef _SYS_SYSPROTO_H_
struct extattr_set_fd_args {
	int fd;
	int attrnamespace;
	const char *attrname;
	void *data;
	size_t nbytes;
};
#endif
/*
 * extattr_set_fd(2): set a named extended attribute on the file referenced
 * by the descriptor uap->fd, which must carry the CAP_EXTATTR_SET right.
 * The attribute name is copied in from userspace first.
 */
int
sys_extattr_set_fd(struct thread *td, struct extattr_set_fd_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	cap_rights_t rights;
	struct file *fp;
	int error;

	AUDIT_ARG_FD(uap->fd);
	AUDIT_ARG_VALUE(uap->attrnamespace);
	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
	if (error == 0) {
		AUDIT_ARG_TEXT(attrname);
		error = getvnode(td, uap->fd,
		    cap_rights_init(&rights, CAP_EXTATTR_SET), &fp);
	}
	if (error == 0) {
		error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
		    attrname, uap->data, uap->nbytes, td);
		fdrop(fp, td);
	}

	return (error);
}
#ifndef _SYS_SYSPROTO_H_
struct extattr_set_file_args {
const char *path;
int attrnamespace;
const char *attrname;
void *data;
size_t nbytes;
};
#endif
int
sys_extattr_set_file(struct thread *td, struct extattr_set_file_args *uap)
{
return (kern_extattr_set_path(td, uap->path, uap->attrnamespace,
uap->attrname, uap->data, uap->nbytes, FOLLOW));
}
#ifndef _SYS_SYSPROTO_H_
struct extattr_set_link_args {
const char *path;
int attrnamespace;
const char *attrname;
void *data;
size_t nbytes;
};
#endif
int
sys_extattr_set_link(struct thread *td, struct extattr_set_link_args *uap)
{
return (kern_extattr_set_path(td, uap->path, uap->attrnamespace,
uap->attrname, uap->data, uap->nbytes, NOFOLLOW));
}
static int
kern_extattr_set_path(struct thread *td, const char *path, int attrnamespace,
const char *uattrname, void *data, size_t nbytes, int follow)
{
struct nameidata nd;
char attrname[EXTATTR_MAXNAMELEN];
int error;
AUDIT_ARG_VALUE(attrnamespace);
error = copyinstr(uattrname, attrname, EXTATTR_MAXNAMELEN, NULL);
if (error)
return (error);
AUDIT_ARG_TEXT(attrname);
NDINIT(&nd, LOOKUP, follow | AUDITVNODE1, UIO_USERSPACE, path, td);
error = namei(&nd);
if (error)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
error = extattr_set_vp(nd.ni_vp, attrnamespace, attrname, data,
nbytes, td);
vrele(nd.ni_vp);
return (error);
}
/*-
 * Get a named extended attribute on a file or directory
 *
 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
 *            kernelspace string pointer "attrname", userspace buffer
 *            pointer "data", buffer length "nbytes", thread "td".
 * Returns: 0 on success, an error number otherwise
 * Locks: none
 * References: vp must be a valid reference for the duration of the call
 *
 * The vnode is locked shared around VOP_GETEXTATTR().  td->td_retval[0]
 * receives the number of bytes read when "data" is non-NULL, or the size
 * reported by the VOP when "data" is NULL; it is left untouched when the
 * MAC check fails.
 */
static int
extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
    void *data, size_t nbytes, struct thread *td)
{
	struct uio auio, *auiop;
	struct iovec aiov;
	ssize_t cnt;
	size_t size, *sizep;
	int error;

	if (nbytes > IOSIZE_MAX)
		return (EINVAL);

	vn_lock(vp, LK_SHARED | LK_RETRY);

	/*
	 * Slightly unusual semantics: if the user provides a NULL data
	 * pointer, they don't want to receive the data, just the maximum
	 * read length.
	 */
	auiop = NULL;
	sizep = NULL;
	cnt = 0;
	/*
	 * Give size a defined value: it is stored into td_retval[0] below
	 * even when VOP_GETEXTATTR() fails and may not have written to it.
	 */
	size = 0;
	if (data != NULL) {
		aiov.iov_base = data;
		aiov.iov_len = nbytes;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = 0;
		auio.uio_resid = nbytes;
		auio.uio_rw = UIO_READ;
		auio.uio_segflg = UIO_USERSPACE;
		auio.uio_td = td;
		auiop = &auio;
		cnt = nbytes;
	} else
		sizep = &size;

#ifdef MAC
	error = mac_vnode_check_getextattr(td->td_ucred, vp, attrnamespace,
	    attrname);
	if (error)
		goto done;
#endif

	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
	    td->td_ucred, td);

	if (auiop != NULL) {
		/* Bytes read = bytes requested minus what is left in the uio. */
		cnt -= auio.uio_resid;
		td->td_retval[0] = cnt;
	} else
		td->td_retval[0] = size;
#ifdef MAC
done:
#endif
	VOP_UNLOCK(vp, 0);
	return (error);
}
#ifndef _SYS_SYSPROTO_H_
struct extattr_get_fd_args {
	int fd;
	int attrnamespace;
	const char *attrname;
	void *data;
	size_t nbytes;
};
#endif
/*
 * extattr_get_fd(2): read a named extended attribute from the file
 * referenced by the descriptor uap->fd, which must carry the
 * CAP_EXTATTR_GET right.  The attribute name is copied in from userspace
 * first.
 */
int
sys_extattr_get_fd(struct thread *td, struct extattr_get_fd_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	cap_rights_t rights;
	struct file *fp;
	int error;

	AUDIT_ARG_FD(uap->fd);
	AUDIT_ARG_VALUE(uap->attrnamespace);
	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
	if (error == 0) {
		AUDIT_ARG_TEXT(attrname);
		error = getvnode(td, uap->fd,
		    cap_rights_init(&rights, CAP_EXTATTR_GET), &fp);
	}
	if (error == 0) {
		error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
		    attrname, uap->data, uap->nbytes, td);
		fdrop(fp, td);
	}

	return (error);
}
#ifndef _SYS_SYSPROTO_H_
struct extattr_get_file_args {
const char *path;
int attrnamespace;
const char *attrname;
void *data;
size_t nbytes;
};
#endif
int
sys_extattr_get_file(struct thread *td, struct extattr_get_file_args *uap)
{
return (kern_extattr_get_path(td, uap->path, uap->attrnamespace,
uap->attrname, uap->data, uap->nbytes, FOLLOW));
}
#ifndef _SYS_SYSPROTO_H_
/*
* Argument block read by sys_extattr_get_link(). Same layout as
* extattr_get_file_args. Only declared here when _SYS_SYSPROTO_H_ is not
* defined.
*/
struct extattr_get_link_args {
const char *path; /* user-space pathname (looked up with UIO_USERSPACE) */
int attrnamespace; /* passed through to extattr_get_vp() */
const char *attrname; /* user-space string, copied in with copyinstr() */
void *data; /* destination buffer handed to extattr_get_vp() */
size_t nbytes; /* length of "data" in bytes */
};
#endif
/*
 * System call entry point: like sys_extattr_get_file(), but the path
 * lookup is done with the NOFOLLOW flag.
 */
int
sys_extattr_get_link(struct thread *td, struct extattr_get_link_args *uap)
{
	int rc;

	rc = kern_extattr_get_path(td, uap->path, uap->attrnamespace,
	    uap->attrname, uap->data, uap->nbytes, NOFOLLOW);
	return (rc);
}
/*
 * Shared body of the path-based "get" system calls.
 *
 * Copies the attribute name in from user space, resolves the user-space
 * "path" with the caller-supplied "follow" flag, and passes the resulting
 * vnode to extattr_get_vp().  Returns 0 or an error number.
 */
static int
kern_extattr_get_path(struct thread *td, const char *path, int attrnamespace,
    const char *uattrname, void *data, size_t nbytes, int follow)
{
	char name[EXTATTR_MAXNAMELEN];
	struct nameidata lookup;
	int rc;

	AUDIT_ARG_VALUE(attrnamespace);
	rc = copyinstr(uattrname, name, sizeof(name), NULL);
	if (rc != 0)
		return (rc);
	AUDIT_ARG_TEXT(name);

	NDINIT(&lookup, LOOKUP, follow | AUDITVNODE1, UIO_USERSPACE, path, td);
	rc = namei(&lookup);
	if (rc != 0)
		return (rc);
	/* Free only the pathname buffer; the vnode is released below. */
	NDFREE(&lookup, NDF_ONLY_PNBUF);

	rc = extattr_get_vp(lookup.ni_vp, attrnamespace, name, data, nbytes,
	    td);
	vrele(lookup.ni_vp);
	return (rc);
}
/*
* extattr_delete_vp(): Delete a named extended attribute on a file or
* directory
*
* Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
* kernelspace string pointer "attrname", thread "td"
* Returns: 0 on success, an error number otherwise
* Locks: none
* References: vp must be a valid reference for the duration of the call
*
* If the filesystem answers VOP_DELETEEXTATTR() with EOPNOTSUPP, the
* deletion is retried as VOP_SETEXTATTR() with a NULL uio.
*/
static int
extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
struct thread *td)
{
struct mount *mp;
int error;
/* Paired with vn_finished_write(mp) on every exit below. */
error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
if (error)
return (error);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef MAC
/* The MAC check runs with the vnode locked; a denial skips the VOP. */
error = mac_vnode_check_deleteextattr(td->td_ucred, vp, attrnamespace,
attrname);
if (error)
goto done;
#endif
error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred,
td);
/* Fallback for filesystems without a dedicated delete operation. */
if (error == EOPNOTSUPP)
error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
td->td_ucred, td);
#ifdef MAC
done:
#endif
VOP_UNLOCK(vp, 0);
vn_finished_write(mp);
return (error);
}
#ifndef _SYS_SYSPROTO_H_
/*
* Argument block read by sys_extattr_delete_fd(). Only declared here when
* _SYS_SYSPROTO_H_ is not defined.
*/
struct extattr_delete_fd_args {
int fd; /* descriptor resolved to a vnode with getvnode() */
int attrnamespace; /* passed through to extattr_delete_vp() */
const char *attrname; /* user-space string, copied in with copyinstr() */
};
#endif
/*
 * System call entry point: delete an extended attribute from the object
 * behind an open file descriptor.
 *
 * The attribute name is copied in from user space, the descriptor is
 * resolved to a vnode (requiring CAP_EXTATTR_DELETE), and the work is
 * done by extattr_delete_vp().  Returns 0 or an error number.
 */
int
sys_extattr_delete_fd(struct thread *td, struct extattr_delete_fd_args *uap)
{
	char name[EXTATTR_MAXNAMELEN];
	cap_rights_t needed;
	struct file *filep;
	int rc;

	AUDIT_ARG_FD(uap->fd);
	AUDIT_ARG_VALUE(uap->attrnamespace);

	rc = copyinstr(uap->attrname, name, sizeof(name), NULL);
	if (rc != 0)
		return (rc);
	AUDIT_ARG_TEXT(name);

	cap_rights_init(&needed, CAP_EXTATTR_DELETE);
	rc = getvnode(td, uap->fd, &needed, &filep);
	if (rc != 0)
		return (rc);

	rc = extattr_delete_vp(filep->f_vnode, uap->attrnamespace, name, td);
	/* Drop the file reference obtained by getvnode(). */
	fdrop(filep, td);
	return (rc);
}
#ifndef _SYS_SYSPROTO_H_
/*
* Argument block read by sys_extattr_delete_file(). Only declared here
* when _SYS_SYSPROTO_H_ is not defined.
*/
struct extattr_delete_file_args {
const char *path; /* user-space pathname (looked up with UIO_USERSPACE) */
int attrnamespace; /* passed through to extattr_delete_vp() */
const char *attrname; /* user-space string, copied in with copyinstr() */
};
#endif
/*
 * System call entry point: delete an extended attribute from the object
 * named by a path.  Delegates to kern_extattr_delete_path() with the
 * FOLLOW lookup flag.
 */
int
sys_extattr_delete_file(struct thread *td, struct extattr_delete_file_args *uap)
{
	int rc;

	rc = kern_extattr_delete_path(td, uap->path, uap->attrnamespace,
	    uap->attrname, FOLLOW);
	return (rc);
}
#ifndef _SYS_SYSPROTO_H_
/*
* Argument block read by sys_extattr_delete_link(). Same layout as
* extattr_delete_file_args. Only declared here when _SYS_SYSPROTO_H_ is
* not defined.
*/
struct extattr_delete_link_args {
const char *path; /* user-space pathname (looked up with UIO_USERSPACE) */
int attrnamespace; /* passed through to extattr_delete_vp() */
const char *attrname; /* user-space string, copied in with copyinstr() */
};
#endif
/*
 * System call entry point: like sys_extattr_delete_file(), but the path
 * lookup is done with the NOFOLLOW flag.
 */
int
sys_extattr_delete_link(struct thread *td, struct extattr_delete_link_args *uap)
{
	int rc;

	rc = kern_extattr_delete_path(td, uap->path, uap->attrnamespace,
	    uap->attrname, NOFOLLOW);
	return (rc);
}
static int
kern_extattr_delete_path(struct thread *td, const char *path, int attrnamespace,
const char *uattrname, int follow)
{
struct nameidata nd;
char attrname[EXTATTR_MAXNAMELEN];
int error;
AUDIT_ARG_VALUE(attrnamespace);
error = copyinstr(uattrname, attrname, EXTATTR_MAXNAMELEN, NULL);
if (error)
return(error);
AUDIT_ARG_TEXT(attrname);
NDINIT(&nd, LOOKUP, follow | AUDITVNODE1, UIO_USERSPACE, path, td);
error = namei(&nd);
if (error)
return(error);
NDFREE(&nd, NDF_ONLY_PNBUF);
error = extattr_delete_vp(nd.ni_vp, attrnamespace, attrname, td);
vrele(nd.ni_vp);
return(error);
}
/*-
 * Retrieve a list of extended attributes on a file or directory.
 *
 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
 *            userspace buffer pointer "data", buffer length "nbytes",
 *            thread "td".
 * Returns: 0 on success, an error number otherwise
 * Locks: none
 * References: vp must be a valid reference for the duration of the call
 *
 * When "data" is non-NULL, td->td_retval[0] is set to the number of bytes
 * consumed from the buffer (nbytes minus the remaining uio_resid).  When
 * "data" is NULL no data is transferred and td->td_retval[0] is set to the
 * size reported by the filesystem through the size pointer.
 */
static int
extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
    size_t nbytes, struct thread *td)
{
	struct uio auio, *auiop;
	size_t size, *sizep;
	struct iovec aiov;
	ssize_t cnt;
	int error;

	if (nbytes > IOSIZE_MAX)
		return (EINVAL);

	/*
	 * NOTE(review): extattr_get_vp() in this file takes LK_SHARED for the
	 * analogous read-only path; confirm whether an exclusive lock is
	 * really required for listing.
	 */
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	auiop = NULL;
	sizep = NULL;
	cnt = 0;
	/*
	 * Give "size" a defined value: if the VOP fails before storing
	 * through sizep, the assignment to td_retval[0] below would
	 * otherwise read an indeterminate object.
	 */
	size = 0;
	if (data != NULL) {
		aiov.iov_base = data;
		aiov.iov_len = nbytes;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = 0;
		auio.uio_resid = nbytes;
		auio.uio_rw = UIO_READ;
		auio.uio_segflg = UIO_USERSPACE;
		auio.uio_td = td;
		auiop = &auio;
		cnt = nbytes;
	} else
		sizep = &size;

#ifdef MAC
	error = mac_vnode_check_listextattr(td->td_ucred, vp, attrnamespace);
	if (error)
		goto done;
#endif

	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
	    td->td_ucred, td);

	if (auiop != NULL) {
		cnt -= auio.uio_resid;
		td->td_retval[0] = cnt;
	} else
		td->td_retval[0] = size;
#ifdef MAC
done:
#endif
	VOP_UNLOCK(vp, 0);
	return (error);
}
#ifndef _SYS_SYSPROTO_H_
/*
* Argument block read by sys_extattr_list_fd(). Only declared here when
* _SYS_SYSPROTO_H_ is not defined.
*/
struct extattr_list_fd_args {
int fd; /* descriptor resolved to a vnode with getvnode() */
int attrnamespace; /* passed through to extattr_list_vp() */
void *data; /* destination buffer handed to extattr_list_vp() */
size_t nbytes; /* length of "data" in bytes */
};
#endif
/*
 * System call entry point: list the extended attributes of the object
 * behind an open file descriptor.
 *
 * The descriptor is resolved to a vnode (requiring CAP_EXTATTR_LIST) and
 * the work is done by extattr_list_vp().  Returns 0 or an error number.
 */
int
sys_extattr_list_fd(struct thread *td, struct extattr_list_fd_args *uap)
{
	cap_rights_t needed;
	struct file *filep;
	int rc;

	AUDIT_ARG_FD(uap->fd);
	AUDIT_ARG_VALUE(uap->attrnamespace);

	cap_rights_init(&needed, CAP_EXTATTR_LIST);
	rc = getvnode(td, uap->fd, &needed, &filep);
	if (rc != 0)
		return (rc);

	rc = extattr_list_vp(filep->f_vnode, uap->attrnamespace, uap->data,
	    uap->nbytes, td);
	/* Drop the file reference obtained by getvnode(). */
	fdrop(filep, td);
	return (rc);
}
#ifndef _SYS_SYSPROTO_H_
/*
 * Argument block read by sys_extattr_list_file().  Only declared here when
 * _SYS_SYSPROTO_H_ is not defined.
 */
struct extattr_list_file_args {
	const char *path;	/* user-space pathname */
	int attrnamespace;	/* passed through to extattr_list_vp() */
	void *data;		/* destination buffer, may be NULL */
	size_t nbytes;		/* length of "data" in bytes */
};
#endif
/*
 * System call entry point: list the extended attributes of the object
 * named by a path.  Delegates to kern_extattr_list_path() with the FOLLOW
 * lookup flag.
 */
int
sys_extattr_list_file(struct thread *td, struct extattr_list_file_args *uap)
{
	int rc;

	rc = kern_extattr_list_path(td, uap->path, uap->attrnamespace,
	    uap->data, uap->nbytes, FOLLOW);
	return (rc);
}
#ifndef _SYS_SYSPROTO_H_
/*
* Argument block read by sys_extattr_list_link(). Only declared here when
* _SYS_SYSPROTO_H_ is not defined.
*/
struct extattr_list_link_args {
const char *path; /* user-space pathname (looked up with UIO_USERSPACE) */
int attrnamespace; /* passed through to extattr_list_vp() */
void *data; /* destination buffer handed to extattr_list_vp() */
size_t nbytes; /* length of "data" in bytes */
};
#endif
/*
 * System call entry point: like sys_extattr_list_file(), but the path
 * lookup is done with the NOFOLLOW flag.
 */
int
sys_extattr_list_link(struct thread *td, struct extattr_list_link_args *uap)
{
	int rc;

	rc = kern_extattr_list_path(td, uap->path, uap->attrnamespace,
	    uap->data, uap->nbytes, NOFOLLOW);
	return (rc);
}
/*
 * Shared body of the path-based "list" system calls.
 *
 * Resolves the user-space "path" with the caller-supplied "follow" flag
 * and passes the resulting vnode to extattr_list_vp().  Returns 0 or an
 * error number.
 */
static int
kern_extattr_list_path(struct thread *td, const char *path, int attrnamespace,
    void *data, size_t nbytes, int follow)
{
	struct nameidata lookup;
	int rc;

	AUDIT_ARG_VALUE(attrnamespace);

	NDINIT(&lookup, LOOKUP, follow | AUDITVNODE1, UIO_USERSPACE, path, td);
	rc = namei(&lookup);
	if (rc != 0)
		return (rc);
	/* Free only the pathname buffer; the vnode is released below. */
	NDFREE(&lookup, NDF_ONLY_PNBUF);

	rc = extattr_list_vp(lookup.ni_vp, attrnamespace, data, nbytes, td);
	vrele(lookup.ni_vp);
	return (rc);
}

234
freebsd/sys/kern/vfs_hash.c Normal file
View File

@ -0,0 +1,234 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2005 Poul-Henning Kamp
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/rwlock.h>
#include <sys/vnode.h>
/* Malloc type used for the hash table allocated by hashinit(). */
static MALLOC_DEFINE(M_VFS_HASH, "vfs_hash", "VFS hash table");
/* Bucket array; a bucket is selected by vfs_hash_bucket(). */
static LIST_HEAD(vfs_hash_head, vnode) *vfs_hash_tbl;
/*
* Side list: vfs_hash_insert() links a vnode here instead of into a bucket
* when an existing matching entry was found.
*/
static LIST_HEAD(,vnode) vfs_hash_side;
/* Mask ANDed with (hash + mnt_hashseed) to obtain the bucket index. */
static u_long vfs_hash_mask;
/* Read/write lock held around every traversal or update of the lists. */
static struct rwlock vfs_hash_lock;
/*
 * SYSINIT hook: set up the side list, the table lock and the bucket array
 * (sized from desiredvnodes).  The argument is unused.
 */
static void
vfs_hashinit(void *dummy __unused)
{

	LIST_INIT(&vfs_hash_side);
	rw_init(&vfs_hash_lock, "vfs hash");
	vfs_hash_tbl = hashinit(desiredvnodes, M_VFS_HASH, &vfs_hash_mask);
}
/* Must be SI_ORDER_SECOND so desiredvnodes is available */
SYSINIT(vfs_hash, SI_SUB_VFS, SI_ORDER_SECOND, vfs_hashinit, NULL);
/*
 * Return the vnode's hash value offset by its mount's hash seed.  Note
 * that the result is not masked to the table size.
 */
u_int
vfs_hash_index(struct vnode *vp)
{
	u_int seeded;

	seeded = vp->v_hash;
	seeded += vp->v_mount->mnt_hashseed;
	return (seeded);
}
/*
 * Map (mount, hash) to its bucket head: the hash is offset by the mount's
 * seed and reduced with the current table mask.
 */
static struct vfs_hash_head *
vfs_hash_bucket(const struct mount *mp, u_int hash)
{
	u_long slot;

	slot = (hash + mp->mnt_hashseed) & vfs_hash_mask;
	return (&vfs_hash_tbl[slot]);
}
/*
* Look up a vnode by (mount, hash) and return it through *vpp, acquired
* with vget() using the caller's lock flags.
*
* fn, when non-NULL, is called as fn(vp, arg) for each candidate; a
* non-zero result rejects that candidate.
*
* Returns 0 with *vpp set to the vnode on success, 0 with *vpp == NULL when
* no entry matches, or the error returned by vget().
*/
int
vfs_hash_get(const struct mount *mp, u_int hash, int flags, struct thread *td,
struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg)
{
struct vnode *vp;
int error;
while (1) {
rw_rlock(&vfs_hash_lock);
LIST_FOREACH(vp, vfs_hash_bucket(mp, hash), v_hashlist) {
if (vp->v_hash != hash)
continue;
if (vp->v_mount != mp)
continue;
if (fn != NULL && fn(vp, arg))
continue;
/* Hold the vnode before dropping the table lock, then vget() it. */
vhold(vp);
rw_runlock(&vfs_hash_lock);
error = vget(vp, flags | LK_VNHELD, td);
/*
* ENOENT without LK_NOWAIT: leave the bucket scan (vp is non-NULL, so
* the check below is skipped) and rescan from the top.
*/
if (error == ENOENT && (flags & LK_NOWAIT) == 0)
break;
if (error)
return (error);
*vpp = vp;
return (0);
}
/* The scan ran off the end of the bucket: no match. */
if (vp == NULL) {
rw_runlock(&vfs_hash_lock);
*vpp = NULL;
return (0);
}
}
}
/*
* Look up a vnode by (mount, hash) and return it through *vpp with a
* reference taken by vref(); the vnode is not locked via vget() here.
*
* fn, when non-NULL, is called as fn(vp, arg) for each candidate; a
* non-zero result rejects that candidate. *vpp is set to NULL when no
* entry matches.
*/
void
vfs_hash_ref(const struct mount *mp, u_int hash, struct thread *td,
struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg)
{
struct vnode *vp;
while (1) {
rw_rlock(&vfs_hash_lock);
LIST_FOREACH(vp, vfs_hash_bucket(mp, hash), v_hashlist) {
if (vp->v_hash != hash)
continue;
if (vp->v_mount != mp)
continue;
if (fn != NULL && fn(vp, arg))
continue;
/*
* Hold across the unlock, take the reference, then drop the hold
* (presumably so the vnode cannot go away in between -- confirm).
*/
vhold(vp);
rw_runlock(&vfs_hash_lock);
vref(vp);
vdrop(vp);
*vpp = vp;
return;
}
/* The scan ran off the end of the bucket: no match. */
if (vp == NULL) {
rw_runlock(&vfs_hash_lock);
*vpp = NULL;
return;
}
}
}
/*
* Unlink vp from whichever list its v_hashlist entry is on, under the
* table write lock.
*/
void
vfs_hash_remove(struct vnode *vp)
{
rw_wlock(&vfs_hash_lock);
LIST_REMOVE(vp, v_hashlist);
rw_wunlock(&vfs_hash_lock);
}
/*
* Insert vp into the hash under "hash", unless a matching vnode is already
* present.
*
* fn, when non-NULL, is called as fn(vp2, arg) for each candidate; a
* non-zero result rejects that candidate.
*
* No existing match: vp->v_hash is set, vp is linked into its bucket and 0
* is returned with *vpp == NULL.
* Existing match: vp is linked onto the side list and vput(); the result of
* vget() on the existing vnode is returned, and on success *vpp is set to
* that existing vnode.
*/
int
vfs_hash_insert(struct vnode *vp, u_int hash, int flags, struct thread *td,
struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg)
{
struct vnode *vp2;
int error;
*vpp = NULL;
while (1) {
rw_wlock(&vfs_hash_lock);
LIST_FOREACH(vp2,
vfs_hash_bucket(vp->v_mount, hash), v_hashlist) {
if (vp2->v_hash != hash)
continue;
if (vp2->v_mount != vp->v_mount)
continue;
if (fn != NULL && fn(vp2, arg))
continue;
vhold(vp2);
rw_wunlock(&vfs_hash_lock);
error = vget(vp2, flags | LK_VNHELD, td);
/*
* ENOENT without LK_NOWAIT: leave the scan with the lock dropped and
* vp2 non-NULL, so the outer loop re-locks and rescans.
*/
if (error == ENOENT && (flags & LK_NOWAIT) == 0)
break;
/* An entry already exists: park the new vnode on the side list. */
rw_wlock(&vfs_hash_lock);
LIST_INSERT_HEAD(&vfs_hash_side, vp, v_hashlist);
rw_wunlock(&vfs_hash_lock);
vput(vp);
if (!error)
*vpp = vp2;
return (error);
}
/* No match: exit the loop with the write lock still held. */
if (vp2 == NULL)
break;
}
vp->v_hash = hash;
LIST_INSERT_HEAD(vfs_hash_bucket(vp->v_mount, hash), vp, v_hashlist);
rw_wunlock(&vfs_hash_lock);
return (0);
}
/*
* Move vp to the bucket for the new hash value and record that value in
* vp->v_hash, all under the table write lock.
*/
void
vfs_hash_rehash(struct vnode *vp, u_int hash)
{
rw_wlock(&vfs_hash_lock);
LIST_REMOVE(vp, v_hashlist);
LIST_INSERT_HEAD(vfs_hash_bucket(vp->v_mount, hash), vp, v_hashlist);
vp->v_hash = hash;
rw_wunlock(&vfs_hash_lock);
}
/*
* Resize the hash table for a new vnode limit.
*
* A new table is allocated with hashinit() before the lock is taken. If
* its mask equals the current one the new table is freed and nothing else
* happens; otherwise the new table is installed and every vnode is moved
* from the old buckets into the new ones, after which the old table is
* freed.
*/
void
vfs_hash_changesize(int newmaxvnodes)
{
struct vfs_hash_head *vfs_hash_newtbl, *vfs_hash_oldtbl;
u_long vfs_hash_newmask, vfs_hash_oldmask;
struct vnode *vp;
/* NOTE(review): "i" is int but is compared against a u_long mask below. */
int i;
vfs_hash_newtbl = hashinit(newmaxvnodes, M_VFS_HASH,
&vfs_hash_newmask);
/* If same hash table size, nothing to do */
if (vfs_hash_mask == vfs_hash_newmask) {
free(vfs_hash_newtbl, M_VFS_HASH);
return;
}
/*
* Move everything from the old hash table to the new table.
* None of the vnodes in the table can be recycled because to
* do so, they have to be removed from the hash table.
*/
rw_wlock(&vfs_hash_lock);
vfs_hash_oldtbl = vfs_hash_tbl;
vfs_hash_oldmask = vfs_hash_mask;
/* Install the new table first so vfs_hash_bucket() indexes into it. */
vfs_hash_tbl = vfs_hash_newtbl;
vfs_hash_mask = vfs_hash_newmask;
for (i = 0; i <= vfs_hash_oldmask; i++) {
while ((vp = LIST_FIRST(&vfs_hash_oldtbl[i])) != NULL) {
LIST_REMOVE(vp, v_hashlist);
LIST_INSERT_HEAD(
vfs_hash_bucket(vp->v_mount, vp->v_hash),
vp, v_hashlist);
}
}
rw_wunlock(&vfs_hash_lock);
free(vfs_hash_oldtbl, M_VFS_HASH);
}

376
freebsd/sys/kern/vfs_init.c Normal file
View File

@ -0,0 +1,376 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed
* to Berkeley by John Heidemann of the UCLA Ficus project.
*
* Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vfs_init.c 8.3 (Berkeley) 1/4/94
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fnv_hash.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
/* Registration helpers; called from vfs_modevent(). */
static int vfs_register(struct vfsconf *);
static int vfs_unregister(struct vfsconf *);
MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");
/*
* One more than the highest VFS type number in use; starts just above
* VFS_GENERIC and is maintained by vfs_register()/vfs_unregister().
*/
int maxvfsconf = VFS_GENERIC + 1;
/*
* Tail queue of configured VFSes.
* New entries are added/deleted by vfs_register()/vfs_unregister()
*/
struct vfsconfhead vfsconf = TAILQ_HEAD_INITIALIZER(vfsconf);
/* sx lock for the vfsconf list; vfs_byname_locked() asserts it is held. */
struct sx vfsconf_sx;
SX_SYSINIT(vfsconf, &vfsconf_sx, "vfsconf");
/*
 * Loader.conf variable vfs.typenumhash enables setting vfc_typenum using a hash
 * calculation on vfc_name, so that it doesn't change when file systems are
 * loaded in a different order. This prevents the NFS server file handles from
 * changing for file systems that use vfc_typenum in their fsid.
 */
static int	vfs_typenumhash = 1;
/*
 * The description is built from adjacent string literals; the trailing space
 * after "not" keeps the two halves from running together.
 */
SYSCTL_INT(_vfs, OID_AUTO, typenumhash, CTLFLAG_RDTUN, &vfs_typenumhash, 0,
    "Set vfc_typenum using a hash calculation on vfc_name, so that it does not "
    "change when file systems are loaded in a different order.");
/*
* A Zen vnode attribute structure.
*
* Initialized when the first filesystem registers by vfs_register(),
* which fills it once with vattr_null().
*/
struct vattr va_null;
/*
* vfs_init.c
*
* Allocate and fill in operations vectors.
*
* An undocumented feature of this approach to defining operations is that
* there can be multiple entries in vfs_opv_descs for the same operations
* vector. This allows third parties to extend the set of operations
* supported by another layer in a binary compatible way. For example,
* assume that NFS needed to be modified to support Ficus. NFS has an entry
* (probably nfs_vnopdeop_decls) declaring all the operations NFS supports by
* default. Ficus could add another entry (ficus_nfs_vnodeop_decl_entensions)
* listing those new operations Ficus adds to NFS, all without modifying the
* NFS code. (Of course, the OTW NFS protocol still needs to be munged, but
* that is a(whole)nother story.) This is a feature.
*/
/*
* Routines having to do with the management of the vnode table.
*/
/*
 * Find a registered filesystem type by name.  The caller must hold
 * vfsconf_sx (asserted).  The name "ffs" is treated as an alias for "ufs".
 * Returns the matching vfsconf, or NULL when none is registered.
 */
static struct vfsconf *
vfs_byname_locked(const char *name)
{
	struct vfsconf *conf;

	sx_assert(&vfsconf_sx, SA_LOCKED);
	if (strcmp(name, "ffs") == 0)
		name = "ufs";
	TAILQ_FOREACH(conf, &vfsconf, vfc_list) {
		if (strcmp(name, conf->vfc_name) != 0)
			continue;
		return (conf);
	}
	return (NULL);
}
/*
 * Locked wrapper around vfs_byname_locked(): takes the vfsconf lock via
 * vfsconf_slock() for the duration of the lookup.  Returns the matching
 * vfsconf or NULL.
 */
struct vfsconf *
vfs_byname(const char *name)
{
	struct vfsconf *conf;

	vfsconf_slock();
	conf = vfs_byname_locked(name);
	vfsconf_sunlock();
	return (conf);
}
struct vfsconf *
vfs_byname_kld(const char *fstype, struct thread *td, int *error)
{
struct vfsconf *vfsp;
int fileid, loaded;
vfsp = vfs_byname(fstype);
if (vfsp != NULL)
return (vfsp);
/* Try to load the respective module. */
*error = kern_kldload(td, fstype, &fileid);
loaded = (*error == 0);
if (*error == EEXIST)
*error = 0;
if (*error)
return (NULL);
/* Look up again to see if the VFS was loaded. */
vfsp = vfs_byname(fstype);
if (vfsp == NULL) {
if (loaded)
(void)kern_kldunload(td, fileid, LINKER_UNLOAD_FORCE);
*error = ENODEV;
return (NULL);
}
return (vfsp);
}
/*
* Register a new filesystem type in the global table
*
* Steps, in order: one-time va_null setup; ABI version check; duplicate
* name check; type number assignment; append to the vfsconf list; default
* every optional vfsops entry that is NULL; call the filesystem's vfs_init;
* renumber a matching vfs.<name> sysctl node to the assigned type number.
*
* Returns 0 on success, EINVAL on a vfc_version mismatch, EEXIST when the
* name is already registered.
*/
static int
vfs_register(struct vfsconf *vfc)
{
struct sysctl_oid *oidp;
struct vfsops *vfsops;
static int once;
struct vfsconf *tvfc;
uint32_t hashval;
int secondpass;
/* First registration ever: initialise the shared va_null template. */
if (!once) {
vattr_null(&va_null);
once = 1;
}
if (vfc->vfc_version != VFS_VERSION) {
printf("ERROR: filesystem %s, unsupported ABI version %x\n",
vfc->vfc_name, vfc->vfc_version);
return (EINVAL);
}
vfsconf_lock();
if (vfs_byname_locked(vfc->vfc_name) != NULL) {
vfsconf_unlock();
return (EEXIST);
}
if (vfs_typenumhash != 0) {
/*
* Calculate a hash on vfc_name to use for vfc_typenum. Unless
* all of 1<->255 are assigned, it is limited to 8bits since
* that is what ZFS uses from vfc_typenum and is also the
* preferred range for vfs_getnewfsid().
*/
hashval = fnv_32_str(vfc->vfc_name, FNV1_32_INIT);
hashval &= 0xff;
secondpass = 0;
/*
* Each collision bumps hashval and restarts the list walk; the loop
* ends once a full walk finds no entry using hashval. The first time
* 255 collides the search wraps to 1; after that it keeps counting up.
*/
do {
/* Look for and fix any collision. */
TAILQ_FOREACH(tvfc, &vfsconf, vfc_list) {
if (hashval == tvfc->vfc_typenum) {
if (hashval == 255 && secondpass == 0) {
hashval = 1;
secondpass = 1;
} else
hashval++;
break;
}
}
} while (tvfc != NULL);
vfc->vfc_typenum = hashval;
if (vfc->vfc_typenum >= maxvfsconf)
maxvfsconf = vfc->vfc_typenum + 1;
} else
/* Hashing disabled: hand out the next sequential number. */
vfc->vfc_typenum = maxvfsconf++;
TAILQ_INSERT_TAIL(&vfsconf, vfc, vfc_list);
/*
* Initialise unused ``struct vfsops'' fields, to use
* the vfs_std*() functions. Note, we need the mount
* and unmount operations, at the least. The check
* for vfsops available is just a debugging aid.
*/
KASSERT(vfc->vfc_vfsops != NULL,
("Filesystem %s has no vfsops", vfc->vfc_name));
/*
* Check the mount and unmount operations.
*/
vfsops = vfc->vfc_vfsops;
KASSERT(vfsops->vfs_mount != NULL,
("Filesystem %s has no mount op", vfc->vfc_name));
KASSERT(vfsops->vfs_unmount != NULL,
("Filesystem %s has no unmount op", vfc->vfc_name));
if (vfsops->vfs_root == NULL)
/* return file system's root vnode */
vfsops->vfs_root = vfs_stdroot;
if (vfsops->vfs_quotactl == NULL)
/* quota control */
vfsops->vfs_quotactl = vfs_stdquotactl;
if (vfsops->vfs_statfs == NULL)
/* return file system's status */
vfsops->vfs_statfs = vfs_stdstatfs;
if (vfsops->vfs_sync == NULL)
/*
* flush unwritten data (nosync)
* file systems can use vfs_stdsync
* explicitly by setting it in the
* vfsop vector.
*/
vfsops->vfs_sync = vfs_stdnosync;
if (vfsops->vfs_vget == NULL)
/* convert an inode number to a vnode */
vfsops->vfs_vget = vfs_stdvget;
if (vfsops->vfs_fhtovp == NULL)
/* turn an NFS file handle into a vnode */
vfsops->vfs_fhtovp = vfs_stdfhtovp;
if (vfsops->vfs_checkexp == NULL)
/* check if file system is exported */
vfsops->vfs_checkexp = vfs_stdcheckexp;
if (vfsops->vfs_init == NULL)
/* file system specific initialisation */
vfsops->vfs_init = vfs_stdinit;
if (vfsops->vfs_uninit == NULL)
/* file system specific uninitialisation */
vfsops->vfs_uninit = vfs_stduninit;
if (vfsops->vfs_extattrctl == NULL)
/* extended attribute control */
vfsops->vfs_extattrctl = vfs_stdextattrctl;
if (vfsops->vfs_sysctl == NULL)
vfsops->vfs_sysctl = vfs_stdsysctl;
if (vfc->vfc_flags & VFCF_JAIL)
prison_add_vfs(vfc);
/*
* Call init function for this VFS...
*/
(*(vfc->vfc_vfsops->vfs_init))(vfc);
vfsconf_unlock();
/*
* If this filesystem has a sysctl node under vfs
* (i.e. vfs.xxfs), then change the oid number of that node to
* match the filesystem's type number. This allows user code
* which uses the type number to read sysctl variables defined
* by the filesystem to continue working. Since the oids are
* in a sorted list, we need to make sure the order is
* preserved by re-registering the oid after modifying its
* number.
*/
sysctl_wlock();
SLIST_FOREACH(oidp, SYSCTL_CHILDREN(&sysctl___vfs), oid_link) {
if (strcmp(oidp->oid_name, vfc->vfc_name) == 0) {
sysctl_unregister_oid(oidp);
oidp->oid_number = vfc->vfc_typenum;
sysctl_register_oid(oidp);
break;
}
}
sysctl_wunlock();
return (0);
}
/* Remove registration of a filesystem type */
static int
vfs_unregister(struct vfsconf *vfc)
{
struct vfsconf *vfsp;
int error, maxtypenum;
vfsconf_lock();
vfsp = vfs_byname_locked(vfc->vfc_name);
if (vfsp == NULL) {
vfsconf_unlock();
return (EINVAL);
}
if (vfsp->vfc_refcount != 0) {
vfsconf_unlock();
return (EBUSY);
}
if (vfc->vfc_vfsops->vfs_uninit != NULL) {
error = (*vfc->vfc_vfsops->vfs_uninit)(vfsp);
if (error != 0) {
vfsconf_unlock();
return (error);
}
}
TAILQ_REMOVE(&vfsconf, vfsp, vfc_list);
maxtypenum = VFS_GENERIC;
TAILQ_FOREACH(vfsp, &vfsconf, vfc_list)
if (maxtypenum < vfsp->vfc_typenum)
maxtypenum = vfsp->vfc_typenum;
maxvfsconf = maxtypenum + 1;
vfsconf_unlock();
return (0);
}
/*
 * Standard kernel module handling code for filesystem modules.
 * Referenced from VFS_SET().
 *
 * "data" is the module's struct vfsconf.  MOD_LOAD registers it and
 * MOD_UNLOAD unregisters it; with a NULL "data" both are no-ops that
 * return 0.  Any other event type yields EOPNOTSUPP.
 */
int
vfs_modevent(module_t mod, int type, void *data)
{
	struct vfsconf *conf = (struct vfsconf *)data;

	if (type == MOD_LOAD)
		return (conf != NULL ? vfs_register(conf) : 0);
	if (type == MOD_UNLOAD)
		return (conf != NULL ? vfs_unregister(conf) : 0);
	return (EOPNOTSUPP);
}

File diff suppressed because it is too large Load Diff

2052
freebsd/sys/kern/vfs_mount.c Normal file

File diff suppressed because it is too large Load Diff

5719
freebsd/sys/kern/vfs_subr.c Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

2607
freebsd/sys/kern/vfs_vnops.c Normal file

File diff suppressed because it is too large Load Diff

184
freebsd/sys/sys/bio.h Normal file
View File

@ -0,0 +1,184 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
* to the University of California by American Telephone and Telegraph
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
* the permission of UNIX System Laboratories, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)buf.h 8.9 (Berkeley) 3/30/95
* $FreeBSD$
*/
#ifndef _SYS_BIO_H_
#define _SYS_BIO_H_
#include <sys/queue.h>
#include <sys/disk_zone.h>
/* bio_cmd */
/* Command codes for bio_cmd: consecutive values, not a bit mask. */
#define BIO_READ 0x01 /* Read I/O data */
#define BIO_WRITE 0x02 /* Write I/O data */
#define BIO_DELETE 0x03 /* TRIM or free blocks, i.e. mark as unused */
#define BIO_GETATTR 0x04 /* Get GEOM attributes of object */
#define BIO_FLUSH 0x05 /* Commit outstanding I/O now */
#define BIO_CMD0 0x06 /* Available for local hacks */
#define BIO_CMD1 0x07 /* Available for local hacks */
#define BIO_CMD2 0x08 /* Available for local hacks */
#define BIO_ZONE 0x09 /* Zone command */
/* bio_flags */
/* Single-bit values, so they can be ORed together in bio_flags. */
#define BIO_ERROR 0x01 /* An error occurred processing this bio. */
#define BIO_DONE 0x02 /* This bio is finished. */
#define BIO_ONQUEUE 0x04 /* This bio is in a queue & not yet taken. */
/*
* This bio must be executed after all previous bios in the queue have been
* executed, and before any successive bios can be executed.
*/
#define BIO_ORDERED 0x08
/*
* NOTE(review): the three flags below are undocumented here. BIO_UNMAPPED
* presumably means the data is described by bio_ma rather than bio_data --
* confirm against the users of these flags.
*/
#define BIO_UNMAPPED 0x10
#define BIO_TRANSIENT_MAPPING 0x20
#define BIO_VLIST 0x40
#ifdef _KERNEL
struct disk;
struct bio;
struct vm_map;
/*
 * Empty classifier tag, to prevent further classification.
 *
 * The whole replacement list is parenthesised so the macro expands as a
 * single operand in any expression context (e.g. under sizeof).
 */
#define	BIO_NOTCLASSIFIED	((void *)(~0UL))
typedef void bio_task_t(void *);
/*
* The bio structure describes an I/O operation in the kernel.
*
* bio_cmd holds one of the BIO_* command codes and bio_flags a combination
* of the BIO_* flag bits defined above.
*/
struct bio {
uint16_t bio_cmd; /* I/O operation. */
uint16_t bio_flags; /* General flags. */
uint16_t bio_cflags; /* Private use by the consumer. */
uint16_t bio_pflags; /* Private use by the provider. */
struct cdev *bio_dev; /* Device to do I/O on. */
struct disk *bio_disk; /* Valid below geom_disk.c only */
off_t bio_offset; /* Offset into file. */
long bio_bcount; /* Valid bytes in buffer. */
caddr_t bio_data; /* Memory, superblocks, indirect etc. */
struct vm_page **bio_ma; /* Or unmapped. */
int bio_ma_offset; /* Offset in the first page of bio_ma. */
int bio_ma_n; /* Number of pages in bio_ma. */
int bio_error; /* Errno for BIO_ERROR. */
long bio_resid; /* Remaining I/O in bytes. */
/* Callback taking the bio; presumably run on completion -- confirm. */
void (*bio_done)(struct bio *);
void *bio_driver1; /* Private use by the provider. */
void *bio_driver2; /* Private use by the provider. */
void *bio_caller1; /* Private use by the consumer. */
void *bio_caller2; /* Private use by the consumer. */
TAILQ_ENTRY(bio) bio_queue; /* Disksort queue. */
const char *bio_attribute; /* Attribute for BIO_[GS]ETATTR */
struct disk_zone_args bio_zone;/* Used for BIO_ZONE */
struct g_consumer *bio_from; /* GEOM linkage */
struct g_provider *bio_to; /* GEOM linkage */
off_t bio_length; /* Like bio_bcount */
off_t bio_completed; /* Inverse of bio_resid */
u_int bio_children; /* Number of spawned bios */
u_int bio_inbed; /* Children safely home by now */
struct bio *bio_parent; /* Pointer to parent */
struct bintime bio_t0; /* Time request started */
bio_task_t *bio_task; /* Task_queue handler */
void *bio_task_arg; /* Argument to above */
void *bio_classifier1; /* Classifier tag. */
void *bio_classifier2; /* Classifier tag. */
#ifdef DIAGNOSTIC
/*
* DIAGNOSTIC-only fields; by their names presumably shadow copies of
* bio_caller1, bio_caller2 and bio_cflags -- confirm against the users.
* NOTE(review): _bio_cflags is uint8_t while bio_cflags is uint16_t, so a
* copy would drop any flag bits above 0xff.
*/
void *_bio_caller1;
void *_bio_caller2;
uint8_t _bio_cflags;
#endif
#if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING)
struct buf *bio_track_bp; /* Parent buf for tracking */
#endif
/* XXX: these go away when bio chaining is introduced */
daddr_t bio_pblkno; /* physical block number */
};
struct uio;
struct devstat;
/*
* Head of a queue of bios linked through their bio_queue entries; this is
* the type operated on by the bioq_*() functions declared below.
* NOTE(review): the per-field roles of last_offset, insert_point, total and
* batched are not visible in this header -- see the bioq implementation.
*/
struct bio_queue_head {
TAILQ_HEAD(bio_queue, bio) queue;
off_t last_offset;
struct bio *insert_point;
int total;
int batched;
};
extern struct vm_map *bio_transient_map;
extern int bio_transient_maxcnt;
void biodone(struct bio *bp);
void biofinish(struct bio *bp, struct devstat *stat, int error);
int biowait(struct bio *bp, const char *wchan);
#if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING)
void biotrack_buf(struct bio *bp, const char *location);

/*
 * Forward to biotrack_buf() only when the bio has a tracking buf attached
 * (bio_track_bp != NULL); otherwise do nothing.
 */
static __inline void
biotrack(struct bio *bp, const char *location)
{

	if (bp->bio_track_bp == NULL)
		return;
	biotrack_buf(bp, location);
}
#else
/* Buf tracking is compiled out: biotrack() is an empty stub. */
static __inline void
biotrack(struct bio *bp __unused, const char *location __unused)
{
}
#endif
void bioq_disksort(struct bio_queue_head *ap, struct bio *bp);
struct bio *bioq_first(struct bio_queue_head *head);
struct bio *bioq_takefirst(struct bio_queue_head *head);
void bioq_flush(struct bio_queue_head *head, struct devstat *stp, int error);
void bioq_init(struct bio_queue_head *head);
void bioq_insert_head(struct bio_queue_head *head, struct bio *bp);
void bioq_insert_tail(struct bio_queue_head *head, struct bio *bp);
void bioq_remove(struct bio_queue_head *head, struct bio *bp);
int physio(struct cdev *dev, struct uio *uio, int ioflag);
/* physread and physwrite are both plain aliases for physio. */
#define physread physio
#define physwrite physio
#endif /* _KERNEL */
#endif /* !_SYS_BIO_H_ */

226
freebsd/sys/sys/namei.h Normal file
View File

@ -0,0 +1,226 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1985, 1989, 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)namei.h 8.5 (Berkeley) 1/9/95
* $FreeBSD$
*/
#ifndef _SYS_NAMEI_H_
#define _SYS_NAMEI_H_
#include <sys/caprights.h>
#include <sys/filedesc.h>
#include <sys/queue.h>
#include <sys/_uio.h>
/*
* Per-component lookup state. An instance is embedded in struct nameidata
* as ni_cnd, the part of the lookup information passed through the VOP
* interface.
*/
struct componentname {
/*
* Arguments to lookup.
*/
u_long cn_nameiop; /* namei operation */
u_int64_t cn_flags; /* flags to namei */
struct thread *cn_thread;/* thread requesting lookup */
struct ucred *cn_cred; /* credentials */
int cn_lkflags; /* Lock flags LK_EXCLUSIVE or LK_SHARED */
/*
* Shared between lookup and commit routines.
*/
char *cn_pnbuf; /* pathname buffer */
char *cn_nameptr; /* pointer to looked up name */
long cn_namelen; /* length of looked up component */
};
struct nameicap_tracker;
TAILQ_HEAD(nameicap_tracker_head, nameicap_tracker);
/*
 * Encapsulation of namei parameters: caller-supplied arguments, the
 * results handed back, and the scratch state shared with lookup.
 */
struct nameidata {
	/*
	 * Arguments to namei/lookup.
	 */
	const	char *ni_dirp;		/* pathname pointer */
	enum	uio_seg ni_segflg;	/* location of pathname */
	cap_rights_t ni_rightsneeded;	/* rights required to look up vnode */
	/*
	 * Arguments to lookup.
	 */
	struct	vnode *ni_startdir;	/* starting directory */
	struct	vnode *ni_rootdir;	/* logical root directory */
	struct	vnode *ni_topdir;	/* logical top directory */
	int	ni_dirfd;		/* starting directory for *at functions */
	int	ni_lcf;			/* local call flags */
	/*
	 * Results: returned from namei.
	 */
	struct	filecaps ni_filecaps;	/* rights the *at base has */
	/*
	 * Results: returned from/manipulated by lookup.
	 */
	struct	vnode *ni_vp;		/* vnode of result */
	struct	vnode *ni_dvp;		/* vnode of intermediate directory */
	/*
	 * Results: flags returned from namei.
	 */
	u_int	ni_resflags;
	/*
	 * Shared between namei and lookup/commit routines.
	 */
	size_t	ni_pathlen;		/* remaining chars in path */
	char	*ni_next;		/* next location in pathname */
	u_int	ni_loopcnt;		/* count of symlinks encountered */
	/*
	 * Lookup parameters: the subset of the information in this
	 * structure that is passed through the VOP interface.
	 */
	struct	componentname ni_cnd;
	struct	nameicap_tracker_head ni_cap_tracker;
};
#ifdef _KERNEL
/*
 * namei operations.  The operation code occupies the bits covered by
 * OPMASK.
 */
#define	LOOKUP		0	/* perform name lookup only */
#define	CREATE		1	/* setup for file creation */
#define	DELETE		2	/* setup for file deletion */
#define	RENAME		3	/* setup for file renaming */
#define	OPMASK		3	/* mask for operation */
/*
* namei operational modifier flags, stored in ni_cnd.cn_flags
*/
#define	LOCKLEAF	0x0004	/* lock vnode on return */
#define	LOCKPARENT	0x0008	/* want parent vnode returned locked */
#define	WANTPARENT	0x0010	/* want parent vnode returned unlocked */
#define	NOCACHE		0x0020	/* name must not be left in cache */
#define	FOLLOW		0x0040	/* follow symbolic links */
#define	LOCKSHARED	0x0100	/* shared lock leaf */
#define	NOFOLLOW	0x0000	/* do not follow symbolic links (pseudo) */
#define	MODMASK		0x01fc	/* mask of operational modifiers */
/*
* Namei parameter descriptors.
*
* SAVENAME may be set by either the callers of namei or by VOP_LOOKUP.
* If the caller of namei sets the flag (for example execve wants to
* know the name of the program that is being executed), then it must
* free the buffer. If VOP_LOOKUP sets the flag, then the buffer must
* be freed by either the commit routine or the VOP_ABORT routine.
* SAVESTART is set only by the callers of namei. It implies SAVENAME
* plus the addition of saving the parent directory that contains the
* name in ni_startdir. It allows repeated calls to lookup for the
* name being sought. The caller is responsible for releasing the
* buffer and for vrele'ing ni_startdir.
*/
#define	RDONLY		0x00000200 /* lookup with read-only semantics */
#define	HASBUF		0x00000400 /* has allocated pathname buffer */
#define	SAVENAME	0x00000800 /* save pathname buffer */
#define	SAVESTART	0x00001000 /* save starting directory */
#define	ISDOTDOT	0x00002000 /* current component name is .. */
#define	MAKEENTRY	0x00004000 /* entry is to be added to name cache */
#define	ISLASTCN	0x00008000 /* this is last component of pathname */
#define	ISSYMLINK	0x00010000 /* symlink needs interpretation */
#define	ISWHITEOUT	0x00020000 /* found whiteout */
#define	DOWHITEOUT	0x00040000 /* do whiteouts */
#define	WILLBEDIR	0x00080000 /* new files will be dirs; allow trailing / */
#define	ISUNICODE	0x00100000 /* current component name is unicode */
#define	ISOPEN		0x00200000 /* caller is opening; return a real vnode. */
#define	NOCROSSMOUNT	0x00400000 /* do not cross mount points */
#define	NOMACCHECK	0x00800000 /* do not perform MAC checks */
#define	AUDITVNODE1	0x04000000 /* audit the looked up vnode information */
#define	AUDITVNODE2	0x08000000 /* audit the looked up vnode information */
#define	TRAILINGSLASH	0x10000000 /* path ended in a slash */
#define	NOCAPCHECK	0x20000000 /* do not perform capability checks */
#define	NOEXECCHECK	0x40000000 /* do not perform exec check on dir */
#define	PARAMASK	0x7ffffe00 /* mask of parameter descriptors */
/*
 * Namei results flags (reported in ni_resflags).
 */
#define	NIRES_ABS		0x00000001 /* Path was absolute */

/*
 * Flags in ni_lcf, valid for the duration of the namei call.
 */
#define	NI_LCF_STRICTRELATIVE	0x0001	/* relative lookup only */
#define	NI_LCF_CAP_DOTDOT	0x0002	/* ".." in strictrelative case */
/*
 * Initialization of a nameidata structure.
 *
 * Each wrapper forwards to NDINIT_ALL() and fills in the arguments it does
 * not take itself:
 *   NDINIT          - dirfd AT_FDCWD, no start vnode, no rights pointer
 *   NDINIT_AT       - caller's dirfd, no start vnode, no rights pointer
 *   NDINIT_ATRIGHTS - caller's dirfd and rights pointer, no start vnode
 *   NDINIT_ATVP     - dirfd AT_FDCWD, caller's start vnode, no rights pointer
 *
 * Absent pointer arguments are spelled NULL (not 0) so that both pointer
 * slots of NDINIT_ALL() read the same way.
 */
#define	NDINIT(ndp, op, flags, segflg, namep, td)			\
	NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, NULL, NULL, td)
#define	NDINIT_AT(ndp, op, flags, segflg, namep, dirfd, td)		\
	NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, NULL, td)
#define	NDINIT_ATRIGHTS(ndp, op, flags, segflg, namep, dirfd, rightsp, td) \
	NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, rightsp, td)
#define	NDINIT_ATVP(ndp, op, flags, segflg, namep, vp, td)		\
	NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, vp, NULL, td)
void NDINIT_ALL(struct nameidata *ndp, u_long op, u_long flags,
enum uio_seg segflg, const char *namep, int dirfd, struct vnode *startdir,
cap_rights_t *rightsp, struct thread *td);
/*
 * Flag bits for the second argument of NDFREE().  The *_PUT values are the
 * union of the matching *_RELE and *_UNLOCK bits; NDF_ONLY_PNBUF is every
 * bit except NDF_NO_FREE_PNBUF.
 */
#define	NDF_NO_DVP_RELE		0x00000001
#define	NDF_NO_DVP_UNLOCK	0x00000002
#define	NDF_NO_DVP_PUT		(NDF_NO_DVP_RELE | NDF_NO_DVP_UNLOCK)
#define	NDF_NO_VP_RELE		0x00000004
#define	NDF_NO_VP_UNLOCK	0x00000008
#define	NDF_NO_VP_PUT		(NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK)
#define	NDF_NO_STARTDIR_RELE	0x00000010
#define	NDF_NO_FREE_PNBUF	0x00000020
#define	NDF_ONLY_PNBUF		(~NDF_NO_FREE_PNBUF)
void NDFREE(struct nameidata *, const u_int);
int namei(struct nameidata *ndp);
int lookup(struct nameidata *ndp);
int relookup(struct vnode *dvp, struct vnode **vpp,
struct componentname *cnp);
#endif
/*
 * Counters describing how useful the namei caches have been.
 */
struct nchstats {
	long	ncs_goodhits;	/* hits that we can really use */
	long	ncs_neghits;	/* negative hits that we can use */
	long	ncs_badhits;	/* hits we must drop */
	long	ncs_falsehits;	/* hits with id mismatch */
	long	ncs_miss;	/* misses */
	long	ncs_long;	/* long names that ignore cache */
	long	ncs_pass2;	/* names found with passes == 2 */
	long	ncs_2passes;	/* number of times we attempt it */
};
extern struct nchstats nchstats;
#endif /* !_SYS_NAMEI_H_ */

152
freebsd/sys/sys/pctrie.h Normal file
View File

@ -0,0 +1,152 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2013 EMC Corp.
* Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
* Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS_PCTRIE_H_
#define _SYS_PCTRIE_H_
#include <sys/_pctrie.h>
#ifdef _KERNEL
/*
 * PCTRIE_DEFINE(name, type, field, allocfn, freefn)
 *
 * Generate a family of static inline wrappers, all prefixed name##_PCTRIE_,
 * that let callers work with 'struct type' objects instead of the raw
 * uint64_t pointers used by the pctrie_*() functions.  'field' is the
 * member of 'struct type' whose address is handed to the trie.
 *
 * Two compile-time assertions are emitted first: 'field' must be exactly
 * sizeof(uint64_t) bytes, and its offset within 'struct type' must be a
 * multiple of sizeof(uint32_t).
 *
 * Generated functions:
 *   _VAL2PTR   - convert a uint64_t pointer back to the enclosing
 *                'struct type' by subtracting the field offset; NULL maps
 *                to NULL.
 *   _PTR2VAL   - address of 'field' inside the given object.
 *   _INSERT    - pctrie_insert() of the object's field, using allocfn.
 *   _LOOKUP, _LOOKUP_LE, _LOOKUP_GE
 *              - the corresponding pctrie_lookup*() call with the result
 *                passed through _VAL2PTR.
 *   _RECLAIM   - pctrie_reclaim_allnodes() with freefn.
 *   _REMOVE    - pctrie_remove() of 'key' with freefn.
 */
#define PCTRIE_DEFINE(name, type, field, allocfn, freefn) \
\
CTASSERT(sizeof(((struct type *)0)->field) == sizeof(uint64_t)); \
/* \
* XXX This assert protects flag bits, it does not enforce natural \
* alignment. 32bit architectures do not naturally align 64bit fields. \
*/ \
CTASSERT((__offsetof(struct type, field) & (sizeof(uint32_t) - 1)) == 0); \
\
static __inline struct type * \
name##_PCTRIE_VAL2PTR(uint64_t *val) \
{ \
\
if (val == NULL) \
return (NULL); \
return (struct type *) \
((uintptr_t)val - __offsetof(struct type, field)); \
} \
\
static __inline uint64_t * \
name##_PCTRIE_PTR2VAL(struct type *ptr) \
{ \
\
return &ptr->field; \
} \
\
static __inline int \
name##_PCTRIE_INSERT(struct pctrie *ptree, struct type *ptr) \
{ \
\
return pctrie_insert(ptree, name##_PCTRIE_PTR2VAL(ptr), \
allocfn); \
} \
\
static __inline struct type * \
name##_PCTRIE_LOOKUP(struct pctrie *ptree, uint64_t key) \
{ \
\
return name##_PCTRIE_VAL2PTR(pctrie_lookup(ptree, key)); \
} \
\
static __inline __unused struct type * \
name##_PCTRIE_LOOKUP_LE(struct pctrie *ptree, uint64_t key) \
{ \
\
return name##_PCTRIE_VAL2PTR(pctrie_lookup_le(ptree, key)); \
} \
\
static __inline __unused struct type * \
name##_PCTRIE_LOOKUP_GE(struct pctrie *ptree, uint64_t key) \
{ \
\
return name##_PCTRIE_VAL2PTR(pctrie_lookup_ge(ptree, key)); \
} \
\
static __inline __unused void \
name##_PCTRIE_RECLAIM(struct pctrie *ptree) \
{ \
\
pctrie_reclaim_allnodes(ptree, freefn); \
} \
\
static __inline void \
name##_PCTRIE_REMOVE(struct pctrie *ptree, uint64_t key) \
{ \
\
pctrie_remove(ptree, key, freefn); \
}
typedef void *(*pctrie_alloc_t)(struct pctrie *ptree);
typedef void (*pctrie_free_t)(struct pctrie *ptree, void *node);
int pctrie_insert(struct pctrie *ptree, uint64_t *val,
pctrie_alloc_t allocfn);
uint64_t *pctrie_lookup(struct pctrie *ptree, uint64_t key);
uint64_t *pctrie_lookup_ge(struct pctrie *ptree, uint64_t key);
uint64_t *pctrie_lookup_le(struct pctrie *ptree, uint64_t key);
void pctrie_reclaim_allnodes(struct pctrie *ptree,
pctrie_free_t freefn);
void pctrie_remove(struct pctrie *ptree, uint64_t key,
pctrie_free_t freefn);
size_t pctrie_node_size(void);
int pctrie_zone_init(void *mem, int size, int flags);
/*
 * Put a trie into its empty state by clearing the root.
 */
static __inline void
pctrie_init(struct pctrie *ptree)
{

	ptree->pt_root = 0;
}
/*
 * Report whether the trie is empty, i.e. whether its root is still zero.
 */
static __inline boolean_t
pctrie_is_empty(struct pctrie *ptree)
{

	return (ptree->pt_root == 0);
}
/*
 * These widths should allow the pointers to a node's children to fit within
 * a single cache line.  The extra levels from a narrow width should not be
 * a problem thanks to path compression.
 *
 * PCTRIE_COUNT is 2^PCTRIE_WIDTH: 16 under __LP64__, 8 otherwise.
 */
#if defined(__LP64__)
#define	PCTRIE_WIDTH	4
#else
#define	PCTRIE_WIDTH	3
#endif
#define	PCTRIE_COUNT	(1 << PCTRIE_WIDTH)
#endif /* _KERNEL */
#endif /* !_SYS_PCTRIE_H_ */

View File

@ -0,0 +1,317 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2002 Ian Dowse. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS_SYSCALLSUBR_H_
#define _SYS_SYSCALLSUBR_H_
#include <sys/signal.h>
#include <sys/socket.h>
#include <sys/mac.h>
#include <sys/mount.h>
#include <sys/_cpuset.h>
#include <sys/_domainset.h>
#include <sys/_uio.h>
struct __wrusage;
struct file;
struct filecaps;
enum idtype;
struct itimerval;
struct image_args;
struct jail;
struct kevent;
struct kevent_copyops;
struct kld_file_stat;
struct ksiginfo;
struct mbuf;
struct msghdr;
struct msqid_ds;
struct pollfd;
struct ogetdirentries_args;
struct rlimit;
struct rusage;
struct sched_param;
union semun;
struct sockaddr;
struct stat;
struct thr_param;
struct uio;
typedef int (*mmap_check_fp_fn)(struct file *, int, int, int);
int kern___getcwd(struct thread *td, char *buf, enum uio_seg bufseg,
size_t buflen, size_t path_max);
int kern_accept(struct thread *td, int s, struct sockaddr **name,
socklen_t *namelen, struct file **fp);
int kern_accept4(struct thread *td, int s, struct sockaddr **name,
socklen_t *namelen, int flags, struct file **fp);
int kern_accessat(struct thread *td, int fd, char *path,
enum uio_seg pathseg, int flags, int mode);
int kern_adjtime(struct thread *td, struct timeval *delta,
struct timeval *olddelta);
int kern_alternate_path(struct thread *td, const char *prefix, const char *path,
enum uio_seg pathseg, char **pathbuf, int create, int dirfd);
int kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa);
int kern_break(struct thread *td, uintptr_t *addr);
int kern_cap_ioctls_limit(struct thread *td, int fd, u_long *cmds,
size_t ncmds);
int kern_cap_rights_limit(struct thread *td, int fd, cap_rights_t *rights);
int kern_chdir(struct thread *td, char *path, enum uio_seg pathseg);
int kern_clock_getcpuclockid2(struct thread *td, id_t id, int which,
clockid_t *clk_id);
int kern_clock_getres(struct thread *td, clockid_t clock_id,
struct timespec *ts);
int kern_clock_gettime(struct thread *td, clockid_t clock_id,
struct timespec *ats);
int kern_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags,
const struct timespec *rqtp, struct timespec *rmtp);
int kern_clock_settime(struct thread *td, clockid_t clock_id,
struct timespec *ats);
int kern_close(struct thread *td, int fd);
int kern_connectat(struct thread *td, int dirfd, int fd,
struct sockaddr *sa);
int kern_cpuset_getaffinity(struct thread *td, cpulevel_t level,
cpuwhich_t which, id_t id, size_t cpusetsize, cpuset_t *maskp);
int kern_cpuset_setaffinity(struct thread *td, cpulevel_t level,
cpuwhich_t which, id_t id, size_t cpusetsize,
const cpuset_t *maskp);
int kern_cpuset_getdomain(struct thread *td, cpulevel_t level,
cpuwhich_t which, id_t id, size_t domainsetsize,
domainset_t *maskp, int *policyp);
int kern_cpuset_setdomain(struct thread *td, cpulevel_t level,
cpuwhich_t which, id_t id, size_t domainsetsize,
const domainset_t *maskp, int policy);
int kern_cpuset_getid(struct thread *td, cpulevel_t level,
cpuwhich_t which, id_t id, cpusetid_t *setid);
int kern_cpuset_setid(struct thread *td, cpuwhich_t which,
id_t id, cpusetid_t setid);
int kern_dup(struct thread *td, u_int mode, int flags, int old, int new);
int kern_execve(struct thread *td, struct image_args *args,
struct mac *mac_p);
int kern_fchmodat(struct thread *td, int fd, char *path,
enum uio_seg pathseg, mode_t mode, int flag);
int kern_fchownat(struct thread *td, int fd, char *path,
enum uio_seg pathseg, int uid, int gid, int flag);
int kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg);
int kern_fcntl_freebsd(struct thread *td, int fd, int cmd, long arg);
int kern_fhstat(struct thread *td, fhandle_t fh, struct stat *buf);
int kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf);
int kern_fpathconf(struct thread *td, int fd, int name, long *valuep);
int kern_fstat(struct thread *td, int fd, struct stat *sbp);
int kern_fstatfs(struct thread *td, int fd, struct statfs *buf);
int kern_fsync(struct thread *td, int fd, bool fullsync);
int kern_ftruncate(struct thread *td, int fd, off_t length);
int kern_futimes(struct thread *td, int fd, struct timeval *tptr,
enum uio_seg tptrseg);
int kern_futimens(struct thread *td, int fd, struct timespec *tptr,
enum uio_seg tptrseg);
int kern_getdirentries(struct thread *td, int fd, char *buf, size_t count,
off_t *basep, ssize_t *residp, enum uio_seg bufseg);
int kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
size_t *countp, enum uio_seg bufseg, int mode);
int kern_getitimer(struct thread *, u_int, struct itimerval *);
int kern_getppid(struct thread *);
int kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
socklen_t *alen);
int kern_getrusage(struct thread *td, int who, struct rusage *rup);
int kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
socklen_t *alen);
int kern_getsockopt(struct thread *td, int s, int level, int name,
void *optval, enum uio_seg valseg, socklen_t *valsize);
int kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data);
int kern_jail(struct thread *td, struct jail *j);
int kern_jail_get(struct thread *td, struct uio *options, int flags);
int kern_jail_set(struct thread *td, struct uio *options, int flags);
int kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
struct kevent_copyops *k_ops, const struct timespec *timeout);
int kern_kevent_anonymous(struct thread *td, int nevents,
struct kevent_copyops *k_ops);
int kern_kevent_fp(struct thread *td, struct file *fp, int nchanges,
int nevents, struct kevent_copyops *k_ops,
const struct timespec *timeout);
int kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps);
int kern_kldload(struct thread *td, const char *file, int *fileid);
int kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat);
int kern_kldunload(struct thread *td, int fileid, int flags);
int kern_linkat(struct thread *td, int fd1, int fd2, char *path1,
char *path2, enum uio_seg segflg, int follow);
int kern_listen(struct thread *td, int s, int backlog);
int kern_lseek(struct thread *td, int fd, off_t offset, int whence);
int kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
struct timeval *tptr, enum uio_seg tptrseg);
int kern_madvise(struct thread *td, uintptr_t addr, size_t len, int behav);
int kern_mincore(struct thread *td, uintptr_t addr, size_t len, char *vec);
int kern_mkdirat(struct thread *td, int fd, char *path,
enum uio_seg segflg, int mode);
int kern_mkfifoat(struct thread *td, int fd, char *path,
enum uio_seg pathseg, int mode);
int kern_mknodat(struct thread *td, int fd, char *path,
enum uio_seg pathseg, int mode, dev_t dev);
int kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr,
size_t len);
int kern_mmap(struct thread *td, uintptr_t addr, size_t size, int prot,
int flags, int fd, off_t pos);
int kern_mmap_fpcheck(struct thread *td, uintptr_t addr, size_t len,
int prot, int flags, int fd, off_t pos,
mmap_check_fp_fn check_fp_fn);
int kern_mprotect(struct thread *td, uintptr_t addr, size_t size, int prot);
int kern_msgctl(struct thread *, int, int, struct msqid_ds *);
int kern_msgrcv(struct thread *, int, void *, size_t, long, int, long *);
int kern_msgsnd(struct thread *, int, const void *, size_t, int, long);
int kern_msync(struct thread *td, uintptr_t addr, size_t size, int flags);
int kern_munlock(struct thread *td, uintptr_t addr, size_t size);
int kern_munmap(struct thread *td, uintptr_t addr, size_t size);
int kern_nanosleep(struct thread *td, struct timespec *rqt,
struct timespec *rmt);
int kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
long *ploff);
int kern_openat(struct thread *td, int fd, char *path,
enum uio_seg pathseg, int flags, int mode);
int kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg,
int name, u_long flags, long *valuep);
int kern_pipe(struct thread *td, int fildes[2], int flags,
struct filecaps *fcaps1, struct filecaps *fcaps2);
int kern_poll(struct thread *td, struct pollfd *fds, u_int nfds,
struct timespec *tsp, sigset_t *uset);
int kern_posix_error(struct thread *td, int error);
int kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
int advice);
int kern_posix_fallocate(struct thread *td, int fd, off_t offset,
off_t len);
int kern_procctl(struct thread *td, enum idtype idtype, id_t id, int com,
void *data);
int kern_pread(struct thread *td, int fd, void *buf, size_t nbyte,
off_t offset);
int kern_preadv(struct thread *td, int fd, struct uio *auio, off_t offset);
int kern_pselect(struct thread *td, int nd, fd_set *in, fd_set *ou,
fd_set *ex, struct timeval *tvp, sigset_t *uset, int abi_nfdbits);
int kern_ptrace(struct thread *td, int req, pid_t pid, void *addr,
int data);
int kern_pwrite(struct thread *td, int fd, const void *buf, size_t nbyte,
off_t offset);
int kern_pwritev(struct thread *td, int fd, struct uio *auio, off_t offset);
int kern_readlinkat(struct thread *td, int fd, char *path,
enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count);
int kern_readv(struct thread *td, int fd, struct uio *auio);
int kern_recvit(struct thread *td, int s, struct msghdr *mp,
enum uio_seg fromseg, struct mbuf **controlp);
int kern_renameat(struct thread *td, int oldfd, char *old, int newfd,
char *new, enum uio_seg pathseg);
int kern_rmdirat(struct thread *td, int fd, char *path,
enum uio_seg pathseg);
int kern_sched_getparam(struct thread *td, struct thread *targettd,
struct sched_param *param);
int kern_sched_getscheduler(struct thread *td, struct thread *targettd,
int *policy);
int kern_sched_setparam(struct thread *td, struct thread *targettd,
struct sched_param *param);
int kern_sched_setscheduler(struct thread *td, struct thread *targettd,
int policy, struct sched_param *param);
int kern_sched_rr_get_interval(struct thread *td, pid_t pid,
struct timespec *ts);
int kern_sched_rr_get_interval_td(struct thread *td, struct thread *targettd,
struct timespec *ts);
int kern_semctl(struct thread *td, int semid, int semnum, int cmd,
union semun *arg, register_t *rval);
int kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
fd_set *fd_ex, struct timeval *tvp, int abi_nfdbits);
int kern_sendit(struct thread *td, int s, struct msghdr *mp, int flags,
struct mbuf *control, enum uio_seg segflg);
int kern_setgroups(struct thread *td, u_int ngrp, gid_t *groups);
int kern_setitimer(struct thread *, u_int, struct itimerval *,
struct itimerval *);
int kern_setrlimit(struct thread *, u_int, struct rlimit *);
int kern_setsockopt(struct thread *td, int s, int level, int name,
void *optval, enum uio_seg valseg, socklen_t valsize);
int kern_settimeofday(struct thread *td, struct timeval *tv,
struct timezone *tzp);
int kern_shm_open(struct thread *td, const char *userpath, int flags,
mode_t mode, struct filecaps *fcaps);
int kern_shmat(struct thread *td, int shmid, const void *shmaddr,
int shmflg);
int kern_shmctl(struct thread *td, int shmid, int cmd, void *buf,
size_t *bufsz);
int kern_shutdown(struct thread *td, int s, int how);
int kern_sigaction(struct thread *td, int sig, const struct sigaction *act,
struct sigaction *oact, int flags);
int kern_sigaltstack(struct thread *td, stack_t *ss, stack_t *oss);
int kern_sigprocmask(struct thread *td, int how,
sigset_t *set, sigset_t *oset, int flags);
int kern_sigsuspend(struct thread *td, sigset_t mask);
int kern_sigtimedwait(struct thread *td, sigset_t waitset,
struct ksiginfo *ksi, struct timespec *timeout);
int kern_sigqueue(struct thread *td, pid_t pid, int signum,
union sigval *value);
int kern_socket(struct thread *td, int domain, int type, int protocol);
int kern_statat(struct thread *td, int flag, int fd, char *path,
enum uio_seg pathseg, struct stat *sbp,
void (*hook)(struct vnode *vp, struct stat *sbp));
int kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
struct statfs *buf);
int kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
enum uio_seg segflg);
int kern_ktimer_create(struct thread *td, clockid_t clock_id,
struct sigevent *evp, int *timerid, int preset_id);
int kern_ktimer_delete(struct thread *, int);
int kern_ktimer_settime(struct thread *td, int timer_id, int flags,
struct itimerspec *val, struct itimerspec *oval);
int kern_ktimer_gettime(struct thread *td, int timer_id,
struct itimerspec *val);
int kern_ktimer_getoverrun(struct thread *td, int timer_id);
int kern_thr_alloc(struct proc *, int pages, struct thread **);
int kern_thr_exit(struct thread *td);
int kern_thr_new(struct thread *td, struct thr_param *param);
int kern_thr_suspend(struct thread *td, struct timespec *tsp);
int kern_truncate(struct thread *td, char *path, enum uio_seg pathseg,
off_t length);
int kern_unlinkat(struct thread *td, int fd, char *path,
enum uio_seg pathseg, ino_t oldinum);
int kern_utimesat(struct thread *td, int fd, char *path,
enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg);
int kern_utimensat(struct thread *td, int fd, char *path,
enum uio_seg pathseg, struct timespec *tptr, enum uio_seg tptrseg,
int follow);
int kern_wait(struct thread *td, pid_t pid, int *status, int options,
struct rusage *rup);
int kern_wait6(struct thread *td, enum idtype idtype, id_t id, int *status,
int options, struct __wrusage *wrup, siginfo_t *sip);
int kern_writev(struct thread *td, int fd, struct uio *auio);
int kern_socketpair(struct thread *td, int domain, int type, int protocol,
int *rsv);
/* flags for kern_sigaction */
#define KSA_OSIGSET 0x0001 /* uses osigact_t */
#define KSA_FREEBSD4 0x0002 /* uses ucontext4 */
struct freebsd11_dirent;
int freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int
count, long *basep, void (*func)(struct freebsd11_dirent *));
#endif /* !_SYS_SYSCALLSUBR_H_ */

327
freebsd/sys/sys/sysent.h Normal file
View File

@ -0,0 +1,327 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1982, 1988, 1991 The Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS_SYSENT_H_
#define _SYS_SYSENT_H_
#include <bsm/audit.h>
struct rlimit;
struct sysent;
struct thread;
struct ksiginfo;
struct syscall_args;
/*
 * Which side of a system call a systrace probe is firing for.
 */
enum systrace_probe_t {
	SYSTRACE_ENTRY = 0,
	SYSTRACE_RETURN = 1,
};
typedef int sy_call_t(struct thread *, void *);
typedef void (*systrace_probe_func_t)(struct syscall_args *,
enum systrace_probe_t, int);
typedef void (*systrace_args_func_t)(int, void *, uint64_t *, int *);
#ifdef _KERNEL
extern bool systrace_enabled;
#endif
extern systrace_probe_func_t systrace_probe_func;
/*
 * One entry of a system call table.
 */
struct sysent {
	int		sy_narg;	/* number of arguments */
	sy_call_t	*sy_call;	/* implementing function */
	au_event_t	sy_auevent;	/* audit event associated with syscall */
	systrace_args_func_t sy_systrace_args_func;
					/* optional argument conversion function. */
	u_int32_t	sy_entry;	/* DTrace entry ID for systrace. */
	u_int32_t	sy_return;	/* DTrace return ID for systrace. */
	u_int32_t	sy_flags;	/* General flags for system calls. */
	u_int32_t	sy_thrcnt;
};
/*
 * A system call is permitted in capability mode.
 */
#define	SYF_CAPENABLED	0x00000001

/*
 * SY_THR_* values.  SY_THR_FLAGMASK covers STATIC, DRAINING and ABSENT;
 * SY_THR_INCR is the first bit above the mask.
 */
#define	SY_THR_FLAGMASK	0x7
#define	SY_THR_STATIC	0x1
#define	SY_THR_DRAINING	0x2
#define	SY_THR_ABSENT	0x4
#define	SY_THR_INCR	0x8

/* SY_THR_STATIC_KLD is SY_THR_STATIC except when built with KLD_MODULE. */
#ifndef KLD_MODULE
#define	SY_THR_STATIC_KLD	SY_THR_STATIC
#else
#define	SY_THR_STATIC_KLD	0
#endif
struct image_params;
struct __sigset;
struct trapframe;
struct vnode;
/*
 * Per-ABI vector: the system call table plus the hooks and address-space
 * parameters associated with one binary type.
 */
struct sysentvec {
	int		sv_size;	/* number of entries */
	struct sysent	*sv_table;	/* pointer to sysent */
	u_int		sv_mask;	/* optional mask to index */
	int		sv_errsize;	/* size of errno translation table */
	const int	*sv_errtbl;	/* errno translation table */
	int		(*sv_transtrap)(int, int);
					/* translate trap-to-signal mapping */
	int		(*sv_fixup)(register_t **, struct image_params *);
					/* stack fixup function */
	void		(*sv_sendsig)(void (*)(int), struct ksiginfo *,
			    struct __sigset *);
					/* send signal */
	char		*sv_sigcode;	/* start of sigtramp code */
	int		*sv_szsigcode;	/* size of sigtramp code */
	char		*sv_name;	/* name of binary type */
	int		(*sv_coredump)(struct thread *, struct vnode *, off_t,
			    int);
					/* function to dump core, or NULL */
	int		(*sv_imgact_try)(struct image_params *);
	void		(*sv_stackgap)(struct image_params *, u_long *);
	int		sv_minsigstksz;	/* minimum signal stack size */
	int		sv_pagesize;	/* spare / no longer used */
	vm_offset_t	sv_minuser;	/* VM_MIN_ADDRESS */
	vm_offset_t	sv_maxuser;	/* VM_MAXUSER_ADDRESS */
	vm_offset_t	sv_usrstack;	/* USRSTACK */
	vm_offset_t	sv_psstrings;	/* PS_STRINGS */
	int		sv_stackprot;	/* vm protection for stack */
	register_t	*(*sv_copyout_strings)(struct image_params *);
	void		(*sv_setregs)(struct thread *, struct image_params *,
			    u_long);
	void		(*sv_fixlimit)(struct rlimit *, int);
	u_long		*sv_maxssiz;
	u_int		sv_flags;
	void		(*sv_set_syscall_retval)(struct thread *, int);
	int		(*sv_fetch_syscall_args)(struct thread *);
	const char	**sv_syscallnames;
	vm_offset_t	sv_timekeep_base;
	vm_offset_t	sv_shared_page_base;
	vm_offset_t	sv_shared_page_len;
	vm_offset_t	sv_sigcode_base;
	void		*sv_shared_page_obj;
	void		(*sv_schedtail)(struct thread *);
	void		(*sv_thread_detach)(struct thread *);
	int		(*sv_trap)(struct thread *);
	u_long		*sv_hwcap;	/* Value passed in AT_HWCAP. */
	u_long		*sv_hwcap2;	/* Value passed in AT_HWCAP2. */
};
/*
 * Bits tested against sv_flags.  The low byte (SV_ABI_MASK) holds the ABI
 * identifier; the feature bits below all lie above it.
 */
#define	SV_ILP32	0x000100	/* 32-bit executable. */
#define	SV_LP64		0x000200	/* 64-bit executable. */
#define	SV_IA32		0x004000	/* Intel 32-bit executable. */
#define	SV_AOUT		0x008000	/* a.out executable. */
#define	SV_SHP		0x010000	/* Shared page. */
#define	SV_CAPSICUM	0x020000	/* Force cap_enter() on startup. */
#define	SV_TIMEKEEP	0x040000	/* Shared page timehands. */
#define	SV_ASLR		0x080000	/* ASLR allowed. */

#define	SV_ABI_MASK	0xff
/*
 * Translate errno value e for process p through its sysentvec:
 *   - sv_errsize <= 0: no translation table, e is returned unchanged;
 *   - e >= sv_errsize: the value is outside the table, result is -1;
 *   - otherwise: sv_errtbl[e].
 * Both arguments are evaluated more than once, so they must be free of
 * side effects.  Every use of e is parenthesised.
 */
#define	SV_ABI_ERRNO(p, e)	((p)->p_sysent->sv_errsize <= 0 ? (e) :	\
	((e) >= (p)->p_sysent->sv_errsize ? -1 : (p)->p_sysent->sv_errtbl[e]))
/*
 * Accessors for a process's sysentvec flags.  SV_PROC_FLAG() masks sv_flags
 * with the given bits, SV_PROC_ABI() masks it with SV_ABI_MASK, and the
 * SV_CURPROC_*() forms apply the same tests to curproc.
 */
#define	SV_PROC_FLAG(p, x)	((p)->p_sysent->sv_flags & (x))
#define	SV_PROC_ABI(p)		SV_PROC_FLAG(p, SV_ABI_MASK)
#define	SV_CURPROC_FLAG(x)	SV_PROC_FLAG(curproc, x)
#define	SV_CURPROC_ABI()	SV_PROC_FLAG(curproc, SV_ABI_MASK)
/*
 * ABI identifiers.  The values are the same as ELFOSABI_XXX; they are
 * repeated here to prevent header pollution.
 */
#define	SV_ABI_LINUX	3
#define	SV_ABI_FREEBSD	9
#define	SV_ABI_CLOUDABI	17
#define	SV_ABI_UNDEF	255
#ifdef _KERNEL
/* Kernel-wide tables and vectors defined elsewhere. */
extern struct sysentvec aout_sysvec;
extern struct sysent sysent[];
extern const char *syscallnames[];
#if defined(__amd64__)
/* Only declared on amd64. */
extern int i386_read_exec;
#endif
/* Sentinel syscall number; SYSCALL_INIT_LAST stores it in syscall_no. */
#define NO_SYSCALL (-1)
struct module;
/*
 * Registration record for a loadable system call.  SYSCALL_MODULE() defines
 * one instance per module, filling the first four members positionally and
 * referencing the record from the module's moduledata_t.
 */
struct syscall_module_data {
int (*chainevh)(struct module *, int, void *); /* next handler */
void *chainarg; /* arg for next event handler */
int *offset; /* offset into sysent */
struct sysent *new_sysent; /* new sysent */
struct sysent old_sysent; /* old sysent */
int flags; /* flags for syscall_register */
};
/*
 * separate initialization vector so it can be used in a substructure
 *
 * Expands to a brace-enclosed designated initializer for a struct sysent:
 *  - sy_narg is sizeof(struct <name>_args) / sizeof(register_t);
 *  - sy_call is sys_<name> cast to sy_call_t *;
 *  - sy_auevent is SYS_AUE_<name>;
 *  - the remaining listed members are set to 0 / NULL.
 */
#define SYSENT_INIT_VALS(_syscallname) { \
.sy_narg = (sizeof(struct _syscallname ## _args ) \
/ sizeof(register_t)), \
.sy_call = (sy_call_t *)&sys_##_syscallname, \
.sy_auevent = SYS_AUE_##_syscallname, \
.sy_systrace_args_func = NULL, \
.sy_entry = 0, \
.sy_return = 0, \
.sy_flags = 0, \
.sy_thrcnt = 0 \
}
/*
 * Define a file-static struct sysent named <syscallname>_sysent, initialised
 * with SYSENT_INIT_VALS().  The expansion already ends with ';'.
 */
#define MAKE_SYSENT(syscallname) \
static struct sysent syscallname##_sysent = SYSENT_INIT_VALS(syscallname);
/*
 * Variant whose handler is the bare symbol <syscallname> (no sys_ prefix).
 * It uses positional initializers and, unlike MAKE_SYSENT(), does not supply
 * the trailing ';'.
 * NOTE(review): the positional form assumes the first three members of
 * struct sysent are the argument count, handler and audit event, in that
 * order - confirm against the struct definition.
 */
#define MAKE_SYSENT_COMPAT(syscallname) \
static struct sysent syscallname##_sysent = { \
(sizeof(struct syscallname ## _args ) \
/ sizeof(register_t)), \
(sy_call_t *)& syscallname, \
SYS_AUE_##syscallname \
}
/*
 * Declare a kernel module that carries one system call:
 *  - defines static <name>_syscall_mod, a struct syscall_module_data with
 *    chainevh = evh, chainarg = arg, the given offset and new_sysent, and
 *    old_sysent initialised to { 0, NULL, AUE_NULL };
 *  - defines static moduledata_t <name>_mod named "sys/<name>" that uses
 *    syscall_module_handler and points at <name>_syscall_mod;
 *  - passes it to DECLARE_MODULE() at SI_SUB_SYSCALLS / SI_ORDER_MIDDLE.
 * The caller supplies the final ';'.
 */
#define SYSCALL_MODULE(name, offset, new_sysent, evh, arg) \
static struct syscall_module_data name##_syscall_mod = { \
evh, arg, offset, new_sysent, { 0, NULL, AUE_NULL } \
}; \
\
static moduledata_t name##_mod = { \
"sys/" #name, \
syscall_module_handler, \
&name##_syscall_mod \
}; \
DECLARE_MODULE(name, name##_mod, SI_SUB_SYSCALLS, SI_ORDER_MIDDLE)
/*
 * Convenience wrapper: defines <syscallname>_syscall initialised to
 * SYS_<syscallname>, the matching sysent via MAKE_SYSENT(), and a
 * SYSCALL_MODULE() with no chained event handler.
 */
#define SYSCALL_MODULE_HELPER(syscallname) \
static int syscallname##_syscall = SYS_##syscallname; \
MAKE_SYSENT(syscallname); \
SYSCALL_MODULE(syscallname, \
& syscallname##_syscall, & syscallname##_sysent, \
NULL, NULL)
/*
 * Non-zero when the sysent[] slot for SYS_<syscallname> holds a handler other
 * than the lkmnosys / lkmressys placeholders.
 */
#define SYSCALL_MODULE_PRESENT(syscallname) \
(sysent[SYS_##syscallname].sy_call != (sy_call_t *)lkmnosys && \
sysent[SYS_##syscallname].sy_call != (sy_call_t *)lkmressys)
/*
 * Syscall registration helpers with resource allocation handling.
 *
 * One element describes one system call.  The SYSCALL_INIT_HELPER*() macros
 * fill new_sysent and syscall_no; SYSCALL_INIT_LAST marks the end of a table
 * by setting syscall_no to NO_SYSCALL.
 */
struct syscall_helper_data {
struct sysent new_sysent; /* entry to install */
struct sysent old_sysent; /* presumably the displaced entry; set by the register code - confirm */
int syscall_no; /* SYS_<name>, or NO_SYSCALL in the terminator */
int registered; /* not set by the initializer macros; presumably maintained by register/unregister - confirm */
};
/*
 * Initializer for one struct syscall_helper_data element whose handler is
 * sys_<syscallname>; flags is stored in new_sysent.sy_flags and syscall_no is
 * set to SYS_<syscallname>.  Members not named are left zero-initialised.
 */
#define SYSCALL_INIT_HELPER_F(syscallname, flags) { \
.new_sysent = { \
.sy_narg = (sizeof(struct syscallname ## _args ) \
/ sizeof(register_t)), \
.sy_call = (sy_call_t *)& sys_ ## syscallname, \
.sy_auevent = SYS_AUE_##syscallname, \
.sy_flags = (flags) \
}, \
.syscall_no = SYS_##syscallname \
}
/*
 * As SYSCALL_INIT_HELPER_F(), but the handler is the bare symbol
 * <syscallname> rather than sys_<syscallname>.
 */
#define SYSCALL_INIT_HELPER_COMPAT_F(syscallname, flags) { \
.new_sysent = { \
.sy_narg = (sizeof(struct syscallname ## _args ) \
/ sizeof(register_t)), \
.sy_call = (sy_call_t *)& syscallname, \
.sy_auevent = SYS_AUE_##syscallname, \
.sy_flags = (flags) \
}, \
.syscall_no = SYS_##syscallname \
}
/* Flag-less shorthands for the two initializers above. */
#define SYSCALL_INIT_HELPER(syscallname) \
SYSCALL_INIT_HELPER_F(syscallname, 0)
#define SYSCALL_INIT_HELPER_COMPAT(syscallname) \
SYSCALL_INIT_HELPER_COMPAT_F(syscallname, 0)
/* Table terminator: only syscall_no is set, to NO_SYSCALL. */
#define SYSCALL_INIT_LAST { \
.syscall_no = NO_SYSCALL \
}
/*
 * syscall_module_handler is the event handler SYSCALL_MODULE() places in its
 * moduledata_t.  The helper register/unregister entry points take
 * struct syscall_helper_data.
 */
int syscall_module_handler(struct module *mod, int what, void *arg);
int syscall_helper_register(struct syscall_helper_data *sd, int flags);
int syscall_helper_unregister(struct syscall_helper_data *sd);
/*
 * Implementation, exposed for COMPAT code.  Each kern_* function takes the
 * sysent table to operate on as an explicit first argument.
 */
int kern_syscall_register(struct sysent *sysents, int *offset,
struct sysent *new_sysent, struct sysent *old_sysent, int flags);
int kern_syscall_deregister(struct sysent *sysents, int offset,
const struct sysent *old_sysent);
int kern_syscall_module_handler(struct sysent *sysents,
struct module *mod, int what, void *arg);
int kern_syscall_helper_register(struct sysent *sysents,
struct syscall_helper_data *sd, int flags);
int kern_syscall_helper_unregister(struct sysent *sysents,
struct syscall_helper_data *sd);
struct proc;
/* Presumably maps a syscall number to its name for process p - confirm. */
const char *syscallname(struct proc *p, u_int code);
/* Special purpose system call functions. */
struct nosys_args;
int lkmnosys(struct thread *, struct nosys_args *);
int lkmressys(struct thread *, struct nosys_args *);
/* Out-of-line paths used by syscall_thread_enter()/syscall_thread_exit(). */
int _syscall_thread_enter(struct thread *td, struct sysent *se);
void _syscall_thread_exit(struct thread *td, struct sysent *se);
/*
 * Inline front end for _syscall_thread_enter().
 *
 * Returns 0 immediately when SY_THR_STATIC is set in se->sy_thrcnt (the
 * case the branch hint expects); otherwise returns whatever
 * _syscall_thread_enter(td, se) returns.
 */
static inline int
syscall_thread_enter(struct thread *td, struct sysent *se)
{

	if (__predict_false((se->sy_thrcnt & SY_THR_STATIC) == 0))
		return (_syscall_thread_enter(td, se));
	return (0);
}
/*
 * Inline front end for _syscall_thread_exit().
 *
 * Does nothing when SY_THR_STATIC is set in se->sy_thrcnt (the case the
 * branch hint expects); otherwise calls _syscall_thread_exit(td, se).
 */
static inline void
syscall_thread_exit(struct thread *td, struct sysent *se)
{

	if (__predict_false((se->sy_thrcnt & SY_THR_STATIC) == 0))
		_syscall_thread_exit(td, se);
}
/*
 * Shared page helpers.
 * NOTE(review): the int results of shared_page_alloc()/shared_page_fill()
 * are presumably offsets usable as the base argument of
 * shared_page_write() - confirm against the implementation.
 */
int shared_page_alloc(int size, int align);
int shared_page_fill(int size, int align, const void *data);
void shared_page_write(int base, int size, const void *data);
void exec_sysvec_init(void *param);
void exec_inittk(void);
/*
 * Arrange for exec_sysvec_init(sv) to run via SYSINIT at SI_SUB_EXEC /
 * SI_ORDER_ANY.  The expansion already ends with ';'.
 */
#define INIT_SYSENTVEC(name, sv) \
SYSINIT(name, SI_SUB_EXEC, SI_ORDER_ANY, \
(sysinit_cfunc_t)exec_sysvec_init, sv);
#endif /* _KERNEL */
#endif /* !_SYS_SYSENT_H_ */

145
freebsd/sys/sys/vmem.h Normal file
View File

@ -0,0 +1,145 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c)2006 YAMAMOTO Takashi,
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* From $NetBSD: vmem.h,v 1.20 2013/01/29 21:26:24 para Exp $ */
/* $FreeBSD$ */
#ifndef _SYS_VMEM_H_
#define _SYS_VMEM_H_
#include <sys/types.h>
#ifdef _KERNEL
/* Arena handle; struct vmem itself is not defined in this header. */
typedef struct vmem vmem_t;
/* Address and size types used throughout the vmem interface. */
typedef uintptr_t vmem_addr_t;
typedef size_t vmem_size_t;
#define VMEM_ADDR_MIN 0
#define VMEM_ADDR_QCACHE_MIN 1
/* All-ones value of vmem_addr_t. */
#define VMEM_ADDR_MAX (~(vmem_addr_t)0)
/*
 * Callback function types (not pointer types; the prototypes below take
 * pointers to them).  vmem_import_t and vmem_release_t are installed with
 * vmem_set_import(), vmem_reclaim_t with vmem_set_reclaim().
 */
typedef int (vmem_import_t)(void *, vmem_size_t, int, vmem_addr_t *);
typedef void (vmem_release_t)(void *, vmem_addr_t, vmem_size_t);
typedef void (vmem_reclaim_t)(vmem_t *, int);
/*
 * Create a vmem:
 *	name - Name of the region
 *	base - Initial span start (optional)
 *	size - Initial span size
 *	quantum - Natural unit of allocation (ie PAGE_SIZE, 1, etc)
 *	qcache_max - Maximum size to quantum cache.  This creates a UMA
 *	    cache for each multiple of quantum up to qcache_max.
 *	flags - M_* flags
 *
 * vmem_init() takes the same arguments preceded by a caller-supplied
 * vmem_t.  vmem_destroy() takes the vmem to tear down.
 */
vmem_t *vmem_create(const char *name, vmem_addr_t base,
vmem_size_t size, vmem_size_t quantum, vmem_size_t qcache_max, int flags);
vmem_t *vmem_init(vmem_t *vm, const char *name, vmem_addr_t base,
vmem_size_t size, vmem_size_t quantum, vmem_size_t qcache_max, int flags);
void vmem_destroy(vmem_t *);
/*
 * Set callbacks for bringing in dynamic regions:
 *	importfn - Backing store import routine.
 *	releasefn - Backing store release routine.
 *	arg - Backing store argument
 *	import_quantum - Size to import from backing store
 */
void vmem_set_import(vmem_t *vm, vmem_import_t *importfn,
vmem_release_t *releasefn, void *arg, vmem_size_t import_quantum);
/*
 * Set a limit on the total size of a vmem.
 */
void vmem_set_limit(vmem_t *vm, vmem_size_t limit);
/*
 * Set a callback for reclaiming memory when space is exhausted.
 */
void vmem_set_reclaim(vmem_t *vm, vmem_reclaim_t *reclaimfn);
/*
 * Allocate and free linear regions from a vmem.  The flags argument must
 * specify BESTFIT or FIRSTFIT.  Free is non-blocking.  These routines
 * respect the quantum caches.
 *	addrp - result (as for vmem_xalloc())
 */
int vmem_alloc(vmem_t *vm, vmem_size_t size, int flags, vmem_addr_t *addrp);
void vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size);
/*
 * Constrained allocate and free routines.  These bypass the quantum cache.
 *	size - Size in units of 1, not quantum.
 *	align - Required alignment of the start of region
 *	phase - Offset from alignment
 *	nocross - Illegal boundary
 *	minaddr - Minimum allowed address for first byte
 *	maxaddr - Maximum allowed address for last byte
 *	flags - M_* flags
 *	addrp - result
 */
int vmem_xalloc(vmem_t *vm, vmem_size_t size, vmem_size_t align,
vmem_size_t phase, vmem_size_t nocross, vmem_addr_t minaddr,
vmem_addr_t maxaddr, int flags, vmem_addr_t *addrp);
void vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size);
/*
 * Add a static region to a vmem after create.  This won't be freed
 * until the vmem is destroyed.
 */
int vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, int flags);
/*
 * Round the given size up to the vmem's native quantum size.
 */
vmem_size_t vmem_roundup_size(vmem_t *vm, vmem_size_t size);
/*
 * Report vmem utilization according to the requested type; typemask is
 * built from the VMEM_ALLOC / VMEM_FREE / VMEM_MAXFREE values defined in
 * this header.
 */
vmem_size_t vmem_size(vmem_t *vm, int typemask);
/*
 * Diagnostic routines.  Each takes a printf-like function fn (format string
 * first, variadic arguments after) through which output is produced.
 */
void vmem_whatis(vmem_addr_t addr, int (*fn)(const char *, ...)
__printflike(1, 2));
void vmem_print(vmem_addr_t addr, const char *, int (*fn)(const char *, ...)
__printflike(1, 2));
void vmem_printall(const char *, int (*fn)(const char *, ...)
__printflike(1, 2));
void vmem_startup(void);
/* vmem_size typemask */
#define VMEM_ALLOC 0x01
#define VMEM_FREE 0x02
#define VMEM_MAXFREE 0x10
#endif /* _KERNEL */
#endif /* !_SYS_VMEM_H_ */

561
freebsd/sys/vm/vm_meter.c Normal file
View File

@ -0,0 +1,561 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vm_meter.c 8.4 (Berkeley) 1/4/94
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/rwlock.h>
#include <sys/sx.h>
#include <sys/vmmeter.h>
#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <sys/sysctl.h>
/*
 * Global VM statistics.  Every counter member named here starts out as
 * EARLY_COUNTER; vmcounter_startup() later runs COUNTER_ARRAY_ALLOC() over
 * the first VM_METER_NCOUNTERS counter slots of this structure.
 */
struct vmmeter __read_mostly vm_cnt = {
.v_swtch = EARLY_COUNTER,
.v_trap = EARLY_COUNTER,
.v_syscall = EARLY_COUNTER,
.v_intr = EARLY_COUNTER,
.v_soft = EARLY_COUNTER,
.v_vm_faults = EARLY_COUNTER,
.v_io_faults = EARLY_COUNTER,
.v_cow_faults = EARLY_COUNTER,
.v_cow_optim = EARLY_COUNTER,
.v_zfod = EARLY_COUNTER,
.v_ozfod = EARLY_COUNTER,
.v_swapin = EARLY_COUNTER,
.v_swapout = EARLY_COUNTER,
.v_swappgsin = EARLY_COUNTER,
.v_swappgsout = EARLY_COUNTER,
.v_vnodein = EARLY_COUNTER,
.v_vnodeout = EARLY_COUNTER,
.v_vnodepgsin = EARLY_COUNTER,
.v_vnodepgsout = EARLY_COUNTER,
.v_intrans = EARLY_COUNTER,
.v_reactivated = EARLY_COUNTER,
.v_pdwakeups = EARLY_COUNTER,
.v_pdpages = EARLY_COUNTER,
.v_pdshortfalls = EARLY_COUNTER,
.v_dfree = EARLY_COUNTER,
.v_pfree = EARLY_COUNTER,
.v_tfree = EARLY_COUNTER,
.v_forks = EARLY_COUNTER,
.v_vforks = EARLY_COUNTER,
.v_rforks = EARLY_COUNTER,
.v_kthreads = EARLY_COUNTER,
.v_forkpages = EARLY_COUNTER,
.v_vforkpages = EARLY_COUNTER,
.v_rforkpages = EARLY_COUNTER,
.v_kthreadpages = EARLY_COUNTER,
.v_wire_count = EARLY_COUNTER,
};
/*
 * Allocate the counters embedded in vm_cnt.
 *
 * The start of vm_cnt is viewed as an array of counter_u64_t and
 * VM_METER_NCOUNTERS entries are allocated with M_WAITOK.  Run from
 * SYSINIT at SI_SUB_KMEM / SI_ORDER_FIRST.
 */
static void
vmcounter_startup(void)
{
	counter_u64_t *counters;

	counters = (counter_u64_t *)&vm_cnt;
	COUNTER_ARRAY_ALLOC(counters, VM_METER_NCOUNTERS, M_WAITOK);
}
SYSINIT(counter, SI_SUB_KMEM, SI_ORDER_FIRST, vmcounter_startup, NULL);
/*
 * Read-write unsigned sysctls under the _vm node, each backed directly by
 * the vm_cnt member of the same name.
 */
SYSCTL_UINT(_vm, VM_V_FREE_MIN, v_free_min,
CTLFLAG_RW, &vm_cnt.v_free_min, 0, "Minimum low-free-pages threshold");
SYSCTL_UINT(_vm, VM_V_FREE_TARGET, v_free_target,
CTLFLAG_RW, &vm_cnt.v_free_target, 0, "Desired free pages");
SYSCTL_UINT(_vm, VM_V_FREE_RESERVED, v_free_reserved,
CTLFLAG_RW, &vm_cnt.v_free_reserved, 0, "Pages reserved for deadlock");
SYSCTL_UINT(_vm, VM_V_INACTIVE_TARGET, v_inactive_target,
CTLFLAG_RW, &vm_cnt.v_inactive_target, 0, "Pages desired inactive");
SYSCTL_UINT(_vm, VM_V_PAGEOUT_FREE_MIN, v_pageout_free_min,
CTLFLAG_RW, &vm_cnt.v_pageout_free_min, 0, "Min pages reserved for kernel");
SYSCTL_UINT(_vm, OID_AUTO, v_free_severe,
CTLFLAG_RW, &vm_cnt.v_free_severe, 0, "Severe page depletion point");
/*
 * Sysctl handler that copies out the load average record 'averunnable'.
 *
 * When SCTL_MASK32 is defined and set in req->flags, the three ldavg
 * entries followed by fscale are copied out as four 32-bit words;
 * otherwise the averunnable structure is copied out unchanged.
 * Returns the value of SYSCTL_OUT().
 */
static int
sysctl_vm_loadavg(SYSCTL_HANDLER_ARGS)
{
#ifdef SCTL_MASK32
	u_int32_t compat[4];
	int slot;

	if ((req->flags & SCTL_MASK32) != 0) {
		for (slot = 0; slot < 3; slot++)
			compat[slot] = averunnable.ldavg[slot];
		compat[3] = averunnable.fscale;
		return (SYSCTL_OUT(req, compat, sizeof(compat)));
	}
#endif
	return (SYSCTL_OUT(req, &averunnable, sizeof(averunnable)));
}
SYSCTL_PROC(_vm, VM_LOADAVG, loadavg, CTLTYPE_STRUCT | CTLFLAG_RD |
    CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_loadavg, "S,loadavg",
    "Machine loadaverage history");
/*
 * Heuristic test for whether a VM object is mapped, i.e. referenced by a
 * vm_map_entry.  An object is treated as active when it holds more
 * references than it has shadow objects.  Objects can pick up short-lived
 * references that are not mappings, so the answer is approximate; it is
 * cheap, and accurate enough for the advisory vm.vmtotal sysctl.
 */
static bool
is_object_active(vm_object_t obj)
{

	return (obj->shadow_count < obj->ref_count);
}
#if defined(COMPAT_FREEBSD11)
/*
 * Narrow result layout that vmtotal() returns to callers whose p_osrel
 * predates P_OSREL_VMTOTAL64.  The fields carry the same names as those
 * vmtotal() reads from struct vmtotal; the 32-bit members receive truncated
 * copies.
 */
struct vmtotal11 {
int16_t t_rq;
int16_t t_dw;
int16_t t_pw;
int16_t t_sl;
int16_t t_sw;
int32_t t_vm;
int32_t t_avm;
int32_t t_rm;
int32_t t_arm;
int32_t t_vmshr;
int32_t t_avmshr;
int32_t t_rmshr;
int32_t t_armshr;
int32_t t_free;
};
#endif
/*
 * Sysctl handler that fills a struct vmtotal and copies it out.
 *
 * A request without an output buffer (req->oldptr == NULL) only reports the
 * size that would be returned.  Otherwise the handler:
 *  - walks every process under allproc_lock, skipping P_SYSTEM processes
 *    and those still in PRS_NEW, and counts each thread in t_sw, t_dw,
 *    t_sl or t_rq according to its state;
 *  - walks vm_object_list under vm_object_list_mtx and accumulates object
 *    size and resident page count into t_vm / t_rm and their "active" and
 *    "shared" variants;
 *  - stores vm_wait_count() in t_pw and vm_free_count() in t_free.
 *
 * With COMPAT_FREEBSD11, a caller whose p_osrel predates
 * P_OSREL_VMTOTAL64 and whose buffer length is sizeof(struct vmtotal11) or
 * twice that is answered with the narrower struct vmtotal11.
 *
 * Returns the value of SYSCTL_OUT().
 */
static int
vmtotal(SYSCTL_HANDLER_ARGS)
{
struct vmtotal total;
#if defined(COMPAT_FREEBSD11)
struct vmtotal11 total11;
#endif
vm_object_t object;
struct proc *p;
struct thread *td;
/* Size probe: no data is gathered, only the result length is reported. */
if (req->oldptr == NULL) {
#if defined(COMPAT_FREEBSD11)
if (curproc->p_osrel < P_OSREL_VMTOTAL64)
return (SYSCTL_OUT(req, NULL, sizeof(total11)));
#endif
return (SYSCTL_OUT(req, NULL, sizeof(total)));
}
bzero(&total, sizeof(total));
/*
* Calculate process statistics.
*/
sx_slock(&allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
if ((p->p_flag & P_SYSTEM) != 0)
continue;
PROC_LOCK(p);
if (p->p_state != PRS_NEW) {
FOREACH_THREAD_IN_PROC(p, td) {
thread_lock(td);
switch (td->td_state) {
case TDS_INHIBITED:
/*
 * Swapped threads count as t_sw; sleeping threads count as
 * t_dw when td_priority <= PZERO and as t_sl otherwise.
 * Other inhibited threads are not counted.
 */
if (TD_IS_SWAPPED(td))
total.t_sw++;
else if (TD_IS_SLEEPING(td)) {
if (td->td_priority <= PZERO)
total.t_dw++;
else
total.t_sl++;
}
break;
case TDS_CAN_RUN:
total.t_sw++;
break;
case TDS_RUNQ:
case TDS_RUNNING:
total.t_rq++;
break;
default:
break;
}
thread_unlock(td);
}
}
PROC_UNLOCK(p);
}
sx_sunlock(&allproc_lock);
/*
* Calculate object memory usage statistics.
*/
mtx_lock(&vm_object_list_mtx);
TAILQ_FOREACH(object, &vm_object_list, object_list) {
/*
* Perform unsynchronized reads on the object. In
* this case, the lack of synchronization should not
* impair the accuracy of the reported statistics.
*/
if ((object->flags & OBJ_FICTITIOUS) != 0) {
/*
* Devices, like /dev/mem, will badly skew our totals.
*/
continue;
}
if (object->ref_count == 0) {
/*
* Also skip unreferenced objects, including
* vnodes representing mounted file systems.
*/
continue;
}
if (object->ref_count == 1 &&
(object->flags & OBJ_NOSPLIT) != 0) {
/*
* Also skip otherwise unreferenced swap
* objects backing tmpfs vnodes, and POSIX or
* SysV shared memory.
*/
continue;
}
total.t_vm += object->size;
total.t_rm += object->resident_page_count;
if (is_object_active(object)) {
total.t_avm += object->size;
total.t_arm += object->resident_page_count;
}
if (object->shadow_count > 1) {
/* shared object */
total.t_vmshr += object->size;
total.t_rmshr += object->resident_page_count;
if (is_object_active(object)) {
total.t_avmshr += object->size;
total.t_armshr += object->resident_page_count;
}
}
}
mtx_unlock(&vm_object_list_mtx);
total.t_pw = vm_wait_count();
total.t_free = vm_free_count();
#if defined(COMPAT_FREEBSD11)
/* sysctl(8) allocates twice as much memory as reported by sysctl(3) */
if (curproc->p_osrel < P_OSREL_VMTOTAL64 && (req->oldlen ==
sizeof(total11) || req->oldlen == 2 * sizeof(total11))) {
bzero(&total11, sizeof(total11));
total11.t_rq = total.t_rq;
total11.t_dw = total.t_dw;
total11.t_pw = total.t_pw;
total11.t_sl = total.t_sl;
total11.t_sw = total.t_sw;
total11.t_vm = total.t_vm; /* truncate */
total11.t_avm = total.t_avm; /* truncate */
total11.t_rm = total.t_rm; /* truncate */
total11.t_arm = total.t_arm; /* truncate */
total11.t_vmshr = total.t_vmshr; /* truncate */
total11.t_avmshr = total.t_avmshr; /* truncate */
total11.t_rmshr = total.t_rmshr; /* truncate */
total11.t_armshr = total.t_armshr; /* truncate */
total11.t_free = total.t_free; /* truncate */
return (SYSCTL_OUT(req, &total11, sizeof(total11)));
}
#endif
return (SYSCTL_OUT(req, &total, sizeof(total)));
}
/* Read-only opaque sysctl under _vm served by vmtotal(). */
SYSCTL_PROC(_vm, VM_TOTAL, vmtotal, CTLTYPE_OPAQUE | CTLFLAG_RD |
CTLFLAG_MPSAFE, NULL, 0, vmtotal, "S,vmtotal",
"System virtual memory statistics");
/*
 * Node hierarchy for the statistics below: "stats" under _vm, with "sys",
 * "vm" and "misc" children.  The "sys" and "vm" nodes are file-static.
 */
SYSCTL_NODE(_vm, OID_AUTO, stats, CTLFLAG_RW, 0, "VM meter stats");
static SYSCTL_NODE(_vm_stats, OID_AUTO, sys, CTLFLAG_RW, 0,
"VM meter sys stats");
static SYSCTL_NODE(_vm_stats, OID_AUTO, vm, CTLFLAG_RW, 0,
"VM meter vm stats");
SYSCTL_NODE(_vm_stats, OID_AUTO, misc, CTLFLAG_RW, 0, "VM meter misc stats");
/*
 * Sysctl handler for a single counter(9)-backed statistic.
 *
 * arg1 points at a counter_u64_t; its current value is fetched and copied
 * out as a 64-bit quantity.  With COMPAT_FREEBSD11, a caller supplying a
 * buffer of exactly 4 bytes receives the value truncated to 32 bits.
 * Returns the value of SYSCTL_OUT().
 */
static int
sysctl_handle_vmstat(SYSCTL_HANDLER_ARGS)
{
	uint64_t full;
#ifdef COMPAT_FREEBSD11
	uint32_t narrow;
#endif

	full = counter_u64_fetch(*(counter_u64_t *)arg1);
#ifdef COMPAT_FREEBSD11
	if (req->oldlen == sizeof(narrow)) {
		narrow = (uint32_t)full;	/* truncate */
		return (SYSCTL_OUT(req, &narrow, sizeof(narrow)));
	}
#endif
	return (SYSCTL_OUT(req, &full, sizeof(full)));
}
/*
 * Declare a read-only 64-bit sysctl named after vm_cnt member 'var' under
 * 'parent', served by sysctl_handle_vmstat() with &vm_cnt.var as arg1.
 * VM_STATS_VM and VM_STATS_SYS fix the parent to _vm_stats_vm and
 * _vm_stats_sys respectively.
 */
#define VM_STATS(parent, var, descr) \
SYSCTL_OID(parent, OID_AUTO, var, CTLTYPE_U64 | CTLFLAG_MPSAFE | \
CTLFLAG_RD, &vm_cnt.var, 0, sysctl_handle_vmstat, "QU", descr)
#define VM_STATS_VM(var, descr) VM_STATS(_vm_stats_vm, var, descr)
#define VM_STATS_SYS(var, descr) VM_STATS(_vm_stats_sys, var, descr)
VM_STATS_SYS(v_swtch, "Context switches");
VM_STATS_SYS(v_trap, "Traps");
VM_STATS_SYS(v_syscall, "System calls");
VM_STATS_SYS(v_intr, "Device interrupts");
VM_STATS_SYS(v_soft, "Software interrupts");
VM_STATS_VM(v_vm_faults, "Address memory faults");
VM_STATS_VM(v_io_faults, "Page faults requiring I/O");
VM_STATS_VM(v_cow_faults, "Copy-on-write faults");
VM_STATS_VM(v_cow_optim, "Optimized COW faults");
VM_STATS_VM(v_zfod, "Pages zero-filled on demand");
VM_STATS_VM(v_ozfod, "Optimized zero fill pages");
VM_STATS_VM(v_swapin, "Swap pager pageins");
VM_STATS_VM(v_swapout, "Swap pager pageouts");
VM_STATS_VM(v_swappgsin, "Swap pages swapped in");
VM_STATS_VM(v_swappgsout, "Swap pages swapped out");
VM_STATS_VM(v_vnodein, "Vnode pager pageins");
VM_STATS_VM(v_vnodeout, "Vnode pager pageouts");
VM_STATS_VM(v_vnodepgsin, "Vnode pages paged in");
VM_STATS_VM(v_vnodepgsout, "Vnode pages paged out");
VM_STATS_VM(v_intrans, "In transit page faults");
VM_STATS_VM(v_reactivated, "Pages reactivated by pagedaemon");
VM_STATS_VM(v_pdwakeups, "Pagedaemon wakeups");
/* v_pdpages is not declared here; it has its own handler, sysctl_vm_pdpages(). */
VM_STATS_VM(v_pdshortfalls, "Page reclamation shortfalls");
VM_STATS_VM(v_dfree, "Pages freed by pagedaemon");
VM_STATS_VM(v_pfree, "Pages freed by exiting processes");
VM_STATS_VM(v_tfree, "Total pages freed");
VM_STATS_VM(v_forks, "Number of fork() calls");
VM_STATS_VM(v_vforks, "Number of vfork() calls");
VM_STATS_VM(v_rforks, "Number of rfork() calls");
VM_STATS_VM(v_kthreads, "Number of fork() calls by kernel");
VM_STATS_VM(v_forkpages, "VM pages affected by fork()");
VM_STATS_VM(v_vforkpages, "VM pages affected by vfork()");
VM_STATS_VM(v_rforkpages, "VM pages affected by rfork()");
VM_STATS_VM(v_kthreadpages, "VM pages affected by fork() by kernel");
static int
sysctl_handle_vmstat_proc(SYSCTL_HANDLER_ARGS)
{
u_int (*fn)(void);
uint32_t val;
fn = arg1;
val = fn();
return (SYSCTL_OUT(req, &val, sizeof(val)));
}
/*
 * VM_STATS_PROC declares a read-only 32-bit sysctl under _vm_stats_vm whose
 * value is produced by calling fn through sysctl_handle_vmstat_proc().
 * VM_STATS_UINT declares a read-only unsigned sysctl backed directly by the
 * vm_cnt member 'var'.
 */
#define VM_STATS_PROC(var, descr, fn) \
SYSCTL_OID(_vm_stats_vm, OID_AUTO, var, CTLTYPE_U32 | CTLFLAG_MPSAFE | \
CTLFLAG_RD, fn, 0, sysctl_handle_vmstat_proc, "IU", descr)
#define VM_STATS_UINT(var, descr) \
SYSCTL_UINT(_vm_stats_vm, OID_AUTO, var, CTLFLAG_RD, &vm_cnt.var, 0, descr)
VM_STATS_UINT(v_page_size, "Page size in bytes");
VM_STATS_UINT(v_page_count, "Total number of pages in system");
VM_STATS_UINT(v_free_reserved, "Pages reserved for deadlock");
VM_STATS_UINT(v_free_target, "Pages desired free");
VM_STATS_UINT(v_free_min, "Minimum low-free-pages threshold");
VM_STATS_PROC(v_free_count, "Free pages", vm_free_count);
VM_STATS_PROC(v_wire_count, "Wired pages", vm_wire_count);
VM_STATS_PROC(v_active_count, "Active pages", vm_active_count);
VM_STATS_UINT(v_inactive_target, "Desired inactive pages");
VM_STATS_PROC(v_inactive_count, "Inactive pages", vm_inactive_count);
VM_STATS_PROC(v_laundry_count, "Pages eligible for laundering",
vm_laundry_count);
VM_STATS_UINT(v_pageout_free_min, "Min pages reserved for kernel");
VM_STATS_UINT(v_interrupt_free_min, "Reserved pages for interrupt code");
VM_STATS_UINT(v_free_severe, "Severe page depletion point");
#ifdef COMPAT_FREEBSD11
/*
* Provide compatibility sysctls for the benefit of old utilities which exit
* with an error if they cannot be found.
*/
SYSCTL_UINT(_vm_stats_vm, OID_AUTO, v_cache_count, CTLFLAG_RD,
SYSCTL_NULL_UINT_PTR, 0, "Dummy for compatibility");
SYSCTL_UINT(_vm_stats_vm, OID_AUTO, v_tcached, CTLFLAG_RD,
SYSCTL_NULL_UINT_PTR, 0, "Dummy for compatibility");
#endif
/*
 * Return the sum of vmd_free_count over the first vm_ndomains entries of
 * vm_dom[].
 */
u_int
vm_free_count(void)
{
	u_int total = 0;
	int domain = 0;

	while (domain < vm_ndomains) {
		total += vm_dom[domain].vmd_free_count;
		domain++;
	}
	return (total);
}
/*
 * Return the sum of pq_cnt for page queue index 'pq' over the first
 * vm_ndomains entries of vm_dom[].
 */
static u_int
vm_pagequeue_count(int pq)
{
	u_int total = 0;
	int domain = 0;

	while (domain < vm_ndomains) {
		total += vm_dom[domain].vmd_pagequeues[pq].pq_cnt;
		domain++;
	}
	return (total);
}
/* Return the PQ_ACTIVE page queue count summed by vm_pagequeue_count(). */
u_int
vm_active_count(void)
{
return (vm_pagequeue_count(PQ_ACTIVE));
}
/* Return the PQ_INACTIVE page queue count summed by vm_pagequeue_count(). */
u_int
vm_inactive_count(void)
{
return (vm_pagequeue_count(PQ_INACTIVE));
}
/* Return the PQ_LAUNDRY page queue count summed by vm_pagequeue_count(). */
u_int
vm_laundry_count(void)
{
return (vm_pagequeue_count(PQ_LAUNDRY));
}
static int
sysctl_vm_pdpages(SYSCTL_HANDLER_ARGS)
{
struct vm_pagequeue *pq;
uint64_t ret;
int dom, i;
ret = counter_u64_fetch(vm_cnt.v_pdpages);
for (dom = 0; dom < vm_ndomains; dom++)
for (i = 0; i < PQ_COUNT; i++) {
pq = &VM_DOMAIN(dom)->vmd_pagequeues[i];
ret += pq->pq_pdpages;
}
return (SYSCTL_OUT(req, &ret, sizeof(ret)));
}
SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_pdpages,
CTLTYPE_U64 | CTLFLAG_MPSAFE | CTLFLAG_RD, NULL, 0, sysctl_vm_pdpages, "QU",
"Pages analyzed by pagedaemon");
/*
 * Create the sysctl subtree for one VM domain.
 *
 * Adds a node named vmd->vmd_name under 'parent' (remembered in
 * vmd->vmd_oid), a "stats" node beneath it, and then one read-only leaf per
 * statistic: the free page count, the count and scanned-pages value of each
 * of the active, inactive, laundry and unswappable page queues, and the
 * domain's target / reserved / min / severe thresholds.  Each leaf points
 * directly at the corresponding field of *vmd.
 */
static void
vm_domain_stats_init(struct vm_domain *vmd, struct sysctl_oid *parent)
{
struct sysctl_oid *oid;
vmd->vmd_oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(parent), OID_AUTO,
vmd->vmd_name, CTLFLAG_RD, NULL, "");
oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(vmd->vmd_oid), OID_AUTO,
"stats", CTLFLAG_RD, NULL, "");
SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"free_count", CTLFLAG_RD, &vmd->vmd_free_count, 0,
"Free pages");
/* Per-queue leaves: a UINT for pq_cnt and a U64 for pq_pdpages. */
SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"active", CTLFLAG_RD, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_cnt, 0,
"Active pages");
SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"actpdpgs", CTLFLAG_RD,
&vmd->vmd_pagequeues[PQ_ACTIVE].pq_pdpages, 0,
"Active pages scanned by the page daemon");
SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"inactive", CTLFLAG_RD, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_cnt, 0,
"Inactive pages");
SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"inactpdpgs", CTLFLAG_RD,
&vmd->vmd_pagequeues[PQ_INACTIVE].pq_pdpages, 0,
"Inactive pages scanned by the page daemon");
SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"laundry", CTLFLAG_RD, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt, 0,
"laundry pages");
SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"laundpdpgs", CTLFLAG_RD,
&vmd->vmd_pagequeues[PQ_LAUNDRY].pq_pdpages, 0,
"Laundry pages scanned by the page daemon");
SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "unswappable",
CTLFLAG_RD, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_cnt, 0,
"Unswappable pages");
SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"unswppdpgs", CTLFLAG_RD,
&vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_pdpages, 0,
"Unswappable pages scanned by the page daemon");
/* Per-domain thresholds. */
SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"inactive_target", CTLFLAG_RD, &vmd->vmd_inactive_target, 0,
"Target inactive pages");
SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"free_target", CTLFLAG_RD, &vmd->vmd_free_target, 0,
"Target free pages");
SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"free_reserved", CTLFLAG_RD, &vmd->vmd_free_reserved, 0,
"Reserved free pages");
SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"free_min", CTLFLAG_RD, &vmd->vmd_free_min, 0,
"Minimum free pages");
SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"free_severe", CTLFLAG_RD, &vmd->vmd_free_severe, 0,
"Severe free pages");
}
/*
 * Create the "domain" node under _vm and populate it with one subtree per
 * VM domain via vm_domain_stats_init().  Run from SYSINIT at
 * SI_SUB_VM_CONF / SI_ORDER_FIRST; the argument is unused.
 */
static void
vm_stats_init(void *arg __unused)
{
	struct sysctl_oid *domain_root;
	int dom;

	domain_root = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_vm),
	    OID_AUTO, "domain", CTLFLAG_RD, NULL, "");
	dom = 0;
	while (dom < vm_ndomains) {
		vm_domain_stats_init(VM_DOMAIN(dom), domain_root);
		dom++;
	}
}
SYSINIT(vmstats_init, SI_SUB_VM_CONF, SI_ORDER_FIRST, vm_stats_init, NULL);