git: 56242a4c6566 - main - Add extended attributes

From: Fedor Uporov <fsu_at_FreeBSD.org>
Date: Sun, 29 Jan 2023 08:19:32 UTC
The branch main has been updated by fsu:

URL: https://cgit.FreeBSD.org/src/commit/?id=56242a4c6566580d7612bcb0d9fc4559f3208921

commit 56242a4c6566580d7612bcb0d9fc4559f3208921
Author:     Fedor Uporov <fsu@FreeBSD.org>
AuthorDate: 2022-12-05 17:04:42 +0000
Commit:     Fedor Uporov <fsu@FreeBSD.org>
CommitDate: 2023-01-29 08:13:14 +0000

    Add extended attributes
    
    Extended attributes follow UFS semantics, meaning they cannot
    be set on char/block devices and fifos. The attributes
    are allocated using regular malloc with M_WAITOK,
    under their own malloc tag M_TMPFSEA. The memory
    consumed by extended attributes is limited, to avoid triggering
    OOM, by the tmpfs_mount variable tm_ea_memory_max,
    which is initially set to 16 MB. Extended attribute
    entries are stored as a linked list in the tmpfs node.
    The mount point lock is required only in setextattr
    and deleteextattr, to update the extended attributes
    memory-in-use counter; all other operations are done
    under the vnode lock.
    
    Reviewed by:    kib
    MFC after:      2 weeks
    Differential revision:  https://reviews.freebsd.org/D38052
---
 share/man/man5/tmpfs.5      |   3 +
 sys/fs/tmpfs/tmpfs.h        |  32 +++++++
 sys/fs/tmpfs/tmpfs_subr.c   |   9 +-
 sys/fs/tmpfs/tmpfs_vfsops.c |  21 +++--
 sys/fs/tmpfs/tmpfs_vnops.c  | 216 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 274 insertions(+), 7 deletions(-)

diff --git a/share/man/man5/tmpfs.5 b/share/man/man5/tmpfs.5
index b934fe02868d..61f3a8505154 100644
--- a/share/man/man5/tmpfs.5
+++ b/share/man/man5/tmpfs.5
@@ -140,6 +140,9 @@ main memory and swap space) will be used.
 .It Cm maxfilesize
 Specifies the maximum file size in bytes.
 Defaults to the maximum possible value.
+.It Cm easize
+Specifies the maximum memory size used by extended attributes in bytes.
+Defaults to 16 megabytes.
 .El
 .Sh EXAMPLES
 Mount a
diff --git a/sys/fs/tmpfs/tmpfs.h b/sys/fs/tmpfs/tmpfs.h
index e9964d7d48af..70ed066ef0f5 100644
--- a/sys/fs/tmpfs/tmpfs.h
+++ b/sys/fs/tmpfs/tmpfs.h
@@ -132,6 +132,20 @@ RB_HEAD(tmpfs_dir, tmpfs_dirent);
 #define	TMPFS_DIRCOOKIE_DUP_MAX		\
 	(TMPFS_DIRCOOKIE_DUP | TMPFS_DIRCOOKIE_MASK)
 
+/*
+ * One extended attribute of a tmpfs node; entries are chained per node.
+ */
+LIST_HEAD(tmpfs_extattr_list, tmpfs_extattr);
+
+struct tmpfs_extattr {
+	LIST_ENTRY(tmpfs_extattr)	ea_extattrs;	/* list linkage */
+	int			ea_namespace;	/* attr namespace */
+	char			*ea_name;	/* name, not NUL-terminated */
+	unsigned char		ea_namelen;	/* attr name length */
+	char			*ea_value;	/* value buffer or NULL */
+	ssize_t			ea_size;	/* attr value size */
+};
+
 /*
  * Internal representation of a tmpfs file system node.
  *
@@ -239,6 +253,9 @@ struct tmpfs_node {
 	/* Transient refcounter on this node. */
 	u_int		tn_refcount;		/* 0<->1 (m) + (i) */
 
+	/* Extended attributes of this node. */
+	struct tmpfs_extattr_list	tn_extattrs;	/* (v) */
+
 	/* misc data field for different tn_type node */
 	union {
 		/* Valid when tn_type == VBLK || tn_type == VCHR. */
@@ -384,6 +401,12 @@ struct tmpfs_mount {
 	/* Number of nodes currently that are in use. */
 	ino_t			tm_nodes_inuse;
 
+	/* Memory used by extended attributes */
+	uint64_t		tm_ea_memory_inuse;
+
+	/* Maximum memory available for extended attributes */
+	uint64_t		tm_ea_memory_max;
+
 	/* Refcounter on this struct tmpfs_mount. */
 	uint64_t		tm_refcount;
 
@@ -480,6 +503,8 @@ struct tmpfs_dirent *tmpfs_dir_first(struct tmpfs_node *dnode,
 	    struct tmpfs_dir_cursor *dc);
 struct tmpfs_dirent *tmpfs_dir_next(struct tmpfs_node *dnode,
 	    struct tmpfs_dir_cursor *dc);
+bool	tmpfs_pages_check_avail(struct tmpfs_mount *tmp, size_t req_pages);
+void	tmpfs_extattr_free(struct tmpfs_extattr* ea);
 static __inline void
 tmpfs_update(struct vnode *vp)
 {
@@ -518,6 +543,13 @@ tmpfs_update(struct vnode *vp)
 #define TMPFS_PAGES_MINRESERVED		(4 * 1024 * 1024 / PAGE_SIZE)
 #endif
 
+/*
+ * Amount of memory to reserve for extended attributes.
+ */
+#if !defined(TMPFS_EA_MEMORY_RESERVED)
+#define TMPFS_EA_MEMORY_RESERVED	(16 * 1024 * 1024)
+#endif
+
 size_t tmpfs_mem_avail(void);
 size_t tmpfs_pages_used(struct tmpfs_mount *tmp);
 int tmpfs_subr_init(void);
diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c
index 67fb55d2a6a6..ccb9977c39eb 100644
--- a/sys/fs/tmpfs/tmpfs_subr.c
+++ b/sys/fs/tmpfs/tmpfs_subr.c
@@ -434,7 +434,7 @@ tmpfs_pages_used(struct tmpfs_mount *tmp)
 	return (meta_pages + tmp->tm_pages_used);
 }
 
-static bool
+bool
 tmpfs_pages_check_avail(struct tmpfs_mount *tmp, size_t req_pages)
 {
 	if (tmpfs_mem_avail() < req_pages)
@@ -587,6 +587,7 @@ tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *tmp, enum vtype type,
 	nnode->tn_mode = mode;
 	nnode->tn_id = alloc_unr64(&tmp->tm_ino_unr);
 	nnode->tn_refcount = 1;
+	LIST_INIT(&nnode->tn_extattrs);
 
 	/* Type-specific initialization. */
 	switch (nnode->tn_type) {
@@ -702,6 +703,7 @@ bool
 tmpfs_free_node_locked(struct tmpfs_mount *tmp, struct tmpfs_node *node,
     bool detach)
 {
+	struct tmpfs_extattr *ea;
 	vm_object_t uobj;
 	char *symlink;
 	bool last;
@@ -748,6 +750,11 @@ tmpfs_free_node_locked(struct tmpfs_mount *tmp, struct tmpfs_node *node,
 	}
 #endif
 
+	while ((ea = LIST_FIRST(&node->tn_extattrs)) != NULL) {
+		LIST_REMOVE(ea, ea_extattrs);
+		tmpfs_extattr_free(ea);
+	}
+
 	switch (node->tn_type) {
 	case VREG:
 		uobj = node->tn_reg.tn_aobj;
diff --git a/sys/fs/tmpfs/tmpfs_vfsops.c b/sys/fs/tmpfs/tmpfs_vfsops.c
index a8382872aa2f..682636d20725 100644
--- a/sys/fs/tmpfs/tmpfs_vfsops.c
+++ b/sys/fs/tmpfs/tmpfs_vfsops.c
@@ -92,12 +92,12 @@ static int	tmpfs_fhtovp(struct mount *, struct fid *, int,
 static int	tmpfs_statfs(struct mount *, struct statfs *);
 
 static const char *tmpfs_opts[] = {
-	"from", "size", "maxfilesize", "inodes", "uid", "gid", "mode", "export",
-	"union", "nonc", "nomtime", NULL
+	"from", "easize", "size", "maxfilesize", "inodes", "uid", "gid", "mode",
+	"export", "union", "nonc", "nomtime", NULL
 };
 
 static const char *tmpfs_updateopts[] = {
-	"from", "export", "nomtime", "size", NULL
+	"from", "easize", "export", "nomtime", "size", NULL
 };
 
 static int
@@ -332,7 +332,7 @@ tmpfs_mount(struct mount *mp)
 	bool nomtime, nonc;
 	/* Size counters. */
 	u_quad_t pages;
-	off_t nodes_max, size_max, maxfilesize;
+	off_t nodes_max, size_max, maxfilesize, ea_max_size;
 
 	/* Root node attributes. */
 	uid_t root_uid;
@@ -360,6 +360,9 @@ tmpfs_mount(struct mount *mp)
 			if (size_max != tmp->tm_size_max)
 				return (EOPNOTSUPP);
 		}
+		if (vfs_getopt_size(mp->mnt_optnew, "easize", &ea_max_size) == 0) {
+			tmp->tm_ea_memory_max = ea_max_size;
+		}
 		if (vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) &&
 		    !tmp->tm_ronly) {
 			/* RW -> RO */
@@ -405,6 +408,8 @@ tmpfs_mount(struct mount *mp)
 		size_max = 0;
 	if (vfs_getopt_size(mp->mnt_optnew, "maxfilesize", &maxfilesize) != 0)
 		maxfilesize = 0;
+	if (vfs_getopt_size(mp->mnt_optnew, "easize", &ea_max_size) != 0)
+		ea_max_size = 0;
 	nonc = vfs_getopt(mp->mnt_optnew, "nonc", NULL, NULL) == 0;
 	nomtime = vfs_getopt(mp->mnt_optnew, "nomtime", NULL, NULL) == 0;
 
@@ -443,8 +448,11 @@ tmpfs_mount(struct mount *mp)
 	mtx_init(&tmp->tm_allnode_lock, "tmpfs allnode lock", NULL, MTX_DEF);
 	tmp->tm_nodes_max = nodes_max;
 	tmp->tm_nodes_inuse = 0;
+	tmp->tm_ea_memory_inuse = 0;
 	tmp->tm_refcount = 1;
 	tmp->tm_maxfilesize = maxfilesize > 0 ? maxfilesize : OFF_MAX;
+	tmp->tm_ea_memory_max = ea_max_size > 0 ?
+	    ea_max_size : TMPFS_EA_MEMORY_RESERVED;
 	LIST_INIT(&tmp->tm_nodes_used);
 
 	tmp->tm_size_max = size_max;
@@ -708,11 +716,12 @@ db_print_tmpfs(struct mount *mp, struct tmpfs_mount *tmp)
 	    mp->mnt_stat.f_mntonname, tmp);
 	db_printf(
 	    "\tsize max %ju pages max %lu pages used %lu\n"
-	    "\tinodes max %ju inodes inuse %ju refcount %ju\n"
+	    "\tinodes max %ju inodes inuse %ju ea inuse %ju refcount %ju\n"
 	    "\tmaxfilesize %ju r%c %snamecache %smtime\n",
 	    (uintmax_t)tmp->tm_size_max, tmp->tm_pages_max, tmp->tm_pages_used,
 	    (uintmax_t)tmp->tm_nodes_max, (uintmax_t)tmp->tm_nodes_inuse,
-	    (uintmax_t)tmp->tm_refcount, (uintmax_t)tmp->tm_maxfilesize,
+	    (uintmax_t)tmp->tm_ea_memory_inuse, (uintmax_t)tmp->tm_refcount,
+	    (uintmax_t)tmp->tm_maxfilesize,
 	    tmp->tm_ronly ? 'o' : 'w', tmp->tm_nonc ? "no" : "",
 	    tmp->tm_nomtime ? "no" : "");
 }
diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c
index 6a58ffdc0f4b..55f0beebc848 100644
--- a/sys/fs/tmpfs/tmpfs_vnops.c
+++ b/sys/fs/tmpfs/tmpfs_vnops.c
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/dirent.h>
+#include <sys/extattr.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filio.h>
@@ -79,6 +80,8 @@ SYSCTL_INT(_vfs_tmpfs, OID_AUTO, rename_restarts, CTLFLAG_RD,
     __DEVOLATILE(int *, &tmpfs_rename_restarts), 0,
     "Times rename had to restart due to lock contention");
 
+MALLOC_DEFINE(M_TMPFSEA, "tmpfs extattr", "tmpfs extattr structure");
+
 static int
 tmpfs_vn_get_ino_alloc(struct mount *mp, void *arg, int lkflags,
     struct vnode **rvp)
@@ -1855,6 +1858,215 @@ restart_locked:
 	return (ENOENT);
 }
 
+void
+tmpfs_extattr_free(struct tmpfs_extattr *ea)
+{
+	free(ea->ea_value, M_TMPFSEA);	/* NULL when the value is empty */
+	free(ea->ea_name, M_TMPFSEA);
+	free(ea, M_TMPFSEA);		/* the entry itself goes last */
+}
+
+/*
+ * Adjust the EA memory counter; only growth is checked against limits.
+ */
+static bool
+tmpfs_extattr_update_mem(struct tmpfs_mount *tmp, ssize_t size)
+{
+	bool ok = true;
+
+	TMPFS_LOCK(tmp);
+	if (size > 0)
+		ok = tmpfs_pages_check_avail(tmp, howmany(size, PAGE_SIZE)) &&
+		    tmp->tm_ea_memory_inuse + size <= tmp->tm_ea_memory_max;
+	if (ok)
+		tmp->tm_ea_memory_inuse += size;
+	TMPFS_UNLOCK(tmp);
+	return (ok);
+}
+
+/*
+ * VOP_DELETEEXTATTR: unlink the named attribute from the node's list,
+ * release its share of the mount's EA memory counter and free it.
+ */
+static int
+tmpfs_deleteextattr(struct vop_deleteextattr_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
+	struct tmpfs_mount *tmp = VFS_TO_TMPFS(vp->v_mount);
+	struct tmpfs_extattr *cur;
+	size_t len;
+	int error;
+
+	if (vp->v_type == VCHR || vp->v_type == VBLK)
+		return (EOPNOTSUPP);
+	error = extattr_check_cred(vp, ap->a_attrnamespace, ap->a_cred,
+	    ap->a_td, VWRITE);
+	if (error != 0)
+		return (error);
+	if (ap->a_name == NULL || ap->a_name[0] == '\0')
+		return (EINVAL);
+	len = strlen(ap->a_name);
+	if (len > EXTATTR_MAXNAMELEN)
+		return (EINVAL);
+
+	LIST_FOREACH(cur, &node->tn_extattrs, ea_extattrs) {
+		if (cur->ea_namespace != ap->a_attrnamespace ||
+		    cur->ea_namelen != len ||
+		    memcmp(cur->ea_name, ap->a_name, len) != 0)
+			continue;
+		/* Found it: give back struct + name + value bytes. */
+		LIST_REMOVE(cur, ea_extattrs);
+		tmpfs_extattr_update_mem(tmp,
+		    -(sizeof(struct tmpfs_extattr) + len + cur->ea_size));
+		tmpfs_extattr_free(cur);
+		return (0);
+	}
+	return (ENOATTR);
+}
+
+/*
+ * VOP_GETEXTATTR: return the named attribute's size and/or value.
+ */
+static int
+tmpfs_getextattr(struct vop_getextattr_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
+	struct tmpfs_extattr *cur;
+	size_t len;
+	int error;
+
+	if (vp->v_type == VCHR || vp->v_type == VBLK)
+		return (EOPNOTSUPP);
+	error = extattr_check_cred(vp, ap->a_attrnamespace, ap->a_cred,
+	    ap->a_td, VREAD);
+	if (error != 0)
+		return (error);
+	if (ap->a_name == NULL || ap->a_name[0] == '\0')
+		return (EINVAL);
+	len = strlen(ap->a_name);
+	if (len > EXTATTR_MAXNAMELEN)
+		return (EINVAL);
+
+	LIST_FOREACH(cur, &node->tn_extattrs, ea_extattrs) {
+		if (cur->ea_namespace != ap->a_attrnamespace ||
+		    cur->ea_namelen != len ||
+		    memcmp(cur->ea_name, ap->a_name, len) != 0)
+			continue;
+		if (ap->a_size != NULL)
+			*ap->a_size = cur->ea_size;
+		if (ap->a_uio == NULL || cur->ea_size == 0)
+			return (0);
+		return (uiomove(cur->ea_value, cur->ea_size, ap->a_uio));
+	}
+	return (ENOATTR);
+}
+
+/* VOP_LISTEXTATTR: list attribute names in the requested namespace. */
+static int
+tmpfs_listextattr(struct vop_listextattr_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
+	struct tmpfs_extattr *cur;
+	int error;
+
+	if (vp->v_type == VCHR || vp->v_type == VBLK)
+		return (EOPNOTSUPP);
+	error = extattr_check_cred(vp, ap->a_attrnamespace, ap->a_cred,
+	    ap->a_td, VREAD);
+	if (error != 0)
+		return (error);
+	if (ap->a_size != NULL)
+		*ap->a_size = 0;
+
+	LIST_FOREACH(cur, &node->tn_extattrs, ea_extattrs) {
+		if (cur->ea_namespace != ap->a_attrnamespace)
+			continue;
+		if (ap->a_size != NULL)
+			*ap->a_size += cur->ea_namelen + 1;
+		/* Entry format: one length byte, then the unterminated name. */
+		if (ap->a_uio == NULL)
+			continue;
+		error = uiomove(&cur->ea_namelen, 1, ap->a_uio);
+		if (error == 0)
+			error = uiomove(cur->ea_name, cur->ea_namelen,
+			    ap->a_uio);
+		if (error != 0)
+			return (error);
+	}
+	return (0);
+}
+
+/*
+ * VOP_SETEXTATTR: create the named attribute or replace its value.  The
+ * net size change is charged to the mount before anything is allocated.
+ */
+static int
+tmpfs_setextattr(struct vop_setextattr_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
+	struct tmpfs_mount *tmp = VFS_TO_TMPFS(vp->v_mount);
+	struct tmpfs_extattr *ea, *new_ea;
+	size_t attr_size, namelen;
+	ssize_t diff;
+	int error;
+
+	if (vp->v_type == VCHR || vp->v_type == VBLK)
+		return (EOPNOTSUPP);
+	/* Callers may pass no uio (old delete-via-set API); unsupported. */
+	if (ap->a_uio == NULL)
+		return (EOPNOTSUPP);
+	error = extattr_check_cred(vp, ap->a_attrnamespace, ap->a_cred,
+	    ap->a_td, VWRITE);
+	if (error != 0)
+		return (error);
+	if (ap->a_name == NULL || ap->a_name[0] == '\0')
+		return (EINVAL);
+	namelen = strlen(ap->a_name);
+	if (namelen > EXTATTR_MAXNAMELEN)
+		return (EINVAL);
+	attr_size = ap->a_uio->uio_resid;
+
+	diff = sizeof(*new_ea) + namelen + attr_size;
+	LIST_FOREACH(ea, &node->tn_extattrs, ea_extattrs) {
+		if (ea->ea_namespace == ap->a_attrnamespace &&
+		    namelen == ea->ea_namelen &&
+		    memcmp(ap->a_name, ea->ea_name, namelen) == 0) {
+			diff -= sizeof(*ea) + ea->ea_namelen + ea->ea_size;
+			break;
+		}
+	}
+	if (!tmpfs_extattr_update_mem(tmp, diff))
+		return (ENOSPC);
+
+	new_ea = malloc(sizeof(*new_ea), M_TMPFSEA, M_WAITOK);
+	new_ea->ea_namespace = ap->a_attrnamespace;
+	new_ea->ea_name = malloc(namelen, M_TMPFSEA, M_WAITOK);
+	new_ea->ea_namelen = namelen;
+	memcpy(new_ea->ea_name, ap->a_name, namelen);
+	new_ea->ea_value = NULL;
+	new_ea->ea_size = attr_size;
+	if (attr_size != 0) {
+		new_ea->ea_value = malloc(attr_size, M_TMPFSEA, M_WAITOK);
+		error = uiomove(new_ea->ea_value, attr_size, ap->a_uio);
+		if (error != 0) {
+			/* Undo the charge; the old entry is still linked. */
+			tmpfs_extattr_update_mem(tmp, -diff);
+			tmpfs_extattr_free(new_ea);
+			return (error);
+		}
+	}
+	if (ea != NULL) {
+		LIST_REMOVE(ea, ea_extattrs);
+		tmpfs_extattr_free(ea);
+	}
+	LIST_INSERT_HEAD(&node->tn_extattrs, new_ea, ea_extattrs);
+	return (0);
+}
+
 static off_t
 tmpfs_seek_data_locked(vm_object_t obj, off_t noff)
 {
@@ -2022,6 +2234,10 @@ struct vop_vector tmpfs_vnodeop_entries = {
 	.vop_lock1 =			vop_lock,
 	.vop_unlock = 			vop_unlock,
 	.vop_islocked = 		vop_islocked,
+	.vop_deleteextattr =		tmpfs_deleteextattr,
+	.vop_getextattr =		tmpfs_getextattr,
+	.vop_listextattr =		tmpfs_listextattr,
+	.vop_setextattr =		tmpfs_setextattr,
 	.vop_add_writecount =		vop_stdadd_writecount_nomsync,
 	.vop_ioctl =			tmpfs_ioctl,
 };