svn commit: r224061 - in head/sys/ufs: ffs ufs
Craig Rodrigues
rodrigc at crodrigues.org
Fri Jul 22 20:58:09 UTC 2011
On Fri, Jul 15, 2011 at 9:20 AM, Kirk McKusick <mckusick at freebsd.org> wrote:
> Author: mckusick
> Date: Fri Jul 15 16:20:33 2011
> New Revision: 224061
> URL: http://svn.freebsd.org/changeset/base/224061
>
> Log:
> Add an FFS specific mount option to allow a filesystem checker
> (typically fsck_ffs) to register that it wishes to use FFS specific
> sysctl's to update the filesystem. This ensures that two checkers
> cannot run on a given filesystem at the same time and that no other
> process accidentally or maliciously uses the filesystem updating
> sysctls inappropriately. This functionality is needed by the
> journaling soft-updates recovery code.
>
> Modified:
> head/sys/ufs/ffs/ffs_alloc.c
> head/sys/ufs/ffs/ffs_inode.c
> head/sys/ufs/ffs/ffs_vfsops.c
> head/sys/ufs/ffs/fs.h
> head/sys/ufs/ufs/ufsmount.h
>
> Modified: head/sys/ufs/ffs/ffs_alloc.c
>
> ==============================================================================
> --- head/sys/ufs/ffs/ffs_alloc.c Fri Jul 15 15:56:23 2011
> (r224060)
> +++ head/sys/ufs/ffs/ffs_alloc.c Fri Jul 15 16:20:33 2011
> (r224061)
> @@ -2381,6 +2381,18 @@ ffs_fserr(fs, inum, cp)
> * in the current directory is oldvalue then change it to newvalue.
> * unlink(nameptr, oldvalue) - Verify that the inode number associated
> * with nameptr in the current directory is oldvalue then unlink it.
> + *
> + * The following functions may only be used on a quiescent filesystem
> + * by the soft updates journal. They are not safe to be run on an active
> + * filesystem.
> + *
> + * setinode(inode, dip) - the specified disk inode is replaced with the
> + * contents pointed to by dip.
> + * setbufoutput(fd, flags) - output associated with the specified file
> + * descriptor (which must reference the character device supporting
> + * the filesystem) switches from using physio to running through the
> + * buffer cache when flags is set to 1. The descriptor reverts to
> + * physio for output when flags is set to zero.
> */
>
> static int sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS);
> @@ -2427,11 +2439,21 @@ static SYSCTL_NODE(_vfs_ffs, FFS_SET_DOT
> static SYSCTL_NODE(_vfs_ffs, FFS_UNLINK, unlink, CTLFLAG_WR,
> sysctl_ffs_fsck, "Unlink a Duplicate Name");
>
> +static SYSCTL_NODE(_vfs_ffs, FFS_SET_INODE, setinode, CTLFLAG_WR,
> + sysctl_ffs_fsck, "Update an On-Disk Inode");
> +
> +static SYSCTL_NODE(_vfs_ffs, FFS_SET_BUFOUTPUT, setbufoutput, CTLFLAG_WR,
> + sysctl_ffs_fsck, "Set Buffered Writing for Descriptor");
> +
> +#define DEBUG 1
> #ifdef DEBUG
> -static int fsckcmds = 0;
> +static int fsckcmds = 1;
> SYSCTL_INT(_debug, OID_AUTO, fsckcmds, CTLFLAG_RW, &fsckcmds, 0, "");
> #endif /* DEBUG */
>
> +static int buffered_write(struct file *, struct uio *, struct ucred *,
> + int, struct thread *);
> +
> static int
> sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
> {
> @@ -2445,8 +2467,10 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
> ufs2_daddr_t blkno;
> long blkcnt, blksize;
> struct filedesc *fdp;
> - struct file *fp;
> + struct file *fp, *vfp;
> int vfslocked, filetype, error;
> + static struct fileops *origops, bufferedops;
> + static int outcnt = 0;
>
> if (req->newlen > sizeof cmd)
> return (EBADRPC);
> @@ -2454,7 +2478,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
> return (error);
> if (cmd.version != FFS_CMD_VERSION)
> return (ERPCMISMATCH);
> - if ((error = getvnode(curproc->p_fd, cmd.handle, &fp)) != 0)
> + if ((error = getvnode(td->td_proc->p_fd, cmd.handle, &fp)) != 0)
> return (error);
> vp = fp->f_data;
> if (vp->v_type != VREG && vp->v_type != VDIR) {
> @@ -2467,12 +2491,13 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
> fdrop(fp, td);
> return (EINVAL);
> }
> - if (mp->mnt_flag & MNT_RDONLY) {
> + ump = VFSTOUFS(mp);
> + if ((mp->mnt_flag & MNT_RDONLY) &&
> + ump->um_fsckpid != td->td_proc->p_pid) {
> vn_finished_write(mp);
> fdrop(fp, td);
> return (EROFS);
> }
> - ump = VFSTOUFS(mp);
> fs = ump->um_fs;
> filetype = IFREG;
>
> @@ -2493,7 +2518,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
> case FFS_ADJ_REFCNT:
> #ifdef DEBUG
> if (fsckcmds) {
> - printf("%s: adjust inode %jd count by %jd\n",
> + printf("%s: adjust inode %jd link count by %jd\n",
> mp->mnt_stat.f_mntonname, (intmax_t)cmd.value,
> (intmax_t)cmd.size);
> }
> @@ -2504,7 +2529,8 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
> ip->i_nlink += cmd.size;
> DIP_SET(ip, i_nlink, ip->i_nlink);
> ip->i_effnlink += cmd.size;
> - ip->i_flag |= IN_CHANGE;
> + ip->i_flag |= IN_CHANGE | IN_MODIFIED;
> + error = ffs_update(vp, 1);
> if (DOINGSOFTDEP(vp))
> softdep_change_linkcnt(ip);
> vput(vp);
> @@ -2522,7 +2548,8 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
> break;
> ip = VTOI(vp);
> DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + cmd.size);
> - ip->i_flag |= IN_CHANGE;
> + ip->i_flag |= IN_CHANGE | IN_MODIFIED;
> + error = ffs_update(vp, 1);
> vput(vp);
> break;
>
> @@ -2722,6 +2749,78 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
> UIO_USERSPACE, (ino_t)cmd.size);
> break;
>
> + case FFS_SET_INODE:
> + if (ump->um_fsckpid != td->td_proc->p_pid) {
> + error = EPERM;
> + break;
> + }
> +#ifdef DEBUG
> + if (fsckcmds && outcnt++ < 100) {
> + printf("%s: update inode %jd\n",
> + mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
> + }
> +#endif /* DEBUG */
> + if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE,
> &vp)))
> + break;
> + vfslocked = VFS_LOCK_GIANT(vp->v_mount);
> + AUDIT_ARG_VNODE1(vp);
> + ip = VTOI(vp);
> + if (ip->i_ump->um_fstype == UFS1)
> + error = copyin((void *)(intptr_t)cmd.size,
> ip->i_din1,
> + sizeof(struct ufs1_dinode));
> + else
> + error = copyin((void *)(intptr_t)cmd.size,
> ip->i_din2,
> + sizeof(struct ufs2_dinode));
> + if (error) {
> + vput(vp);
> + VFS_UNLOCK_GIANT(vfslocked);
> + break;
> + }
> + ip->i_flag |= IN_CHANGE | IN_MODIFIED;
> + error = ffs_update(vp, 1);
> + vput(vp);
> + VFS_UNLOCK_GIANT(vfslocked);
> + break;
> +
> + case FFS_SET_BUFOUTPUT:
> + if (ump->um_fsckpid != td->td_proc->p_pid) {
> + error = EPERM;
> + break;
> + }
> + if (VTOI(vp)->i_ump != ump) {
> + error = EINVAL;
> + break;
> + }
> +#ifdef DEBUG
> + if (fsckcmds) {
> + printf("%s: %s buffered output for descriptor
> %jd\n",
> + mp->mnt_stat.f_mntonname,
> + cmd.size == 1 ? "enable" : "disable",
> + (intmax_t)cmd.value);
> + }
> +#endif /* DEBUG */
> + if ((error = getvnode(td->td_proc->p_fd, cmd.value, &vfp))
> != 0)
> + break;
> + if (vfp->f_vnode->v_type != VCHR) {
> + fdrop(vfp, td);
> + error = EINVAL;
> + break;
> + }
> + if (origops == NULL) {
> + origops = vfp->f_ops;
> + bcopy((void *)origops, (void *)&bufferedops,
> + sizeof(bufferedops));
> + bufferedops.fo_write = buffered_write;
> + }
> + if (cmd.size == 1)
> + atomic_store_rel_ptr((volatile uintptr_t
> *)&vfp->f_ops,
> + (uintptr_t)&bufferedops);
> + else
> + atomic_store_rel_ptr((volatile uintptr_t
> *)&vfp->f_ops,
> + (uintptr_t)origops);
> + fdrop(vfp, td);
> + break;
> +
> default:
> #ifdef DEBUG
> if (fsckcmds) {
> @@ -2737,3 +2836,73 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
> vn_finished_write(mp);
> return (error);
> }
> +
> +/*
> + * Function to switch a descriptor to use the buffer cache to stage
> + * its I/O. This is needed so that writes to the filesystem device
> + * will give snapshots a chance to copy modified blocks for which it
> + * needs to retain copies.
> + */
> +static int
> +buffered_write(fp, uio, active_cred, flags, td)
> + struct file *fp;
> + struct uio *uio;
> + struct ucred *active_cred;
> + int flags;
> + struct thread *td;
> +{
>
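Please use ANSI-style function definitions (with the parameter types declared
in the parameter list) for new functions, rather than the K&R style used here.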
>
> Modified: head/sys/ufs/ffs/ffs_vfsops.c
>
> ==============================================================================
> --- head/sys/ufs/ffs/ffs_vfsops.c Fri Jul 15 15:56:23 2011
> (r224060)
> +++ head/sys/ufs/ffs/ffs_vfsops.c Fri Jul 15 16:20:33 2011
> (r224061)
> @@ -132,8 +132,8 @@ static struct buf_ops ffs_ops = {
> */
> static const char *ffs_opts[] = { "acls", "async", "noatime",
> "noclusterr",
> "noclusterw", "noexec", "export", "force", "from", "groupquota",
> - "multilabel", "nfsv4acls", "snapshot", "nosuid", "suiddir",
> "nosymfollow",
> - "sync", "union", "userquota", NULL };
> + "multilabel", "nfsv4acls", "fsckpid", "snapshot", "nosuid", "suiddir",
> + "nosymfollow", "sync", "union", "userquota", NULL };
>
> static int
> ffs_mount(struct mount *mp)
> @@ -142,6 +142,7 @@ ffs_mount(struct mount *mp)
> struct thread *td;
> struct ufsmount *ump = 0;
> struct fs *fs;
> + pid_t fsckpid = 0;
> int error, flags;
> u_int mntorflags;
> accmode_t accmode;
> @@ -184,6 +185,29 @@ ffs_mount(struct mount *mp)
> vfs_deleteopt(mp->mnt_opt, "snapshot");
> }
>
> + if (vfs_getopt(mp->mnt_optnew, "fsckpid", NULL, NULL) == 0 &&
> + vfs_scanopt(mp->mnt_optnew, "fsckpid", "%d", &fsckpid) == 1) {
> + /*
> + * Once we have set the restricted PID, do not
> + * persist "fsckpid" in the options list.
> + */
> + vfs_deleteopt(mp->mnt_optnew, "fsckpid");
> + vfs_deleteopt(mp->mnt_opt, "fsckpid");
> + if (mp->mnt_flag & MNT_UPDATE) {
> + if (VFSTOUFS(mp)->um_fs->fs_ronly == 0 &&
> + vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) ==
> 0) {
> + printf("Checker enable: Must be
> read-only\n");
> + return (EINVAL);
> + }
> + } else if (vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0)
> {
> + printf("Checker enable: Must be read-only\n");
> + return (EINVAL);
> + }
>
If you have error messages in an nmount() call, please consider using the
vfs_mount_error() function, which is in sys/kern/vfs_mount.c, rather than
printf(). In src/sbin/mount/mount_fs.c, there is an example of how the
"errmsg" string can be passed from userspace into the kernel via nmount()
in order to retrieve the value of this string.
--
Craig Rodrigues
rodrigc at crodrigues.org
More information about the svn-src-all
mailing list