fdescfs brokenness

Kostik Belousov kostikbel at gmail.com
Sat May 9 13:34:16 UTC 2009


On Sat, May 09, 2009 at 01:05:31AM +0200, Jilles Tjoelker wrote:
> On Fri, May 08, 2009 at 11:12:03PM +0300, Kostik Belousov wrote:
> > On Thu, May 07, 2009 at 08:07:46PM -0700, Tim Kientzle wrote:
> > > Colin Percival recently pointed out some issues
> > > with tar and fdescfs.  Part of the problem
> > > here is tar; I need to rethink some of the
> > > traversal logic.
> 
> > > But fdescfs is really wonky:
> 
> > >  * This is a nit, but:  ls /dev/fd/18 should not
> > >    return EBADF; it should return ENOENT, just
> > >    like any other reference to a non-existent filename.
> > >    (Just because a filename reflects a file descriptor
> > >    does not mean it is a file descriptor.)
> > This is a traditional behaviour for fdescfs. According to man page,
> > open("dev/fd/N") shall be equivalent to fcntl(N, F_DUPFD, 0).
> > Solaris behaviour is the same.
> 
> On open, yes, but stat behaves differently on a Solaris 10 machine here.
> A valid but unallocated fd number will still stat as a character
> device, like an allocated fd.
> 
> % ls -l /dev/fd/0 /dev/fd/999
> crw-rw-rw-   1 root     root     320,  0 May  9 00:06 /dev/fd/0
> crw-rw-rw-   1 root     root     320, 999 May  9 00:06 /dev/fd/999
Yes, this makes sense.

> 
> By the way, both FreeBSD and Solaris also behave strangely if you try to
> access fd numbers 1<<32 or higher.
The strangeness is purely cosmetic, in my opinion, but still.

> 
> Linux seems to behave strangely as well: the fds show up as symlinks,
> some of which do not contain valid file names but can still be opened.
> However, a command like
>   { read x <&5; read y </dev/fd/5; read z </dev/fd/5; echo $x $y $z; :; } 5<~/.zshrc
> which shows the first three lines under FreeBSD and Solaris,
> shows the first line three times under Linux, so apparently it does not
> duplicate file descriptors (at least in some cases).

> I think it should be possible to write a directory walker program using
> only standard interfaces.

For standard-compliant filesystems, yes. AFAIR POSIX does not make any
claims about the whole namespace.

I did like the idea of turning fdescfs nodes into character devices for
stat(2). Besides fixing the issue, it also prevents recursive descent
into the VFS, which avoids the LOR (lock order reversal).

While here, I also added a check for overflow when parsing the fd number.

diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c
index d1788ae..9846357 100644
--- a/sys/fs/fdescfs/fdesc_vnops.c
+++ b/sys/fs/fdescfs/fdesc_vnops.c
@@ -264,7 +264,7 @@ fdesc_lookup(ap)
 	struct thread *td = cnp->cn_thread;
 	struct file *fp;
 	int nlen = cnp->cn_namelen;
-	u_int fd;
+	u_int fd, fd1;
 	int error;
 	struct vnode *fvp;
 
@@ -296,7 +296,12 @@ fdesc_lookup(ap)
 			error = ENOENT;
 			goto bad;
 		}
-		fd = 10 * fd + *pname++ - '0';
+		fd1 = 10 * fd + *pname++ - '0';
+		if (fd1 < fd) {
+			error = ENOENT;
+			goto bad;
+		}
+		fd = fd1;
 	}
 
 	if ((error = fget(td, fd, &fp)) != 0)
@@ -383,78 +388,34 @@ fdesc_getattr(ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vattr *vap = ap->a_vap;
-	struct thread *td = curthread;
-	struct file *fp;
-	struct stat stb;
-	u_int fd;
-	int error = 0;
+
+	vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
+	vap->va_fileid = VTOFDESC(vp)->fd_ix;
+	vap->va_uid = 0;
+	vap->va_gid = 0;
+	vap->va_blocksize = DEV_BSIZE;
+	vap->va_atime.tv_sec = boottime.tv_sec;
+	vap->va_atime.tv_nsec = 0;
+	vap->va_mtime = vap->va_atime;
+	vap->va_ctime = vap->va_mtime;
+	vap->va_gen = 0;
+	vap->va_flags = 0;
+	vap->va_bytes = 0;
+	vap->va_filerev = 0;
 
 	switch (VTOFDESC(vp)->fd_type) {
 	case Froot:
-		vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
 		vap->va_type = VDIR;
 		vap->va_nlink = 2;
 		vap->va_size = DEV_BSIZE;
-		vap->va_fileid = VTOFDESC(vp)->fd_ix;
-		vap->va_uid = 0;
-		vap->va_gid = 0;
-		vap->va_blocksize = DEV_BSIZE;
-		vap->va_atime.tv_sec = boottime.tv_sec;
-		vap->va_atime.tv_nsec = 0;
-		vap->va_mtime = vap->va_atime;
-		vap->va_ctime = vap->va_mtime;
-		vap->va_gen = 0;
-		vap->va_flags = 0;
 		vap->va_rdev = NODEV;
-		vap->va_bytes = 0;
-		vap->va_filerev = 0;
 		break;
 
 	case Fdesc:
-		fd = VTOFDESC(vp)->fd_fd;
-
-		if ((error = fget(td, fd, &fp)) != 0)
-			return (error);
-
-		bzero(&stb, sizeof(stb));
-		error = fo_stat(fp, &stb, td->td_ucred, td);
-		fdrop(fp, td);
-		if (error == 0) {
-			vap->va_type = IFTOVT(stb.st_mode);
-			vap->va_mode = stb.st_mode;
-			if (vap->va_type == VDIR)
-				vap->va_mode &= ~(S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH);
-			vap->va_nlink = 1;
-			vap->va_flags = 0;
-			vap->va_bytes = stb.st_blocks * stb.st_blksize;
-			vap->va_fileid = VTOFDESC(vp)->fd_ix;
-			vap->va_size = stb.st_size;
-			vap->va_blocksize = stb.st_blksize;
-			vap->va_rdev = stb.st_rdev;
-
-			/*
-			 * If no time data is provided, use the current time.
-			 */
-			if (stb.st_atimespec.tv_sec == 0 &&
-			    stb.st_atimespec.tv_nsec == 0)
-				nanotime(&stb.st_atimespec);
-
-			if (stb.st_ctimespec.tv_sec == 0 &&
-			    stb.st_ctimespec.tv_nsec == 0)
-				nanotime(&stb.st_ctimespec);
-
-			if (stb.st_mtimespec.tv_sec == 0 &&
-			    stb.st_mtimespec.tv_nsec == 0)
-				nanotime(&stb.st_mtimespec);
-
-			vap->va_atime = stb.st_atimespec;
-			vap->va_mtime = stb.st_mtimespec;
-			vap->va_ctime = stb.st_ctimespec;
-			vap->va_uid = stb.st_uid;
-			vap->va_gid = stb.st_gid;
-			vap->va_gen = 0;
-			vap->va_filerev = 0;
-		}
+		vap->va_type = VCHR;
+		vap->va_nlink = 1;
+		vap->va_size = 0;
+		vap->va_rdev = makedev(0, vap->va_fileid);
 		break;
 
 	default:
@@ -462,9 +423,8 @@ fdesc_getattr(ap)
 		break;
 	}
 
-	if (error == 0)
-		vp->v_type = vap->va_type;
-	return (error);
+	vp->v_type = vap->va_type;
+	return (0);
 }
 
 static int
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 195 bytes
Desc: not available
Url : http://lists.freebsd.org/pipermail/freebsd-hackers/attachments/20090509/cc40a48a/attachment.pgp


More information about the freebsd-hackers mailing list