svn commit: r333576 - in head/sys: kern sys ufs/ffs
Konstantin Belousov
kib at FreeBSD.org
Sun May 13 09:47:30 UTC 2018
Author: kib
Date: Sun May 13 09:47:28 2018
New Revision: 333576
URL: https://svnweb.freebsd.org/changeset/base/333576
Log:
Detect and optimize reads from the hole on UFS.
- Create a getblkx(9) variant of getblk(9) which can return an error.
- Add GB_NOSPARSE flag for getblk()/getblkx() which requests that BMAP
be performed before the buffer is created, and that EJUSTRETURN be
returned in case the requested block does not exist.
- Make ffs_read() use GB_NOSPARSE to avoid instantiating a buffer (and
allocating the pages for it), copying from zero_region instead.
The end result is fewer page allocations and less buffer recycling when
a hole is read, which is important for some benchmarks.
Requested and reviewed by: jeff
Tested by: pho
Sponsored by: The FreeBSD Foundation
MFC after: 2 weeks
Differential revision: https://reviews.freebsd.org/D14917
Modified:
head/sys/kern/vfs_bio.c
head/sys/kern/vfs_cluster.c
head/sys/sys/buf.h
head/sys/ufs/ffs/ffs_vnops.c
Modified: head/sys/kern/vfs_bio.c
==============================================================================
--- head/sys/kern/vfs_bio.c Sat May 12 20:00:29 2018 (r333575)
+++ head/sys/kern/vfs_bio.c Sun May 13 09:47:28 2018 (r333576)
@@ -2138,30 +2138,37 @@ breadn_flags(struct vnode *vp, daddr_t blkno, int size
void (*ckhashfunc)(struct buf *), struct buf **bpp)
{
struct buf *bp;
- int readwait, rv;
+ struct thread *td;
+ int error, readwait, rv;
CTR3(KTR_BUF, "breadn(%p, %jd, %d)", vp, blkno, size);
+ td = curthread;
/*
- * Can only return NULL if GB_LOCK_NOWAIT flag is specified.
+ * Can only return NULL if GB_LOCK_NOWAIT or GB_NOSPARSE flags
+ * are specified.
*/
- *bpp = bp = getblk(vp, blkno, size, 0, 0, flags);
- if (bp == NULL)
- return (EBUSY);
+ error = getblkx(vp, blkno, size, 0, 0, flags, &bp);
+ if (error != 0) {
+ *bpp = NULL;
+ return (error);
+ }
+ flags &= ~GB_NOSPARSE;
+ *bpp = bp;
/*
* If not found in cache, do some I/O
*/
readwait = 0;
if ((bp->b_flags & B_CACHE) == 0) {
- if (!TD_IS_IDLETHREAD(curthread)) {
+ if (!TD_IS_IDLETHREAD(td)) {
#ifdef RACCT
if (racct_enable) {
- PROC_LOCK(curproc);
- racct_add_buf(curproc, bp, 0);
- PROC_UNLOCK(curproc);
+ PROC_LOCK(td->td_proc);
+ racct_add_buf(td->td_proc, bp, 0);
+ PROC_UNLOCK(td->td_proc);
}
#endif /* RACCT */
- curthread->td_ru.ru_inblock++;
+ td->td_ru.ru_inblock++;
}
bp->b_iocmd = BIO_READ;
bp->b_flags &= ~B_INVAL;
@@ -3822,8 +3829,21 @@ has_addr:
}
}
+struct buf *
+getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
+ int flags)
+{
+ struct buf *bp;
+ int error;
+
+ error = getblkx(vp, blkno, size, slpflag, slptimeo, flags, &bp);
+ if (error != 0)
+ return (NULL);
+ return (bp);
+}
+
/*
- * getblk:
+ * getblkx:
*
* Get a block given a specified block and offset into a file/device.
* The buffers B_DONE bit will be cleared on return, making it almost
@@ -3858,12 +3878,13 @@ has_addr:
* intends to issue a READ, the caller must clear B_INVAL and BIO_ERROR
* prior to issuing the READ. biodone() will *not* clear B_INVAL.
*/
-struct buf *
-getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
- int flags)
+int
+getblkx(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
+ int flags, struct buf **bpp)
{
struct buf *bp;
struct bufobj *bo;
+ daddr_t d_blkno;
int bsize, error, maxsize, vmio;
off_t offset;
@@ -3878,6 +3899,7 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int
flags &= ~(GB_UNMAPPED | GB_KVAALLOC);
bo = &vp->v_bufobj;
+ d_blkno = blkno;
loop:
BO_RLOCK(bo);
bp = gbincore(bo, blkno);
@@ -3889,7 +3911,7 @@ loop:
*/
lockflags = LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK;
- if (flags & GB_LOCK_NOWAIT)
+ if ((flags & GB_LOCK_NOWAIT) != 0)
lockflags |= LK_NOWAIT;
error = BUF_TIMELOCK(bp, lockflags,
@@ -3902,8 +3924,8 @@ loop:
if (error == ENOLCK)
goto loop;
/* We timed out or were interrupted. */
- else if (error)
- return (NULL);
+ else if (error != 0)
+ return (error);
/* If recursed, assume caller knows the rules. */
else if (BUF_LOCKRECURSED(bp))
goto end;
@@ -4008,10 +4030,10 @@ loop:
* here.
*/
if (flags & GB_NOCREAT)
- return NULL;
+ return (EEXIST);
if (bdomain[bo->bo_domain].bd_freebuffers == 0 &&
TD_IS_IDLETHREAD(curthread))
- return NULL;
+ return (EBUSY);
bsize = vn_isdisk(vp, NULL) ? DEV_BSIZE : bo->bo_bsize;
KASSERT(bsize != 0, ("bsize == 0, check bo->bo_bsize"));
@@ -4025,11 +4047,22 @@ loop:
flags &= ~(GB_UNMAPPED | GB_KVAALLOC);
}
maxsize = imax(maxsize, bsize);
+ if ((flags & GB_NOSPARSE) != 0 && vmio &&
+ !vn_isdisk(vp, NULL)) {
+ error = VOP_BMAP(vp, blkno, NULL, &d_blkno, 0, 0);
+ KASSERT(error != EOPNOTSUPP,
+ ("GB_NOSPARSE from fs not supporting bmap, vp %p",
+ vp));
+ if (error != 0)
+ return (error);
+ if (d_blkno == -1)
+ return (EJUSTRETURN);
+ }
bp = getnewbuf(vp, slpflag, slptimeo, maxsize, flags);
if (bp == NULL) {
if (slpflag || slptimeo)
- return NULL;
+ return (ETIMEDOUT);
/*
* XXX This is here until the sleep path is diagnosed
* enough to work under very low memory conditions.
@@ -4075,7 +4108,8 @@ loop:
* Insert the buffer into the hash, so that it can
* be found by incore.
*/
- bp->b_blkno = bp->b_lblkno = blkno;
+ bp->b_lblkno = blkno;
+ bp->b_blkno = d_blkno;
bp->b_offset = offset;
bgetvp(vp, bp);
BO_UNLOCK(bo);
@@ -4110,7 +4144,8 @@ end:
buf_track(bp, __func__);
KASSERT(bp->b_bufobj == bo,
("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo));
- return (bp);
+ *bpp = bp;
+ return (0);
}
/*
Modified: head/sys/kern/vfs_cluster.c
==============================================================================
--- head/sys/kern/vfs_cluster.c Sat May 12 20:00:29 2018 (r333575)
+++ head/sys/kern/vfs_cluster.c Sun May 13 09:47:28 2018 (r333576)
@@ -94,12 +94,14 @@ cluster_read(struct vnode *vp, u_quad_t filesize, dadd
{
struct buf *bp, *rbp, *reqbp;
struct bufobj *bo;
+ struct thread *td;
daddr_t blkno, origblkno;
int maxra, racluster;
int error, ncontig;
int i;
error = 0;
+ td = curthread;
bo = &vp->v_bufobj;
if (!unmapped_buf_allowed)
gbflags &= ~GB_UNMAPPED;
@@ -118,10 +120,14 @@ cluster_read(struct vnode *vp, u_quad_t filesize, dadd
/*
* get the requested block
*/
- *bpp = reqbp = bp = getblk(vp, lblkno, size, 0, 0, gbflags);
- if (bp == NULL)
- return (EBUSY);
+ error = getblkx(vp, lblkno, size, 0, 0, gbflags, &bp);
+ if (error != 0) {
+ *bpp = NULL;
+ return (error);
+ }
+ gbflags &= ~GB_NOSPARSE;
origblkno = lblkno;
+ *bpp = reqbp = bp;
/*
* if it is in the cache, then check to see if the reads have been
@@ -243,12 +249,12 @@ cluster_read(struct vnode *vp, u_quad_t filesize, dadd
bstrategy(bp);
#ifdef RACCT
if (racct_enable) {
- PROC_LOCK(curproc);
- racct_add_buf(curproc, bp, 0);
- PROC_UNLOCK(curproc);
+ PROC_LOCK(td->td_proc);
+ racct_add_buf(td->td_proc, bp, 0);
+ PROC_UNLOCK(td->td_proc);
}
#endif /* RACCT */
- curthread->td_ru.ru_inblock++;
+ td->td_ru.ru_inblock++;
}
/*
@@ -303,12 +309,12 @@ cluster_read(struct vnode *vp, u_quad_t filesize, dadd
bstrategy(rbp);
#ifdef RACCT
if (racct_enable) {
- PROC_LOCK(curproc);
- racct_add_buf(curproc, rbp, 0);
- PROC_UNLOCK(curproc);
+ PROC_LOCK(td->td_proc);
+ racct_add_buf(td->td_proc, rbp, 0);
+ PROC_UNLOCK(td->td_proc);
}
#endif /* RACCT */
- curthread->td_ru.ru_inblock++;
+ td->td_ru.ru_inblock++;
}
if (reqbp) {
Modified: head/sys/sys/buf.h
==============================================================================
--- head/sys/sys/buf.h Sat May 12 20:00:29 2018 (r333575)
+++ head/sys/sys/buf.h Sun May 13 09:47:28 2018 (r333576)
@@ -479,6 +479,7 @@ buf_track(struct buf *bp, const char *location)
#define GB_UNMAPPED 0x0008 /* Do not mmap buffer pages. */
#define GB_KVAALLOC 0x0010 /* But allocate KVA. */
#define GB_CKHASH 0x0020 /* If reading, calc checksum hash */
+#define GB_NOSPARSE 0x0040 /* Do not instantiate holes */
#ifdef _KERNEL
extern int nbuf; /* The number of buffer headers */
@@ -540,6 +541,8 @@ struct buf * getpbuf(int *);
struct buf *incore(struct bufobj *, daddr_t);
struct buf *gbincore(struct bufobj *, daddr_t);
struct buf *getblk(struct vnode *, daddr_t, int, int, int, int);
+int getblkx(struct vnode *vp, daddr_t blkno, int size, int slpflag,
+ int slptimeo, int flags, struct buf **bpp);
struct buf *geteblk(int, int);
int bufwait(struct buf *);
int bufwrite(struct buf *);
Modified: head/sys/ufs/ffs/ffs_vnops.c
==============================================================================
--- head/sys/ufs/ffs/ffs_vnops.c Sat May 12 20:00:29 2018 (r333575)
+++ head/sys/ufs/ffs/ffs_vnops.c Sun May 13 09:47:28 2018 (r333576)
@@ -462,6 +462,26 @@ ffs_lock(ap)
#endif
}
+static int
+ffs_read_hole(struct uio *uio, long xfersize, long *size)
+{
+ ssize_t saved_resid, tlen;
+ int error;
+
+ while (xfersize > 0) {
+ tlen = min(xfersize, ZERO_REGION_SIZE);
+ saved_resid = uio->uio_resid;
+ error = vn_io_fault_uiomove(__DECONST(void *, zero_region),
+ tlen, uio);
+ if (error != 0)
+ return (error);
+ tlen = saved_resid - uio->uio_resid;
+ xfersize -= tlen;
+ *size -= tlen;
+ }
+ return (0);
+}
+
/*
* Vnode op for reading.
*/
@@ -483,9 +503,7 @@ ffs_read(ap)
off_t bytesinfile;
long size, xfersize, blkoffset;
ssize_t orig_resid;
- int error;
- int seqcount;
- int ioflag;
+ int bflag, error, ioflag, seqcount;
vp = ap->a_vp;
uio = ap->a_uio;
@@ -529,6 +547,7 @@ ffs_read(ap)
uio->uio_offset >= fs->fs_maxfilesize)
return (EOVERFLOW);
+ bflag = GB_UNMAPPED | (uio->uio_segflg == UIO_NOCOPY ? 0 : GB_NOSPARSE);
for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
break;
@@ -565,8 +584,7 @@ ffs_read(ap)
/*
* Don't do readahead if this is the end of the file.
*/
- error = bread_gb(vp, lbn, size, NOCRED,
- GB_UNMAPPED, &bp);
+ error = bread_gb(vp, lbn, size, NOCRED, bflag, &bp);
} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
/*
* Otherwise if we are allowed to cluster,
@@ -577,7 +595,7 @@ ffs_read(ap)
*/
error = cluster_read(vp, ip->i_size, lbn,
size, NOCRED, blkoffset + uio->uio_resid,
- seqcount, GB_UNMAPPED, &bp);
+ seqcount, bflag, &bp);
} else if (seqcount > 1) {
/*
* If we are NOT allowed to cluster, then
@@ -589,17 +607,21 @@ ffs_read(ap)
*/
u_int nextsize = blksize(fs, ip, nextlbn);
error = breadn_flags(vp, lbn, size, &nextlbn,
- &nextsize, 1, NOCRED, GB_UNMAPPED, NULL, &bp);
+ &nextsize, 1, NOCRED, bflag, NULL, &bp);
} else {
/*
* Failing all of the above, just read what the
* user asked for. Interestingly, the same as
* the first option above.
*/
- error = bread_gb(vp, lbn, size, NOCRED,
- GB_UNMAPPED, &bp);
+ error = bread_gb(vp, lbn, size, NOCRED, bflag, &bp);
}
- if (error) {
+ if (error == EJUSTRETURN) {
+ error = ffs_read_hole(uio, xfersize, &size);
+ if (error == 0)
+ continue;
+ }
+ if (error != 0) {
brelse(bp);
bp = NULL;
break;
More information about the svn-src-all
mailing list