UPDATE: Forcible delaying of UFS (soft)updates

Marko Zec zec at tel.fer.hr
Tue Apr 15 11:25:29 PDT 2003


Attached are updated patches (against both 4.8 and 5.0) for delaying
disk buffer syncing on softupdates-enabled filesystems. The original
patch started a rather lengthy debate about whether fsync() processing
should also be delayed while disk updates are being delayed. As Kirk
McKusick already summarized, some people will prefer partial battery
power savings with working fsync() semantics, while others will desire
greater savings with broken semantics. Therefore, as suggested, the
updated patch introduces an additional sysctl tunable,
vfs.ena_lazy_fsync, which controls whether fsync() calls will be ignored
or not. Note that when vfs.sync_extdelay is set to 0, vfs.ena_lazy_fsync
has no effect, i.e. fsync() always works with standard semantics.

Cheers,

Marko

-------------- next part --------------
--- /usr/src/sys.org/dev/ata/ata-disk.c	Thu Jan 30 08:19:59 2003
+++ dev/ata/ata-disk.c	Sat Apr 12 00:31:26 2003
@@ -294,6 +294,7 @@ adstrategy(struct buf *bp)
     struct ad_softc *adp = bp->b_dev->si_drv1;
     int s;
 
+    stratcalls++;
     if (adp->device->flags & ATA_D_DETACHING) {
 	bp->b_error = ENXIO;
 	bp->b_flags |= B_ERROR;
--- /usr/src/sys.org/kern/vfs_subr.c	Sun Oct 13 18:19:12 2002
+++ kern/vfs_subr.c	Mon Apr 14 23:27:52 2003
@@ -116,6 +116,13 @@ SYSCTL_INT(_vfs, OID_AUTO, reassignbufme
 static int nameileafonly = 0;
 SYSCTL_INT(_vfs, OID_AUTO, nameileafonly, CTLFLAG_RW, &nameileafonly, 0, "");
 
+int stratcalls = 0;
+int sync_extdelay = 0;
+SYSCTL_INT(_vfs, OID_AUTO, sync_extdelay, CTLFLAG_RW, &sync_extdelay, 0, "");
+
+int ena_lazy_fsync = 0;
+SYSCTL_INT(_vfs, OID_AUTO, ena_lazy_fsync, CTLFLAG_RW, &ena_lazy_fsync, 0, "");
+
 #ifdef ENABLE_VFS_IOOPT
 int vfs_ioopt = 0;
 SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");
@@ -137,7 +144,7 @@ static vm_zone_t vnode_zone;
  * The workitem queue.
  */
 #define SYNCER_MAXDELAY		32
-static int syncer_maxdelay = SYNCER_MAXDELAY;	/* maximum delay time */
+int syncer_maxdelay = SYNCER_MAXDELAY;	/* maximum delay time */
 time_t syncdelay = 30;		/* max time to delay syncing data */
 time_t filedelay = 30;		/* time to delay syncing files */
 SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, "");
@@ -145,7 +152,7 @@ time_t dirdelay = 29;		/* time to delay 
 SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, "");
 time_t metadelay = 28;		/* time to delay syncing metadata */
 SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, "");
-static int rushjob;			/* number of slots to run ASAP */
+int rushjob;			/* number of slots to run ASAP */
 static int stat_rush_requests;	/* number of times I/O speeded up */
 SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "");
 
@@ -1119,7 +1127,7 @@ sched_sync(void)
 {
 	struct synclist *slp;
 	struct vnode *vp;
-	long starttime;
+	time_t starttime;
 	int s;
 	struct proc *p = updateproc;
 
@@ -1127,8 +1135,6 @@ sched_sync(void)
 	    SHUTDOWN_PRI_LAST);   
 
 	for (;;) {
-		kproc_suspend_loop(p);
-
 		starttime = time_second;
 
 		/*
@@ -1198,8 +1204,25 @@ sched_sync(void)
 		 * matter as we are just trying to generally pace the
 		 * filesystem activity.
 		 */
-		if (time_second == starttime)
+		if (time_second != starttime)
+			continue;
+
+		if (sync_extdelay >= syncer_maxdelay)
+			while (syncer_delayno == 0 && rushjob == 0 &&
+	    		    abs(time_second - starttime) < sync_extdelay) {
+				stratcalls = 0;
 			tsleep(&lbolt, PPAUSE, "syncer", 0);
+				kproc_suspend_loop(p);
+				if (stratcalls != 0 && syncer_maxdelay <
+				    abs(time_second - starttime)) {
+					rushjob = syncer_maxdelay;
+					break;
+				}
+			}
+		else {
+			tsleep(&lbolt, PPAUSE, "syncer", 0);
+			kproc_suspend_loop(p);
+		}
 	}
 }
 
--- /usr/src/sys.org/kern/vfs_syscalls.c	Thu Jan  2 18:26:18 2003
+++ kern/vfs_syscalls.c	Tue Apr 15 13:42:01 2003
@@ -563,6 +563,9 @@ sync(p, uap)
 	register struct mount *mp, *nmp;
 	int asyncflag;
 
+	/* Notify sched_sync() to try flushing syncer_workitem_pending[*] */
+	rushjob += syncer_maxdelay; 
+
 	simple_lock(&mountlist_slock);
 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
@@ -2627,6 +2630,10 @@ fsync(p, uap)
 	struct file *fp;
 	vm_object_t obj;
 	int error;
+
+	/* Just return if we are artificially delaying disk syncs */
+	if (sync_extdelay && ena_lazy_fsync)
+		return (0);
 
 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 		return (error);
--- /usr/src/sys.org/ufs/ffs/ffs_alloc.c	Fri Sep 21 21:15:21 2001
+++ ufs/ffs/ffs_alloc.c	Sat Apr 12 00:06:20 2003
@@ -125,6 +125,10 @@ ffs_alloc(ip, lbn, bpref, size, cred, bn
 #endif /* DIAGNOSTIC */
 	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
 		goto nospace;
+	/* Speedup flushing of syncer_workitem_pending[*] if low on freespace */
+	if (rushjob == 0 &&
+	    freespace(fs, fs->fs_minfree + 2) - numfrags(fs, size) < 0)
+		rushjob = syncer_maxdelay;
 	if (cred->cr_uid != 0 &&
 	    freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0)
 		goto nospace;
@@ -195,6 +199,10 @@ ffs_realloccg(ip, lbprev, bpref, osize, 
 	if (cred == NOCRED)
 		panic("ffs_realloccg: missing credential");
 #endif /* DIAGNOSTIC */
+	/* Speedup flushing of syncer_workitem_pending[*] if low on freespace */
+	if (rushjob == 0 &&
+	    freespace(fs, fs->fs_minfree + 2) - numfrags(fs, nsize - osize) < 0)
+		rushjob = syncer_maxdelay;
 	if (cred->cr_uid != 0 &&
 	    freespace(fs, fs->fs_minfree) -  numfrags(fs, nsize - osize) < 0)
 		goto nospace;
--- /usr/src/sys.org/sys/buf.h	Sat Jan 25 20:02:23 2003
+++ sys/buf.h	Sat Apr 12 00:30:48 2003
@@ -478,6 +478,7 @@ extern char	*buffers;		/* The buffer con
 extern int	bufpages;		/* Number of memory pages in the buffer pool. */
 extern struct	buf *swbuf;		/* Swap I/O buffer headers. */
 extern int	nswbuf;			/* Number of swap I/O buffer headers. */
+extern int	stratcalls;		/* I/O ops since last buffer sync */
 extern TAILQ_HEAD(swqueue, buf) bswlist;
 extern TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES];
 
--- /usr/src/sys.org/sys/vnode.h	Sun Dec 29 19:19:53 2002
+++ sys/vnode.h	Mon Apr 14 23:28:36 2003
@@ -294,6 +294,10 @@ extern	struct vm_zone *namei_zone;
 extern	int prtactive;			/* nonzero to call vprint() */
 extern	struct vattr va_null;		/* predefined null vattr structure */
 extern	int vfs_ioopt;
+extern	int rushjob;
+extern	int syncer_maxdelay;
+extern	int sync_extdelay;
+extern	int ena_lazy_fsync;
 
 /*
  * Macro/function to check for client cache inconsistency w.r.t. leasing.
-------------- next part --------------
--- /usr/src/sys.org/dev/ata/ata-disk.c	Sat Nov 16 09:07:36 2002
+++ dev/ata/ata-disk.c	Tue Apr 15 15:23:37 2003
@@ -289,6 +289,7 @@ adstrategy(struct bio *bp)
     struct ad_softc *adp = bp->bio_dev->si_drv1;
     int s;
 
+    stratcalls++;
     if (adp->device->flags & ATA_D_DETACHING) {
 	biofinish(bp, NULL, ENXIO);
 	return;
--- /usr/src/sys.org/kern/vfs_subr.c	Sat Nov 16 09:08:02 2002
+++ kern/vfs_subr.c	Tue Apr 15 15:34:19 2003
@@ -73,6 +73,8 @@
 #include <vm/vm_page.h>
 #include <vm/uma.h>
 
+#define abs(x)                      (((x) < 0) ? -(x) : (x))
+
 static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
 
 static void	addalias(struct vnode *vp, dev_t nvp_rdev);
@@ -130,6 +132,13 @@ SYSCTL_INT(_vfs, OID_AUTO, reassignbufca
 static int nameileafonly;
 SYSCTL_INT(_vfs, OID_AUTO, nameileafonly, CTLFLAG_RW, &nameileafonly, 0, "");
 
+int stratcalls = 0;
+int sync_extdelay = 0;
+SYSCTL_INT(_vfs, OID_AUTO, sync_extdelay, CTLFLAG_RW, &sync_extdelay, 0, "");
+
+int ena_lazy_fsync = 0;
+SYSCTL_INT(_vfs, OID_AUTO, ena_lazy_fsync, CTLFLAG_RW, &ena_lazy_fsync, 0, "");
+
 #ifdef ENABLE_VFS_IOOPT
 /* See NOTES for a description of this setting. */
 int vfs_ioopt;
@@ -208,7 +217,7 @@ static struct synclist *syncer_workitem_
 static struct mtx sync_mtx;
 
 #define SYNCER_MAXDELAY		32
-static int syncer_maxdelay = SYNCER_MAXDELAY;	/* maximum delay time */
+int syncer_maxdelay = SYNCER_MAXDELAY;	/* maximum delay time */
 static int syncdelay = 30;		/* max time to delay syncing data */
 static int filedelay = 30;		/* time to delay syncing files */
 SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, "");
@@ -216,7 +225,7 @@ static int dirdelay = 29;		/* time to de
 SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, "");
 static int metadelay = 28;		/* time to delay syncing metadata */
 SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, "");
-static int rushjob;		/* number of slots to run ASAP */
+int rushjob;			/* number of slots to run ASAP */
 static int stat_rush_requests;	/* number of times I/O speeded up */
 SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "");
 
@@ -1669,7 +1678,7 @@ sched_sync(void)
 	struct synclist *slp;
 	struct vnode *vp;
 	struct mount *mp;
-	long starttime;
+	time_t starttime;
 	int s;
 	struct thread *td = FIRST_THREAD_IN_PROC(updateproc);  /* XXXKSE */
 
@@ -1679,8 +1688,6 @@ sched_sync(void)
 	    SHUTDOWN_PRI_LAST);
 
 	for (;;) {
-		kthread_suspend_check(td->td_proc);
-
 		starttime = time_second;
 
 		/*
@@ -1765,8 +1772,25 @@ sched_sync(void)
 		 * matter as we are just trying to generally pace the
 		 * filesystem activity.
 		 */
-		if (time_second == starttime)
+		if (time_second != starttime)
+			continue;
+
+		if (sync_extdelay >= syncer_maxdelay)
+			while (syncer_delayno == 0 && rushjob == 0 &&
+	    		    abs(time_second - starttime) < sync_extdelay) {
+				stratcalls = 0;
 			tsleep(&lbolt, PPAUSE, "syncer", 0);
+				kthread_suspend_check(td->td_proc);
+				if (stratcalls != 0 && syncer_maxdelay <
+				    abs(time_second - starttime)) {
+					rushjob = syncer_maxdelay;
+					break;
+				}
+			}
+		else {
+			tsleep(&lbolt, PPAUSE, "syncer", 0);
+			kthread_suspend_check(td->td_proc);
+		}
 	}
 }
 
--- /usr/src/sys.org/kern/vfs_syscalls.c	Sat Nov 16 09:08:02 2002
+++ kern/vfs_syscalls.c	Tue Apr 15 17:38:55 2003
@@ -123,6 +123,9 @@ sync(td, uap)
 	struct mount *mp, *nmp;
 	int asyncflag;
 
+	/* Notify sched_sync to try flushing dirty buffers */
+	rushjob += syncer_maxdelay;
+
 	mtx_lock(&mountlist_mtx);
 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
@@ -2704,6 +2707,10 @@ fsync(td, uap)
 	struct file *fp;
 	vm_object_t obj;
 	int error;
+
+	/* Just return if we are artificially delaying disk synchs */
+	if (sync_extdelay && ena_lazy_fsync)
+		return (0);
 
 	GIANT_REQUIRED;
 
--- /usr/src/sys.org/sys/bio.h	Sat Nov 16 09:08:19 2002
+++ sys/bio.h	Tue Apr 15 15:24:20 2003
@@ -134,6 +134,8 @@ bioq_first(struct bio_queue_head *head)
 	return (TAILQ_FIRST(&head->queue));
 }
 
+extern	int	stratcalls;
+
 void biodone(struct bio *bp);
 void biofinish(struct bio *bp, struct devstat *stat, int error);
 int biowait(struct bio *bp, const char *wchan);
--- /usr/src/sys.org/sys/vnode.h	Sat Nov 16 09:08:21 2002
+++ sys/vnode.h	Tue Apr 15 15:23:38 2003
@@ -361,6 +361,10 @@ extern	struct uma_zone *namei_zone;
 extern	int prtactive;			/* nonzero to call vprint() */
 extern	struct vattr va_null;		/* predefined null vattr structure */
 extern	int vfs_ioopt;
+extern	int rushjob;
+extern	int syncer_maxdelay;
+extern	int sync_extdelay;
+extern	int ena_lazy_fsync;
 
 /*
  * Macro/function to check for client cache inconsistency w.r.t. leasing.
--- /usr/src/sys.org/ufs/ffs/ffs_alloc.c	Sat Nov 16 09:08:21 2002
+++ ufs/ffs/ffs_alloc.c	Tue Apr 15 15:26:37 2003
@@ -139,6 +139,10 @@ ffs_alloc(ip, lbn, bpref, size, cred, bn
 #endif /* DIAGNOSTIC */
 	reclaimed = 0;
 retry:
+	/* Speedup flushing of dirty buffers in sched_sync */
+	if (rushjob == 0 &&
+	    freespace(fs, fs->fs_minfree + 2) - numfrags(fs, size) < 0)
+		rushjob = syncer_maxdelay;
 	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
 		goto nospace;
 	if (suser_cred(cred, PRISON_ROOT) &&
@@ -222,6 +226,10 @@ ffs_realloccg(ip, lbprev, bprev, bpref, 
 #endif /* DIAGNOSTIC */
 	reclaimed = 0;
 retry:
+	/* Speedup flushing of dirty buffers in sched_sync */
+	if (rushjob == 0 &&
+	    freespace(fs, fs->fs_minfree + 2) - numfrags(fs, nsize - osize) < 0)
+		rushjob = syncer_maxdelay;
 	if (suser_cred(cred, PRISON_ROOT) &&
 	    freespace(fs, fs->fs_minfree) -  numfrags(fs, nsize - osize) < 0)
 		goto nospace;


More information about the freebsd-fs mailing list