git: 5a95a6e8e4d9 - main - nfscl: Use a smaller initial delay time for NFSERR_DELAY

From: Rick Macklem <rmacklem_at_FreeBSD.org>
Date: Tue, 02 Nov 2021 00:24:54 UTC
The branch main has been updated by rmacklem:

URL: https://cgit.FreeBSD.org/src/commit/?id=5a95a6e8e4d9e947b3bb4b4755a7242e1ddd72e7

commit 5a95a6e8e4d9e947b3bb4b4755a7242e1ddd72e7
Author:     Rick Macklem <rmacklem@FreeBSD.org>
AuthorDate: 2021-11-02 00:21:31 +0000
Commit:     Rick Macklem <rmacklem@FreeBSD.org>
CommitDate: 2021-11-02 00:21:31 +0000

    nfscl: Use a smaller initial delay time for NFSERR_DELAY
    
    For NFS RPCs that receive an NFSERR_DELAY reply, the delay time
    is initially 1sec and then doubles on each retry, up to a maximum
    of NFS_TRYLATERDEL.
    It was found that this delay time is excessive for some NFSv4
    servers, which work well with a 1msec delay.
    A 1sec delay resulted in very slow performance for Remove and
    Rename when delegations and pNFS were enabled.
    
    This patch decreases the initial delay time from 1sec to 1msec.
    The delay still doubles on each retry and is still clipped at
    NFS_TRYLATERDEL.
    
    Found during a recent IETF NFSv4 working group testing event.
    
    MFC after:      2 weeks
---
 sys/fs/nfs/nfs_commonkrpc.c | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/sys/fs/nfs/nfs_commonkrpc.c b/sys/fs/nfs/nfs_commonkrpc.c
index 423ddb52494f..2235f1077a03 100644
--- a/sys/fs/nfs/nfs_commonkrpc.c
+++ b/sys/fs/nfs/nfs_commonkrpc.c
@@ -107,6 +107,10 @@ static int	nfs_reconnects;
 static int	nfs3_jukebox_delay = 10;
 static int	nfs_skip_wcc_data_onerr = 1;
 static int	nfs_dsretries = 2;
+static struct timespec	nfs_trylater_max = {
+	.tv_sec		= NFS_TRYLATERDEL,
+	.tv_nsec	= 0,
+};
 
 SYSCTL_DECL(_vfs_nfs);
 
@@ -584,12 +588,11 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp,
     u_char *retsum, int toplevel, u_int64_t *xidp, struct nfsclsession *dssep)
 {
 	uint32_t retseq, retval, slotseq, *tl;
-	time_t waituntil;
 	int i = 0, j = 0, opcnt, set_sigset = 0, slot;
 	int error = 0, usegssname = 0, secflavour = AUTH_SYS;
 	int freeslot, maxslot, reterr, slotpos, timeo;
 	u_int16_t procnum;
-	u_int nextconn, trylater_delay = 1;
+	u_int nextconn;
 	struct nfs_feedback_arg nf;
 	struct timeval timo;
 	AUTH *auth;
@@ -602,7 +605,11 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp,
 	struct nfsclsession *sep;
 	uint8_t sessionid[NFSX_V4SESSIONID];
 	bool nextconn_set;
+	struct timespec trylater_delay, ts, waituntil;
 
+	/* Initially 1msec. */
+	trylater_delay.tv_sec = 0;
+	trylater_delay.tv_nsec = 1000000;
 	sep = dssep;
 	if (xidp != NULL)
 		*xidp = 0;
@@ -1144,12 +1151,19 @@ tryagain:
 			    (nd->nd_repstat == NFSERR_DELAY &&
 			     (nd->nd_flag & ND_NFSV4) == 0) ||
 			    nd->nd_repstat == NFSERR_RESOURCE) {
-				if (trylater_delay > NFS_TRYLATERDEL)
-					trylater_delay = NFS_TRYLATERDEL;
-				waituntil = NFSD_MONOSEC + trylater_delay;
-				while (NFSD_MONOSEC < waituntil)
-					(void) nfs_catnap(PZERO, 0, "nfstry");
-				trylater_delay *= 2;
+				/* Clip at NFS_TRYLATERDEL. */
+				if (timespeccmp(&trylater_delay,
+				    &nfs_trylater_max, >))
+					trylater_delay = nfs_trylater_max;
+				getnanouptime(&waituntil);
+				timespecadd(&waituntil, &trylater_delay,
+				    &waituntil);
+				do {
+					nfs_catnap(PZERO, 0, "nfstry");
+					getnanouptime(&ts);
+				} while (timespeccmp(&ts, &waituntil, <));
+				timespecadd(&trylater_delay, &trylater_delay,
+				    &trylater_delay);	/* Double each time. */
 				if (slot != -1) {
 					mtx_lock(&sep->nfsess_mtx);
 					sep->nfsess_slotseq[slot]++;