message buffer scrambling fix

Kenneth D. Merry ken at FreeBSD.org
Fri May 27 23:07:46 UTC 2011


Hey folks,

I have attached some patches to the kernel message buffer code (this
affects dmesg(8) output as well as kernel messages that go to the syslog)
to address log scrambling.

This fixes the same issue that 'options PRINTF_BUFR_SIZE=128' fixes for the
console.

The problem is that you can have multiple kernel threads writing to the
message buffer at the same time, and so their characters will get
interleaved.  All of the characters will get in there, because they're
written with atomic operations, but the output might looked scrambled.

So the fix is to use the same stack buffer that is used for the console
output (so the stack size doesn't increase), and use a spin lock instead of
atomic operations to insert the string into the message buffer.

The result is that dmesg and syslog output should look the same as the
console output.  As long as individual kernel prints fit in the printf
buffer size, they will be put into the message buffer atomically.

I also fixed a couple of other long-standing issues.  putcons() (in
subr_prf.c) was adding a carriage return before calling cnputs().  But
cnputs() calls cnputc(), which adds a carriage return before every newline.
So much of the console output (the part that came from putcons() at least)
had two carriage returns at the end.

The other issue was that log_console() was inserting a newline for any
console write that didn't already have one at the end.  The issue with that
can be seen if you do a 'dmesg -a' and compare that to the console output.

You'll see something like this on the console:

Updating motd:.

But this in dmesg -a:

Updating motd:
.

That is because "Updating motd:" is written first, log_console() appends a
newline, and then ".\n" is written.

I added a loader tunable and sysctl to turn the old behavior back on
(kern.log_console_add_linefeed) if you want the old behavior, but I think
we should be able to safely remove it.

Also, the new msgbuf_addstr() function allows the caller to optionally ask
for carriage returns to be stripped out.  However, in my testing I haven't
seen any carriage returns to strip.

Let me know if you have any comments.  I'm planning to check this into head
next week.

Thanks,

Ken
-- 
Kenneth Merry
ken at FreeBSD.ORG
-------------- next part --------------
Index: sys/kern/subr_msgbuf.c
===================================================================
--- sys/kern/subr_msgbuf.c	(revision 222390)
+++ sys/kern/subr_msgbuf.c	(working copy)
@@ -31,8 +31,16 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
 #include <sys/msgbuf.h>
 
+/*
+ * Maximum number conversion buffer length: uintmax_t in base 2, plus <>
+ * around the priority, and a terminating NUL.
+ */
+#define	MAXPRIBUF	(sizeof(intmax_t) * NBBY + 3)
+
 /* Read/write sequence numbers are modulo a multiple of the buffer size. */
 #define SEQMOD(size) ((size) * 16)
 
@@ -51,6 +59,9 @@
 	mbp->msg_seqmod = SEQMOD(size);
 	msgbuf_clear(mbp);
 	mbp->msg_magic = MSG_MAGIC;
+	mbp->msg_lastpri = -1;
+	mbp->msg_needsnl = 0;
+	mtx_init(&mbp->msg_lock, "msgbuf", NULL, MTX_SPIN);
 }
 
 /*
@@ -80,6 +91,11 @@
 		}
 		msgbuf_clear(mbp);
 	}
+
+	mbp->msg_lastpri = -1;
+	/* Assume that the old message buffer didn't end in a newline. */
+	mbp->msg_needsnl = 1;
+	mtx_init(&mbp->msg_lock, "msgbuf", NULL, MTX_SPIN);
 }
 
 /*
@@ -110,28 +126,143 @@
 }
 
 /*
- * Append a character to a message buffer.  This function can be
- * considered fully reentrant so long as the number of concurrent
- * callers is less than the number of characters in the buffer.
- * However, the message buffer is only guaranteed to be consistent
- * for reading when there are no callers in this function.
+ * Add a character into the message buffer, and update the checksum and
+ * sequence number.
+ *
+ * The caller should hold the message buffer spinlock.
  */
+static inline void
+msgbuf_do_addchar(struct msgbuf *mbp, u_int *seq, int c)
+{
+	u_int pos;
+
+	/* Make sure we properly wrap the sequence number. */
+	pos = MSGBUF_SEQ_TO_POS(mbp, *seq);
+
+	mbp->msg_cksum += (u_int)c -
+	    (u_int)(u_char)mbp->msg_ptr[pos];
+
+	mbp->msg_ptr[pos] = c;
+
+	*seq = MSGBUF_SEQNORM(mbp, *seq + 1);
+}
+
+/*
+ * Append a character to a message buffer.
+ */
 void
 msgbuf_addchar(struct msgbuf *mbp, int c)
 {
-	u_int new_seq, pos, seq;
+	mtx_lock_spin(&mbp->msg_lock);
 
-	do {
-		seq = mbp->msg_wseq;
-		new_seq = MSGBUF_SEQNORM(mbp, seq + 1);
-	} while (atomic_cmpset_rel_int(&mbp->msg_wseq, seq, new_seq) == 0);
-	pos = MSGBUF_SEQ_TO_POS(mbp, seq);
-	atomic_add_int(&mbp->msg_cksum, (u_int)(u_char)c -
-	    (u_int)(u_char)mbp->msg_ptr[pos]);
-	mbp->msg_ptr[pos] = c;
+	msgbuf_do_addchar(mbp, &mbp->msg_wseq, c);
+
+	mtx_unlock_spin(&mbp->msg_lock);
 }
 
 /*
+ * Append a NUL-terminated string with a priority to a message buffer.
+ * Filter carriage returns if the caller requests it.
+ *
+ * XXX The carriage return filtering behavior is present in the
+ * msglogchar() API, however testing has shown that we don't seem to send
+ * carriage returns down this path.  So do we still need it?
+ */
+void
+msgbuf_addstr(struct msgbuf *mbp, int pri, char *str, int filter_cr)
+{
+	u_int seq;
+	size_t len, prefix_len;
+	char prefix[MAXPRIBUF];
+	int nl, i;
+
+	len = strlen(str);
+	prefix_len = 0;
+	nl = 0;
+
+	/* If we have a zero-length string, no need to do anything. */
+	if (len == 0)
+		return;
+
+	mtx_lock_spin(&mbp->msg_lock);
+
+	/*
+	 * If this is true, we may need to insert a new priority sequence,
+	 * so prepare the prefix.
+	 */
+	if (pri != -1)
+		prefix_len = sprintf(prefix, "<%d>", pri);
+
+	/*
+	 * Starting write sequence number.
+	 */
+	seq = mbp->msg_wseq;
+
+	/*
+	 * Whenever there is a change in priority, we have to insert a
+	 * newline, and a priority prefix if the priority is not -1.  Here
+	 * we detect whether there was a priority change, and whether we
+	 * did not end with a newline.  If that is the case, we need to
+	 * insert a newline before this string.
+	 */
+	if (mbp->msg_lastpri != pri && mbp->msg_needsnl != 0) {
+
+		msgbuf_do_addchar(mbp, &seq, '\n');
+		mbp->msg_needsnl = 0;
+	}
+
+	for (i = 0; i < len; i++) {
+		/*
+		 * If we just had a newline, and the priority is not -1
+		 * (and therefore prefix_len != 0), then we need a priority
+		 * prefix for this line.
+		 */
+		if (mbp->msg_needsnl == 0 && prefix_len != 0) {
+			int j;
+
+			for (j = 0; j < prefix_len; j++)
+				msgbuf_do_addchar(mbp, &seq, prefix[j]);
+		}
+
+		/*
+		 * Don't copy carriage returns if the caller requested
+		 * filtering.
+		 * 
+		 * XXX This matches the behavior of msglogchar(), but is it
+		 * necessary?  Testing has shown that we don't seem to get
+		 * carriage returns here.
+		 */
+		if ((filter_cr != 0) && (str[i] == '\r'))
+			continue;
+
+		/*
+		 * Clear this flag if we see a newline.  This affects whether
+		 * we need to insert a new prefix or insert a newline later.
+		 */
+		if (str[i] == '\n')
+			mbp->msg_needsnl = 0;
+		else
+			mbp->msg_needsnl = 1;
+
+		msgbuf_do_addchar(mbp, &seq, str[i]);
+	}
+	/*
+	 * Update the write sequence number for the actual number of
+	 * characters we put in the message buffer.  (Depends on whether
+	 * carriage returns are filtered.)
+	 */
+	mbp->msg_wseq = seq;
+
+	/*
+	 * Set the last priority.
+	 */
+	mbp->msg_lastpri = pri;
+
+	mtx_unlock_spin(&mbp->msg_lock);
+
+}
+
+/*
  * Read and mark as read a character from a message buffer.
  * Returns the character, or -1 if no characters are available.
  */
@@ -141,14 +272,21 @@
 	u_int len, wseq;
 	int c;
 
+	mtx_lock_spin(&mbp->msg_lock);
+
 	wseq = mbp->msg_wseq;
 	len = MSGBUF_SEQSUB(mbp, wseq, mbp->msg_rseq);
-	if (len == 0)
+	if (len == 0) {
+		mtx_unlock_spin(&mbp->msg_lock);
 		return (-1);
+	}
 	if (len > mbp->msg_size)
 		mbp->msg_rseq = MSGBUF_SEQNORM(mbp, wseq - mbp->msg_size);
 	c = (u_char)mbp->msg_ptr[MSGBUF_SEQ_TO_POS(mbp, mbp->msg_rseq)];
 	mbp->msg_rseq = MSGBUF_SEQNORM(mbp, mbp->msg_rseq + 1);
+
+	mtx_unlock_spin(&mbp->msg_lock);
+
 	return (c);
 }
 
@@ -161,10 +299,14 @@
 {
 	u_int len, pos, wseq;
 
+	mtx_lock_spin(&mbp->msg_lock);
+
 	wseq = mbp->msg_wseq;
 	len = MSGBUF_SEQSUB(mbp, wseq, mbp->msg_rseq);
-	if (len == 0)
+	if (len == 0) {
+		mtx_unlock_spin(&mbp->msg_lock);
 		return (0);
+	}
 	if (len > mbp->msg_size) {
 		mbp->msg_rseq = MSGBUF_SEQNORM(mbp, wseq - mbp->msg_size);
 		len = mbp->msg_size;
@@ -175,6 +317,9 @@
 
 	bcopy(&mbp->msg_ptr[pos], buf, len);
 	mbp->msg_rseq = MSGBUF_SEQNORM(mbp, mbp->msg_rseq + len);
+
+	mtx_unlock_spin(&mbp->msg_lock);
+
 	return (len);
 }
 
@@ -193,16 +338,21 @@
 {
 	u_int len, pos, wseq;
 
+	mtx_lock_spin(&mbp->msg_lock);
+
 	if (buf == NULL) {
 		/* Just initialise *seqp. */
 		*seqp = MSGBUF_SEQNORM(mbp, mbp->msg_wseq - mbp->msg_size);
+		mtx_unlock_spin(&mbp->msg_lock);
 		return (0);
 	}
 
 	wseq = mbp->msg_wseq;
 	len = MSGBUF_SEQSUB(mbp, wseq, *seqp);
-	if (len == 0)
+	if (len == 0) {
+		mtx_unlock_spin(&mbp->msg_lock);
 		return (0);
+	}
 	if (len > mbp->msg_size) {
 		*seqp = MSGBUF_SEQNORM(mbp, wseq - mbp->msg_size);
 		len = mbp->msg_size;
@@ -212,6 +362,9 @@
 	len = min(len, (u_int)buflen);
 	bcopy(&mbp->msg_ptr[MSGBUF_SEQ_TO_POS(mbp, *seqp)], buf, len);
 	*seqp = MSGBUF_SEQNORM(mbp, *seqp + len);
+
+	mtx_unlock_spin(&mbp->msg_lock);
+
 	return (len);
 }
 
Index: sys/kern/subr_prf.c
===================================================================
--- sys/kern/subr_prf.c	(revision 222390)
+++ sys/kern/subr_prf.c	(working copy)
@@ -94,6 +94,7 @@
 extern	int log_open;
 
 static void  msglogchar(int c, int pri);
+static void  msglogstr(char *str, int pri, int filter_cr);
 static void  putchar(int ch, void *arg);
 static char *ksprintn(char *nbuf, uintmax_t num, int base, int *len, int upper);
 static void  snprintf_func(int ch, void *arg);
@@ -106,6 +107,14 @@
 SYSCTL_INT(_kern, OID_AUTO, log_console_output, CTLFLAG_RW,
     &log_console_output, 0, "Duplicate console output to the syslog.");
 
+/*
+ * See the comment in log_console() below for more explanation of this.
+ */
+static int log_console_add_linefeed = 0;
+TUNABLE_INT("kern.log_console_add_linefeed", &log_console_add_linefeed);
+SYSCTL_INT(_kern, OID_AUTO, log_console_add_linefeed, CTLFLAG_RW,
+    &log_console_add_linefeed, 0, "log_console() adds extra newlines.");
+
 static int	always_console_output = 0;
 TUNABLE_INT("kern.always_console_output", &always_console_output);
 SYSCTL_INT(_kern, OID_AUTO, always_console_output, CTLFLAG_RW,
@@ -240,16 +249,37 @@
 {
 	va_list ap;
 	struct putchar_arg pca;
+#ifdef PRINTF_BUFR_SIZE
+	char bufr[PRINTF_BUFR_SIZE];
+#endif
 
 	pca.tty = NULL;
 	pca.pri = level;
 	pca.flags = log_open ? TOLOG : TOCONS;
+#ifdef PRINTF_BUFR_SIZE
+	pca.p_bufr = bufr;
+	pca.p_next = pca.p_bufr;
+	pca.n_bufr = sizeof(bufr);
+	pca.remain = sizeof(bufr);
+	*pca.p_next = '\0';
+#else
 	pca.p_bufr = NULL;
+#endif
 
 	va_start(ap, fmt);
 	kvprintf(fmt, putchar, &pca, 10, ap);
 	va_end(ap);
 
+#ifdef PRINTF_BUFR_SIZE
+	/* Write any buffered console/log output: */
+	if (*pca.p_bufr != '\0') {
+		if (pca.flags & TOLOG)
+			msglogstr(pca.p_bufr, level, /*filter_cr*/1);
+
+		if (pca.flags & TOCONS)
+			cnputs(pca.p_bufr);
+	}
+#endif
 	msgbuftrigger = 1;
 }
 
@@ -258,7 +288,7 @@
 void
 log_console(struct uio *uio)
 {
-	int c, i, error, nl;
+	int c, error, nl;
 	char *consbuffer;
 	int pri;
 
@@ -271,20 +301,48 @@
 
 	nl = 0;
 	while (uio->uio_resid > 0) {
-		c = imin(uio->uio_resid, CONSCHUNK);
+		c = imin(uio->uio_resid, CONSCHUNK - 1);
 		error = uiomove(consbuffer, c, uio);
 		if (error != 0)
 			break;
-		for (i = 0; i < c; i++) {
-			msglogchar(consbuffer[i], pri);
-			if (consbuffer[i] == '\n')
-				nl = 1;
-			else
-				nl = 0;
-		}
+		/* Make sure we're NUL-terminated */
+		consbuffer[c] = '\0';
+		if (consbuffer[c - 1] == '\n')
+			nl = 1;
+		else
+			nl = 0;
+		msglogstr(consbuffer, pri, /*filter_cr*/ 1);
 	}
-	if (!nl)
-		msglogchar('\n', pri);
+	/*
+	 * The previous behavior in log_console() is preserved when
+	 * log_console_add_linefeed is non-zero.  For that behavior, if an
+	 * individual console write came in that was not terminated with a
+	 * line feed, it would add a line feed.
+	 *
+	 * This results in different data in the message buffer than
+	 * appears on the system console (which doesn't add extra line feed
+	 * characters).
+	 *
+	 * A number of programs and rc scripts write a line feed, or a period
+	 * and a line feed when they have completed their operation.  On
+	 * the console, this looks seamless, but when displayed with
+	 * 'dmesg -a', you wind up with output that looks like this:
+	 *
+	 * Updating motd:
+	 * .
+	 *
+	 * On the console, it looks like this:
+	 * Updating motd:.
+	 *
+	 * We could add logic to detect that situation, or just not insert
+	 * the extra newlines.  Set the kern.log_console_add_linefeed
+	 * sysctl/tunable variable to get the old behavior.
+	 */
+	if (!nl && log_console_add_linefeed) {
+		consbuffer[0] = '\n';
+		consbuffer[1] = '\0';
+		msglogstr(consbuffer, pri, /*filter_cr*/ 1);
+	}
 	msgbuftrigger = 1;
 	free(uio, M_IOV);
 	free(consbuffer, M_TEMP);
@@ -330,9 +388,11 @@
 	retval = kvprintf(fmt, putchar, &pca, 10, ap);
 
 #ifdef PRINTF_BUFR_SIZE
-	/* Write any buffered console output: */
-	if (*pca.p_bufr != '\0')
+	/* Write any buffered console/log output: */
+	if (*pca.p_bufr != '\0') {
 		cnputs(pca.p_bufr);
+		msglogstr(pca.p_bufr, pca.pri, /*filter_cr*/ 1);
+	}
 #endif
 
 	if (!panicstr)
@@ -342,18 +402,18 @@
 }
 
 static void
-putcons(int c, struct putchar_arg *ap)
+putbuf(int c, struct putchar_arg *ap)
 {
 	/* Check if no console output buffer was provided. */
-	if (ap->p_bufr == NULL)
+	if (ap->p_bufr == NULL) {
 		/* Output direct to the console. */
-		cnputc(c);
-	else {
+		if (ap->flags & TOCONS)
+			cnputc(c);
+
+		if (ap->flags & TOLOG)
+			msglogchar(c, ap->pri);
+	} else {
 		/* Buffer the character: */
-		if (c == '\n') {
-			*ap->p_next++ = '\r';
-			ap->remain--;
-		}
 		*ap->p_next++ = c;
 		ap->remain--;
 
@@ -361,12 +421,35 @@
 		*ap->p_next = '\0';
 
 		/* Check if the buffer needs to be flushed. */
-		if (ap->remain < 3 || c == '\n') {
-			cnputs(ap->p_bufr);
+		if (ap->remain == 2 || c == '\n') {
+
+			if (ap->flags & TOLOG)
+				msglogstr(ap->p_bufr, ap->pri, /*filter_cr*/1);
+
+			if (ap->flags & TOCONS) {
+				if ((panicstr == NULL) && (constty != NULL))
+					msgbuf_addstr(&consmsgbuf, -1,
+					    ap->p_bufr, /*filter_cr*/ 0);
+
+				if ((constty == NULL) ||(always_console_output))
+					cnputs(ap->p_bufr);
+			}
+
 			ap->p_next = ap->p_bufr;
 			ap->remain = ap->n_bufr;
 			*ap->p_next = '\0';
 		}
+
+		/*
+		 * Since we fill the buffer up one character at a time,
+		 * this should not happen.  We should always catch it when
+		 * ap->remain == 2 (if not sooner due to a newline), flush
+		 * the buffer and move on.  One way this could happen is
+		 * if someone sets PRINTF_BUFR_SIZE to 1 or something
+		 * similarly silly.
+		 */
+		KASSERT(ap->remain > 2, ("Bad buffer logic, remain = %zd",
+		    ap->remain));
 	}
 }
 
@@ -381,26 +464,25 @@
 	struct putchar_arg *ap = (struct putchar_arg*) arg;
 	struct tty *tp = ap->tty;
 	int flags = ap->flags;
+	int putbuf_done = 0;
 
 	/* Don't use the tty code after a panic or while in ddb. */
 	if (kdb_active) {
 		if (c != '\0')
 			cnputc(c);
-	} else if (panicstr || ((flags & TOCONS) && constty == NULL)) {
-		if (c != '\0')
-			putcons(c, ap);
 	} else {
-		if ((flags & TOTTY) && tp != NULL)
+		if ((panicstr == NULL) && (flags & TOTTY) && (tp != NULL))
 			tty_putchar(tp, c);
+
 		if (flags & TOCONS) {
-			if (constty != NULL)
-				msgbuf_addchar(&consmsgbuf, c);
-			if (always_console_output && c != '\0')
-				putcons(c, ap);
+			putbuf(c, ap);
+			putbuf_done = 1;
 		}
 	}
-	if ((flags & TOLOG))
-		msglogchar(c, ap->pri);
+	if ((flags & TOLOG) && (putbuf_done == 0)) {
+		if (c != '\0')
+			putbuf(c, ap);
+	}
 }
 
 /*
@@ -890,6 +972,15 @@
 	}
 }
 
+static void
+msglogstr(char *str, int pri, int filter_cr)
+{
+	if (!msgbufmapped)
+		return;
+
+	msgbuf_addstr(msgbufp, pri, str, filter_cr);
+}
+
 void
 msgbufinit(void *ptr, int size)
 {
Index: sys/sys/msgbuf.h
===================================================================
--- sys/sys/msgbuf.h	(revision 222390)
+++ sys/sys/msgbuf.h	(working copy)
@@ -33,15 +33,21 @@
 #ifndef _SYS_MSGBUF_H_
 #define	_SYS_MSGBUF_H_
 
+#include <sys/lock.h>
+#include <sys/mutex.h>
+
 struct msgbuf {
-	char	*msg_ptr;		/* pointer to buffer */
+	char	   *msg_ptr;		/* pointer to buffer */
 #define	MSG_MAGIC	0x063062
-	u_int	msg_magic;
-	u_int	msg_size;		/* size of buffer area */
-	u_int	msg_wseq;		/* write sequence number */
-	u_int	msg_rseq;		/* read sequence number */
-	u_int	msg_cksum;		/* checksum of contents */
-	u_int	msg_seqmod;		/* range for sequence numbers */
+	u_int	   msg_magic;
+	u_int	   msg_size;		/* size of buffer area */
+	u_int	   msg_wseq;		/* write sequence number */
+	u_int	   msg_rseq;		/* read sequence number */
+	u_int	   msg_cksum;		/* checksum of contents */
+	u_int	   msg_seqmod;		/* range for sequence numbers */
+	int	   msg_lastpri;		/* saved priority value */
+	int	   msg_needsnl;		/* set when newline needed */
+	struct mtx msg_lock;		/* mutex to protect the buffer */
 };
 
 /* Normalise a sequence number or a difference between sequence numbers. */
@@ -59,6 +65,7 @@
 
 void	msgbufinit(void *ptr, int size);
 void	msgbuf_addchar(struct msgbuf *mbp, int c);
+void	msgbuf_addstr(struct msgbuf *mbp, int pri, char *str, int filter_cr);
 void	msgbuf_clear(struct msgbuf *mbp);
 void	msgbuf_copy(struct msgbuf *src, struct msgbuf *dst);
 int	msgbuf_getbytes(struct msgbuf *mbp, char *buf, int buflen);


More information about the freebsd-current mailing list