svn commit: r247866 - stable/9/sbin/hastd

Mikolaj Golub trociny at FreeBSD.org
Wed Mar 6 06:57:19 UTC 2013


Author: trociny
Date: Wed Mar  6 06:57:18 2013
New Revision: 247866
URL: http://svnweb.freebsd.org/changeset/base/247866

Log:
  MFC r247281:
  
  Add I/O error counters to hastd(8) and make hastctl(8) display
  them.  This may be useful for detecting problems with HAST disks.
  
  Discussed with and reviewed by: pjd

Modified:
  stable/9/sbin/hastd/control.c
  stable/9/sbin/hastd/hast.h
  stable/9/sbin/hastd/primary.c
  stable/9/sbin/hastd/secondary.c
Directory Properties:
  stable/9/sbin/hastd/   (props changed)

Modified: stable/9/sbin/hastd/control.c
==============================================================================
--- stable/9/sbin/hastd/control.c	Wed Mar  6 06:24:09 2013	(r247865)
+++ stable/9/sbin/hastd/control.c	Wed Mar  6 06:57:18 2013	(r247866)
@@ -207,6 +207,14 @@ control_status_worker(struct hast_resour
 	    "stat_flush%u", no);
 	nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_activemap_update"),
 	    "stat_activemap_update%u", no);
+	nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_read_error"),
+	    "stat_read_error%u", no);
+	nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_write_error"),
+	    "stat_write_error%u", no);
+	nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_delete_error"),
+	    "stat_delete_error%u", no);
+	nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_flush_error"),
+	    "stat_flush_error%u", no);
 end:
 	if (cnvin != NULL)
 		nv_free(cnvin);
@@ -459,6 +467,16 @@ ctrl_thread(void *arg)
 			nv_add_uint64(nvout, res->hr_stat_flush, "stat_flush");
 			nv_add_uint64(nvout, res->hr_stat_activemap_update,
 			    "stat_activemap_update");
+			nv_add_uint64(nvout, res->hr_stat_read_error,
+			    "stat_read_error");
+			nv_add_uint64(nvout, res->hr_stat_write_error +
+			    res->hr_stat_activemap_write_error,
+			    "stat_write_error");
+			nv_add_uint64(nvout, res->hr_stat_delete_error,
+			    "stat_delete_error");
+			nv_add_uint64(nvout, res->hr_stat_flush_error +
+			    res->hr_stat_activemap_flush_error,
+			    "stat_flush_error");
 			nv_add_int16(nvout, 0, "error");
 			break;
 		case CONTROL_RELOAD:

Modified: stable/9/sbin/hastd/hast.h
==============================================================================
--- stable/9/sbin/hastd/hast.h	Wed Mar  6 06:24:09 2013	(r247865)
+++ stable/9/sbin/hastd/hast.h	Wed Mar  6 06:57:18 2013	(r247866)
@@ -234,6 +234,18 @@ struct hast_resource {
 	uint64_t	hr_stat_flush;
 	/* Number of activemap updates. */
 	uint64_t	hr_stat_activemap_update;
+	/* Number of local read errors. */
+	uint64_t	hr_stat_read_error;
+	/* Number of local write errors. */
+	uint64_t	hr_stat_write_error;
+	/* Number of local delete errors. */
+	uint64_t	hr_stat_delete_error;
+	/* Number of local flush errors. */
+	uint64_t	hr_stat_flush_error;
+	/* Number of activemap write errors. */
+	uint64_t	hr_stat_activemap_write_error;
+	/* Number of activemap flush errors. */
+	uint64_t	hr_stat_activemap_flush_error;
 
 	/* Next resource. */
 	TAILQ_ENTRY(hast_resource) hr_next;

Modified: stable/9/sbin/hastd/primary.c
==============================================================================
--- stable/9/sbin/hastd/primary.c	Wed Mar  6 06:24:09 2013	(r247865)
+++ stable/9/sbin/hastd/primary.c	Wed Mar  6 06:57:18 2013	(r247866)
@@ -303,6 +303,7 @@ hast_activemap_flush(struct hast_resourc
 	if (pwrite(res->hr_localfd, buf, size, METADATA_SIZE) !=
 	    (ssize_t)size) {
 		pjdlog_errno(LOG_ERR, "Unable to flush activemap to disk");
+		res->hr_stat_activemap_write_error++;
 		return (-1);
 	}
 	if (res->hr_metaflush == 1 && g_flush(res->hr_localfd) == -1) {
@@ -313,6 +314,7 @@ hast_activemap_flush(struct hast_resourc
 		} else {
 			pjdlog_errno(LOG_ERR,
 			    "Unable to flush disk cache on activemap update");
+			res->hr_stat_activemap_flush_error++;
 			return (-1);
 		}
 	}
@@ -1792,6 +1794,22 @@ ggate_send_thread(void *arg)
 				    "G_GATE_CMD_DONE failed");
 			}
 		}
+		if (hio->hio_errors[0]) {
+			switch (ggio->gctl_cmd) {
+			case BIO_READ:
+				res->hr_stat_read_error++;
+				break;
+			case BIO_WRITE:
+				res->hr_stat_write_error++;
+				break;
+			case BIO_DELETE:
+				res->hr_stat_delete_error++;
+				break;
+			case BIO_FLUSH:
+				res->hr_stat_flush_error++;
+				break;
+			}
+		}
 		pjdlog_debug(2,
 		    "ggate_send: (%p) Moving request to the free queue.", hio);
 		QUEUE_INSERT2(hio, free);

Modified: stable/9/sbin/hastd/secondary.c
==============================================================================
--- stable/9/sbin/hastd/secondary.c	Wed Mar  6 06:24:09 2013	(r247865)
+++ stable/9/sbin/hastd/secondary.c	Wed Mar  6 06:57:18 2013	(r247866)
@@ -725,6 +725,7 @@ disk_thread(void *arg)
 				pjdlog_errno(LOG_WARNING,
 				    "Unable to store cleared activemap");
 				free(map);
+				res->hr_stat_activemap_write_error++;
 				break;
 			}
 			free(map);
@@ -839,8 +840,23 @@ send_thread(void *arg)
 			PJDLOG_ABORT("Unexpected command (cmd=%hhu).",
 			    hio->hio_cmd);
 		}
-		if (hio->hio_error != 0)
+		if (hio->hio_error != 0) {
+			switch (hio->hio_cmd) {
+			case HIO_READ:
+				res->hr_stat_read_error++;
+				break;
+			case HIO_WRITE:
+				res->hr_stat_write_error++;
+				break;
+			case HIO_DELETE:
+				res->hr_stat_delete_error++;
+				break;
+			case HIO_FLUSH:
+				res->hr_stat_flush_error++;
+				break;
+			}
 			nv_add_int16(nvout, hio->hio_error, "error");
+		}
 		if (hast_proto_send(res, res->hr_remoteout, nvout, data,
 		    length) == -1) {
 			secondary_exit(EX_TEMPFAIL, "Unable to send reply");


More information about the svn-src-stable-9 mailing list