svn commit: r247281 - in head/sbin: hastctl hastd

Mikolaj Golub trociny at FreeBSD.org
Mon Feb 25 20:09:09 UTC 2013


Author: trociny
Date: Mon Feb 25 20:09:07 2013
New Revision: 247281
URL: http://svnweb.freebsd.org/changeset/base/247281

Log:
  Add I/O error counters to hastd(8) and make hastctl(8) display
  them.  This may be useful for detecting problems with HAST disks.
  
  Discussed with and reviewed by:	pjd
  MFC after:	1 week

Modified:
  head/sbin/hastctl/hastctl.c
  head/sbin/hastd/control.c
  head/sbin/hastd/hast.h
  head/sbin/hastd/primary.c
  head/sbin/hastd/secondary.c

Modified: head/sbin/hastctl/hastctl.c
==============================================================================
--- head/sbin/hastctl/hastctl.c	Mon Feb 25 19:55:32 2013	(r247280)
+++ head/sbin/hastctl/hastctl.c	Mon Feb 25 20:09:07 2013	(r247281)
@@ -351,6 +351,12 @@ control_status(struct nv *nv)
 		    (uint64_t)nv_get_uint64(nv, "stat_flush%u", ii));
 		printf("    activemap updates: %ju\n",
 		    (uint64_t)nv_get_uint64(nv, "stat_activemap_update%u", ii));
+		printf("    local errors: "
+		    "read: %ju, write: %ju, delete: %ju, flush: %ju\n",
+		    (uintmax_t)nv_get_uint64(nv, "stat_read_error%u", ii),
+		    (uintmax_t)nv_get_uint64(nv, "stat_write_error%u", ii),
+		    (uintmax_t)nv_get_uint64(nv, "stat_delete_error%u", ii),
+		    (uintmax_t)nv_get_uint64(nv, "stat_flush_error%u", ii));
 	}
 	return (ret);
 }

Modified: head/sbin/hastd/control.c
==============================================================================
--- head/sbin/hastd/control.c	Mon Feb 25 19:55:32 2013	(r247280)
+++ head/sbin/hastd/control.c	Mon Feb 25 20:09:07 2013	(r247281)
@@ -207,6 +207,14 @@ control_status_worker(struct hast_resour
 	    "stat_flush%u", no);
 	nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_activemap_update"),
 	    "stat_activemap_update%u", no);
+	nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_read_error"),
+	    "stat_read_error%u", no);
+	nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_write_error"),
+	    "stat_write_error%u", no);
+	nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_delete_error"),
+	    "stat_delete_error%u", no);
+	nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_flush_error"),
+	    "stat_flush_error%u", no);
 end:
 	if (cnvin != NULL)
 		nv_free(cnvin);
@@ -459,6 +467,16 @@ ctrl_thread(void *arg)
 			nv_add_uint64(nvout, res->hr_stat_flush, "stat_flush");
 			nv_add_uint64(nvout, res->hr_stat_activemap_update,
 			    "stat_activemap_update");
+			nv_add_uint64(nvout, res->hr_stat_read_error,
+			    "stat_read_error");
+			nv_add_uint64(nvout, res->hr_stat_write_error +
+			    res->hr_stat_activemap_write_error,
+			    "stat_write_error");
+			nv_add_uint64(nvout, res->hr_stat_delete_error,
+			    "stat_delete_error");
+			nv_add_uint64(nvout, res->hr_stat_flush_error +
+			    res->hr_stat_activemap_flush_error,
+			    "stat_flush_error");
 			nv_add_int16(nvout, 0, "error");
 			break;
 		case CONTROL_RELOAD:

Modified: head/sbin/hastd/hast.h
==============================================================================
--- head/sbin/hastd/hast.h	Mon Feb 25 19:55:32 2013	(r247280)
+++ head/sbin/hastd/hast.h	Mon Feb 25 20:09:07 2013	(r247281)
@@ -239,6 +239,18 @@ struct hast_resource {
 	uint64_t	hr_stat_flush;
 	/* Number of activemap updates. */
 	uint64_t	hr_stat_activemap_update;
+	/* Number of local read errors. */
+	uint64_t	hr_stat_read_error;
+	/* Number of local write errors. */
+	uint64_t	hr_stat_write_error;
+	/* Number of local delete errors. */
+	uint64_t	hr_stat_delete_error;
+	/* Number of local flush errors. */
+	uint64_t	hr_stat_flush_error;
+	/* Number of activemap write errors. */
+	uint64_t	hr_stat_activemap_write_error;
+	/* Number of activemap flush errors. */
+	uint64_t	hr_stat_activemap_flush_error;
 
 	/* Next resource. */
 	TAILQ_ENTRY(hast_resource) hr_next;

Modified: head/sbin/hastd/primary.c
==============================================================================
--- head/sbin/hastd/primary.c	Mon Feb 25 19:55:32 2013	(r247280)
+++ head/sbin/hastd/primary.c	Mon Feb 25 20:09:07 2013	(r247281)
@@ -303,6 +303,7 @@ hast_activemap_flush(struct hast_resourc
 	if (pwrite(res->hr_localfd, buf, size, METADATA_SIZE) !=
 	    (ssize_t)size) {
 		pjdlog_errno(LOG_ERR, "Unable to flush activemap to disk");
+		res->hr_stat_activemap_write_error++;
 		return (-1);
 	}
 	if (res->hr_metaflush == 1 && g_flush(res->hr_localfd) == -1) {
@@ -313,6 +314,7 @@ hast_activemap_flush(struct hast_resourc
 		} else {
 			pjdlog_errno(LOG_ERR,
 			    "Unable to flush disk cache on activemap update");
+			res->hr_stat_activemap_flush_error++;
 			return (-1);
 		}
 	}
@@ -1936,6 +1938,22 @@ ggate_send_thread(void *arg)
 				    "G_GATE_CMD_DONE failed");
 			}
 		}
+		if (hio->hio_errors[0]) {
+			switch (ggio->gctl_cmd) {
+			case BIO_READ:
+				res->hr_stat_read_error++;
+				break;
+			case BIO_WRITE:
+				res->hr_stat_write_error++;
+				break;
+			case BIO_DELETE:
+				res->hr_stat_delete_error++;
+				break;
+			case BIO_FLUSH:
+				res->hr_stat_flush_error++;
+				break;
+			}
+		}
 		pjdlog_debug(2,
 		    "ggate_send: (%p) Moving request to the free queue.", hio);
 		QUEUE_INSERT2(hio, free);

Modified: head/sbin/hastd/secondary.c
==============================================================================
--- head/sbin/hastd/secondary.c	Mon Feb 25 19:55:32 2013	(r247280)
+++ head/sbin/hastd/secondary.c	Mon Feb 25 20:09:07 2013	(r247281)
@@ -765,6 +765,7 @@ disk_thread(void *arg)
 				pjdlog_errno(LOG_WARNING,
 				    "Unable to store cleared activemap");
 				free(map);
+				res->hr_stat_activemap_write_error++;
 				break;
 			}
 			free(map);
@@ -883,8 +884,23 @@ send_thread(void *arg)
 			PJDLOG_ABORT("Unexpected command (cmd=%hhu).",
 			    hio->hio_cmd);
 		}
-		if (hio->hio_error != 0)
+		if (hio->hio_error != 0) {
+			switch (hio->hio_cmd) {
+			case HIO_READ:
+				res->hr_stat_read_error++;
+				break;
+			case HIO_WRITE:
+				res->hr_stat_write_error++;
+				break;
+			case HIO_DELETE:
+				res->hr_stat_delete_error++;
+				break;
+			case HIO_FLUSH:
+				res->hr_stat_flush_error++;
+				break;
+			}
 			nv_add_int16(nvout, hio->hio_error, "error");
+		}
 		if (hast_proto_send(res, res->hr_remoteout, nvout, data,
 		    length) == -1) {
 			secondary_exit(EX_TEMPFAIL, "Unable to send reply");


More information about the svn-src-all mailing list