FreeBSD 8.1 and HAST

hiroshi at soupacific.com hiroshi at soupacific.com
Sat Jun 12 06:02:28 UTC 2010


I added some log messages to trace the HAST problem on 8.1.

The modified code is:

/*
 * Thread receives requests from the primary node.
 *
 * Loops forever.  Each iteration:
 *   1. takes a free hio from hio_free_list, sleeping on hio_free_list_cond
 *      while the list is empty;
 *   2. receives a request header from res->hr_remotein into hio->hio_nv
 *      and unpacks it with requnpack();
 *   3. for HIO_WRITE requests, also receives the payload (up to MAXPHYS
 *      bytes) into hio->hio_data;
 *   4. appends the request to hio_disk_list, or to hio_send_list when
 *      requnpack() reported an error.
 *
 * A failure to receive the header or the data is reported through
 * pjdlog_exit(EX_TEMPFAIL, ...).
 *
 * arg is the struct hast_resource this thread works on.
 *
 * The pjdlog_warning() calls are temporary trace points added for
 * debugging; they do not affect the request handling itself.
 */
static void *
recv_thread(void *arg)
{
	struct hast_resource *res = arg;
	struct hio *hio;
	bool wakeup;

	pjdlog_warning("recv_thread");
	for (;;) {
		pjdlog_debug(2, "recv: Taking free request.");
		mtx_lock(&hio_free_list_lock);
		/* Re-check the list after every wakeup before taking an entry. */
		while ((hio = TAILQ_FIRST(&hio_free_list)) == NULL) {
			pjdlog_debug(2, "recv: No free requests, waiting.");
			cv_wait(&hio_free_list_cond, &hio_free_list_lock);
		}
		TAILQ_REMOVE(&hio_free_list, hio, hio_next);
		mtx_unlock(&hio_free_list_lock);
		pjdlog_debug(2, "recv: (%p) Got request.", hio);

		pjdlog_warning("wooooo");

		if (hast_proto_recv_hdr(res->hr_remotein, &hio->hio_nv) < 0) {
			pjdlog_exit(EX_TEMPFAIL,
			    "Unable to receive request header. ");
		}
		if (requnpack(res, hio) != 0) {
			pjdlog_warning("requnpack");
			/* Skip the disk queue; hand the hio to the send queue. */
			goto send_queue;
		}
		reqlog(LOG_DEBUG, 2, -1, hio,
		    "recv: (%p) Got request header: ", hio);
		if (hio->hio_cmd == HIO_WRITE) {
			/* Only write requests carry a payload after the header. */
			if (hast_proto_recv_data(res, res->hr_remotein,
			    hio->hio_nv, hio->hio_data, MAXPHYS) < 0) {
				pjdlog_exit(EX_TEMPFAIL,
				    "Unable to receive request data");
			}
			pjdlog_warning("HIO_WRITE");
		}
		pjdlog_debug(2, "recv: (%p) Moving request to the disk queue.",
		    hio);
		mtx_lock(&hio_disk_list_lock);
		/* Signal only if the queue was empty before this insert. */
		wakeup = TAILQ_EMPTY(&hio_disk_list);
		TAILQ_INSERT_TAIL(&hio_disk_list, hio, hio_next);
		mtx_unlock(&hio_disk_list_lock);
		if (wakeup) {
			pjdlog_warning("wakeup");
			cv_signal(&hio_disk_list_cond);
		}
		continue;
send_queue:
		pjdlog_debug(2, "recv: (%p) Moving request to the send queue.",
		    hio);
		mtx_lock(&hio_send_list_lock);
		/* Same empty-before-insert rule as for the disk queue. */
		wakeup = TAILQ_EMPTY(&hio_send_list);
		TAILQ_INSERT_TAIL(&hio_send_list, hio, hio_next);
		mtx_unlock(&hio_send_list_lock);
		if (wakeup)
			cv_signal(&hio_send_list_cond);
	}
	/* NOTREACHED */
	return (NULL);
}

/*
  * Thread sends requests back to primary node.
  */
static void *
send_thread(void *arg)
{
	struct hast_resource *res = arg;
	struct nv *nvout;
	struct hio *hio;
	void *data;
	size_t length;
	bool wakeup;

	for (;;) {

pjdlog_warning("send_thread for loop");
		pjdlog_debug(2, "send: Taking request.");
		mtx_lock(&hio_send_list_lock);
		while ((hio = TAILQ_FIRST(&hio_send_list)) == NULL) {
			pjdlog_debug(2, "send: No requests, waiting.");
			cv_wait(&hio_send_list_cond, &hio_send_list_lock);
		}
		TAILQ_REMOVE(&hio_send_list, hio, hio_next);
		mtx_unlock(&hio_send_list_lock);

The 9.0 log shows:

Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) send_thread for loop
Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) HIO_WRITE
Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) wakup
Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) woooo
Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) send_thread for loop
Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) HIO_WRITE
Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) wakup
Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) woooo
Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) send_thread for loop
Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) HIO_WRITE
Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) wakup

(this sequence repeats forever)

8.1
Jun 12 14:07:18 sv01B hastd: [zfshast] (init) We act as init for the resource and not as secondary as requested by tcp4://192.168.0.240:59254.
Jun 12 14:07:23 sv01B hastd: [zfshast] (init) We act as init for the resource and not as secondary as requested by tcp4://192.168.0.240:56349.
Jun 12 14:07:28 sv01B hastd: [zfshast] (secondary) recv_thread
Jun 12 14:07:28 sv01B hastd: [zfshast] (secondary) send_thread for loop
Jun 12 14:07:28 sv01B hastd: [zfshast] (secondary) wooooo
Jun 12 14:07:28 sv01B hastd: [zfshast] (secondary) HIO_WRITE
Jun 12 14:07:28 sv01B hastd: [zfshast] (secondary) wakeup
Jun 12 14:07:28 sv01B hastd: [zfshast] (secondary) wooooo
Jun 12 14:07:28 sv01B hastd: [zfshast] (secondary) send_thread for loop
Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) Unable to receive request header. : Socket is not connected.
Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) Worker process exited ungracefully (pid=757, exitcode=75).
Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) recv_thread
Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) send_thread for loop
Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) wooooo
Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) HIO_WRITE
Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) wakeup
Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) wooooo
Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) send_thread for loop
Jun 12 14:07:38 sv01B hastd: [zfshast] (secondary) Unable to receive request header. : Socket is not connected.


I hope this simple trace gives you some idea of what is going wrong.

Thanks

Hiroshi


More information about the freebsd-fs mailing list