GPT boot with ZFS RAIDZ "ZFS: i/o error - all block copies unavailable"

Norikatsu Shigemura nork at FreeBSD.org
Sat Jun 27 04:49:34 UTC 2009


Hi Jonathan.

On Thu, 25 Jun 2009 23:37:34 -0400
Jonathan <jonathan at kc8onw.net> wrote:	
> I currently seem to be having 2 issues.  One is that I get several 
> screenfuls of "error 1 lba xxx" with numbers like 4292179434 and 
> 4292179446.  Similar errors were reported here 
> http://lists.freebsd.org/pipermail/freebsd-fs/2008-December/005511.html 
> but I did not find anything in that thread that worked for me.  The 
> error message itself comes from here 
> http://svn.freebsd.org/viewvc/base/head/sys/boot/i386/gptboot/gptboot.c?revision=181436&view=markup

	I don't have any idea.

> The loader then proceeds to load the kernel, opensolaris, and zfs 
> modules.  Right before displaying the loader menu it shows the 
> message "ZFS: i/o error - all block copies unavailable" which comes from 
> here 
> http://svn.freebsd.org/viewvc/base/head/sys/boot/zfs/zfsimpl.c?revision=192194&view=markup

	I confirmed this issue in my environment and analyzed it.  I
	found that gptzfsboot/loader doesn't support gang blocks.  As a
	result, gptzfsboot cannot read a gang-blocked loader or kernel,
	and loader cannot read a gang-blocked kernel or modules; both
	fail with "ZFS: i/o error - all block copies unavailable".

	I'm trying to implement gang-block support, but so far I have
	only done the checksum code.  Next I'm trying to implement the
	'read gang block' code, but I cannot find the 'read gang block'
	code in ZFS yet.  So I'm still in the analysis phase....

# Also, gptzfsboot/loader only supports native byte-order zpool/zfs.
-------------- next part --------------
--- sys/cddl/boot/zfs/zfsimpl.h.orig	2009-05-16 19:48:20.000000000 +0900
+++ sys/cddl/boot/zfs/zfsimpl.h	2009-06-27 13:34:48.754949946 +0900
@@ -106,6 +106,8 @@
 
 #define	SPA_BLOCKSIZES		(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1)
 
+#define	SPA_GANGBLOCKSIZE	SPA_MINBLOCKSIZE
+
 /*
  * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
  * The ASIZE encoding should be at least 64 times larger (6 more bits)
--- sys/cddl/boot/zfs/zfssubr.c.orig	2009-05-24 23:07:11.659515363 +0900
+++ sys/cddl/boot/zfs/zfssubr.c	2009-06-27 13:45:20.991111931 +0900
@@ -23,6 +23,7 @@
  * Use is subject to license terms.
  */
 
+#include <sys/endian.h>
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD: src/sys/cddl/boot/zfs/zfssubr.c,v 1.5 2009/05/23 16:01:58 des Exp $");
 
@@ -52,6 +53,21 @@
 	ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
 }
 
+static void
+zio_checksum_gang_verifier(zio_cksum_t *zcp, const blkptr_t *bp)
+{
+	const dva_t *dva = BP_IDENTITY(bp);
+	uint64_t txg = bp->blk_birth;
+
+	ZIO_SET_CHECKSUM(zcp, DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), txg, 0);
+}
+
+static void
+zio_checksum_label_verifier(zio_cksum_t *zcp, uint64_t offset)
+{
+	ZIO_SET_CHECKSUM(zcp, offset, 0, 0, 0);
+}
+
 /*
  * Signature for checksum functions.
  */
@@ -123,32 +139,52 @@
 };
 
 static int
-zio_checksum_error(const blkptr_t *bp, void *data)
+zio_checksum_error(const blkptr_t *bp, uint64_t offset, void *data)
 {
-	zio_cksum_t zc = bp->blk_cksum;
-	unsigned int checksum = BP_GET_CHECKSUM(bp);
-	uint64_t size = BP_GET_PSIZE(bp);
+	unsigned int checksum = BP_IS_GANG(bp) ?
+		ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp);
+	uint64_t size = BP_IS_GANG(bp) ?
+		SPA_GANGBLOCKSIZE : BP_GET_PSIZE(bp);
 	zio_block_tail_t *zbt = (zio_block_tail_t *)((char *)data + size) - 1;
 	zio_checksum_info_t *ci = &zio_checksum_table[checksum];
-	zio_cksum_t actual_cksum, expected_cksum;
+	zio_cksum_t actual_cksum, expected_cksum, verifier;
 
 	if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL)
 		return (EINVAL);
 
 	if (ci->ci_zbt) {
+		if (checksum == ZIO_CHECKSUM_GANG_HEADER)
+			zio_checksum_gang_verifier(&verifier, bp);
+printf("zio_checksum_error: gang block found\n"); /* XXX: now debuging */
+		else if (checksum == ZIO_CHECKSUM_LABEL)
+			zio_checksum_label_verifier(&verifier, offset);
+		else
+			verifier = bp->blk_cksum;
+
 		expected_cksum = zbt->zbt_cksum;
-		zbt->zbt_cksum = zc;
+		zbt->zbt_cksum = verifier;
 		ci->ci_func[0](data, size, &actual_cksum);
 		zbt->zbt_cksum = expected_cksum;
-		zc = expected_cksum;
 	} else {
 		/* ASSERT(!BP_IS_GANG(bp)); */
+		expected_cksum = bp->blk_cksum;
 		ci->ci_func[0](data, size, &actual_cksum);
 	}
 
-	if (!ZIO_CHECKSUM_EQUAL(actual_cksum, zc)) {
+	if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) {
 		/*printf("ZFS: read checksum failed\n");*/
+/* XXX: now debuging */
+printf("zio_checksum_error: byteswap = %u, checksum = %u, expected_cksum = %08llx%08llx%08llx%08llx, actual_cksum = %08llx%08llx%08llx%08llx\n",
+	(zbt->zbt_magic == bswap64(ZBT_MAGIC)),
+	(unsigned int)checksum,
+	expected_cksum.zc_word[0], expected_cksum.zc_word[1], expected_cksum.zc_word[2], expected_cksum.zc_word[3],
+	actual_cksum.zc_word[0],actual_cksum.zc_word[1], actual_cksum.zc_word[2], actual_cksum.zc_word[3]
+);
 		return (EIO);
+} else {
+/* XXX: now debuging */
+if (checksum == ZIO_CHECKSUM_GANG_HEADER)
+printf("zio_checksum_error: gang block is OK\n"); /* XXX: now debuging */
 	}
 
 	return (0);
@@ -712,7 +748,7 @@
 	if (total_errors <= nparity - parity_untried) {
 		switch (data_errors) {
 		case 0:
-			if (zio_checksum_error(bp, buf) == 0)
+			if (zio_checksum_error(bp, offset, buf) == 0)
 				return (0);
 			break;
 
@@ -747,7 +783,7 @@
 				    acols, c);
 			}
 
-			if (zio_checksum_error(bp, buf) == 0)
+			if (zio_checksum_error(bp, offset, buf) == 0)
 				return (0);
 			break;
 
@@ -779,7 +815,7 @@
 			vdev_raidz_reconstruct_pq(cols, nparity, acols,
 			    c1, c);
 
-			if (zio_checksum_error(bp, buf) == 0)
+			if (zio_checksum_error(bp, offset, buf) == 0)
 				return (0);
 			break;
 
@@ -862,7 +898,7 @@
 			memcpy(orig, rc->rc_data, rc->rc_size);
 			vdev_raidz_reconstruct_p(cols, nparity, acols, c);
 
-			if (zio_checksum_error(bp, buf) == 0)
+			if (zio_checksum_error(bp, offset, buf) == 0)
 				return (0);
 
 			memcpy(rc->rc_data, orig, rc->rc_size);
@@ -881,7 +917,7 @@
 			memcpy(orig, rc->rc_data, rc->rc_size);
 			vdev_raidz_reconstruct_q(cols, nparity, acols, c);
 
-			if (zio_checksum_error(bp, buf) == 0)
+			if (zio_checksum_error(bp, offset, buf) == 0)
 				return (0);
 
 			memcpy(rc->rc_data, orig, rc->rc_size);
@@ -910,7 +946,7 @@
 				vdev_raidz_reconstruct_pq(cols, nparity,
 				    acols, c, c1);
 
-				if (zio_checksum_error(bp, buf) == 0)
+				if (zio_checksum_error(bp, offset, buf) == 0)
 					return (0);
 
 				memcpy(rc1->rc_data, orig1, rc1->rc_size);


More information about the freebsd-current mailing list