kern/141355: [zfs] zfs recv can fail with E2BIG

Martin Matuska mm at FreeBSD.org
Fri Dec 11 01:20:08 PST 2009


The following reply was made to PR kern/141355; it has been noted by GNATS.

From: Martin Matuska <mm at FreeBSD.org>
To: bug-followup at FreeBSD.org, mm at FreeBSD.org
Cc:  
Subject: Re: kern/141355: [zfs] zfs recv can fail with E2BIG
Date: Fri, 11 Dec 2009 10:08:43 +0100

 This is a multi-part message in MIME format.
 --------------070208060700060803030000
 Content-Type: text/plain; charset=ISO-8859-2
 Content-Transfer-Encoding: 7bit
 
 I have created a patch from OpenSolaris mercurial for head.
 
 Diffs used:
 
 hg clone ssh://anon@hg.opensolaris.org/hg/onnv/onnv-gate
 hg diff -r 7837 -r 7994 onnv-gate/usr/src/uts/common/fs/zfs/dmu_send.c
 hg diff -c 8986 onnv-gate
 
 Patch is easily backportable to RELENG_8. I am testing it in RELENG_8.
 
 --------------070208060700060803030000
 Content-Type: text/plain;
  name="sys.patch"
 Content-Transfer-Encoding: 7bit
 Content-Disposition: inline;
  filename="sys.patch"
 
 Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
 ===================================================================
 --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h	(revision 200402)
 +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h	(working copy)
 @@ -19,7 +19,7 @@
   * CDDL HEADER END
   */
  /*
 - * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 + * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
   * Use is subject to license terms.
   */
  
 @@ -237,7 +237,7 @@
  int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
      int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
  int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
 -    int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
 +    int blocksize, dmu_object_type_t bonustype, int bonuslen);
  
  /*
   * Free an object from this objset.
 Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
 ===================================================================
 --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c	(revision 200402)
 +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c	(working copy)
 @@ -19,12 +19,10 @@
   * CDDL HEADER END
   */
  /*
 - * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 + * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
   * Use is subject to license terms.
   */
  
 -#pragma ident	"%Z%%M%	%I%	%E% SMI"
 -
  #include <sys/dmu.h>
  #include <sys/dmu_objset.h>
  #include <sys/dmu_tx.h>
 @@ -108,19 +106,51 @@
  
  int
  dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
 -    int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 +    int blocksize, dmu_object_type_t bonustype, int bonuslen)
  {
  	dnode_t *dn;
 +	dmu_tx_t *tx;
 +	int nblkptr;
  	int err;
  
 -	if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx))
 +	if (object == DMU_META_DNODE_OBJECT)
  		return (EBADF);
  
  	err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_ALLOCATED,
  	    FTAG, &dn);
  	if (err)
  		return (err);
 +
 +	if (dn->dn_type == ot && dn->dn_datablksz == blocksize &&
 +	    dn->dn_bonustype == bonustype && dn->dn_bonuslen == bonuslen) {
 +		/* nothing is changing, this is a noop */
 +		dnode_rele(dn, FTAG);
 +		return (0);
 +	}
 +
 +	tx = dmu_tx_create(os);
 +	dmu_tx_hold_bonus(tx, object);
 +	err = dmu_tx_assign(tx, TXG_WAIT);
 +	if (err) {
 +		dmu_tx_abort(tx);
 +		dnode_rele(dn, FTAG);
 +		return (err);
 +	}
 +
 +	nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
 +
 +	/*
 +	 * If we are losing blkptrs or changing the block size this must
 +	 * be a new file instance.   We must clear out the previous file
 +	 * contents before we can change this type of metadata in the dnode.
 +	 */
 +	if (dn->dn_nblkptr > nblkptr || dn->dn_datablksz != blocksize)
 +		dmu_free_long_range(os, object, 0, DMU_OBJECT_END);
 +
  	dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, tx);
 +
 +	dmu_tx_commit(tx);
 +
  	dnode_rele(dn, FTAG);
  
  	return (0);
 Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
 ===================================================================
 --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c	(revision 200402)
 +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c	(working copy)
 @@ -415,8 +415,7 @@
  dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
      dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
  {
 -	int i, nblkptr;
 -	dmu_buf_impl_t *db = NULL;
 +	int nblkptr;
  
  	ASSERT3U(blocksize, >=, SPA_MINBLOCKSIZE);
  	ASSERT3U(blocksize, <=, SPA_MAXBLOCKSIZE);
 @@ -428,42 +427,25 @@
  	ASSERT3U(bonustype, <, DMU_OT_NUMTYPES);
  	ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
  
 -	for (i = 0; i < TXG_SIZE; i++)
 -		ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
 -
  	/* clean up any unreferenced dbufs */
  	dnode_evict_dbufs(dn);
 -	ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL);
  
 -	/*
 -	 * XXX I should really have a generation number to tell if we
 -	 * need to do this...
 -	 */
 -	if (blocksize != dn->dn_datablksz ||
 -	    dn->dn_bonustype != bonustype || dn->dn_bonuslen != bonuslen) {
 -		/* free all old data */
 -		dnode_free_range(dn, 0, -1ULL, tx);
 -	}
 -
 -	nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
 -
 -	/* change blocksize */
  	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
 -	if (blocksize != dn->dn_datablksz &&
 -	    (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]) ||
 -	    list_head(&dn->dn_dbufs) != NULL)) {
 -		db = dbuf_hold(dn, 0, FTAG);
 -		dbuf_new_size(db, blocksize, tx);
 -	}
 -	dnode_setdblksz(dn, blocksize);
  	dnode_setdirty(dn, tx);
 -	dn->dn_next_bonuslen[tx->tx_txg&TXG_MASK] = bonuslen;
 -	dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = blocksize;
 +	if (dn->dn_datablksz != blocksize) {
 +		/* change blocksize */
 +		ASSERT(dn->dn_maxblkid == 0 &&
 +		    (BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]) ||
 +		    dnode_block_freed(dn, 0)));
 +		dnode_setdblksz(dn, blocksize);
 +		dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = blocksize;
 +	}
 +	if (dn->dn_bonuslen != bonuslen)
 +		dn->dn_next_bonuslen[tx->tx_txg&TXG_MASK] = bonuslen;
 +	nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
  	if (dn->dn_nblkptr != nblkptr)
  		dn->dn_next_nblkptr[tx->tx_txg&TXG_MASK] = nblkptr;
  	rw_exit(&dn->dn_struct_rwlock);
 -	if (db)
 -		dbuf_rele(db, FTAG);
  
  	/* change type */
  	dn->dn_type = ot;
 @@ -1187,11 +1169,6 @@
  	if (dn->dn_free_txg)
  		return (TRUE);
  
 -	/*
 -	 * If dn_datablkshift is not set, then there's only a single
 -	 * block, in which case there will never be a free range so it
 -	 * won't matter.
 -	 */
  	range_tofind.fr_blkid = blkid;
  	mutex_enter(&dn->dn_mtx);
  	for (i = 0; i < TXG_SIZE; i++) {
 Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
 ===================================================================
 --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c	(revision 200402)
 +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c	(working copy)
 @@ -828,12 +828,8 @@
  {
  	int err;
  	dmu_tx_t *tx;
 +	void *data = NULL;
  
 -	err = dmu_object_info(os, drro->drr_object, NULL);
 -
 -	if (err != 0 && err != ENOENT)
 -		return (EINVAL);
 -
  	if (drro->drr_type == DMU_OT_NONE ||
  	    drro->drr_type >= DMU_OT_NUMTYPES ||
  	    drro->drr_bonustype >= DMU_OT_NUMTYPES ||
 @@ -846,12 +842,21 @@
  		return (EINVAL);
  	}
  
 -	tx = dmu_tx_create(os);
 +	err = dmu_object_info(os, drro->drr_object, NULL);
  
 +	if (err != 0 && err != ENOENT)
 +		return (EINVAL);
 +
 +	if (drro->drr_bonuslen) {
 +		data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8));
 +		if (ra->err)
 +			return (ra->err);
 +	}
 +
  	if (err == ENOENT) {
  		/* currently free, want to be allocated */
 +		tx = dmu_tx_create(os);
  		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
 -		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 1);
  		err = dmu_tx_assign(tx, TXG_WAIT);
  		if (err) {
  			dmu_tx_abort(tx);
 @@ -860,45 +865,34 @@
  		err = dmu_object_claim(os, drro->drr_object,
  		    drro->drr_type, drro->drr_blksz,
  		    drro->drr_bonustype, drro->drr_bonuslen, tx);
 +		dmu_tx_commit(tx);
  	} else {
  		/* currently allocated, want to be allocated */
 -		dmu_tx_hold_bonus(tx, drro->drr_object);
 -		/*
 -		 * We may change blocksize and delete old content,
 -		 * so need to hold_write and hold_free.
 -		 */
 -		dmu_tx_hold_write(tx, drro->drr_object, 0, 1);
 -		dmu_tx_hold_free(tx, drro->drr_object, 0, DMU_OBJECT_END);
 -		err = dmu_tx_assign(tx, TXG_WAIT);
 -		if (err) {
 -			dmu_tx_abort(tx);
 -			return (err);
 -		}
 -
  		err = dmu_object_reclaim(os, drro->drr_object,
  		    drro->drr_type, drro->drr_blksz,
 -		    drro->drr_bonustype, drro->drr_bonuslen, tx);
 +		    drro->drr_bonustype, drro->drr_bonuslen);
  	}
 -	if (err) {
 -		dmu_tx_commit(tx);
 +	if (err)
  		return (EINVAL);
 +
 +	tx = dmu_tx_create(os);
 +	dmu_tx_hold_bonus(tx, drro->drr_object);
 +	err = dmu_tx_assign(tx, TXG_WAIT);
 +	if (err) {
 +		dmu_tx_abort(tx);
 +		return (err);
  	}
  
  	dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksum, tx);
  	dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx);
  
 -	if (drro->drr_bonuslen) {
 +	if (data != NULL) {
  		dmu_buf_t *db;
 -		void *data;
 +
  		VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db));
  		dmu_buf_will_dirty(db, tx);
  
  		ASSERT3U(db->db_size, >=, drro->drr_bonuslen);
 -		data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8));
 -		if (data == NULL) {
 -			dmu_tx_commit(tx);
 -			return (ra->err);
 -		}
  		bcopy(data, db->db_data, drro->drr_bonuslen);
  		if (ra->byteswap) {
  			dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data,
 
 --------------070208060700060803030000--


More information about the freebsd-fs mailing list