kern/141355: [zfs] zfs recv can fail with E2BIG
Martin Matuska
mm at FreeBSD.org
Fri Dec 11 01:20:08 PST 2009
The following reply was made to PR kern/141355; it has been noted by GNATS.
From: Martin Matuska <mm at FreeBSD.org>
To: bug-followup at FreeBSD.org, mm at FreeBSD.org
Cc:
Subject: Re: kern/141355: [zfs] zfs recv can fail with E2BIG
Date: Fri, 11 Dec 2009 10:08:43 +0100
This is a multi-part message in MIME format.
--------------070208060700060803030000
Content-Type: text/plain; charset=ISO-8859-2
Content-Transfer-Encoding: 7bit
I have created a patch from OpenSolaris mercurial for head.
Diffs used:
hg clone ssh://anon@hg.opensolaris.org/hg/onnv/onnv-gate
hg diff -r 7837 -r 7994 onnv-gate/usr/src/uts/common/fs/zfs/dmu_send.c
hg diff -c 8986 onnv-gate
Patch is easily backportable to RELENG_8. I am testing it in RELENG_8.
--------------070208060700060803030000
Content-Type: text/plain;
name="sys.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="sys.patch"
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h (revision 200402)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h (working copy)
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -237,7 +237,7 @@
int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
- int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
+ int blocksize, dmu_object_type_t bonustype, int bonuslen);
/*
* Free an object from this objset.
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c (revision 200402)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c (working copy)
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_tx.h>
@@ -108,19 +106,51 @@
int
dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
- int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
+ int blocksize, dmu_object_type_t bonustype, int bonuslen)
{
dnode_t *dn;
+ dmu_tx_t *tx;
+ int nblkptr;
int err;
- if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx))
+ if (object == DMU_META_DNODE_OBJECT)
return (EBADF);
err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_ALLOCATED,
FTAG, &dn);
if (err)
return (err);
+
+ if (dn->dn_type == ot && dn->dn_datablksz == blocksize &&
+ dn->dn_bonustype == bonustype && dn->dn_bonuslen == bonuslen) {
+ /* nothing is changing, this is a noop */
+ dnode_rele(dn, FTAG);
+ return (0);
+ }
+
+ tx = dmu_tx_create(os);
+ dmu_tx_hold_bonus(tx, object);
+ err = dmu_tx_assign(tx, TXG_WAIT);
+ if (err) {
+ dmu_tx_abort(tx);
+ dnode_rele(dn, FTAG);
+ return (err);
+ }
+
+ nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
+
+ /*
+ * If we are losing blkptrs or changing the block size this must
+ * be a new file instance. We must clear out the previous file
+ * contents before we can change this type of metadata in the dnode.
+ */
+ if (dn->dn_nblkptr > nblkptr || dn->dn_datablksz != blocksize)
+ dmu_free_long_range(os, object, 0, DMU_OBJECT_END);
+
dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, tx);
+
+ dmu_tx_commit(tx);
+
dnode_rele(dn, FTAG);
return (0);
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c (revision 200402)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c (working copy)
@@ -415,8 +415,7 @@
dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
- int i, nblkptr;
- dmu_buf_impl_t *db = NULL;
+ int nblkptr;
ASSERT3U(blocksize, >=, SPA_MINBLOCKSIZE);
ASSERT3U(blocksize, <=, SPA_MAXBLOCKSIZE);
@@ -428,42 +427,25 @@
ASSERT3U(bonustype, <, DMU_OT_NUMTYPES);
ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
- for (i = 0; i < TXG_SIZE; i++)
- ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
-
/* clean up any unreferenced dbufs */
dnode_evict_dbufs(dn);
- ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL);
- /*
- * XXX I should really have a generation number to tell if we
- * need to do this...
- */
- if (blocksize != dn->dn_datablksz ||
- dn->dn_bonustype != bonustype || dn->dn_bonuslen != bonuslen) {
- /* free all old data */
- dnode_free_range(dn, 0, -1ULL, tx);
- }
-
- nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
-
- /* change blocksize */
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
- if (blocksize != dn->dn_datablksz &&
- (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]) ||
- list_head(&dn->dn_dbufs) != NULL)) {
- db = dbuf_hold(dn, 0, FTAG);
- dbuf_new_size(db, blocksize, tx);
- }
- dnode_setdblksz(dn, blocksize);
dnode_setdirty(dn, tx);
- dn->dn_next_bonuslen[tx->tx_txg&TXG_MASK] = bonuslen;
- dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = blocksize;
+ if (dn->dn_datablksz != blocksize) {
+ /* change blocksize */
+ ASSERT(dn->dn_maxblkid == 0 &&
+ (BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]) ||
+ dnode_block_freed(dn, 0)));
+ dnode_setdblksz(dn, blocksize);
+ dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = blocksize;
+ }
+ if (dn->dn_bonuslen != bonuslen)
+ dn->dn_next_bonuslen[tx->tx_txg&TXG_MASK] = bonuslen;
+ nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
if (dn->dn_nblkptr != nblkptr)
dn->dn_next_nblkptr[tx->tx_txg&TXG_MASK] = nblkptr;
rw_exit(&dn->dn_struct_rwlock);
- if (db)
- dbuf_rele(db, FTAG);
/* change type */
dn->dn_type = ot;
@@ -1187,11 +1169,6 @@
if (dn->dn_free_txg)
return (TRUE);
- /*
- * If dn_datablkshift is not set, then there's only a single
- * block, in which case there will never be a free range so it
- * won't matter.
- */
range_tofind.fr_blkid = blkid;
mutex_enter(&dn->dn_mtx);
for (i = 0; i < TXG_SIZE; i++) {
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c (revision 200402)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c (working copy)
@@ -828,12 +828,8 @@
{
int err;
dmu_tx_t *tx;
+ void *data = NULL;
- err = dmu_object_info(os, drro->drr_object, NULL);
-
- if (err != 0 && err != ENOENT)
- return (EINVAL);
-
if (drro->drr_type == DMU_OT_NONE ||
drro->drr_type >= DMU_OT_NUMTYPES ||
drro->drr_bonustype >= DMU_OT_NUMTYPES ||
@@ -846,12 +842,21 @@
return (EINVAL);
}
- tx = dmu_tx_create(os);
+ err = dmu_object_info(os, drro->drr_object, NULL);
+ if (err != 0 && err != ENOENT)
+ return (EINVAL);
+
+ if (drro->drr_bonuslen) {
+ data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8));
+ if (ra->err)
+ return (ra->err);
+ }
+
if (err == ENOENT) {
/* currently free, want to be allocated */
+ tx = dmu_tx_create(os);
dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 1);
err = dmu_tx_assign(tx, TXG_WAIT);
if (err) {
dmu_tx_abort(tx);
@@ -860,45 +865,34 @@
err = dmu_object_claim(os, drro->drr_object,
drro->drr_type, drro->drr_blksz,
drro->drr_bonustype, drro->drr_bonuslen, tx);
+ dmu_tx_commit(tx);
} else {
/* currently allocated, want to be allocated */
- dmu_tx_hold_bonus(tx, drro->drr_object);
- /*
- * We may change blocksize and delete old content,
- * so need to hold_write and hold_free.
- */
- dmu_tx_hold_write(tx, drro->drr_object, 0, 1);
- dmu_tx_hold_free(tx, drro->drr_object, 0, DMU_OBJECT_END);
- err = dmu_tx_assign(tx, TXG_WAIT);
- if (err) {
- dmu_tx_abort(tx);
- return (err);
- }
-
err = dmu_object_reclaim(os, drro->drr_object,
drro->drr_type, drro->drr_blksz,
- drro->drr_bonustype, drro->drr_bonuslen, tx);
+ drro->drr_bonustype, drro->drr_bonuslen);
}
- if (err) {
- dmu_tx_commit(tx);
+ if (err)
return (EINVAL);
+
+ tx = dmu_tx_create(os);
+ dmu_tx_hold_bonus(tx, drro->drr_object);
+ err = dmu_tx_assign(tx, TXG_WAIT);
+ if (err) {
+ dmu_tx_abort(tx);
+ return (err);
}
dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksum, tx);
dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx);
- if (drro->drr_bonuslen) {
+ if (data != NULL) {
dmu_buf_t *db;
- void *data;
+
VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db));
dmu_buf_will_dirty(db, tx);
ASSERT3U(db->db_size, >=, drro->drr_bonuslen);
- data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8));
- if (data == NULL) {
- dmu_tx_commit(tx);
- return (ra->err);
- }
bcopy(data, db->db_data, drro->drr_bonuslen);
if (ra->byteswap) {
dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data,
--------------070208060700060803030000--
More information about the freebsd-fs
mailing list