svn commit: r302641 - in vendor-sys/illumos/dist/uts/common/fs/zfs: . sys
Andriy Gapon
avg at FreeBSD.org
Tue Jul 12 11:18:27 UTC 2016
Author: avg
Date: Tue Jul 12 11:18:25 2016
New Revision: 302641
URL: https://svnweb.freebsd.org/changeset/base/302641
Log:
6844 dnode_next_offset can detect fictional holes
illumos/illumos-gate at 11ceac77ea8034bf2fe9bdd6d314f5d1e5ceeba3
https://github.com/illumos/illumos-gate/commit/11ceac77ea8034bf2fe9bdd6d314f5d1e5ceeba3
https://www.illumos.org/issues/6844
dnode_next_offset is used in a variety of places to iterate over the holes or
allocated blocks in a dnode. It operates under the premise that it can iterate
over the blockpointers of a dnode in open context while holding only the
dn_struct_rwlock as reader. Unfortunately, this premise does not hold.
When we create the zio for a dbuf, we pass in the actual block pointer in the
indirect block above that dbuf. When we later zero the bp in
zio_write_compress, we are directly modifying the bp. The state of the bp is
now inconsistent from the perspective of dnode_next_offset: the bp will appear
to be a hole until zio_dva_allocate finally finishes filling it in. In the
meantime, dnode_next_offset can detect a hole in the dnode when none exists.
I was able to experimentally demonstrate this behavior with the following
setup:
1. Create a file with 1 million dbufs.
2. Create a thread that randomly dirties L2 blocks by writing to the first L0
block under them.
3. Observe dnode_next_offset, waiting for it to skip over a hole in the middle
of a file.
4. Do dnode_next_offset in a loop until we skip over such a non-existent hole.
The fix is to ensure that it is valid to iterate over the indirect blocks in a
dnode while holding the dn_struct_rwlock by passing the zio a copy of the BP
and updating the actual BP in dbuf_write_ready while holding the lock.
Reviewed by: Matthew Ahrens <mahrens at delphix.com>
Reviewed by: George Wilson <george.wilson at delphix.com>
Reviewed by: Boris Protopopov <bprotopopov at hotmail.com>
Approved by: Dan McDonald <danmcd at omniti.com>
Author: Alex Reece <alex at delphix.com>
Modified:
vendor-sys/illumos/dist/uts/common/fs/zfs/dbuf.c
vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dbuf.h
Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dbuf.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/dbuf.c Tue Jul 12 11:16:43 2016 (r302640)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/dbuf.c Tue Jul 12 11:18:25 2016 (r302641)
@@ -2879,7 +2879,8 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *
uint64_t fill = 0;
int i;
- ASSERT3P(db->db_blkptr, ==, bp);
+ ASSERT3P(db->db_blkptr, !=, NULL);
+ ASSERT3P(&db->db_data_pending->dr_bp_copy, ==, bp);
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
@@ -2901,7 +2902,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *
#ifdef ZFS_DEBUG
if (db->db_blkid == DMU_SPILL_BLKID) {
ASSERT(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR);
- ASSERT(!(BP_IS_HOLE(db->db_blkptr)) &&
+ ASSERT(!(BP_IS_HOLE(bp)) &&
db->db_blkptr == &dn->dn_phys->dn_spill);
}
#endif
@@ -2942,6 +2943,10 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *
bp->blk_fill = fill;
mutex_exit(&db->db_mtx);
+
+ rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+ *db->db_blkptr = *bp;
+ rw_exit(&dn->dn_struct_rwlock);
}
/*
@@ -3120,6 +3125,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_
zio_t *zio;
int wp_flag = 0;
+ ASSERT(dmu_tx_is_syncing(tx));
+
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
os = dn->dn_objset;
@@ -3178,6 +3185,14 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_
dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
DB_DNODE_EXIT(db);
+ /*
+ * We copy the blkptr now (rather than when we instantiate the dirty
+ * record), because its value can change between open context and
+ * syncing context. We do not need to hold dn_struct_rwlock to read
+ * db_blkptr because we are in syncing context.
+ */
+ dr->dr_bp_copy = *db->db_blkptr;
+
if (db->db_level == 0 &&
dr->dt.dl.dr_override_state == DR_OVERRIDDEN) {
/*
@@ -3187,7 +3202,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_
void *contents = (data != NULL) ? data->b_data : NULL;
dr->dr_zio = zio_write(zio, os->os_spa, txg,
- db->db_blkptr, contents, db->db.db_size, &zp,
+ &dr->dr_bp_copy, contents, db->db.db_size, &zp,
dbuf_write_override_ready, NULL, dbuf_write_override_done,
dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
mutex_enter(&db->db_mtx);
@@ -3199,14 +3214,14 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_
ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF ||
zp.zp_checksum == ZIO_CHECKSUM_NOPARITY);
dr->dr_zio = zio_write(zio, os->os_spa, txg,
- db->db_blkptr, NULL, db->db.db_size, &zp,
+ &dr->dr_bp_copy, NULL, db->db.db_size, &zp,
dbuf_write_nofill_ready, NULL, dbuf_write_nofill_done, db,
ZIO_PRIORITY_ASYNC_WRITE,
ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb);
} else {
ASSERT(arc_released(data));
dr->dr_zio = arc_write(zio, os->os_spa, txg,
- db->db_blkptr, data, DBUF_IS_L2CACHEABLE(db),
+ &dr->dr_bp_copy, data, DBUF_IS_L2CACHEABLE(db),
DBUF_IS_L2COMPRESSIBLE(db), &zp, dbuf_write_ready,
dbuf_write_physdone, dbuf_write_done, db,
ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dbuf.h
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dbuf.h Tue Jul 12 11:16:43 2016 (r302640)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dbuf.h Tue Jul 12 11:18:25 2016 (r302641)
@@ -121,6 +121,9 @@ typedef struct dbuf_dirty_record {
/* How much space was changed to dsl_pool_dirty_space() for this? */
unsigned int dr_accounted;
+ /* A copy of the bp that points to us */
+ blkptr_t dr_bp_copy;
+
union dirty_types {
struct dirty_indirect {
More information about the svn-src-vendor
mailing list