svn commit: r197398 - in user/kmacy/releng_8_fcs:
cddl/contrib/opensolaris/cmd/ztest
sys/cddl/contrib/opensolaris/uts/common/fs/zfs
sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys
Kip Macy
kmacy at FreeBSD.org
Tue Sep 22 03:10:05 UTC 2009
Author: kmacy
Date: Tue Sep 22 03:10:04 2009
New Revision: 197398
URL: http://svn.freebsd.org/changeset/base/197398
Log:
- make tx type operation dependent rather than vfs state dependent
- simplify ZIL replay handling
- add dmu_read_flags to allow explicit disabling of prefetch
- remove assert that doesn't apply to FreeBSD
- update ztest for new zil_replay
Modified:
user/kmacy/releng_8_fcs/cddl/contrib/opensolaris/cmd/ztest/ztest.c
user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h
user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c
user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c
user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fuid.c
user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c
user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c
Modified: user/kmacy/releng_8_fcs/cddl/contrib/opensolaris/cmd/ztest/ztest.c
==============================================================================
--- user/kmacy/releng_8_fcs/cddl/contrib/opensolaris/cmd/ztest/ztest.c Mon Sep 21 23:58:29 2009 (r197397)
+++ user/kmacy/releng_8_fcs/cddl/contrib/opensolaris/cmd/ztest/ztest.c Tue Sep 22 03:10:04 2009 (r197398)
@@ -1304,7 +1304,7 @@ ztest_dmu_objset_create_destroy(ztest_ar
if (ztest_random(2) == 0 &&
dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_OWNER, &os) == 0) {
zr.zr_os = os;
- zil_replay(os, &zr, &zr.zr_assign, ztest_replay_vector, NULL);
+ zil_replay(os, &zr, ztest_replay_vector);
dmu_objset_close(os);
}
@@ -3321,8 +3321,7 @@ ztest_run(char *pool)
if (test_future)
ztest_dmu_check_future_leak(&za[t]);
zr.zr_os = za[d].za_os;
- zil_replay(zr.zr_os, &zr, &zr.zr_assign,
- ztest_replay_vector, NULL);
+ zil_replay(zr.zr_os, &zr, ztest_replay_vector);
za[d].za_zilog = zil_open(za[d].za_os, NULL);
}
Modified: user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
==============================================================================
--- user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c Mon Sep 21 23:58:29 2009 (r197397)
+++ user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c Tue Sep 22 03:10:04 2009 (r197398)
@@ -177,22 +177,22 @@ dmu_bonus_hold(objset_t *os, uint64_t ob
* whose dnodes are in the same block.
*/
static int
-dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset,
- uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp)
+dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
+ int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
{
dsl_pool_t *dp = NULL;
dmu_buf_t **dbp;
uint64_t blkid, nblks, i;
- uint32_t flags;
+ uint32_t dbuf_flags;
int err;
zio_t *zio;
hrtime_t start;
ASSERT(length <= DMU_MAX_ACCESS);
- flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT;
- if (length > zfetch_array_rd_sz)
- flags |= DB_RF_NOPREFETCH;
+ dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT;
+ if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz)
+ dbuf_flags |= DB_RF_NOPREFETCH;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
if (dn->dn_datablkshift) {
@@ -230,7 +230,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn,
/* initiate async i/o */
if (read) {
rw_exit(&dn->dn_struct_rwlock);
- (void) dbuf_read(db, zio, flags);
+ (void) dbuf_read(db, zio, dbuf_flags);
rw_enter(&dn->dn_struct_rwlock, RW_READER);
}
dbp[i] = &db->db;
@@ -282,7 +282,7 @@ dmu_buf_hold_array(objset_t *os, uint64_
return (err);
err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
- numbufsp, dbpp);
+ numbufsp, dbpp, DMU_READ_PREFETCH);
dnode_rele(dn, FTAG);
@@ -297,7 +297,7 @@ dmu_buf_hold_array_by_bonus(dmu_buf_t *d
int err;
err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
- numbufsp, dbpp);
+ numbufsp, dbpp, DMU_READ_PREFETCH);
return (err);
}
@@ -536,8 +536,8 @@ dmu_free_range(objset_t *os, uint64_t ob
}
int
-dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
- void *buf)
+dmu_read_flags(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
+ void *buf, uint32_t flags)
{
dnode_t *dn;
dmu_buf_t **dbp;
@@ -567,7 +567,7 @@ dmu_read(objset_t *os, uint64_t object,
* to be reading in parallel.
*/
err = dmu_buf_hold_array_by_dnode(dn, offset, mylen,
- TRUE, FTAG, &numbufs, &dbp);
+ TRUE, FTAG, &numbufs, &dbp, flags);
if (err)
break;
@@ -593,6 +593,13 @@ dmu_read(objset_t *os, uint64_t object,
return (err);
}
+int
+dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
+ void *buf)
+{
+ return dmu_read_flags(os, object, offset, size, buf, 0);
+}
+
void
dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
const void *buf, dmu_tx_t *tx)
Modified: user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
==============================================================================
--- user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h Mon Sep 21 23:58:29 2009 (r197397)
+++ user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h Tue Sep 22 03:10:04 2009 (r197398)
@@ -447,8 +447,12 @@ int dmu_free_object(objset_t *os, uint64
* Canfail routines will return 0 on success, or an errno if there is a
* nonrecoverable I/O error.
*/
+#define DMU_READ_PREFETCH 0 /* prefetch */
+#define DMU_READ_NO_PREFETCH 1 /* don't prefetch */
int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
void *buf);
+int dmu_read_flags(objset_t *os, uint64_t object, uint64_t offset,
+ uint64_t size, void *buf, uint32_t flags);
void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
const void *buf, dmu_tx_t *tx);
int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size);
@@ -456,7 +460,10 @@ int dmu_write_uio(objset_t *os, uint64_t
dmu_tx_t *tx);
int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset,
uint64_t size, struct page *pp, dmu_tx_t *tx);
-
+struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
+void dmu_return_arcbuf(struct arc_buf *buf);
+void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf,
+ dmu_tx_t *tx);
extern int zfs_prefetch_disable;
/*
Modified: user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
==============================================================================
--- user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h Mon Sep 21 23:58:29 2009 (r197397)
+++ user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h Tue Sep 22 03:10:04 2009 (r197398)
@@ -47,7 +47,6 @@ struct zfsvfs {
uint64_t z_root; /* id of root znode */
uint64_t z_unlinkedobj; /* id of unlinked zapobj */
uint64_t z_max_blksz; /* maximum block size for files */
- uint64_t z_assign; /* TXG_NOWAIT or set by zil_replay() */
uint64_t z_fuid_obj; /* fuid table object number */
uint64_t z_fuid_size; /* fuid table size */
avl_tree_t z_fuid_idx; /* fuid tree keyed by index */
@@ -72,6 +71,7 @@ struct zfsvfs {
boolean_t z_issnap; /* true if this is a snapshot */
boolean_t z_vscan; /* virus scan on/off */
boolean_t z_use_fuids; /* version allows fuids */
+ boolean_t z_replay; /* set during ZIL replay */
kmutex_t z_online_recv_lock; /* recv in prog grabs as WRITER */
uint64_t z_version; /* ZPL version */
#define ZFS_OBJ_MTX_SZ 64
Modified: user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
==============================================================================
--- user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h Mon Sep 21 23:58:29 2009 (r197397)
+++ user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h Tue Sep 22 03:10:04 2009 (r197398)
@@ -335,7 +335,6 @@ typedef void zil_parse_blk_func_t(zilog_
typedef void zil_parse_lr_func_t(zilog_t *zilog, lr_t *lr, void *arg,
uint64_t txg);
typedef int zil_replay_func_t();
-typedef void zil_replay_cleaner_t();
typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio);
extern uint64_t zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
@@ -350,9 +349,8 @@ extern void zil_free(zilog_t *zilog);
extern zilog_t *zil_open(objset_t *os, zil_get_data_t *get_data);
extern void zil_close(zilog_t *zilog);
-extern void zil_replay(objset_t *os, void *arg, uint64_t *txgp,
- zil_replay_func_t *replay_func[TX_MAX_TYPE],
- zil_replay_cleaner_t *replay_cleaner);
+extern void zil_replay(objset_t *os, void *arg,
+ zil_replay_func_t *replay_func[TX_MAX_TYPE]);
extern void zil_destroy(zilog_t *zilog, boolean_t keep_first);
extern void zil_rollback_destroy(zilog_t *zilog, dmu_tx_t *tx);
Modified: user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h
==============================================================================
--- user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h Mon Sep 21 23:58:29 2009 (r197397)
+++ user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h Tue Sep 22 03:10:04 2009 (r197398)
@@ -74,13 +74,14 @@ struct zilog {
uint64_t zl_commit_seq; /* committed upto this number */
uint64_t zl_lr_seq; /* log record sequence number */
uint64_t zl_destroy_txg; /* txg of last zil_destroy() */
- uint64_t zl_replay_seq[TXG_SIZE]; /* seq of last replayed rec */
+ uint64_t zl_replayed_seq[TXG_SIZE]; /* seq of last replayed rec */
+ uint64_t zl_replaying_seq; /* current replay seq number */
uint32_t zl_suspend; /* log suspend count */
kcondvar_t zl_cv_writer; /* log writer thread completion */
kcondvar_t zl_cv_suspend; /* log suspend completion */
uint8_t zl_suspending; /* log is currently suspending */
uint8_t zl_keep_first; /* keep first log block in destroy */
- uint8_t zl_stop_replay; /* don't replay any further */
+ uint8_t zl_replay; /* replay in progress; cleared on error */
uint8_t zl_stop_sync; /* for debugging */
uint8_t zl_writer; /* boolean: write setup in progress */
uint8_t zl_log_error; /* boolean: log write error */
@@ -102,6 +103,9 @@ typedef struct zil_dva_node {
avl_node_t zn_node;
} zil_dva_node_t;
+#define ZIL_MAX_LOG_DATA (SPA_MAXBLOCKSIZE - sizeof (zil_trailer_t) - \
+ sizeof (lr_write_t))
+
#ifdef __cplusplus
}
#endif
Modified: user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c
==============================================================================
--- user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c Mon Sep 21 23:58:29 2009 (r197397)
+++ user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c Tue Sep 22 03:10:04 2009 (r197398)
@@ -2137,12 +2137,12 @@ top:
}
}
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
+ error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
mutex_exit(&zp->z_acl_lock);
mutex_exit(&zp->z_lock);
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
+ if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
@@ -2197,7 +2197,7 @@ zfs_zaccess_common(znode_t *zp, uint32_t
*check_privs = B_TRUE;
- if (zfsvfs->z_assign >= TXG_INITIAL) { /* ZIL replay */
+ if (zfsvfs->z_replay) {
*working_mode = 0;
return (0);
}
Modified: user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c
==============================================================================
--- user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c Mon Sep 21 23:58:29 2009 (r197397)
+++ user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c Tue Sep 22 03:10:04 2009 (r197398)
@@ -564,24 +564,6 @@ zfs_rmnode(znode_t *zp)
ASSERT(zp->z_phys->zp_links == 0);
/*
- * If this is a ZIL replay then leave the object in the unlinked set.
- * Otherwise we can get a deadlock, because the delete can be
- * quite large and span multiple tx's and txgs, but each replay
- * creates a tx to atomically run the replay function and mark the
- * replay record as complete. We deadlock trying to start a tx in
- * a new txg to further the deletion but can't because the replay
- * tx hasn't finished.
- *
- * We actually delete the object if we get a failure to create an
- * object in zil_replay_log_record(), or after calling zil_replay().
- */
- if (zfsvfs->z_assign >= TXG_INITIAL) {
- zfs_znode_dmu_fini(zp);
- zfs_znode_free(zp);
- return;
- }
-
- /*
* If this is an attribute directory, purge its contents.
*/
if (ZTOV(zp) != NULL && ZTOV(zp)->v_type == VDIR &&
@@ -855,9 +837,9 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *
FUID_SIZE_ESTIMATE(zfsvfs));
}
}
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
+ error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT)
+ if (error == ERESTART)
dmu_tx_wait(tx);
dmu_tx_abort(tx);
return (error);
@@ -944,7 +926,7 @@ top:
error = zfs_make_xattrdir(zp, &va, xvpp, cr);
zfs_dirent_unlock(dl);
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
+ if (error == ERESTART) {
/* NB: we already did dmu_tx_wait() if necessary */
goto top;
}
@@ -975,7 +957,7 @@ zfs_sticky_remove_access(znode_t *zdp, z
uid_t fowner;
zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
- if (zdp->z_zfsvfs->z_assign >= TXG_INITIAL) /* ZIL replay */
+ if (zdp->z_zfsvfs->z_replay)
return (0);
if ((zdp->z_phys->zp_mode & S_ISVTX) == 0)
Modified: user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fuid.c
==============================================================================
--- user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fuid.c Mon Sep 21 23:58:29 2009 (r197397)
+++ user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fuid.c Tue Sep 22 03:10:04 2009 (r197398)
@@ -525,7 +525,6 @@ zfs_fuid_create(zfsvfs_t *zfsvfs, uint64
uint32_t rid;
idmap_stat status;
uint64_t idx;
- boolean_t is_replay = (zfsvfs->z_assign >= TXG_INITIAL);
zfs_fuid_t *zfuid = NULL;
zfs_fuid_info_t *fuidp;
@@ -540,7 +539,7 @@ zfs_fuid_create(zfsvfs_t *zfsvfs, uint64
if (!zfsvfs->z_use_fuids || !IS_EPHEMERAL(id) || fuid_idx != 0)
return (id);
- if (is_replay) {
+ if (zfsvfs->z_replay) {
fuidp = zfsvfs->z_fuid_replay;
/*
@@ -594,7 +593,7 @@ zfs_fuid_create(zfsvfs_t *zfsvfs, uint64
idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, tx);
- if (!is_replay)
+ if (!zfsvfs->z_replay)
zfs_fuid_node_add(fuidpp, kdomain, rid, idx, id, type);
else if (zfuid != NULL) {
list_remove(&fuidp->z_fuids, zfuid);
Modified: user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c
==============================================================================
--- user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c Mon Sep 21 23:58:29 2009 (r197397)
+++ user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c Tue Sep 22 03:10:04 2009 (r197398)
@@ -42,6 +42,17 @@
#include <sys/dmu.h>
#include <sys/spa.h>
#include <sys/zfs_fuid.h>
+#include <sys/dsl_dataset.h>
+
+#define ZFS_HANDLE_REPLAY(zilog, tx) \
+ if (zilog->zl_replay) { \
+ dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); \
+ zilog->zl_replayed_seq[dmu_tx_get_txg(tx) & TXG_MASK] = \
+ zilog->zl_replaying_seq; \
+ return; \
+ }
+
+
/*
* All the functions in this file are used to construct the log entries
@@ -236,6 +247,8 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t
if (zilog == NULL)
return;
+ ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
+
/*
* If we have FUIDs present then add in space for
* domains and ACE fuid's if any.
@@ -339,6 +352,8 @@ zfs_log_remove(zilog_t *zilog, dmu_tx_t
if (zilog == NULL)
return;
+ ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
+
itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
lr = (lr_remove_t *)&itx->itx_lr;
lr->lr_doid = dzp->z_id;
@@ -363,6 +378,8 @@ zfs_log_link(zilog_t *zilog, dmu_tx_t *t
if (zilog == NULL)
return;
+ ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
+
itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
lr = (lr_link_t *)&itx->itx_lr;
lr->lr_doid = dzp->z_id;
@@ -390,6 +407,8 @@ zfs_log_symlink(zilog_t *zilog, dmu_tx_t
if (zilog == NULL)
return;
+ ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
+
itx = zil_itx_create(txtype, sizeof (*lr) + namesize + linksize);
lr = (lr_create_t *)&itx->itx_lr;
lr->lr_doid = dzp->z_id;
@@ -424,6 +443,8 @@ zfs_log_rename(zilog_t *zilog, dmu_tx_t
if (zilog == NULL)
return;
+ ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
+
itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize);
lr = (lr_rename_t *)&itx->itx_lr;
lr->lr_sdoid = sdzp->z_id;
@@ -456,6 +477,8 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *
if (zilog == NULL || zp->z_unlinked)
return;
+ ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
+
/*
* Writes are handled in three different ways:
*
@@ -554,6 +577,8 @@ zfs_log_truncate(zilog_t *zilog, dmu_tx_
if (zilog == NULL || zp->z_unlinked)
return;
+ ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
+
itx = zil_itx_create(txtype, sizeof (*lr));
lr = (lr_truncate_t *)&itx->itx_lr;
lr->lr_foid = zp->z_id;
@@ -583,6 +608,8 @@ zfs_log_setattr(zilog_t *zilog, dmu_tx_t
if (zilog == NULL || zp->z_unlinked)
return;
+ ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
+
/*
* If XVATTR set, then log record size needs to allow
* for lr_attr_t + xvattr mask, mapsize and create time
@@ -649,6 +676,8 @@ zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx
if (zilog == NULL || zp->z_unlinked)
return;
+ ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
+
txtype = (zp->z_zfsvfs->z_version < ZPL_VERSION_FUID) ?
TX_ACL_V0 : TX_ACL;
Modified: user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
==============================================================================
--- user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c Mon Sep 21 23:58:29 2009 (r197397)
+++ user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c Tue Sep 22 03:10:04 2009 (r197398)
@@ -499,6 +499,13 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t
dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock);
+ zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
+ if (zil_disable) {
+ zil_destroy(zfsvfs->z_log, 0);
+ zfsvfs->z_log = NULL;
+ }
+
+
/*
* If we are not mounting (ie: online recv), then we don't
* have to worry about replaying the log as we blocked all
@@ -512,21 +519,27 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t
* allow replays to succeed.
*/
readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
- zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
-
- /*
- * Parse and replay the intent log.
- */
- zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign,
- zfs_replay_vector, zfs_unlinked_drain);
-
- zfs_unlinked_drain(zfsvfs);
+ if (readonly != 0)
+ zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
+ else
+ zfs_unlinked_drain(zfsvfs);
+
+ if (zfsvfs->z_log) {
+
+ /*
+ * Parse and replay the intent log.
+ * Because of ziltest, this must be done after
+ * zfs_unlinked_drain(). (Further note: ziltest
+ * doesn't use readonly mounts, where zfs_unlinked_drain() isn't called.)
+ */
+ zfsvfs->z_replay = B_TRUE;
+ zil_replay(zfsvfs->z_os, zfsvfs, zfs_replay_vector);
+ zfsvfs->z_replay = B_FALSE;
+ }
+
zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */
}
- if (!zil_disable)
- zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
-
return (0);
}
@@ -562,7 +575,6 @@ zfs_domount(vfs_t *vfsp, char *osname)
zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
zfsvfs->z_vfs = vfsp;
zfsvfs->z_parent = zfsvfs;
- zfsvfs->z_assign = TXG_NOWAIT;
zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
Modified: user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
==============================================================================
--- user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c Mon Sep 21 23:58:29 2009 (r197397)
+++ user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c Tue Sep 22 03:10:04 2009 (r197398)
@@ -132,12 +132,12 @@
* rw_enter(...); // grab any other locks you need
* tx = dmu_tx_create(...); // get DMU tx
* dmu_tx_hold_*(); // hold each object you might modify
- * error = dmu_tx_assign(tx, zfsvfs->z_assign); // try to assign
+ * error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign
* if (error) {
* rw_exit(...); // drop locks
* zfs_dirent_unlock(dl); // unlock directory entry
* VN_RELE(...); // release held vnodes
- * if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
+ * if (error == ERESTART) {
* dmu_tx_wait(tx);
* dmu_tx_abort(tx);
* goto top;
@@ -793,10 +793,9 @@ zfs_write(vnode_t *vp, uio_t *uio, int i
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_bonus(tx, zp->z_id);
dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
+ error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
- if (error == ERESTART &&
- zfsvfs->z_assign == TXG_NOWAIT) {
+ if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
continue;
@@ -906,7 +905,7 @@ zfs_write(vnode_t *vp, uio_t *uio, int i
* If we're in replay mode, or we made no progress, return error.
* Otherwise, it's at least a partial write, so it's successful.
*/
- if (zfsvfs->z_assign >= TXG_INITIAL || uio->uio_resid == start_resid) {
+ if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
ZFS_EXIT(zfsvfs);
return (error);
}
@@ -1397,11 +1396,10 @@ top:
dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
0, SPA_MAXBLOCKSIZE);
}
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
+ error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
- if (error == ERESTART &&
- zfsvfs->z_assign == TXG_NOWAIT) {
+ if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
@@ -1610,11 +1608,11 @@ top:
/* charge as an update -- would be nice not to charge at all */
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
+ error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
VN_RELE(vp);
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
+ if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
@@ -1812,10 +1810,10 @@ top:
if ((dzp->z_phys->zp_flags & ZFS_INHERIT_ACE) || aclp)
dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
0, SPA_MAXBLOCKSIZE);
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
+ error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
+ if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
@@ -1942,13 +1940,13 @@ top:
dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
dmu_tx_hold_bonus(tx, zp->z_id);
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
+ error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
rw_exit(&zp->z_parent_lock);
rw_exit(&zp->z_name_lock);
zfs_dirent_unlock(dl);
VN_RELE(vp);
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
+ if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
@@ -2843,7 +2841,7 @@ top:
dmu_tx_hold_bonus(tx, attrzp->z_id);
}
- err = dmu_tx_assign(tx, zfsvfs->z_assign);
+ err = dmu_tx_assign(tx, TXG_NOWAIT);
if (err) {
if (attrzp)
VN_RELE(ZTOV(attrzp));
@@ -2853,7 +2851,7 @@ top:
aclp = NULL;
}
- if (err == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
+ if (err == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
@@ -3298,7 +3296,7 @@ top:
if (tzp)
dmu_tx_hold_bonus(tx, tzp->z_id); /* parent changes */
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
+ error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
if (zl != NULL)
zfs_rename_unlock(&zl);
@@ -3307,7 +3305,7 @@ top:
VN_RELE(ZTOV(szp));
if (tzp)
VN_RELE(ZTOV(tzp));
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
+ if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
@@ -3444,10 +3442,10 @@ top:
FUID_SIZE_ESTIMATE(zfsvfs));
}
}
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
+ error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
+ if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
@@ -3663,10 +3661,10 @@ top:
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_bonus(tx, szp->z_id);
dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
+ error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
+ if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
Modified: user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
==============================================================================
--- user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c Mon Sep 21 23:58:29 2009 (r197397)
+++ user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c Tue Sep 22 03:10:04 2009 (r197398)
@@ -658,7 +658,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, d
ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
- if (zfsvfs->z_assign >= TXG_INITIAL) { /* ZIL replay */
+ if (zfsvfs->z_replay) {
obj = vap->va_nodeid;
flag |= IS_REPLAY;
now = vap->va_ctime; /* see zfs_replay_create() */
@@ -1196,9 +1196,9 @@ top:
newblksz = 0;
}
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
+ error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
+ if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
@@ -1318,9 +1318,9 @@ zfs_trunc(znode_t *zp, uint64_t end)
top:
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_bonus(tx, zp->z_id);
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
+ error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
+ if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
@@ -1397,9 +1397,9 @@ zfs_freesp(znode_t *zp, uint64_t off, ui
log:
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_bonus(tx, zp->z_id);
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
+ error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
+ if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto log;
@@ -1505,7 +1505,6 @@ zfs_create_fs(objset_t *os, cred_t *cr,
bzero(&zfsvfs, sizeof (zfsvfs_t));
zfsvfs.z_os = os;
- zfsvfs.z_assign = TXG_NOWAIT;
zfsvfs.z_parent = &zfsvfs;
zfsvfs.z_version = version;
zfsvfs.z_use_fuids = USE_FUIDS(version, os);
Modified: user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
==============================================================================
--- user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c Mon Sep 21 23:58:29 2009 (r197397)
+++ user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c Tue Sep 22 03:10:04 2009 (r197398)
@@ -1221,7 +1221,7 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx)
ASSERT(zilog->zl_stop_sync == 0);
- zh->zh_replay_seq = zilog->zl_replay_seq[txg & TXG_MASK];
+ zh->zh_replay_seq = zilog->zl_replayed_seq[txg & TXG_MASK];
if (zilog->zl_destroy_txg == txg) {
blkptr_t blk = zh->zh_log;
@@ -1230,7 +1230,7 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx)
ASSERT(spa_sync_pass(spa) == 1);
bzero(zh, sizeof (zil_header_t));
- bzero(zilog->zl_replay_seq, sizeof (zilog->zl_replay_seq));
+ bzero(zilog->zl_replayed_seq, sizeof (zilog->zl_replayed_seq));
if (zilog->zl_keep_first) {
/*
@@ -1467,9 +1467,7 @@ zil_resume(zilog_t *zilog)
typedef struct zil_replay_arg {
objset_t *zr_os;
zil_replay_func_t **zr_replay;
- zil_replay_cleaner_t *zr_replay_cleaner;
void *zr_arg;
- uint64_t *zr_txgp;
boolean_t zr_byteswap;
char *zr_lrbuf;
} zil_replay_arg_t;
@@ -1484,7 +1482,7 @@ zil_replay_log_record(zilog_t *zilog, lr
char *name;
int pass, error, sunk;
- if (zilog->zl_stop_replay)
+ if (!zilog->zl_replay) /* giving up */
return;
if (lr->lrc_txg < claim_txg) /* already committed */
@@ -1548,44 +1546,14 @@ zil_replay_log_record(zilog_t *zilog, lr
/*
* We must now do two things atomically: replay this log record,
* and update the log header to reflect the fact that we did so.
- * We use the DMU's ability to assign into a specific txg to do this.
+ * At the end of each replay function the sequence number
+ * is updated if we are in replay mode.
*/
- for (pass = 1, sunk = B_FALSE; /* CONSTANTCONDITION */; pass++) {
- uint64_t replay_txg;
- dmu_tx_t *replay_tx;
-
- replay_tx = dmu_tx_create(zr->zr_os);
- error = dmu_tx_assign(replay_tx, TXG_WAIT);
- if (error) {
- dmu_tx_abort(replay_tx);
- break;
- }
-
- replay_txg = dmu_tx_get_txg(replay_tx);
-
- if (txtype == 0 || txtype >= TX_MAX_TYPE) {
- error = EINVAL;
- } else {
- /*
- * On the first pass, arrange for the replay vector
- * to fail its dmu_tx_assign(). That's the only way
- * to ensure that those code paths remain well tested.
- *
- * Only byteswap (if needed) on the 1st pass.
- */
- *zr->zr_txgp = replay_txg - (pass == 1);
- error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lrbuf,
- zr->zr_byteswap && pass == 1);
- *zr->zr_txgp = TXG_NOWAIT;
- }
-
- if (error == 0) {
- dsl_dataset_dirty(dmu_objset_ds(zr->zr_os), replay_tx);
- zilog->zl_replay_seq[replay_txg & TXG_MASK] =
- lr->lrc_seq;
- }
-
- dmu_tx_commit(replay_tx);
+ for (pass = 1; pass <= 2; pass++) {
+ zilog->zl_replaying_seq = lr->lrc_seq;
+ /* Only byteswap (if needed) on the 1st pass. */
+ error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lrbuf,
+ zr->zr_byteswap && pass == 1);
if (!error)
return;
@@ -1593,36 +1561,22 @@ zil_replay_log_record(zilog_t *zilog, lr
/*
* The DMU's dnode layer doesn't see removes until the txg
* commits, so a subsequent claim can spuriously fail with
- * EEXIST. So if we receive any error other than ERESTART
- * we try syncing out any removes then retrying the
- * transaction.
+ * EEXIST. So if we receive any error we try syncing out
+ * any removes then retry the transaction.
*/
- if (error != ERESTART && !sunk) {
- if (zr->zr_replay_cleaner)
- zr->zr_replay_cleaner(zr->zr_arg);
+ if (pass == 1)
txg_wait_synced(spa_get_dsl(zilog->zl_spa), 0);
- sunk = B_TRUE;
- continue; /* retry */
- }
-
- if (error != ERESTART)
- break;
-
- if (pass != 1)
- txg_wait_open(spa_get_dsl(zilog->zl_spa),
- replay_txg + 1);
-
- dprintf("pass %d, retrying\n", pass);
}
-
- ASSERT(error && error != ERESTART);
+bad:
+
+ ASSERT(error);
name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
dmu_objset_name(zr->zr_os, name);
cmn_err(CE_WARN, "ZFS replay transaction error %d, "
"dataset %s, seq 0x%llx, txtype %llu %s\n",
error, name, (u_longlong_t)lr->lrc_seq, (u_longlong_t)txtype,
(lr->lrc_txtype & TX_CI) ? "CI" : "");
- zilog->zl_stop_replay = 1;
+ zilog->zl_replay = B_FALSE;
kmem_free(name, MAXNAMELEN);
}
@@ -1637,9 +1591,8 @@ zil_incr_blks(zilog_t *zilog, blkptr_t *
* If this dataset has a non-empty intent log, replay it and destroy it.
*/
void
-zil_replay(objset_t *os, void *arg, uint64_t *txgp,
- zil_replay_func_t *replay_func[TX_MAX_TYPE],
- zil_replay_cleaner_t *replay_cleaner)
+zil_replay(objset_t *os, void *arg,
+ zil_replay_func_t *replay_func[TX_MAX_TYPE])
{
zilog_t *zilog = dmu_objset_zil(os);
const zil_header_t *zh = zilog->zl_header;
@@ -1653,9 +1606,7 @@ zil_replay(objset_t *os, void *arg, uint
zr.zr_os = os;
zr.zr_replay = replay_func;
- zr.zr_replay_cleaner = replay_cleaner;
zr.zr_arg = arg;
- zr.zr_txgp = txgp;
zr.zr_byteswap = BP_SHOULD_BYTESWAP(&zh->zh_log);
zr.zr_lrbuf = kmem_alloc(2 * SPA_MAXBLOCKSIZE, KM_SLEEP);
@@ -1664,7 +1615,7 @@ zil_replay(objset_t *os, void *arg, uint
*/
txg_wait_synced(zilog->zl_dmu_pool, 0);
- zilog->zl_stop_replay = 0;
+ zilog->zl_replay = B_TRUE;
zilog->zl_replay_time = LBOLT;
ASSERT(zilog->zl_replay_blks == 0);
(void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr,
@@ -1673,6 +1624,7 @@ zil_replay(objset_t *os, void *arg, uint
zil_destroy(zilog, B_FALSE);
txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg);
+ zilog->zl_replay = B_FALSE;
//printf("ZFS: Replay of ZIL on %s finished.\n", os->os->os_spa->spa_name);
}
Modified: user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c
==============================================================================
--- user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c Mon Sep 21 23:58:29 2009 (r197397)
+++ user/kmacy/releng_8_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c Tue Sep 22 03:10:04 2009 (r197398)
@@ -73,6 +73,7 @@
#include <sys/vdev_impl.h>
#include <sys/zvol.h>
#include <geom/geom.h>
+#include <sys/zil_impl.h>
#include "zfs_namecheck.h"
@@ -138,6 +139,7 @@ typedef struct zvol_state {
#define ZVOL_RDONLY 0x1
#define ZVOL_DUMPIFIED 0x2
#define ZVOL_EXCL 0x4
+#define ZVOL_WCE 0x8
/*
* zvol maximum transfer in one DMU tx.
@@ -294,28 +296,72 @@ zvol_access(struct g_provider *pp, int a
ssize_t zvol_immediate_write_sz = 32768;
static void
-zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t len)
+zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid,
+ boolean_t sync)
{
uint32_t blocksize = zv->zv_volblocksize;
- lr_write_t *lr;
+ zilog_t *zilog = zv->zv_zilog;
+ boolean_t slogging;
- while (len) {
- ssize_t nbytes = MIN(len, blocksize - P2PHASE(off, blocksize));
- itx_t *itx = zil_itx_create(TX_WRITE, sizeof (*lr));
+ if (zil_disable)
+ return;
- itx->itx_wr_state =
- len > zvol_immediate_write_sz ? WR_INDIRECT : WR_NEED_COPY;
- itx->itx_private = zv;
+ if (zilog->zl_replay) {
+ dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx);
+ zilog->zl_replayed_seq[dmu_tx_get_txg(tx) & TXG_MASK] =
+ zilog->zl_replaying_seq;
+ return;
+ }
+ slogging = spa_has_slogs(zilog->zl_spa);
+
+ while (resid) {
+ ssize_t len;
+ itx_t *itx;
+ lr_write_t *lr;
+ itx_wr_state_t write_state;
+
+ /*
+ * Unlike zfs_log_write(), we can be called with writes of
+ * up to DMU_MAX_ACCESS/2 (5MB).
+ */
+ if (blocksize > zvol_immediate_write_sz && !slogging &&
+ resid >= blocksize && off % blocksize == 0) {
+ write_state = WR_INDIRECT; /* uses dmu_sync */
+ len = blocksize;
+ } else if (sync) {
+ write_state = WR_COPIED;
+ len = MIN(ZIL_MAX_LOG_DATA, resid);
+ } else {
+ write_state = WR_NEED_COPY;
+ len = MIN(ZIL_MAX_LOG_DATA, resid);
+ }
+
+ itx = zil_itx_create(TX_WRITE, sizeof (*lr) +
+ (write_state == WR_COPIED ? len : 0));
lr = (lr_write_t *)&itx->itx_lr;
+ if (write_state == WR_COPIED && dmu_read_flags(zv->zv_objset,
+ ZVOL_OBJ, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
+ kmem_free(itx, offsetof(itx_t, itx_lr) +
+ itx->itx_lr.lrc_reclen);
+ itx = zil_itx_create(TX_WRITE, sizeof (*lr));
+ lr = (lr_write_t *)&itx->itx_lr;
+ write_state = WR_NEED_COPY;
+ }
+
+ itx->itx_wr_state = write_state;
+ if (write_state == WR_NEED_COPY)
+ itx->itx_sod += len;
+ itx->itx_private = zv;
lr->lr_foid = ZVOL_OBJ;
lr->lr_offset = off;
- lr->lr_length = nbytes;
+ lr->lr_length = len;
lr->lr_blkoff = off - P2ALIGN_TYPED(off, blocksize, uint64_t);
BP_ZERO(&lr->lr_blkptr);
(void) zil_itx_assign(zv->zv_zilog, itx, tx);
- len -= nbytes;
- off += nbytes;
+
+ off += len;
+ resid -= len;
}
}
@@ -353,6 +399,7 @@ zvol_serve_one(zvol_state_t *zv, struct
rl_t *rl;
int error = 0;
boolean_t reading;
+ boolean_t sync;
off = bp->bio_offset;
volsize = zv->zv_volsize;
@@ -365,12 +412,15 @@ zvol_serve_one(zvol_state_t *zv, struct
error = 0;
+
+ reading = (bp->bio_cmd == BIO_READ);
+ sync = /* !(bp->b_flags & B_ASYNC) && !is_dump && */ !reading &&
+ !(zv->zv_flags & ZVOL_WCE) && !zil_disable;
/*
* There must be no buffer changes when doing a dmu_sync() because
* we can't change the data whilst calculating the checksum.
* A better approach than a per zvol rwlock would be to lock ranges.
*/
- reading = (bp->bio_cmd == BIO_READ);
rl = zfs_range_lock(&zv->zv_znode, off, resid,
reading ? RL_READER : RL_WRITER);
@@ -391,7 +441,7 @@ zvol_serve_one(zvol_state_t *zv, struct
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-user
mailing list