diff options
Diffstat (limited to 'sys-fs/zfs')
-rw-r--r-- | sys-fs/zfs/ChangeLog | 9 | ||||
-rw-r--r-- | sys-fs/zfs/files/zfs-0.6.0_rc9-range-lock-caller-allocate.patch | 180 | ||||
-rw-r--r-- | sys-fs/zfs/zfs-0.6.0_rc9-r2.ebuild (renamed from sys-fs/zfs/zfs-0.6.0_rc9-r1.ebuild) | 2 |
3 files changed, 122 insertions, 69 deletions
diff --git a/sys-fs/zfs/ChangeLog b/sys-fs/zfs/ChangeLog index d636212d0177..d693068c29cf 100644 --- a/sys-fs/zfs/ChangeLog +++ b/sys-fs/zfs/ChangeLog @@ -1,6 +1,13 @@ # ChangeLog for sys-fs/zfs # Copyright 1999-2012 Gentoo Foundation; Distributed under the GPL v2 -# $Header: /var/cvsroot/gentoo-x86/sys-fs/zfs/ChangeLog,v 1.29 2012/06/25 02:37:14 ryao Exp $ +# $Header: /var/cvsroot/gentoo-x86/sys-fs/zfs/ChangeLog,v 1.30 2012/06/25 20:21:55 ryao Exp $ + +*zfs-0.6.0_rc9-r2 (25 Jun 2012) + + 25 Jun 2012; Richard Yao <ryao@gentoo.org> +zfs-0.6.0_rc9-r2.ebuild, + -zfs-0.6.0_rc9-r1.ebuild, + files/zfs-0.6.0_rc9-range-lock-caller-allocate.patch: + Fix regression caused by patch to fix deadlock 25 Jun 2012; Richard Yao <ryao@gentoo.org> files/zfs-0.6.0_rc9-range-lock-caller-allocate.patch: diff --git a/sys-fs/zfs/files/zfs-0.6.0_rc9-range-lock-caller-allocate.patch b/sys-fs/zfs/files/zfs-0.6.0_rc9-range-lock-caller-allocate.patch index dd9d83a038c5..4312f84fd5a7 100644 --- a/sys-fs/zfs/files/zfs-0.6.0_rc9-range-lock-caller-allocate.patch +++ b/sys-fs/zfs/files/zfs-0.6.0_rc9-range-lock-caller-allocate.patch @@ -1,28 +1,36 @@ -commit 4bbb86db6d6da5c05c452a810f20ffb19d6918de -Author: Richard Yao <ryao@cs.stonybrook.edu> -Date: Sun Jun 24 20:11:40 2012 -0400 +From fc1f1d3940f4d2e5b1b85481d900d8198cf4b6f3 Mon Sep 17 00:00:00 2001 +From: Richard Yao <ryao@cs.stonybrook.edu> +Date: Mon, 25 Jun 2012 14:41:30 -0400 +Subject: [PATCH] Make callers responsible for memory allocation in + zfs_range_lock() - Make callers responsible for memory allocation in zfs_range_lock() - - zfs_range_lock() is used in zvols, and previously, it could deadlock due - to an allocation using KM_SLEEP. We avoid this by moving responsibility - the memory allocation from zfs_range_lock() to the caller. This enables - us to avoid such deadlocks and use stack allocations, which are more - efficient and prevents deadlocks. The contexts in which stack - allocations are done do not appear to be stack heavy, so we do not risk - overflowing the stack from doing this. - - Signed-off-by: Richard Yao <ryao@cs.stonybrook.edu> - - Conflicts: - - module/zfs/zvol.c +zfs_range_lock() is used in zvols, and previously, it could deadlock due +to an allocation using KM_SLEEP. We avoid this by moving responsibility +the memory allocation from zfs_range_lock() to the caller. This enables +us to avoid such deadlocks and use stack allocations, which are more +efficient and prevents deadlocks. The contexts in which stack +allocations are done do not appear to be stack heavy, so we do not risk +overflowing the stack from doing this. + +Signed-off-by: Richard Yao <ryao@cs.stonybrook.edu> + +Conflicts: + + module/zfs/zvol.c +--- + cmd/ztest/ztest.c | 32 +++++++++++++++++--------------- + include/sys/zfs_rlock.h | 2 +- + module/zfs/zfs_rlock.c | 15 +++++++-------- + module/zfs/zfs_vnops.c | 30 ++++++++++++++++-------------- + module/zfs/zfs_znode.c | 30 +++++++++++++++--------------- + module/zfs/zvol.c | 24 +++++++++++++----------- + 6 files changed, 69 insertions(+), 64 deletions(-) diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 72d511b..c5dd0c2 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c -@@ -973,12 +973,11 @@ ztest_object_unlock(ztest_ds_t *zd, uint64_t object) +@@ -973,12 +973,11 @@ enum ztest_object { } static rl_t * @@ -36,7 +44,7 @@ index 72d511b..c5dd0c2 100644 rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL); rl->rl_object = object; -@@ -1389,7 +1388,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap) +@@ -1389,7 +1388,7 @@ enum ztest_object { dmu_tx_t *tx; dmu_buf_t *db; arc_buf_t *abuf = NULL; @@ -45,7 +53,7 @@ index 72d511b..c5dd0c2 100644 if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); -@@ -1413,7 +1412,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap) +@@ -1413,7 +1412,7 @@ enum ztest_object { bt = NULL; ztest_object_lock(zd, lr->lr_foid, RL_READER); @@ -54,7 +62,7 @@ index 72d511b..c5dd0c2 100644 VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); -@@ -1438,7 +1437,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap) +@@ -1438,7 +1437,7 @@ enum ztest_object { if (abuf != NULL) dmu_return_arcbuf(abuf); dmu_buf_rele(db, FTAG); @@ -63,7 +71,7 @@ index 72d511b..c5dd0c2 100644 ztest_object_unlock(zd, lr->lr_foid); return (ENOSPC); } -@@ -1495,7 +1494,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap) +@@ -1495,7 +1494,7 @@ enum ztest_object { dmu_tx_commit(tx); @@ -72,7 +80,7 @@ index 72d511b..c5dd0c2 100644 ztest_object_unlock(zd, lr->lr_foid); return (0); -@@ -1507,13 +1506,13 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap) +@@ -1507,13 +1506,13 @@ enum ztest_object { objset_t *os = zd->zd_os; dmu_tx_t *tx; uint64_t txg; @@ -88,7 +96,7 @@ index 72d511b..c5dd0c2 100644 RL_WRITER); tx = dmu_tx_create(os); -@@ -1522,7 +1521,7 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap) +@@ -1522,7 +1521,7 @@ enum ztest_object { txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); if (txg == 0) { @@ -97,7 +105,7 @@ index 72d511b..c5dd0c2 100644 ztest_object_unlock(zd, lr->lr_foid); return (ENOSPC); } -@@ -1534,7 +1533,7 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap) +@@ -1534,7 +1533,7 @@ enum ztest_object { dmu_tx_commit(tx); @@ -106,7 +114,7 @@ index 72d511b..c5dd0c2 100644 ztest_object_unlock(zd, lr->lr_foid); return (0); -@@ -1670,6 +1669,8 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) +@@ -1670,6 +1669,8 @@ enum ztest_object { dmu_object_info_t doi; dmu_buf_t *db; zgd_t *zgd; @@ -115,7 +123,7 @@ index 72d511b..c5dd0c2 100644 int error; ztest_object_lock(zd, object, RL_READER); -@@ -1694,9 +1695,10 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) +@@ -1694,9 +1695,10 @@ enum ztest_object { zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL); zgd->zgd_zilog = zd->zd_zilog; zgd->zgd_private = zd; @@ -127,7 +135,7 @@ index 72d511b..c5dd0c2 100644 RL_READER); error = dmu_read(os, object, offset, size, buf, -@@ -1711,7 +1713,7 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) +@@ -1711,7 +1713,7 @@ enum ztest_object { offset = 0; } @@ -136,7 +144,7 @@ index 72d511b..c5dd0c2 100644 RL_READER); error = dmu_buf_hold(os, object, offset, zgd, &db, -@@ -1953,12 +1955,12 @@ ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) +@@ -1953,12 +1955,12 @@ enum ztest_object { objset_t *os = zd->zd_os; dmu_tx_t *tx; uint64_t txg; @@ -151,7 +159,7 @@ index 72d511b..c5dd0c2 100644 tx = dmu_tx_create(os); -@@ -1974,7 +1976,7 @@ ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) +@@ -1974,7 +1976,7 @@ enum ztest_object { (void) dmu_free_long_range(os, object, offset, size); } @@ -164,7 +172,7 @@ diff --git a/include/sys/zfs_rlock.h b/include/sys/zfs_rlock.h index da18b1f..85dc16a 100644 --- a/include/sys/zfs_rlock.h +++ b/include/sys/zfs_rlock.h -@@ -63,7 +63,7 @@ typedef struct rl { +@@ -63,7 +63,7 @@ * is converted to WRITER that specified to lock from the start of the * end of file. zfs_range_lock() returns the range lock structure. */ @@ -174,7 +182,7 @@ index da18b1f..85dc16a 100644 /* * Unlock range and destroy range lock structure. diff --git a/module/zfs/zfs_rlock.c b/module/zfs/zfs_rlock.c -index f3ada17..6e9afc0 100644 +index f3ada17..eb81777 100644 --- a/module/zfs/zfs_rlock.c +++ b/module/zfs/zfs_rlock.c @@ -31,9 +31,9 @@ @@ -190,7 +198,7 @@ index f3ada17..6e9afc0 100644 * * AVL tree * -------- -@@ -420,13 +420,11 @@ got_lock: +@@ -420,13 +420,11 @@ * previously locked as RL_WRITER). */ rl_t * @@ -205,11 +213,30 @@ index f3ada17..6e9afc0 100644 new->r_zp = zp; new->r_off = off; if (len + off < off) /* overflow */ +@@ -531,7 +529,6 @@ + } + + mutex_exit(&zp->z_range_lock); +- kmem_free(remove, sizeof (rl_t)); + } + } + +@@ -572,7 +569,9 @@ + + while ((free_rl = list_head(&free_list)) != NULL) { + list_remove(&free_list, free_rl); +- zfs_range_free(free_rl); ++ /* Freeing rl is the caller's responsibility */ ++ if (free_rl != rl) ++ zfs_range_free(free_rl); + } + + list_destroy(&free_list); diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c -index 2da5fec..039269a 100644 +index 2da5fec..c8ca7c5 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c -@@ -370,7 +370,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) +@@ -370,7 +370,7 @@ objset_t *os; ssize_t n, nbytes; int error = 0; @@ -218,7 +245,7 @@ index 2da5fec..039269a 100644 #ifdef HAVE_UIO_ZEROCOPY xuio_t *xuio = NULL; #endif /* HAVE_UIO_ZEROCOPY */ -@@ -418,7 +418,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) +@@ -418,7 +418,7 @@ /* * Lock the range against changes. */ @@ -227,7 +254,7 @@ index 2da5fec..039269a 100644 /* * If we are reading past end-of-file we can skip -@@ -482,7 +482,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) +@@ -482,7 +482,7 @@ n -= nbytes; } out: @@ -236,7 +263,7 @@ index 2da5fec..039269a 100644 ZFS_ACCESSTIME_STAMP(zsb, zp); zfs_inode_update(zp); -@@ -524,7 +524,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) +@@ -524,7 +524,7 @@ zilog_t *zilog; offset_t woff; ssize_t n, nbytes; @@ -245,7 +272,7 @@ index 2da5fec..039269a 100644 int max_blksz = zsb->z_max_blksz; int error = 0; arc_buf_t *abuf; -@@ -608,9 +608,9 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) +@@ -608,9 +608,9 @@ * Obtain an appending range lock to guarantee file append * semantics. We reset the write offset once we have the lock. */ @@ -258,7 +285,7 @@ index 2da5fec..039269a 100644 /* * We overlocked the file because this write will cause * the file block size to increase. -@@ -625,11 +625,11 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) +@@ -625,11 +625,11 @@ * this write, then this range lock will lock the entire file * so that we can re-write the block safely. */ @@ -272,7 +299,7 @@ index 2da5fec..039269a 100644 ZFS_EXIT(zsb); return (EFBIG); } -@@ -719,7 +719,7 @@ again: +@@ -719,7 +719,7 @@ * on the first iteration since zfs_range_reduce() will * shrink down r_len to the appropriate size. */ @@ -281,7 +308,7 @@ index 2da5fec..039269a 100644 uint64_t new_blksz; if (zp->z_blksz > max_blksz) { -@@ -729,7 +729,7 @@ again: +@@ -729,7 +729,7 @@ new_blksz = MIN(end_size, max_blksz); } zfs_grow_blocksize(zp, new_blksz, tx); @@ -290,7 +317,7 @@ index 2da5fec..039269a 100644 } /* -@@ -842,7 +842,7 @@ again: +@@ -842,7 +842,7 @@ uio_prefaultpages(MIN(n, max_blksz), uio); } @@ -299,7 +326,23 @@ index 2da5fec..039269a 100644 /* * If we're in replay mode, or we made no progress, return error. -@@ -946,7 +946,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) +@@ -915,6 +915,7 @@ + blkptr_t *bp = &lr->lr_blkptr; + dmu_buf_t *db; + zgd_t *zgd; ++ rl_t rl; + int error = 0; + + ASSERT(zio != NULL); +@@ -935,6 +936,7 @@ + } + + zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); ++ zgd->zgd_rl = &rl; + zgd->zgd_zilog = zsb->z_log; + zgd->zgd_private = zp; + +@@ -946,7 +948,7 @@ * we don't have to write the data twice. */ if (buf != NULL) { /* immediate write */ @@ -308,7 +351,7 @@ index 2da5fec..039269a 100644 /* test for truncation needs to be done while range locked */ if (offset >= zp->z_size) { error = ENOENT; -@@ -967,7 +967,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) +@@ -967,7 +969,7 @@ size = zp->z_blksz; blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; offset -= blkoff; @@ -321,7 +364,7 @@ diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index 3a6872f..e363839 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c -@@ -1158,20 +1158,20 @@ zfs_extend(znode_t *zp, uint64_t end) +@@ -1158,20 +1158,20 @@ { zfs_sb_t *zsb = ZTOZSB(zp); dmu_tx_t *tx; @@ -345,7 +388,7 @@ index 3a6872f..e363839 100644 return (0); } top: -@@ -1202,7 +1202,7 @@ top: +@@ -1202,7 +1202,7 @@ goto top; } dmu_tx_abort(tx); @@ -354,7 +397,7 @@ index 3a6872f..e363839 100644 return (error); } -@@ -1214,7 +1214,7 @@ top: +@@ -1214,7 +1214,7 @@ VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)), &zp->z_size, sizeof (zp->z_size), tx)); @@ -363,7 +406,7 @@ index 3a6872f..e363839 100644 dmu_tx_commit(tx); -@@ -1235,19 +1235,19 @@ static int +@@ -1235,19 +1235,19 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) { zfs_sb_t *zsb = ZTOZSB(zp); @@ -386,7 +429,7 @@ index 3a6872f..e363839 100644 return (0); } -@@ -1256,7 +1256,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) +@@ -1256,7 +1256,7 @@ error = dmu_free_long_range(zsb->z_os, zp->z_id, off, len); @@ -395,7 +438,7 @@ index 3a6872f..e363839 100644 return (error); } -@@ -1275,7 +1275,7 @@ zfs_trunc(znode_t *zp, uint64_t end) +@@ -1275,7 +1275,7 @@ { zfs_sb_t *zsb = ZTOZSB(zp); dmu_tx_t *tx; @@ -404,7 +447,7 @@ index 3a6872f..e363839 100644 int error; sa_bulk_attr_t bulk[2]; int count = 0; -@@ -1283,19 +1283,19 @@ zfs_trunc(znode_t *zp, uint64_t end) +@@ -1283,19 +1283,19 @@ /* * We will change zp_size, lock the whole file. */ @@ -427,7 +470,7 @@ index 3a6872f..e363839 100644 return (error); } top: -@@ -1310,7 +1310,7 @@ top: +@@ -1310,7 +1310,7 @@ goto top; } dmu_tx_abort(tx); @@ -436,7 +479,7 @@ index 3a6872f..e363839 100644 return (error); } -@@ -1327,7 +1327,7 @@ top: +@@ -1327,7 +1327,7 @@ dmu_tx_commit(tx); @@ -446,10 +489,10 @@ index 3a6872f..e363839 100644 return (0); } diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c -index 125d58d..5cae597 100644 +index 125d58d..bbe53d9 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c -@@ -537,7 +537,7 @@ zvol_write(void *arg) +@@ -537,7 +537,7 @@ uint64_t size = blk_rq_bytes(req); int error = 0; dmu_tx_t *tx; @@ -458,7 +501,7 @@ index 125d58d..5cae597 100644 if (req->cmd_flags & VDEV_REQ_FLUSH) zil_commit(zv->zv_zilog, ZVOL_OBJ); -@@ -550,7 +550,7 @@ zvol_write(void *arg) +@@ -550,7 +550,7 @@ return; } @@ -467,7 +510,7 @@ index 125d58d..5cae597 100644 tx = dmu_tx_create(zv->zv_objset); dmu_tx_hold_write(tx, ZVOL_OBJ, offset, size); -@@ -559,7 +559,7 @@ zvol_write(void *arg) +@@ -559,7 +559,7 @@ error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); @@ -476,7 +519,7 @@ index 125d58d..5cae597 100644 blk_end_request(req, -error, size); return; } -@@ -570,7 +570,7 @@ zvol_write(void *arg) +@@ -570,7 +570,7 @@ req->cmd_flags & VDEV_REQ_FUA); dmu_tx_commit(tx); @@ -485,7 +528,7 @@ index 125d58d..5cae597 100644 if ((req->cmd_flags & VDEV_REQ_FUA) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS) -@@ -589,7 +589,7 @@ zvol_discard(void *arg) +@@ -589,7 +589,7 @@ uint64_t offset = blk_rq_pos(req) << 9; uint64_t size = blk_rq_bytes(req); int error; @@ -494,7 +537,7 @@ index 125d58d..5cae597 100644 if (offset + size > zv->zv_volsize) { blk_end_request(req, -EIO, size); -@@ -601,7 +601,7 @@ zvol_discard(void *arg) +@@ -601,7 +601,7 @@ return; } @@ -503,7 +546,7 @@ index 125d58d..5cae597 100644 error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, size); -@@ -609,7 +609,7 @@ zvol_discard(void *arg) +@@ -609,7 +609,7 @@ * TODO: maybe we should add the operation to the log. */ @@ -512,7 +555,7 @@ index 125d58d..5cae597 100644 blk_end_request(req, -error, size); } -@@ -630,18 +630,18 @@ zvol_read(void *arg) +@@ -630,18 +630,18 @@ uint64_t offset = blk_rq_pos(req) << 9; uint64_t size = blk_rq_bytes(req); int error; @@ -534,15 +577,15 @@ index 125d58d..5cae597 100644 /* convert checksum errors into IO errors */ if (error == ECKSUM) -@@ -744,6 +744,7 @@ zvol_get_done(zgd_t *zgd, int error) +@@ -744,6 +744,7 @@ if (error == 0 && zgd->zgd_bp) zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); -+ kmem_free(zgd->zgd_rl, sizeof(rl_t)); ++ kmem_free(zgd->zgd_rl, sizeof (rl_t)); kmem_free(zgd, sizeof (zgd_t)); } -@@ -766,7 +767,8 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) +@@ -766,7 +767,8 @@ zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); zgd->zgd_zilog = zv->zv_zilog; @@ -552,3 +595,6 @@ index 125d58d..5cae597 100644 /* * Write records come in two flavors: immediate and indirect. +-- +1.7.10 + diff --git a/sys-fs/zfs/zfs-0.6.0_rc9-r1.ebuild b/sys-fs/zfs/zfs-0.6.0_rc9-r2.ebuild index dab1249affb8..93f7e7633fee 100644 --- a/sys-fs/zfs/zfs-0.6.0_rc9-r1.ebuild +++ b/sys-fs/zfs/zfs-0.6.0_rc9-r2.ebuild @@ -1,6 +1,6 @@ # Copyright 1999-2012 Gentoo Foundation # Distributed under the terms of the GNU General Public License v2 -# $Header: /var/cvsroot/gentoo-x86/sys-fs/zfs/zfs-0.6.0_rc9-r1.ebuild,v 1.1 2012/06/25 00:50:51 ryao Exp $ +# $Header: /var/cvsroot/gentoo-x86/sys-fs/zfs/zfs-0.6.0_rc9-r2.ebuild,v 1.1 2012/06/25 20:21:55 ryao Exp $ EAPI="4" |