author     Joe Peterson <lavajoe@gentoo.org>  2008-08-20 23:59:43 +0000
committer  Joe Peterson <lavajoe@gentoo.org>  2008-08-20 23:59:43 +0000
commit     05e41b7b4f8b48bf4676c718264823d6318f26d5
tree       6810381e43e1aed88becc82a67554125dba639b7 /sys-fs/btrfs
parent     Fix ChangeLog.
Apply 0.16 hotfixes (first set)
(Portage version: 2.2_rc8/cvs/Linux 2.6.26-gentoo i686)
Diffstat (limited to 'sys-fs/btrfs')
-rw-r--r--  sys-fs/btrfs/ChangeLog                        |   8
-rw-r--r--  sys-fs/btrfs/btrfs-0.16-r2.ebuild             |  66
-rw-r--r--  sys-fs/btrfs/files/btrfs-0.16-hotfix-1.patch  | 992
3 files changed, 1065 insertions, 1 deletion
diff --git a/sys-fs/btrfs/ChangeLog b/sys-fs/btrfs/ChangeLog
index b972bbcbcf47..3b52470c450b 100644
--- a/sys-fs/btrfs/ChangeLog
+++ b/sys-fs/btrfs/ChangeLog
@@ -1,6 +1,12 @@
 # ChangeLog for sys-fs/btrfs
 # Copyright 1999-2008 Gentoo Foundation; Distributed under the GPL v2
-# $Header: /var/cvsroot/gentoo-x86/sys-fs/btrfs/ChangeLog,v 1.12 2008/08/18 17:10:32 lavajoe Exp $
+# $Header: /var/cvsroot/gentoo-x86/sys-fs/btrfs/ChangeLog,v 1.13 2008/08/20 23:59:42 lavajoe Exp $
+
+*btrfs-0.16-r2 (20 Aug 2008)
+
+  20 Aug 2008; Joe Peterson <lavajoe@gentoo.org>
+  +files/btrfs-0.16-hotfix-1.patch, +btrfs-0.16-r2.ebuild:
+  Apply 0.16 hotfixes (first set)
 
 *btrfs-0.16-r1 (18 Aug 2008)
 
diff --git a/sys-fs/btrfs/btrfs-0.16-r2.ebuild b/sys-fs/btrfs/btrfs-0.16-r2.ebuild
new file mode 100644
index 000000000000..dbae32b51317
--- /dev/null
+++ b/sys-fs/btrfs/btrfs-0.16-r2.ebuild
@@ -0,0 +1,66 @@
+# Copyright 1999-2008 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Header: /var/cvsroot/gentoo-x86/sys-fs/btrfs/btrfs-0.16-r2.ebuild,v 1.1 2008/08/20 23:59:42 lavajoe Exp $
+
+inherit eutils linux-mod
+
+DESCRIPTION="A checksumming copy-on-write filesystem"
+HOMEPAGE="http://btrfs.wiki.kernel.org/"
+SRC_URI="http://www.kernel.org/pub/linux/kernel/people/mason/btrfs/${P}.tar.bz2"
+
+LICENSE="GPL-2"
+SLOT="0"
+KEYWORDS="~amd64 ~x86"
+IUSE=""
+
+DEPEND=""
+RDEPEND="${DEPEND}"
+
+pkg_setup()
+{
+	linux-mod_pkg_setup
+
+	BUILD_TARGETS="all"
+	BUILD_PARAMS="KERNELDIR=${KV_OUT_DIR}"
+	MODULE_NAMES="btrfs(fs:${S})"
+
+	if ! kernel_is 2 6; then
+		eerror "Need a 2.6 kernel to compile against!"
+		die "Need a 2.6 kernel to compile against!"
+	fi
+
+	if ! linux_chkconfig_present LIBCRC32C; then
+		eerror "You need to enable LIBCRC32C in your kernel!"
+		die "You need to enable LIBCRC32C in your kernel!"
+	fi
+}
+
+src_unpack() {
+	unpack ${A}
+	cd "${S}"
+
+	# Apply hot fixes
+	epatch "${FILESDIR}/${P}-hotfix-1.patch"
+}
+
+src_install()
+{
+	linux-mod_src_install
+
+	dodoc INSTALL TODO
+}
+
+pkg_postinst() {
+	linux-mod_pkg_postinst
+
+	ewarn "WARNING: Btrfs is under heavy development and is not suitable for"
+	ewarn "         any uses other than benchmarking and review."
+	ewarn "         The Btrfs disk format is not yet finalized."
+	ewarn
+	ewarn "         Also, it is highly recommended that the versions of"
+	ewarn "         btrfs and btrfs-progs match."
+	ewarn
+	ewarn "Note: THE DISK FORMAT HAS CHANGED!"
+	ewarn "      You must backup your data and re-create your btrfs"
+	ewarn "      filesystem(s) for use with this version."
+}
diff --git a/sys-fs/btrfs/files/btrfs-0.16-hotfix-1.patch b/sys-fs/btrfs/files/btrfs-0.16-hotfix-1.patch
new file mode 100644
index 000000000000..dcd6c397cb9b
--- /dev/null
+++ b/sys-fs/btrfs/files/btrfs-0.16-hotfix-1.patch
@@ -0,0 +1,992 @@
+diff -Nurp btrfs-0.16/async-thread.c btrfs-0.16.new/async-thread.c
+--- btrfs-0.16/async-thread.c	2008-08-05 12:13:37.000000000 -0600
++++ btrfs-0.16.new/async-thread.c	2008-08-20 17:43:19.211404694 -0600
+@@ -49,6 +49,8 @@ struct btrfs_worker_thread {
+ 	/* number of things on the pending list */
+ 	atomic_t num_pending;
+ 
++	unsigned long sequence;
++
+ 	/* protects the pending list. */
+ 	spinlock_t lock;
+ 
+@@ -153,7 +155,7 @@ int btrfs_stop_workers(struct btrfs_work
+ /*
+  * simple init on struct btrfs_workers
+  */
+-void btrfs_init_workers(struct btrfs_workers *workers, int max)
++void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
+ {
+ 	workers->num_workers = 0;
+ 	INIT_LIST_HEAD(&workers->worker_list);
+@@ -161,6 +163,7 @@ void btrfs_init_workers(struct btrfs_wor
+ 	spin_lock_init(&workers->lock);
+ 	workers->max_workers = max;
+ 	workers->idle_thresh = 32;
++	workers->name = name;
+ }
+ 
+ /*
+@@ -184,7 +187,9 @@ int btrfs_start_workers(struct btrfs_wor
+ 		INIT_LIST_HEAD(&worker->worker_list);
+ 		spin_lock_init(&worker->lock);
+ 		atomic_set(&worker->num_pending, 0);
+-		worker->task = kthread_run(worker_loop, worker, "btrfs");
++		worker->task = kthread_run(worker_loop, worker,
++					   "btrfs-%s-%d", workers->name,
++					   workers->num_workers + i);
+ 		worker->workers = workers;
+ 		if (IS_ERR(worker->task)) {
+ 			kfree(worker);
+@@ -194,6 +199,7 @@ int btrfs_start_workers(struct btrfs_wor
+ 
+ 		spin_lock_irq(&workers->lock);
+ 		list_add_tail(&worker->worker_list, &workers->idle_list);
++		worker->idle = 1;
+ 		workers->num_workers++;
+ 		spin_unlock_irq(&workers->lock);
+ 	}
+@@ -235,7 +241,10 @@ static struct btrfs_worker_thread *next_
+ 	 */
+ 	next = workers->worker_list.next;
+ 	worker = list_entry(next, struct btrfs_worker_thread, worker_list);
+-	list_move_tail(next, &workers->worker_list);
++	atomic_inc(&worker->num_pending);
++	worker->sequence++;
++	if (worker->sequence % workers->idle_thresh == 0)
++		list_move_tail(next, &workers->worker_list);
+ 	return worker;
+ }
+ 
+diff -Nurp btrfs-0.16/async-thread.h btrfs-0.16.new/async-thread.h
+--- btrfs-0.16/async-thread.h	2008-08-05 12:13:37.000000000 -0600
++++ btrfs-0.16.new/async-thread.h	2008-08-20 17:43:19.211404694 -0600
+@@ -69,11 +69,14 @@ struct btrfs_workers {
+ 
+ 	/* lock for finding the next worker thread to queue on */
+ 	spinlock_t lock;
++
++	/* extra name for this worker */
++	char *name;
+ };
+ 
+ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
+ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers);
+ int btrfs_stop_workers(struct btrfs_workers *workers);
+-void btrfs_init_workers(struct btrfs_workers *workers, int max);
++void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max);
+ int btrfs_requeue_work(struct btrfs_work *work);
+ #endif
+diff -Nurp btrfs-0.16/compat.h btrfs-0.16.new/compat.h
+--- btrfs-0.16/compat.h	2008-08-05 12:13:37.000000000 -0600
++++ btrfs-0.16.new/compat.h	2008-08-20 17:43:19.501402495 -0600
+@@ -1,6 +1,9 @@
+ #ifndef _COMPAT_H_
+ #define _COMPAT_H_
+ 
++#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,26)
++#define trylock_page(page) (!TestSetPageLocked(page))
++#endif
+ 
+ /*
+  * Even if AppArmor isn't enabled, it still has different prototypes.
+diff -Nurp btrfs-0.16/ctree.h btrfs-0.16.new/ctree.h
+--- btrfs-0.16/ctree.h	2008-08-05 12:13:37.000000000 -0600
++++ btrfs-0.16.new/ctree.h	2008-08-20 17:43:19.521406669 -0600
+@@ -526,6 +526,7 @@ struct btrfs_fs_info {
+ 	struct btrfs_transaction *running_transaction;
+ 	wait_queue_head_t transaction_throttle;
+ 	wait_queue_head_t transaction_wait;
++	wait_queue_head_t async_submit_wait;
+ 	struct btrfs_super_block super_copy;
+ 	struct btrfs_super_block super_for_commit;
+ 	struct block_device *__bdev;
+@@ -544,6 +545,7 @@ struct btrfs_fs_info {
+ 	struct list_head hashers;
+ 	struct list_head dead_roots;
+ 	atomic_t nr_async_submits;
++	atomic_t nr_async_bios;
+ 
+ 	/*
+ 	 * this is used by the balancing code to wait for all the pending
+@@ -1648,7 +1650,7 @@ int btrfs_csum_truncate(struct btrfs_tra
+ /* inode.c */
+ 
+ /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
+-#ifdef ClearPageFsMisc
++#if defined(ClearPageFsMisc) && !defined(ClearPageChecked)
+ #define ClearPageChecked ClearPageFsMisc
+ #define SetPageChecked SetPageFsMisc
+ #define PageChecked PageFsMisc
+diff -Nurp btrfs-0.16/disk-io.c btrfs-0.16.new/disk-io.c
+--- btrfs-0.16/disk-io.c	2008-08-05 12:13:37.000000000 -0600
++++ btrfs-0.16.new/disk-io.c	2008-08-20 17:43:19.541408335 -0600
+@@ -429,14 +429,38 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_
+ 	return 0;
+ }
+ 
++unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
++{
++	unsigned long limit = min_t(unsigned long,
++				    info->workers.max_workers,
++				    info->fs_devices->open_devices);
++	return 256 * limit;
++}
++
++int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
++{
++	return atomic_read(&info->nr_async_bios) >
++		btrfs_async_submit_limit(info);
++}
++
+ static void run_one_async_submit(struct btrfs_work *work)
+ {
+ 	struct btrfs_fs_info *fs_info;
+ 	struct async_submit_bio *async;
++	int limit;
+ 
+ 	async = container_of(work, struct async_submit_bio, work);
+ 	fs_info = BTRFS_I(async->inode)->root->fs_info;
++
++	limit = btrfs_async_submit_limit(fs_info);
++	limit = limit * 2 / 3;
++
+ 	atomic_dec(&fs_info->nr_async_submits);
++
++	if (atomic_read(&fs_info->nr_async_submits) < limit &&
++	    waitqueue_active(&fs_info->async_submit_wait))
++		wake_up(&fs_info->async_submit_wait);
++
+ 	async->submit_bio_hook(async->inode, async->rw, async->bio,
+ 			       async->mirror_num);
+ 	kfree(async);
+@@ -447,6 +471,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_
+ 			extent_submit_bio_hook_t *submit_bio_hook)
+ {
+ 	struct async_submit_bio *async;
++	int limit = btrfs_async_submit_limit(fs_info);
+ 
+ 	async = kmalloc(sizeof(*async), GFP_NOFS);
+ 	if (!async)
+@@ -461,6 +486,10 @@ int btrfs_wq_submit_bio(struct btrfs_fs_
+ 	async->work.flags = 0;
+ 	atomic_inc(&fs_info->nr_async_submits);
+ 	btrfs_queue_worker(&fs_info->workers, &async->work);
++
++	wait_event_timeout(fs_info->async_submit_wait,
++			   (atomic_read(&fs_info->nr_async_submits) < limit),
++			   HZ/10);
+ 	return 0;
+ }
+ 
+@@ -475,11 +504,11 @@ static int __btree_submit_bio_hook(struc
+ 
+ 	/*
+ 	 * when we're called for a write, we're already in the async
+-	 * submission context.  Just jump ingo btrfs_map_bio
++	 * submission context.  Just jump into btrfs_map_bio
+ 	 */
+ 	if (rw & (1 << BIO_RW)) {
+ 		return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
+-				     mirror_num, 0);
++				     mirror_num, 1);
+ 	}
+ 
+ 	/*
+@@ -511,6 +540,12 @@ static int btree_writepage(struct page *
+ {
+ 	struct extent_io_tree *tree;
+ 	tree = &BTRFS_I(page->mapping->host)->io_tree;
++
++	if (current->flags & PF_MEMALLOC) {
++		redirty_page_for_writepage(wbc, page);
++		unlock_page(page);
++		return 0;
++	}
+ 	return extent_write_full_page(tree, page, btree_get_extent, wbc);
+ }
+ 
+@@ -522,16 +557,11 @@ static int btree_writepages(struct addre
+ 	if (wbc->sync_mode == WB_SYNC_NONE) {
+ 		u64 num_dirty;
+ 		u64 start = 0;
+-		unsigned long thresh = 96 * 1024 * 1024;
++		unsigned long thresh = 8 * 1024 * 1024;
+ 
+ 		if (wbc->for_kupdate)
+ 			return 0;
+ 
+-		if (current_is_pdflush()) {
+-			thresh = 96 * 1024 * 1024;
+-		} else {
+-			thresh = 8 * 1024 * 1024;
+-		}
+ 		num_dirty = count_range_bits(tree, &start, (u64)-1,
+ 					     thresh, EXTENT_DIRTY);
+ 		if (num_dirty < thresh) {
+@@ -938,15 +968,13 @@ static int btrfs_congested_fn(void *cong
+ {
+ 	struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data;
+ 	int ret = 0;
+-	int limit = 256 * info->fs_devices->open_devices;
+ 	struct list_head *cur;
+ 	struct btrfs_device *device;
+ 	struct backing_dev_info *bdi;
+ 
+ 	if ((bdi_bits & (1 << BDI_write_congested)) &&
+-	    atomic_read(&info->nr_async_submits) > limit) {
++	    btrfs_congested_async(info, 0))
+ 		return 1;
+-	}
+ 
+ 	list_for_each(cur, &info->fs_devices->devices) {
+ 		device = list_entry(cur, struct btrfs_device, dev_list);
+@@ -1250,6 +1278,7 @@ struct btrfs_root *open_ctree(struct sup
+ 	INIT_LIST_HEAD(&fs_info->space_info);
+ 	btrfs_mapping_init(&fs_info->mapping_tree);
+ 	atomic_set(&fs_info->nr_async_submits, 0);
++	atomic_set(&fs_info->nr_async_bios, 0);
+ 	atomic_set(&fs_info->throttles, 0);
+ 	atomic_set(&fs_info->throttle_gen, 0);
+ 	fs_info->sb = sb;
+@@ -1311,6 +1340,7 @@ struct btrfs_root *open_ctree(struct sup
+ 	mutex_init(&fs_info->volume_mutex);
+ 	init_waitqueue_head(&fs_info->transaction_throttle);
+ 	init_waitqueue_head(&fs_info->transaction_wait);
++	init_waitqueue_head(&fs_info->async_submit_wait);
+ 
+ #if 0
+ 	ret = add_hasher(fs_info, "crc32c");
+@@ -1347,8 +1377,11 @@ struct btrfs_root *open_ctree(struct sup
+ 	 * queue work function gets called at interrupt time, and so it
+ 	 * cannot dynamically grow.
+ 	 */
+-	btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size);
+-	btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size);
++	btrfs_init_workers(&fs_info->workers, "worker",
++			   fs_info->thread_pool_size);
++	btrfs_init_workers(&fs_info->submit_workers, "submit",
++			   min_t(u64, fs_devices->num_devices,
++			   fs_info->thread_pool_size));
+ 
+ 	/* a higher idle thresh on the submit workers makes it much more
+ 	 * likely that bios will be send down in a sane order to the
+@@ -1356,9 +1389,18 @@ struct btrfs_root *open_ctree(struct sup
+ 	 */
+ 	fs_info->submit_workers.idle_thresh = 64;
+ 
+-	btrfs_init_workers(&fs_info->fixup_workers, 1);
+-	btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
+-	btrfs_init_workers(&fs_info->endio_write_workers,
++	/* fs_info->workers is responsible for checksumming file data
++	 * blocks and metadata.  Using a larger idle thresh allows each
++	 * worker thread to operate on things in roughly the order they
++	 * were sent by the writeback daemons, improving overall locality
++	 * of the IO going down the pipe.
++	 */
++	fs_info->workers.idle_thresh = 128;
++
++	btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1);
++	btrfs_init_workers(&fs_info->endio_workers, "endio",
++			   fs_info->thread_pool_size);
++	btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
+ 			   fs_info->thread_pool_size);
+ 
+ 	/*
+@@ -1823,10 +1865,10 @@ void btrfs_btree_balance_dirty(struct bt
+ 	struct extent_io_tree *tree;
+ 	u64 num_dirty;
+ 	u64 start = 0;
+-	unsigned long thresh = 16 * 1024 * 1024;
++	unsigned long thresh = 96 * 1024 * 1024;
+ 	tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
+ 
+-	if (current_is_pdflush())
++	if (current_is_pdflush() || current->flags & PF_MEMALLOC)
+ 		return;
+ 
+ 	num_dirty = count_range_bits(tree, &start, (u64)-1,
+diff -Nurp btrfs-0.16/disk-io.h btrfs-0.16.new/disk-io.h
+--- btrfs-0.16/disk-io.h	2008-08-05 12:13:37.000000000 -0600
++++ btrfs-0.16.new/disk-io.h	2008-08-20 17:43:19.541408335 -0600
+@@ -72,4 +72,6 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_
+ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
+ 			int rw, struct bio *bio, int mirror_num,
+ 			extent_submit_bio_hook_t *submit_bio_hook);
++int btrfs_congested_async(struct btrfs_fs_info *info, int iodone);
++unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
+ #endif
+diff -Nurp btrfs-0.16/extent_io.c btrfs-0.16.new/extent_io.c
+--- btrfs-0.16/extent_io.c	2008-08-05 12:13:37.000000000 -0600
++++ btrfs-0.16.new/extent_io.c	2008-08-20 17:43:19.561407722 -0600
+@@ -14,6 +14,9 @@
+ #include <linux/pagevec.h>
+ #include "extent_io.h"
+ #include "extent_map.h"
++#include "compat.h"
++#include "ctree.h"
++#include "btrfs_inode.h"
+ 
+ /* temporary define until extent_map moves out of btrfs */
+ struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
+@@ -1393,15 +1396,11 @@ static int end_bio_extent_writepage(stru
+ {
+ 	int uptodate = err == 0;
+ 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+-	struct extent_state *state = bio->bi_private;
+-	struct extent_io_tree *tree = state->tree;
+-	struct rb_node *node;
++	struct extent_io_tree *tree;
+ 	u64 start;
+ 	u64 end;
+-	u64 cur;
+ 	int whole_page;
+ 	int ret;
+-	unsigned long flags;
+ 
+ #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+ 	if (bio->bi_size)
+@@ -1409,6 +1408,8 @@ static int end_bio_extent_writepage(stru
+ #endif
+ 	do {
+ 		struct page *page = bvec->bv_page;
++		tree = &BTRFS_I(page->mapping->host)->io_tree;
++
+ 		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
+ 			 bvec->bv_offset;
+ 		end = start + bvec->bv_len - 1;
+@@ -1422,7 +1423,7 @@ static int end_bio_extent_writepage(stru
+ 			prefetchw(&bvec->bv_page->flags);
+ 		if (tree->ops && tree->ops->writepage_end_io_hook) {
+ 			ret = tree->ops->writepage_end_io_hook(page, start,
+-						       end, state, uptodate);
++						       end, NULL, uptodate);
+ 			if (ret)
+ 				uptodate = 0;
+ 		}
+@@ -1430,9 +1431,8 @@ static int end_bio_extent_writepage(stru
+ 		if (!uptodate && tree->ops &&
+ 		    tree->ops->writepage_io_failed_hook) {
+ 			ret = tree->ops->writepage_io_failed_hook(bio, page,
+-							 start, end, state);
++							 start, end, NULL);
+ 			if (ret == 0) {
+-				state = NULL;
+ 				uptodate = (err == 0);
+ 				continue;
+ 			}
+@@ -1444,68 +1444,7 @@ static int end_bio_extent_writepage(stru
+ 			SetPageError(page);
+ 		}
+ 
+-		/*
+-		 * bios can get merged in funny ways, and so we need to
+-		 * be careful with the state variable.  We know the
+-		 * state won't be merged with others because it has
+-		 * WRITEBACK set, but we can't be sure each biovec is
+-		 * sequential in the file.  So, if our cached state
+-		 * doesn't match the expected end, search the tree
+-		 * for the correct one.
+-		 */
+-
+-		spin_lock_irqsave(&tree->lock, flags);
+-		if (!state || state->end != end) {
+-			state = NULL;
+-			node = __etree_search(tree, start, NULL, NULL);
+-			if (node) {
+-				state = rb_entry(node, struct extent_state,
+-						 rb_node);
+-				if (state->end != end ||
+-				    !(state->state & EXTENT_WRITEBACK))
+-					state = NULL;
+-			}
+-			if (!state) {
+-				spin_unlock_irqrestore(&tree->lock, flags);
+-				clear_extent_writeback(tree, start,
+-						       end, GFP_ATOMIC);
+-				goto next_io;
+-			}
+-		}
+-		cur = end;
+-		while(1) {
+-			struct extent_state *clear = state;
+-			cur = state->start;
+-			node = rb_prev(&state->rb_node);
+-			if (node) {
+-				state = rb_entry(node,
+-						 struct extent_state,
+-						 rb_node);
+-			} else {
+-				state = NULL;
+-			}
+-
+-			clear_state_bit(tree, clear, EXTENT_WRITEBACK,
+-					1, 0);
+-			if (cur == start)
+-				break;
+-			if (cur < start) {
+-				WARN_ON(1);
+-				break;
+-			}
+-			if (!node)
+-				break;
+-		}
+-		/* before releasing the lock, make sure the next state
+-		 * variable has the expected bits set and corresponds
+-		 * to the correct offsets in the file
+-		 */
+-		if (state && (state->end + 1 != start ||
+-		    !(state->state & EXTENT_WRITEBACK))) {
+-			state = NULL;
+-		}
+-		spin_unlock_irqrestore(&tree->lock, flags);
+-next_io:
++		clear_extent_writeback(tree, start, end, GFP_ATOMIC);
+ 
+ 		if (whole_page)
+ 			end_page_writeback(page);
+@@ -1538,13 +1477,9 @@ static int end_bio_extent_readpage(struc
+ {
+ 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+ 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+-	struct extent_state *state = bio->bi_private;
+-	struct extent_io_tree *tree = state->tree;
+-	struct rb_node *node;
++	struct extent_io_tree *tree;
+ 	u64 start;
+ 	u64 end;
+-	u64 cur;
+-	unsigned long flags;
+ 	int whole_page;
+ 	int ret;
+ 
+@@ -1555,6 +1490,8 @@ static int end_bio_extent_readpage(struc
+ 
+ 	do {
+ 		struct page *page = bvec->bv_page;
++		tree = &BTRFS_I(page->mapping->host)->io_tree;
++
+ 		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
+ 			bvec->bv_offset;
+ 		end = start + bvec->bv_len - 1;
+@@ -1569,80 +1506,26 @@ static int end_bio_extent_readpage(struc
+ 
+ 		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
+ 			ret = tree->ops->readpage_end_io_hook(page, start, end,
+-							      state);
++							      NULL);
+ 			if (ret)
+ 				uptodate = 0;
+ 		}
+ 		if (!uptodate && tree->ops &&
+ 		    tree->ops->readpage_io_failed_hook) {
+ 			ret = tree->ops->readpage_io_failed_hook(bio, page,
+-							 start, end, state);
++							 start, end, NULL);
+ 			if (ret == 0) {
+-				state = NULL;
+ 				uptodate =
+ 					test_bit(BIO_UPTODATE, &bio->bi_flags);
+ 				continue;
+ 			}
+ 		}
+ 
+-		spin_lock_irqsave(&tree->lock, flags);
+-		if (!state || state->end != end) {
+-			state = NULL;
+-			node = __etree_search(tree, start, NULL, NULL);
+-			if (node) {
+-				state = rb_entry(node, struct extent_state,
+-						 rb_node);
+-				if (state->end != end ||
+-				    !(state->state & EXTENT_LOCKED))
+-					state = NULL;
+-			}
+-			if (!state) {
+-				spin_unlock_irqrestore(&tree->lock, flags);
+-				if (uptodate)
+-					set_extent_uptodate(tree, start, end,
+-							    GFP_ATOMIC);
+-				unlock_extent(tree, start, end, GFP_ATOMIC);
+-				goto next_io;
+-			}
+-		}
++		if (uptodate)
++			set_extent_uptodate(tree, start, end,
++					    GFP_ATOMIC);
++		unlock_extent(tree, start, end, GFP_ATOMIC);
+ 
+-		cur = end;
+-		while(1) {
+-			struct extent_state *clear = state;
+-			cur = state->start;
+-			node = rb_prev(&state->rb_node);
+-			if (node) {
+-				state = rb_entry(node,
+-						 struct extent_state,
+-						 rb_node);
+-			} else {
+-				state = NULL;
+-			}
+-			if (uptodate) {
+-				set_state_cb(tree, clear, EXTENT_UPTODATE);
+-				clear->state |= EXTENT_UPTODATE;
+-			}
+-			clear_state_bit(tree, clear, EXTENT_LOCKED,
+-					1, 0);
+-			if (cur == start)
+-				break;
+-			if (cur < start) {
+-				WARN_ON(1);
+-				break;
+-			}
+-			if (!node)
+-				break;
+-		}
+-		/* before releasing the lock, make sure the next state
+-		 * variable has the expected bits set and corresponds
+-		 * to the correct offsets in the file
+-		 */
+-		if (state && (state->end + 1 != start ||
+-		    !(state->state & EXTENT_LOCKED))) {
+-			state = NULL;
+-		}
+-		spin_unlock_irqrestore(&tree->lock, flags);
+-next_io:
+ 		if (whole_page) {
+ 			if (uptodate) {
+ 				SetPageUptodate(page);
+@@ -1682,8 +1565,7 @@ static int end_bio_extent_preparewrite(s
+ {
+ 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+ 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+-	struct extent_state *state = bio->bi_private;
+-	struct extent_io_tree *tree = state->tree;
++	struct extent_io_tree *tree;
+ 	u64 start;
+ 	u64 end;
+ 
+@@ -1694,6 +1576,8 @@ static int end_bio_extent_preparewrite(s
+ 
+ 	do {
+ 		struct page *page = bvec->bv_page;
++		tree = &BTRFS_I(page->mapping->host)->io_tree;
++
+ 		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
+ 			bvec->bv_offset;
+ 		end = start + bvec->bv_len - 1;
+@@ -1764,7 +1648,7 @@ static int submit_one_bio(int rw, struct
+ 	BUG_ON(state->end != end);
+ 	spin_unlock_irq(&tree->lock);
+ 
+-	bio->bi_private = state;
++	bio->bi_private = NULL;
+ 
+ 	bio_get(bio);
+ 
+@@ -3055,7 +2939,7 @@ int read_extent_buffer_pages(struct exte
+ 	for (i = start_i; i < num_pages; i++) {
+ 		page = extent_buffer_page(eb, i);
+ 		if (!wait) {
+-			if (TestSetPageLocked(page))
++			if (!trylock_page(page))
+ 				goto unlock_exit;
+ 		} else {
+ 			lock_page(page);
+diff -Nurp btrfs-0.16/extent_map.c btrfs-0.16.new/extent_map.c
+--- btrfs-0.16/extent_map.c	2008-08-05 12:13:37.000000000 -0600
++++ btrfs-0.16.new/extent_map.c	2008-08-20 17:43:19.571405082 -0600
+@@ -207,7 +207,14 @@ int add_extent_mapping(struct extent_map
+ 	int ret = 0;
+ 	struct extent_map *merge = NULL;
+ 	struct rb_node *rb;
++	struct extent_map *exist;
+ 
++	exist = lookup_extent_mapping(tree, em->start, em->len);
++	if (exist) {
++		free_extent_map(exist);
++		ret = -EEXIST;
++		goto out;
++	}
+ 	assert_spin_locked(&tree->lock);
+ 	rb = tree_insert(&tree->map, em->start, &em->rb_node);
+ 	if (rb) {
+diff -Nurp btrfs-0.16/file-item.c btrfs-0.16.new/file-item.c
+--- btrfs-0.16/file-item.c	2008-08-05 12:13:37.000000000 -0600
++++ btrfs-0.16.new/file-item.c	2008-08-20 17:43:19.571405082 -0600
+@@ -134,7 +134,6 @@ int btrfs_lookup_file_extent(struct btrf
+ 	return ret;
+ }
+ 
+-#if 0 /* broken */
+ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
+ 			  struct bio *bio)
+ {
+@@ -151,6 +150,8 @@ int btrfs_lookup_bio_sums(struct btrfs_r
+ 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ 
+ 	path = btrfs_alloc_path();
++	if (bio->bi_size > PAGE_CACHE_SIZE * 8)
++		path->reada = 2;
+ 
+ 	WARN_ON(bio->bi_vcnt <= 0);
+ 
+@@ -211,7 +212,6 @@ found:
+ 	btrfs_free_path(path);
+ 	return 0;
+ }
+-#endif
+ 
+ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
+ 		       struct bio *bio)
+@@ -321,6 +321,7 @@ again:
+ 	file_key.offset = offset;
+ 	btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
+ 
++	mutex_lock(&BTRFS_I(inode)->csum_mutex);
+ 	item = btrfs_lookup_csum(trans, root, path, objectid, offset, 1);
+ 	if (!IS_ERR(item)) {
+ 		leaf = path->nodes[0];
+@@ -367,7 +368,7 @@ again:
+ 		ret = btrfs_search_slot(trans, root, &file_key, path,
+ 					BTRFS_CRC32_SIZE, 1);
+ 		if (ret < 0)
+-			goto fail;
++			goto fail_unlock;
+ 		if (ret == 0) {
+ 			BUG();
+ 		}
+@@ -411,10 +412,10 @@ insert:
+ 		ret = btrfs_insert_empty_item(trans, root, path, &file_key,
+ 					      ins_size);
+ 		if (ret < 0)
+-			goto fail;
++			goto fail_unlock;
+ 		if (ret != 0) {
+ 			WARN_ON(1);
+-			goto fail;
++			goto fail_unlock;
+ 		}
+ csum:
+ 	leaf = path->nodes[0];
+@@ -427,6 +428,8 @@ found:
+ 	item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
+ 				      btrfs_item_size_nr(leaf, path->slots[0]));
+ 	eb_token = NULL;
++	mutex_unlock(&BTRFS_I(inode)->csum_mutex);
++	cond_resched();
+ next_sector:
+ 
+ 	if (!eb_token ||
+@@ -467,13 +470,18 @@ next_sector:
+ 		eb_token = NULL;
+ 	}
+ 	btrfs_mark_buffer_dirty(path->nodes[0]);
++	cond_resched();
+ 	if (total_bytes < sums->len) {
+ 		btrfs_release_path(root, path);
+ 		goto again;
+ 	}
+-fail:
++out:
+ 	btrfs_free_path(path);
+ 	return ret;
++
++fail_unlock:
++	mutex_unlock(&BTRFS_I(inode)->csum_mutex);
++	goto out;
+ }
+ 
+ int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
+diff -Nurp btrfs-0.16/inode.c btrfs-0.16.new/inode.c
+--- btrfs-0.16/inode.c	2008-08-05 12:13:37.000000000 -0600
++++ btrfs-0.16.new/inode.c	2008-08-20 17:43:19.601409609 -0600
+@@ -389,15 +389,15 @@ int btrfs_submit_bio_hook(struct inode *
+ 	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+ 	BUG_ON(ret);
+ 
+-	if (!(rw & (1 << BIO_RW))) {
+-		goto mapit;
+-	}
+-
+ 	if (btrfs_test_opt(root, NODATASUM) ||
+ 	    btrfs_test_flag(inode, NODATASUM)) {
+ 		goto mapit;
+ 	}
+ 
++	if (!(rw & (1 << BIO_RW))) {
++		btrfs_lookup_bio_sums(root, inode, bio);
++		goto mapit;
++	}
+ 	return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
+ 				   inode, rw, bio, mirror_num,
+ 				   __btrfs_submit_bio_hook);
+@@ -415,10 +415,8 @@ static noinline int add_pending_csums(st
+ 	btrfs_set_trans_block_group(trans, inode);
+ 	list_for_each(cur, list) {
+ 		sum = list_entry(cur, struct btrfs_ordered_sum, list);
+-		mutex_lock(&BTRFS_I(inode)->csum_mutex);
+ 		btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root,
+ 				       inode, sum);
+-		mutex_unlock(&BTRFS_I(inode)->csum_mutex);
+ 	}
+ 	return 0;
+ }
+@@ -605,58 +603,6 @@ int btrfs_writepage_end_io_hook(struct p
+ 	return btrfs_finish_ordered_io(page->mapping->host, start, end);
+ }
+ 
+-int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
+-{
+-	int ret = 0;
+-	struct inode *inode = page->mapping->host;
+-	struct btrfs_root *root = BTRFS_I(inode)->root;
+-	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+-	struct btrfs_csum_item *item;
+-	struct btrfs_path *path = NULL;
+-	u32 csum;
+-
+-	if (btrfs_test_opt(root, NODATASUM) ||
+-	    btrfs_test_flag(inode, NODATASUM))
+-		return 0;
+-
+-	/*
+-	 * It is possible there is an ordered extent that has
+-	 * not yet finished for this range in the file.  If so,
+-	 * that extent will have a csum cached, and it will insert
+-	 * the sum after all the blocks in the extent are fully
+-	 * on disk.  So, look for an ordered extent and use the
+-	 * sum if found.  We have to do this before looking in the
+-	 * btree because csum items are pre-inserted based on
+-	 * the file size.  btrfs_lookup_csum might find an item
+-	 * that still hasn't been fully filled.
+-	 */
+-	ret = btrfs_find_ordered_sum(inode, start, &csum);
+-	if (ret == 0)
+-		goto found;
+-
+-	ret = 0;
+-	path = btrfs_alloc_path();
+-	item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
+-	if (IS_ERR(item)) {
+-		ret = PTR_ERR(item);
+-		/* a csum that isn't present is a preallocated region. */
+-		if (ret == -ENOENT || ret == -EFBIG)
+-			ret = 0;
+-		csum = 0;
+-		printk("no csum found for inode %lu start %Lu\n", inode->i_ino,
+-		       start);
+-		goto out;
+-	}
+-	read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
+-			   BTRFS_CRC32_SIZE);
+-found:
+-	set_state_private(io_tree, start, csum);
+-out:
+-	if (path)
+-		btrfs_free_path(path);
+-	return ret;
+-}
+-
+ struct io_failure_record {
+ 	struct page *page;
+ 	u64 start;
+@@ -1655,8 +1601,20 @@ static int btrfs_setattr(struct dentry *
+ 		btrfs_truncate_page(inode->i_mapping, inode->i_size);
+ 
+ 		hole_size = block_end - hole_start;
+-		btrfs_wait_ordered_range(inode, hole_start, hole_size);
+-		lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
++		while(1) {
++			struct btrfs_ordered_extent *ordered;
++			btrfs_wait_ordered_range(inode, hole_start, hole_size);
++
++			lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
++			ordered = btrfs_lookup_ordered_extent(inode, hole_start);
++			if (ordered) {
++				unlock_extent(io_tree, hole_start,
++					      block_end - 1, GFP_NOFS);
++				btrfs_put_ordered_extent(ordered);
++			} else {
++				break;
++			}
++		}
+ 
+ 		trans = btrfs_start_transaction(root, 1);
+ 		btrfs_set_trans_block_group(trans, inode);
+@@ -1833,6 +1791,7 @@ static int btrfs_init_locked_inode(struc
+ 	inode->i_ino = args->ino;
+ 	BTRFS_I(inode)->root = args->root;
+ 	BTRFS_I(inode)->delalloc_bytes = 0;
++	inode->i_mapping->writeback_index = 0;
+ 	BTRFS_I(inode)->disk_i_size = 0;
+ 	BTRFS_I(inode)->index_cnt = (u64)-1;
+ 	extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
+@@ -2239,6 +2198,7 @@ static struct inode *btrfs_new_inode(str
+ 	mutex_init(&BTRFS_I(inode)->csum_mutex);
+ 	mutex_init(&BTRFS_I(inode)->extent_mutex);
+ 	BTRFS_I(inode)->delalloc_bytes = 0;
++	inode->i_mapping->writeback_index = 0;
+ 	BTRFS_I(inode)->disk_i_size = 0;
+ 	BTRFS_I(inode)->root = root;
+ 
+@@ -2486,6 +2446,7 @@ static int btrfs_create(struct inode *di
+ 		mutex_init(&BTRFS_I(inode)->extent_mutex);
+ 		BTRFS_I(inode)->delalloc_bytes = 0;
+ 		BTRFS_I(inode)->disk_i_size = 0;
++		inode->i_mapping->writeback_index = 0;
+ 		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+ 		btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
+ 	}
+@@ -3549,6 +3510,7 @@ static int btrfs_symlink(struct inode *d
+ 		mutex_init(&BTRFS_I(inode)->extent_mutex);
+ 		BTRFS_I(inode)->delalloc_bytes = 0;
+ 		BTRFS_I(inode)->disk_i_size = 0;
++		inode->i_mapping->writeback_index = 0;
+ 		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+ 		btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
+ 	}
+@@ -3654,7 +3616,6 @@ static struct extent_io_ops btrfs_extent
+ 	.fill_delalloc = run_delalloc_range,
+ 	.submit_bio_hook = btrfs_submit_bio_hook,
+ 	.merge_bio_hook = btrfs_merge_bio_hook,
+-	.readpage_io_hook = btrfs_readpage_io_hook,
+ 	.readpage_end_io_hook = btrfs_readpage_end_io_hook,
+ 	.writepage_end_io_hook = btrfs_writepage_end_io_hook,
+ 	.writepage_start_hook = btrfs_writepage_start_hook,
+diff -Nurp btrfs-0.16/transaction.c btrfs-0.16.new/transaction.c
+--- btrfs-0.16/transaction.c	2008-08-05 12:13:37.000000000 -0600
++++ btrfs-0.16.new/transaction.c	2008-08-20 17:43:19.641404933 -0600
+@@ -303,12 +303,12 @@ int btrfs_write_and_wait_transaction(str
+ 					struct btrfs_root *root)
+ {
+ 	int ret;
+-	int err;
++	int err = 0;
+ 	int werr = 0;
+ 	struct extent_io_tree *dirty_pages;
+ 	struct page *page;
+ 	struct inode *btree_inode = root->fs_info->btree_inode;
+-	u64 start;
++	u64 start = 0;
+ 	u64 end;
+ 	unsigned long index;
+ 
+@@ -317,12 +317,13 @@ int btrfs_write_and_wait_transaction(str
+ 	}
+ 	dirty_pages = &trans->transaction->dirty_pages;
+ 	while(1) {
+-		ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
++		ret = find_first_extent_bit(dirty_pages, start, &start, &end,
+ 					    EXTENT_DIRTY);
+ 		if (ret)
+ 			break;
+-		clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
+ 		while(start <= end) {
++			cond_resched();
++
+ 			index = start >> PAGE_CACHE_SHIFT;
+ 			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
+ 			page = find_lock_page(btree_inode->i_mapping, index);
+@@ -343,7 +344,30 @@ int btrfs_write_and_wait_transaction(str
+ 			page_cache_release(page);
+ 		}
+ 	}
+-	err = filemap_fdatawait(btree_inode->i_mapping);
++	while(1) {
++		ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
++					    EXTENT_DIRTY);
++		if (ret)
++			break;
++
++		clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
++		while(start <= end) {
++			index = start >> PAGE_CACHE_SHIFT;
++			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
++			page = find_get_page(btree_inode->i_mapping, index);
++			if (!page)
++				continue;
++			if (PageDirty(page)) {
++				lock_page(page);
++				err = write_one_page(page, 0);
++				if (err)
++					werr = err;
++			}
++			wait_on_page_writeback(page);
++			page_cache_release(page);
++			cond_resched();
++		}
++	}
+ 	if (err)
+ 		werr = err;
+ 	return werr;
+diff -Nurp btrfs-0.16/volumes.c btrfs-0.16.new/volumes.c
+--- btrfs-0.16/volumes.c	2008-08-05 12:13:37.000000000 -0600
++++ btrfs-0.16.new/volumes.c	2008-08-20 17:43:19.651405254 -0600
+@@ -138,12 +138,18 @@ int run_scheduled_bios(struct btrfs_devi
+ {
+ 	struct bio *pending;
+ 	struct backing_dev_info *bdi;
++	struct btrfs_fs_info *fs_info;
+ 	struct bio *tail;
+ 	struct bio *cur;
+ 	int again = 0;
+ 	unsigned long num_run = 0;
++	unsigned long limit;
+ 
+ 	bdi = device->bdev->bd_inode->i_mapping->backing_dev_info;
++	fs_info = device->dev_root->fs_info;
++	limit = btrfs_async_submit_limit(fs_info);
++	limit = limit * 2 / 3;
++
+ loop:
+ 	spin_lock(&device->io_lock);
+ 
+@@ -179,7 +185,11 @@ loop:
+ 		cur = pending;
+ 		pending = pending->bi_next;
+ 		cur->bi_next = NULL;
+-		atomic_dec(&device->dev_root->fs_info->nr_async_submits);
++		atomic_dec(&fs_info->nr_async_bios);
++
++		if (atomic_read(&fs_info->nr_async_bios) < limit &&
++		    waitqueue_active(&fs_info->async_submit_wait))
++			wake_up(&fs_info->async_submit_wait);
+ 
+ 		BUG_ON(atomic_read(&cur->bi_cnt) == 0);
+ 		bio_get(cur);
+@@ -2135,6 +2145,7 @@ int schedule_bio(struct btrfs_root *root
+ 			 int rw, struct bio *bio)
+ {
+ 	int should_queue = 1;
++	unsigned long limit;
+ 
+ 	/* don't bother with additional async steps for reads, right now */
+ 	if (!(rw & (1 << BIO_RW))) {
+@@ -2145,12 +2156,12 @@ int schedule_bio(struct btrfs_root *root
+ 	}
+ 
+ 	/*
+-	 * nr_async_sumbits allows us to reliably return congestion to the
++	 * nr_async_bios allows us to reliably return congestion to the
+ 	 * higher layers.  Otherwise, the async bio makes it appear we have
+ 	 * made progress against dirty pages when we've really just put it
+ 	 * on a queue for later
+ 	 */
+-	atomic_inc(&root->fs_info->nr_async_submits);
++	atomic_inc(&root->fs_info->nr_async_bios);
+ 	WARN_ON(bio->bi_next);
+ 	bio->bi_next = NULL;
+ 	bio->bi_rw |= rw;
+@@ -2171,6 +2182,11 @@ int schedule_bio(struct btrfs_root *root
+ 	if (should_queue)
+ 		btrfs_queue_worker(&root->fs_info->submit_workers,
+ 				   &device->work);
++
++	limit = btrfs_async_submit_limit(root->fs_info);
++	wait_event_timeout(root->fs_info->async_submit_wait,
++			   (atomic_read(&root->fs_info->nr_async_bios) < limit),
++			   HZ/10);
+ 	return 0;
+ }
+ 
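The heart of this first hotfix set is a back-pressure mechanism: schedule_bio() now counts in-flight bios in nr_async_bios and sleeps on async_submit_wait (with an HZ/10 timeout) once the count exceeds btrfs_async_submit_limit(), while run_scheduled_bios() decrements the counter and wakes the waiters once it drops below two thirds of the limit. What follows is a minimal userspace sketch of that producer/consumer throttle, with a pthread mutex and condition variable standing in for the kernel's atomic counter and waitqueue; the names, the LIMIT value, and the 100 ms timeout are illustrative assumptions, not code from the patch.

/* throttle.c - userspace sketch of the nr_async_bios throttle */
#include <pthread.h>
#include <stdio.h>
#include <time.h>

#define LIMIT 256			/* btrfs_async_submit_limit() stand-in */

static int nr_async = 0;		/* stands in for nr_async_bios */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wait_q = PTHREAD_COND_INITIALIZER; /* async_submit_wait */

/* producer side: queue one unit of work, then throttle while over the limit */
static void submit_one(void)
{
	struct timespec deadline;

	pthread_mutex_lock(&lock);
	nr_async++;			/* atomic_inc(&fs_info->nr_async_bios) */

	/* wait_event_timeout(wq, nr_async < LIMIT, HZ/10) analogue: ~100 ms */
	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_nsec += 100 * 1000 * 1000;
	if (deadline.tv_nsec >= 1000000000L) {
		deadline.tv_sec += 1;
		deadline.tv_nsec -= 1000000000L;
	}
	while (nr_async >= LIMIT) {
		if (pthread_cond_timedwait(&wait_q, &lock, &deadline) != 0)
			break;		/* timed out: make progress anyway */
	}
	pthread_mutex_unlock(&lock);
}

/* consumer side: retire one unit and wake throttled producers early */
static void complete_one(void)
{
	pthread_mutex_lock(&lock);
	nr_async--;			/* atomic_dec(&fs_info->nr_async_bios) */
	if (nr_async < LIMIT * 2 / 3)	/* "limit = limit * 2 / 3" in the patch */
		pthread_cond_broadcast(&wait_q);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	submit_one();
	complete_one();
	printf("in flight now: %d\n", nr_async);
	return 0;
}

Build and run with: gcc -pthread -o throttle throttle.c && ./throttle. As in the patch, the timed wait means a submitter is only slowed down, never blocked indefinitely, and the two-thirds wake-up threshold lets waiters resume in batches rather than one completion at a time.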