--------------------- PatchSet 892 Date: 2002/08/08 22:50:00 Author: sct Log: Make all ext3 inode-dirty notifications go through ext3_dirty_inode(). This cleans up a lot of code and means that we can use very simple inode-dirtying rules in the rest of ext3, but removes any chance of having non-journaled inode updates (eg. for atime updates). The new code now passes basic testing (it boots and lets you start a stress test run :) but delete isn't _quite_ right yet. Members: fs/ext3/inode.c:1.64.2.16.2.3->1.64.2.16.2.4 [quota-branch] fs/ext3/super.c:1.34.2.19.2.3->1.34.2.19.2.4 [quota-branch] fs/jbd/commit.c:1.46.2.3.2.2->1.46.2.3.2.3 [quota-branch] fs/jbd/transaction.c:1.64.2.7.2.2->1.64.2.7.2.3 [quota-branch] include/linux/ext3_fs.h:1.20.2.16.2.2->1.20.2.16.2.3 [quota-branch] include/linux/jbd.h:1.37.2.8.2.4->1.37.2.8.2.5 [quota-branch] --- linux-2.4.21-pre3-rmap-ext3merge/fs/ext3/inode.c.=K0023=.orig 2003-01-23 16:30:32.000000000 +0000 +++ linux-2.4.21-pre3-rmap-ext3merge/fs/ext3/inode.c 2003-01-23 16:30:32.000000000 +0000 @@ -39,6 +39,8 @@ */ #undef SEARCH_FROM_ZERO +static int add_inode_to_commit_queue(handle_t *handle, struct inode *inode); + /* The ext3 forget function must perform a revoke if we are freeing data * which has been journaled. Metadata (eg. indirect blocks) must be * revoked in all cases. @@ -202,17 +204,16 @@ * One subtle ordering requirement: if anything has gone wrong * (transaction abort, IO errors, whatever), then we can still * do these next steps (the fs will already have been marked as - * having errors), but we can't free the inode if the mark_dirty - * fails. + * having errors), but we can't free the inode if the + * flush-to-disk fails. */ - ext3_reserve_inode_write(handle, inode, NULL); - ext3_flush_inode_reservation(handle, inode); - - if (ext3_mark_inode_dirty(handle, inode)) + add_inode_to_commit_queue(handle, inode); + if (ext3_flush_inode_reservation(handle, inode)) /* If that failed, just do the required in-core inode clear. 
*/ clear_inode(inode); else ext3_free_inode(handle, inode); + ext3_journal_stop(handle, inode); unlock_kernel(); return; @@ -2479,47 +2480,28 @@ return ret; } -int -ext3_mark_iloc_dirty(handle_t *handle, - struct inode *inode, - struct ext3_iloc *iloc) -{ - int err = 0; - - if (handle) { - /* the do_update_inode consumes one bh->b_count */ - atomic_inc(&iloc->bh->b_count); - err = ext3_do_update_inode(handle, inode, iloc); - /* ext3_do_update_inode() does journal_dirty_metadata */ - brelse(iloc->bh); - } else { - printk(KERN_EMERG "%s: called with no handle!\n", __FUNCTION__); - } - return err; -} - -/* - * On success, we end up with an outstanding reference count against - * iloc->bh. This _must_ be cleaned up later. +/* + * The core of the ext3 dirty inode handling. + * + * This queues the inode against the current running transaction's + * dirty-inode list. The inode will not actually be updated in the + * buffer cache until we commit (or delete the inode). + * + * This results in a bh reference held by the inode's iloc structure, + * which needs to be released at commit time. */ - -int -ext3_reserve_inode_write(handle_t *handle, struct inode *inode, - struct ext3_iloc *iloc) +static int add_inode_to_commit_queue(handle_t *handle, struct inode *inode) { + int err = 0; struct ext3_iloc *where = &EXT3_I(inode)->i_where; if (test_and_set_bit(EXT3_STATE_INODE_RESERVATION, &EXT3_I(inode)->i_state)) return 0; - - /* Mark the inode dirty right away, to pin it in memory whatever - * happens until we get around to flushing it to disk in the - * next transaction commit. */ - - mark_inode_dirty_sync(inode); + list_add_tail(&EXT3_I(inode)->i_dirty, &EXT3_SB(inode->i_sb)->s_dirty); + /* The reservation needs to locate the appropriate buffer_head * for the inode and get write access to it. We won't actually * update the buffer contents until commit time. 
*/ @@ -2529,34 +2511,73 @@ /* Find the inode's on-disk and in-core location, and cache that * in the inode so that on commit, we can flush it to disk. */ - err = ext3_get_inode_loc(inode, where); + err = ext3_get_inode_loc(inode, &iloc); jbd_debug(3, "located inode %ld at %p\n", - inode->i_ino, where->raw_inode); + inode->i_ino, iloc.raw_inode); - if (!err) { - BUFFER_TRACE(where->bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, where->bh); - if (err) - brelse(where->bh); - } + if (err) + goto error; - if (!err) { - list_add(&EXT3_I(inode)->i_dirty, - &EXT3_SB(inode->i_sb)->s_dirty); - } else { - where->bh = NULL; - clear_bit(EXT3_STATE_INODE_RESERVATION, - &EXT3_I(inode)->i_state); - } + BUFFER_TRACE(iloc.bh, "get_write_access"); + err = ext3_journal_get_write_access(handle, iloc.bh); + if (err) + goto err_brelse; + /* Did somebody beat us to this? */ + if (test_and_set_bit(EXT3_STATE_INODE_RESERVATION, + &EXT3_I(inode)->i_state)) + goto err_brelse; + + EXT3_I(inode)->i_where = iloc; + +error: ext3_std_error(inode->i_sb, err); return err; + +err_brelse: + brelse(iloc.bh); + goto error; +} + + +/* + * This function is going away --- ext3_reserve_inode_write is now + * enough to guarantee a deferred, consistent inode flush once the + * transaction commits. + */ +int +ext3_mark_iloc_dirty(handle_t *handle, + struct inode *inode, + struct ext3_iloc *iloc) +{ + return ext3_mark_inode_dirty(handle, inode); } +/* + * This function is also going away: mark_inode_dirty() is now enough, + * because the resulting call to ext3_dirty_inode will do all that we + * need it to do. + */ + +int +ext3_reserve_inode_write(handle_t *handle, struct inode *inode, + struct ext3_iloc *iloc) +{ + return 0; +} + +/* + * + * On success, we end up with an outstanding reference + * count against iloc->bh. This _must_ be cleaned up later. 
+ */ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) { - J_ASSERT(test_bit(EXT3_STATE_INODE_RESERVATION, - &EXT3_I(inode)->i_state)); + /* We just need to call the mark_inode_dirty VFS routines, now. + * Those will call back into ext3_dirty_inode() to hook the + * inode onto the superblock queues. */ + + mark_inode_dirty_sync(inode); return 0; } @@ -2591,7 +2612,7 @@ } else { jbd_debug(5, "marking dirty. outer handle=%p\n", current_handle); - ext3_mark_inode_dirty(handle, inode); + add_inode_to_commit_queue(handle, inode); } ext3_journal_stop(handle, inode); out: --- linux-2.4.21-pre3-rmap-ext3merge/fs/ext3/super.c.=K0023=.orig 2003-01-23 16:30:32.000000000 +0000 +++ linux-2.4.21-pre3-rmap-ext3merge/fs/ext3/super.c 2003-01-23 16:32:22.000000000 +0000 @@ -1832,11 +1832,14 @@ journal_t *journal, void *cookie) { - struct list_head *list, *next; + struct list_head *list; + struct inode *inode; + struct super_block *sb = (struct super_block *) cookie; - list_for_each_safe(list, next, &EXT3_SB(sb)->s_dirty) { - struct inode *inode = list_entry(list, struct inode, u.ext3_i.i_dirty); + while (!list_empty(&EXT3_SB(sb)->s_dirty)) { + list = EXT3_SB(sb)->s_dirty.next; + inode = list_entry(list, struct inode, u.ext3_i.i_dirty); ext3_flush_inode_reservation(handle, inode); } } --- linux-2.4.21-pre3-rmap-ext3merge/fs/jbd/commit.c.=K0023=.orig 2003-01-23 16:30:32.000000000 +0000 +++ linux-2.4.21-pre3-rmap-ext3merge/fs/jbd/commit.c 2003-01-23 16:32:22.000000000 +0000 @@ -797,16 +797,14 @@ tmp_handle.h_ref = 1; J_ASSERT(current->journal_info == NULL); current->journal_info = &tmp_handle; - __journal_register_transaction(transaction, &tmp_handle); J_ASSERT(transaction->t_updates == 0); - + __journal_register_transaction(transaction, &tmp_handle); + unlock_journal(journal); journal->j_commit_callback(&tmp_handle, journal, journal->j_cookie); lock_journal(journal); - J_ASSERT (!tmp_handle.h_sync); - journal_stop(&tmp_handle); - + __journal_unhook_handle(transaction, 
&tmp_handle); J_ASSERT(transaction->t_updates == 0); } --- linux-2.4.21-pre3-rmap-ext3merge/fs/jbd/transaction.c.=K0023=.orig 2003-01-23 16:30:32.000000000 +0000 +++ linux-2.4.21-pre3-rmap-ext3merge/fs/jbd/transaction.c 2003-01-23 16:32:22.000000000 +0000 @@ -1415,6 +1415,14 @@ * return -EIO if a journal_abort has been executed since the * transaction began. */ + +void __journal_unhook_handle(transaction_t *transaction, handle_t *handle) +{ + current->journal_info = NULL; + transaction->t_outstanding_credits -= handle->h_buffer_credits; + transaction->t_updates--; +} + int journal_stop(handle_t *handle) { transaction_t *transaction = handle->h_transaction; @@ -1456,9 +1464,7 @@ } while (old_handle_count != transaction->t_handle_count); } - current->journal_info = NULL; - transaction->t_outstanding_credits -= handle->h_buffer_credits; - transaction->t_updates--; + __journal_unhook_handle(transaction, handle); if (!transaction->t_updates) { wake_up(&journal->j_wait_updates); if (journal->j_barrier_count) --- linux-2.4.21-pre3-rmap-ext3merge/include/linux/ext3_fs.h.=K0023=.orig 2003-01-23 16:30:32.000000000 +0000 +++ linux-2.4.21-pre3-rmap-ext3merge/include/linux/ext3_fs.h 2003-01-23 16:32:22.000000000 +0000 @@ -715,6 +715,7 @@ extern void ext3_read_inode (struct inode *); extern void ext3_write_inode (struct inode *, int); extern int ext3_do_update_inode(handle_t *, struct inode *, struct ext3_iloc *); +extern int ext3_flush_inode_reservation(handle_t *, struct inode *); extern int ext3_setattr (struct dentry *, struct iattr *); extern void ext3_put_inode (struct inode *); extern void ext3_delete_inode (struct inode *); --- linux-2.4.21-pre3-rmap-ext3merge/include/linux/jbd.h.=K0023=.orig 2003-01-23 16:30:32.000000000 +0000 +++ linux-2.4.21-pre3-rmap-ext3merge/include/linux/jbd.h 2003-01-23 16:32:22.000000000 +0000 @@ -789,6 +789,7 @@ extern void journal_sync_buffer (struct buffer_head *); extern int journal_flushpage(journal_t *, struct page *, unsigned long); 
extern int journal_try_to_free_buffers(journal_t *, struct page *, int); +extern void __journal_unhook_handle(transaction_t *, handle_t *); extern int journal_stop(handle_t *); extern int journal_flush (journal_t *); extern void journal_callback_set(handle_t *handle,