--------------------- PatchSet 891 Date: 2002/08/08 22:34:56 Author: sct Log: Add initial code to track the pending dirty inodes against a given superblock for deferred updating of the buffer cache on commit. Members: fs/ext3/inode.c:1.64.2.16.2.2->1.64.2.16.2.3 [quota-branch] fs/ext3/super.c:1.34.2.19.2.2->1.34.2.19.2.3 [quota-branch] fs/jbd/commit.c:1.46.2.3.2.1->1.46.2.3.2.2 [quota-branch] include/linux/ext3_fs.h:1.20.2.16.2.1->1.20.2.16.2.2 [quota-branch] include/linux/ext3_fs_i.h:1.10.12.1->1.10.12.1.4.1 [quota-branch] include/linux/ext3_fs_sb.h:1.7.2.1->1.7.2.1.2.1 [quota-branch] include/linux/jbd.h:1.37.2.8.2.3->1.37.2.8.2.4 [quota-branch] --- linux-2.4.21-pre3-rmap-ext3merge/fs/ext3/inode.c.=K0022=.orig 2003-01-23 16:30:32.000000000 +0000 +++ linux-2.4.21-pre3-rmap-ext3merge/fs/ext3/inode.c 2003-01-23 16:30:32.000000000 +0000 @@ -205,6 +205,9 @@ * having errors), but we can't free the inode if the mark_dirty * fails. */ + ext3_reserve_inode_write(handle, inode, NULL); + ext3_flush_inode_reservation(handle, inode); + if (ext3_mark_inode_dirty(handle, inode)) /* If that failed, just do the required in-core inode clear. */ clear_inode(inode); @@ -1118,7 +1121,7 @@ kunmap(page); if (pos > inode->i_size) inode->i_size = pos; - EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; + set_bit(EXT3_STATE_JDATA, &EXT3_I(inode)->i_state); } else { if (ext3_should_order_data(inode)) { ret = walk_page_buffers(handle, inode, page->buffers, @@ -1171,7 +1174,7 @@ journal_t *journal; int err; - if (EXT3_I(inode)->i_state & EXT3_STATE_JDATA) { + if (test_bit(EXT3_STATE_JDATA, &EXT3_I(inode)->i_state)) { /* * This is a REALLY heavyweight approach, but the use of * bmap on dirty files is expected to be extremely rare: @@ -1190,7 +1193,7 @@ * everything they get. 
*/ - EXT3_I(inode)->i_state &= ~EXT3_STATE_JDATA; + clear_bit(EXT3_STATE_JDATA, &EXT3_I(inode)->i_state); journal = EXT3_JOURNAL(inode); journal_lock_updates(journal); err = journal_flush(journal); @@ -2127,6 +2130,7 @@ for (block = 0; block < EXT3_N_BLOCKS; block++) inode->u.ext3_i.i_data[block] = iloc.raw_inode->i_block[block]; INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); + INIT_LIST_HEAD(&inode->u.ext3_i.i_dirty); brelse (iloc.bh); @@ -2180,9 +2184,9 @@ * buffer_head in the inode location struct. */ -static int ext3_do_update_inode(handle_t *handle, - struct inode *inode, - struct ext3_iloc *iloc) +int ext3_do_update_inode(handle_t *handle, + struct inode *inode, + struct ext3_iloc *iloc) { struct ext3_inode *raw_inode = iloc->raw_inode; struct buffer_head *bh = iloc->bh; @@ -2196,7 +2200,7 @@ } /* For fields not not tracking in the in-memory inode, * initialise them to zero for new inodes. */ - if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) + if (test_bit(EXT3_STATE_NEW, &EXT3_I(inode)->i_state)) memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size); raw_inode->i_mode = cpu_to_le16(inode->i_mode); @@ -2277,7 +2281,7 @@ rc = ext3_journal_dirty_metadata(handle, bh); if (!err) err = rc; - EXT3_I(inode)->i_state &= ~EXT3_STATE_NEW; + clear_bit(EXT3_STATE_NEW, &EXT3_I(inode)->i_state); out_brelse: brelse (bh); @@ -2285,6 +2289,28 @@ return err; } +int ext3_flush_inode_reservation(handle_t *handle, + struct inode *inode) +{ + struct ext3_iloc iloc; + + /* How to lock access to the reservation state? Updates to both + * the reservation bit and the i_where location need to be done + * in a single atomic update. 
*/ + J_ASSERT(test_bit(EXT3_STATE_INODE_RESERVATION, + &EXT3_I(inode)->i_state)); + + list_del(&EXT3_I(inode)->i_dirty); + iloc = EXT3_I(inode)->i_where; + EXT3_I(inode)->i_where.bh = NULL; + clear_bit(EXT3_STATE_INODE_RESERVATION, + &EXT3_I(inode)->i_state); + + /* This consumes the bh reference in the iloc: */ + return ext3_do_update_inode(handle, inode, &iloc); +} + + /* * ext3_write_inode() * @@ -2473,7 +2499,7 @@ } /* - * On success, We end up with an outstanding reference count against + * On success, we end up with an outstanding reference count against * iloc->bh. This _must_ be cleaned up later. */ @@ -2482,51 +2508,56 @@ struct ext3_iloc *iloc) { int err = 0; - if (handle) { - err = ext3_get_inode_loc(inode, iloc); - if (!err) { - BUFFER_TRACE(iloc->bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, iloc->bh); - if (err) { - brelse(iloc->bh); - iloc->bh = NULL; - } - } + struct ext3_iloc *where = &EXT3_I(inode)->i_where; + + if (test_and_set_bit(EXT3_STATE_INODE_RESERVATION, + &EXT3_I(inode)->i_state)) + return 0; + + /* Mark the inode dirty right away, to pin it in memory whatever + * happens until we get around to flushing it to disk in the + * next transaction commit. */ + + mark_inode_dirty_sync(inode); + + /* The reservation needs to locate the appropriate buffer_head + * for the inode and get write access to it. We won't actually + * update the buffer contents until commit time. */ + + J_ASSERT(handle); + + /* Find the inode's on-disk and in-core location, and cache that + * in the inode so that on commit, we can flush it to disk. 
*/ + + err = ext3_get_inode_loc(inode, where); + jbd_debug(3, "located inode %ld at %p\n", + inode->i_ino, where->raw_inode); + + if (!err) { + BUFFER_TRACE(where->bh, "get_write_access"); + err = ext3_journal_get_write_access(handle, where->bh); + if (err) + brelse(where->bh); } + + if (!err) { + list_add(&EXT3_I(inode)->i_dirty, + &EXT3_SB(inode->i_sb)->s_dirty); + } else { + where->bh = NULL; + clear_bit(EXT3_STATE_INODE_RESERVATION, + &EXT3_I(inode)->i_state); + } + ext3_std_error(inode->i_sb, err); return err; } -/* - * akpm: What we do here is to mark the in-core inode as clean - * with respect to inode dirtiness (it may still be data-dirty). - * This means that the in-core inode may be reaped by prune_icache - * without having to perform any I/O. This is a very good thing, - * because *any* task may call prune_icache - even ones which - * have a transaction open against a different journal. - * - * Is this cheating? Not really. Sure, we haven't written the - * inode out, but prune_icache isn't a user-visible syncing function. - * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) - * we start and wait on commits. - * - * Is this efficient/effective? Well, we're being nice to the system - * by cleaning up our inodes proactively so they can be reaped - * without I/O. But we are potentially leaving up to five seconds' - * worth of inodes floating about which prune_icache wants us to - * write out. One way to fix that would be to get prune_icache() - * to do a write_super() to free up some memory. It has the desired - * effect. 
- */ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) { - struct ext3_iloc iloc; - int err; - - err = ext3_reserve_inode_write(handle, inode, &iloc); - if (!err) - err = ext3_mark_iloc_dirty(handle, inode, &iloc); - return err; + J_ASSERT(test_bit(EXT3_STATE_INODE_RESERVATION, + &EXT3_I(inode)->i_state)); + return 0; } /* --- linux-2.4.21-pre3-rmap-ext3merge/fs/ext3/super.c.=K0022=.orig 2003-01-23 16:30:32.000000000 +0000 +++ linux-2.4.21-pre3-rmap-ext3merge/fs/ext3/super.c 2003-01-23 16:30:32.000000000 +0000 @@ -49,6 +49,10 @@ static int ext3_sync_fs(struct super_block * sb); +static void ext3_commit_callback(handle_t *handle, + journal_t *journal, + void *cookie); + #ifdef CONFIG_JBD_DEBUG int journal_no_write[2]; @@ -1164,6 +1168,7 @@ */ sb->s_op = &ext3_sops; INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ + INIT_LIST_HEAD(&sbi->s_dirty); /* inodes to be flushed next commit */ sb->s_root = 0; @@ -1217,6 +1222,14 @@ } /* + * Initialise any remaining journal information now that we have + * the journal live: + */ + + sbi->s_journal->j_cookie = (void *) sb; + sbi->s_journal->j_commit_callback = ext3_commit_callback; + + /* * The journal_load will have done any necessary log recovery, * so we can safely mount the rest of the filesystem now. */ @@ -1807,6 +1820,28 @@ unregister_filesystem(&ext3_fs_type); } +/* + * On transaction commit, any updates which have been made against this + * transaction but which are not yet on disk need to be committed to the + * buffer-cache image before we can start writing that out. + * + * This is currently used only for inode updates, to defer the update of + * the on-disk structures until commit time. 
+ */ +static void ext3_commit_callback(handle_t *handle, + journal_t *journal, + void *cookie) +{ + struct list_head *list, *next; + struct super_block *sb = (struct super_block *) cookie; + + list_for_each_safe(list, next, &EXT3_SB(sb)->s_dirty) { + struct inode *inode = list_entry(list, struct inode, u.ext3_i.i_dirty); + ext3_flush_inode_reservation(handle, inode); + } +} + + EXPORT_NO_SYMBOLS; MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); --- linux-2.4.21-pre3-rmap-ext3merge/fs/jbd/commit.c.=K0022=.orig 2003-01-23 16:30:32.000000000 +0000 +++ linux-2.4.21-pre3-rmap-ext3merge/fs/jbd/commit.c 2003-01-23 16:30:32.000000000 +0000 @@ -802,7 +802,7 @@ J_ASSERT(transaction->t_updates == 0); unlock_journal(journal); - journal->j_commit_callback(journal, journal->j_cookie); + journal->j_commit_callback(&tmp_handle, journal, journal->j_cookie); lock_journal(journal); J_ASSERT (!tmp_handle.h_sync); --- linux-2.4.21-pre3-rmap-ext3merge/include/linux/ext3_fs.h.=K0022=.orig 2003-01-23 16:30:32.000000000 +0000 +++ linux-2.4.21-pre3-rmap-ext3merge/include/linux/ext3_fs.h 2003-01-23 16:30:32.000000000 +0000 @@ -217,8 +217,10 @@ /* * Inode dynamic state flags */ -#define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ -#define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ +#define EXT3_STATE_JDATA 0 /* journaled data exists */ +#define EXT3_STATE_NEW 1 /* inode is newly created */ +#define EXT3_STATE_INODE_RESERVATION 2 /* running transaction has a + reservation on inode */ /* * ioctl commands @@ -635,18 +637,6 @@ */ #define HASH_NB_ALWAYS 1 - -/* - * Describe an inode's exact location on disk and in memory - */ -struct ext3_iloc -{ - struct buffer_head *bh; - struct ext3_inode *raw_inode; - unsigned long block_group; -}; - - /* * This structure is stuffed into the struct file's private_data field * for directories. 
It is where we put information so that we can do @@ -724,6 +714,7 @@ extern int ext3_get_inode_loc (struct inode *, struct ext3_iloc *); extern void ext3_read_inode (struct inode *); extern void ext3_write_inode (struct inode *, int); +extern int ext3_do_update_inode(handle_t *, struct inode *, struct ext3_iloc *); extern int ext3_setattr (struct dentry *, struct iattr *); extern void ext3_put_inode (struct inode *); extern void ext3_delete_inode (struct inode *); --- linux-2.4.21-pre3-rmap-ext3merge/include/linux/ext3_fs_i.h.=K0022=.orig 2003-01-23 16:30:32.000000000 +0000 +++ linux-2.4.21-pre3-rmap-ext3merge/include/linux/ext3_fs_i.h 2003-01-23 16:32:22.000000000 +0000 @@ -19,6 +19,16 @@ #include <linux/rwsem.h> /* + * Describe an inode's exact location on disk and in memory + */ +struct ext3_iloc +{ + struct buffer_head *bh; + struct ext3_inode *raw_inode; + unsigned long block_group; +}; + +/* * second extended file system inode data in memory */ struct ext3_inode_info { @@ -44,6 +54,10 @@ __u32 i_dir_start_lookup; struct list_head i_orphan; /* unlinked but open inodes */ + struct list_head i_dirty; /* dirty inodes on current transaction */ + /* Location of the inode on disk and in the buffer cache. Only + * valid while EXT3_STATE_INODE_RESERVATION is set. 
*/ + struct ext3_iloc i_where; /* * i_disksize keeps track of what the inode size is ON DISK, not --- linux-2.4.21-pre3-rmap-ext3merge/include/linux/ext3_fs_sb.h.=K0022=.orig 2003-01-23 16:30:32.000000000 +0000 +++ linux-2.4.21-pre3-rmap-ext3merge/include/linux/ext3_fs_sb.h 2003-01-23 16:32:22.000000000 +0000 @@ -71,6 +71,7 @@ struct inode * s_journal_inode; struct journal_s * s_journal; struct list_head s_orphan; + struct list_head s_dirty; unsigned long s_commit_interval; struct block_device *journal_bdev; #ifdef CONFIG_JBD_DEBUG --- linux-2.4.21-pre3-rmap-ext3merge/include/linux/jbd.h.=K0022=.orig 2003-01-23 16:30:32.000000000 +0000 +++ linux-2.4.21-pre3-rmap-ext3merge/include/linux/jbd.h 2003-01-23 16:30:32.000000000 +0000 @@ -674,7 +674,7 @@ /* Callback into the client [filesystem] made when we're about * to wrap up a commit. */ - void (*j_commit_callback)(journal_t *, void *); + void (*j_commit_callback)(handle_t *, journal_t *, void *); /* And the cookie used by the client to identify which fs is * referenced by the commit callback */ void *j_cookie;