fs/nfs/dir.c | 161 +++++++++++++++++++++++++++++++++++++++++----- fs/nfs/inode.c | 63 ++++++++++-------- fs/nfs/nfs2xdr.c | 1 fs/nfs/nfs3proc.c | 8 ++ fs/nfs/nfs3xdr.c | 18 +++-- include/linux/nfs_fs.h | 11 ++- include/linux/nfs_fs_sb.h | 5 + include/linux/nfs_xdr.h | 3 8 files changed, 221 insertions(+), 49 deletions(-) diff -u --recursive --new-file --show-c-function linux-2.4.26-05-cto2/fs/nfs/dir.c linux-2.4.26-06-rdplus/fs/nfs/dir.c --- linux-2.4.26-05-cto2/fs/nfs/dir.c 2004-04-16 09:35:34.000000000 -0700 +++ linux-2.4.26-06-rdplus/fs/nfs/dir.c 2004-04-16 09:35:36.000000000 -0700 @@ -37,6 +37,8 @@ static loff_t nfs_dir_llseek(struct file *, loff_t, int); static int nfs_readdir(struct file *, void *, filldir_t); static struct dentry *nfs_lookup(struct inode *, struct dentry *); +static int nfs_cached_lookup(struct inode *, struct dentry *, + struct nfs_fh *, struct nfs_fattr *); static int nfs_create(struct inode *, struct dentry *, int); static int nfs_mkdir(struct inode *, struct dentry *, int); static int nfs_rmdir(struct inode *, struct dentry *); @@ -130,13 +132,15 @@ int nfs_readdir_filler(nfs_readdir_descr error = NFS_PROTO(inode)->readdir(inode, cred, desc->entry->cookie, page, NFS_SERVER(inode)->dtsize, desc->plus); /* We requested READDIRPLUS, but the server doesn't grok it */ - if (desc->plus && error == -ENOTSUPP) { - NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; - desc->plus = 0; - goto again; - } - if (error < 0) + if (error < 0) { + if (error == -ENOTSUPP && desc->plus) { + NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS; + NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; + desc->plus = 0; + goto again; + } goto error; + } SetPageUptodate(page); /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either @@ -215,8 +219,7 @@ int find_dirent_page(nfs_readdir_descrip dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index); - desc->plus = NFS_USE_READDIRPLUS(inode); - page = read_cache_page(&inode->i_data, desc->page_index, + page = read_cache_page(inode->i_mapping, desc->page_index, (filler_t *)nfs_readdir_filler, desc); if (IS_ERR(page)) { status = PTR_ERR(page); @@ -267,6 +270,24 @@ int readdir_search_pagecache(nfs_readdir return res; } +static unsigned int nfs_type2dtype[] = { + DT_UNKNOWN, + DT_REG, + DT_DIR, + DT_BLK, + DT_CHR, + DT_LNK, + DT_SOCK, + DT_UNKNOWN, + DT_FIFO +}; + +static inline +unsigned int nfs_type_to_d_type(enum nfs_ftype type) +{ + return nfs_type2dtype[type]; +} + /* * Once we've found the start of the dirent within a page: fill 'er up... */ @@ -283,11 +304,17 @@ int nfs_do_filldir(nfs_readdir_descripto dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)desc->target); for(;;) { + unsigned d_type = DT_UNKNOWN; /* Note: entry->prev_cookie contains the cookie for * retrieving the current dirent on the server */ fileid = nfs_fileid_to_ino_t(entry->ino); + + /* Use readdirplus info */ + if (desc->plus && (entry->fattr->valid & NFS_ATTR_FATTR)) + d_type = nfs_type_to_d_type(entry->fattr->type); + res = filldir(dirent, entry->name, entry->len, - entry->prev_cookie, fileid, DT_UNKNOWN); + entry->prev_cookie, fileid, d_type); if (res < 0) break; file->f_pos = desc->target = entry->cookie; @@ -354,7 +381,8 @@ int uncached_readdir(nfs_readdir_descrip /* Reset read descriptor so it searches the page cache from * the start upon the next call to readdir_search_pagecache() */ desc->page_index = 0; - memset(desc->entry, 0, sizeof(*desc->entry)); + desc->entry->cookie = desc->entry->prev_cookie = 0; + desc->entry->eof = 0; out: dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status); return status; @@ -373,9 +401,11 @@ static int nfs_readdir(struct file *filp nfs_readdir_descriptor_t my_desc, *desc = &my_desc; struct nfs_entry my_entry; + struct nfs_fh fh; + struct nfs_fattr fattr; long res; - res = nfs_revalidate(dentry); + res = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (res < 0) return res; @@ -386,12 +416,16 @@ static int nfs_readdir(struct file *filp * itself. */ memset(desc, 0, sizeof(*desc)); - memset(&my_entry, 0, sizeof(my_entry)); - desc->file = filp; desc->target = filp->f_pos; - desc->entry = &my_entry; desc->decode = NFS_PROTO(inode)->decode_dirent; + desc->plus = NFS_USE_READDIRPLUS(inode); + + my_entry.cookie = my_entry.prev_cookie = 0; + my_entry.eof = 0; + my_entry.fh = &fh; + my_entry.fattr = &fattr; + desc->entry = &my_entry; while(!desc->entry->eof) { res = readdir_search_pagecache(desc); @@ -512,10 +546,19 @@ static int nfs_lookup_revalidate(struct /* Force a full look up iff the parent directory has changed */ if (nfs_check_verifier(dir, dentry)) { if (nfs_lookup_verify_inode(inode)) - goto out_bad; + goto out_zap_parent; goto out_valid; } + error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr); + if (!error) { + if (memcmp(NFS_FH(inode), &fhandle, sizeof(struct nfs_fh))!= 0) + goto out_bad; + if (nfs_lookup_verify_inode(inode)) + goto out_zap_parent; + goto out_valid_renew; + } + if (NFS_STALE(inode)) goto out_bad; @@ -527,10 +570,13 @@ static int nfs_lookup_revalidate(struct if ((error = nfs_refresh_inode(inode, &fattr)) != 0) goto out_bad; + out_valid_renew: nfs_renew_times(dentry); out_valid: unlock_kernel(); return 1; +out_zap_parent: + nfs_zap_caches(dir); out_bad: NFS_CACHEINV(dir); if (inode && S_ISDIR(inode->i_mode)) { @@ -607,6 +653,18 @@ static struct dentry *nfs_lookup(struct error = -ENOMEM; dentry->d_op = &nfs_dentry_operations; + error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr); + if (!error) { + error = -EACCES; + inode = nfs_fhget(dentry, &fhandle, &fattr); + if (inode) { + d_add(dentry, inode); + nfs_renew_times(dentry); + error = 0; + } + goto out; + } + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); inode = NULL; if (error == -ENOENT) @@ -625,6 +683,79 @@ out: return ERR_PTR(error); } +static inline +int find_dirent_name(nfs_readdir_descriptor_t *desc, struct page *page, struct dentry *dentry) +{ + struct nfs_entry *entry = desc->entry; + int status; + + while((status = dir_decode(desc)) == 0) { + if (entry->len != dentry->d_name.len) + continue; + if (memcmp(entry->name, dentry->d_name.name, entry->len)) + continue; + if (!(entry->fattr->valid & NFS_ATTR_FATTR)) + continue; + break; + } + return status; +} + +/* + * Use the cached Readdirplus results in order to avoid a LOOKUP call + * whenever we believe that the parent directory has not changed. + * + * We assume that any file creation/rename changes the directory mtime. + * As this results in a page cache invalidation whenever it occurs, + * we don't require any other tests for cache coherency. + */ +static +int nfs_cached_lookup(struct inode *dir, struct dentry *dentry, + struct nfs_fh *fh, struct nfs_fattr *fattr) +{ + nfs_readdir_descriptor_t desc; + struct nfs_server *server; + struct nfs_entry entry; + struct page *page; + unsigned long timestamp = NFS_MTIME_UPDATE(dir); + int res; + + if (!NFS_USE_READDIRPLUS(dir)) + return -ENOENT; + server = NFS_SERVER(dir); + if (server->flags & NFS_MOUNT_NOAC) + return -ENOENT; + nfs_revalidate_inode(server, dir); + + entry.fh = fh; + entry.fattr = fattr; + + desc.decode = NFS_PROTO(dir)->decode_dirent; + desc.entry = &entry; + desc.page_index = 0; + desc.plus = 1; + + for(;(page = find_get_page(dir->i_mapping, desc.page_index)); desc.page_index++) { + + res = -EIO; + if (Page_Uptodate(page)) { + desc.ptr = kmap(page); + res = find_dirent_name(&desc, page, dentry); + kunmap(page); + } + page_cache_release(page); + + if (res == 0) + goto out_found; + if (res != -EAGAIN) + break; + } + return -ENOENT; + out_found: + fattr->timestamp = timestamp; + return 0; +} + /* * Code common to create, mkdir, and mknod. */ diff -u --recursive --new-file --show-c-function linux-2.4.26-05-cto2/fs/nfs/inode.c linux-2.4.26-06-rdplus/fs/nfs/inode.c --- linux-2.4.26-05-cto2/fs/nfs/inode.c 2004-04-16 09:35:34.000000000 -0700 +++ linux-2.4.26-06-rdplus/fs/nfs/inode.c 2004-04-16 09:35:36.000000000 -0700 @@ -395,12 +395,14 @@ nfs_read_super(struct super_block *sb, v INIT_LIST_HEAD(&server->lru_busy); nfsv3_try_again: + server->caps = 0; /* Check NFS protocol revision and initialize RPC op vector * and file handle pool. */ if (data->flags & NFS_MOUNT_VER3) { #ifdef CONFIG_NFS_V3 server->rpc_ops = &nfs_v3_clientops; version = 3; + server->caps |= NFS_CAP_READDIRPLUS; if (data->version < 4) { printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n"); goto out_unlock; @@ -638,36 +640,35 @@ nfs_invalidate_inode(struct inode *inode nfs_zap_caches(inode); } +/* Don't use READDIRPLUS on directories that we believe are too large */ +#define NFS_LIMIT_READDIRPLUS (8*PAGE_SIZE) + /* * Fill in inode information from the fattr. */ static void nfs_fill_inode(struct inode *inode, struct nfs_fh *fh, struct nfs_fattr *fattr) { - /* - * Check whether the mode has been set, as we only want to - * do this once. (We don't allow inodes to change types.) + NFS_FILEID(inode) = fattr->fileid; + inode->i_mode = fattr->mode; + /* Why so? Because we want revalidate for devices/FIFOs, and + * that's precisely what we have in nfs_file_inode_operations. */ - if (inode->i_mode == 0) { - NFS_FILEID(inode) = fattr->fileid; - inode->i_mode = fattr->mode; - /* Why so? Because we want revalidate for devices/FIFOs, and - * that's precisely what we have in nfs_file_inode_operations. - */ - inode->i_op = &nfs_file_inode_operations; - if (S_ISREG(inode->i_mode)) { - inode->i_fop = &nfs_file_operations; - inode->i_data.a_ops = &nfs_file_aops; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &nfs_dir_inode_operations; - inode->i_fop = &nfs_dir_operations; - } else if (S_ISLNK(inode->i_mode)) - inode->i_op = &nfs_symlink_inode_operations; - else - init_special_inode(inode, inode->i_mode, fattr->rdev); - memcpy(&inode->u.nfs_i.fh, fh, sizeof(inode->u.nfs_i.fh)); - } - nfs_refresh_inode(inode, fattr); + inode->i_op = &nfs_file_inode_operations; + if (S_ISREG(inode->i_mode)) { + inode->i_fop = &nfs_file_operations; + inode->i_data.a_ops = &nfs_file_aops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &nfs_dir_inode_operations; + inode->i_fop = &nfs_dir_operations; + if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) + && fattr->size <= NFS_LIMIT_READDIRPLUS) + NFS_FLAGS(inode) |= NFS_INO_ADVISE_RDPLUS; + } else if (S_ISLNK(inode->i_mode)) + inode->i_op = &nfs_symlink_inode_operations; + else + init_special_inode(inode, inode->i_mode, fattr->rdev); + memcpy(&inode->u.nfs_i.fh, fh, sizeof(inode->u.nfs_i.fh)); } struct nfs_find_desc { @@ -742,7 +743,14 @@ __nfs_fhget(struct super_block *sb, stru if (!(inode = iget4(sb, ino, nfs_find_actor, &desc))) goto out_no_inode; - nfs_fill_inode(inode, fh, fattr); + /* + * Check whether the mode has been set, as we only want to + * do this once. (We don't allow inodes to change types.) + */ + if (inode->i_mode == 0) + nfs_fill_inode(inode, fh, fattr); + + nfs_refresh_inode(inode, fattr); dprintk("NFS: __nfs_fhget(%x/%Ld ct=%d)\n", inode->i_dev, (long long)NFS_FILEID(inode), atomic_read(&inode->i_count)); @@ -1016,6 +1024,9 @@ __nfs_refresh_inode(struct inode *inode, goto out_err; } + /* Throw out obsolete READDIRPLUS attributes */ + if (time_before(fattr->timestamp, NFS_READTIME(inode))) + return 0; /* * Make sure the inode's type hasn't changed. */ @@ -1034,7 +1045,7 @@ __nfs_refresh_inode(struct inode *inode, /* * Update the read time so we don't revalidate too often. */ - NFS_READTIME(inode) = jiffies; + NFS_READTIME(inode) = fattr->timestamp; /* * Note: NFS_CACHE_ISIZE(inode) reflects the state of the cache. @@ -1084,7 +1095,7 @@ __nfs_refresh_inode(struct inode *inode, if (NFS_CACHE_MTIME(inode) != new_mtime) { if (invalid) - NFS_MTIME_UPDATE(inode) = jiffies; + NFS_MTIME_UPDATE(inode) = fattr->timestamp; NFS_CACHE_MTIME(inode) = new_mtime; inode->i_mtime = nfs_time_to_secs(new_mtime); } diff -u --recursive --new-file --show-c-function linux-2.4.26-05-cto2/fs/nfs/nfs2xdr.c linux-2.4.26-06-rdplus/fs/nfs/nfs2xdr.c --- linux-2.4.26-05-cto2/fs/nfs/nfs2xdr.c 2004-04-16 09:35:30.000000000 -0700 +++ linux-2.4.26-06-rdplus/fs/nfs/nfs2xdr.c 2004-04-16 09:35:36.000000000 -0700 @@ -118,6 +118,7 @@ xdr_decode_fattr(u32 *p, struct nfs_fatt fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO; fattr->rdev = 0; } + fattr->timestamp = jiffies; return p; } diff -u --recursive --new-file --show-c-function linux-2.4.26-05-cto2/fs/nfs/nfs3proc.c linux-2.4.26-06-rdplus/fs/nfs/nfs3proc.c --- linux-2.4.26-05-cto2/fs/nfs/nfs3proc.c 2004-04-16 09:35:30.000000000 -0700 +++ linux-2.4.26-06-rdplus/fs/nfs/nfs3proc.c 2004-04-16 09:35:36.000000000 -0700 @@ -433,6 +433,8 @@ nfs3_proc_rmdir(struct inode *dir, struc * The decode function itself doesn't perform any decoding, it just makes * sure the reply is syntactically correct. * + * Also note that this implementation handles both plain readdir and + * readdirplus. */ static int nfs3_proc_readdir(struct inode *dir, struct rpc_cred *cred, @@ -446,7 +448,11 @@ nfs3_proc_readdir(struct inode *dir, str struct rpc_message msg = { NFS3PROC_READDIR, &arg, &res, cred }; int status; - dprintk("NFS call readdir %d\n", (unsigned int) cookie); + if (plus) + msg.rpc_proc = NFS3PROC_READDIRPLUS; + + dprintk("NFS call readdir%s %d\n", + plus? "plus" : "", (unsigned int) cookie); dir_attr.valid = 0; status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); diff -u --recursive --new-file --show-c-function linux-2.4.26-05-cto2/fs/nfs/nfs3xdr.c linux-2.4.26-06-rdplus/fs/nfs/nfs3xdr.c --- linux-2.4.26-05-cto2/fs/nfs/nfs3xdr.c 2004-04-16 09:35:30.000000000 -0700 +++ linux-2.4.26-06-rdplus/fs/nfs/nfs3xdr.c 2004-04-16 09:35:36.000000000 -0700 @@ -181,6 +181,7 @@ xdr_decode_fattr(u32 *p, struct nfs_fatt /* Update the mode bits */ fattr->valid |= (NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3); + fattr->timestamp = jiffies; return p; } @@ -606,6 +607,7 @@ err_unmap: u32 * nfs3_decode_dirent(u32 *p, struct nfs_entry *entry, int plus) { + struct nfs_entry old = *entry; u64 cookie; if (!*p++) { @@ -624,13 +626,19 @@ nfs3_decode_dirent(u32 *p, struct nfs_en entry->cookie = nfs_transform_cookie64(cookie); if (plus) { - struct nfs_fattr fattr; - p = xdr_decode_post_op_attr(p, &fattr); + entry->fattr->valid = 0; + p = xdr_decode_post_op_attr(p, entry->fattr); /* In fact, a post_op_fh3: */ if (*p++) { - struct nfs_fh fh; - p = xdr_decode_fhandle(p, &fh); - } + p = xdr_decode_fhandle(p, entry->fh); + /* Ugh -- server reply was truncated */ + if (p == NULL) { + dprintk("NFS: FH truncated\n"); + *entry = old; + return ERR_PTR(-EAGAIN); + } + } else + memset((u8*)(entry->fh), 0, sizeof(*entry->fh)); } entry->eof = !p[0] && p[1]; diff -u --recursive --new-file --show-c-function linux-2.4.26-05-cto2/include/linux/nfs_fs.h linux-2.4.26-06-rdplus/include/linux/nfs_fs.h --- linux-2.4.26-05-cto2/include/linux/nfs_fs.h 2004-04-14 11:28:18.000000000 -0700 +++ linux-2.4.26-06-rdplus/include/linux/nfs_fs.h 2004-04-16 09:35:36.000000000 -0700 @@ -102,8 +102,15 @@ do { \ #define NFS_FILEID(inode) ((inode)->u.nfs_i.fileid) -/* Inode Flags */ -#define NFS_USE_READDIRPLUS(inode) ((NFS_FLAGS(inode) & NFS_INO_ADVISE_RDPLUS) ? 1 : 0) +static inline int nfs_server_capable(struct inode *inode, int cap) +{ + return NFS_SERVER(inode)->caps & cap; +} + +static inline int NFS_USE_READDIRPLUS(struct inode *inode) +{ + return NFS_FLAGS(inode) & NFS_INO_ADVISE_RDPLUS; +} /* * These are the default flags for swap requests diff -u --recursive --new-file --show-c-function linux-2.4.26-05-cto2/include/linux/nfs_fs_sb.h linux-2.4.26-06-rdplus/include/linux/nfs_fs_sb.h --- linux-2.4.26-05-cto2/include/linux/nfs_fs_sb.h 2004-04-14 11:28:57.000000000 -0700 +++ linux-2.4.26-06-rdplus/include/linux/nfs_fs_sb.h 2004-04-16 09:35:36.000000000 -0700 @@ -10,6 +10,7 @@ struct nfs_server { struct rpc_clnt * client; /* RPC client handle */ struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ int flags; /* various flags */ + unsigned int caps; /* server capabilities */ unsigned int rsize; /* read size */ unsigned int rpages; /* read size (in pages) */ unsigned int wsize; /* write size */ @@ -36,4 +37,8 @@ struct nfs_sb_info { struct nfs_server s_server; }; +/* Server capabilities */ +#define NFS_CAP_READDIRPLUS 1 + + #endif diff -u --recursive --new-file --show-c-function linux-2.4.26-05-cto2/include/linux/nfs_xdr.h linux-2.4.26-06-rdplus/include/linux/nfs_xdr.h --- linux-2.4.26-05-cto2/include/linux/nfs_xdr.h 2004-04-16 09:35:30.000000000 -0700 +++ linux-2.4.26-06-rdplus/include/linux/nfs_xdr.h 2004-04-16 09:35:36.000000000 -0700 @@ -27,6 +27,7 @@ struct nfs_fattr { __u64 atime; __u64 mtime; __u64 ctime; + unsigned long timestamp; }; #define NFS_ATTR_WCC 0x0001 /* pre-op WCC data */ @@ -132,6 +133,8 @@ struct nfs_entry { const char * name; unsigned int len; int eof; + struct nfs_fh *fh; + struct nfs_fattr *fattr; }; /*