diff -urNp ocfs/fs/Config.in 2.4.20pre5aa2/fs/Config.in --- ocfs/fs/Config.in Fri Sep 6 01:46:14 2002 +++ 2.4.20pre5aa2/fs/Config.in Fri Sep 6 01:46:32 2002 @@ -110,6 +110,8 @@ dep_mbool ' DMAPI support' CONFIG_XF dep_mbool ' Debugging support (EXPERIMENTAL)' CONFIG_XFS_DEBUG $CONFIG_XFS_FS $CONFIG_EXPERIMENTAL dep_mbool ' Pagebuf debugging support (EXPERIMENTAL)' CONFIG_PAGEBUF_DEBUG $CONFIG_XFS_FS $CONFIG_EXPERIMENTAL +tristate 'Oracle Cluster Filesystem' CONFIG_OCFS_FS + if [ "$CONFIG_NET" = "y" ]; then mainmenu_option next_comment diff -urNp ocfs/fs/Makefile 2.4.20pre5aa2/fs/Makefile --- ocfs/fs/Makefile Fri Sep 6 01:46:15 2002 +++ 2.4.20pre5aa2/fs/Makefile Fri Sep 6 01:46:39 2002 @@ -69,6 +69,7 @@ subdir-$(CONFIG_DEVPTS_FS) += devpts subdir-$(CONFIG_SUN_OPENPROMFS) += openpromfs subdir-$(CONFIG_JFS_FS) += jfs subdir-$(CONFIG_XFS_FS) += xfs +subdir-$(CONFIG_OCFS_FS) += ocfs obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o diff -urNp ocfs/fs/ocfs/Common/inc/ocfs.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfs.h --- ocfs/fs/ocfs/Common/inc/ocfs.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfs.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,112 @@ +/* + * ocfs.h + * + * Main include file + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFS_H_ +#define _OCFS_H_ + +/* XXX Hack to avoid warning */ +struct mem_dqinfo; +extern inline void mark_info_dirty(struct mem_dqinfo *info); + +/* +** System header files +*/ +#define __KERNEL_SYSCALLS__ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* +** Private header files +*/ +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#endif /* _OCFS_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfscom.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfscom.h --- ocfs/fs/ocfs/Common/inc/ocfscom.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfscom.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,150 @@ +/* + * ocfscom.h + * + * Includes datatype typedefs among other things + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSCOM_H_ +#define _OCFSCOM_H_ + +#define OCFS_POINTER_SIZE (sizeof(void *)) + +typedef char eb1; +typedef short eb2; +typedef long eb4; +typedef long long eb8; +typedef signed char sb1; +typedef signed short sb2; +typedef signed long sb4; +typedef signed long long sb8; +typedef unsigned char ub1; +typedef unsigned short ub2; +typedef unsigned long ub4; +typedef unsigned long long ub8; + +typedef struct _ocfs_alloc_bm +{ + void *buf; + ub4 size; + ub4 failed; + ub4 ok_retries; +} +ocfs_alloc_bm; + +#ifndef FORMAT_UTIL + +typedef struct _ocfs_sem +{ + long magic; /* OCFS_SEM_MAGIC */ + pid_t pid; + long count; + struct semaphore sem; +} +ocfs_sem; + +#endif /* FORMAT_UTIL */ + +/* convenience macro: frees x if non-NULL, then sets x to NULL */ +#define ocfs_safefree(x) \ + do \ + { \ + if ((x) != NULL) \ + ocfs_free(x); \ + (x) = NULL; \ + } while (0) + +#define OCFS_ASSERT(x) do { if (!(x)) BUG(); } while (0) +#define OCFS_BREAKPOINT() printk("DEBUG BREAKPOINT!
%s, %d\n", \ + __FILE__, __LINE__) + +/* time is in 0.1 microsecs; the result is stored through pointer t */ +#define OcfsQuerySystemTime(t) \ + do { \ + (*(t)) = (ub8)((ub8)CURRENT_TIME * (ub8)10000000); \ + (*(t)) += (ub8)((ub8)xtime.tv_usec * (ub8)10); \ + } while (0) + +#ifdef __KERNEL__ +#define ocfs_getpid() current->pid +#endif +#ifndef __KERNEL__ +#define ocfs_getpid() getpid() +#endif + + +#ifndef FORMAT_UTIL + +typedef struct _ocfs_extent +{ + sb8 virtual; + sb8 physical; + sb8 sectors; +} +ocfs_extent; + +typedef struct _ocfs_extent_map +{ + spinlock_t lock; + ub4 capacity; + ub4 count; + bool initialized; + void *buf; +} +ocfs_extent_map; + +typedef struct _alloc_item +{ + void *address; + int length; + struct list_head list; + char tag[30]; +} +alloc_item; + +/* i_flags flag - heh yeah i know it's evil! */ +#define S_OCFS_OIN_VALID 256 + +#define inode_data_is_oin(i) ((i)->i_flags & S_OCFS_OIN_VALID) + +#define SET_INODE_OFFSET(i,o) do { \ + (i)->i_flags &= ~S_OCFS_OIN_VALID; \ + (i)->u.generic_ip = (void *)HI(o); \ + (i)->i_ino = LO(o); \ + } while (0) + +#define GET_INODE_OFFSET(i) ((ub8)((((ub8)((ub4)(i)->u.generic_ip))<<32) + \ + ((ub8)(i)->i_ino))) + +#define SET_INODE_OIN(i,o) do { \ + (i)->i_flags |= S_OCFS_OIN_VALID; \ + (i)->u.generic_ip = (void *)(o); \ + } while (0) + + +#endif /* FORMAT_UTIL */ + +#define FIRST_FILE_ENTRY(dir) ((char *) ((char *)(dir))+OCFS_SECTOR_SIZE) +#define FILEENT(dir,idx) ((ocfs_file_entry *) ( ((char *)(dir)) + \ + (((dir)->index[idx]+1) * OCFS_SECTOR_SIZE))) + +#endif /* _OCFSCOM_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfsconst.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsconst.h --- ocfs/fs/ocfs/Common/inc/ocfsconst.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsconst.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,240 @@ +/* + * ocfsconst.h + * + * constants + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved.
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#define OCFS_DIR_FILENAME "DirFile" +#define OCFS_DIR_BITMAP_FILENAME "DirBitMapFile" +#define OCFS_FILE_EXTENT_FILENAME "ExtentFile" +#define OCFS_FILE_EXTENT_BITMAP_FILENAME "ExtentBitMapFile" +#define OCFS_RECOVER_LOG_FILENAME "RecoverLogFile" +#define OCFS_CLEANUP_LOG_FILENAME "CleanUpLogFile" + +#define ONE_SECOND (10 * 1000 * 1000) + +/* +** The Volume Recognition Sector +** Signature - 128 Bytes +** Volume Label - 64 bytes (MAXIMUM_VOLUME_LABEL_LENGTH) +** Volume Serial Number - 8 Bytes +** Volume Length - 8 Bytes +** Volume Starting Offset - 8 Bytes +** Root Directory Starting Offset - 8 Bytes +*/ + +#define ONE_MILLI_SEC (10 * 1000) /* in 0.1 microsec unit */ +#define ONE_MEGA_BYTE (1 * 1024 * 1024) + +#define MISS_COUNT_VALUE 5 + +#define OCFS_DEFAULT_DIR_NODE_SIZE (1024*128) +#define OCFS_DEFAULT_FILE_NODE_SIZE (512) + +/* +** The following flag values reflect the operation to be performed +** by ocfs_create_modify_file +*/ +#define FLAG_FILE_CREATE 0x1 +#define FLAG_FILE_EXTEND 0x2 +#define FLAG_FILE_DELETE 0x4 +#define FLAG_FILE_RENAME 0x8 +#define FLAG_FILE_UPDATE 0x10 +#define FLAG_FILE_CREATE_DIR 0x40 +#define
FLAG_FILE_UPDATE_OIN 0x80 +#define FLAG_FILE_RELEASE_MASTER 0x100 +#define FLAG_CHANGE_MASTER 0x400 +#define FLAG_ADD_OIN_MAP 0x800 +#define FLAG_DIR 0x1000 +#define FLAG_DEL_NAME 0x20000 +#define FLAG_RESET_VALID 0x40000 +#define FLAG_FILE_RELEASE_CACHE 0x400000 +#define FLAG_FILE_CREATE_CDSL 0x800000 +#define FLAG_FILE_DELETE_CDSL 0x1000000 +#define FLAG_FILE_CHANGE_TO_CDSL 0x4000000 +#define FLAG_FILE_TRUNCATE 0x8000000 // kch- consider removing this and ocfs_truncate_file + +#define OCFS_FILE_VOL_META_DATA 0 +#define OCFS_FILE_VOL_LOG_FILE (OCFS_FILE_VOL_META_DATA + \ + OCFS_MAXIMUM_NODES) +#define OCFS_FILE_DIR_ALLOC (OCFS_FILE_VOL_LOG_FILE + \ + OCFS_MAXIMUM_NODES) +#define OCFS_FILE_DIR_ALLOC_BITMAP (OCFS_FILE_DIR_ALLOC + OCFS_MAXIMUM_NODES) +#define OCFS_FILE_FILE_ALLOC (OCFS_FILE_DIR_ALLOC_BITMAP + \ + OCFS_MAXIMUM_NODES) +#define OCFS_FILE_FILE_ALLOC_BITMAP (OCFS_FILE_FILE_ALLOC + OCFS_MAXIMUM_NODES) +#define LOG_FILE_BASE_ID (OCFS_FILE_FILE_ALLOC_BITMAP + \ + OCFS_MAXIMUM_NODES) +#define CLEANUP_FILE_BASE_ID (LOG_FILE_BASE_ID + OCFS_MAXIMUM_NODES) +#ifdef LOCAL_ALLOC +#define OCFS_VOL_BITMAP_FILE (CLEANUP_FILE_BASE_ID + OCFS_MAXIMUM_NODES) +#endif + + +#define OCFS_LOG_SECTOR_SIZE 9 +#define OCFS_SECTOR_SIZE (1<needs_verification = true; \ + ocfs_verify_update_oin((a)->osb, (a)); \ + } while (0) + +#define OIN_NEEDS_VERIFICATION(a) \ + ((a)->needs_verification) +#define OIN_UPDATED(a) ((a)->needs_verification = false) + +#define IS_VALID_DIR_NODE(ptr) \ + (!strncmp((ptr)->signature, OCFS_DIR_NODE_SIGNATURE, \ + strlen(OCFS_DIR_NODE_SIGNATURE))) + +#define IS_VALID_FILE_ENTRY(ptr) \ + (!strcmp((ptr)->signature, OCFS_FILE_ENTRY_SIGNATURE)) + +#define IS_VALID_EXTENT_HEADER(ptr) \ + (!strcmp((ptr)->signature, OCFS_EXTENT_HEADER_SIGNATURE)) + +#define IS_VALID_EXTENT_DATA(ptr) \ + (!strcmp((ptr)->signature, OCFS_EXTENT_DATA_SIGNATURE)) + +#define IS_VALID_NODE_NUM(node) \ + (((node) >= 0) && ((node) < OCFS_MAXIMUM_NODES)) + +#define OCFS_GET_EXTENT(vbo, 
extent, k) \ + do { \ + for ((k) = 0; (k) < OCFS_MAX_DATA_EXTENTS; (k)++) { \ + if((sb8)((extent)->extents[(k)].file_off + \ + (extent)->extents[(k)].num_clusters) > (vbo)) \ + break; \ + } \ + } while(0) + +#define OCFS_GET_FILE_ENTRY_EXTENT(vbo, fileentry, k) \ + do { \ + for ((k) = 0; (k) < OCFS_MAX_FILE_ENTRY_EXTENTS; (k)++) { \ + if((sb8)((fileentry)->extents[(k)].file_off + \ + (fileentry)->extents[(k)].num_clusters) > (vbo)) \ + break; \ + } \ + } while(0) + +#define CHECK_FOR_LAST_EXTENT(fileentry, k) \ + do { \ + for((k) = 0; (k) < OCFS_MAX_FILE_ENTRY_EXTENTS; (k)++) { \ + if((fileentry)->extents[(k)].disk_off == 0) \ + break; \ + } \ + (k) = ((k) >= 1) ? ((k) - 1) : (k); \ + } while(0) + +struct _ocfs_file; +struct _ocfs_inode; +struct _ocfs_super; + +/* +** Macros: flag helpers, and helpers that round a value up to a multiple +*/ +#define OCFS_SET_FLAG(flag, value) ((flag) |= (value)) +#define OCFS_CLEAR_FLAG(flag, value) ((flag) &= ~(value)) + +#define OCFS_SECTOR_ALIGN(buf) \ + ((ub8)(buf) + \ + (((ub8)(buf) % OCFS_SECTOR_SIZE) ? \ + (OCFS_SECTOR_SIZE - ((ub8)(buf) % OCFS_SECTOR_SIZE)):0)) + +#define OCFS_ALIGN(val, align) \ + ((ub8)(val) + \ + (((ub8)(val) % (align)) ? ((align) - ((ub8)(val) % (align))): 0)) + +/* +** Structures... +*/ + +#define IS_NODE_ALIVE(pubmap, i, numnodes) \ + (((pubmap) >> ((i) % (numnodes))) & 0x1) + +#define IS_VALIDBIT_SET(flags) ((flags) & 0x1) + +#define SET_VALID_BIT(flags) ((flags) |= 0x1) + +/* +** All structures have a type, and a size associated with it. +** The type serves to identify the structure. The size is used for +** consistency checking ...
+*/ +#define UPDATE_PUBLISH_MAP(pubmap, num, flag, numnodes) \ + do { \ + ub8 var = 0x1; \ + if(!(flag)) \ + (pubmap) &= (~(var << ((num) % (numnodes)))); \ + else \ + (pubmap) |= (var << ((num) % (numnodes))); \ + } while(0) + +typedef struct _ocfs_obj_id +{ + ub4 type; /* 4 byte signature to uniquely identify the struct */ + ub4 size; /* sizeof the struct */ +} +ocfs_obj_id; + +/************************************************************************** +** Each file open instance is represented by a context control block. +** For each successful create/open request; a file object and a ocfs_file will +** be created. +** For open operations performed internally by the FSD, there may not +** exist file objects; but a ocfs_file will definitely be created. +** This structure must be quad-word aligned because it is zone allocated. +**************************************************************************/ +typedef struct _ocfs_file +{ + ocfs_obj_id obj_id; + struct _ocfs_inode *oin; /* ptr to the assoc. 
ocfs_inode */ + struct list_head next_ofile; /* all OFILEs for a ocfs_inode are linked */ + struct file *k_file; + ub8 curr_byte_off; + sb8 curr_dir_off; + void *curr_dir_buf; + long f_iobuf_lock; + struct kiobuf *f_iobuf; +} +ocfs_file; + +typedef struct _ocfs_inode ocfs_inode; +typedef struct _ocfs_super ocfs_super; +typedef struct _ocfs_superduper ocfs_superduper; + +typedef struct _ocfs_lock_res +{ + ub4 signature; + ub1 lock_type; /* Support only Exclusive & Shared */ + ub4 ref_cnt; /* Used in case of Shared resources */ + sb4 master_node_num; /* Master Node */ + ub8 last_upd_seq_num; + ub8 last_lock_upd; + ub8 sector_num; + ub8 oin_openmap; + ub1 in_use; + int thread_id; + struct list_head cache_list; + bool in_cache_list; + ub4 lock_state; + ocfs_inode *oin; + spinlock_t lock_mutex; + wait_queue_head_t *voted_event; + ub8 req_vote_map; + ub8 got_vote_map; + ub4 vote_status; + ub8 last_write_time; + ub8 last_read_time; + sb4 writer_node_num; + sb4 reader_node_num; +} +ocfs_lock_res; + +struct _ocfs_inode +{ + ocfs_obj_id obj_id; + sb8 alloc_size; + struct inode *inode; + ocfs_sem main_res; + ocfs_sem paging_io_res; + ocfs_lock_res *lock_res; + ub8 file_disk_off; /* file location on the volume */ + ub8 dir_disk_off; /* for dirs, offset to dirnode structure */ + ub8 chng_seq_num; + ub8 parent_dirnode_off; /* from the start of vol */ + ocfs_extent_map map; + struct _ocfs_super *osb; /* ocfs_inode belongs to this volume */ + ub4 oin_flags; + struct list_head next_ofile; /* list of all ofile(s) */ + bool open_for_write; + ub4 ref_cnt; /* when = 0, free ocfs_inode */ + ub4 open_hndl_cnt; + bool needs_verification; + bool cache_enabled; +}; + +typedef enum _ocfs_vol_state +{ + VOLUME_DISABLED, + VOLUME_INIT, + VOLUME_ENABLED, + VOLUME_LOCKED, + VOLUME_IN_RECOVERY, + VOLUME_MOUNTED, + VOLUME_BEING_DISMOUNTED, + VOLUME_DISMOUNTED +} +ocfs_vol_state; + +typedef struct _ocfs_node_config_info +{ + char node_name[MAX_NODE_NAME_LENGTH]; + ocfs_ipc_config_info 
ipc_config[OCFS_MAX_IPC]; + ub8 exp_recv[OCFS_MAX_IPC]; + ub1 num_interfaces; + ub1 primary_comm; + ub1 state; + ub1 last_comm_indx; +} +ocfs_node_config_info; + +/* + * ocfs_super + * + * A mounted volume is represented using the following structure. + */ +struct _ocfs_super +{ + ocfs_obj_id obj_id; + ocfs_sem osb_res; /* resource to protect the ocfs_super */ + struct list_head osb_next; /* list of ocfs_super(s) */ + ub4 osb_id; /* id used by the proc interface */ + struct completion complete; + struct task_struct *dlm_task; + ub4 osb_flags; + sb8 file_open_cnt; /* num of open files/dirs. vol cannot be dismounted if > 0 */ + ub8 publ_map; /* each bit represents state of node */ + HASHTABLE root_sect_node; /* lockres->sector_num hash */ + struct list_head cache_lock_list; + struct super_block *sb; + ocfs_inode *oin_root_dir; /* ptr to the root dir ocfs_inode */ + ocfs_vol_layout vol_layout; + ocfs_vol_node_map vol_node_map; + ocfs_node_config_info *node_cfg_info[OCFS_MAXIMUM_NODES]; + ub8 cfg_seq_num; + bool cfg_initialized; + ub4 num_cfg_nodes; + sb4 node_num; + ub1 hbm; + ub4 hbt; + ub8 log_disk_off; + ub8 log_meta_disk_off; + ub8 log_file_size; + ub4 sect_size; + bool needs_flush; + bool commit_cache_exec; + ocfs_sem map_lock; + ocfs_extent_map metadata_map; + ocfs_extent_map trans_map; + ocfs_alloc_bm cluster_bitmap; + ub4 max_dir_node_ent; + ocfs_vol_state vol_state; + sb8 curr_trans_id; + bool trans_in_progress; + ocfs_sem log_lock; + ocfs_sem recovery_lock; + ub4 node_recovering; +#ifdef PARANOID_LOCKS + ocfs_sem dir_alloc_lock; + ocfs_sem file_alloc_lock; +#endif /* PARANOID_LOCKS */ + ocfs_sem vol_alloc_lock; + struct timer_list lock_timer; + bool lock_stop; + wait_queue_head_t lock_event; + bool cache_fs; +}; + +typedef struct _ocfs_comm_info +{ + ub4 addr; + ub4 mask; + ub4 type; + ub4 active; + ub4 port; + ub4 valid; +} +ocfs_comm_info; + +typedef struct _ocfs_global_ctxt +{ + ocfs_obj_id obj_id; + ocfs_sem res; +#if !defined(DLM_THREAD_PER_VOLUME) + 
ocfs_sem dlm_thread_mon; +#endif + struct list_head osb_next; /* List of all volumes */ + kmem_cache_t *oin_cache; + kmem_cache_t *ofile_cache; + kmem_cache_t *fe_cache; + kmem_cache_t *lockres_cache; + ub4 flags; + char *node_name; + char *cluster_name; + ocfs_comm_info comm_info[OCFS_MAX_IPC]; + ub4 num_ipc; + wait_queue_head_t flush_event; + ub1 hbm; +} +ocfs_global_ctxt; + +typedef struct _ocfs_io_runs +{ + ub8 disk_off; + ub4 offset; + ub4 byte_cnt; +} +ocfs_io_runs; + +#if defined(OCFS_MEM_DBG) +# define ocfs_malloc(Size) ocfs_linux_dbg_alloc(Size, __FILE__, __LINE__) +# define ocfs_free ocfs_linux_dbg_free +#elif !defined(OCFS_MEM_DBG) +# define ocfs_malloc(Size) kmalloc((size_t)(Size), GFP_NOFS) +# define ocfs_free kfree +#endif /* ! defined(OCFS_MEM_DBG) */ + +typedef struct _ocfs_ipc_ctxt +{ + ocfs_sem ipc_ctxt_res; + ub4 dlm_msg_size; + ub2 version; + bool init; + bool re_init; + wait_queue_head_t event[OCFS_MAX_IPC]; + struct socket *send_sock; + struct socket *recv_sock; + struct completion complete; + struct task_struct *task; +} +ocfs_ipc_ctxt; + +typedef enum _ocfs_protocol +{ + OCFS_TCP = 1, + OCFS_UDP +} +ocfs_protocol; + +extern ocfs_ipc_ctxt OcfsIpcCtxt; + +typedef struct _ocfs_ipc_dlm_config +{ + ub2 version; + ub4 msg_size; + ub4 num_recv_threads; +} +ocfs_ipc_dlm_config; + +/* +** Globals ... +*/ +extern ocfs_global_ctxt OcfsGlobalCtxt; + +#endif /* _OCFSDEF_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfsdisk.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsdisk.h --- ocfs/fs/ocfs/Common/inc/ocfsdisk.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsdisk.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,226 @@ +/* + * ocfsdisk.h + * + * Defines disk-based structures + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#include + +#ifndef _OCFSDISK_H_ +#define _OCFSDISK_H_ + +typedef struct _ocfs_alloc_ext +{ + ub8 file_off; /* Starting Cluster on disk */ + ub8 num_clusters; /* No of Clusters used by this alloc */ + ub8 disk_off; /* Physical Disk Offset */ +} +ocfs_alloc_ext; + +typedef struct _ocfs_publish +{ + ub8 time; + bool vote; + bool dirty; + ub4 vote_type; + ub8 vote_map; + ub8 seq_num; + ub8 dir_ent; + ub1 hbm[OCFS_MAXIMUM_NODES]; + /* Might be useful to keep track of total number of files and a */ + /* bitmap reflecting used directory entries to optimise the query */ + /* directory etc here!!!
*/ +} +ocfs_publish; + +typedef struct _ocfs_vote +{ + ub1 vote[OCFS_MAXIMUM_NODES]; + ub8 seq_num; + ub8 dir_ent; + ub1 open_handle; + ub1 pad[7]; +} +ocfs_vote; + +typedef struct _ocfs_file_entry +{ + ocfs_disk_lock disk_lock; + ub1 signature[8]; + bool local_ext; /* Is FileEntry extents used or B+tree used */ + ub1 next_free_ext; + sb1 next_del; + sb4 granularity; /* Number of leaf levels */ + ub1 filename[OCFS_MAX_FILENAME_LENGTH]; + ub2 filename_len; + ub8 file_size; + ub8 alloc_size; /* Could be >= FileSize */ + ub8 create_time; + ub8 modify_time; /* Creation time or Last Modified time */ + ocfs_alloc_ext extents[OCFS_MAX_FILE_ENTRY_EXTENTS]; + ub8 dir_node_ptr; + ub8 this_sector; + ub8 last_ext_ptr; + ub4 sync_flags; /* Valid (bit 1), Change (bit 2), Dirty (bit 3) */ + ub4 link_cnt; + ub4 attribs; + ub4 prot_bits; + ub4 uid; + ub4 gid; + ub2 dev_major; + ub2 dev_minor; +} +ocfs_file_entry; + +/* not sizeof-safe across platforms */ +typedef struct _ocfs_index_node +{ + ub8 down_ptr; + ub8 file_ent_ptr; + ub1 name_len; + ub1 name[1]; +} +ocfs_index_node; + +typedef struct _ocfs_index_hdr +{ + ocfs_disk_lock disk_lock; + ub8 signature; + sb8 up_tree_ptr; /* Pointer to parent of this dnode */ + ub8 node_disk_off; + ub1 state; /* In recovery, needs recovery etc */ + ub1 pad[7]; + ub8 down_ptr; + ub1 num_ents; /* Number of extents in this Node */ + ub1 depth; /* Depth of this Node from root of the btree */ + ub1 num_ent_used; /* Num of entries in the dir blk used up.
*/ + ub1 dir_node_flags; /* Flags */ + ub1 sync_flags; /* Flags */ + ub1 index[256]; + ub1 reserved[161]; + ub1 file_ent[1]; /* 63 entries here with 32K DIR_NODE size */ +} +ocfs_index_hdr; + +/* not sizeof-safe across platforms */ +typedef struct _ocfs_dir_node +{ + ocfs_disk_lock disk_lock; + ub1 signature[8]; + ub8 alloc_file_off; + ub4 alloc_node; + ub1 pad[4]; + ub8 free_node_ptr; + ub8 node_disk_off; + sb8 next_node_ptr; + sb8 indx_node_ptr; + sb8 next_del_ent_node; + sb8 head_del_ent_node; + ub1 first_del; + ub1 num_del; + ub1 num_ents; /* Number of extents in this Node */ + ub1 depth; /* Depth of Node from the root of the btree */ + ub1 num_ent_used; /* Number of entries in the dir blk used up. */ + ub1 dir_node_flags; /* Flags */ + ub1 sync_flags; /* Flags */ + ub1 index[256]; + ub1 index_dirty; + ub1 bad_off; + ub1 reserved[127]; + ub1 file_ent[1]; /* 63 entries here with 32K DIR_NODE size */ +} +ocfs_dir_node; + +/* Second sector on the volume contains this information */ +typedef struct _ocfs_disk_entry +{ + ub8 seq_num; + ub8 dir_ent; +} +ocfs_disk_entry; + +typedef struct _ocfs_vol_node_map +{ + ub8 time[OCFS_MAXIMUM_NODES]; + ub8 scan_time[OCFS_MAXIMUM_NODES]; + ub1 scan_rate[OCFS_MAXIMUM_NODES]; + ub1 exp_scan_rate[OCFS_MAXIMUM_NODES]; + ub8 exp_rate_chng_time[OCFS_MAXIMUM_NODES]; + ub4 miss_cnt[OCFS_MAXIMUM_NODES]; + ub8 largest_seq_num; +} +ocfs_vol_node_map; + +typedef struct _ocfs_vol_layout +{ + ub8 start_off; + ub4 num_nodes; + ub4 cluster_size; + ub1 mount_point[128]; + ub1 id[64]; + ub1 label[64]; + ub4 label_len; + ub1 pad[4]; + ub8 size; + ub8 root_start_off; + ub8 serial_num; + ub8 root_size; + ub8 publ_sect_off; + ub8 vote_sect_off; + ub8 root_bitmap_off; + ub8 root_bitmap_size; + ub8 data_start_off; + ub8 num_clusters; + ub8 root_int_off; + ub8 dir_node_size; + ub8 file_node_size; + ub8 bitmap_off; + ub8 node_cfg_off; + ub8 node_cfg_size; + ub8 new_cfg_off; + ub4 prot_bits; + ub4 uid; + ub4 gid; +} +ocfs_vol_layout; + +typedef struct
_ocfs_extent_group +{ + ub1 signature[8]; + sb4 next_free_ext; /* 0 when init, -1 when full */ + ub4 curr_sect; /* Currently available sector for use */ + ub4 max_sects; /* Maximum Number of Sectors */ + ub4 type; /* Type of this sector... either */ + /* Actual Data or a Ptr to another location */ + sb4 granularity; /* Number of leaf levels */ + ub4 alloc_node; + ub8 this_ext; + ub8 next_data_ext; + ub8 alloc_file_off; + ub8 last_ext_ptr; + ub8 up_hdr_node_ptr; + ocfs_alloc_ext extents[OCFS_MAX_DATA_EXTENTS]; +} +ocfs_extent_group; + +#endif /*_OCFSDISK_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfsdlm.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsdlm.h --- ocfs/fs/ocfs/Common/inc/ocfsdlm.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsdlm.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,137 @@ +/* + * ocfsdlm.h + * + * ipcdlm related structures + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA.
+ * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSDLM_H_ +#define _OCFSDLM_H_ + +#define MAX_UDP_PACKETS (10) +#define OCFS_LOW_MARK_UDP (4) +#define OCFS_HIGH_MARK_UDP (7) + +typedef struct _ocfs_dlm_req_master +{ + ub8 lock_id; + ub4 flags; + ub8 lock_seq_num; +} +ocfs_dlm_req_master; + +typedef struct _ocfs_dlm_reply_master +{ + ub8 lock_id; + ub4 flags; + ub8 lock_seq_num; + ub4 status; +} +ocfs_dlm_reply_master; + +typedef struct _ocfs_dlm_disk_vote_req +{ + ub8 lock_id; + ub4 flags; + ub8 lock_seq_num; +} +ocfs_dlm_disk_vote_req; + +typedef struct _ocfs_dlm_disk_vote_reply +{ + ub8 lock_id; + ub4 flags; + ub8 lock_seq_num; + ub4 status; +} +ocfs_dlm_disk_vote_reply; + +typedef struct _ocfs_dlm_msg +{ + ub4 magic; + ub4 msg_len; + ub1 vol_id[64]; + ub4 src_node; + ub4 dst_node; + ub4 msg_type; + ub4 check_sum; + ub1 msg_buf[1]; +} +ocfs_dlm_msg; + +/* +** IPC related structs +*/ +typedef struct _ocfs_recv_context +{ + ocfs_dlm_msg *recv_packet[MAX_UDP_PACKETS]; + bool free[MAX_UDP_PACKETS]; + sb4 next_free; + sb4 num_used; + atomic_t num_posted; + ocfs_sem *free_lock; + wait_queue_head_t *event; +} +ocfs_recv_context; + +typedef struct _ocfs_recv_comp_context +{ + ocfs_recv_context *recv_ctxt; + sb4 index; + int status; + ub4 recvd_len; + struct tq_struct *work_item; +} +ocfs_recv_comp_context; + +#define OCFS_MAX_DLM_PKT_SIZE (256) +enum +{ + OCFS_REQUEST_MAKE_MASTER = 1, + OCFS_REPLY_MAKE_MASTER, + OCFS_DISK_VOTE_REQUEST, + OCFS_DISK_VOTE_REPLY +}; + +typedef struct _ocfs_free_buf_ctxt +{ + bool *free; + sb4 *num_used; + ocfs_sem *free_lock; +} +ocfs_free_buf_ctxt; + +typedef struct _ocfs_dlm_comm_work_item +{ + struct tq_struct *work_item; + ocfs_dlm_msg *recv_packet; + ocfs_free_buf_ctxt free_buf_ctxt; +} +ocfs_dlm_comm_work_item; + +#define OCFS_DLM_VOTE_OK 0 +#define OCFS_DLM_VOTE_OIN_ALREADY_INUSE 1 +#define OCFS_DLM_VOTE_UPDATE_RETRY 2 +#define OCFS_DLM_VOTE_FILE_DEL 3 +#define 
OCFS_DLM_MSG_MAGIC (0x79677083) + +#endif /* _OCFSDLM_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfserr.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfserr.h --- ocfs/fs/ocfs/Common/inc/ocfserr.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfserr.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,74 @@ +/**************************************************************************** + ** Oracle Cluster File System + ** + ** File: ocfserr.h + ** + ** Module: Oracle Cluster File System Driver + ** + ** Description: + ** + ** Author: Neeraj Goyal, Suchit Kaura + ** + ** MODIFIED MM/DD/YY + ** ======== ======== + ** smushran 08-21-01 - Portable + ** skaura 04-04-00 - Created + ***************************************************************************** + ** Copyright (c) 2000, 2001, 2002 by Oracle Corporation. All Rights Reserved. + ** This document contains proprietary information about Oracle Corporation. + ** It is provided under an agreement containing restrictions on use and + ** disclosure and is also protected by law. 
+ ****************************************************************************/ + +#ifndef _OCFSERR_H_ +#define _OCFSERR_H_ + +/* +** Values are 32 bit values laid out as follows: +** +** 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 +** 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 +** +---+-+-+-----------------------+-------------------------------+ +** |Sev|C|R| Facility | Code | +** +---+-+-+-----------------------+-------------------------------+ +** +** where +** +** Sev - is the severity code +** +** 00 - Success +** 01 - Informational +** 10 - Warning +** 11 - Error +** +** C - is the Customer code flag +** +** R - is a reserved bit +** +** Facility - is the facility code +** +** Code - is the facility's status code +** +** +** Define the facility codes +*/ + +/* +** Define the severity codes +*/ +#define STATUS_SEVERITY_WARNING 0x2 +#define STATUS_SEVERITY_SUCCESS 0x0 +#define STATUS_SEVERITY_INFORMATIONAL 0x1 +#define STATUS_SEVERITY_ERROR 0x3 + +/* +** MessageId: OCFS_ERROR_INTERNAL_ERROR +** +** MessageText: +** +** The Sample FSD encountered an internal error. +** Please check log data information. +*/ +#define OCFS_ERROR_INTERNAL_ERROR ((ub4)0xE004A001L) + +#endif /* _OCFSERR_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfsgenalloc.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgenalloc.h --- ocfs/fs/ocfs/Common/inc/ocfsgenalloc.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgenalloc.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,111 @@ +/* + * ocfsgenalloc.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSGENALLOC_H_ +#define _OCFSGENALLOC_H_ + +#define ocfs_get_file_entry(x,y,z) ocfs_force_get_file_entry(x,y,z,false) +#define ocfs_put_file_entry(x,y) ocfs_force_put_file_entry(x,y,false) + +int ocfs_force_get_file_entry (ocfs_super * osb, ocfs_file_entry ** FileEntry, + ub8 DiskOffset, bool force); +int ocfs_force_put_file_entry (ocfs_super * osb, ocfs_file_entry * FileEntry, + bool force); + +int ocfs_read_file_entry (ocfs_super * osb, + ocfs_file_entry * FileEntry, ub8 DiskOffset); + +int ocfs_write_file_entry (ocfs_super * osb, ocfs_file_entry * FileEntry, ub8 Offset); + +void ocfs_remove_extent_map_entry (ocfs_super * osb, + ocfs_extent_map * Map, sb8 Vbo, ub4 ByteCount); + +int ocfs_allocate_new_data_node (ocfs_super * osb, + ocfs_file_entry * FileEntry, + ub8 actualDiskOffset, + ub8 actualLength, + ocfs_extent_group * ExtentHeader, ub8 * NewExtentOffset); + +int ocfs_add_to_last_data_node (ocfs_super * osb, + ocfs_inode * oin, + ocfs_file_entry * FileEntry, + ub8 actualDiskOffset, + ub8 actualLength, ub4 * ExtentIndex, bool * IncreaseDepth); + +int ocfs_update_last_data_extent (ocfs_super * osb, + ocfs_file_entry * FileEntry, ub8 NextDataOffset); + +int ocfs_grow_extent_tree (ocfs_super * osb, + ocfs_file_entry * FileEntry, + ub8 actualDiskOffset, ub8 actualLength); + +int ocfs_allocate_extent (ocfs_super * osb, + ocfs_inode * oin, + ocfs_file_entry * FileEntry, + ub8
actualDiskOffset, ub8 actualLength); + +bool ocfs_check_for_extent_merge (ocfs_alloc_ext * LastExtent, ub8 ActualDiskOffset); + +bool ocfs_get_next_extent_map_entry (ocfs_super * osb, + ocfs_extent_map * Map, + ub4 RunIndex, + sb8 * Vbo, sb8 * Lbo, ub4 * SectorCount); + +int ocfs_update_all_headers (ocfs_super * osb, + ocfs_extent_group * AllocExtent, ub8 FileSize); +int ocfs_free_extents_for_truncate (ocfs_super * osb, + ocfs_file_entry * FileEntry, + ocfs_inode * oin, sb4 LogNodeNum, ub8 FileSize); + +int ocfs_get_leaf_extent (ocfs_super * osb, + ocfs_file_entry * FileEntry, + sb8 Vbo, ocfs_extent_group * OcfsDataExtent); + +int ocfs_lookup_file_allocation (ocfs_super * osb, + ocfs_inode * oin, + sb8 Vbo, + sb8 * Lbo, + ub4 ByteCount, ub4 * NumIndex, void **Buffer); + +int ocfs_adjust_allocation (ocfs_io_runs ** IoRuns, ub4 * ioRunSize); + +bool ocfs_lookup_extent_map_entry (ocfs_super * osb, + ocfs_extent_map * Map, + sb8 Vbo, sb8 * Lbo, ub8 * SectorCount, ub4 * Index); + +bool ocfs_add_extent_map_entry (ocfs_super * osb, + ocfs_extent_map * Map, sb8 Vbo, sb8 Lbo, ub8 ByteCount); + +int ocfs_update_extent_map (ocfs_super * osb, + ocfs_extent_map * Map, + void *Buffer, + sb8 * localVbo, ub8 * remainingLength, ub4 Flag); + +int ocfs_extent_map_load (ocfs_super * osb, + ocfs_extent_map * Map, + void **Buffer, sb8 Vbo, ub8 ByteCount, ub4 * RetRuns); + +#endif /* _OCFSGENALLOC_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfsgenclose.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgenclose.h --- ocfs/fs/ocfs/Common/inc/ocfsgenclose.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgenclose.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,31 @@ +/* + * ocfsgenclose.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ +#ifndef _OCFSGENCLOSE_H_ +#define _OCFSGENCLOSE_H_ + +int ocfs_gen_close (ocfs_super * osb, ocfs_file * ofile); + +#endif /* _OCFSGENCLOSE_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfsgencreate.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgencreate.h --- ocfs/fs/ocfs/Common/inc/ocfsgencreate.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgencreate.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,98 @@ +/* + * ocfsgencreate.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details.
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSGENCREATE_H_ +#define _OCFSGENCREATE_H_ + +int ocfs_verify_update_oin (ocfs_super * osb, ocfs_inode * oin); + +int ocfs_find_contiguous_space_from_bitmap (ocfs_super * osb, + ub8 file_size, + ub8 * cluster_off, ub8 * cluster_count, bool sysfile); + +int ocfs_create_oin_from_entry (ocfs_super * osb, + ocfs_file_entry * fe, + ocfs_inode ** new_oin, + ub8 parent_dir_off, ocfs_inode * parent_oin); + +int ocfs_find_files_on_disk (ocfs_super * osb, + ub8 parent_off, + struct qstr * file_name, + ocfs_file_entry * fe, ocfs_file * ofile); + +void ocfs_initialize_dir_node (ocfs_super * osb, + ocfs_dir_node * dir_node, + ub8 bitmap_off, ub8 file_off, ub4 node); + +int ocfs_delete_file_entry (ocfs_super * osb, + ocfs_file_entry * fe, ub8 parent_off, sb4 log_node_num); + +int ocfs_rename_file (ocfs_super * osb, + ub8 parent_off, struct qstr * file_name, ub8 file_off); + +int ocfs_del_file (ocfs_super * osb, ub8 parent_off, ub4 flags, ub8 file_off); + +int ocfs_extend_file (ocfs_super * osb, ub8 parent_off, + ocfs_inode * oin, ub8 file_size, ub8 * file_off); + +int ocfs_change_file_size (ocfs_super * osb, + ub8 parent_off, + ocfs_inode * oin, + ub8 file_size, ub8 * file_off, struct iattr *attr); + +int ocfs_create_directory (ocfs_super * osb, ub8 parent_off, ocfs_file_entry * fe); + +int ocfs_create_file (ocfs_super * osb, ub8 parent_off, ocfs_file_entry * fe); + +int ocfs_create_modify_file (ocfs_super * osb, + ub8 parent_off, + ocfs_inode * oin, + struct qstr * file_name, + ub8 file_size, + ub8 * file_off, ub4 flags, ocfs_file_entry * fe, struct iattr *attr); + +int ocfs_initialize_oin (ocfs_inode * oin, + ocfs_super * osb, + ub4
flags, struct file *file_obj, ub8 file_off, ub8 lock_id); + +int ocfs_create_delete_cdsl (struct inode *inode, + struct file *filp, ocfs_super * osb, ocfs_cdsl * cdsl); + +int ocfs_find_create_cdsl (ocfs_super * osb, ocfs_file_entry * fe); + +int ocfs_update_file_entry_slot (ocfs_super * osb, ocfs_inode * oin, ocfs_rw_mode rw_mode); + +void ocfs_check_lock_state (ocfs_super * osb, ocfs_inode * oin); + +int ocfs_delete_cdsl (ocfs_super * osb, ub8 parent_off, ocfs_file_entry * fe); + +int ocfs_create_cdsl (ocfs_super * osb, ub8 parent_off, ocfs_file_entry * fe); + +int ocfs_change_to_cdsl (ocfs_super * osb, ub8 parent_off, ocfs_file_entry * fe); + +int ocfs_truncate_file (ocfs_super * osb, ocfs_inode * oin, ub8 file_size); + +#endif /* _OCFSGENCREATE_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfsgendirnode.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgendirnode.h --- ocfs/fs/ocfs/Common/inc/ocfsgendirnode.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgendirnode.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,84 @@ +/* + * ocfsgendirnode.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA.
+ * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSGENDIRNODE_H_ +#define _OCFSGENDIRNODE_H_ + +void ocfs_print_file_entry (ocfs_file_entry * fe); + +void ocfs_print_dir_node (ocfs_super * osb, ocfs_dir_node * DirNode); + +int ocfs_alloc_node_block (ocfs_super * osb, + ub8 FileSize, + ub8 * DiskOffset, + ub8 * file_off, ub8 * NumClusterAlloc, ub4 NodeNum, ub4 Type); + +int ocfs_free_vol_block (ocfs_super * osb, ocfs_free_log * FreeLog, ub4 NodeNum, ub4 Type); + +int ocfs_free_node_block (ocfs_super * osb, + ub8 file_off, ub8 Length, ub4 NodeNum, ub4 Type); + +int ocfs_free_directory_block (ocfs_super * osb, ocfs_file_entry * fe, sb4 LogNodeNum); + +int ocfs_recover_dir_node (ocfs_super * osb, + ub8 OrigDirNodeOffset, ub8 SavedDirNodeOffset); + +int ocfs_read_dir_node (ocfs_super * osb, ocfs_dir_node * DirNode, ub8 NodeDiskOffset); + +int ocfs_write_force_dir_node (ocfs_super * osb, + ocfs_dir_node * DirNode, sb4 IndexFileEntry); + +int ocfs_write_dir_node (ocfs_super * osb, + ocfs_dir_node * DirNode, sb4 IndexFileEntry); + +bool ocfs_walk_dir_node (ocfs_super * osb, + ocfs_dir_node * DirNode, + ocfs_file_entry * found_fe, ocfs_file * OFile); + +bool ocfs_search_dir_node (ocfs_super * osb, + ocfs_dir_node * DirNode, + struct qstr * SearchName, + ocfs_file_entry * found_fe, ocfs_file * OFile); + +bool ocfs_find_index (ocfs_super * osb, + ocfs_dir_node * DirNode, struct qstr * FileName, int *Index); + +int ocfs_reindex_dir_node (ocfs_super * osb, ub8 DirNodeOffset, ocfs_dir_node * DirNode); + +int ocfs_insert_dir_node (ocfs_super * osb, + ocfs_dir_node * DirNode, + ocfs_file_entry * InsertEntry, + ocfs_dir_node * LockNode, sb4 * IndexOffset); + +int ocfs_del_file_entry (ocfs_super * osb, + ocfs_file_entry * EntryToDel, ocfs_dir_node * LockNode); + +int ocfs_insert_file (ocfs_super * osb, + ocfs_dir_node * DirNode, + ocfs_file_entry * InsertEntry, + ocfs_dir_node * LockNode, ocfs_lock_res *
LockResource); + +#endif /* _OCFSGENDIRNODE_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfsgendlm.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgendlm.h --- ocfs/fs/ocfs/Common/inc/ocfsgendlm.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgendlm.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,150 @@ +/* + * ocfsgendlm.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA.
+ * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSGENDLM_H_ +#define _OCFSGENDLM_H_ + +typedef struct _ocfs_offset_map +{ + ub4 length; + ub8 log_disk_off; + ub8 actual_disk_off; +} +ocfs_offset_map; + +int ocfs_insert_cache_link (ocfs_super * osb, ocfs_lock_res * LockResource); + +int ocfs_update_lock_state (ocfs_super * osb, ocfs_lock_res * LockResource, ub4 Flags); + +int ocfs_disk_request_vote (ocfs_super * osb, + ub8 LockId, + ub4 LockType, ub4 Flags, ub8 VoteMap, ub8 * LockSeqNo); + +int ocfs_wait_for_disk_lock_release (ocfs_super * osb, + ub8 Offset, ub4 TimeToWait, ub4 LockType); + +int ocfs_wait_for_lock_release (ocfs_super * osb, + ub8 Offset, + ub4 TimeToWait, ocfs_lock_res * LockResource, ub4 LockType); + +int ocfs_get_vote_on_disk (ocfs_super * osb, + ub8 LockId, + ub4 LockType, + ub4 Flags, + ub8 * GotVoteMap, + ub8 VoteMap, ub8 LockSeqNum, ub8 * oin_open_map); + +int ocfs_disk_reset_voting (ocfs_super * osb, ub8 LockId, ub4 LockType); + +int ocfs_wait_for_vote (ocfs_super * osb, + ub8 LockId, + ub4 LockType, + ub4 Flags, + ub8 VoteMap, + ub4 TimeToWait, ub8 LockSeqNum, ocfs_lock_res * LockResource); + +int ocfs_prime_voting (ocfs_super * osb, ub8 LockId, ub4 LockType, ub8 VoteMap); /* empty */ + +int ocfs_reset_voting (ocfs_super * osb, ub8 LockId, ub4 LockType, ub8 VoteMap); + +int ocfs_request_vote (ocfs_super * osb, + ub8 LockId, ub4 LockType, ub4 Flags, ub8 VoteMap, ub8 * LockSeqNo); + +int ocfs_comm_request_vote (ocfs_super * osb, + ub8 LockId, + ub4 LockType, ub4 Flags, ocfs_file_entry * FileEntry); + +void ocfs_init_dlm_msg (ocfs_super * osb, ocfs_dlm_msg * DlmMesg, ub4 MsgSize); + +int ocfs_send_dlm_request_msg (ocfs_super * osb, + ub8 LockId, + ub4 LockType, + ub4 Flags, + ocfs_lock_res * LockResource, ub8 VoteMap, ub4 MesgType); + +int ocfs_comm_make_lock_master (ocfs_super * osb, + ub8 LockId, + ub4 LockType, + ub4 Flags, + ocfs_lock_res * LockResource, +
ocfs_file_entry * FileEntry, ub8 VoteMap); + +int ocfs_make_lock_master (ocfs_super * osb, + ub8 LockId, + ub4 LockType, + ub4 Flags, + ocfs_lock_res * LockResource, ocfs_file_entry * FileEntry); + +void ocfs_acquire_lockres (ocfs_lock_res * LockResource); + +void ocfs_release_lockres (ocfs_lock_res * LockResource); + +int ocfs_update_disk_lock (ocfs_super * osb, + ocfs_lock_res * LockResource, + ub4 Flags, ocfs_file_entry * FileEntry); + +int ocfs_update_master_on_open (ocfs_super * osb, ocfs_lock_res * lockResource); + +void ocfs_init_lockres (ocfs_super * osb, ocfs_lock_res * lockResource, ub8 LockId); + +int ocfs_create_update_lock (ocfs_super * osb, ocfs_inode * oin, ub8 LockId, ub4 Flags); + +int ocfs_get_x_for_del (ocfs_super * osb, + ub8 LockId, + ub4 LockType, + ub4 Flags, + ocfs_lock_res * lockResource, ocfs_file_entry * FileEntry); + +int ocfs_acquire_lock (ocfs_super * osb, + ub8 LockId, + ub4 LockType, + ub4 Flags, + ocfs_lock_res ** LockResource, ocfs_file_entry * LockSector); + +int ocfs_disk_release_lock (ocfs_super * osb, + ub8 LockId, + ub4 LockType, ub4 Flags, ocfs_lock_res * LockResource); + +int ocfs_release_lock (ocfs_super * osb, + ub8 LockId, + ub4 LockType, ub4 Flags, ocfs_lock_res * LockResource); + +int ocfs_init_dlm (void); + +int ocfs_add_lock_to_recovery (void); /* unused */ + +int ocfs_create_log_extent_map (ocfs_super * osb, /* unused */ + ocfs_io_runs ** TransRuns, + ub4 * PNumTransRuns, ub8 diskOffset, ub8 ByteCount); + +int ocfs_lookup_cache_link (ocfs_super * osb, + ub1 * Buffer, ub8 ActualDiskOffset, ub8 Length); + +int ocfs_process_log_file (ocfs_super * osb, bool Flag); + +int ocfs_break_cache_lock (ocfs_super * osb, ocfs_lock_res * LockRes); + +#endif /* _OCFSGENDLM_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfsgeninit.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgeninit.h --- ocfs/fs/ocfs/Common/inc/ocfsgeninit.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgeninit.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,32 @@ +/*
+ * ocfsgeninit.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSGENINIT_H_ +#define _OCFSGENINIT_H_ + +int ocfs_driver_init (void); + +#endif /* _OCFSGENINIT_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfsgenmisc.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgenmisc.h --- ocfs/fs/ocfs/Common/inc/ocfsgenmisc.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgenmisc.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,70 @@ +/* + * ocfsgenmisc.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSGENMISC_H_ +#define _OCFSGENMISC_H_ + +void ocfs_delete_name (ocfs_inode * oin); + +int ocfs_create_meta_log_files (ocfs_super * osb); + +int ocfs_create_new_oin (ocfs_inode ** Returnedoin, + ub8 * AllocationSize, + ub8 * EndOfFile, struct file *FileObject, ocfs_super * osb); + +int ocfs_create_root_dir_node (ocfs_super * osb); + +int ocfs_create_root_oin (ocfs_super * osb); + +ocfs_file *ocfs_allocate_ofile (void); + +ocfs_inode *ocfs_allocate_oin (void); + +ocfs_file_entry *ocfs_allocate_file_entry (void); + +void ocfs_release_file_entry (ocfs_file_entry * FileEntry); + +void ocfs_release_ofile (ocfs_file * OFile); + +void ocfs_delete_all_extent_maps (ocfs_inode * oin); + +void ocfs_release_oin (ocfs_inode * oin, bool FreeMemory); + +int ocfs_initialize_osb (ocfs_super * osb, + ocfs_vol_disk_hdr * VolDiskHdr, + ocfs_vol_label * VolLabel, ub4 SectorSize); + +int ocfs_verify_volume (ocfs_vol_disk_hdr * VolDiskHdr); + +int ocfs_vol_member_reconfig (ocfs_super * osb); + +int ocfs_check_volume (ocfs_super * osb); + +void ocfs_delete_osb (ocfs_super * osb); + +int ocfs_commit_cache (ocfs_super * osb, bool Flag); + +#endif /* _OCFSGENMISC_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfsgennm.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgennm.h --- ocfs/fs/ocfs/Common/inc/ocfsgennm.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgennm.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,63 @@ +/* + * ocfsgennm.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved.
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSGENNM_H_ +#define _OCFSGENNM_H_ + +int ocfs_flush_data (ocfs_inode * oin); + +void ocfs_update_publish_map (ocfs_super * osb, void *buffer, bool first_time); + +int ocfs_nm_heart_beat (ocfs_super * osb, ub4 ChannelFlag, ub1 Operation); + +int ocfs_polling_thread (void *unused); + +int ocfs_nm_join_cluster (ocfs_super * osb); /* unused */ + +int ocfs_disk_update_resource (ocfs_super * osb, + ocfs_lock_res * LockResource, ocfs_file_entry * FileEntry); + +int ocfs_find_update_res (ocfs_super * osb, + ub8 LockId, + ocfs_lock_res ** LockResource, + ocfs_file_entry * FileEntry, ub4 * Updated); + +int ocfs_vote_for_del_ren (ocfs_super * osb, + ocfs_publish * PublishToVote, + ub4 NodeAskingVote, + ocfs_vote * VoteSector, ocfs_lock_res ** lockResource); + +int ocfs_process_update_inode_request (ocfs_super * osb, + ocfs_vote * VoteSector, + ocfs_publish * PublishToVote, + ocfs_lock_res * lockResource, ub4 NodeAskingVote); + +int ocfs_process_vote (ocfs_super * osb, + ocfs_publish * PublishToVote, ub4 NodeAskingVote); + +int ocfs_nm_thread (ocfs_super * mount_osb); + +#endif /* _OCFSGENNM_H_ */ diff -urNp
ocfs/fs/ocfs/Common/inc/ocfsgenshutdn.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgenshutdn.h --- ocfs/fs/ocfs/Common/inc/ocfsgenshutdn.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgenshutdn.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,34 @@ +/* + * ocfsgenshutdn.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSGENSHUTDN_H_ +#define _OCFSGENSHUTDN_H_ + +void ocfs_commit_at_shutdown (void); + +int ocfs_common_shutdown (void); + +#endif /* _OCFSGENSHUTDN_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfsgensysfile.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgensysfile.h --- ocfs/fs/ocfs/Common/inc/ocfsgensysfile.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgensysfile.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,55 @@ +/* + * ocfsgensysfile.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved.
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSGENSYSFILE_H_ +#define _OCFSGENSYSFILE_H_ + +int ocfs_init_system_file (ocfs_super * osb, ub4 FileId); + +int ocfs_read_system_file (ocfs_super * osb, + ub4 FileId, void *Buffer, ub8 Length, ub8 Offset); + +int ocfs_write_system_file (ocfs_super * osb, + ub4 FileId, void *Buffer, ub8 Length, ub8 Offset); + +ub8 ocfs_file_to_disk_off (ocfs_super * osb, ub4 FileId, ub8 Offset); + +int ocfs_get_system_file_size (ocfs_super * osb, ub4 FileId, ub8 * Length, ub8 * AllocSize); + +int ocfs_extend_system_file (ocfs_super * osb, ub4 FileId, ub8 FileSize); + +int ocfs_find_extents_of_system_file (ocfs_super * osb, + ub8 file_off, + ub8 Length, + ocfs_file_entry * fe, void **Buffer, ub4 * NumEntries); + +int ocfs_free_file_extents (ocfs_super * osb, ocfs_file_entry * fe, sb4 LogNodeNum); + +#if !defined(DEBUGOCFS) +int ocfs_write_map_file (ocfs_super * osb); +#endif + +#endif /* _OCFSGENSYSFILE_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfsgentrans.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgentrans.h --- ocfs/fs/ocfs/Common/inc/ocfsgentrans.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgentrans.h Fri Sep 6 01:46:16
2002 @@ -0,0 +1,51 @@ +/* + * ocfsgentrans.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSGENTRANS_H_ +#define _OCFSGENTRANS_H_ + +int ocfs_free_disk_bitmap (ocfs_super * osb, ocfs_cleanup_record * log_rec); + +int ocfs_process_record (ocfs_super * osb, void *buffer); + +int ocfs_process_log (ocfs_super * osb, ub8 trans_id, ub4 node_num, ub4 * type); + +int ocfs_start_trans (ocfs_super * osb); + +int ocfs_commit_trans (ocfs_super * osb, ub8 trans_id); + +int ocfs_abort_trans (ocfs_super * osb, ub8 trans_id); + +int ocfs_reset_publish (ocfs_super * osb, ub8 node_num); + +int ocfs_recover_vol (ocfs_super * osb, ub8 node_num); + +int ocfs_write_log (ocfs_super * osb, ocfs_log_record * log_rec, ub4 type); + +int ocfs_write_node_log (ocfs_super * osb, + ocfs_log_record * log_rec, ub4 node_num, ub4 type); + +#endif /* _OCFSGENTRANS_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfsgenutil.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgenutil.h --- ocfs/fs/ocfs/Common/inc/ocfsgenutil.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgenutil.h Fri Sep
6 01:46:16 2002 @@ -0,0 +1,34 @@ +/* + * ocfsgenutil.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSGENUTIL_H_ +#define _OCFSGENUTIL_H_ + +void ocfs_debug_print (ub4 Context, ub4 Level, char *FormatStr, ...); + +int ocfs_compare_qstr (struct qstr * s1, struct qstr * s2); + +#endif /* _OCFSGENUTIL_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfsgenvolcfg.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgenvolcfg.h --- ocfs/fs/ocfs/Common/inc/ocfsgenvolcfg.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgenvolcfg.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,68 @@ +/* + * ocfsgenvolcfg.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSGENVOLCFG_H_ +#define _OCFSGENVOLCFG_H_ + +typedef struct _ocfs_cfg_task +{ + struct tq_struct task; + ocfs_super *osb; + ub8 lock_off; + ub1 *buffer; +} +ocfs_cfg_task; + +void ocfs_worker (void *Arg); + +void ocfs_assert_lock_owned (ub4 Arg); + +int ocfs_add_to_disk_config (ocfs_super * osb, ocfs_disk_node_config_info * NodeCfgInfo); + +int ocfs_config_with_disk_lock (ocfs_super * osb, ub8 LockOffset, ub1 * Buffer); + +int ocfs_release_disk_lock (ocfs_super * osb, ub8 LockOffset); + +void ocfs_cfg_worker (ocfs_super * osb); + +int ocfs_add_upd_ipc_cfg (ocfs_node_config_info ** CfgInfo, + ocfs_disk_node_config_info * NodeCfgInfo); + +int ocfs_update_node_config (ocfs_super * osb); + +int ocfs_chk_update_config (ocfs_super * osb); + +int ocfs_add_node_to_config (ocfs_super * osb); + +int ocfs_get_config (ocfs_super * osb); + +bool ocfs_is_node_config_ok (ocfs_super * osb); + +int ocfs_refresh_node_config (ocfs_super * osb); + +void ocfs_show_all_node_cfgs (ocfs_super * osb); + +#endif /* _OCFSGENVOLCFG_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfsgenvote.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgenvote.h --- ocfs/fs/ocfs/Common/inc/ocfsgenvote.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsgenvote.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,53 @@ +/* + * ocfsgenvote.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation.
All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSGENVOTE_H_ +#define _OCFSGENVOTE_H_ + +int ocfs_send_vote_reply (ocfs_super * osb, + ocfs_dlm_msg * DlmMesg, ub4 status, bool HandleOpen); + +int ocfs_comm_vote_for_del_ren (ocfs_super * osb, + ocfs_lock_res ** LockResource, ocfs_dlm_msg * DlmMesg); + +int ocfs_find_lockres (ocfs_super * osb, ub8 LockId, ocfs_lock_res ** LockResource); + +bool ocfs_check_ipc_msg (ub1 * Mesg, ub4 Length); + +void ocfs_find_osb (sb1 * VolumeID, ocfs_super ** osb); + +int ocfs_find_create_lockres (ocfs_super * osb, + ub8 LockId, ocfs_lock_res ** LockResource); + +int ocfs_comm_process_vote (ocfs_super * osb, ocfs_dlm_msg * DlmMesg); + +int ocfs_comm_process_vote_reply (ocfs_super * osb, ocfs_dlm_msg * DlmMesg); + +void ocfs_dlm_recv_msg (void *Arg); + +int ocfs_comm_process_msg (ub1 * DlmMesg); + +#endif /* _OCFSGENVOTE_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfstrace.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfstrace.h --- ocfs/fs/ocfs/Common/inc/ocfstrace.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfstrace.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,252 @@ +/* + * ocfstrace.h + * + * Trace related macros + *
+ * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSTRACE_H_ +#define _OCFSTRACE_H_ + +extern ub4 OcfsDebugCtxt; +extern ub4 OcfsDebugLevel; + +#define HI(val) ((ub4)((val) >> 32)) +#define LO(val) ((ub4)((val) & 0x00000000FFFFFFFFUL)) + +/* Tracing Levels */ +#define OCFS_DEBUG_LEVEL_ERROR 0x00000001 +#define OCFS_DEBUG_LEVEL_WARNING 0x00000002 +#define OCFS_DEBUG_LEVEL_TRACE 0x00000004 +#define OCFS_DEBUG_LEVEL_ENTRY 0x00000008 +#define OCFS_DEBUG_LEVEL_EXIT 0x00000010 +#define OCFS_DEBUG_LEVEL_VERBOSE 0x00000020 +#define OCFS_DEBUG_LEVEL_ALL 0xffffffff + +/* Tracing Contexts */ +#define OCFS_DEBUG_CONTEXT_INIT 0x00000001 /* ocfsgeninit.c,ocfsmain.c */ +#define OCFS_DEBUG_CONTEXT_MEM 0x00000002 /* ocfs_memcheck() in ocfsmain.c */ + +#define OCFS_DEBUG_CONTEXT_NM 0x00000010 /* ocfsgennm.c */ +#define OCFS_DEBUG_CONTEXT_DLM 0x00000020 /* ocfsgendlm.c */ +#define OCFS_DEBUG_CONTEXT_VOTE 0x00000040 /* ocfsgenvote.c */ +#define OCFS_DEBUG_CONTEXT_IPC 0x00000080 /* ocfsipc.c */ + +#define OCFS_DEBUG_CONTEXT_VOLCFG 0x00000100 /* ocfsgenvolcfg.c */ + +#define OCFS_DEBUG_CONTEXT_MOUNT 0x00001000 /* ocfsmount.c */ 
+#define OCFS_DEBUG_CONTEXT_SHUTDOWN  0x00002000	/* ocfsgenshutdn.c */
+#define OCFS_DEBUG_CONTEXT_CREATE    0x00004000	/* gencreate.c, create.c ?? */
+#define OCFS_DEBUG_CONTEXT_CLOSE     0x00008000	/* genclose.c, ocfsclose.c */
+
+#define OCFS_DEBUG_CONTEXT_EXTENT    0x00010000	/* ocfsgenalloc.c */
+#define OCFS_DEBUG_CONTEXT_DIRINFO   0x00020000	/* ocfsgendirnode.c */
+#define OCFS_DEBUG_CONTEXT_FILEINFO  0x00040000	/* ocfsfile.c */
+#define OCFS_DEBUG_CONTEXT_TRANS     0x00080000	/* ocfsgentrans.c */
+
+#define OCFS_DEBUG_CONTEXT_DISKIO    0x00100000	/* ocfsgenio.c */
+#define OCFS_DEBUG_CONTEXT_MISC      0x00200000	/* ocfsgenmisc.c */
+
+#define OCFS_DEBUG_CONTEXT_UTIL      0x01000000	/* ocfsgenutil.c */
+#define OCFS_DEBUG_CONTEXT_HASH      0x02000000	/* ocfshash.h */
+#define OCFS_DEBUG_CONTEXT_PORT      0x08000000	/* ocfsport.c */
+
+#define OCFS_DEBUG_CONTEXT_IOCTL     0x10000000	/* ocfsioctl.c */
+#define OCFS_DEBUG_CONTEXT_PROC      0x20000000	/* ocfsproc.c */
+#define OCFS_DEBUG_CONTEXT_IOSUP     0x40000000	/* ocfsiosup.c */
+
+#define OCFS_DEBUG_CONTEXT_ALL       0xffffffff
+
+/* OcfsDebugDump
+ *
+ * printk() the message only when at least one bit of Context is set in
+ * OcfsDebugCtxt and at least one bit of Level is set in OcfsDebugLevel.
+ *
+ * Context and Level are parenthesized in the expansion so that a compound
+ * mask (e.g. CONTEXT_A | CONTEXT_B) is tested as a whole; without the
+ * parentheses '&' binds tighter than '|' and the test is always true.
+ */
+# define OcfsDebugDump(Context, Level, fmt, arg...)			\
+	do {								\
+		if ((OcfsDebugCtxt & (Context)) &&			\
+		    (OcfsDebugLevel & (Level)))				\
+			printk(fmt, ## arg);				\
+	} while(0)
+
+/* IF macro
+ *
+ * Run the statement 'func' only when tracing is enabled for the including
+ * file's OCFS_DEBUG_CONTEXT and the TRACE level bit is set.
+ */
+#define IF_TRACE(func)							\
+	do {								\
+		if ((OcfsDebugCtxt & OCFS_DEBUG_CONTEXT) &&		\
+		    (OcfsDebugLevel & OCFS_DEBUG_LEVEL_TRACE))		\
+			func;						\
+	} while(0)
+
+/* TRACE disabled. ERROR macros are never disabled.
+ * Every entry/exit/trace macro below expands to nothing in this case.
+ */
+#if !defined(TRACE)
+# define LOG_ENTRY()
+# define LOG_EXIT()
+# define LOG_EXIT_STATUS(val)
+# define LOG_EXIT_LONG(val)
+# define LOG_EXIT_ULONG(val)
+# define LOG_EXIT_PTR(val)
+# define LOG_TRACE_STR(str)
+# define LOG_ENTRY_ARGS(fmt, arg...)
+# define LOG_EXIT_ARGS(fmt, arg...)
+# define LOG_TRACE_ARGS(fmt, arg...)
+#endif /* !defined(TRACE) */ + +/* TRACE enabled */ +#if defined(TRACE) +/* ENTRY macros */ + +/* LOG_ENTRY() + * + */ +# define LOG_ENTRY() \ + OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_ENTRY, \ + "(%d) [%u.%06u] ENTRY: %s()\n", ocfs_getpid(), \ + xtime.tv_sec, xtime.tv_usec, __FUNCTION__) + +/* LOG_ENTRY_ARGS() + * + * Note: The macro expects the args to be enclosed in parenthesis and + * terminated by a newline. + */ +# define LOG_ENTRY_ARGS(fmt, arg...) \ + do { \ + OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_ENTRY, \ + "(%d) [%u.%06u] ENTRY: %s", ocfs_getpid(), \ + xtime.tv_sec, xtime.tv_usec, __FUNCTION__); \ + OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_ENTRY, \ + fmt, ##arg); \ + } while(0) + +/* EXIT macros */ + +/* LOG_EXIT() + * + */ +# define LOG_EXIT() \ + OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_EXIT, \ + "(%d) [%u.%06u] EXIT : %s()\n", ocfs_getpid(), \ + xtime.tv_sec, xtime.tv_usec, __FUNCTION__) + +/* LOG_EXIT_STATUS() + * + */ +# define LOG_EXIT_STATUS(val) \ + OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_EXIT, \ + "(%d) [%u.%06u] EXIT : %s() = 0x%08x\n", \ + ocfs_getpid(), xtime.tv_sec, xtime.tv_usec, \ + __FUNCTION__, val) + +/* LOG_EXIT_LONG() + * + */ +# define LOG_EXIT_LONG(val) \ + OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_EXIT, \ + "(%d) [%u.%06u] EXIT : %s() = %d\n", \ + ocfs_getpid(), xtime.tv_sec, xtime.tv_usec, \ + __FUNCTION__, val) + +/* LOG_EXIT_ULONG() + * + */ +# define LOG_EXIT_ULONG(val) \ + OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_EXIT, \ + "(%d) [%u.%06u] EXIT : %s() = %u\n", \ + ocfs_getpid(), xtime.tv_sec, xtime.tv_usec, \ + __FUNCTION__, val) + +/* LOG_EXIT_PTR() + * + */ +# define LOG_EXIT_PTR(val) \ + OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_EXIT, \ + "(%d) [%u.%06u] EXIT : %s() = 0x%08x\n", \ + ocfs_getpid(), xtime.tv_sec, xtime.tv_usec, \ + __FUNCTION__, val) + +/* LOG_EXIT_ARGS() + * + * Note: The macro expects the args to be enclosed in parenthesis and 
+ * terminated by a newline. + */ +# define LOG_EXIT_ARGS(fmt, arg...) \ + do { \ + OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_EXIT, \ + "(%d) [%u.%06u] EXIT : %s(), ", \ + ocfs_getpid(), xtime.tv_sec, xtime.tv_usec, \ + __FUNCTION__); \ + OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_EXIT, \ + fmt, ## arg); \ + } while(0) +#endif /* TRACE */ + +/* ERROR macros are not compiled out */ + +/* LOG_ERROR_STR() + * + */ +#define LOG_ERROR_STR(str) \ + printk("(%d) ERROR: %s, %s(), %s, %d\n", ocfs_getpid(), \ + str, __FUNCTION__, __FILE__, __LINE__) + +/* LOG_ERROR_ARGS() + * + * Note: The macro expects the args to be enclosed in parenthesis and + * terminated by a newline. + */ +#define LOG_ERROR_ARGS(fmt, arg...) \ + do { \ + printk("(%d) ERROR: %s(), %s, %d, ", ocfs_getpid(), \ + __FUNCTION__, __FILE__, __LINE__); \ + printk(fmt, ## arg); \ + } while(0) + +/* LOG_ERROR_STATUS() + * + */ +#define LOG_ERROR_STATUS(status) \ + printk("(%d) ERROR: status = 0x%08x, %s(), %s, %d\n", \ + ocfs_getpid(), status, __FUNCTION__, __FILE__, __LINE__) + +/* TRACE enabled */ +#if defined(TRACE) +/* TRACE macros */ + +/* LOG_TRACE_STR() + * + */ +# define LOG_TRACE_STR(str) \ + OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_TRACE, \ + "(%d) TRACE: %s() %s\n", ocfs_getpid(), \ + __FUNCTION__, str) + + +/* LOG_TRACE_ARGS() + * + * Note: The macro expects the args to be enclosed in parenthesis and + * terminated by a newline. + */ +# define LOG_TRACE_ARGS(fmt, arg...) 
\ + do { \ + OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_TRACE, \ + "(%d) TRACE: %s() ", ocfs_getpid(), \ + __FUNCTION__); \ + OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_TRACE, \ + fmt, ## arg); \ + } while(0) +#endif /* TRACE */ + +#endif /* _OCFSTRACE_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfstrans.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfstrans.h --- ocfs/fs/ocfs/Common/inc/ocfstrans.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfstrans.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,168 @@ +/* + * ocfstrans.h + * + * Logging and recovery related structures + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSTRANS_H_ +#define _OCFSTRANS_H_ + +#define LOG_TYPE_DISK_ALLOC 1 +#define LOG_TYPE_DIR_NODE 2 +#define LOG_TYPE_RECOVERY 3 +#define LOG_CLEANUP_LOCK 4 +#define LOG_TYPE_TRANS_START 5 +#define LOG_TYPE_TRANS_END 6 +#define LOG_RELEASE_BDCAST_LOCK 7 +#define LOG_DELETE_ENTRY 8 +#define LOG_MARK_DELETE_ENTRY 9 +#define LOG_FREE_BITMAP 10 +#define LOG_UPDATE_EXTENT 11 +#define LOG_DELETE_NEW_ENTRY 12 + +typedef struct _ocfs_free_bitmap +{ + ub8 length; + ub8 file_off; + ub4 type; + ub4 node_num; +} +ocfs_free_bitmap; + +typedef struct _ocfs_free_extent_log +{ + ub4 index; + ub1 pad[4]; + ub8 disk_off; +} +ocfs_free_extent_log; + +#define FREE_LOG_SIZE 150 + +typedef struct _ocfs_free_log +{ + ub4 num_free_upds; + ub1 pad[4]; + ocfs_free_bitmap free_bitmap[FREE_LOG_SIZE]; +} +ocfs_free_log; + +typedef struct _ocfs_delete_log +{ + ub8 node_num; + ub8 ent_del; + ub8 parent_dirnode_off; + ub4 flags; + ub1 pad[4]; +} +ocfs_delete_log; + +typedef struct _ocfs_recovery_log +{ + ub8 node_num; +} +ocfs_recovery_log; + +#define DISK_ALLOC_DIR_NODE 1 +#define DISK_ALLOC_EXTENT_NODE 2 +#define DISK_ALLOC_VOLUME 3 + +typedef struct _ocfs_alloc_log +{ + ub8 length; + ub8 file_off; + ub4 type; + ub4 node_num; +} +ocfs_alloc_log; + +typedef struct _ocfs_dir_log +{ + ub8 orig_off; + ub8 saved_off; + ub8 length; +} +ocfs_dir_log; + +typedef struct _ocfs_lock_update +{ + ub8 orig_off; + ub8 new_off; +} +ocfs_lock_update; + +#define LOCK_UPDATE_LOG_SIZE 450 + +typedef struct _ocfs_lock_log +{ + ub4 num_lock_upds; + ub1 pad[4]; + ocfs_lock_update lock_upd[LOCK_UPDATE_LOG_SIZE]; +} +ocfs_lock_log; + +typedef struct _ocfs_bcast_rel_log +{ + ub8 lock_id; +} +ocfs_bcast_rel_log; + +typedef struct _ocfs_cleanup_record +{ + ub8 log_id; + ub4 log_type; + ub1 pad[4]; + union + { + ocfs_lock_log lock; + ocfs_alloc_log alloc; + ocfs_bcast_rel_log bcast; + ocfs_delete_log del; 
+ ocfs_free_log free; + } + rec; +} +ocfs_cleanup_record; + +typedef struct _ocfs_log_record +{ + ub8 log_id; + ub4 log_type; + ub1 pad[4]; + union + { + ocfs_dir_log dir; + ocfs_alloc_log alloc; + ocfs_recovery_log recovery; + ocfs_bcast_rel_log bcast; + ocfs_delete_log del; + ocfs_free_extent_log extent; + } + rec; +} +ocfs_log_record; + +#define LOG_RECOVER 1 +#define LOG_CLEANUP 2 + +#endif /* _OCFSTRANS_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfsver.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsver.h --- ocfs/fs/ocfs/Common/inc/ocfsver.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsver.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,32 @@ +/* + * ocfsver.h + * + * version printing function + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSVER_H_ +#define _OCFSVER_H_ + +void ocfs_version_print (void); + +#endif /* _OCFSVER_H_ */ diff -urNp ocfs/fs/ocfs/Common/inc/ocfsvol.h 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsvol.h --- ocfs/fs/ocfs/Common/inc/ocfsvol.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/inc/ocfsvol.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,190 @@ +/* + * ocfsvol.h + * + * On-disk structures. See format.h for disk layout. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSVOL_H_ +#define _OCFSVOL_H_ + +#define OCFS_MINOR_VERSION (2) +#define OCFS_MAJOR_VERSION (1) +#define OCFS_MINOR_VER_STRING "2" +#define OCFS_MAJOR_VER_STRING "1" + +#define OCFS_VOLUME_SIGNATURE "OracleCFS" + +typedef struct _ocfs_vol_disk_hdr +{ + ub4 minor_version; + ub4 major_version; + ub1 signature[128]; /* OracleCFS */ + ub1 mount_point[128]; /* Mount point.... for e.g., /mnt/vol1 */ + ub8 serial_num; + ub8 device_size; /* Size of the device in bytes */ + ub8 start_off; /* Start of the volume... 
typically 0 */ + ub8 bitmap_off; /* Offset to Volume Bitmap... */ + ub8 publ_off; /* Offset to the Publish Sector */ + ub8 vote_off; /* Offset to the Vote Sector */ + ub8 root_bitmap_off; + ub8 data_start_off; + ub8 root_bitmap_size; + ub8 root_off; + ub8 root_size; + ub8 cluster_size; /* Cluster size as specified during format */ + ub8 num_nodes; /* Max number of nodes.... OCFS_MAXIMUM_NODES */ + ub8 num_clusters; /* Number of free clusters at format */ + ub8 dir_node_size; /* OCFS_DEFAULT_DIR_NODE_SIZE */ + ub8 file_node_size; /* OCFS_DEFAULT_FILE_NODE_SIZE */ + ub8 internal_off; + ub8 node_cfg_off; /* Offset to Node Config */ + ub8 node_cfg_size; /* Size of Node Config */ + ub8 new_cfg_off; /* Offset to Node Config Lock */ + ub4 prot_bits; + ub4 uid; + ub4 gid; + sb4 excl_mount; +} +ocfs_vol_disk_hdr; + +#define DLOCK_FLAG_OPEN_MAP (0x1) +#define DLOCK_FLAG_LOCK (0x2) +#define DLOCK_FLAG_SEQ_NUM (0x4) +#define DLOCK_FLAG_MASTER (0x8) +#define DLOCK_FLAG_LAST_UPDATE (0x10) +#define DLOCK_FLAG_ALL (DLOCK_FLAG_OPEN_MAP | DLOCK_FLAG_LOCK | \ + DLOCK_FLAG_SEQ_NUM | DLOCK_FLAG_MASTER | \ + DLOCK_FLAG_LAST_UPDATE) + +typedef struct _ocfs_disk_lock +{ + sb4 curr_master; /* INVALID_MASTER -1, */ + ub1 file_lock; + ub8 last_write_time; + ub8 last_read_time; + sb4 writer_node_num; + sb4 reader_node_num; + ub8 oin_node_map; + ub8 seq_num; +} +ocfs_disk_lock; + +#define DISK_LOCK_CURRENT_MASTER(x) ( ((ocfs_disk_lock *)x)->curr_master ) +#define DISK_LOCK_OIN_MAP(x) ( ((ocfs_disk_lock *)x)->oin_node_map ) +#define DISK_LOCK_FILE_LOCK(x) ( ((ocfs_disk_lock *)x)->file_lock ) +#define DISK_LOCK_LAST_READ(x) ( ((ocfs_disk_lock *)x)->last_read_time ) +#define DISK_LOCK_LAST_WRITE(x) ( ((ocfs_disk_lock *)x)->last_write_time ) +#define DISK_LOCK_READER_NODE(x) ( ((ocfs_disk_lock *)x)->reader_node_num ) +#define DISK_LOCK_SEQNUM(x) ( ((ocfs_disk_lock *)x)->seq_num ) +#define DISK_LOCK_WRITER_NODE(x) ( ((ocfs_disk_lock *)x)->writer_node_num ) + +#define MAX_VOL_ID_LENGTH 64 + +typedef 
struct _ocfs_vol_label +{ + ocfs_disk_lock disk_lock; + ub1 label[64]; + ub2 label_len; + ub1 id[MAX_VOL_ID_LENGTH]; + ub2 id_len; + unsigned char cluster_name[64]; + ub2 cluster_name_len; +} +ocfs_vol_label; + +#define OCFS_IPC_NOT_CONFIG (0x0) +#define OCFS_IPC_STATE_CONFIG (0x1) +#define OCFS_IPC_STATE_ACTIVE (0x2) +#define OCFS_IPC_STATE_INACTIVE (0x4) +#define OCFS_IPC_STATE_PRIMARY (0x8) + +#define OCFS_IPC_DEFAULT_PORT 7001 + +typedef struct _ocfs_ipc_config_info +{ + ub4 addr; + ub4 port; + ub4 mask; + ub1 state; + ub1 type; + ub1 active; + ub1 pad[1]; +} +ocfs_ipc_config_info; + +#define OCFS_IPC_DLM_VERSION 0x0201 +#define OCFS_DLM_MAX_MSG_SIZE (256) + +#define OCFS_NODE_NOT_CONFIG 0 +#define OCFS_NODE_STATE_CONFIG 1 + +#define OCFS_MAX_IPC 3 +#define MAX_NODE_NAME_LENGTH 32 + +typedef struct _ocfs_disk_node_config_info +{ + ocfs_disk_lock disk_lock; + char node_name[MAX_NODE_NAME_LENGTH + 1]; + ocfs_ipc_config_info ipc_config[OCFS_MAX_IPC]; + ub1 num_interfaces; + ub1 primary_comm; + ub1 state; + ub1 pad[6]; +} +ocfs_disk_node_config_info; + +#define NODE_CONFIG_HDR_SIGN "NDCFG10" +#define NODE_CONFIG_SIGN_LEN 8 +#define NODE_CONFIG_VER 1 +#define NODE_MIN_SUPPORTED_VER 1 + +typedef struct _ocfs_node_config_hdr +{ + ocfs_disk_lock disk_lock; + char signature[NODE_CONFIG_SIGN_LEN]; + ub4 version; + ub4 num_nodes; + ub4 last_node; + ub8 seq_num; +} +ocfs_node_config_hdr; + +/* +** CDSL +*/ +#define OCFS_CDSL_CREATE (0x1) +#define OCFS_CDSL_DELETE (0x2) +#define OCFS_CDSL_REVERT (0x3) + +#define OCFS_FLAG_CDSL_FILE (0x1) +#define OCFS_FLAG_CDSL_DIR (0x2) + +typedef struct _ocfs_cdsl +{ + ub1 name[1024]; + ub4 flags; + ub4 operation; +} +ocfs_cdsl; + +#endif /* _OCFSVOL_H_ */ diff -urNp ocfs/fs/ocfs/Common/ocfsgenalloc.c 2.4.20pre5aa2/fs/ocfs/Common/ocfsgenalloc.c --- ocfs/fs/ocfs/Common/ocfsgenalloc.c Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/ocfsgenalloc.c Fri Sep 6 01:46:16 2002 @@ -0,0 +1,2229 @@ +/* + * ocfsgenalloc.c + * + * Allocate and 
free file system structures.
+ *
+ * Copyright (C) 2002 Oracle Corporation.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran,
+ *          Manish Singh, Wim Coekaerts
+ */
+
+#ifdef __KERNEL__
+#include
+#else
+#include
+#endif
+
+#ifndef DEBUGOCFS
+bool Debug = false;
+ub8 debugOffset = 0;
+
+
+/* Tracing */
+#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_EXTENT
+
+
+/*
+ * ocfs_force_get_file_entry()
+ *
+ * Allocate a file entry, return it through *FileEntry and fill it from
+ * DiskOffset.  With 'force' set the data is fetched with
+ * ocfs_read_force_disk() (osb->sect_size bytes), otherwise with
+ * ocfs_read_file_entry().
+ *
+ * Returns -EFAIL when FileEntry is NULL, -ENOMEM when the allocation
+ * fails, otherwise the status of the read.  The entry is not released
+ * here when the read fails; *FileEntry still refers to it.
+ */
+int ocfs_force_get_file_entry (ocfs_super * osb, ocfs_file_entry ** FileEntry,
+			       ub8 DiskOffset, bool force)
+{
+	ocfs_file_entry *entry;
+	int ret;
+
+	if (FileEntry == NULL)
+		return -EFAIL;
+
+	entry = ocfs_allocate_file_entry ();
+	*FileEntry = entry;
+	if (entry == NULL)
+		return -ENOMEM;
+
+	if (!force)
+		ret = ocfs_read_file_entry (osb, entry, DiskOffset);
+	else
+		ret = ocfs_read_force_disk (osb, entry, osb->sect_size,
+					    DiskOffset);
+
+	return ret;
+}				/* ocfs_force_get_file_entry */
+
+
+/*
+ * ocfs_force_put_file_entry()
+ *
+ * Write FileEntry back to the offset recorded in FileEntry->this_sector.
+ * With 'force' set the write goes through ocfs_write_force_disk()
+ * (osb->sect_size bytes), otherwise through ocfs_write_file_entry().
+ *
+ * Returns -EFAIL when FileEntry is NULL, otherwise the status of the
+ * write.
+ */
+int ocfs_force_put_file_entry (ocfs_super * osb, ocfs_file_entry * FileEntry,
+			       bool force)
+{
+	int ret;
+
+	if (FileEntry == NULL)
+		return -EFAIL;
+
+	if (!force)
+		ret = ocfs_write_file_entry (osb, FileEntry,
+					     FileEntry->this_sector);
+	else
+		ret = ocfs_write_force_disk (osb, FileEntry, osb->sect_size,
+					     FileEntry->this_sector);
+
+	return ret;
+}				/*
ocfs_force_put_file_entry */
+
+
+/*
+ * ocfs_read_file_entry()
+ *
+ * Read the file entry located at DiskOffset into FileEntry.  A file
+ * entry is one sector long, so osb->sect_size bytes are requested from
+ * ocfs_read_metadata().  Both osb and FileEntry are asserted non-NULL.
+ *
+ * Returns 0 on success, < 0 on error
+ */
+int ocfs_read_file_entry (ocfs_super * osb, ocfs_file_entry * FileEntry,
+			  ub8 DiskOffset)
+{
+	int ret;
+
+	LOG_ENTRY_ARGS ("osb=%p, fileentry=%p, offset=%u.%u\n", osb, FileEntry,
+			HI (DiskOffset), LO (DiskOffset));
+
+	OCFS_ASSERT (FileEntry);
+	OCFS_ASSERT (osb);
+
+	/* One sector holds the whole file entry */
+	ret = ocfs_read_metadata (osb, FileEntry, (ub4) osb->sect_size,
+				  DiskOffset);
+	if (ret < 0)
+		LOG_ERROR_STATUS (ret);
+
+	LOG_EXIT_STATUS (ret);
+	return ret;
+}				/* ocfs_read_file_entry */
+
+
+/*
+ * ocfs_write_file_entry()
+ *
+ * Write the one-sector file entry to Offset.  The write goes through
+ * ocfs_write_metadata() only when all three hold: the entry's disk lock
+ * is OCFS_DLM_ENABLE_CACHE_LOCK, this node (osb->node_num) is the
+ * current master of that lock, and Offset is at or past the volume
+ * bitmap offset.  In every other case ocfs_write_disk() is used.
+ *
+ * Returns 0 on success, < 0 on error
+ */
+int ocfs_write_file_entry (ocfs_super * osb, ocfs_file_entry * FileEntry, ub8 Offset)
+{
+	int ret;
+	int via_metadata;
+
+	LOG_ENTRY ();
+
+	OCFS_ASSERT (FileEntry);
+	OCFS_ASSERT (osb);
+
+	LOG_TRACE_ARGS ("File offset on the disk is %u.%u\n", HI (Offset),
+			LO (Offset));
+
+	via_metadata =
+	    (DISK_LOCK_FILE_LOCK (FileEntry) == OCFS_DLM_ENABLE_CACHE_LOCK) &&
+	    (DISK_LOCK_CURRENT_MASTER (FileEntry) == osb->node_num) &&
+	    (Offset >= osb->vol_layout.bitmap_off);
+
+	/* Either way exactly one sector is written */
+	if (via_metadata)
+		ret = ocfs_write_metadata (osb, FileEntry,
+					   (ub4) osb->sect_size, Offset);
+	else
+		ret = ocfs_write_disk (osb, FileEntry, (ub4) osb->sect_size,
+				       Offset);
+
+	if (ret < 0)
+		LOG_ERROR_STATUS (ret);
+
+	LOG_EXIT_STATUS (ret);
+	return ret;
+}				/* ocfs_write_file_entry */
+
+
+/*
+ * ocfs_remove_extent_map_entry()
+ *
+ * Remove an entry from the extent map
+ */
+void ocfs_remove_extent_map_entry (ocfs_super * osb,
+			   ocfs_extent_map * Map, sb8 Vbo, ub4 ByteCount)
+{
+	LOG_ENTRY ();
+
+	if ((ByteCount) && (ByteCount != 0xFFFFFFFF)) {
+		ByteCount--;
+		ByteCount >>= OCFS_LOG_SECTOR_SIZE;
+		ByteCount++;
+	}
+
+	Vbo >>=
OCFS_LOG_SECTOR_SIZE;
+
+	/* Vbo and ByteCount were shifted by OCFS_LOG_SECTOR_SIZE above */
+	ocfs_extent_map_remove ((ocfs_extent_map *) Map, (sb8) Vbo,
+				(sb8) ByteCount);
+
+	LOG_EXIT ();
+	return;
+}				/* ocfs_remove_extent_map_entry */
+
+/* ocfs_allocate_new_data_node()
+ *
+ * Build a new branch of the extent tree ending in a fresh leaf whose
+ * first extent is (actualDiskOffset, actualLength).
+ *
+ * The branch hangs off ExtentHeader when it is non-NULL, otherwise
+ * directly off FileEntry.  The parent's granularity gives the number of
+ * header sectors ('depth') placed above the leaf;
+ * (NUM_SECTORS_IN_LEAF_NODE + depth) sectors are requested from
+ * ocfs_alloc_node_block() and the next free extent slot of the parent is
+ * pointed at the new space.
+ *
+ * The branch is assembled in a zeroed buffer (header i in sector i, the
+ * leaf in sector 'depth') and written with one ocfs_write_disk() call.
+ * FileEntry->last_ext_ptr and *NewExtentOffset are set to the disk
+ * offset of the new leaf.
+ *
+ * When ExtentHeader is given it is written back, and the last used
+ * extent of every level above it, up to and including FileEntry, has
+ * actualLength added to num_clusters.  The ExtentHeader buffer is passed
+ * to ocfs_read_sector() for each of those levels, so presumably it no
+ * longer holds the caller's node on return -- TODO confirm.
+ *
+ * FileEntry is only modified in memory; nothing in this function writes
+ * it to disk.
+ *
+ * Returns the (non-negative) status of the last I/O call on success,
+ * < 0 on error: -ENOMEM, -EINVAL when a parent header cannot be read,
+ * or the status of the failing allocation/write.
+ */
+int ocfs_allocate_new_data_node (ocfs_super * osb,
+			     ocfs_file_entry * FileEntry,
+			     ub8 actualDiskOffset,
+			     ub8 actualLength,
+			     ocfs_extent_group * ExtentHeader, ub8 * NewExtentOffset)
+{
+	int status = 0;
+	ub1 *tempBuf = NULL;
+	ub4 length;
+	ub4 k, i;
+	ub4 depth;
+	ocfs_extent_group *IterExtentHeader = NULL, *IterExtent;
+
+	ub4 allocSize;
+	ub8 upHeaderPtr;
+	ub8 physicalOffset;
+	ub8 fileOffset = 0;
+	ub8 numSectorsAlloc = 0;
+	ub8 lastExtPointer;
+
+	LOG_ENTRY ();
+
+	if (ExtentHeader != NULL) {
+		/* Parent is an extent header node */
+		allocSize = (ub4) ((NUM_SECTORS_IN_LEAF_NODE +
+				    ExtentHeader->granularity) *
+				   OCFS_SECTOR_SIZE);
+
+		/* allocate contiguous blocks on disk */
+		status = ocfs_alloc_node_block (osb, allocSize, &physicalOffset,
+				&fileOffset, (ub8 *) & numSectorsAlloc,
+				osb->node_num, DISK_ALLOC_EXTENT_NODE);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto finally;
+		}
+
+		/* Logged only; execution continues with fileOffset == 0 */
+		if (fileOffset == 0) {
+			LOG_ERROR_ARGS ("File offset was 0 for file: %s\n",
+					FileEntry->filename);
+		}
+
+		/* Claim the parent's next free slot for the new branch */
+		k = ExtentHeader->next_free_ext;
+		ExtentHeader->extents[k].file_off = FileEntry->alloc_size;
+		ExtentHeader->extents[k].num_clusters = actualLength;
+		ExtentHeader->extents[k].disk_off = physicalOffset;
+		ExtentHeader->next_free_ext++;
+		depth = ExtentHeader->granularity;
+		upHeaderPtr = ExtentHeader->this_ext;
+	} else {
+		/* Parent is the file entry itself */
+		allocSize =
+		    ((NUM_SECTORS_IN_LEAF_NODE +
+		      FileEntry->granularity) * OCFS_SECTOR_SIZE);
+
+		/* Allocate contiguous blocks on disk */
+		status = ocfs_alloc_node_block (osb, allocSize, &physicalOffset,
+				&fileOffset, (ub8 *) & numSectorsAlloc,
+				osb->node_num, DISK_ALLOC_EXTENT_NODE);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto finally;
+		}
+
+		/* Logged only; execution continues with fileOffset == 0 */
+		if (fileOffset == 0) {
+			LOG_ERROR_ARGS ("File offset was 0 for file: %s\n",
+					FileEntry->filename);
+		}
+
+		/* Claim the file entry's next free slot for the new branch */
+		k = FileEntry->next_free_ext;
+		FileEntry->extents[k].file_off = FileEntry->alloc_size;
+		FileEntry->extents[k].num_clusters = actualLength;
+		FileEntry->extents[k].disk_off = physicalOffset;
+		FileEntry->next_free_ext++;
+		depth = FileEntry->granularity;
+		upHeaderPtr = FileEntry->this_sector;
+	}
+
+	/* Common code between grow and this func. */
+
+	/* The buffer is page aligned in size; only allocSize bytes are written */
+	length = (ub4) OCFS_ALIGN (allocSize, PAGE_SIZE);
+
+	tempBuf = ocfs_malloc (length);
+	if (tempBuf == NULL) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	memset (tempBuf, 0, length);
+
+	lastExtPointer = FileEntry->last_ext_ptr;
+
+	/* Fill in all the headers on the way to the leaf node */
+	for (i = 0; i < depth; i++) {
+		/* Header i lives in sector i of the new allocation */
+		IterExtentHeader = (ocfs_extent_group *)
+		    (tempBuf + (OCFS_SECTOR_SIZE * i));
+
+		IterExtentHeader->last_ext_ptr = lastExtPointer;
+		IterExtentHeader->up_hdr_node_ptr = upHeaderPtr;
+
+		strcpy (IterExtentHeader->signature,
+			OCFS_EXTENT_HEADER_SIGNATURE);
+
+		IterExtentHeader->type = OCFS_EXTENT_HEADER;
+		/* Granularity counts down to 0 for the header just above the leaf */
+		IterExtentHeader->granularity = (depth - 1 - i);
+		/* Its single extent points at the following sector */
+		IterExtentHeader->extents[0].disk_off =
+		    (ub8) (physicalOffset + (OCFS_SECTOR_SIZE * (i + 1)));
+		IterExtentHeader->extents[0].file_off = FileEntry->alloc_size;
+		IterExtentHeader->extents[0].num_clusters = actualLength;
+		IterExtentHeader->next_free_ext = 1;
+		IterExtentHeader->alloc_file_off =
+		    fileOffset + (OCFS_SECTOR_SIZE * i);
+		IterExtentHeader->alloc_node = osb->node_num;
+		IterExtentHeader->this_ext =
+		    (ub8) (physicalOffset + (OCFS_SECTOR_SIZE * i));
+
+		/* This header becomes the parent of the next level down */
+		upHeaderPtr = IterExtentHeader->this_ext;
+		lastExtPointer = IterExtentHeader->this_ext;
+	}
+
+	/* Fill in the leaf branch of the extent tree */
+	IterExtent = (ocfs_extent_group *)
+	    (tempBuf + (OCFS_SECTOR_SIZE * depth));
+	IterExtent->this_ext = (ub8) (physicalOffset +
+				      (OCFS_SECTOR_SIZE * depth));
+	IterExtent->last_ext_ptr = lastExtPointer;
+	IterExtent->up_hdr_node_ptr = upHeaderPtr;
+	(*NewExtentOffset) = IterExtent->this_ext;
+
+	/*
+	 * Sanity check: the last header built must point at the leaf.  The
+	 * depth test keeps IterExtentHeader from being dereferenced while it
+	 * is still NULL (no headers were built).
+	 */
+	if ((depth) &&
+	    (IterExtent->this_ext != IterExtentHeader->extents[0].disk_off)) {
+		OCFS_BREAKPOINT ();
+	}
+
+	strcpy (IterExtent->signature, OCFS_EXTENT_DATA_SIGNATURE);
+
+	/* The leaf's first extent describes the caller's new allocation */
+	IterExtent->extents[0].file_off = FileEntry->alloc_size;
+	IterExtent->extents[0].num_clusters = actualLength;
+	IterExtent->extents[0].disk_off = actualDiskOffset;
+	IterExtent->curr_sect = 1;
+	IterExtent->max_sects = NUM_SECTORS_IN_LEAF_NODE;
+	IterExtent->next_free_ext = 1;
+	IterExtent->type = OCFS_EXTENT_DATA;
+	IterExtent->alloc_file_off = fileOffset + (OCFS_SECTOR_SIZE * depth);
+	IterExtent->alloc_node = osb->node_num;
+
+	FileEntry->last_ext_ptr = IterExtent->this_ext;
+
+	/* Write the extents to disk */
+	status = ocfs_write_disk (osb, tempBuf, allocSize, physicalOffset);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	if (ExtentHeader != NULL) {
+		/* This has to be in the end... */
+		status = ocfs_write_disk (osb, ExtentHeader, OCFS_SECTOR_SIZE,
+					  ExtentHeader->this_ext);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto finally;
+		}
+
+		/*
+		 * Walk up from the parent of ExtentHeader to the level just
+		 * below the file entry, growing the last used extent of each
+		 * ancestor.  ExtentHeader is reused as the read buffer.
+		 */
+		for (i = ExtentHeader->granularity + 1;
+		     i < FileEntry->granularity; i++) {
+			status =
+			    ocfs_read_sector (osb, ExtentHeader,
+					      ExtentHeader->up_hdr_node_ptr);
+			if (status < 0) {
+				LOG_ERROR_STATUS (status);
+				status = -EINVAL;
+				goto finally;
+			}
+
+			/* An ancestor with no used extent is unexpected here */
+			if (ExtentHeader->next_free_ext == 0) {
+				OCFS_BREAKPOINT ();
+			}
+
+			k = ExtentHeader->next_free_ext - 1;
+
+			ExtentHeader->extents[k].num_clusters += actualLength;
+
+			status = ocfs_write_sector (osb, ExtentHeader,
+						    ExtentHeader->this_ext);
+			if (status < 0) {
+				goto finally;
+			}
+		}
+		/* Finally account for the new length in the file entry (in memory) */
+		k = FileEntry->next_free_ext - 1;
+		FileEntry->extents[k].num_clusters += actualLength;
+	}
+      finally:
+	ocfs_safefree (tempBuf);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_allocate_new_data_node */
+
+/* ocfs_add_to_last_data_node()
+ *
+ */
+int ocfs_add_to_last_data_node (ocfs_super * osb,
+				ocfs_inode * oin,
+				ocfs_file_entry * FileEntry,
+				ub8 actualDiskOffset,
+				ub8
actualLength, ub4 * ExtentIndex, bool * IncreaseDepth)
+{
+	/*
+	 * Append the extent (actualDiskOffset, actualLength) to the leaf
+	 * node found at FileEntry->last_ext_ptr:
+	 *  - if the new extent can be merged with the leaf's last extent
+	 *    (ocfs_check_for_extent_merge), only that extent is lengthened;
+	 *  - else, if the leaf has a free slot, the slot is filled in;
+	 *  - else a new leaf (plus any headers) is created with
+	 *    ocfs_allocate_new_data_node() under the lowest ancestor that
+	 *    still has room, and the old leaf is chained to it through
+	 *    next_data_ext;
+	 *  - if neither an ancestor nor the file entry has room,
+	 *    *IncreaseDepth is set to true and nothing is added.
+	 *
+	 * When the leaf itself was updated, the last extent of every
+	 * ancestor and of FileEntry (in memory) is grown by actualLength.
+	 *
+	 * 'oin' and 'ExtentIndex' are not referenced by this function.
+	 *
+	 * Returns a non-negative status on success, < 0 on error.  A failure
+	 * of ocfs_allocate_new_data_node() is returned to the caller rather
+	 * than being ignored.
+	 */
+	int status = 0;
+	ub4 k = 0, i;
+	ub4 length;
+	ub1 *buffer = NULL;
+	ocfs_extent_group *OcfsExtent = NULL, *OcfsExtentHeader = NULL;
+	ocfs_extent_group *AllocExtentBuf = NULL;
+	bool UpdateParent = true;
+	ub8 newExtentOff = 0;
+
+	LOG_ENTRY ();
+
+	*IncreaseDepth = false;
+	length = (ub4) OCFS_ALIGN (sizeof (ocfs_extent_group), PAGE_SIZE);
+
+	buffer = ocfs_malloc (length);
+	if (buffer == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto bail;
+	}
+
+	status = ocfs_read_sector (osb, buffer, FileEntry->last_ext_ptr);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status = -EINVAL);
+		goto bail;
+	}
+
+	/* Verify that this is a leaf Node */
+	OcfsExtent = (ocfs_extent_group *) buffer;
+
+	if (!IS_VALID_EXTENT_DATA (OcfsExtent)) {
+		LOG_ERROR_STATUS(status = -EINVAL);
+		goto bail;
+	}
+
+	/* Read the last extent and keep traversing upward till we find a */
+	/* free extent or we are at the top and need to create another level. */
+	if (OcfsExtent->next_free_ext > OCFS_MAX_DATA_EXTENTS) {
+		LOG_ERROR_STATUS(status = -EINVAL);
+		goto bail;
+	}
+
+	k = OcfsExtent->next_free_ext - 1;
+
+	LOG_TRACE_ARGS ("Using local_ext for extent Entry = %u\n", k);
+
+	/* Check here a case where the new allocation can be joined with */
+	/* the last Extent */
+	if (OcfsExtent->next_free_ext >= 1) {
+
+		if (ocfs_check_for_extent_merge
+		    (&(OcfsExtent->extents[k]), actualDiskOffset)) {
+			/* We can join the extents, just increase the len of extent */
+			OcfsExtent->extents[k].num_clusters += actualLength;
+			status = 0;
+			goto bail;
+		}
+	} else {
+		OCFS_BREAKPOINT ();
+	}
+
+	/* We cannot merge give him the next extent */
+	k = OcfsExtent->next_free_ext;
+
+	if (k == OCFS_MAX_DATA_EXTENTS) {
+		/* The leaf is full: a new leaf has to be created */
+		ub8 up_hdr_node_ptr = 0;
+
+		if (FileEntry->granularity == 0) {
+			if (FileEntry->next_free_ext ==
+			    OCFS_MAX_FILE_ENTRY_EXTENTS) {
+				(*IncreaseDepth) = true;
+				goto bail;
+			} else {
+				status =
+				    ocfs_allocate_new_data_node (osb, FileEntry,
+								 actualDiskOffset,
+								 actualLength, NULL,
+								 &newExtentOff);
+				if (status < 0) {
+					/* newExtentOff is not valid; do not chain to it */
+					LOG_ERROR_STATUS (status);
+					goto bail;
+				}
+				OcfsExtent->next_data_ext = newExtentOff;
+				UpdateParent = false;
+				FileEntry->last_ext_ptr = newExtentOff;
+				status =
+				    ocfs_write_sector (osb, OcfsExtent,
+						       OcfsExtent->this_ext);
+				if (status < 0) {
+					goto bail;
+				}
+			}
+		} else {
+			i = 0;
+
+			length =
+			    (ub4) OCFS_ALIGN (sizeof (ocfs_extent_group),
+					      PAGE_SIZE);
+			AllocExtentBuf = OcfsExtentHeader =
+			    ocfs_malloc (length);
+			if (OcfsExtentHeader == NULL) {
+				status = -ENOMEM;
+				LOG_ERROR_STATUS (status);
+				goto bail;
+			}
+
+			up_hdr_node_ptr = OcfsExtent->up_hdr_node_ptr;
+
+			/* Climb until an ancestor header with a free slot is found */
+			for (i = 0; i < FileEntry->granularity; i++) {
+				memset (OcfsExtentHeader, 0,
+					sizeof (ocfs_extent_group));
+
+				status =
+				    ocfs_read_sector (osb, OcfsExtentHeader,
+						      up_hdr_node_ptr);
+				if (status < 0) {
+					LOG_ERROR_STATUS (status);
+					goto bail;
+				}
+
+				if (!IS_VALID_EXTENT_HEADER (OcfsExtentHeader)) {
+					LOG_ERROR_STATUS(status = -EINVAL);
+					goto bail;
+				}
+
+				if (OcfsExtentHeader->granularity != i) {
+					LOG_ERROR_STATUS(status = -EINVAL);
+					goto bail;
+				}
+
+				if (OcfsExtentHeader->next_free_ext >
+				    OCFS_MAX_DATA_EXTENTS) {
+					LOG_ERROR_STATUS(status = -EINVAL);
+					goto bail;
+				}
+
+				if (OcfsExtentHeader->next_free_ext ==
+				    OCFS_MAX_DATA_EXTENTS) {
+					up_hdr_node_ptr =
+					    OcfsExtentHeader->up_hdr_node_ptr;
+					continue;
+				} else {
+					break;
+				}
+			}
+
+			if (i == FileEntry->granularity) {
+				/* Every ancestor header is full */
+				if (FileEntry->next_free_ext ==
+				    OCFS_MAX_FILE_ENTRY_EXTENTS) {
+					(*IncreaseDepth) = true;
+					goto bail;
+				} else {
+					status =
+					    ocfs_allocate_new_data_node (osb, FileEntry,
+									 actualDiskOffset,
+									 actualLength, NULL,
+									 &newExtentOff);
+					if (status < 0) {
+						LOG_ERROR_STATUS (status);
+						goto bail;
+					}
+					OcfsExtent->next_data_ext =
+					    newExtentOff;
+					UpdateParent = false;
+					FileEntry->last_ext_ptr = newExtentOff;
+					status =
+					    ocfs_write_sector (osb, OcfsExtent,
+							       OcfsExtent->
+							       this_ext);
+					if (status < 0) {
+						goto bail;
+					}
+				}
+				/*
+				 * NOTE(review): IncreaseDepth is also set after a
+				 * successful allocation above -- behaviour kept
+				 * as-is; confirm against the caller that this is
+				 * intended.
+				 */
+				(*IncreaseDepth) = true;
+				goto bail;
+			} else {
+				status =
+				    ocfs_allocate_new_data_node (osb, FileEntry,
+								 actualDiskOffset,
+								 actualLength,
+								 OcfsExtentHeader,
+								 &newExtentOff);
+				if (status < 0) {
+					LOG_ERROR_STATUS (status);
+					goto bail;
+				}
+				OcfsExtent->next_data_ext = newExtentOff;
+				UpdateParent = false;
+				FileEntry->last_ext_ptr = newExtentOff;
+				status =
+				    ocfs_write_sector (osb, OcfsExtent,
+						       OcfsExtent->this_ext);
+				/* Success or failure, nothing more to do here */
+				goto bail;
+			}
+		}
+	} else {
+		/* FileOffset for the new Extent will be equal to the previous */
+		/* allocation size of file */
+		OcfsExtent->extents[k].file_off = FileEntry->alloc_size;
+		OcfsExtent->extents[k].num_clusters = actualLength;
+		OcfsExtent->extents[k].disk_off = actualDiskOffset;
+		OcfsExtent->next_free_ext++;
+	}
+
+      bail:
+	/*
+	 * The leaf was changed in place (merge or new slot): write it and
+	 * grow the last extent of each ancestor.  'buffer' is reused to read
+	 * every ancestor in turn, starting from the leaf's up_hdr_node_ptr.
+	 */
+	if (status >= 0 && !(*IncreaseDepth) && UpdateParent) {
+		status =
+		    ocfs_write_sector (osb, OcfsExtent, OcfsExtent->this_ext);
+		if (status < 0) {
+			goto finally;
+		}
+
+		for (i = 0; i < FileEntry->granularity; i++) {
+			OcfsExtentHeader = (ocfs_extent_group *) buffer;
+			status = ocfs_read_sector (osb, buffer,
+						   OcfsExtentHeader->
+						   up_hdr_node_ptr);
+			if (status < 0) {
+				LOG_ERROR_STATUS (status);
+				status = -EINVAL;
+				goto finally;
+			}
+
+			OcfsExtentHeader = (ocfs_extent_group *) buffer;
+
+			if (OcfsExtentHeader->next_free_ext == 0) {
+				OCFS_BREAKPOINT ();
+			}
+
+			k = OcfsExtentHeader->next_free_ext - 1;
+
+			OcfsExtentHeader->extents[k].num_clusters +=
+			    actualLength;
+
+			status = ocfs_write_sector (osb, OcfsExtentHeader,
+						    OcfsExtentHeader->this_ext);
+			if (status < 0) {
+				goto finally;
+			}
+		}
+
+		k = FileEntry->next_free_ext - 1;
+
+		FileEntry->extents[k].num_clusters += actualLength;
+	}
+
+      finally:
+	ocfs_safefree (buffer);
+	ocfs_safefree (AllocExtentBuf);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_add_to_last_data_node */
+
+/* ocfs_update_last_data_extent()
+ *
+ * Read the extent node at FileEntry->last_ext_ptr, store NextDataOffset
+ * in its next_data_ext field and write the sector back.  A non-zero
+ * next_data_ext found on disk trips OCFS_BREAKPOINT() before it is
+ * overwritten.
+ *
+ * Returns a non-negative status on success, -ENOMEM or the status of the
+ * failing read/write on error.
+ */
+int ocfs_update_last_data_extent (ocfs_super * osb,
+			      ocfs_file_entry * FileEntry, ub8 NextDataOffset)
+{
+	int status = 0;
+	ub4 length = 0;
+	ub1 *buffer = NULL;
+	ocfs_extent_group *OcfsExtent;
+
+	LOG_ENTRY ();
+
+	length = (ub4) OCFS_ALIGN (sizeof (ocfs_extent_group), PAGE_SIZE);
+
+	buffer = ocfs_malloc (length);
+	if (buffer == NULL) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	status = ocfs_read_sector (osb, buffer, FileEntry->last_ext_ptr);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	OcfsExtent = (ocfs_extent_group *) buffer;
+
+	if (OcfsExtent->next_data_ext != 0) {
+		OCFS_BREAKPOINT ();
+	}
+
+	OcfsExtent->next_data_ext = NextDataOffset;
+
+	status = ocfs_write_sector (osb, buffer, FileEntry->last_ext_ptr);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+      finally:
+	ocfs_safefree (buffer);
+
+	LOG_EXIT_STATUS (status);
+	return (status);
+}				/* ocfs_update_last_data_extent */
+
+/* ocfs_grow_extent_tree()
+ *
+ */
+int ocfs_grow_extent_tree (ocfs_super * osb,
+		       ocfs_file_entry * FileEntry,
+		       ub8 actualDiskOffset, ub8 actualLength)
+{
+	int status = 0;
+	sb4 k, i;
+	ub4 length = 0;
+	ub4 numSectorsAlloc = 0;
+	ub1 *buffer = NULL;
+	ocfs_extent_group *OcfsExtent = NULL;
+	ocfs_extent_group *ExtentHeader = NULL;
+	ub8 physicalOffset;
+	ub8 fileOffset = 0;
+	ub8 upHeaderPtr, lastExtentPtr;
+	ub4 AllocSize;
+
+	LOG_ENTRY ();
+
+	AllocSize = ((FileEntry->granularity + 2) * OCFS_SECTOR_SIZE);
+
+	/* Allocate the space from the Extent file. This function should */
+	/* return contigous disk blocks requested.
*/ + status = ocfs_alloc_node_block (osb, AllocSize, &physicalOffset, + &fileOffset, (ub8 *) & numSectorsAlloc, + osb->node_num, DISK_ALLOC_EXTENT_NODE); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + if (fileOffset == 0) { + LOG_TRACE_ARGS ("File offset was 0 for file: %s\n", + FileEntry->filename); + } + + if (physicalOffset == 0) { + LOG_ERROR_STATUS(status = -ENOMEM); + goto finally; + } + + length = (ub4) OCFS_ALIGN (AllocSize, PAGE_SIZE); + + buffer = ocfs_malloc (length); + if (buffer == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + + memset (buffer, 0, length); + + OcfsExtent = (ocfs_extent_group *) buffer; + + /* Copy the File Entry information in to the newly allocated sector */ + for (k = 0; k < OCFS_MAX_FILE_ENTRY_EXTENTS; k++) { + OcfsExtent->extents[k].file_off = + FileEntry->extents[k].file_off; + OcfsExtent->extents[k].num_clusters = + FileEntry->extents[k].num_clusters; + OcfsExtent->extents[k].disk_off = + FileEntry->extents[k].disk_off; + } + + OcfsExtent->last_ext_ptr = FileEntry->last_ext_ptr; + + lastExtentPtr = FileEntry->last_ext_ptr; + + OcfsExtent->this_ext = physicalOffset; + OcfsExtent->alloc_file_off = fileOffset; + OcfsExtent->alloc_node = osb->node_num; + OcfsExtent->next_data_ext = 0; + + FileEntry->local_ext = false; + FileEntry->granularity++; + + LOG_TRACE_ARGS ("Granularity is: %d\n", FileEntry->granularity); + + /* If granularity is zero now, the for loop will not execute. */ + /* First time a file is created ,granularity = -1 and local_ext flag */ + /* is set to true */ + + upHeaderPtr = FileEntry->this_sector; + + for (i = 0; i < FileEntry->granularity; i++) { + ExtentHeader = + (ocfs_extent_group *) (buffer + (OCFS_SECTOR_SIZE * i)); + ExtentHeader->type = OCFS_EXTENT_HEADER; + ExtentHeader->granularity = (FileEntry->granularity - 1) - i; + + strcpy (ExtentHeader->signature, OCFS_EXTENT_HEADER_SIGNATURE); + + if (i == 0) { + ExtentHeader->extents[OCFS_MAX_FILE_ENTRY_EXTENTS]. 
+ disk_off = physicalOffset + OCFS_SECTOR_SIZE; + ExtentHeader->extents[OCFS_MAX_FILE_ENTRY_EXTENTS]. + file_off = FileEntry->alloc_size; + ExtentHeader->extents[OCFS_MAX_FILE_ENTRY_EXTENTS]. + num_clusters = actualLength; + + ExtentHeader->next_free_ext = + OCFS_MAX_FILE_ENTRY_EXTENTS + 1; + ExtentHeader->this_ext = physicalOffset; + ExtentHeader->last_ext_ptr = lastExtentPtr; + ExtentHeader->up_hdr_node_ptr = upHeaderPtr; + + upHeaderPtr = ExtentHeader->this_ext; + lastExtentPtr = ExtentHeader->this_ext; + + /* Also modify the PreviousPtr for all the nodes now */ + } else { + ExtentHeader->extents[0].disk_off = + physicalOffset + (OCFS_SECTOR_SIZE * (i + 1)); + ExtentHeader->extents[0].file_off = + FileEntry->alloc_size; + ExtentHeader->extents[0].num_clusters = actualLength; + ExtentHeader->next_free_ext = 1; + ExtentHeader->alloc_file_off = + fileOffset + (OCFS_SECTOR_SIZE * (i + 1)); + ExtentHeader->alloc_node = osb->node_num; + ExtentHeader->this_ext = + physicalOffset + (OCFS_SECTOR_SIZE * i); + ExtentHeader->up_hdr_node_ptr = upHeaderPtr; + ExtentHeader->last_ext_ptr = lastExtentPtr; + + upHeaderPtr = ExtentHeader->this_ext; + lastExtentPtr = ExtentHeader->this_ext; + } + } + + /* Update the Data Segment */ + OcfsExtent = (ocfs_extent_group *) (buffer + (OCFS_SECTOR_SIZE * + FileEntry->granularity)); + + i = (FileEntry->granularity) ? 
0 : OCFS_MAX_FILE_ENTRY_EXTENTS; + + LOG_TRACE_ARGS ("EntryAvailable is: %d\n", OcfsExtent->next_free_ext); + + /* For the time being we are assuming that the newly allocated Extent */ + /* will have one more entry to accomodate the latest allocation */ + + strcpy (OcfsExtent->signature, OCFS_EXTENT_DATA_SIGNATURE); + + OcfsExtent->extents[i].file_off = FileEntry->alloc_size; + OcfsExtent->extents[i].num_clusters = actualLength; + OcfsExtent->extents[i].disk_off = actualDiskOffset; + OcfsExtent->curr_sect = 1; + OcfsExtent->max_sects = NUM_SECTORS_IN_LEAF_NODE; + OcfsExtent->type = OCFS_EXTENT_DATA; + OcfsExtent->next_free_ext = i + 1; + OcfsExtent->alloc_file_off = + fileOffset + (FileEntry->granularity * OCFS_SECTOR_SIZE); + OcfsExtent->alloc_node = osb->node_num; + OcfsExtent->this_ext = + physicalOffset + (FileEntry->granularity * OCFS_SECTOR_SIZE); + OcfsExtent->up_hdr_node_ptr = upHeaderPtr; + OcfsExtent->last_ext_ptr = lastExtentPtr; + OcfsExtent->next_data_ext = 0; + + upHeaderPtr = OcfsExtent->this_ext; + lastExtentPtr = OcfsExtent->this_ext; + + /* We assume that the AllocSize passed in is Sector aligned */ + + status = ocfs_write_disk (osb, buffer, AllocSize, physicalOffset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + /* Update the Previous Last Data Extent with this new Data Extent Pointer */ + if (FileEntry->last_ext_ptr != 0) { + status = + ocfs_update_last_data_extent (osb, FileEntry, OcfsExtent->this_ext); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + } + + /* Clear all the extent information from File Entry */ + + for (i = 0; i < OCFS_MAX_FILE_ENTRY_EXTENTS; i++) { + FileEntry->extents[i].file_off = 0; + FileEntry->extents[i].num_clusters = 0; + FileEntry->extents[i].disk_off = 0; + } + + /* Update the File Entry Extent */ + + FileEntry->local_ext = false; + + FileEntry->extents[0].file_off = 0; + FileEntry->extents[0].num_clusters = FileEntry->alloc_size + + actualLength; + 
FileEntry->extents[0].disk_off = physicalOffset; + FileEntry->last_ext_ptr = lastExtentPtr; + FileEntry->next_free_ext = 1; + + finally: + ocfs_safefree (buffer); + + LOG_EXIT_STATUS (status); + return (status); +} /* ocfs_grow_extent_tree */ + +/* + * ocfs_allocate_extent() + * + */ +int ocfs_allocate_extent (ocfs_super * osb, + ocfs_inode * oin, + ocfs_file_entry * FileEntry, + ub8 actualDiskOffset, ub8 actualLength) +{ + int status = 0; + bool IncreaseTreeDepth = false; + ub4 k = 0; + + LOG_ENTRY (); + + /* Don't do an Assertion on oin as it can NULL also in some cases. */ + OCFS_ASSERT (FileEntry); + + if (!IS_VALID_FILE_ENTRY (FileEntry)) { + LOG_ERROR_STATUS(status = -EINVAL); + goto finally; + } + + if (FileEntry->local_ext) { + /* We are still using the local extents of File Entry */ + if (FileEntry->next_free_ext > OCFS_MAX_FILE_ENTRY_EXTENTS) { + LOG_ERROR_STATUS(status = -EINVAL); + goto finally; + } + + if (FileEntry->next_free_ext >= 1) { + k = FileEntry->next_free_ext - 1; + + LOG_TRACE_ARGS + ("Using local_ext for extent Entry = %u\n", k); + + /* Check here a case where the new allocation can be */ + /* joined with the last extent. 
*/ + if (ocfs_check_for_extent_merge + (&FileEntry->extents[k], actualDiskOffset)) { + /* We can join the extents, just increase the len of extent */ + FileEntry->extents[k].num_clusters += actualLength; + status = 0; + goto finally; + } + } + + /* We cannot merge give him the next extent */ + k = FileEntry->next_free_ext; + if (k == OCFS_MAX_FILE_ENTRY_EXTENTS) { + IncreaseTreeDepth = true; + } else { + /* file_off for the new extent will be equal to the previous */ + /* allocation size of file */ + FileEntry->extents[k].file_off = FileEntry->alloc_size; + FileEntry->extents[k].num_clusters = actualLength; + FileEntry->extents[k].disk_off = actualDiskOffset; + FileEntry->next_free_ext++; + + status = 0; + goto finally; + } + } else { + LOG_TRACE_STR ("Using DiskExtents"); + + if (FileEntry->granularity > 3) { + OCFS_BREAKPOINT (); + } + + /* This File is no longer using Local Extents */ + status = ocfs_add_to_last_data_node (osb, oin, FileEntry, + actualDiskOffset, actualLength, + &k, &IncreaseTreeDepth); + if (status < 0) { + LOG_ERROR_STATUS(status); + goto finally; + } + } + + if (IncreaseTreeDepth) { + status = ocfs_grow_extent_tree (osb, FileEntry, + actualDiskOffset, actualLength); + if (status < 0) { + LOG_ERROR_STATUS(status); + goto finally; + } + } + + finally: + if ((status == 0) && (oin != NULL)) { + sb8 Vbo = 0; + sb8 Lbo = 0; + + /* Add this Entry in to extent map. If a new mapping run to be added */ + /* overlaps an existing mapping run, ocfs_add_extent_map_entry merges */ + /* them into a single mapping run.So just adding this entry will be fine. */ + Vbo = FileEntry->alloc_size; + Lbo = actualDiskOffset; + + /* Add the Entry to the extent map list */ + if (!ocfs_add_extent_map_entry (osb, &oin->map, Vbo, Lbo, + actualLength)) { + LOG_ERROR_ARGS ("op failed for off %u.%u, size %u.%u\n", + HI (Vbo), LO (Vbo), + HI (actualLength), LO (actualLength)); + } + } + + /* ?? 
We should update the Filesize and allocation size here */ + + LOG_EXIT_STATUS (status); + return (status); +} /* ocfs_allocate_extent */ + +/* + * ocfs_check_for_extent_merge() + * + * In order to optimize the usage of Extents Per file, ocfs tries to merge + * contigous allocations of same file. This function is called whever a + * new allocation is added in order to check if it is possible to merge + * it with the previous allocation. + * + * Returns true if possible, false, if not. + */ +bool ocfs_check_for_extent_merge (ocfs_alloc_ext * LastExtent, ub8 ActualDiskOffset) +{ + bool ret = false; + + LOG_ENTRY (); + + /* Check here a case where the new allocation can be */ + /* joined with the last extent. */ + + if ((LastExtent->disk_off + + LastExtent->num_clusters) == ActualDiskOffset) { + ret = true; + } + + LOG_EXIT_ULONG (ret); + return ret; +} /* ocfs_check_for_extent_merge */ + +/* + * ocfs_get_next_extent_map_entry() + * + * This routine looks up the existing mapping of VBO to LBO for a file. + * The information it queries is either stored in the extent map field + * of the oin or is stored in the allocation file and needs to be + * retrieved, decoded and updated in the extent map. 
+ * + */ +bool ocfs_get_next_extent_map_entry (ocfs_super * osb, + ocfs_extent_map * Map, + ub4 RunIndex, sb8 * Vbo, sb8 * Lbo, + ub4 * SectorCount) +{ + bool Results; + sb8 LiVbo; + sb8 LiLbo; + sb8 LiSectorCount; + + LOG_ENTRY (); + + LiLbo = 0; + + Results = ocfs_extent_map_next_entry (Map, RunIndex, &LiVbo, &LiLbo, + &LiSectorCount); + + *Vbo = ((sb8) LiVbo) << OCFS_LOG_SECTOR_SIZE; + + if (((ub4) LiLbo) != -1) { + *Lbo = ((sb8) LiLbo) << OCFS_LOG_SECTOR_SIZE; + } else { + Results = false; + *Lbo = 0; + } + + *SectorCount = ((ub4) LiSectorCount) << OCFS_LOG_SECTOR_SIZE; + + if ((*SectorCount == 0) && (LiSectorCount != 0)) { + *SectorCount = (ub4) - 1; /* it overflowed */ + Results = false; + } + + LOG_EXIT_ULONG (Results); + return Results; +} /* ocfs_get_next_extent_map_entry */ + + +/* + * ocfs_update_all_headers() + * + */ +int ocfs_update_all_headers (ocfs_super * osb, ocfs_extent_group * AllocExtent, + ub8 FileSize) +{ + int status = 0; + ub8 upHeaderPtr; + ub4 i = 0; + ocfs_extent_group *ExtentHeader = NULL; + ocfs_file_entry *FileEntry = NULL; + + LOG_ENTRY (); + + status = ocfs_write_sector (osb, AllocExtent, AllocExtent->this_ext); + if (status < 0) { + LOG_ERROR_STATUS(status); + goto finally; + } + + ExtentHeader = AllocExtent; + + while (1) { + upHeaderPtr = ExtentHeader->up_hdr_node_ptr; + + memset (ExtentHeader, 0, OCFS_SECTOR_SIZE); + + status = ocfs_read_sector (osb, ExtentHeader, upHeaderPtr); + if (status < 0) { + LOG_ERROR_STATUS(status = -EINVAL); + goto finally; + } + + if (IS_VALID_EXTENT_HEADER (ExtentHeader)) { + for (i = 0; i < AllocExtent->next_free_ext; i++) { + if ((AllocExtent->extents[i].file_off + + AllocExtent->extents[i].num_clusters) > + FileSize) { + if (AllocExtent->extents[i].file_off > + FileSize) { + AllocExtent->extents[i]. + file_off = + AllocExtent->extents[i]. + num_clusters = 0; + AllocExtent->next_free_ext = i; + break; + } else { + AllocExtent->extents[i]. + num_clusters -= + ((AllocExtent->extents[i]. 
+ file_off + + AllocExtent->extents[i]. + num_clusters) - FileSize); + AllocExtent->next_free_ext = + i + 1; + break; + } + } + } + continue; + } else { + FileEntry = (ocfs_file_entry *) ExtentHeader; + + if (!IS_VALID_FILE_ENTRY (FileEntry)) { + LOG_ERROR_STATUS(status = -EFAIL); + goto finally; + } + + for (i = 0; i < FileEntry->next_free_ext; i++) { + if ((FileEntry->extents[i].file_off + + FileEntry->extents[i].num_clusters) > + FileSize) { + if (FileEntry->extents[i].file_off > + FileSize) { + FileEntry->extents[i].file_off = + FileEntry->extents[i]. + num_clusters = 0; + FileEntry->next_free_ext = i; + break; + } else { + FileEntry->extents[i]. + num_clusters -= + ((FileEntry->extents[i]. + file_off + + FileEntry->extents[i]. + num_clusters) - FileSize); + FileEntry->next_free_ext = + i + 1; + break; + } + } + } + break; + } + } + +finally: + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_update_all_headers */ + + +/* + * ocfs_free_extents_for_truncate() + * + */ +int ocfs_free_extents_for_truncate (ocfs_super * osb, + ocfs_file_entry * FileEntry, + ocfs_inode * oin, sb4 LogNodeNum, ub8 FileSize) +{ + int status = 0; + ub4 i, size, numUpdt = 0; + ub4 numBitsAllocated = 0, bitmapOffset = 0; + ocfs_cleanup_record *CleanupLogRec = NULL; + ocfs_extent_group *AllocExtent = NULL; + bool FirstTime = true; + ub4 freeExtent = 0; + ub8 actualSize = 0, origLength = 0; + ub8 diskOffsetTobeFreed, lengthTobeFreed = 0; + + LOG_ENTRY (); + + size = sizeof (ocfs_cleanup_record); + size = (ub4) OCFS_ALIGN (size, PAGE_SIZE); + + CleanupLogRec = ocfs_malloc (size); + if (CleanupLogRec == NULL) { + status = -ENOMEM; + goto finally; + } + + CleanupLogRec->rec.free.num_free_upds = 0; + CleanupLogRec->log_id = osb->curr_trans_id; + CleanupLogRec->log_type = LOG_FREE_BITMAP; + + if (FileEntry->local_ext) { + freeExtent = FileEntry->next_free_ext; + for (i = 0; i < FileEntry->next_free_ext; i++) { + actualSize = + (FileEntry->extents[i].num_clusters + + 
FileEntry->extents[i].file_off); + if (actualSize > FileSize) { + if (FileEntry->extents[i].file_off >= FileSize) { + if (FirstTime) { + freeExtent = i; + FirstTime = false; + } + numBitsAllocated = + (ub4) (FileEntry->extents[i]. + num_clusters / + (osb->vol_layout. + cluster_size)); + + bitmapOffset = + (ub4) ((FileEntry->extents[i]. + disk_off - + osb->vol_layout. + data_start_off) / + (osb->vol_layout. + cluster_size)); + + FileEntry->extents[i].num_clusters = + FileEntry->extents[i].disk_off = + FileEntry->extents[i].file_off = 0; + + } else { + if (FirstTime) { + freeExtent = i + 1; + FirstTime = false; + } + + origLength = + FileEntry->extents[i].num_clusters; + FileEntry->extents[i].num_clusters = + (FileSize - + FileEntry->extents[i].file_off); + lengthTobeFreed = + (origLength - + FileEntry->extents[i]. + num_clusters); + + if (lengthTobeFreed == 0) { + continue; + } + + numBitsAllocated = + (ub4) (lengthTobeFreed / + (osb->vol_layout. + cluster_size)); + diskOffsetTobeFreed = + FileEntry->extents[i].disk_off + + FileEntry->extents[i].num_clusters; + + bitmapOffset = + (ub4) ((diskOffsetTobeFreed - + osb->vol_layout. + data_start_off) / + (osb->vol_layout. + cluster_size)); + + } + + numUpdt = CleanupLogRec->rec.free.num_free_upds; + + CleanupLogRec->rec.free.free_bitmap[numUpdt]. + length = numBitsAllocated; + CleanupLogRec->rec.free.free_bitmap[numUpdt]. + file_off = bitmapOffset; + CleanupLogRec->rec.free.free_bitmap[numUpdt]. + type = DISK_ALLOC_VOLUME; + CleanupLogRec->rec.free.free_bitmap[numUpdt]. 
+ node_num = -1; + (CleanupLogRec->rec.free.num_free_upds)++; + } + + } + + /* Also Update the next_free_ext */ + FileEntry->next_free_ext = freeExtent; + } else { + freeExtent = 0; + FirstTime = true; + + size = OCFS_ALIGN (sizeof (ocfs_extent_group), PAGE_SIZE); + + AllocExtent = ocfs_malloc (size); + if (AllocExtent == NULL) { + LOG_ERROR_STR + ("ocfs_free_file_extents():ocfs_malloc failed for PAllocExtent"); + status = -ENOMEM; + goto finally; + } + + memset (AllocExtent, 0, size); + + while (FileEntry->last_ext_ptr != 0) { + status = + ocfs_read_metadata (osb, AllocExtent, + (ub4) osb->sect_size, + FileEntry->last_ext_ptr); + if (status < 0) { + LOG_ERROR_ARGS + ("ocfs_read_disk failed with status(%x)", + status); + goto finally; + } + + if ((AllocExtent->type != OCFS_EXTENT_DATA) && + (AllocExtent->type != OCFS_EXTENT_HEADER)) { + LOG_ERROR_STR ("Invalid Extent Header"); + status = -EFAIL; + goto finally; + } + + if (AllocExtent->extents[0].file_off >= FileSize) { + if (AllocExtent->type == OCFS_EXTENT_DATA) { + + if (!IS_VALID_EXTENT_DATA (AllocExtent)) { + LOG_ERROR_STR + ("Invalid Extent Header"); + status = -EFAIL; + goto finally; + } + + for (i = 0; + i < AllocExtent->next_free_ext; + i++) { + numBitsAllocated = + (ub4) (AllocExtent-> + extents[i]. + num_clusters / + (osb->vol_layout. + cluster_size)); + bitmapOffset = + (ub4) ((AllocExtent-> + extents[i]. + disk_off - + osb->vol_layout. + data_start_off) / + (osb->vol_layout. + cluster_size)); + + numUpdt = + CleanupLogRec->rec.free. + num_free_upds; + if (numUpdt >= FREE_LOG_SIZE) { + status = + ocfs_write_node_log + (osb, + (ocfs_log_record *) + CleanupLogRec, + LogNodeNum, + LOG_CLEANUP); + if (status < 0) { + LOG_ERROR_ARGS + ("ocfs_read_disk failed with status(%x)\n", + status); + goto finally; + } + numUpdt = + CleanupLogRec->rec. + free.num_free_upds = + 0; + } + + AllocExtent->extents[i]. + num_clusters = + AllocExtent->extents[i]. + disk_off = + AllocExtent->extents[i]. 
+ file_off = 0; + + CleanupLogRec->rec.free. + free_bitmap[numUpdt]. + length = numBitsAllocated; + CleanupLogRec->rec.free. + free_bitmap[numUpdt]. + file_off = bitmapOffset; + CleanupLogRec->rec.free. + free_bitmap[numUpdt].type = + DISK_ALLOC_VOLUME; + CleanupLogRec->rec.free. + free_bitmap[numUpdt]. + node_num = -1; + (CleanupLogRec->rec.free. + num_free_upds)++; + } + + if (numUpdt >= FREE_LOG_SIZE) { + status = + ocfs_write_node_log (osb, + (ocfs_log_record + *) + CleanupLogRec, + LogNodeNum, + LOG_CLEANUP); + if (status < 0) { + LOG_ERROR_ARGS + ("ocfs_free_extents_for_truncate(): ocfs_read_disk failed with status(%x)\n", + status); + goto finally; + } + numUpdt = + CleanupLogRec->rec.free. + num_free_upds = 0; + } + + CleanupLogRec->rec.free. + free_bitmap[numUpdt].length = 1; + CleanupLogRec->rec.free. + free_bitmap[numUpdt].file_off = + AllocExtent->alloc_file_off; + CleanupLogRec->rec.free. + free_bitmap[numUpdt].type = + DISK_ALLOC_EXTENT_NODE; + CleanupLogRec->rec.free. + free_bitmap[numUpdt].node_num = + AllocExtent->alloc_node; + (CleanupLogRec->rec.free. + num_free_upds)++; + if (AllocExtent->alloc_file_off == 0) { + LOG_ERROR_ARGS + ("File offset was 0 for file %s type DISK_ALLOC_EXTENT_NODE blk 200\n", + FileEntry->filename); + } + + } else { + OCFS_ASSERT (AllocExtent->type == + OCFS_EXTENT_HEADER); + if (!IS_VALID_EXTENT_HEADER + (AllocExtent)) { + LOG_ERROR_STR + ("Invalid Extent Header"); + status = -EFAIL; + goto finally; + } + + numUpdt = + CleanupLogRec->rec.free. + num_free_upds; + if (numUpdt >= FREE_LOG_SIZE) { + status = + ocfs_write_node_log (osb, + (ocfs_log_record + *) + CleanupLogRec, + LogNodeNum, + LOG_CLEANUP); + if (status < 0) { + LOG_ERROR_ARGS + ("ocfs_read_disk failed with status(%x)\n", + status); + goto finally; + } + numUpdt = + CleanupLogRec->rec.free. + num_free_upds = 0; + } + + CleanupLogRec->rec.free. 
+ free_bitmap[numUpdt].length = 1; + + if (AllocExtent->alloc_file_off == 0) { + OCFS_BREAKPOINT(); + } + + CleanupLogRec->rec.free. + free_bitmap[numUpdt].file_off = + AllocExtent->alloc_file_off; + + CleanupLogRec->rec.free. + free_bitmap[numUpdt].type = + DISK_ALLOC_EXTENT_NODE; + + CleanupLogRec->rec.free. + free_bitmap[numUpdt].node_num = + AllocExtent->alloc_node; + (CleanupLogRec->rec.free. + num_free_upds)++; + + } + } else { + if (AllocExtent->type == OCFS_EXTENT_DATA) { + freeExtent = AllocExtent->next_free_ext; + + for (i = 0; + i < AllocExtent->next_free_ext; + i++) { + actualSize = + (AllocExtent->extents[i]. + num_clusters + + AllocExtent->extents[i]. + file_off); + if (actualSize > FileSize) { + if (AllocExtent-> + extents[i]. + file_off >= + FileSize) { + if (FirstTime) { + freeExtent + = i; + FirstTime + = + false; + } + numBitsAllocated + = (ub4) + (AllocExtent-> + extents[i]. + num_clusters + / + (osb-> + vol_layout. + cluster_size)); + bitmapOffset = + (ub4) ((AllocExtent->extents[i].disk_off - osb->vol_layout.data_start_off) / (osb->vol_layout.cluster_size)); + + AllocExtent-> + extents[i]. + num_clusters + = + AllocExtent-> + extents[i]. + disk_off = + AllocExtent-> + extents[i]. + file_off = + 0; + + } else { + if (FirstTime) { + freeExtent + = + i + + 1; + FirstTime + = + false; + } + + origLength = + AllocExtent-> + extents[i]. + num_clusters; + AllocExtent-> + extents[i]. + num_clusters + = + (FileSize - + AllocExtent-> + extents[i]. + file_off); + lengthTobeFreed + = + (origLength + - + AllocExtent-> + extents[i]. + num_clusters); + + if (lengthTobeFreed == 0) { + continue; + } + + numBitsAllocated + = (ub4) + (lengthTobeFreed + / + (osb-> + vol_layout. + cluster_size)); + diskOffsetTobeFreed + = + AllocExtent-> + extents[i]. + disk_off + + AllocExtent-> + extents[i]. + num_clusters; + + bitmapOffset = + (ub4) ((diskOffsetTobeFreed - osb->vol_layout.data_start_off) / (osb->vol_layout.cluster_size)); + } + + numUpdt = + CleanupLogRec->rec. 
+ free.num_free_upds; + if (numUpdt >= + FREE_LOG_SIZE) { + status = + ocfs_write_node_log + (osb, + (ocfs_log_record + *) + CleanupLogRec, + LogNodeNum, + LOG_CLEANUP); + if (status < 0) { + LOG_ERROR_ARGS + ("ocfs_read_disk failed with status(%x)\n", + status); + goto finally; + } + numUpdt = + CleanupLogRec-> + rec.free. + num_free_upds + = 0; + } + + CleanupLogRec->rec.free. + free_bitmap + [numUpdt].length = + numBitsAllocated; + CleanupLogRec->rec.free. + free_bitmap + [numUpdt].file_off = + bitmapOffset; + CleanupLogRec->rec.free. + free_bitmap + [numUpdt].type = + DISK_ALLOC_VOLUME; + CleanupLogRec->rec.free. + free_bitmap + [numUpdt].node_num = + -1; + (CleanupLogRec->rec. + free.num_free_upds)++; + } + } + + AllocExtent->next_free_ext = freeExtent; + + if (numUpdt >= FREE_LOG_SIZE) { + status = + ocfs_write_node_log (osb, + (ocfs_log_record + *) + CleanupLogRec, + LogNodeNum, + LOG_CLEANUP); + if (status < 0) { + LOG_ERROR_ARGS + ("ocfs_read_disk failed with status(%x)\n", + status); + goto finally; + } + numUpdt = + CleanupLogRec->rec.free. + num_free_upds = 0; + } + + CleanupLogRec->rec.free. + free_bitmap[numUpdt].length = 1; + CleanupLogRec->rec.free. + free_bitmap[numUpdt].file_off = + AllocExtent->alloc_file_off; + CleanupLogRec->rec.free. + free_bitmap[numUpdt].type = + DISK_ALLOC_EXTENT_NODE; + CleanupLogRec->rec.free. + free_bitmap[numUpdt].node_num = + AllocExtent->alloc_node; + (CleanupLogRec->rec.free. + num_free_upds)++; + if (AllocExtent->alloc_file_off == 0) { + LOG_ERROR_ARGS + ("File offset was 0 for file %s type DISK_ALLOC_EXTENT_NODE blk 200\n", + FileEntry->filename); + } + } else { + OCFS_BREAKPOINT(); + } + + ocfs_update_all_headers (osb, AllocExtent, FileSize); + break; + } + } + + } + + /* Write the log. 
*/ + if (CleanupLogRec->rec.free.num_free_upds > 0) { + status = ocfs_write_node_log (osb, + (ocfs_log_record *) CleanupLogRec, + LogNodeNum, LOG_CLEANUP); + if (status < 0) { + LOG_ERROR_STATUS(status); + goto finally; + } + } + + finally: + if (AllocExtent) { + ocfs_free (AllocExtent); + AllocExtent = NULL; + } + + if (CleanupLogRec) { + ocfs_free (CleanupLogRec); + } + + LOG_EXIT_ULONG (status); + return status; +} /* ocfs_free_extents_for_truncate */ +#endif /* !DEBUGOCFS */ + +/* + * ocfs_lookup_file_allocation() + * + * This routine looks up the existing mapping of VBO to LBO for a file. + * The information it queries is either stored in the extent map field + * of the oin or is stored in the allocation file and needs to be retrieved, + * decoded and updated in the extent map. + * + */ +int ocfs_lookup_file_allocation (ocfs_super * osb, ocfs_inode * oin, sb8 Vbo, + sb8 * Lbo, ub4 ByteCount, ub4 * NumIndex, + void **Buffer) +{ + int status = 0; + ocfs_file_entry *FileEntry = NULL; + ub4 allocSize = 0; + ub8 length = 0, remainingLength = 0; + ub1 *buffer = NULL; + ub4 Runs; + sb8 localVbo; + ocfs_extent_group *OcfsExtent = NULL; + ocfs_io_runs *IoRuns = NULL; + + LOG_ENTRY (); + + OCFS_ASSERT (osb); + OCFS_ASSERT (oin); + *Buffer = NULL; + + if (Vbo >= oin->alloc_size) { + goto READ_ENTRY; + } + + status = ocfs_extent_map_load (osb, &(oin->map), Buffer, Vbo, ByteCount, + &Runs); + if (status >= 0) { + /* If status is success, we found the needed extent map */ + goto finally; + } + + READ_ENTRY: + if (*Buffer) { + ocfs_free (*Buffer); + *Buffer = NULL; + } + + remainingLength = ByteCount; + length = 0; + localVbo = Vbo; + + /* We are looking for a Vbo, but it is not in the Map or not Valid. 
*/ + /* Thus we have to go to the disk, and update the Map */ + + /* Read the file Entry corresponding to this */ + status = ocfs_get_file_entry (osb, &FileEntry, oin->file_disk_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + if (!IS_VALID_FILE_ENTRY (FileEntry)) { + LOG_ERROR_STATUS (status = -EINVAL); + goto finally; + } + + if (Vbo >= (sb8) FileEntry->alloc_size) { + LOG_ERROR_STATUS (status = -EFAIL); + goto finally; + } + + if (FileEntry->local_ext) { + status = ocfs_update_extent_map (osb, &oin->map, FileEntry, + NULL, NULL, 1); + } else { + /* Extents are branched and we are no longer using Local Extents */ + /* for this File Entry. */ + allocSize = (NUM_SECTORS_IN_LEAF_NODE + + FileEntry->granularity) * OCFS_SECTOR_SIZE; + length = OCFS_ALIGN (allocSize, PAGE_SIZE); + buffer = ocfs_malloc ((ub4) length); + if (buffer == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + + OcfsExtent = (ocfs_extent_group *) buffer; + + status = ocfs_get_leaf_extent (osb, FileEntry, localVbo, OcfsExtent); + if (status < 0) { + goto finally; + } + + while (1) { + if (!IS_VALID_EXTENT_DATA (OcfsExtent)) { + LOG_ERROR_STATUS(status = -EFAIL); + goto finally; + } + + status = + ocfs_update_extent_map (osb, &oin->map, OcfsExtent, + &localVbo, &remainingLength, + 2); + if (status < 0) { + LOG_ERROR_STATUS(status); + goto finally; + } + + if (remainingLength > 0) { + OCFS_ASSERT (OcfsExtent->next_data_ext); + + status = ocfs_read_sector (osb, OcfsExtent, + OcfsExtent-> + next_data_ext); + if (status < 0) { + LOG_ERROR_STATUS(status); + goto finally; + } + } else { + break; + } + } + } + + status = + ocfs_extent_map_load (osb, &oin->map, Buffer, Vbo, ByteCount, &Runs); + if (status < 0) { + goto finally; + } + + finally: + if (status >= 0) { + IoRuns = (ocfs_io_runs *) (*Buffer); + *(NumIndex) = Runs; + *(Lbo) = IoRuns[0].disk_off; + } + + /* Should send a null for IoRuns in case of onl 1 extent */ + LOG_TRACE_ARGS ("Num of Runs is: 
%d\n", Runs); + + ocfs_safefree (buffer); + + if (FileEntry) { + ocfs_release_file_entry (FileEntry); + } + + LOG_EXIT_STATUS (status); + return (status); +} /* ocfs_lookup_file_allocation */ + +/* ocfs_extent_map_load() + * + */ +int ocfs_extent_map_load (ocfs_super * osb, + ocfs_extent_map * Map, + void **Buffer, sb8 Vbo, ub8 ByteCount, ub4 * RetRuns) +{ + int status = -EFAIL; + ocfs_io_runs *IoRuns = NULL; + ub4 BufferOffset; + ub4 ioExtents = OCFS_MAX_DATA_EXTENTS; + ub8 length = 0, remainingLength = 0; + ub4 Runs, Index, ioRunSize; + sb8 localLbo; + sb8 localVbo; + + LOG_ENTRY (); + + ioRunSize = OCFS_MAX_DATA_EXTENTS * sizeof (ocfs_io_runs); + IoRuns = ocfs_malloc (ioRunSize); + if (IoRuns == NULL) { + LOG_ERROR_STATUS(status = -ENOMEM); + goto bail; + } + + remainingLength = ByteCount; + Runs = 0; + length = 0; + localVbo = Vbo; + BufferOffset = 0; + + while (ocfs_lookup_extent_map_entry + (osb, Map, localVbo, &localLbo, &length, &Index)) { + IoRuns[Runs].disk_off = localLbo; + IoRuns[Runs].byte_cnt = length; + IoRuns[Runs].offset = BufferOffset; + + if (length >= remainingLength) { + IoRuns[Runs].byte_cnt = remainingLength; + status = 0; + Runs++; + break; + } else { + Runs++; + if (Runs >= ioExtents) { + status = ocfs_adjust_allocation (&IoRuns, &ioRunSize); + if (status < 0) { + LOG_ERROR_STATUS(status); + goto bail; + } + + ioExtents *= 2; + } + localVbo += length; + BufferOffset += length; + remainingLength -= length; + continue; + } + } + + (*RetRuns) = Runs; + (*Buffer) = IoRuns; + + bail: + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_extent_map_load */ + + +/* + * ocfs_lookup_extent_map_entry() + * + * search for an VBO in the extent map passed on. + * + * Returns true if entry available in the extent map, false otherwise. 
+ */ +bool ocfs_lookup_extent_map_entry (ocfs_super * osb, + ocfs_extent_map * Map, + sb8 Vbo, sb8 * Lbo, ub8 * SectorCount, ub4 * Index) +{ + bool ret; + sb8 LiLbo = 0; + sb8 LiSectorCount = 0; + ub8 remainder = 0; + + LOG_ENTRY (); + + /* Sector Align the vbo */ + remainder = Vbo & OCFS_MOD_SECTOR_SIZE; + + ret = ocfs_extent_map_lookup (Map, (Vbo >> OCFS_LOG_SECTOR_SIZE), &LiLbo, + &LiSectorCount, Index); + if ((ub4) LiLbo != -1) { + *Lbo = (((sb8) LiLbo) << (sb8) OCFS_LOG_SECTOR_SIZE); + if (ret) { + *Lbo += remainder; + } + } else { + ret = false; + *Lbo = 0; + } + + *SectorCount = LiSectorCount; + if (*SectorCount) { + *SectorCount <<= (sb8) OCFS_LOG_SECTOR_SIZE; + if (*SectorCount == 0) { + *SectorCount = (ub4) - 1; + } + + if (ret) { + *SectorCount -= remainder; + } + } + + LOG_EXIT_ULONG (ret); + return ret; +} /* ocfs_lookup_extent_map_entry */ + + +/* + * ocfs_adjust_allocation() + * + * It gets called if the number of runs is more than a default number and + * so will free up the previously allocated memory and allocated twice the + * prevously allocated memory. 
+ * + */ +int ocfs_adjust_allocation (ocfs_io_runs ** IoRuns, ub4 * ioRunSize) +{ + int status = 0; + ub4 runSize = 0; + ocfs_io_runs *localIoRuns = NULL; + + LOG_ENTRY (); + + OCFS_ASSERT (IoRuns); + + runSize = (*ioRunSize) * 2; + localIoRuns = ocfs_malloc (runSize); + if (localIoRuns == NULL) { + LOG_ERROR_STATUS(status = -ENOMEM); + goto bail; + } + + memcpy (localIoRuns, *IoRuns, *ioRunSize); + ocfs_free (*IoRuns); + *IoRuns = localIoRuns; + *ioRunSize = runSize; + + /* Don't free localIoRuns here */ +bail: + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_adjust_allocation */ + +/* ocfs_update_extent_map() + * + */ +int ocfs_update_extent_map (ocfs_super * osb, ocfs_extent_map * Map, + void *Buffer, sb8 * localVbo, + ub8 * remainingLength, ub4 Flag) +{ + int status = 0; + ocfs_file_entry *FileEntry; + ocfs_extent_group *OcfsExtent; + sb8 tempVbo; + sb8 tempLbo; + ub8 tempSize; + bool Ret; + int j; + + LOG_ENTRY (); + + if (Flag == 1) { + FileEntry = (ocfs_file_entry *) Buffer; + + OCFS_ASSERT (FileEntry->local_ext); + + for (j = 0; j < FileEntry->next_free_ext; j++) { + tempVbo = FileEntry->extents[j].file_off; + tempLbo = FileEntry->extents[j].disk_off; + tempSize = FileEntry->extents[j].num_clusters; + + /* Add the Extent to extent map list */ + Ret = + ocfs_add_extent_map_entry (osb, Map, tempVbo, tempLbo, + tempSize); + if (!Ret) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto bail; + } + } + } else { + ub8 localLength = 0; + + OcfsExtent = (ocfs_extent_group *) Buffer; + + for (j = 0; j < OcfsExtent->next_free_ext; j++) { + if ((sb8) (OcfsExtent->extents[j].file_off + + OcfsExtent->extents[j].num_clusters) > + (*localVbo)) { + tempVbo = OcfsExtent->extents[j].file_off; + tempLbo = OcfsExtent->extents[j].disk_off; + tempSize = OcfsExtent->extents[j].num_clusters; + + /* Add the Extent to extent map list */ + Ret = + ocfs_add_extent_map_entry (osb, Map, tempVbo, + tempLbo, + (ub4) tempSize); + if (!Ret) { + LOG_ERROR_STATUS (status -ENOMEM); + goto 
bail; + } + + localLength = + (tempSize - ((*localVbo) - tempVbo)); + + /* Since we have read the disk we should add some */ + /* more Entries to the extent map list */ + if (localLength >= (*remainingLength)) { + (*remainingLength) = 0; + status = 0; + goto bail; + } else { + (*remainingLength) -= localLength; + (*localVbo) += localLength; + } + } + } + + if ((OcfsExtent->next_free_ext != OCFS_MAX_DATA_EXTENTS) && + (*remainingLength)) { + OCFS_BREAKPOINT (); + } + + } + + bail: + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_update_extent_map */ + +/* + * ocfs_add_extent_map_entry() + * + * adds an entry to an extent map + * + * Returns true on success, false otherwise. + */ +bool ocfs_add_extent_map_entry (ocfs_super * osb, + ocfs_extent_map * Map, sb8 Vbo, sb8 Lbo, ub8 ByteCount) +{ + bool ret; + + LOG_ENTRY (); + + /* Convert the Bytes in to number of Sectors */ + if (ByteCount) { + /* Round up sectors */ + ByteCount--; + ByteCount >>= OCFS_LOG_SECTOR_SIZE; + ByteCount++; + } + + /* Make the ByteOffsets in to Sector numbers. */ + /* In case of 512 byte sectors the OcfsLogOf gives back a value of 9. */ + /* And by doing a right shift of 9 bits we are actually dividing */ + /* the value by 512. 
*/ + Vbo >>= OCFS_LOG_SECTOR_SIZE; + Lbo >>= OCFS_LOG_SECTOR_SIZE; + + ret = ocfs_extent_map_add (Map, ((sb8) Vbo), ((sb8) Lbo), ((sb8) ByteCount)); + + LOG_EXIT_ULONG (ret); + return ret; +} /* ocfs_add_extent_map_entry */ + +/* ocfs_get_leaf_extent() + * + */ +int ocfs_get_leaf_extent (ocfs_super * osb, + ocfs_file_entry * FileEntry, + sb8 Vbo, ocfs_extent_group * OcfsDataExtent) +{ + int status = -EFAIL, tempstat; + ub4 i, j, length; + ocfs_extent_group *ExtentHeader = NULL; + ub8 childDiskOffset = 0; + + LOG_ENTRY (); + + for (i = 0; i < FileEntry->next_free_ext; i++) { + if ((sb8) (FileEntry->extents[i].file_off + + FileEntry->extents[i].num_clusters) > Vbo) { + childDiskOffset = FileEntry->extents[i].disk_off; + break; + } + } + + if (childDiskOffset == 0) { + status = -EINVAL; + goto finally; + } + + if (FileEntry->granularity >= 1) { + length = PAGE_SIZE; + ExtentHeader = ocfs_malloc ((ub4) length); + if (ExtentHeader == NULL) { + status = -ENOMEM; + LOG_ERROR_STATUS (status); + goto finally; + } + } + + for (i = 0; i < FileEntry->granularity; i++) { + tempstat = ocfs_read_sector (osb, ExtentHeader, childDiskOffset); + if (tempstat < 0) { + status = tempstat; + goto finally; + } + + if (!IS_VALID_EXTENT_HEADER (ExtentHeader)) { + LOG_ERROR_STATUS(status = -EFAIL); + goto finally; + } + + for (j = 0; j < ExtentHeader->next_free_ext; j++) { + if ((sb8) (ExtentHeader->extents[j].file_off + + ExtentHeader->extents[j].num_clusters) > Vbo) + { + childDiskOffset = + ExtentHeader->extents[j].disk_off; + break; + } + } + + if (i != (FileEntry->granularity - 1)) { + continue; + } else { + break; + } + } + + tempstat = ocfs_read_sector (osb, OcfsDataExtent, childDiskOffset); + if (tempstat < 0) { + status = tempstat; + goto finally; + } + + if (!IS_VALID_EXTENT_DATA (OcfsDataExtent)) { + LOG_ERROR_STATUS(status = -EFAIL); + goto finally; + } + + finally: + ocfs_safefree (ExtentHeader); + + LOG_EXIT_STATUS (status); + return (status = 0); //SM WHAT????? 
+} /* ocfs_get_leaf_extent */ diff -urNp ocfs/fs/ocfs/Common/ocfsgencreate.c 2.4.20pre5aa2/fs/ocfs/Common/ocfsgencreate.c --- ocfs/fs/ocfs/Common/ocfsgencreate.c Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/ocfsgencreate.c Fri Sep 6 01:46:16 2002 @@ -0,0 +1,2926 @@ +/* + * ocfsgencreate.c + * + * Does lots of things sort-of associated with creating a file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#include + +/* Tracing */ +#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_CREATE + +/* + * ocfs_verify_update_oin() + * + * This function searches the cached oin list for a volume for a given + * filename. We currently cache all the oin's. We should hash this list. + * + */ +int ocfs_verify_update_oin (ocfs_super * osb, ocfs_inode * oin) +{ + int status = 0; + ocfs_file_entry *fe = NULL; + ocfs_lock_res *pLockRes; + struct inode *inode = NULL; + struct list_head *iter; + int disk_len; + + /* We are setting the oin Updated flag in the end. 
*/ + LOG_ENTRY (); + + OCFS_ASSERT (oin); + + status = ocfs_get_file_entry (osb, &fe, oin->file_disk_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + /* Make sure that what we found is not a directory. */ + if (!(oin->oin_flags & OCFS_OIN_DIRECTORY)) { + /* Add checks as needed */ + if ((fe->sync_flags & OCFS_SYNC_FLAG_MARK_FOR_DELETION) || + (!(fe->sync_flags & OCFS_SYNC_FLAG_VALID))) { + if (fe->sync_flags & OCFS_SYNC_FLAG_MARK_FOR_DELETION) { + LOG_TRACE_STR + ("File Entry is marked for deletion"); + } else { + LOG_TRACE_STR ("File Entry is invalid"); + } + + OCFS_SET_FLAG (oin->oin_flags, OCFS_OIN_INVALID); + /* ?? I think we should remove the oin here from the oin list */ + status = -ENOENT; + goto leave; + } + + disk_len = strlen(fe->filename); + inode = oin->inode; + if (inode == NULL) { + LOG_ERROR_STR ("oin has no matching inode!!!!"); + OCFS_SET_FLAG (oin->oin_flags, OCFS_OIN_INVALID); + status = -ENOENT; + goto leave; + } + + status = -ENOENT; + list_for_each (iter, &(inode->i_dentry)) { + struct dentry *dentry = list_entry (iter, struct dentry, d_alias); + if (dentry->d_name.len == disk_len && + strncmp(dentry->d_name.name, fe->filename, disk_len)==0) + { + status = 0; + } + } + if (status < 0) { + LOG_TRACE_STR ("file entry name did not match inode"); + OCFS_SET_FLAG (oin->oin_flags, OCFS_OIN_INVALID); + goto leave; + } + + + if ((oin->alloc_size != (sb8) fe->alloc_size) || + (inode->i_size != (sb8) fe->file_size) || + (oin->chng_seq_num != DISK_LOCK_SEQNUM (fe)) || + inode->i_uid != fe->uid || + inode->i_gid != fe->gid || inode->i_mode != fe->prot_bits) { + + LOG_TRACE_STR + ("Allocsize, filesize or seq no did not match"); + oin->alloc_size = fe->alloc_size; + inode->i_size = fe->file_size; + oin->chng_seq_num = DISK_LOCK_SEQNUM (fe); + + inode->i_blocks = (inode->i_size + 512) >> 9; + inode->i_uid = fe->uid; + inode->i_gid = fe->gid; + inode->i_mode = fe->prot_bits; + inode->i_blksize = (ub4) osb->vol_layout.cluster_size; 
+ inode->i_ctime = fe->create_time; + inode->i_atime = fe->modify_time; + inode->i_mtime = fe->modify_time; + + if (!S_ISDIR (inode->i_mode)) { + truncate_inode_pages (inode->i_mapping, 0); + } + + switch (fe->attribs & (~OCFS_ATTRIB_FILE_CDSL)) { + case OCFS_ATTRIB_DIRECTORY: + inode->i_size = OCFS_DEFAULT_DIR_NODE_SIZE; + inode->i_mode |= S_IFDIR; + break; + case OCFS_ATTRIB_SYMLINK: + inode->i_mode |= S_IFLNK; + break; + case OCFS_ATTRIB_REG: + inode->i_mode |= S_IFREG; + break; + case OCFS_ATTRIB_CHAR: + case OCFS_ATTRIB_BLOCK: + case OCFS_ATTRIB_FIFO: + case OCFS_ATTRIB_SOCKET: + { + kdev_t kdev; + + if (fe->attribs == OCFS_ATTRIB_CHAR) + inode->i_mode |= S_IFCHR; + else if (fe->attribs == OCFS_ATTRIB_BLOCK) + inode->i_mode |= S_IFBLK; + else if (fe->attribs == OCFS_ATTRIB_FIFO) + inode->i_mode |= S_IFIFO; + else if (fe->attribs == OCFS_ATTRIB_SOCKET) + inode->i_mode |= S_IFSOCK; + + inode->i_rdev = NODEV; + kdev = MKDEV (fe->dev_major, fe->dev_minor); + init_special_inode (inode, inode->i_mode, + kdev_t_to_nr (kdev)); + break; + } + default: + LOG_ERROR_STR ("invalid file type!\n"); + inode->i_mode |= S_IFREG; + break; + } + + if (fe->local_ext) { + sb8 tempVbo; + sb8 tempLbo; + ub8 tempSize; + ub4 j; + + /* Add the Extents to extent map */ + for (j = 0; j < fe->next_free_ext; j++) { + tempVbo = fe->extents[j].file_off; + tempLbo = fe->extents[j].disk_off; + tempSize = fe->extents[j].num_clusters; + + ocfs_add_extent_map_entry (osb, &oin->map, + tempVbo, tempLbo, + tempSize); + } + } + } + + pLockRes = oin->lock_res; + + /* ??? 
we need to the lock resource before updating it */ + if (pLockRes) { + pLockRes->lock_type = DISK_LOCK_FILE_LOCK (fe); + pLockRes->master_node_num = DISK_LOCK_CURRENT_MASTER (fe); + pLockRes->oin_openmap = DISK_LOCK_OIN_MAP (fe); + pLockRes->last_write_time = DISK_LOCK_LAST_WRITE (fe); + pLockRes->last_read_time = DISK_LOCK_LAST_READ (fe); + pLockRes->reader_node_num = DISK_LOCK_READER_NODE (fe); + pLockRes->writer_node_num = DISK_LOCK_WRITER_NODE (fe); + } + + status = 0; + } else { + /* Update for the DIRECTORY */ + } + + leave: + if (status == 0) { + OIN_UPDATED (oin); + } + + if (fe != NULL) { + ocfs_release_file_entry (fe); + } + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_verify_update_oin */ + +/* + * ocfs_find_contiguous_space_from_bitmap() + * + * This function looks for free space in the volume based on the bitmap. + * It looks for contiguous space only and if it founds the space available + * it returns cluster bitmap offset. Each bit in Cluster bitmap represents + * memory equal to cluster size (specified during format). + * + * TODO: The Bitmap stuff needs to be changed for handling more than 32 bits... + * Although we can go upto 4k(clustersize) * 8 * 4M(max 32 bits for now...) + * + * Returns 0 on success, < 0 on error. 
+ */ +int ocfs_find_contiguous_space_from_bitmap (ocfs_super * osb, + ub8 file_size, + ub8 * cluster_off, ub8 * cluster_count, bool sysfile) +{ + int status = 0, tmpstat; + ub4 size = 0, ByteOffset = 0, ClusterCount = 0; + ub8 ByteCount = 0; + ub4 LargeAlloc = 0; + static ub4 LargeAllocOffset = 0; + static ub4 SmallAllocOffset = 0; + ub8 startOffset = 0; + bool bLockAcquired = false; + ocfs_lock_res *pLockResource; + + LOG_ENTRY (); + + OCFS_ASSERT (osb); + + ocfs_down_sem (&(osb->vol_alloc_lock), true); + + /* Get the allocation lock here */ + status = + ocfs_acquire_lock (osb, OCFS_BITMAP_LOCK_OFFSET, + OCFS_DLM_ENABLE_CACHE_LOCK, 0, &pLockResource, + NULL); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + bLockAcquired = true; + + ByteCount = file_size; + + /* Calculate the size in Bytes */ + size = (ub4) OCFS_SECTOR_ALIGN ((osb->cluster_bitmap.size) / 8); + + startOffset = osb->vol_layout.bitmap_off; + status = + ocfs_read_metadata (osb, osb->cluster_bitmap.buf, size, startOffset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + /* Round off the byte count to next clustersize (bytes per cluster) */ + ByteCount += (ByteCount % (osb->vol_layout.cluster_size)) ? + (osb->vol_layout.cluster_size - + (ByteCount % (osb->vol_layout.cluster_size))) : 0; + + if (ByteCount == 0) { + LOG_ERROR_STR ("DISK_FULL?: Bytecount==0"); + status = 0; + goto leave; + } + + ClusterCount = + (ub4) ((ub8) ByteCount / (ub8) osb->vol_layout.cluster_size); + + if (sysfile ? 
(ClusterCount > osb->vol_layout.num_clusters) : + (ClusterCount > (osb->vol_layout.num_clusters - + ((8 * ONE_MEGA_BYTE) / osb->vol_layout.cluster_size)))){ + LOG_ERROR_STR ("Disk Full"); + status = -ENOSPC; + goto leave; + } + + /* This function will check for clear bits in the Bitmap for consecutive */ + /* clear bits equal to ClusterCount */ + + /* If we create a chunk that is larger than 5% of the disksize, then start */ + /* allocation at 5%, so that small files stay in the beginning as much as possible */ + + if (ClusterCount > (osb->vol_layout.num_clusters / 20)) { + LargeAlloc = 1; + LargeAllocOffset = (osb->vol_layout.num_clusters / 20); + } + + + ByteOffset = ocfs_find_clear_bits (&osb->cluster_bitmap, ClusterCount, + LargeAlloc ? LargeAllocOffset : + SmallAllocOffset, sysfile ? 0 : + ((8 * ONE_MEGA_BYTE) / osb->vol_layout.cluster_size)); + + /* if fails we should try again from the beginning of the disk. */ + /* in the end we pass # of bits we want to keep for system file extention only */ + /* right now if we run out of diskspace, we still have 8mb free for a systemfile */ + + if (ByteOffset == -1 && LargeAlloc) { + osb->cluster_bitmap.failed++; + ByteOffset = + ocfs_find_clear_bits (&osb->cluster_bitmap, ClusterCount, 0, + sysfile ? 0 :((8 * ONE_MEGA_BYTE) / osb->vol_layout.cluster_size)); + } + + /* It returns -1 on failure, otherwise ByteOffset points at the */ + /* location inb bitmap from where there are ClusterCount no of bits */ + /* are free. 
*/ + + if (ByteOffset == -1) { + LOG_ERROR_ARGS + ("Cannot allocate contiguous amount of clusters, want %u\n", + ClusterCount); + status = -ENOSPC; + goto leave; + } + +#ifdef SMART_ALLOC + /* FIXME : we try to be smart and keep track of the last offset we were at + * need to add the same in delfile so that we put it lower again + */ + if (LargeAlloc) { + osb->cluster_bitmap.ok_retries++; + LargeAllocOffset = ByteOffset + ClusterCount; + } else { + SmallAllocOffset = ByteOffset + ClusterCount; + } +#endif + + LOG_TRACE_ARGS ("The byte offset is %u\n", ByteOffset); + + ocfs_set_bits (&osb->cluster_bitmap, ByteOffset, ClusterCount); + + startOffset = osb->vol_layout.bitmap_off; + + status = + ocfs_write_metadata (osb, osb->cluster_bitmap.buf, size, startOffset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + *cluster_off = ByteOffset; + *cluster_count = ClusterCount; + status = 0; + + leave: + ocfs_up_sem (&(osb->vol_alloc_lock)); + if (bLockAcquired) { + tmpstat = + ocfs_release_lock (osb, OCFS_BITMAP_LOCK_OFFSET, + OCFS_DLM_EXCLUSIVE_LOCK, 0, pLockResource); + if (tmpstat < 0) { + status = tmpstat; + LOG_ERROR_STATUS (status); + /* TODO FAIL VOLUME */ + } + } + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_find_contiguous_space_from_bitmap */ + +/* ocfs_create_oin_from_entry() + * + */ +int +ocfs_create_oin_from_entry (ocfs_super * osb, + ocfs_file_entry * fe, + ocfs_inode ** new_oin, + ub8 parent_dir_off, ocfs_inode * parent_oin) +{ + int status = 0; + ub8 allocSize = 0; + ub8 endofFile = 0; + ocfs_inode *oin; + ub8 lockId; + int j; + sb8 tempVbo; + sb8 tempLbo; + ub8 tempSize; + ocfs_extent_group *buffer = NULL; + ocfs_extent_group *pOcfsExtent; + bool bRet; + + LOG_ENTRY (); + + /* First insert on the sector node tree... 
*/ + + /* Check for state on the disk and notify master */ + *new_oin = NULL; + + /* We have a new file on disk , so create an oin for the file */ + status = ocfs_create_new_oin (&oin, &allocSize, &endofFile, NULL, osb); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + oin->parent_dirnode_off = parent_dir_off; + oin->chng_seq_num = DISK_LOCK_SEQNUM (fe); + oin->alloc_size = fe->alloc_size; + + if (fe->this_sector == 0) + OCFS_BREAKPOINT (); + + /* The oins gets Linked into the osb in this function */ + + if (fe->attribs & OCFS_ATTRIB_DIRECTORY) { + lockId = fe->extents[0].disk_off; + status = ocfs_initialize_oin (oin, osb, OCFS_OIN_DIRECTORY | OCFS_OIN_IN_USE, + NULL, fe->this_sector, lockId); + if (status < 0) { + goto leave; + } + oin->dir_disk_off = fe->extents[0].disk_off; + } else { + status = ocfs_initialize_oin (oin, osb, OCFS_OIN_IN_USE, NULL, + fe->this_sector, fe->this_sector); + if (status < 0) + goto leave; + + if (fe->local_ext) { + for (j = 0; j < fe->next_free_ext; j++) { + tempVbo = fe->extents[j].file_off; + tempLbo = fe->extents[j].disk_off; + tempSize = fe->extents[j].num_clusters; + + /* Add the Extent to extent map */ + if (!ocfs_add_extent_map_entry (osb, &oin->map, + tempVbo, tempLbo, + tempSize)) { + LOG_TRACE_ARGS + ("ocfs_add_extent_map_entry failed for offset (%d.%d), " + "size (%u.%u)\n", HI (tempVbo), + LO (tempVbo), HI (tempSize), + LO (tempSize)); + } + } + } else { + ub4 alloSize; + ub4 length; + + /* Extents are branched and we are no longer using */ + /* Local Extents for this File Entry. 
*/ + alloSize = + (NUM_SECTORS_IN_LEAF_NODE + + fe->granularity) * OCFS_SECTOR_SIZE; + + length = (ub4) OCFS_ALIGN (alloSize, PAGE_SIZE); + + buffer = ocfs_malloc (length); + if (buffer == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto leave; + } + + pOcfsExtent = (ocfs_extent_group *) buffer; + + status = ocfs_get_leaf_extent (osb, fe, 0, pOcfsExtent); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + while (1) { + if (!IS_VALID_EXTENT_DATA (pOcfsExtent)) { + LOG_ERROR_STATUS(status = -EFAIL); + goto leave; + } + + for (j = 0; j < pOcfsExtent->next_free_ext; j++) { + tempVbo = + pOcfsExtent->extents[j].disk_off; + tempLbo = + pOcfsExtent->extents[j].disk_off; + tempSize = + pOcfsExtent->extents[j]. + num_clusters; + + /* Add the Extent to extent map */ + bRet = + ocfs_add_extent_map_entry (osb, + &oin->map, + tempVbo, + tempLbo, + (ub4) + tempSize); + if (!bRet) { + LOG_ERROR_STATUS (status = + -ENOMEM); + return (status); + } + } + + if (pOcfsExtent->next_data_ext > 0) { + OCFS_ASSERT (pOcfsExtent-> + next_data_ext); + + status = + ocfs_read_sector (osb, pOcfsExtent, + pOcfsExtent-> + next_data_ext); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + } else + break; + } + } + } + + *new_oin = oin; + leave: + ocfs_safefree (buffer); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_create_oin_from_entry */ + +/* ocfs_find_files_on_disk() + * + */ +int +ocfs_find_files_on_disk (ocfs_super * osb, + ub8 parent_off, + struct qstr * file_name, + ocfs_file_entry * fe, ocfs_file * ofile) +{ + int status = -ENOENT; + ocfs_dir_node *pDirNode = NULL; + ub8 thisDirNode, lockId; + int tmpstat; + bool bRet, bLockAcquired = false; + ocfs_lock_res *pLockResource; + bool bReadDirNode = true; + + LOG_ENTRY_ARGS ("osb=%p, parent=%u.%u, fname=%p, entry=%p, ofile=%p\n", + osb, parent_off, file_name, fe, ofile); + + lockId = parent_off; + + /* Get a shared lock on the directory... 
*/ + tmpstat = + ocfs_acquire_lock (osb, lockId, OCFS_DLM_SHARED_LOCK, FLAG_DIR, + &pLockResource, NULL); + if (tmpstat < 0) { + /* Volume should be disabled in this case */ + status = tmpstat; + goto leave; + } + bLockAcquired = true; + + if ((ofile == NULL) + || ((ofile != NULL) && (ofile->curr_dir_buf == NULL))) { + pDirNode = ocfs_malloc (osb->vol_layout.dir_node_size); + if (pDirNode == NULL) { + status = -ENOMEM; + LOG_ERROR_STATUS (status); + goto leave; + } + + memset (pDirNode, 0, osb->vol_layout.dir_node_size); + + if (ofile != NULL) { + ofile->curr_dir_buf = pDirNode; + } + } else { + pDirNode = ofile->curr_dir_buf; + } + + if ((ofile != NULL) && (ofile->curr_dir_off > 0)) { + thisDirNode = ofile->curr_dir_off; + if (pDirNode->node_disk_off == thisDirNode) { + bReadDirNode = false; + } + } else { + thisDirNode = parent_off; + } + + if (bReadDirNode) { + tmpstat = ocfs_read_dir_node (osb, pDirNode, thisDirNode); + if (tmpstat < 0) { + /* Volume should be disabled in this case */ + status = tmpstat; + goto leave; + } + } + + /* if file_name is null here, it means that we want to walk the */ + /* directory for all files if it is not null, it means we want */ + /* a particular file */ + if (file_name == NULL) { + bRet = ocfs_walk_dir_node (osb, pDirNode, fe, ofile); + if (bRet) + status = 0; + } else { + bRet = ocfs_search_dir_node (osb, pDirNode, file_name, fe, ofile); + if (bRet) + status = 0; + } + + if (status >= 0 && (fe->attribs & OCFS_ATTRIB_FILE_CDSL)) { + /* Return the relevant CDSL for this node */ + ocfs_find_create_cdsl (osb, fe); + } + + leave: + if (bLockAcquired) { + if (pLockResource) // WIM + { + tmpstat = + ocfs_release_lock (osb, lockId, OCFS_DLM_SHARED_LOCK, + FLAG_DIR, pLockResource); + if (tmpstat < 0) { + // status = tmpstat ? 
+ /* Volume should be disabled in this case */ + } + } + } + + if (ofile == NULL) { + ocfs_safefree (pDirNode); + } + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_find_files_on_disk */ + +/* ocfs_initialize_dir_node() + * + */ +void +ocfs_initialize_dir_node (ocfs_super * osb, + ocfs_dir_node * dir_node, + ub8 bitmap_off, ub8 file_off, ub4 node) +{ + LOG_ENTRY (); + + memset (dir_node, 0, sizeof (ocfs_dir_node)); + strcpy (dir_node->signature, OCFS_DIR_NODE_SIGNATURE); + + dir_node->num_ents = (ub1) osb->max_dir_node_ent; + dir_node->node_disk_off = bitmap_off; + dir_node->alloc_file_off = file_off; + dir_node->alloc_node = node; + + DISK_LOCK_CURRENT_MASTER (dir_node) = OCFS_INVALID_NODE_NUM; + + dir_node->free_node_ptr = INVALID_NODE_POINTER; + dir_node->next_node_ptr = INVALID_NODE_POINTER; + dir_node->indx_node_ptr = INVALID_NODE_POINTER; + dir_node->next_del_ent_node = INVALID_NODE_POINTER; + dir_node->head_del_ent_node = INVALID_NODE_POINTER; + + dir_node->first_del = INVALID_DIR_NODE_INDEX; + dir_node->index_dirty = 0; + + LOG_EXIT (); + return; +} /* ocfs_initialize_dir_node */ + +/* ocfs_delete_file_entry() + * + */ +int +ocfs_delete_file_entry (ocfs_super * osb, + ocfs_file_entry * fe, ub8 parent_off, sb4 log_node_num) +{ + int status = 0; + ub4 size; + ocfs_cleanup_record *pCleanupLogRec = NULL; + + LOG_ENTRY (); + + size = sizeof (ocfs_cleanup_record); + size = (ub4) OCFS_ALIGN (size, PAGE_SIZE); + + pCleanupLogRec = ocfs_malloc (size); + if (pCleanupLogRec == NULL) { + status = -ENOMEM; + goto leave; + } + + /* Now start writing the cleanup log of the filentry master. */ + /* It is this node for normal cases and or the node we are doing */ + /* recovery for. 
*/ + + pCleanupLogRec->log_id = osb->curr_trans_id; + pCleanupLogRec->log_type = LOG_DELETE_ENTRY; + + pCleanupLogRec->rec.del.node_num = log_node_num; + pCleanupLogRec->rec.del.ent_del = fe->this_sector; + pCleanupLogRec->rec.del.parent_dirnode_off = parent_off; + pCleanupLogRec->rec.del.flags = 0; + + status = ocfs_write_node_log (osb, (ocfs_log_record *) pCleanupLogRec, + log_node_num, LOG_CLEANUP); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + if (fe->link_cnt) { + /* Decrement Link count when implementing links... TODO*/ + OCFS_SET_FLAG (fe->sync_flags, OCFS_SYNC_FLAG_NAME_DELETED); + fe->sync_flags &= (~OCFS_SYNC_FLAG_VALID); + } else { + OCFS_SET_FLAG (fe->sync_flags, OCFS_SYNC_FLAG_MARK_FOR_DELETION); + fe->sync_flags &= (~OCFS_SYNC_FLAG_VALID); + } + + status = ocfs_write_file_entry (osb, fe, fe->this_sector); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + /* Free up all the bits in the bitmap. */ + if (fe->attribs & OCFS_ATTRIB_DIRECTORY) { + /* Write to the cleanup record which bits need to be freed for */ + /* the ocfs_dir_node */ + + /* Iterate through all the dir nodes for this directory and free them */ + + /* TODO Free the index nodes too. 
*/ + + status = ocfs_free_directory_block (osb, fe, log_node_num); + goto leave; + } else { + /* Write to the cleanup record which bits need to be freed for */ + /* the cluster bitmap */ + status = ocfs_free_file_extents (osb, fe, log_node_num); + goto leave; + } + + leave: + ocfs_safefree (pCleanupLogRec); + + LOG_EXIT_STATUS (status); + return (status); +} /* ocfs_delete_file_entry */ + +/* ocfs_rename_file() + * + */ +int +ocfs_rename_file (ocfs_super * osb, + ub8 parent_off, struct qstr * file_name, ub8 file_off) +{ + int status = 0; + ocfs_dir_node *pLockNode = NULL; + ocfs_file_entry *fe = NULL; + ub8 changeSeqNum = 0; + bool bAcquiredLock = false; + ub4 lockFlags = 0; + ocfs_lock_res *pLockResource; + ub8 lockId = 0; + bool bParentLockAcquired = false; + ub4 parentLockFlags; + ocfs_lock_res *pParentLockResource; + ub8 parentLockId; + ub4 index; + + LOG_ENTRY (); + + parentLockId = parent_off; + parentLockFlags = (FLAG_FILE_CREATE | FLAG_DIR); + status = ocfs_acquire_lock (osb, parentLockId, OCFS_DLM_EXCLUSIVE_LOCK, + (ub4) parentLockFlags, &pParentLockResource, + NULL); // never used, so why not pass NULL? + // (ocfs_file_entry *)pLockNode); + if (status < 0) { + LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n", status, + HI (parentLockId), LO (parentLockId)); + goto leave; + } + + bParentLockAcquired = true; + + status = ocfs_get_file_entry (osb, &fe, file_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + if (fe->attribs & OCFS_ATTRIB_DIRECTORY) { + lockId = fe->extents[0].disk_off; + lockFlags = (FLAG_DIR | FLAG_FILE_RENAME); + } else { + lockId = fe->this_sector; + lockFlags = FLAG_FILE_RENAME; + } + + status = + ocfs_acquire_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, lockFlags, + &pLockResource, NULL); + if (status < 0) { + LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n", status, + HI (lockId), LO (lockId)); + goto leave; + } + + bAcquiredLock = true; + + /* Change the name and write it back.... 
*/ + fe->filename[0] = '\0'; + strncpy (fe->filename, file_name->name, file_name->len); + fe->filename[file_name->len] = '\0'; + + DISK_LOCK_SEQNUM (fe) = changeSeqNum; + + /* Set the Valid bit here */ + SET_VALID_BIT (fe->sync_flags); + fe->sync_flags &= ~(OCFS_SYNC_FLAG_CHANGE); + + status = + ocfs_get_file_entry (osb, (ocfs_file_entry **) & pLockNode, + fe->dir_node_ptr); + pLockNode->index_dirty = 1; + pLockNode->bad_off = (fe->this_sector - + fe->dir_node_ptr) / osb->sect_size; + pLockNode->bad_off -= 1; + + for (index = 0; index < pLockNode->num_ent_used; index++) { + if (pLockNode->index[index] == pLockNode->bad_off) { + break; + } + } + + if (index < pLockNode->num_ent_used) { + memmove (&pLockNode->index[index], &pLockNode->index[index + 1], + pLockNode->num_ent_used - (index + 1)); + pLockNode->index[pLockNode->num_ent_used - 1] = + pLockNode->bad_off; + + status = + ocfs_write_file_entry (osb, (ocfs_file_entry *) pLockNode, + fe->dir_node_ptr); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + } + + status = ocfs_write_file_entry (osb, fe, fe->this_sector); + if (status < 0) { + LOG_TRACE_STR ("ocfs_write_file_entry failed!!!"); + goto leave; + } + + LOG_TRACE_ARGS ("Wrote to sector %u.%u, name written was \"%s\"\n", + HI (fe->this_sector), LO (fe->this_sector), + fe->filename); + + if (DISK_LOCK_FILE_LOCK (pLockNode) < OCFS_DLM_ENABLE_CACHE_LOCK) { + /* Update the disk as the other node will not see this file directory */ + status = ocfs_force_put_file_entry (osb, fe, true); + if (status < 0) { + goto leave; + } + } + + if (index < pLockNode->num_ent_used) { + LOG_TRACE_STR ("ocfs_reindex_dir_node being called"); + status = ocfs_reindex_dir_node (osb, fe->dir_node_ptr, NULL); + } + + leave: + + /* Release the file lock if we acquired it */ + if (bAcquiredLock) { + status = + ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, + lockFlags, pLockResource); + if (status < 0) { + LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n", + 
status, HI (lockId), LO (lockId)); + } + } + + /* Release the file lock if we acquired it */ + if (bParentLockAcquired) { + status = + ocfs_release_lock (osb, parentLockId, OCFS_DLM_EXCLUSIVE_LOCK, + parentLockFlags, pParentLockResource); + if (status < 0) { + LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n", + status, HI (parentLockId), + LO (parentLockId)); + } + } + + /* free up fileentry */ + if (fe) { + ocfs_release_file_entry (fe); + } + + if (pLockNode) { + ocfs_release_file_entry ((ocfs_file_entry *) pLockNode); + } + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_rename_file */ + +/* ocfs_del_file() + * + */ +int +ocfs_del_file (ocfs_super * osb, ub8 parent_off, ub4 flags, ub8 file_off) +{ + int status = 0; + ocfs_file_entry *fe = NULL; + ub4 size = 0; + ocfs_dir_node *pLockNode = NULL; + ub4 lockFlags; + bool bAcquiredLock = false; + ocfs_lock_res *pLockResource; + ocfs_cleanup_record *pCleanupLogRec = NULL; + ocfs_log_record *pOcfsLogRec; + ub8 lockId; + sb4 log_node_num = -1; + + LOG_ENTRY (); + + fe = ocfs_allocate_file_entry (); + if (fe == NULL) { + status = -ENOMEM; + LOG_ERROR_STATUS (status); + goto leave; + } + + status = ocfs_read_file_entry (osb, fe, file_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + if (fe->attribs & OCFS_ATTRIB_DIRECTORY) { + lockId = fe->extents[0].disk_off; + lockFlags = (FLAG_FILE_DELETE | FLAG_DIR); + pLockNode = (ocfs_dir_node *) ocfs_allocate_file_entry (); + if (pLockNode == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto leave; + } + } else { + lockId = fe->this_sector; + lockFlags = (FLAG_FILE_DELETE); + (ocfs_file_entry *) pLockNode = fe; + } + status = + ocfs_acquire_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, lockFlags, + &pLockResource, (ocfs_file_entry *) pLockNode); + if (status < 0) { + LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n", status, + HI (lockId), LO (lockId)); + goto leave; + } + + bAcquiredLock = true; + + size = sizeof (ocfs_cleanup_record); + 
size = (ub4) OCFS_ALIGN (size, PAGE_SIZE); + + pCleanupLogRec = ocfs_malloc (size); + if (pCleanupLogRec == NULL) { + status = -ENOMEM; + goto leave; + } + + log_node_num = osb->node_num; + + if (flags & FLAG_DEL_NAME) { + /* Now start writing the cleanup log of the filentry master. */ + /* It is this node for normal cases and or the node we are doing */ + /* recovery for. */ + pCleanupLogRec->log_id = osb->curr_trans_id; + pCleanupLogRec->log_type = LOG_DELETE_ENTRY; + + pCleanupLogRec->rec.del.node_num = log_node_num; + pCleanupLogRec->rec.del.ent_del = fe->this_sector; + pCleanupLogRec->rec.del.parent_dirnode_off = parent_off; + pCleanupLogRec->rec.del.flags = 0; + + status = + ocfs_write_node_log (osb, (ocfs_log_record *) pCleanupLogRec, + log_node_num, LOG_CLEANUP); + if (status < 0) { + goto leave; + } + status = 0; + goto leave; + } + + /* Ask for a lock on the file to ensure there are no open oin's */ + /* on the file on any node */ + if (fe->attribs & OCFS_ATTRIB_DIRECTORY) { + if ((pLockNode->num_ent_used > 0) && !(flags & FLAG_DEL_NAME)) { + status = -ENOTEMPTY; + goto leave; + } + } + + pOcfsLogRec = (ocfs_log_record *) pCleanupLogRec; + + pOcfsLogRec->log_id = osb->curr_trans_id; + pOcfsLogRec->log_type = LOG_MARK_DELETE_ENTRY; + + pOcfsLogRec->rec.del.node_num = log_node_num; + pOcfsLogRec->rec.del.ent_del = fe->this_sector; + pOcfsLogRec->rec.del.parent_dirnode_off = parent_off; + + if (flags & FLAG_RESET_VALID) { + pOcfsLogRec->rec.del.flags = FLAG_RESET_VALID; + } else { + pOcfsLogRec->rec.del.flags = 0; + } + + status = ocfs_write_log (osb, pOcfsLogRec, LOG_RECOVER); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + status = ocfs_delete_file_entry (osb, fe, parent_off, log_node_num); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + /* free up fileentry */ + + leave: + if ((fe != (ocfs_file_entry *) pLockNode) && (pLockNode)) { + ocfs_release_file_entry ((ocfs_file_entry *) pLockNode); + } + + if (fe) { + 
ocfs_release_file_entry (fe);
+	}
+
+	ocfs_safefree (pCleanupLogRec);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_del_file */
+
+/*
+ * ocfs_extend_file()
+ *
+ * Sets the recorded size of the file whose file entry is stored at
+ * *file_off to file_size, allocating more disk space first when
+ * file_size is larger than the entry's current alloc_size.
+ *
+ * osb        - volume the file belongs to
+ * parent_off - not used by this function
+ * oin        - in-memory inode, may be NULL; when present its alloc_size
+ *              is refreshed under oin->main_res after an allocation
+ * file_size  - requested size; 0 makes the call a no-op returning 0
+ * file_off   - points at the disk offset of the file entry (only read)
+ *
+ * The entry is read from disk, locked with FLAG_FILE_EXTEND, updated
+ * (sizes, sync flags, modify_time), written back and unlocked again.
+ *
+ * Returns 0 on success or a negative status.  If releasing the lock
+ * fails, that status replaces whatever was recorded before.
+ */
+int ocfs_extend_file (ocfs_super * osb, ub8 parent_off,
+		      ocfs_inode * oin, ub8 file_size, ub8 * file_off)
+{
+	int status = 0;
+	int tmpstat;
+	ocfs_dir_node *pLockNode = NULL;
+	ocfs_file_entry *fileEntry = NULL;
+	ub8 allocSize = 0;
+	ub8 bitmapOffset = 0;
+	ub8 numClustersAlloc = 0;
+	ub8 lockId = 0;
+	ub4 lockFlags = 0;
+	bool bFileLockAcquired = false;
+	bool bAcquiredLock = false;
+	ocfs_lock_res *pLockResource;
+	ub8 changeSeqNum = 0;	/* never changed: the seqnum is reset to 0 below */
+	ub8 actualDiskOffset = 0;
+	ub8 actualLength = 0;
+	bool bCacheLock = false;
+
+	LOG_ENTRY ();
+
+	if (file_size == 0) {
+		goto leave;
+	}
+
+	LOG_TRACE_STR ("Extending File");
+
+	/* allocate memory for fileentry */
+	fileEntry = ocfs_allocate_file_entry ();
+	if (fileEntry == NULL) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+
+	status = ocfs_read_file_entry (osb, fileEntry, *file_off);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+
+	/* We always take an EXTEND lock, keyed on the entry's own sector. */
+	lockId = fileEntry->this_sector;
+	lockFlags = FLAG_FILE_EXTEND;
+	bFileLockAcquired = true;
+
+	/*
+	 * The file entry doubles as the buffer handed to the lock code.
+	 * Plain assignment with a cast on the right-hand side; the old
+	 * "(type) lvalue = value" form is a removed GCC extension.
+	 */
+	pLockNode = (ocfs_dir_node *) fileEntry;
+
+	/* Keep a cache lock if this node already masters one on the entry. */
+	if ((DISK_LOCK_FILE_LOCK (fileEntry) == OCFS_DLM_ENABLE_CACHE_LOCK) &&
+	    (DISK_LOCK_CURRENT_MASTER (fileEntry) == osb->node_num)) {
+		bCacheLock = true;
+	}
+
+	if (bCacheLock)
+		status =
+		    ocfs_acquire_lock (osb, lockId, OCFS_DLM_ENABLE_CACHE_LOCK,
+				       lockFlags, &pLockResource,
+				       (ocfs_file_entry *) pLockNode);
+	else
+		status =
+		    ocfs_acquire_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK,
+				       lockFlags, &pLockResource,
+				       (ocfs_file_entry *) pLockNode);
+	if (status < 0) {
+		LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n", status,
+				HI (lockId), LO (lockId));
+		goto leave;
+	}
+
+	bAcquiredLock = true;
+
+	if (bCacheLock) {
+		DISK_LOCK_FILE_LOCK (fileEntry) = OCFS_DLM_ENABLE_CACHE_LOCK;
+		DISK_LOCK_CURRENT_MASTER (fileEntry) = osb->node_num;
+	}
+
+	if (file_size > (sb8) fileEntry->alloc_size) {
+		allocSize = file_size - fileEntry->alloc_size;
+
+		/*
+		 * Ask for more than the shortfall: add twice the current
+		 * allocation, with that allocation capped at ONE_MEGA_BYTE,
+		 * so that repeated extends need fewer extents.
+		 * TODO: grow more aggressively after 2-3 allocations.
+		 */
+		{
+			ub8 tempSize = fileEntry->alloc_size;
+
+			if (tempSize > ONE_MEGA_BYTE)
+				tempSize = ONE_MEGA_BYTE;
+			allocSize += (tempSize * 2);
+		}
+
+		status =
+		    ocfs_find_contiguous_space_from_bitmap (osb, allocSize,
+							    &bitmapOffset,
+							    &numClustersAlloc, false);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto leave;
+		}
+
+		/* Convert the bitmap position and cluster count to a disk range. */
+		actualDiskOffset =
+		    (bitmapOffset * osb->vol_layout.cluster_size) +
+		    osb->vol_layout.data_start_off;
+		actualLength =
+		    (ub8) (numClustersAlloc * osb->vol_layout.cluster_size);
+
+		/* note: ok if oin is null here, not used in ocfs_allocate_extent */
+		status =
+		    ocfs_allocate_extent (osb, oin, fileEntry, actualDiskOffset,
+					  actualLength);
+		if (status < 0) {
+			/* Error ....or may be disk full */
+			goto leave;
+		}
+
+		/* update the total allocation size here */
+		fileEntry->alloc_size +=
+		    (numClustersAlloc * osb->vol_layout.cluster_size);
+		if (oin) {
+			ocfs_down_sem (&(oin->main_res), true);
+			oin->alloc_size = fileEntry->alloc_size;
+			ocfs_up_sem (&(oin->main_res));
+		}
+		/* no need to do OCFS_SECTOR_ALIGN once the allocation size is correct. */
+		DISK_LOCK_SEQNUM (fileEntry) = changeSeqNum;
+	}
+
+	/* Record the new file size. */
+	fileEntry->file_size = file_size;
+
+	/* Set the Valid bit and reset the change bit here... TODO */
+	SET_VALID_BIT (fileEntry->sync_flags);
+	fileEntry->sync_flags &= ~(OCFS_SYNC_FLAG_CHANGE);
+
+	fileEntry->modify_time = CURRENT_TIME;
+
+	status =
+	    ocfs_write_file_entry (osb, fileEntry, fileEntry->this_sector);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+
+	/* Update all open oins */
+
+	/* Our local update is done, if somebody had asked for a bdcast lock */
+	/* He shd set the state */
+
+leave:
+	/* Release the file lock if we acquired it */
+	if (bAcquiredLock) {
+
+		if (bFileLockAcquired) {
+			lockFlags |= FLAG_FILE_UPDATE_OIN;
+		}
+
+		tmpstat =
+		    ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK,
+				       lockFlags, pLockResource);
+		if (tmpstat < 0) {
+			status = tmpstat;
+			LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n",
+					status, HI (fileEntry->this_sector),
+					LO (fileEntry->this_sector));
+		}
+	}
+
+	/* free up fileentry */
+	if (fileEntry) {
+		ocfs_release_file_entry (fileEntry);
+		fileEntry = NULL;
+	}
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_extend_file */
+
+/* ocfs_change_file_size()
+ *
+ */
+int ocfs_change_file_size (ocfs_super * osb,
+		    ub8 parent_off,
+		    ocfs_inode * oin,
+		    ub8 file_size, ub8 * file_off, struct iattr *attr)
+{
+	int status = 0;
+	int tmpstat;
+	ocfs_dir_node *pLockNode = NULL;
+	ocfs_file_entry *fileEntry = NULL;
+	ub8 dirOffset = 0;
+	ub4 size;
+	bool bFileLockAcquired = false;
+	bool bAcquiredLock = false;
+	ocfs_lock_res *pLockResource;
+	ub8 changeSeqNum = 0;
+	ub8 lockId = 0;
+	ub4 lockFlags = 0;
+	bool bCacheLock = false;
+
+	LOG_ENTRY ();
+
+	fileEntry = ocfs_allocate_file_entry ();
+	if (fileEntry == NULL) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+
+	status = ocfs_read_file_entry (osb, fileEntry, *file_off);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+
+	/* Acquire the Lock using TCP/IP and disk based
locking */ + if ((DISK_LOCK_FILE_LOCK (fileEntry) == OCFS_DLM_ENABLE_CACHE_LOCK) && + (DISK_LOCK_CURRENT_MASTER (fileEntry) == osb->node_num)) { + bCacheLock = true; + } + + /* now we always take an UPDATE lock */ + lockId = fileEntry->this_sector; + lockFlags = FLAG_FILE_UPDATE; + bFileLockAcquired = true; + (ocfs_file_entry *) pLockNode = fileEntry; + + if (bCacheLock) + status = + ocfs_acquire_lock (osb, lockId, OCFS_DLM_ENABLE_CACHE_LOCK, + lockFlags, &pLockResource, + (ocfs_file_entry *) pLockNode); + else + status = + ocfs_acquire_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, + lockFlags, &pLockResource, + (ocfs_file_entry *) pLockNode); + if (status < 0) { + LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n", status, + HI (fileEntry->this_sector), + LO (fileEntry->this_sector)); + goto leave; + } + + bAcquiredLock = true; + + if (bCacheLock) { + DISK_LOCK_FILE_LOCK (fileEntry) = OCFS_DLM_ENABLE_CACHE_LOCK; + DISK_LOCK_CURRENT_MASTER (fileEntry) = osb->node_num; + } + + DISK_LOCK_SEQNUM (fileEntry) = changeSeqNum; + if (attr->ia_valid & ATTR_SIZE) + fileEntry->file_size = attr->ia_size; + if (attr->ia_valid & ATTR_UID) + fileEntry->uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + fileEntry->gid = attr->ia_gid; + if (attr->ia_valid & ATTR_MODE) + fileEntry->prot_bits = attr->ia_mode & 0007777; + + /* Set the valid bit here */ + SET_VALID_BIT (fileEntry->sync_flags); + fileEntry->sync_flags &= ~(OCFS_SYNC_FLAG_CHANGE); + + dirOffset = fileEntry->this_sector; + + fileEntry->modify_time = CURRENT_TIME; + + size = (ub4) OCFS_SECTOR_ALIGN (sizeof (ocfs_file_entry)); + status = + ocfs_write_file_entry (osb, (ocfs_file_entry *) fileEntry, dirOffset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + leave: + /* Release the file lock if we acquired it */ + if (bAcquiredLock) { + if (bFileLockAcquired) { + lockFlags |= FLAG_FILE_UPDATE_OIN; + } + + tmpstat = + ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, + lockFlags, pLockResource); 
+		if (tmpstat < 0) {
+			status = tmpstat;
+			LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n",
+					status, HI (fileEntry->this_sector),
+					LO (fileEntry->this_sector));
+		}
+	}
+
+	/* free up fileentry */
+	if (fileEntry) {
+		ocfs_release_file_entry (fileEntry);
+	}
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_change_file_size */
+
+/*
+ * ocfs_create_directory()
+ *
+ * Creates the directory described by fe inside the directory whose
+ * dir node is at parent_off.
+ *
+ * osb        - volume to operate on
+ * parent_off - disk offset of the parent directory; also the lock id
+ * fe         - caller-prepared file entry for the new directory.  It is
+ *              updated in place (alloc_size, file_size,
+ *              extents[0].disk_off, next_del and disk-lock fields).
+ *
+ * Steps: take an exclusive lock on the parent, allocate a dir node
+ * block, initialise and write the new dir node, read the parent dir node
+ * that should receive the entry, and hand over to ocfs_insert_file().
+ *
+ * Returns 0 on success or a negative status.  On success the parent lock
+ * is not released here.
+ * NOTE(review): ocfs_insert_file() is presumed to release it - confirm.
+ */
+int ocfs_create_directory (ocfs_super * osb, ub8 parent_off, ocfs_file_entry * fe)
+{
+	int status = 0;
+	int tmpstat;
+	ocfs_file_entry *fileEntry = NULL;
+	ocfs_dir_node *PDirNode = NULL;
+	ocfs_dir_node *PNewDirNode = NULL;
+	ocfs_dir_node *pLockNode = NULL;
+	ub4 size;
+	ub8 allocSize = 0;
+	ub8 bitmapOffset;
+	ub8 numClustersAlloc = 0;
+	ub8 fileOffset = 0;
+	ub8 lockId = 0;
+	ocfs_lock_res *pLockResource;
+	ub4 lockFlags = 0;
+	bool bAcquiredLock = false;
+
+	LOG_ENTRY ();
+
+	fileEntry = fe;
+
+	/* Buffer that receives the parent's lock sector during acquire. */
+	pLockNode = (ocfs_dir_node *) ocfs_allocate_file_entry ();
+	if (pLockNode == NULL) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+
+	lockId = parent_off;
+	lockFlags = FLAG_FILE_CREATE | FLAG_DIR;
+
+	/* acquire the lock */
+	status =
+	    ocfs_acquire_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, lockFlags,
+			       &pLockResource, (ocfs_file_entry *) pLockNode);
+	if (status < 0) {
+		LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n", status,
+				HI (lockId), LO (lockId));
+		goto leave;
+	}
+
+	bAcquiredLock = true;
+
+	/* Allocate the block that will hold the new directory's dir node. */
+	allocSize = osb->vol_layout.dir_node_size;
+
+	status = ocfs_alloc_node_block (osb, allocSize, &bitmapOffset, &fileOffset,
+				 &numClustersAlloc, osb->node_num,
+				 DISK_ALLOC_DIR_NODE);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+
+	/* update the total allocation size here */
+	fileEntry->alloc_size = osb->vol_layout.dir_node_size;
+	fileEntry->extents[0].disk_off = bitmapOffset;
+	fileEntry->file_size = osb->vol_layout.dir_node_size;
+	fileEntry->next_del = INVALID_DIR_NODE_INDEX;
+
+	if (DISK_LOCK_FILE_LOCK (pLockNode) != OCFS_DLM_ENABLE_CACHE_LOCK)
+		DISK_LOCK_FILE_LOCK (fileEntry) = OCFS_DLM_NO_LOCK;
+
+	size = (ub4) (osb->vol_layout.dir_node_size);
+	PDirNode = ocfs_malloc (size);
+	if (PDirNode == NULL) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+
+	/*
+	 * The same buffer is used twice: first to build and write the new
+	 * directory's dir node, then to read the parent's dir node.
+	 */
+	PNewDirNode = PDirNode;
+	memset (PNewDirNode, 0, size);
+
+	ocfs_initialize_dir_node (osb, PNewDirNode, bitmapOffset, fileOffset,
+				  osb->node_num);
+
+	DISK_LOCK_CURRENT_MASTER (PNewDirNode) = osb->node_num;
+	DISK_LOCK_FILE_LOCK (PNewDirNode) = OCFS_DLM_ENABLE_CACHE_LOCK;
+	PNewDirNode->dir_node_flags |= DIR_NODE_FLAG_ROOT;
+
+	status =
+	    ocfs_write_metadata (osb, PNewDirNode, osb->vol_layout.dir_node_size,
+				 PNewDirNode->node_disk_off);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+
+	/* do we need to keep this??? */
+	status = ocfs_write_dir_node (osb, PNewDirNode, -1);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+
+	/* Result deliberately not checked here, as in the original code. */
+	if (pLockResource->lock_type != OCFS_DLM_ENABLE_CACHE_LOCK)
+		ocfs_write_force_dir_node (osb, PNewDirNode, -1);
+
+	/*
+	 * Lock node is read in as part of acquire lock.  Pick the parent
+	 * dir node to insert into: the head when there is no free-node
+	 * pointer, otherwise the deleted-entry node if one is recorded,
+	 * else the free node.
+	 */
+	if (pLockNode->free_node_ptr == -1) {
+		status = ocfs_read_dir_node (osb, PDirNode, parent_off);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto leave;
+		}
+	} else {
+		/* Goto the deleted tail or the free node pointer */
+		if (pLockNode->next_del_ent_node == -1) {
+			status =
+			    ocfs_read_dir_node (osb, PDirNode,
+						pLockNode->free_node_ptr);
+			if (status < 0) {
+				LOG_ERROR_STATUS (status);
+				goto leave;
+			}
+		} else {
+			status = ocfs_read_dir_node (osb, PDirNode,
+						pLockNode->next_del_ent_node);
+			if (status < 0) {
+				LOG_ERROR_STATUS (status);
+				goto leave;
+			}
+		}
+	}
+
+	if (DISK_LOCK_FILE_LOCK (pLockNode) != OCFS_DLM_ENABLE_CACHE_LOCK)
+		DISK_LOCK_FILE_LOCK (fileEntry) = OCFS_DLM_NO_LOCK;
+
+	/*
+	 * Stamp both slots.  The old code stamped LAST_WRITE twice and left
+	 * LAST_READ untouched, although reader and writer node are both set.
+	 */
+	OcfsQuerySystemTime (&DISK_LOCK_LAST_WRITE (fileEntry));
+	OcfsQuerySystemTime (&DISK_LOCK_LAST_READ (fileEntry));
+
+	DISK_LOCK_WRITER_NODE (fileEntry) = osb->node_num;
+	DISK_LOCK_READER_NODE (fileEntry) = osb->node_num;
+
+	status =
+	    ocfs_insert_file (osb, PDirNode, fileEntry, pLockNode, pLockResource);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+
+	bAcquiredLock = false;
+
+leave:
+	/*
+	 * Only failure paths get here with the lock still held, so the
+	 * release result must not overwrite the error being returned.
+	 */
+	if (bAcquiredLock) {
+		tmpstat =
+		    ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK,
+				       lockFlags, pLockResource);
+		if (tmpstat < 0) {
+			LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n",
+					tmpstat, HI (lockId), LO (lockId));
+			if (status >= 0)
+				status = tmpstat;
+		}
+	}
+	ocfs_safefree (PDirNode);
+	if (pLockNode)
+		ocfs_release_file_entry ((ocfs_file_entry *) pLockNode);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_create_directory */
+
+/* ocfs_create_file()
+ *
+ */
+int ocfs_create_file (ocfs_super * osb, ub8 parent_off, ocfs_file_entry * fe)
+{
+	int status = 0;
+	ocfs_file_entry *fileEntry = NULL;
+ ocfs_dir_node *PDirNode = NULL; + ocfs_dir_node *pLockNode = NULL; + ub4 size; + ub8 lockId = 0; + ocfs_lock_res *pLockResource; + ub4 lockFlags = 0; + bool bAcquiredLock = false; + + LOG_ENTRY (); + + /* Zero out the entry for the file and rewrite it back to the disk */ + /* Also, the other nodes should update their cache bitmap for file */ + /* ent to mark this one as free now. */ + pLockNode = (ocfs_dir_node *) ocfs_allocate_file_entry (); + if (pLockNode == NULL) { + status = -ENOMEM; + LOG_ERROR_STATUS (status); + goto leave; + } + + lockId = parent_off; + lockFlags = FLAG_FILE_CREATE | FLAG_DIR; + + /* acquire the lock */ + status = + ocfs_acquire_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, lockFlags, + &pLockResource, (ocfs_file_entry *) pLockNode); + if (status < 0) { + LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n", status, + HI (lockId), LO (lockId)); + goto leave; + } + + bAcquiredLock = true; + + /* Change the name and write it back... */ + fileEntry = fe; + + size = (ub4) (osb->vol_layout.dir_node_size); + PDirNode = ocfs_malloc (size); + if (PDirNode == NULL) { + status = -ENOMEM; + LOG_ERROR_STATUS (status); + goto leave; + } + + if (pLockNode->free_node_ptr == -1) { + status = ocfs_read_dir_node (osb, PDirNode, parent_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + } else { + /* Goto the deleted tail or the free node pointer */ + if (pLockNode->next_del_ent_node == -1) { + status = + ocfs_read_dir_node (osb, PDirNode, + pLockNode->free_node_ptr); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + } else { + status = ocfs_read_dir_node (osb, PDirNode, + pLockNode->next_del_ent_node); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + } + } + OcfsQuerySystemTime (&DISK_LOCK_LAST_WRITE (fileEntry)); + OcfsQuerySystemTime (&DISK_LOCK_LAST_WRITE (fileEntry)); + + DISK_LOCK_WRITER_NODE (fileEntry) = osb->node_num; + DISK_LOCK_READER_NODE (fileEntry) = osb->node_num; + + status = + 
ocfs_insert_file (osb, PDirNode, fileEntry, pLockNode, pLockResource);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+
+	bAcquiredLock = false;
+
+leave:
+	/*
+	 * Only failure paths get here with the lock still held, so the
+	 * release result must not overwrite the error being returned.
+	 */
+	if (bAcquiredLock) {
+		int tmpstat =
+		    ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK,
+				       lockFlags, pLockResource);
+		if (tmpstat < 0) {
+			LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n",
+					tmpstat, HI (lockId), LO (lockId));
+			if (status >= 0)
+				status = tmpstat;
+		}
+	}
+	ocfs_safefree (PDirNode);
+	if (pLockNode)
+		ocfs_release_file_entry ((ocfs_file_entry *) pLockNode);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_create_file */
+
+/*
+ * ocfs_create_modify_file()
+ *
+ * Runs one metadata operation, selected by flags, inside a transaction
+ * opened with ocfs_start_trans().
+ *
+ * flags is matched by equality against FLAG_FILE_EXTEND, FLAG_FILE_DELETE,
+ * FLAG_FILE_CREATE_DIR, FLAG_FILE_CREATE, FLAG_FILE_DELETE_CDSL,
+ * FLAG_FILE_CREATE_CDSL, FLAG_FILE_CHANGE_TO_CDSL, FLAG_FILE_TRUNCATE and
+ * FLAG_FILE_UPDATE; any other value performs no operation.
+ *
+ * osb        - volume to operate on
+ * parent_off - disk offset of the parent directory
+ * oin        - in-memory inode, forwarded to extend/truncate/update
+ * file_name  - name of the new entry; dereferenced only for
+ *              FLAG_FILE_CREATE and FLAG_FILE_CREATE_DIR
+ * file_size  - forwarded to extend/truncate/update
+ * file_off   - in: offset of the file entry for extend/delete/update;
+ *              out: this_sector of the new entry after a successful
+ *              create
+ * fe         - file entry to use.  For the two create operations it may
+ *              be NULL, in which case a temporary entry is allocated and
+ *              released before returning.
+ * attr       - forwarded to ocfs_change_file_size() for FLAG_FILE_UPDATE
+ *
+ * The transaction is committed when the operation succeeded and aborted
+ * when it failed.  Returns 0 or a negative status; if the abort itself
+ * fails, its status is returned instead.
+ */
+int
+ocfs_create_modify_file (ocfs_super * osb,
+			 ub8 parent_off,
+			 ocfs_inode * oin,
+			 struct qstr * file_name,
+			 ub8 file_size,
+			 ub8 * file_off, ub4 flags, ocfs_file_entry * fe, struct iattr *attr)
+{
+	int status = 0;
+	int tmpstat = 0;
+	ocfs_file_entry *newfe = NULL;
+	ub8 changeSeqNum = 0;
+	ub8 t;
+
+	LOG_ENTRY ();
+
+	ocfs_start_trans (osb);
+
+	/* New entries carry the current transaction id as lock seqnum. */
+	changeSeqNum = osb->curr_trans_id;
+	switch (flags) {
+	    case FLAG_FILE_EXTEND:
+		    status = ocfs_extend_file (osb, parent_off, oin, file_size, file_off);
+		    if (status < 0) {
+			    LOG_ERROR_STATUS (status);
+			    goto leave;
+		    }
+		    break;
+
+	    case FLAG_FILE_DELETE:
+		    status =
+			ocfs_del_file (osb, parent_off, 0, *file_off);
+		    if (status < 0) {
+			    LOG_ERROR_STATUS (status);
+			    goto leave;
+		    }
+		    break;
+
+	    case FLAG_FILE_CREATE_DIR:
+		    if (fe == NULL) {
+			    newfe = ocfs_allocate_file_entry ();
+			    if (newfe == NULL) {
+				    status = -ENOMEM;
+				    LOG_ERROR_STATUS (status);
+				    goto leave;
+			    }
+		    } else {
+			    newfe = fe;
+		    }
+
+		    /*
+		     * Store the name.
+		     * NOTE(review): file_name->len is not checked against the
+		     * size of newfe->filename here - confirm callers bound it.
+		     */
+		    strncpy (newfe->filename, file_name->name, file_name->len);
+		    newfe->filename[file_name->len]='\0';
+		    newfe->filename_len = file_name->len;
+
+		    /* Set the valid bit here */
+		    SET_VALID_BIT (newfe->sync_flags);
+		    newfe->sync_flags &= ~(OCFS_SYNC_FLAG_CHANGE);
+		    newfe->attribs |= (OCFS_ATTRIB_DIRECTORY);
+
+		    DISK_LOCK_SEQNUM (newfe) = changeSeqNum;
+
+		    /* Initialize the lock state */
+		    DISK_LOCK_CURRENT_MASTER (newfe) = osb->node_num;
+		    DISK_LOCK_FILE_LOCK (newfe) = OCFS_DLM_ENABLE_CACHE_LOCK;
+		    DISK_LOCK_READER_NODE (newfe) = osb->node_num;
+		    DISK_LOCK_WRITER_NODE (newfe) = osb->node_num;
+		    OcfsQuerySystemTime(&t);
+		    DISK_LOCK_LAST_WRITE(newfe) = t;
+		    DISK_LOCK_LAST_READ(newfe) = t;
+
+		    newfe->create_time = newfe->modify_time = CURRENT_TIME;
+
+		    status = ocfs_create_directory (osb, parent_off, newfe);
+		    if (status >= 0)
+			    *file_off = newfe->this_sector;
+		    break;
+
+	    case FLAG_FILE_CREATE:
+		    if (fe == NULL) {
+			    newfe = ocfs_allocate_file_entry ();
+			    if (newfe == NULL) {
+				    status = -ENOMEM;
+				    LOG_ERROR_STATUS (status);
+				    goto leave;
+			    }
+		    } else {
+			    newfe = fe;
+		    }
+
+		    /* Same unchecked name copy as in the directory case. */
+		    strncpy (newfe->filename, file_name->name, file_name->len);
+		    newfe->filename[file_name->len]='\0';
+		    newfe->filename_len = file_name->len;
+
+		    /* Set the flag to use the local extents */
+		    newfe->local_ext = true;
+		    newfe->granularity = -1;
+		    newfe->next_free_ext = 0;
+		    newfe->last_ext_ptr = 0;
+
+		    strcpy (newfe->signature, OCFS_FILE_ENTRY_SIGNATURE);
+
+		    /* Set the valid bit here */
+		    SET_VALID_BIT (newfe->sync_flags);
+		    newfe->sync_flags &= ~(OCFS_SYNC_FLAG_CHANGE);
+
+		    /* Initialize the lock state */
+		    DISK_LOCK_SEQNUM (newfe) = changeSeqNum;
+
+		    DISK_LOCK_CURRENT_MASTER (newfe) = osb->node_num;
+		    DISK_LOCK_FILE_LOCK (newfe) = OCFS_DLM_ENABLE_CACHE_LOCK;
+		    DISK_LOCK_READER_NODE (newfe) = osb->node_num;
+		    DISK_LOCK_WRITER_NODE (newfe) = osb->node_num;
+		    OcfsQuerySystemTime(&t);
+		    DISK_LOCK_LAST_WRITE(newfe) = t;
+		    DISK_LOCK_LAST_READ(newfe) = t;
+
+		    newfe->create_time = newfe->modify_time = CURRENT_TIME;
+
+		    status = ocfs_create_file (osb, parent_off, newfe);
+		    if (status >= 0)
+			    *file_off = newfe->this_sector;
+		    break;
+
+	    case FLAG_FILE_DELETE_CDSL:
+		    status = ocfs_delete_cdsl (osb, parent_off, fe);
+		    if (status < 0) {
+			    LOG_ERROR_STATUS (status);
+			    goto leave;
+		    }
+		    break;
+
+	    case FLAG_FILE_CREATE_CDSL:
+		    status = ocfs_create_cdsl (osb, parent_off, fe);
+		    break;
+
+	    case FLAG_FILE_CHANGE_TO_CDSL:
+		    status = ocfs_change_to_cdsl (osb, parent_off, fe);
+		    break;
+
+	    case FLAG_FILE_TRUNCATE:
+		    status = ocfs_truncate_file (osb, oin, file_size);
+		    break;
+
+	    case FLAG_FILE_UPDATE:
+		    status = ocfs_change_file_size (osb, parent_off, oin,
+					    file_size, file_off, attr);
+		    break;
+
+	    default:
+		    break;
+	}
+
+	/*
+	 * Several cases above leave the switch with a negative status.
+	 * Such a transaction must be aborted, never committed first.
+	 */
+	if (status < 0)
+		goto leave;
+
+	ocfs_commit_trans (osb, osb->curr_trans_id);
+
+leave:
+	if (status < 0) {
+		tmpstat = ocfs_abort_trans (osb, osb->curr_trans_id);
+		if (tmpstat < 0) {
+			/* VOL DISABLE TODO */
+			status = tmpstat;
+		}
+	}
+
+	osb->trans_in_progress = false;
+
+	/* Release the entry only if it was allocated in this function. */
+	if ((newfe != fe) && (newfe)) {
+		ocfs_release_file_entry (newfe);
+	}
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_create_modify_file */
+
+
+/*
+ * ocfs_initialize_oin()
+ *
+ * Initialize a oin structure and file object. This function is called
+ * whenever a file is recognized for the first time.
+ */
+int
+ocfs_initialize_oin (ocfs_inode * oin,
+		     ocfs_super * osb,
+		     ub4 flags, struct file *file_obj, ub8 file_off, ub8 lock_id)
+{
+	int status = 0;
+
+	LOG_ENTRY ();
+
+	if (!(flags & OCFS_OIN_ROOT_DIRECTORY)) {
+		status = ocfs_create_update_lock (osb, oin, lock_id, flags);
+		if (status < 0) {
+			/* This can be okay as the other node can tell us the */
+			/* file was deleted.
*/
+			goto leave;
+		}
+	}
+
+	oin->dir_disk_off = 0;
+	oin->osb = osb;
+	INIT_LIST_HEAD (&(oin->next_ofile));
+	oin->oin_flags |= flags;
+	oin->ref_cnt = 0;
+	oin->open_hndl_cnt = 0;
+	oin->file_disk_off = file_off;
+	ocfs_extent_map_init (&oin->map);
+
+      leave:
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_initialize_oin */
+
+
+/*
+ * ocfs_create_delete_cdsl()
+ *
+ * Creates or deletes the CDSL named cdsl->name in the directory that
+ * inode refers to, as requested by cdsl->operation.
+ *
+ * The name is looked up on disk first:
+ *  - found + OCFS_CDSL_CREATE: the existing entry is converted
+ *    (FLAG_FILE_CHANGE_TO_CDSL); -EEXIST if a directory CDSL was asked
+ *    for but the entry is not a directory
+ *  - found + OCFS_CDSL_DELETE: the entry is removed
+ *    (FLAG_FILE_DELETE_CDSL)
+ *  - found + neither bit: -EINVAL
+ *  - -ENOENT + OCFS_CDSL_CREATE: a fresh entry is built and created
+ *    (FLAG_FILE_CREATE_CDSL)
+ *  - anything else: the lookup status is returned unchanged
+ *
+ * osb->osb_res is held for the whole operation.  filp is not used.
+ * Returns 0 or a negative status; -EINVAL for an empty name.
+ */
+int ocfs_create_delete_cdsl (struct inode *inode,
+		   struct file *filp, ocfs_super * osb, ocfs_cdsl * cdsl)
+{
+	struct qstr name;
+	ocfs_file_entry *fe = NULL;
+	bool osb_locked = false;
+	ub8 parent_off;
+	ub8 no_size = 0;
+	ub8 entry_off = 0;
+	ub4 op;
+	int status = 0;
+
+	LOG_ENTRY ();
+
+	if (cdsl->name[0] == '\0') {
+		status = -EINVAL;
+		goto leave;
+	}
+
+	ocfs_down_sem (&(osb->osb_res), true);
+	osb_locked = true;
+
+	name.name = cdsl->name;
+	name.len = strlen(cdsl->name);
+
+	fe = ocfs_allocate_file_entry ();
+	if (fe == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto leave;
+	}
+	memset (fe, 0, sizeof (ocfs_file_entry));
+
+	if (!ocfs_linux_get_inode_offset (inode, &parent_off, NULL)) {
+		LOG_ERROR_STR ("Error getting parent offset for CDSL");
+		status = -EFAIL;
+		goto leave;
+	}
+
+	status = ocfs_find_files_on_disk (osb, parent_off, &name, fe, NULL);
+
+	if (status < 0) {
+		/* Only "no such entry" combined with a create request goes on. */
+		if ((status != -ENOENT) || !(cdsl->operation & OCFS_CDSL_CREATE))
+			goto leave;
+
+		/* Build a brand new entry from scratch. */
+		memset (fe, 0, sizeof (ocfs_file_entry));
+		memcpy (fe->filename, cdsl->name, strlen (cdsl->name));
+		fe->filename_len = strlen (fe->filename);
+
+		/* Set the flag to use the local extents */
+		fe->local_ext = true;
+		fe->granularity = -1;
+		fe->next_free_ext = 0;
+		fe->last_ext_ptr = 0;
+		fe->attribs |= OCFS_ATTRIB_FILE_CDSL;
+		fe->uid = current->fsuid;
+		fe->gid = current->fsgid;
+		fe->prot_bits = 0755;	/* mode & 0007777; */
+
+		if (cdsl->flags & OCFS_FLAG_CDSL_DIR) {
+			fe->attribs |= OCFS_ATTRIB_DIRECTORY;
+		}
+
+		strcpy (fe->signature, OCFS_FILE_ENTRY_SIGNATURE);
+
+		/* Mark the entry valid and not in the middle of a change. */
+		SET_VALID_BIT (fe->sync_flags);
+		fe->sync_flags &= ~(OCFS_SYNC_FLAG_CHANGE);
+
+		op = FLAG_FILE_CREATE_CDSL;
+	} else if (cdsl->operation & OCFS_CDSL_CREATE) {
+		/* Create a cdsl with a file/directory already present. */
+		if ((cdsl->flags & OCFS_FLAG_CDSL_DIR) &&
+		    (!(fe->attribs & OCFS_ATTRIB_DIRECTORY))) {
+			status = -EEXIST;
+			goto leave;
+		}
+
+		fe->attribs |= OCFS_ATTRIB_FILE_CDSL;
+		op = FLAG_FILE_CHANGE_TO_CDSL;
+	} else if ((cdsl->operation & OCFS_CDSL_DELETE)) {
+		status =
+		    ocfs_create_modify_file (osb, parent_off, NULL, NULL,
+				      no_size, &entry_off,
+				      FLAG_FILE_DELETE_CDSL, fe, NULL);
+		goto leave;
+	} else {
+		status = -EINVAL;
+		goto leave;
+	}
+
+	/* Both create flavours reset the lock state and stamp the times. */
+	DISK_LOCK_SEQNUM (fe) = 0;
+	DISK_LOCK_CURRENT_MASTER (fe) = OCFS_INVALID_NODE_NUM;
+	DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK;
+	DISK_LOCK_READER_NODE (fe) = OCFS_INVALID_NODE_NUM;
+	DISK_LOCK_WRITER_NODE (fe) = OCFS_INVALID_NODE_NUM;
+
+	OcfsQuerySystemTime (&fe->modify_time);
+	fe->create_time = fe->modify_time;
+
+	status =
+	    ocfs_create_modify_file (osb, parent_off, NULL, NULL,
+			      no_size, &entry_off, op, fe, NULL);
+
+leave:
+	if (osb_locked) {
+		LOG_TRACE_STR ("RELEASE OSB LOCK");
+		ocfs_up_sem (&(osb->osb_res));
+	}
+
+	if (fe) {
+		ocfs_release_file_entry (fe);
+	}
+
+	LOG_EXIT_STATUS (status);
+	return (status);
+}				/* ocfs_create_delete_cdsl */
+
+
+/*
+ * ocfs_find_create_cdsl()
+ *
+ */
+int ocfs_find_create_cdsl (ocfs_super * osb, ocfs_file_entry * fe)
+{
+	int status = 0;
+	ub1 *buffer = NULL;
+	ub8 cdslOffset;
+	ub8 *cdslInfo;
+	ocfs_file_entry *pNewFileEntry = NULL;
+	ub4 length;
+	ocfs_dir_node *PDirNode, *PNewDirNode;
+
+	LOG_ENTRY ();
+
+	/* Read and see if we have a relevant entry for this node */
+	length = (8 * OCFS_MAXIMUM_NODES);
+	length = OCFS_ALIGN (length, PAGE_SIZE);
+
+	/* Initialize the table with 0 */
+	buffer = ocfs_malloc (length);
+	if (buffer == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto leave;
+	}
+
+	pNewFileEntry = ocfs_allocate_file_entry ();
+	if (pNewFileEntry == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto leave;
+	}
+
+	memcpy (pNewFileEntry, fe, sizeof (ocfs_file_entry));
+
+	status =
+	    ocfs_read_disk (osb, (sb1 *) buffer, length, fe->extents[0].disk_off);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+
+	cdslInfo = (ub8 *) buffer;
+
+	cdslOffset = *(cdslInfo + osb->node_num);
+	if (cdslOffset == 0) {
+		ub8 physicalOffset, fileOffset, numSectorsAlloc, bitmapOffset,
+		    numClustersAlloc;
+
+		/* create the entry if one doesn't exist and modify the cdsl data */
+		LOG_TRACE_STR ("Calling ocfs_alloc_node_block from CDSL");
+
+		/* Allocate contiguous blocks on disk */
+		status =
+		    ocfs_alloc_node_block (osb, OCFS_SECTOR_SIZE, &physicalOffset,
+				    &fileOffset, (ub8 *) & numSectorsAlloc,
+				    osb->node_num, DISK_ALLOC_EXTENT_NODE);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+
goto leave; + } + + LOG_TRACE_STR ("ocfs_alloc_node_block returned from CDSL"); + + if (fileOffset == 0) { + LOG_ERROR_ARGS ("File offset was 0 for file %s\n", + fe->filename); + } + + pNewFileEntry->this_sector = physicalOffset; + + *(cdslInfo + osb->node_num) = cdslOffset = physicalOffset; + + if (pNewFileEntry->attribs & OCFS_ATTRIB_DIRECTORY) { + status = + ocfs_alloc_node_block (osb, osb->vol_layout.dir_node_size, + &bitmapOffset, &fileOffset, + &numClustersAlloc, osb->node_num, + DISK_ALLOC_DIR_NODE); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + /* update the total allocation size here */ + pNewFileEntry->alloc_size = + osb->vol_layout.dir_node_size; + pNewFileEntry->extents[0].disk_off = bitmapOffset; + pNewFileEntry->file_size = + osb->vol_layout.dir_node_size; + pNewFileEntry->next_del = INVALID_DIR_NODE_INDEX; + + PDirNode = ocfs_malloc (osb->vol_layout.dir_node_size); + if (PDirNode == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto leave; + } + + PNewDirNode = PDirNode; + memset (PNewDirNode, 0, osb->vol_layout.dir_node_size); + + ocfs_initialize_dir_node (osb, PNewDirNode, bitmapOffset, + fileOffset, osb->node_num); + + DISK_LOCK_CURRENT_MASTER (PNewDirNode) = osb->node_num; + DISK_LOCK_FILE_LOCK (PNewDirNode) = + OCFS_DLM_ENABLE_CACHE_LOCK; + PNewDirNode->dir_node_flags |= DIR_NODE_FLAG_ROOT; + + status = ocfs_write_dir_node (osb, PNewDirNode, -1); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + } else { + /* This is a file */ + pNewFileEntry->extents[0].disk_off = 0; + pNewFileEntry->alloc_size = 0; + pNewFileEntry->file_size = 0; + } + + status = + ocfs_write_file_entry (osb, pNewFileEntry, + pNewFileEntry->this_sector); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + status = + ocfs_write_disk (osb, (sb1 *) buffer, length, + fe->extents[0].disk_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + memcpy (fe, pNewFileEntry, OCFS_SECTOR_SIZE); + + } 
else {
+		status =
+		    ocfs_read_disk (osb, (sb1 *) fe, OCFS_SECTOR_SIZE,
+				  cdslOffset);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto leave;
+		}
+
+	}
+
+      leave:
+	if (buffer) {
+		ocfs_safefree (buffer);
+	}
+
+	if (pNewFileEntry) {
+		ocfs_release_file_entry (pNewFileEntry);
+	}
+
+	LOG_EXIT_STATUS (status);
+	return (status);
+}				/* ocfs_find_create_cdsl */
+
+
+/*
+ * ocfs_update_file_entry_slot()
+ *
+ * Re-reads the file entry at oin->file_disk_off, stamps the current time
+ * and this node's number into its write slot (rw_mode == OCFS_WRITE) or
+ * its read slot (any other rw_mode), and force-writes osb->sect_size
+ * bytes of it back to the same offset.
+ *
+ * Returns 0 on success or a negative status.  Only the allocation
+ * failure is logged here.
+ */
+int ocfs_update_file_entry_slot (ocfs_super * osb, ocfs_inode * oin, ocfs_rw_mode rw_mode)
+{
+	ocfs_file_entry *entry;
+	int status = 0;
+
+	LOG_ENTRY ();
+
+	entry = ocfs_allocate_file_entry ();
+	if (entry == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto leave;
+	}
+
+	status = ocfs_read_file_entry (osb, (void *) entry, oin->file_disk_off);
+	if (status < 0)
+		goto leave;
+
+	/* Claim the slot that matches the kind of access. */
+	if (rw_mode != OCFS_WRITE) {
+		OcfsQuerySystemTime (&DISK_LOCK_LAST_READ (entry));
+		DISK_LOCK_READER_NODE (entry) = osb->node_num;
+	} else {
+		OcfsQuerySystemTime (&DISK_LOCK_LAST_WRITE (entry));
+		DISK_LOCK_WRITER_NODE (entry) = osb->node_num;
+	}
+
+	/* The write status is simply handed back to the caller. */
+	status =
+	    ocfs_write_force_disk (osb, (void *) entry, osb->sect_size,
+				   oin->file_disk_off);
+
+leave:
+	if (entry) {
+		ocfs_release_file_entry (entry);
+	}
+
+	LOG_EXIT_STATUS (status);
+	return (status);
+}				/* ocfs_update_file_entry_slot */
+
+/*
+ * ocfs_check_lock_state()
+ *
+ * Looks at oin->lock_res and decides whether caching may be enabled on
+ * oin or whether an existing cache lock has to be broken first.
+ *
+ *  - Cache lock held, this node is the master and more than
+ *    CACHE_LOCK_SLOT_TIME has passed since the last write: enable
+ *    caching and refresh the write slot on disk.
+ *  - Cache lock held otherwise: ask for the cache lock to be broken; on
+ *    success caching is disabled and the lock type drops to
+ *    OCFS_DLM_NO_LOCK.
+ *  - Lock type at most OCFS_DLM_SHARED_LOCK and the read slot has
+ *    expired: refresh the read slot, enabling caching as well if the
+ *    write slot has expired too.
+ *
+ * Nothing is returned; a failure of ocfs_break_cache_lock() is only
+ * logged and the result of the slot refresh is ignored.
+ */
+void ocfs_check_lock_state (ocfs_super * osb, ocfs_inode * oin)
+{
+	ocfs_lock_res *res;
+	ub8 now = 0;
+	ub8 since_read;
+	ub8 since_write;
+	int status;
+
+	LOG_ENTRY ();
+
+	res = oin->lock_res;
+	OCFS_ASSERT (res);
+
+	OcfsQuerySystemTime (&now);
+
+	since_read = (ub8) (now - res->last_read_time);
+	since_write = (ub8) (now - res->last_write_time);
+
+	if (res->lock_type == OCFS_DLM_ENABLE_CACHE_LOCK) {
+		if ((res->master_node_num == osb->node_num) &&
+		    (since_write > CACHE_LOCK_SLOT_TIME)) {
+			oin->cache_enabled = true;
+			ocfs_update_file_entry_slot (osb, oin, OCFS_WRITE);
+		} else {
+			LOG_TRACE_ARGS
+			    ("Calling NodeNum (%u) to break File Cache\n",
+			     res->master_node_num);
+
+			status = ocfs_break_cache_lock (osb, res);
+			if (status < 0) {
+				LOG_ERROR_STATUS (status);
+			} else {
+				oin->cache_enabled = false;
+				res->lock_type = OCFS_DLM_NO_LOCK;
+			}
+		}
+	} else if ((res->lock_type <= OCFS_DLM_SHARED_LOCK) &&
+		   (since_read > CACHE_LOCK_SLOT_TIME)) {
+		if (since_write > CACHE_LOCK_SLOT_TIME)
+			oin->cache_enabled = true;
+
+		ocfs_update_file_entry_slot (osb, oin, OCFS_READ);
+	}
+
+	LOG_EXIT ();
+	return;
+}				/* ocfs_check_lock_state */
+
+
+/*
+ * ocfs_delete_cdsl()
+ *
+ */
+int ocfs_delete_cdsl (ocfs_super * osb, ub8 parent_off, ocfs_file_entry * fe)
+{
+	int status = 0;
+	ocfs_file_entry *newfe = NULL;
+	ocfs_dir_node *pLockNode = NULL;
+
+	ub4 lockFlags = 0;
+	bool bAcquiredLock = false;
+	ocfs_lock_res *pLockResource;
+	ub8 lockId = 0;
+
+	bool bParentLockAcquired = false;
+	ub4 parentLockFlags = 0;
+	ocfs_lock_res *pParentLockResource;
+	ub8 parentLockId = 0;
+
+	LOG_ENTRY ();
+
+	newfe = fe;
+	if (newfe == NULL) {
+		status = -EINVAL;
+		goto leave;
+	}
+
+	if (newfe->link_cnt != 0) {
+		status = -ENOTEMPTY;
+		goto leave;
+	}
+
+	pLockNode =
(ocfs_dir_node *) ocfs_allocate_file_entry (); + if (pLockNode == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto leave; + } + + parentLockId = parent_off; + parentLockFlags = (FLAG_FILE_CREATE | FLAG_DIR); + status = + ocfs_acquire_lock (osb, parentLockId, OCFS_DLM_EXCLUSIVE_LOCK, + parentLockFlags, &pParentLockResource, + (ocfs_file_entry *) pLockNode); + if (status < 0) { + LOG_ERROR_ARGS + ("ocfs_acquire_lock() failed with status(0x%08x) for " + "LockId (%u.%u)\n", status, HI (parentLockId), + LO (parentLockId)); + goto leave; + } + + bParentLockAcquired = true; + + lockId = newfe->this_sector; + lockFlags = (FLAG_FILE_DELETE); + + status = + ocfs_acquire_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, lockFlags, + &pLockResource, newfe); + if (status < 0) { + LOG_ERROR_ARGS + ("ocfs_acquire_lock() failed with status(0x%08x) for " + "LockId (%u.%u)\n", status, HI (lockId), LO (lockId)); + goto leave; + } + + bAcquiredLock = true; + + /* Check the file Entry and call delete if link count == 0 */ + if (newfe->link_cnt == 0) { + /* Mark the file as being deleted */ + OCFS_SET_FLAG (fe->sync_flags, OCFS_SYNC_FLAG_MARK_FOR_DELETION); + fe->sync_flags &= (~OCFS_SYNC_FLAG_VALID); + + status = ocfs_write_file_entry (osb, fe, fe->this_sector); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + /* Lock on directory shd be held by the node which either */ + /* died or this node... 
*/ + + status = ocfs_del_file_entry (osb, newfe, pLockNode); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + } else { + status = -ENOTEMPTY; + goto leave; + } + + leave: + /* Release the file lock if we acquired it */ + if (bAcquiredLock) { + status = + ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, + lockFlags, pLockResource); + if (status < 0) { + LOG_ERROR_ARGS + ("ocfs_release_lock() failed with status(0x%08x) " + "for LockId (%u.%u)\n", status, HI (lockId), + LO (lockId)); + } + } + + /* Release the file lock if we acquired it */ + if (bParentLockAcquired) { + status = + ocfs_release_lock (osb, parentLockId, OCFS_DLM_EXCLUSIVE_LOCK, + parentLockFlags, pParentLockResource); + if (status < 0) { + LOG_ERROR_ARGS + ("ocfs_release_lock() failed with status(0x%08x) for LockId (%u.%u)\n", + status, HI (parentLockId), LO (parentLockId)); + } + } + + if (pLockNode) { + ocfs_release_file_entry ((ocfs_file_entry *) pLockNode); + } + + LOG_EXIT_STATUS (status); + return status; +} + + +/* + * ocfs_change_to_cdsl() + * + */ +int ocfs_change_to_cdsl (ocfs_super * osb, ub8 parent_off, ocfs_file_entry * fe) +{ + int status = 0, tmpstat = 0; + ocfs_file_entry *pNewFileEntry = NULL; + ocfs_dir_node *pLockNode = NULL; + ub4 length; + + ub8 lockId = 0, bitmapOffset, numClustersAlloc; + ocfs_lock_res *pLockResource; + ub4 lockFlags = 0; + bool bAcquiredLock = false; + ub1 *buffer = NULL; + ub8 *cdslInfo; + + bool bCacheLock = false; + + /* Zero out the entry for the file and rewrite it back to the disk */ + /* Also, the other nodes should update their cache bitmap for file */ + /* ent to mark this one as free now. 
*/ + + LOG_ENTRY(); + + + if (fe == NULL) { + LOG_ERROR_STR("fe is NULL"); + status = -EINVAL; + goto leave; + } + + pNewFileEntry = ocfs_allocate_file_entry (); + if (pNewFileEntry == NULL) { + LOG_ERROR_STATUS(status = -ENOMEM); + goto leave; + } + + memcpy (pNewFileEntry, fe, sizeof (ocfs_file_entry)); + + if ((DISK_LOCK_FILE_LOCK (pNewFileEntry) == OCFS_DLM_ENABLE_CACHE_LOCK) + && (DISK_LOCK_CURRENT_MASTER (pNewFileEntry) == osb->node_num)) { + bCacheLock = true; + } + + lockId = pNewFileEntry->this_sector; + lockFlags = FLAG_FILE_CHANGE_TO_CDSL; + pLockNode = (ocfs_dir_node *) pNewFileEntry; + + if (bCacheLock) { + status = + ocfs_acquire_lock (osb, lockId, OCFS_DLM_ENABLE_CACHE_LOCK, + lockFlags, &pLockResource, + (ocfs_file_entry *) pLockNode); + } else { + status = + ocfs_acquire_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, + lockFlags, &pLockResource, + (ocfs_file_entry *) pLockNode); + } + + if (status < 0) { + LOG_ERROR_ARGS("ocfs_acquire_lock failed with status(0x%08x) " \ + "for LockId (%u.%u)\n", status, + HI (pNewFileEntry->this_sector), + LO (pNewFileEntry->this_sector)); + goto leave; + } + + bAcquiredLock = true; + + if (bCacheLock) { + DISK_LOCK_FILE_LOCK (pNewFileEntry) = + OCFS_DLM_ENABLE_CACHE_LOCK; + DISK_LOCK_CURRENT_MASTER (pNewFileEntry) = osb->node_num; + } + + length = (8 * OCFS_MAXIMUM_NODES); + length = OCFS_ALIGN (length, PAGE_SIZE); + + status = + ocfs_find_contiguous_space_from_bitmap (osb, length, &bitmapOffset, + &numClustersAlloc, false); + if (status < 0) { + LOG_ERROR_STATUS(status); + goto leave; + + } + + pNewFileEntry->extents[0].disk_off = + (bitmapOffset * osb->vol_layout.cluster_size) + + osb->vol_layout.data_start_off; + pNewFileEntry->extents[0].num_clusters = + numClustersAlloc * osb->vol_layout.cluster_size; + pNewFileEntry->extents[0].file_off = 0; + + pNewFileEntry->alloc_size = pNewFileEntry->file_size = + pNewFileEntry->extents[0].num_clusters; + + pNewFileEntry->attribs |= OCFS_ATTRIB_FILE_CDSL; + + /* Initialize 
the table with 0 */ + buffer = ocfs_malloc (length); + if (buffer == NULL) { + LOG_ERROR_STATUS(status = -ENOMEM); + goto leave; + } + + memset (buffer, 0, length); + cdslInfo = (ub8 *) buffer; + + /* Point entry for this node to the file entry we have */ + + { + ub8 physicalOffset, fileOffset, numSectorsAlloc; + + /* create the entry if one doesn't exist and modify the cdsl data */ + + /* Allocate contiguous blocks on disk */ + status = + ocfs_alloc_node_block (osb, OCFS_SECTOR_SIZE, &physicalOffset, + &fileOffset, (ub8 *) & numSectorsAlloc, + osb->node_num, DISK_ALLOC_EXTENT_NODE); + if (status < 0) { + LOG_ERROR_STATUS(status); + goto leave; + } + + fe->this_sector = physicalOffset; + + *(cdslInfo + osb->node_num) = physicalOffset; + + /* Write the new file entry to the disk */ + status = ocfs_write_file_entry (osb, fe, physicalOffset); + if (status < 0) { + LOG_ERROR_STATUS(status); + goto leave; + } + } + + status = + ocfs_write_disk (osb, (sb1 *) buffer, length, + pNewFileEntry->extents[0].disk_off); + if (status < 0) { + LOG_ERROR_STATUS(status); + goto leave; + } + + OcfsQuerySystemTime (&DISK_LOCK_LAST_WRITE (pNewFileEntry)); + OcfsQuerySystemTime (&DISK_LOCK_LAST_WRITE (pNewFileEntry)); + DISK_LOCK_WRITER_NODE (pNewFileEntry) = osb->node_num; + DISK_LOCK_READER_NODE (pNewFileEntry) = osb->node_num; + + /* Write the file entry with the cdsl back */ + status = + ocfs_write_file_entry (osb, pNewFileEntry, pNewFileEntry->this_sector); + if (status < 0) { + LOG_ERROR_STATUS(status); + goto leave; + } + + leave: + if (bAcquiredLock) { + tmpstat = + ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, + lockFlags, pLockResource); + if (tmpstat < 0) { + status = tmpstat; + LOG_ERROR_ARGS("ocfs_release_lock failed with " \ + "status(0x%08x) for LockId (%u.%u)\n", + tmpstat, HI (pNewFileEntry->this_sector), + LO (pNewFileEntry->this_sector)); + } + } + + if (buffer) { + ocfs_safefree (buffer); + } + + LOG_EXIT_STATUS(status); + return status; +} /* 
ocfs_change_to_cdsl */ + + +/* + * ocfs_create_cdsl() + * + */ +int ocfs_create_cdsl (ocfs_super * osb, ub8 parent_off, ocfs_file_entry * fe) +{ + int status = 0, tmpstat = 0; + ocfs_file_entry *fileEntry = NULL; + ocfs_dir_node *PDirNode = NULL, *pLockNode = NULL; + ub4 size, length; + ub8 lockId = 0, bitmapOffset, numClustersAlloc; + ocfs_lock_res *pLockResource; + ub4 lockFlags = 0; + bool bAcquiredLock = false; + ub1 *buffer = NULL; + + LOG_ENTRY (); + + /* Zero out the entry for the file and rewrite it back to the disk */ + /* Also, the other nodes should update their cache bitmap for file */ + /* ent to mark this one as free now. */ + + pLockNode = (ocfs_dir_node *) ocfs_allocate_file_entry (); + if (pLockNode == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto leave; + } + + lockId = parent_off; + lockFlags = FLAG_FILE_CREATE | FLAG_DIR; + + /* acquire the lock */ + status = + ocfs_acquire_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, lockFlags, + &pLockResource, (ocfs_file_entry *) pLockNode); + if (status < 0) { + LOG_ERROR_ARGS ("ocfs_acquire_lock() failed with status(0x%08x) " + "for LockId (%u.%u)\n", status, HI (lockId), + LO (lockId)); + goto leave; + } + bAcquiredLock = true; + + /* Change the name and write it back... 
*/ + if (fe == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto leave; + } + + fileEntry = fe; + length = (8 * OCFS_MAXIMUM_NODES); + length = OCFS_ALIGN (length, PAGE_SIZE); + + status = + ocfs_find_contiguous_space_from_bitmap (osb, length, &bitmapOffset, + &numClustersAlloc, false); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + fileEntry->extents[0].disk_off = + (bitmapOffset * osb->vol_layout.cluster_size) + + osb->vol_layout.data_start_off; + fileEntry->extents[0].num_clusters = + numClustersAlloc * osb->vol_layout.cluster_size; + fileEntry->extents[0].file_off = 0; + + fileEntry->alloc_size = fileEntry->file_size = + fileEntry->extents[0].num_clusters; + + /* Initialize the table with 0 */ + buffer = ocfs_malloc (length); + if (buffer == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto leave; + } + + memset (buffer, 0, length); + + status = + ocfs_write_disk (osb, (sb1 *) buffer, length, + fileEntry->extents[0].disk_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + size = (ub4) (osb->vol_layout.dir_node_size); + PDirNode = ocfs_malloc (size); + if (PDirNode == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto leave; + } + + if (pLockNode->free_node_ptr == -1) { + status = ocfs_read_dir_node (osb, PDirNode, parent_off); + if (status < 0) { + goto leave; + } + + } else { + /* Goto the deleted tail or the free node pointer */ + if (pLockNode->next_del_ent_node == -1) { + LOG_TRACE_STR ("Going to free_node_ptr"); + status = + ocfs_read_dir_node (osb, PDirNode, + pLockNode->free_node_ptr); + if (status < 0) { + goto leave; + } + + } else { + LOG_TRACE_STR ("Going to pLockNode->next_del_ent_node"); + status = + ocfs_read_dir_node (osb, PDirNode, + pLockNode->next_del_ent_node); + if (status < 0) { + goto leave; + } + + } + + } + + OcfsQuerySystemTime (&DISK_LOCK_LAST_WRITE (fileEntry)); + OcfsQuerySystemTime (&DISK_LOCK_LAST_WRITE (fileEntry)); + DISK_LOCK_WRITER_NODE (fileEntry) = osb->node_num; + 
DISK_LOCK_READER_NODE (fileEntry) = osb->node_num; + + LOG_TRACE_ARGS ("PDirNode has a Lock = %d\n", + DISK_LOCK_FILE_LOCK (PDirNode)); + + status = + ocfs_insert_file (osb, PDirNode, fileEntry, pLockNode, pLockResource); + if (status < 0) { + LOG_ERROR_STATUS(status); + goto leave; + } + + bAcquiredLock = false; + + leave: + if (bAcquiredLock) { + tmpstat = + ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, + lockFlags, pLockResource); + if (tmpstat < 0) { + LOG_ERROR_ARGS + ("ocfs_release_lock() failed with status(0x%08x) " + "for LockId (%u.%u)", tmpstat, HI (lockId), + LO (lockId)); + status = tmpstat; + } + } + + if (PDirNode) { + ocfs_safefree (PDirNode); + } + + if (pLockNode) { + ocfs_safefree (pLockNode); + } + + if (buffer) { + ocfs_safefree (buffer); + } + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_create_cdsl */ + + +/* + * ocfs_truncate_file() + * + */ +int ocfs_truncate_file (ocfs_super * osb, ocfs_inode * oin, ub8 file_size) +{ + int status = 0, tmpstat; + ocfs_file_entry *fe = NULL; + ub8 lockId = 0; + ub4 lockFlags = 0; + bool bFileLockAcquired = false; + bool bAcquiredLock = false; + ocfs_lock_res *pLockResource; + ub8 changeSeqNum = 0; + bool bCacheLock = false; + ocfs_dir_node *pLockNode = NULL; + struct inode *inode = NULL; + + LOG_ENTRY (); + + OCFS_ASSERT (oin); + inode = oin->inode; + OCFS_ASSERT (inode); + + /* Allocate memory for fileentry */ + fe = ocfs_allocate_file_entry (); + if (fe == NULL) { + LOG_ERROR_STR ("ocfs_allocate_file_entry failed"); + status = -ENOMEM; + goto leave; + } + + status = ocfs_read_file_entry (osb, fe, oin->file_disk_off); + if (status < 0) { + LOG_ERROR_ARGS ("ocfs_read_file_entry failed with status(%x)\n", + status); + goto leave; + } + + /* Grab a lock on the entry found if we have more than 1 extents and */ + /* also make this node the master */ + lockId = fe->this_sector; + lockFlags = FLAG_FILE_TRUNCATE; + bFileLockAcquired = true; + pLockNode = (ocfs_dir_node *) fe; + + if 
((DISK_LOCK_FILE_LOCK (fe) == OCFS_DLM_ENABLE_CACHE_LOCK) + && (DISK_LOCK_CURRENT_MASTER (fe) == osb->node_num)) { + bCacheLock = true; + } + + status = + ocfs_acquire_lock (osb, lockId, + bCacheLock ? OCFS_DLM_ENABLE_CACHE_LOCK : + OCFS_DLM_EXCLUSIVE_LOCK, lockFlags, &pLockResource, + (ocfs_file_entry *) pLockNode); + + if (status < 0) { + LOG_ERROR_ARGS + ("ocfs_acquire_lock failed with status(%x) for %u.%u\n", + status, lockId); + goto leave; + } + + bAcquiredLock = true; + + status = + ocfs_free_extents_for_truncate (osb, fe, oin, osb->node_num, + oin->alloc_size); + if (status < 0) { + LOG_ERROR_ARGS + ("ocfs_free_extents_for_truncate failed with status (%x)\n", + status); + goto leave; + } + + if (bCacheLock) { + DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_ENABLE_CACHE_LOCK; + DISK_LOCK_CURRENT_MASTER (fe) = osb->node_num; + } + + /* no need to do OCFS_SECTOR_ALIGN once the allocation size is correct. */ + DISK_LOCK_SEQNUM (fe) = changeSeqNum; + + /* Set the valid bit and reset the change bit here... TODO */ + SET_VALID_BIT (fe->sync_flags); + fe->sync_flags &= ~(OCFS_SYNC_FLAG_CHANGE); + + OcfsQuerySystemTime (&fe->modify_time); + + fe->file_size = inode->i_size; + fe->alloc_size = oin->alloc_size; + + status = ocfs_write_file_entry (osb, fe, fe->this_sector); + if (status < 0) { + LOG_ERROR_ARGS ("ocfs_write_file_entry failed with status(%x)\n", + status); + } + + /* Update all open oins */ + /* Our local update is done, if somebody had asked for a bdcast lock */ + /* He shd set the state */ + + leave: + /* Release the file lock if we acquired it */ + if (bAcquiredLock) { + if (bFileLockAcquired) { + lockFlags |= FLAG_FILE_UPDATE_OIN; + } + + tmpstat = + ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, + lockFlags, pLockResource); + if (tmpstat < 0) { + // status = tmpstat ? 
+ LOG_ERROR_ARGS + ("ocfs_release_lock failed with status(%x) for %u.%u\n", + tmpstat, fe->this_sector); + } + } + + /* free up fileEntry */ + if (fe) { + ocfs_release_file_entry (fe); + fe = NULL; + } + + LOG_EXIT_STATUS(status); + return status; +} /* ocfs_truncate_file */ diff -urNp ocfs/fs/ocfs/Common/ocfsgendirnode.c 2.4.20pre5aa2/fs/ocfs/Common/ocfsgendirnode.c --- ocfs/fs/ocfs/Common/ocfsgendirnode.c Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/ocfsgendirnode.c Fri Sep 6 01:46:16 2002 @@ -0,0 +1,1601 @@ +/* + * ocfsgendirnode.c + * + * Allocate, free, read, write, find, etc. dirnodes. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#include + +/* Tracing */ +#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_DIRINFO + +#define IS_VALID_DIR_NODE(PDirNode) \ + (!strncmp((PDirNode)->signature, OCFS_DIR_NODE_SIGNATURE, \ + strlen(OCFS_DIR_NODE_SIGNATURE))) + +/* + * ocfs_print_file_entry() + * + */ +void ocfs_print_file_entry (ocfs_file_entry * fe) +{ + LOG_ERROR_ARGS ("This fe has name %s\n", fe->filename); + + return; +} /* ocfs_print_file_entry */ + +/* + * ocfs_print_dir_node() + * + */ +void ocfs_print_dir_node (ocfs_super * osb, ocfs_dir_node * DirNode) +{ + int i; + ocfs_file_entry *pOrigFileEntry; + + if (DirNode->dir_node_flags & DIR_NODE_FLAG_ROOT) { + LOG_ERROR_STR ("This is the Root of the BTree.\n"); + } + + LOG_ERROR_ARGS ("signature: %s\n", DirNode->signature); + + LOG_ERROR_ARGS ("node_disk_off: %u.%u\n", HI (DirNode->node_disk_off), + LO (DirNode->node_disk_off)); + + LOG_ERROR_ARGS ("num_ents: %u, num_ent_used: %u\n", DirNode->num_ents, + DirNode->num_ent_used); + + for (i = 0; i < DirNode->num_ent_used; i++) { + pOrigFileEntry = FILEENT (DirNode, i); + ocfs_print_file_entry (pOrigFileEntry); + } + + return; +} /* ocfs_print_dir_node */ + +/* + * ocfs_alloc_node_block() + * + */ +int ocfs_alloc_node_block (ocfs_super * osb, + ub8 FileSize, + ub8 * DiskOffset, + ub8 * file_off, ub8 * NumClusterAlloc, ub4 NodeNum, ub4 Type) +{ + int status = 0; + int tmpstat; + ub8 fileSize = 0; + ub8 offset = 0; + ub8 length = 0; + ub8 lockId = 0; + ub8 numBytes = 0; + ub8 allocSize = 0; + ub8 prevFileSize = 0; + ub8 extent; + ub8 newFileSize; + ub8 bitMapSize; + ub1 *buffer = NULL; + ocfs_alloc_bm DirAllocBitMap; + ub4 numBits = 0; + ub4 foundBit = -1; + ub4 blockSize = 0; + bool bLockAcquired = false; + ocfs_lock_res *pLockResource; + ub4 fileId = 0; + ub4 extendFileId = 0; + ocfs_log_record *pOcfsLogRec = NULL; + + LOG_ENTRY (); + +#ifdef PARANOID_LOCKS + ocfs_down_sem 
(&(osb->dir_alloc_lock), true); + ocfs_down_sem (&(osb->file_alloc_lock), true); +#endif + ocfs_down_sem (&(osb->vol_alloc_lock), true); + + if (Type == DISK_ALLOC_DIR_NODE) { + fileId = OCFS_FILE_DIR_ALLOC_BITMAP + NodeNum; + blockSize = (ub4) osb->vol_layout.dir_node_size; + extendFileId = OCFS_FILE_DIR_ALLOC + NodeNum; + } else if (Type == DISK_ALLOC_EXTENT_NODE) { + fileId = OCFS_FILE_FILE_ALLOC_BITMAP + NodeNum; + extendFileId = OCFS_FILE_FILE_ALLOC + NodeNum; + blockSize = (ub4) osb->vol_layout.file_node_size; + } + + /* Allocate a block of size blocksize from the relevant file/bitmap */ + + OCFS_ASSERT (blockSize); + + lockId = (fileId * OCFS_SECTOR_SIZE) + osb->vol_layout.root_int_off; + + /* Get a lock on the file */ + status = + ocfs_acquire_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, + FLAG_FILE_CREATE, &pLockResource, NULL); + if (status < 0) { + goto leave; + } + + bLockAcquired = true; + + numBytes = OCFS_ALIGN ((FileSize), blockSize); + numBits = (ub4) (numBytes / blockSize); + +// while(1) + { + /* Read in the bitmap file for the dir alloc and look for the required */ + /* space, if found */ + + status = ocfs_get_system_file_size (osb, fileId, &fileSize, &allocSize); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + prevFileSize = fileSize; + + if ((fileSize != 0) && (allocSize != 0)) { + /* Round this off to dirnodesize */ + length = OCFS_ALIGN (allocSize, OCFS_SECTOR_SIZE); + length = OCFS_ALIGN (allocSize, PAGE_SIZE); + + buffer = vmalloc (length); + if (buffer == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto leave; + } + + status = + ocfs_read_system_file (osb, fileId, buffer, allocSize, + offset); + if (status < 0) { + goto leave; + } + + ocfs_initialize_bitmap (&DirAllocBitMap, (ub4 *) buffer, + (ub4) (fileSize * 8)); + + /* Find the requisite number of bits... 
*/ + + /* This function will check for clear bits in the Bitmap for */ + /* consective clear bits equal to ClusterCount */ + foundBit = + ocfs_find_clear_bits (&DirAllocBitMap, (ub4) numBits, + 0, 0); + } + + /* It returns -1 on failure , otherwise ByteOffset points at the */ + /* location in bitmap from where there are ClusterCount no of bits */ + /* are free. */ + + if (foundBit == -1) { + /* if not found add more allocation to the file and try again. */ + + /* Lets get a 1MB chunks every time or clustersize which ever */ + /* is greater or the number of bit asked */ + extent = + ((ONE_MEGA_BYTE) > + osb->vol_layout. + cluster_size) ? (ONE_MEGA_BYTE) : osb->vol_layout. + cluster_size; + + extent = (extent > (numBits * blockSize)) ? extent : + (numBits * blockSize); + + extent = OCFS_ALIGN (extent, ONE_MEGA_BYTE); + + ocfs_get_system_file_size (osb, (extendFileId), &newFileSize, + &allocSize); + + /* This is for OUI optimzation to allocate more disk space for */ + /* directory allocations */ + + if (allocSize > 0) + extent *= 2; + + status = + ocfs_extend_system_file (osb, (extendFileId), + newFileSize + extent); + if (status < 0) { + goto leave; + } + + newFileSize += extent; + bitMapSize = newFileSize / (blockSize * 8); + + /* Calculate the new bitmap size */ + status = ocfs_extend_system_file (osb, fileId, bitMapSize); + if (status < 0) { + goto leave; + } + + /* ?? 
Free the buffer here */ + vfree (buffer); + buffer = NULL; + + status = + ocfs_get_system_file_size (osb, fileId, &fileSize, + &allocSize); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + length = OCFS_ALIGN (allocSize, osb->sect_size); + length = OCFS_ALIGN (allocSize, PAGE_SIZE); + + buffer = vmalloc (length); + if (buffer == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto leave; + } + + status = + ocfs_read_system_file (osb, fileId, buffer, allocSize, + offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + ocfs_initialize_bitmap (&DirAllocBitMap, (ub4 *) buffer, + (ub4) (fileSize * 8)); + + foundBit = prevFileSize * 8; +// continue; + + } +/* else + { + break; + } +*/ + } + + LOG_TRACE_ARGS ("The byte offset is (%d)\n", foundBit); + + ocfs_set_bits (&DirAllocBitMap, (ub4) foundBit, (ub4) numBits); + + /* Log the change under current transid, */ + { + ub4 size; + + size = sizeof (ocfs_log_record); + size = (ub4) OCFS_ALIGN (size, PAGE_SIZE); + + if ((pOcfsLogRec = ocfs_malloc (size)) == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto leave; + } + + pOcfsLogRec->log_id = osb->curr_trans_id; + pOcfsLogRec->log_type = LOG_TYPE_DISK_ALLOC; + + pOcfsLogRec->rec.alloc.length = numBits; + pOcfsLogRec->rec.alloc.file_off = (foundBit * blockSize); + pOcfsLogRec->rec.alloc.type = Type; + pOcfsLogRec->rec.alloc.node_num = NodeNum; + + /* Log the original dirnode sector and the new cluster where the */ + /* info is stored */ + status = ocfs_write_log (osb, pOcfsLogRec, LOG_RECOVER); + if (status < 0) { + goto leave; + } + } + + /* Write the bitmap file back */ + status = ocfs_write_system_file (osb, fileId, buffer, allocSize, offset); + if (status < 0) { + LOG_ERROR_STATUS(status); + goto leave; + } + + *DiskOffset = ocfs_file_to_disk_off (osb, (extendFileId), + (foundBit * blockSize)); + if (*DiskOffset == 0) { + LOG_ERROR_STATUS(status = -EFAIL); + goto leave; + } + + *file_off = (ub8) ((ub8) foundBit * (ub8) 
blockSize); + if (*file_off == 0) { + LOG_ERROR_ARGS + ("File offset was %u.%u for type (%x) blocksize=%u foundbit=%u\n", + *file_off, Type, blockSize, foundBit); + } + + leave: + + ocfs_up_sem (&(osb->vol_alloc_lock)); +#ifdef PARANOID_LOCKS + ocfs_up_sem (&(osb->file_alloc_lock)); + ocfs_up_sem (&(osb->dir_alloc_lock)); +#endif + if (bLockAcquired) { + tmpstat = + ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, + FLAG_FILE_CREATE, pLockResource); + if (tmpstat < 0) + status = tmpstat; + + bLockAcquired = false; + } + + if (buffer) { + vfree (buffer); + buffer = NULL; + } + + ocfs_safefree (pOcfsLogRec); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_alloc_node_block */ + +/* + * ocfs_free_vol_block() + * + */ +int ocfs_free_vol_block (ocfs_super * osb, ocfs_free_log * FreeLog, ub4 NodeNum, ub4 Type) +{ + int status = 0; + ub8 fileSize = 0; + ub8 offset = 0; + ub8 length = 0; + ub8 allocSize = 0; + ub4 foundBit = -1; //SM... What??? + ub4 blockSize = 0; + ub4 fileId = 0; + ub4 extendFileId = 0; + ub1 *buffer = NULL; + ocfs_alloc_bm AllocBitMap; + ocfs_alloc_bm *pTempBitMap; + ub4 i; + ub4 size; + + LOG_ENTRY (); + +#ifdef PARANOID_LOCKS + ocfs_down_sem (&(osb->dir_alloc_lock), true); + ocfs_down_sem (&(osb->file_alloc_lock), true); +#endif + ocfs_down_sem (&(osb->vol_alloc_lock), true); + + switch (Type) { + case DISK_ALLOC_DIR_NODE: + fileId = OCFS_FILE_DIR_ALLOC_BITMAP + NodeNum; + blockSize = (ub4) osb->vol_layout.dir_node_size; + extendFileId = OCFS_FILE_DIR_ALLOC + NodeNum; + + if (!IS_VALID_NODE_NUM (NodeNum)) { + LOG_ERROR_STATUS(status = -EINVAL); + goto leave; + } + break; + + case DISK_ALLOC_EXTENT_NODE: + fileId = OCFS_FILE_FILE_ALLOC_BITMAP + NodeNum; + extendFileId = OCFS_FILE_FILE_ALLOC + NodeNum; + blockSize = (ub4) osb->vol_layout.file_node_size; + + if (!IS_VALID_NODE_NUM (NodeNum)) { + LOG_ERROR_STATUS(status = -EINVAL); + goto leave; + } + break; + + case DISK_ALLOC_VOLUME: + break; + + default: + goto leave; + } + + if (Type 
== DISK_ALLOC_VOLUME) { + size = (ub4) OCFS_SECTOR_ALIGN ((osb->cluster_bitmap.size) / 8); + status = ocfs_read_metadata (osb, osb->cluster_bitmap.buf, size, + osb->vol_layout.bitmap_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + pTempBitMap = &osb->cluster_bitmap; + } else { + /* Read in the bitmap file for the dir alloc and look for the */ + /* required space, if found */ + + status = ocfs_get_system_file_size (osb, fileId, &fileSize, &allocSize); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + /* Round this off to dirnodesize */ + length = OCFS_ALIGN (allocSize, OCFS_SECTOR_SIZE); + length = OCFS_ALIGN (allocSize, PAGE_SIZE); + + if ((buffer = ocfs_malloc (length)) == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto leave; + } + + status = + ocfs_read_system_file (osb, fileId, buffer, allocSize, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + ocfs_initialize_bitmap (&AllocBitMap, (ub4 *) buffer, + (ub4) (fileSize * 8)); + pTempBitMap = &AllocBitMap; + } + + for (i = 0; i < FreeLog->num_free_upds; i++) { + if (FreeLog->free_bitmap[i].file_off == 0) { + LOG_ERROR_ARGS + ("File offset was 0 for type (%x) blk %d\n", Type, + blockSize); + } + + if (Type == DISK_ALLOC_VOLUME) + foundBit = (ub4) FreeLog->free_bitmap[i].file_off; + else + foundBit = + (ub4) (FreeLog->free_bitmap[i].file_off / + blockSize); + + ocfs_clear_bits (pTempBitMap, (ub4) foundBit, + (ub4) FreeLog->free_bitmap[i].length); + } + + /* Write a cleanup log here */ + + if (Type == DISK_ALLOC_VOLUME) { + size = (ub4) OCFS_SECTOR_ALIGN ((osb->cluster_bitmap.size) / 8); + + /* I have absolutely no idea why this is done twice! 
*/ + status = ocfs_write_disk (osb, osb->cluster_bitmap.buf, + size, osb->vol_layout.bitmap_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + status = ocfs_write_metadata (osb, osb->cluster_bitmap.buf, + size, osb->vol_layout.bitmap_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + } else { + status = + ocfs_write_system_file (osb, fileId, buffer, allocSize, offset); + if (status < 0) { + goto leave; + } + } + leave: + ocfs_up_sem (&(osb->vol_alloc_lock)); +#ifdef PARANOID_LOCKS + ocfs_up_sem (&(osb->file_alloc_lock)); + ocfs_up_sem (&(osb->dir_alloc_lock)); +#endif + ocfs_safefree (buffer); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_free_vol_block */ + +/* + * ocfs_free_node_block() + * + */ +int ocfs_free_node_block (ocfs_super * osb, ub8 file_off, ub8 Length, ub4 NodeNum, + ub4 Type) +{ + int status = 0; + ub8 fileSize = 0; + ub8 offset = 0; + ub8 length = 0; + ub8 lockId = 0; + ub8 allocSize = 0; + ub1 *buffer = NULL; + ocfs_alloc_bm DirAllocBitMap; + ub4 foundBit = -1; + ub4 blockSize = 0; + bool bLockAcquired = false; + ocfs_lock_res *pLockResource; + ub4 fileId = 0; + ub4 extendFileId = 0; + + LOG_ENTRY (); + + if (Type == DISK_ALLOC_DIR_NODE) { + fileId = OCFS_FILE_DIR_ALLOC_BITMAP + NodeNum; + blockSize = (ub4) osb->vol_layout.dir_node_size; + extendFileId = OCFS_FILE_DIR_ALLOC + NodeNum; + } else if (Type == DISK_ALLOC_EXTENT_NODE) { + fileId = OCFS_FILE_FILE_ALLOC_BITMAP + NodeNum; + extendFileId = OCFS_FILE_FILE_ALLOC + NodeNum; + blockSize = (ub4) osb->vol_layout.file_node_size; + } + + /* Allocate a block of size blocksize from the relevant file/bitmap */ + + lockId = (fileId * OCFS_SECTOR_SIZE) + osb->vol_layout.root_int_off; + + /* Get a lock on the file */ + status = + ocfs_acquire_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, + FLAG_FILE_CREATE, &pLockResource, NULL); + if (status < 0) { + goto leave; + } + + bLockAcquired = true; + + /* Read in the bitmap file for the dir alloc and 
look for the required */ + /* space, if found */ + status = ocfs_get_system_file_size (osb, fileId, &fileSize, &allocSize); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + /* Round this off to dirnodesize */ + length = OCFS_ALIGN (allocSize, OCFS_SECTOR_SIZE); + length = OCFS_ALIGN (allocSize, PAGE_SIZE); + + if ((buffer = ocfs_malloc (length)) == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto leave; + } + + status = ocfs_read_system_file (osb, fileId, buffer, allocSize, offset); + if (status < 0) { + goto leave; + } + + ocfs_initialize_bitmap (&DirAllocBitMap, (ub4 *) buffer, + (ub4) (fileSize * 8)); + + foundBit = (ub4) (file_off / blockSize); + ocfs_clear_bits (&DirAllocBitMap, (ub4) foundBit, (ub4) Length); + + status = ocfs_write_system_file (osb, fileId, buffer, allocSize, offset); + if (status < 0) { + goto leave; + } + leave: + if (bLockAcquired) { + status = + ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, + FLAG_FILE_CREATE, pLockResource); + if (status < 0) { + /* log failure */ + } + bLockAcquired = false; + } + ocfs_safefree (buffer); + + LOG_EXIT_STATUS (0); + return 0; +} /* ocfs_free_node_block */ + +/* + * ocfs_free_directory_block() + * + */ +int ocfs_free_directory_block (ocfs_super * osb, ocfs_file_entry * fe, sb4 LogNodeNum) +{ + int status = 0; + ocfs_dir_node *PDirNode = NULL; + ub4 size; + ub4 numUpdt; + ub8 currentDirNode; + ocfs_cleanup_record *pCleanupLogRec = NULL; + + LOG_ENTRY (); + + size = sizeof (ocfs_cleanup_record); + size = (ub4) OCFS_ALIGN (size, PAGE_SIZE); + + if ((pCleanupLogRec = ocfs_malloc (size)) == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto leave; + } + + pCleanupLogRec->rec.free.num_free_upds = 0; + + currentDirNode = fe->extents[0].disk_off; + + size = OCFS_SECTOR_SIZE; + + status = + ocfs_get_file_entry (osb, (ocfs_file_entry **) (&PDirNode), + currentDirNode); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + pCleanupLogRec->log_id = 
osb->curr_trans_id; + pCleanupLogRec->log_type = LOG_FREE_BITMAP; + + while ((PDirNode->node_disk_off != INVALID_NODE_POINTER) && + (IS_VALID_DIR_NODE (PDirNode))) { + /* Add to the cleanup log */ + numUpdt = pCleanupLogRec->rec.free.num_free_upds; + if (numUpdt >= FREE_LOG_SIZE) { + status = + ocfs_write_node_log (osb, + (ocfs_log_record *) + pCleanupLogRec, LogNodeNum, + LOG_CLEANUP); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + numUpdt = pCleanupLogRec->rec.free.num_free_upds = 0; + } + + pCleanupLogRec->rec.free.free_bitmap[numUpdt].length = 1; + pCleanupLogRec->rec.free.free_bitmap[numUpdt].file_off = + PDirNode->alloc_file_off; + pCleanupLogRec->rec.free.free_bitmap[numUpdt].type = + DISK_ALLOC_DIR_NODE; + pCleanupLogRec->rec.free.free_bitmap[numUpdt].node_num = + PDirNode->alloc_node; + (pCleanupLogRec->rec.free.num_free_upds)++; + + /* LOG_FREE_BITMAP */ + + if (PDirNode->next_node_ptr != INVALID_NODE_POINTER) { + status = + ocfs_read_disk (osb, PDirNode, OCFS_SECTOR_SIZE, + PDirNode->next_node_ptr); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + continue; + } else { + break; + } + } + + /* Write the log and break */ + if (pCleanupLogRec->rec.free.num_free_upds > 0) { + status = + ocfs_write_node_log (osb, (ocfs_log_record *) pCleanupLogRec, + LogNodeNum, LOG_CLEANUP); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + } + leave: + ocfs_safefree (PDirNode); + ocfs_safefree (pCleanupLogRec); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_free_directory_block */ + +/* + * ocfs_recover_dir_node() + * + */ +int ocfs_recover_dir_node (ocfs_super * osb, ub8 OrigDirNodeOffset, ub8 SavedDirNodeOffset) +{ + LOG_ENTRY (); + + LOG_EXIT_STATUS (0); + return 0; +} /* ocfs_recover_dir_node */ + +/* + * ocfs_read_dir_node() + * + */ +int ocfs_read_dir_node (ocfs_super * osb, ocfs_dir_node * DirNode, ub8 NodeDiskOffset) +{ + int status = 0; + + LOG_ENTRY (); + + /* Read in the Dir Node from the disk 
into the buffer supplied */ + status = + ocfs_read_disk (osb, DirNode, osb->vol_layout.dir_node_size, + NodeDiskOffset); + if (status < 0) { + LOG_ERROR_STATUS (status); + } + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_read_dir_node */ + +/* + * ocfs_write_force_dir_node() + * + */ +int ocfs_write_force_dir_node (ocfs_super * osb, + ocfs_dir_node * DirNode, sb4 IndexFileEntry) +{ + int status = 0; + + LOG_ENTRY (); + + if (IndexFileEntry != -1) { + /* Read in the Dir Node from the disk into the buffer supplied */ + status = ocfs_write_disk (osb, + (ub1 *) (((ub1 *) DirNode) + + ((IndexFileEntry + + 1) * osb->sect_size)), + osb->sect_size, + DirNode->node_disk_off + + ((IndexFileEntry + + 1) * osb->sect_size)); + if (status < 0) { + LOG_ERROR_STATUS (status); + } + } + + /* Write the first sector last */ + status = + ocfs_write_disk (osb, DirNode, osb->sect_size, + DirNode->node_disk_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + } + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_write_force_dir_node */ + +/* + * ocfs_write_dir_node() + * + */ +int ocfs_write_dir_node (ocfs_super * osb, ocfs_dir_node * DirNode, sb4 IndexFileEntry) +{ + int status = 0; + ub8 offset; + ub4 size; + ub1 *buffer; + bool bCacheWrite = false; + bool bFileCacheWrite = false; + + LOG_ENTRY (); + + if ((DISK_LOCK_CURRENT_MASTER (DirNode) == osb->node_num) && + (DISK_LOCK_FILE_LOCK (DirNode) == OCFS_DLM_ENABLE_CACHE_LOCK)) { + bCacheWrite = true; + } + + if (IndexFileEntry != -1) { + ocfs_file_entry *fe = NULL; + + /* Read in the Dir Node from the disk into the buffer supplied */ + + offset = DirNode->node_disk_off + + ((IndexFileEntry + 1) * osb->sect_size); + size = (ub4) osb->sect_size; + buffer = (ub1 *) (((ub1 *) DirNode) + + ((IndexFileEntry + 1) * osb->sect_size)); + fe = (ocfs_file_entry *) buffer; + + if ((DISK_LOCK_CURRENT_MASTER (fe) == osb->node_num) && + (DISK_LOCK_FILE_LOCK (fe) == OCFS_DLM_ENABLE_CACHE_LOCK)) { + bFileCacheWrite = true; + } + + /* 
Write in the dir node */ + if (bFileCacheWrite) { + status = ocfs_write_metadata (osb, buffer, size, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + } + if (!bCacheWrite) { + status = + ocfs_write_disk (osb, buffer, size, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + } + } + } else { + status = ocfs_write_disk (osb, buffer, size, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + } + } + } + + /* Write the first sector last */ + offset = DirNode->node_disk_off; + size = (ub4) OCFS_SECTOR_SIZE; + + /* Write the dir node */ + if (bCacheWrite) { + status = ocfs_write_metadata (osb, DirNode, size, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + } + } else { + status = ocfs_write_disk (osb, DirNode, size, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + } + } + + IF_TRACE (ocfs_print_dir_node (osb, DirNode)); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_write_dir_node */ + + +/* + * ocfs_walk_dir_node() + * + */ +bool ocfs_walk_dir_node (ocfs_super * osb, + ocfs_dir_node * DirNode, + ocfs_file_entry * found_fe, ocfs_file * OFile) +{ + ub4 start; + ub4 i; + ocfs_file_entry *fe; + int status; + bool bRet = false; + + LOG_ENTRY (); + + if (OFile != NULL) + start = OFile->curr_byte_off; + else + start = 0; + + if (!IS_VALID_DIR_NODE (DirNode)) { + bRet = false; + goto bail; + } + + while (1) { + /* Iterate thru this dirnode and find a matching entry */ + for (i = start; i < DirNode->num_ent_used; i++) { + fe = FILEENT (DirNode, i); + + if ((fe->sync_flags & OCFS_SYNC_FLAG_NAME_DELETED) || + (!(fe->sync_flags & OCFS_SYNC_FLAG_VALID))) { + continue; + } + + /* Check to see if the name satisfies pattern */ + { + if ((OFile == NULL) + && (fe->attribs & OCFS_ATTRIB_DIRECTORY)) { + continue; + } + + status = 0; + memcpy ((void *) found_fe, (void *) fe, + OCFS_SECTOR_SIZE); + + LOG_TRACE_ARGS + ("Returning entry: %u, name: %s\n", i, + fe->filename); + + if (OFile != NULL) { + OFile->curr_dir_off = + 
DirNode->node_disk_off; + OFile->curr_byte_off = i + 1; + } + + bRet = true; + goto bail; + } + } + + if (DirNode->next_node_ptr != -1) { + status = + ocfs_read_dir_node (osb, DirNode, + DirNode->next_node_ptr); + + if (!IS_VALID_DIR_NODE (DirNode)) { + bRet = false; + goto bail; + } + start = 0; + continue; + } else { + /* We are done... */ + break; + } + } + + if (OFile != NULL) { + OFile->curr_dir_off = DirNode->node_disk_off; + OFile->curr_byte_off = i + 1; + } + + bail: + LOG_EXIT_ULONG (bRet); + return bRet; +} /* ocfs_walk_dir_node */ + +/* + * ocfs_search_dir_node() + * + */ +bool ocfs_search_dir_node (ocfs_super * osb, + ocfs_dir_node * DirNode, + struct qstr * SearchName, + ocfs_file_entry * found_fe, ocfs_file * OFile) +{ + ub4 start; + ub4 index; + ocfs_file_entry *fe; + int status; + bool bRet = false; + + LOG_ENTRY (); + + if (OFile != NULL) + start = OFile->curr_byte_off; + else + start = 0; + + index = start; + + while (1) { + /* Iterate thru this dirnode and find a matching entry */ + if (index < DirNode->num_ent_used) { + if (ocfs_find_index (osb, DirNode, SearchName, (int *) &index)) { + fe = FILEENT (DirNode, index); + + memcpy ((void *) found_fe, (void *) fe, + OCFS_SECTOR_SIZE); + if (OFile != NULL) { + OFile->curr_dir_off = + DirNode->node_disk_off; + OFile->curr_byte_off = index + 1; + } + bRet = true; + goto bail; + } + } + + if (DirNode->next_node_ptr != -1) { + status = + ocfs_read_dir_node (osb, DirNode, + DirNode->next_node_ptr); + + if (!IS_VALID_DIR_NODE (DirNode)) { + bRet = false; + goto bail; + } + + index = 0; + continue; + } else { + /* We are done... 
/*
 * ocfs_find_index()
 *
 * Looks for FileName among the entries of one directory node.
 *
 * @osb:      super block of the volume (not referenced in the body)
 * @DirNode:  directory node to search
 * @FileName: name to look for
 * @Index:    in: a value > 0 is used as the first slot to examine;
 *            out: slot of the match, or the slot at which the search stopped
 *
 * Returns true when an entry compares equal through ocfs_compare_qstr(),
 * false otherwise (including an invalid DirNode or a NULL FileName, in which
 * case *Index is left untouched).
 *
 * NOTE(review): the halving loop presumes the slots are ordered consistently
 * with ocfs_compare_qstr() whenever index_dirty is clear -- confirm against
 * the code that maintains DirNode->index[].
 */
bool ocfs_find_index (ocfs_super * osb, ocfs_dir_node * DirNode, struct qstr * FileName,
		int *Index)
{
	int lowBnd, upBnd;
	ocfs_file_entry *fe;
	int res = -1, index = 0, start = 0;
	int ret = false;
	struct qstr q;

	LOG_ENTRY ();
	if (!IS_VALID_DIR_NODE (DirNode) || FileName==NULL) {
		ret = false;
		goto bail;
	}

	if (*Index > 0)
		start = *Index;

	/* Index marked dirty: fall back to a plain linear scan from start */
	if (DirNode->index_dirty) {
		for (index = start; index < DirNode->num_ent_used; index++) {
			fe = FILEENT (DirNode, index);
			/* Skip name-deleted and not-valid entries */
			if ((fe->sync_flags & OCFS_SYNC_FLAG_NAME_DELETED) ||
			    (!(fe->sync_flags & OCFS_SYNC_FLAG_VALID))) {
				continue;
			}
			q.name = fe->filename;
			q.len = strlen(fe->filename);
			res = ocfs_compare_qstr(&q, FileName);
			if (!res) {
				*Index = index;
				ret = true;
				goto bail;
			}
		}
		/* Not found: index equals the loop's end value here */
		*Index = index;
		ret = false;
		goto bail;
	}

	/*
	 * Halving search: lowBnd is the first slot of the current window and
	 * upBnd the number of slots still under consideration.
	 */
	for (lowBnd = start, upBnd = (DirNode->num_ent_used - start); upBnd;
	     upBnd >>= 1) {
		index = lowBnd + (upBnd >> 1);

		fe = FILEENT (DirNode, index);

		/*
		 * The probed slot is deleted/invalid and cannot steer the
		 * search, so the current window is scanned linearly instead.
		 */
		if ((fe->sync_flags & OCFS_SYNC_FLAG_NAME_DELETED) ||
		    (!(fe->sync_flags & OCFS_SYNC_FLAG_VALID))) {
			for (index = lowBnd; index < (lowBnd + upBnd); index++) {
				fe = FILEENT (DirNode, index);
				if ((fe->sync_flags & OCFS_SYNC_FLAG_NAME_DELETED) ||
				    (!(fe->sync_flags & OCFS_SYNC_FLAG_VALID))) {
					continue;
				}

				q.name = fe->filename;
				q.len = strlen(fe->filename);
				res = ocfs_compare_qstr(&q, FileName);
				if (!res) {
					*Index = index;
					ret = true;
					goto bail;
				}
				/* Negative compare ends the scan at this slot */
				if (res < 0) {
					*Index = index;
					ret = false;
					goto bail;
				}
			}
			/* Window exhausted: report its last slot */
			*Index = lowBnd + upBnd - 1;
			ret = false;
			goto bail;
		}

		q.name = fe->filename;
		q.len = strlen(fe->filename);
		res = ocfs_compare_qstr(&q, FileName);
		if (!res) {
			*Index = index;
			ret = true;
			goto bail;
		}

		/* Positive compare: continue in the slots above the probe */
		if (res > 0) {
			lowBnd = index + 1;
			--upBnd;
		}
	}

	/* Not found: report the last slot that was probed */
	*Index = index;

      bail:
	LOG_EXIT_ULONG (ret);
	return ret;
}				/* ocfs_find_index */
0; + int index = -1; + ocfs_file_entry *lastEntry; + ocfs_log_record *pLogRec = NULL; + ub4 size; + ub1 freeOffset; + struct qstr q; + + LOG_ENTRY (); + + if (!IS_VALID_DIR_NODE (DirNode)) { + LOG_ERROR_STATUS(status = -EINVAL); + goto bail; + } + + if (DirNode->index_dirty) { + status = ocfs_reindex_dir_node (osb, DirNode->node_disk_off, DirNode); + if (status < 0) + goto bail; + } + + if (DirNode->num_ent_used < osb->max_dir_node_ent) { + if (DirNode->num_ent_used) { + q.name = InsertEntry->filename; + q.len = strlen(InsertEntry->filename); + if (ocfs_find_index (osb, DirNode, &q, &index)) { + /* Already inserted... */ + status = -EEXIST; + goto bail; + } + + if (index < DirNode->num_ent_used) { + fe = FILEENT (DirNode, index); + + res = + strcmp (fe->filename, + InsertEntry->filename); + if (res > 0) { + /* We are greater than the entry in question we */ + /* shd be less than the one next to it */ + index++; + } + } + } else { + index = 0; + } + + if (index < DirNode->num_ent_used) + memmove (&DirNode->index[index + 1], + &DirNode->index[index], + DirNode->num_ent_used - index); + + if (DirNode->num_ent_used) { + if (DirNode->num_del) { + /* Insert at first deleted & change first deleted */ + freeOffset = DirNode->first_del; + DirNode->num_del--; + if (DirNode->num_del) { + lastEntry = + (ocfs_file_entry + *) (FIRST_FILE_ENTRY (DirNode) + + (freeOffset * + OCFS_SECTOR_SIZE)); + DirNode->first_del = + lastEntry->next_del; + } else { + if (LockNode->node_disk_off != + DirNode->node_disk_off) + LockNode->head_del_ent_node = + DirNode->next_del_ent_node; + else + DirNode->head_del_ent_node = + DirNode->next_del_ent_node; + } + } else { + /* Insert at end and change the index */ + freeOffset = DirNode->num_ent_used; + } + } else { + freeOffset = 0; + } + + lastEntry = (ocfs_file_entry *) (FIRST_FILE_ENTRY (DirNode) + + (freeOffset * + OCFS_SECTOR_SIZE)); + + *IndexOffset = freeOffset; + + /* Put the entry at the end */ + InsertEntry->dir_node_ptr = 
DirNode->node_disk_off; + + memcpy (lastEntry, InsertEntry, osb->sect_size); + + OCFS_SET_FLAG (lastEntry->sync_flags, OCFS_SYNC_FLAG_VALID); + + lastEntry->this_sector = DirNode->node_disk_off + + ((freeOffset + 1) * OCFS_SECTOR_SIZE); + InsertEntry->this_sector = lastEntry->this_sector; + + if (!(InsertEntry->sync_flags & OCFS_SYNC_FLAG_VALID)) { + /* This is special for rename... */ + + /* Log into recovery that this name only needs to be deleted if we fail */ + size = sizeof (ocfs_log_record); + size = (ub4) OCFS_ALIGN (size, PAGE_SIZE); + + if ((pLogRec = ocfs_malloc (size)) == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto bail; + } + + /* Now start writing the cleanup log of the filentry master. */ + /* It is this node for normal cases and or the node we are doing */ + /* recovery for. */ + pLogRec->log_id = osb->curr_trans_id; + pLogRec->log_type = LOG_DELETE_NEW_ENTRY; + + pLogRec->rec.del.node_num = osb->node_num; + pLogRec->rec.del.ent_del = InsertEntry->this_sector; + pLogRec->rec.del.parent_dirnode_off = + LockNode->node_disk_off; + pLogRec->rec.del.flags = 0; + + status = + ocfs_write_node_log (osb, pLogRec, osb->node_num, + LOG_RECOVER); + ocfs_safefree (pLogRec); + if (status < 0) + goto bail; + } + + if (DISK_LOCK_FILE_LOCK (InsertEntry) == + OCFS_DLM_ENABLE_CACHE_LOCK) { + ocfs_write_metadata (osb, InsertEntry, OCFS_SECTOR_SIZE, + InsertEntry->this_sector); + } + + DirNode->index[index] = freeOffset; + DirNode->num_ent_used++; + } + + bail: + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_insert_dir_node */ + +/* + * ocfs_del_file_entry() + * + */ +int ocfs_del_file_entry (ocfs_super * osb, + ocfs_file_entry * EntryToDel, ocfs_dir_node * LockNode) +{ + int status = 0; + ub4 offset; + ub4 size; + ocfs_dir_node *PDirNode = NULL; + ocfs_file_entry *fe; + int index = 0; + int length = 0; + + LOG_ENTRY (); + + size = (ub4) (osb->vol_layout.dir_node_size); + + PDirNode = ocfs_malloc (size); + if (PDirNode == NULL) { + LOG_ERROR_STATUS 
(status = -ENOMEM); + goto leave; + } + + status = ocfs_read_dir_node (osb, PDirNode, EntryToDel->dir_node_ptr); + if (status < 0) { + goto leave; + } + + offset = (EntryToDel->this_sector - PDirNode->node_disk_off) / + OCFS_SECTOR_SIZE; + + offset -= 1; + for (index = 0; index < PDirNode->num_ent_used; index++) { + if (PDirNode->index[index] != offset) + continue; + + fe = FILEENT (PDirNode, index); + + length = OCFS_SECTOR_SIZE; + if (memcmp (fe, EntryToDel, length) == 0) { + memmove (&PDirNode->index[index], + &PDirNode->index[index + 1], + PDirNode->num_ent_used - (index + 1)); + + PDirNode->num_ent_used--; + if (PDirNode->num_ent_used == 0) { + PDirNode->num_del = 0; + } else { + /* Insert this dir node as one containing a deleted entry if the */ + /* count on the root dir node for deleted entries is 0 */ + if (PDirNode->num_del != 0) { + PDirNode->num_del++; + fe->sync_flags = OCFS_SYNC_FLAG_DELETED; + fe->next_del = PDirNode->first_del; + PDirNode->first_del = offset; + } else { + PDirNode->num_del++; + fe->sync_flags = OCFS_SYNC_FLAG_DELETED; + fe->next_del = INVALID_DIR_NODE_INDEX; + PDirNode->first_del = offset; + + PDirNode->next_del_ent_node = + LockNode->head_del_ent_node; + + if (LockNode->node_disk_off != + PDirNode->node_disk_off) + LockNode->head_del_ent_node = + PDirNode->node_disk_off; + else + PDirNode->head_del_ent_node = + PDirNode->node_disk_off; + } + } + + status = ocfs_write_dir_node (osb, PDirNode, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + if (LockNode->node_disk_off != PDirNode->node_disk_off) { + if ((DISK_LOCK_CURRENT_MASTER (LockNode) == + osb->node_num) + && (DISK_LOCK_FILE_LOCK (LockNode) == + OCFS_DLM_ENABLE_CACHE_LOCK)) + status = + ocfs_write_metadata (osb, LockNode, + osb->sect_size, + LockNode-> + node_disk_off); + else + status = + ocfs_write_disk (osb, LockNode, + osb->sect_size, + LockNode-> + node_disk_off); + + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + } + goto 
leave; + } + } + leave: + ocfs_safefree (PDirNode); + + LOG_EXIT_STATUS (status); + return status; +} /* DelFreeEntry */ + +/* + * ocfs_insert_file() + * + */ +int ocfs_insert_file (ocfs_super * osb, + ocfs_dir_node * DirNode, + ocfs_file_entry * InsertEntry, + ocfs_dir_node * LockNode, ocfs_lock_res * LockResource) +{ + int status = 0; + ub8 bitmapOffset = 0; + ub8 numClustersAlloc = 0; + ocfs_dir_node *pNewDirNode = NULL; + ub4 size; + sb4 indexOffset = -1; + + LOG_ENTRY (); + + IF_TRACE (ocfs_print_dir_node (osb, DirNode)); + + if (!IS_VALID_DIR_NODE (DirNode)) { + LOG_ERROR_STR ("Invalid DirNode"); + status = -EFAIL; + goto leave; + } + + /* If we have a list of dir nodes go to the last dirnode */ + /* and insert in that. */ + + /* We should not find this entry already inserted */ + if (DirNode->num_ent_used < osb->max_dir_node_ent) { + status = ocfs_insert_dir_node (osb, DirNode, InsertEntry, LockNode, + &indexOffset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + } else { + size = (ub4) (osb->vol_layout.dir_node_size); + pNewDirNode = ocfs_malloc (size); + if (pNewDirNode == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto leave; + } + + if (DirNode->next_node_ptr != INVALID_NODE_POINTER) { + ocfs_read_dir_node (osb, pNewDirNode, + DirNode->next_node_ptr); + } else { + ub8 fileOffset = 0; + + /* Allocate a new dir node */ + status = + ocfs_alloc_node_block (osb, osb->vol_layout.dir_node_size, + &bitmapOffset, &fileOffset, + &numClustersAlloc, osb->node_num, + DISK_ALLOC_DIR_NODE); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + ocfs_initialize_dir_node (osb, pNewDirNode, bitmapOffset, + fileOffset, osb->node_num); + } + + if ((DISK_LOCK_CURRENT_MASTER (DirNode) == osb->node_num) && + (DISK_LOCK_FILE_LOCK (DirNode) == + OCFS_DLM_ENABLE_CACHE_LOCK)) { + DISK_LOCK_CURRENT_MASTER (pNewDirNode) = osb->node_num; + DISK_LOCK_FILE_LOCK (pNewDirNode) = + OCFS_DLM_ENABLE_CACHE_LOCK; + } + + status = ocfs_insert_dir_node 
(osb, pNewDirNode, InsertEntry, LockNode, + &indexOffset); + if (status < 0) { + LOG_ERROR_STR ("ocfs_insert_dir_node failed"); + goto leave; + } + + if (LockNode->node_disk_off == DirNode->node_disk_off) { + /* Make sure we copy over the lock node head deleted entry */ + DirNode->head_del_ent_node = + LockNode->next_del_ent_node; + DirNode->free_node_ptr = pNewDirNode->node_disk_off; + } else { + LockNode->free_node_ptr = pNewDirNode->node_disk_off; + } + + /* Insert in this dirnode and setup the pointers */ + DirNode->next_node_ptr = pNewDirNode->node_disk_off; + + /* Create the btree now... */ + ocfs_write_dir_node (osb, pNewDirNode, indexOffset); + indexOffset = -1; + } + + if (DISK_LOCK_FILE_LOCK (DirNode) != OCFS_DLM_ENABLE_CACHE_LOCK) { + /* This is an optimization... */ + ocfs_acquire_lockres (LockResource); + LockResource->lock_type = OCFS_DLM_NO_LOCK; + ocfs_release_lockres (LockResource); + + if (LockNode->node_disk_off == DirNode->node_disk_off) + /* Reset the lock on the disk */ + DISK_LOCK_FILE_LOCK (DirNode) = OCFS_DLM_NO_LOCK; + else + DISK_LOCK_FILE_LOCK (LockNode) = OCFS_DLM_NO_LOCK; + } + + ocfs_write_dir_node (osb, DirNode, indexOffset); + + if (LockNode->node_disk_off != DirNode->node_disk_off) { + if ((DISK_LOCK_CURRENT_MASTER (LockNode) == osb->node_num) && + (DISK_LOCK_FILE_LOCK (LockNode) == + OCFS_DLM_ENABLE_CACHE_LOCK)) + status = + ocfs_write_metadata (osb, LockNode, osb->sect_size, + LockNode->node_disk_off); + else + status = + ocfs_write_disk (osb, LockNode, osb->sect_size, + LockNode->node_disk_off); + + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + } + leave: + ocfs_safefree (pNewDirNode); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_insert_file */ diff -urNp ocfs/fs/ocfs/Common/ocfsgendlm.c 2.4.20pre5aa2/fs/ocfs/Common/ocfsgendlm.c --- ocfs/fs/ocfs/Common/ocfsgendlm.c Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/ocfsgendlm.c Fri Sep 6 01:46:16 2002 @@ -0,0 +1,2871 @@ +/* + * 
ocfsgendlm.c + * + * Distributed lock manager. Requests and processes lock votes. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#include + +/* Tracing */ +#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_DLM + +/* + * ocfs_insert_cache_link() + * + */ +int ocfs_insert_cache_link (ocfs_super * osb, ocfs_lock_res * LockResource) +{ + int status = 0; + + LOG_ENTRY (); + + LockResource->in_cache_list = true; + + list_add_tail (&(LockResource->cache_list), &(osb->cache_lock_list)); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_insert_cache_link */ + +/* + * ocfs_update_lock_state() + * + */ +int ocfs_update_lock_state (ocfs_super * osb, ocfs_lock_res * LockResource, ub4 Flags) +{ + ub4 votemap; + int status = 0; + int tmpstat; + ub8 lockseqno = 0; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u)\n", osb, LockResource, Flags); + + votemap = (1 << LockResource->master_node_num); + + status = + ocfs_prime_voting (osb, LockResource->sector_num, LockResource->lock_type, + votemap); + if (status < 0) { + /* Lock up Volume... 
*/ + LOG_ERROR_STATUS (status); + goto finito; + } + + /* Call Comm layer to broadcast to all nodes alive, that this node */ + /* wants exclusive access to the lock */ + status = ocfs_request_vote (osb, LockResource->sector_num, + LockResource->lock_type, Flags, votemap, + &lockseqno); + if (status < 0) { + LOG_ERROR_ARGS + ("ocfs_update_master_on_open()... ocfs_request_vote failed map " + "0x%08x.0x%08x, lockid %u.%u, status 0x%08x\n", + HI (LockResource->oin_openmap), + LO (LockResource->oin_openmap), + HI (LockResource->sector_num), + LO (LockResource->sector_num), status); + goto finito; + } + + status = + ocfs_wait_for_vote (osb, LockResource->sector_num, LockResource->lock_type, + Flags, votemap, 5000, /*5sec */ lockseqno, + LockResource); + if (status < 0) { + LOG_ERROR_ARGS + ("ocfs_update_master_on_open()... ocfs_wait_for_vote failed map " + "0x%08x.0x%08x, lockid %u.%u, status 0x%08x\n", + HI (LockResource->oin_openmap), + LO (LockResource->oin_openmap), + HI (LockResource->sector_num), + LO (LockResource->sector_num), status); + goto finito; + } + + /* Change the lock id on the disk and on the resource */ + if (Flags & FLAG_CHANGE_MASTER) { + LockResource->master_node_num = osb->node_num; + } + + /* Lock state should transition from no lock */ + + finito: + tmpstat = ocfs_reset_voting (osb, LockResource->sector_num, + LockResource->lock_type, votemap); + if (tmpstat < 0) { + status = tmpstat; + LOG_ERROR_STATUS (status); + } + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_update_lock_state */ + +/* + * ocfs_disk_request_vote() + * + */ +int ocfs_disk_request_vote (ocfs_super * osb, + ub8 LockId, + ub4 LockType, ub4 Flags, ub8 VoteMap, ub8 * LockSeqNo) +{ + int status = 0; + ub8 offset = 0; + ub8 pub_off; + ub4 size = 0; + ub4 numnodes = 0; + ub4 i; + ub1 *buffer = NULL; + ocfs_publish *pubsect = NULL; + ub8 largestseqno = 0; + ub8 pubmap = 0; + + LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x.0x%08x, 0x%08x)\n", osb, + HI (LockId), LO 
(LockId), LockType, Flags, + HI (VoteMap), LO (VoteMap), LockSeqNo); + + pubmap = osb->publ_map; + + offset = osb->vol_layout.publ_sect_off; + + numnodes = osb->num_cfg_nodes; + + OCFS_ASSERT (numnodes); + + /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + * !! kch - This seems like a bug to me. !! + * !! What if node #2 of 3 drops out? !! + * !! numnodes will be 2 but node #3 !! + * !! will still be writing to the third !! + * !! slot, right?!?! !! + * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */ + size = (numnodes * osb->sect_size); + + buffer = ocfs_malloc (size); + if (buffer == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + + /* Read the Publish Sector of all nodes */ + status = ocfs_read_disk (osb, buffer, size, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + for (i = 0; i < numnodes; i++) { + /* for the time being the delay is for 1 sec */ + pubsect = + (ocfs_publish *) ((ub1 *) buffer + (i * OCFS_SECTOR_SIZE)); + if (pubsect->seq_num > largestseqno) { + largestseqno = pubsect->seq_num; + + if (pubsect->dirty) { + if (!IS_NODE_ALIVE (pubmap, i, numnodes)) { + LOG_TRACE_ARGS + ("Calling ocfs_recover_vol for NodeNum (%u)\n", + i); + /* Add recovery code here */ + ocfs_recover_vol (osb, i); + } else { + ocfs_sleep (500); /* 500 ms */ + } + status = -EAGAIN; + goto finally; + } + } + } + + /* Increment the largest sequence number by one & */ + /* write it in its own Publish Sector and set the Dirty Bit */ + + pubsect = (ocfs_publish *) (buffer + (osb->node_num * osb->sect_size)); + largestseqno++; + pubsect->seq_num = largestseqno; + pubsect->dirty = 1; + pubsect->vote = FLAG_VOTE_NODE; + pubsect->vote_map = VoteMap; + pubsect->vote_type = Flags; + pubsect->dir_ent = LockId; + + pub_off = osb->vol_layout.publ_sect_off + + (osb->node_num * osb->sect_size); + + LOG_TRACE_ARGS ("Writing Publish Sector (%d)\n", pubsect->vote); + + status = ocfs_write_disk (osb, pubsect, osb->sect_size, pub_off); + if (status < 0) 
{ + LOG_ERROR_STATUS (status); + goto finally; + } + + *LockSeqNo = largestseqno; + + finally: + ocfs_safefree (buffer); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_disk_request_vote */ + +/* + * ocfs_wait_for_disk_lock_release() + * + * @osb: ocfs super block for the volume + * @Offset: + * @TimeToWait: + * @LockType: lowest level to which a lock must deprecate for us to break out. + * + * Returns 0 of success, < 0 if error. + */ +int ocfs_wait_for_disk_lock_release (ocfs_super * osb, + ub8 Offset, ub4 TimeToWait, ub4 LockType) +{ + int status = -ETIMEDOUT; + int tmpstat = -ETIMEDOUT; + ub4 timewaited = 0; + ocfs_file_entry *fileentry = NULL; + + LOG_ENTRY (); + + /* Create a sepearate thread which should set the event of the */ + /* resource after N retries. */ + + fileentry = ocfs_allocate_file_entry (); + if (fileentry == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + + while (TimeToWait > timewaited) { + memset (fileentry, 0, sizeof (ocfs_file_entry)); + + tmpstat = + ocfs_read_force_disk (osb, fileentry, osb->sect_size, Offset); + if (tmpstat < 0) { + LOG_ERROR_STATUS (status = tmpstat); + goto finally; + } + + /* This will always be zero when the first Node comes up after reboot */ + /* (for volume lock) */ + if ((DISK_LOCK_CURRENT_MASTER (fileentry) == + OCFS_INVALID_NODE_NUM) + || (DISK_LOCK_CURRENT_MASTER (fileentry) == osb->node_num)) { + /* Lock was released */ + status = 0; + goto finally; + } + + if (!IS_NODE_ALIVE + (osb->publ_map, DISK_LOCK_CURRENT_MASTER (fileentry), + OCFS_MAXIMUM_NODES)) { + /* The node holding the lock is dead do recovery... */ + LOG_TRACE_ARGS + ("Calling ocfs_recover_vol for NodeNum (%d)", + DISK_LOCK_CURRENT_MASTER (fileentry)); + +// ocfs_recover_vol(osb, DISK_LOCK_CURRENT_MASTER(pFileEntry)); + + /* Reset the lock as not owned and return success?? 
*/ + /* This needs to be under some sort of cluster wide lock */ + + DISK_LOCK_CURRENT_MASTER (fileentry) = + OCFS_INVALID_NODE_NUM; + DISK_LOCK_FILE_LOCK (fileentry) = OCFS_DLM_NO_LOCK; + status = 0; + goto finally; + } + + LOG_TRACE_ARGS + ("Master node is (%d) for lockid %u.%u, lock state " + "is (%d)\n", DISK_LOCK_CURRENT_MASTER (fileentry), + HI (Offset), LO (Offset), DISK_LOCK_FILE_LOCK (fileentry)); + + /* If we are here in the code it means the local node is not the master */ + + if (DISK_LOCK_FILE_LOCK (fileentry) <= LockType) { + /* Lock was released */ + status = 0; + goto finally; + } else + ocfs_sleep (200); /* in ms */ + + timewaited += 200; + } + + finally: + if (fileentry != NULL) + ocfs_release_file_entry (fileentry); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_wait_for_disk_lock_release */ + +/* + * ocfs_wait_for_lock_release() + * + */ +int ocfs_wait_for_lock_release (ocfs_super * osb, + ub8 Offset, + ub4 TimeToWait, ocfs_lock_res * LockResource, ub4 LockType) +{ /* This is the lowest level to which a */ + /* lock must deprecate for us to break out. */ + int status = -ETIMEDOUT; + int tmpstat = -ETIMEDOUT; + ub4 timewaited = 0; + ocfs_file_entry *fileentry = NULL; + ub4 length = 0; + + LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, 0x%08x, %u)\n", osb, + HI (Offset), LO (Offset), TimeToWait, + LockResource, LockType); + + /* Create a sepearate thread which should set the event of the */ + /* resource after N retries */ + fileentry = ocfs_allocate_file_entry (); + if (fileentry == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + + /* For the time being the delay is for 1 second */ + while (TimeToWait > timewaited) { + memset (fileentry, 0, sizeof (ocfs_file_entry)); + length = osb->sect_size; + + tmpstat = ocfs_read_disk (osb, fileentry, length, Offset); + if (tmpstat < 0) { + LOG_ERROR_STATUS (status = tmpstat); + goto finally; + } + + /* ?? 
this will always be zero when the first Node comes up */ + /* after reboot ( for volume lock) */ + if ((DISK_LOCK_CURRENT_MASTER (fileentry) == + OCFS_INVALID_NODE_NUM) + || (DISK_LOCK_CURRENT_MASTER (fileentry) == osb->node_num)) { + /* Lock was released */ + status = 0; + goto finally; + } + + if (!IS_NODE_ALIVE + (osb->publ_map, DISK_LOCK_CURRENT_MASTER (fileentry), + OCFS_MAXIMUM_NODES)) { + /* The node holding the lock is dead do recovery... ** TODO: */ + + LOG_ERROR_ARGS + ("ocfs_wait_for_lock_release() calling ocfs_recover_vol for " + "NodeNum (%d)\n", + DISK_LOCK_CURRENT_MASTER (fileentry)); + +// ocfs_recover_vol(osb, DISK_LOCK_CURRENT_MASTER(pFileEntry)); + + /* Reset the lock as not owned and return success?? */ + /* This needs to be under some sort of cluster wide lock, */ + + DISK_LOCK_CURRENT_MASTER (fileentry) = + OCFS_INVALID_NODE_NUM; + DISK_LOCK_FILE_LOCK (fileentry) = OCFS_DLM_NO_LOCK; + + status = 0; + goto finally; + } + + LOG_TRACE_ARGS ("Master Node is (%d) for Lock 0x%08x.0x%08x, " + "Lock State is (%u)\n", + DISK_LOCK_CURRENT_MASTER (fileentry), + HI (Offset), LO (Offset), + DISK_LOCK_FILE_LOCK (fileentry)); + + /* The Local node is not the master */ + + if (DISK_LOCK_FILE_LOCK (fileentry) >= + OCFS_DLM_ENABLE_CACHE_LOCK) { + LockResource->lock_type = + DISK_LOCK_FILE_LOCK (fileentry); + LockResource->master_node_num = + DISK_LOCK_CURRENT_MASTER (fileentry); + + LOG_TRACE_STR ("Calling ocfs_break_cache_lock"); + + status = ocfs_break_cache_lock (osb, LockResource); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + /* if we succeeded, make sure the file entry shows the change */ + DISK_LOCK_FILE_LOCK (fileentry) = + LockResource->lock_type; + DISK_LOCK_CURRENT_MASTER (fileentry) = + LockResource->master_node_num; + } + + if (DISK_LOCK_FILE_LOCK (fileentry) <= LockType) { + /* Lock was released */ + status = 0; + goto finally; + } else { + ocfs_sleep (200); /* 200 ms */ + } + timewaited += 200; + } + + finally: + if 
(LockResource && status >= 0) { + ocfs_acquire_lockres (LockResource); + + LockResource->lock_type = DISK_LOCK_FILE_LOCK (fileentry); + LockResource->master_node_num = + DISK_LOCK_CURRENT_MASTER (fileentry); + LockResource->oin_openmap = DISK_LOCK_OIN_MAP (fileentry); + + /* ?? shld we keep two times in lockresource also,or just the greater */ + /* of two */ + LockResource->last_lock_upd = DISK_LOCK_LAST_WRITE (fileentry); + ocfs_release_lockres (LockResource); + } + + if (fileentry != NULL) + ocfs_release_file_entry (fileentry); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_wait_for_lock_release */ + +/* + * ocfs_get_vote_on_disk() + * + */ +int ocfs_get_vote_on_disk (ocfs_super * osb, + ub8 LockId, + ub4 LockType, + ub4 Flags, + ub8 * GotVoteMap, ub8 VoteMap, ub8 LockSeqNum, + ub8 * oin_open_map) +{ + int status = 0; + ub4 length = 0; + ub4 i; + ub4 numnodes; + ub1 *buffer = NULL; + + LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x, 0x%08x.0x%08x, " + "%u.%u, 0x%08x)\n", osb, HI (LockId), LO (LockId), + LockType, Flags, GotVoteMap, HI (VoteMap), LO (VoteMap), + HI (LockSeqNum), LO (LockSeqNum), oin_open_map); + + /* Can be number of configured nodes, active nodesif node numbers */ + /* are contiguous */ + numnodes = OCFS_MAXIMUM_NODES; + + /* TODO Is this still a valid assumption the other node could be */ + /* arbitrating a different entry we shd retry in that case */ + + /* Read the vote sectors of all the nodes */ + length = numnodes * osb->sect_size; + buffer = ocfs_malloc (length); + if (buffer == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + + status = ocfs_read_disk (osb, buffer, length, + osb->vol_layout.vote_sect_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + /* Change here for the bitmap */ + for (i = 0; i < numnodes; i++) { + ocfs_vote *VoteSect; + + VoteSect = (ocfs_vote *) (buffer + (i * osb->sect_size)); + + /* + ** Don't check the snapshot of publ_map from ocfs_nm_thread. 
+ ** Instead of this we should kick in NM thread and get the + ** latest publ_map. + */ + if ((IS_NODE_ALIVE (VoteMap, i, numnodes)) && + !(IS_NODE_ALIVE (osb->publ_map, i, numnodes))) { + /* A node we were asking to vote is dead */ + if (Flags & FLAG_FILE_UPDATE_OIN) { + (*GotVoteMap) |= 1 << i; + } else { + /* + ** These are essentially the cases where we were either making + ** a master node or changing/updating the master node and + ** it died midway... + */ + status = -EAGAIN; + goto finally; + } + } + + if ((IS_NODE_ALIVE (VoteMap, i, numnodes)) && + (IS_NODE_ALIVE (osb->publ_map, i, numnodes)) && + (VoteSect->seq_num == LockSeqNum) + && (VoteSect->dir_ent == LockId)) { + if (VoteSect->vote[osb->node_num] == FLAG_VOTE_NODE) { + (*GotVoteMap) |= 1 << i; + if ((Flags & FLAG_FILE_EXTEND) + || (Flags & FLAG_FILE_UPDATE)) { + (*oin_open_map) |= + (VoteSect->open_handle << i); + } + } else if (VoteSect->vote[osb->node_num] == + FLAG_VOTE_OIN_ALREADY_INUSE) { + (*GotVoteMap) |= 1 << i; + status = -EFAIL; + + if (Flags & FLAG_FILE_DELETE) { + status = -EBUSY; + } + goto finally; + } else if (VoteSect->vote[osb->node_num] == + FLAG_VOTE_OIN_UPDATED) { + (*GotVoteMap) |= 1 << i; + } else if (VoteSect->vote[osb->node_num] == + FLAG_VOTE_UPDATE_RETRY) { + status = -EAGAIN; + goto finally; + } else if (VoteSect->vote[osb->node_num] == + FLAG_VOTE_FILE_DEL) { + status = -ENOENT; + goto finally; + } + } + } + + finally: + ocfs_safefree (buffer); + + LOG_EXIT_STATUS (status); + return (status); +} /* ocfs_get_vote_on_disk */ + +/* + * ocfs_disk_reset_voting() + * + */ +int ocfs_disk_reset_voting (ocfs_super * osb, ub8 LockId, ub4 LockType) +{ + int status = 0; + ocfs_publish *pubsect = NULL; + ub8 offset = 0; + + LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u)\n", osb, HI (LockId), LO (LockId), + LockType); + + pubsect = ocfs_malloc (osb->sect_size); + if (pubsect == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + + offset = osb->vol_layout.publ_sect_off + + 
(osb->node_num * osb->sect_size);
+
+	/* Read Disk */
+	status = ocfs_read_disk (osb, pubsect, osb->sect_size, offset);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	pubsect->dirty = 0;
+	pubsect->vote = 0;
+	pubsect->vote_type = 0;
+
+	/* Write Disk */
+	status = ocfs_write_disk (osb, pubsect, osb->sect_size, offset);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+      finally:
+	ocfs_safefree (pubsect);
+
+	LOG_EXIT_STATUS (status);
+	return (status);
+}				/* ocfs_disk_reset_voting */
+
+/*
+ * ocfs_wait_for_vote()
+ *
+ * Polls ocfs_get_vote_on_disk() every WAIT_FOR_VOTE_INCREMENT ms, for at
+ * most TimeToWait ms, until every node in VoteMap (plus this node) has
+ * voted for lock LockId / LockSeqNum.
+ *
+ * Returns 0 when all votes are in (for FLAG_FILE_EXTEND/FLAG_FILE_UPDATE
+ * the collected open map is stored in LockResource->oin_openmap), the
+ * negative status of ocfs_get_vote_on_disk() if it fails, and -EAGAIN if
+ * this node's own bit is missing from the vote map or the wait times out.
+ * The time-out case previously fell out of the loop with the 0 left by
+ * the last successful disk read, which reported success without a full
+ * set of votes.
+ */
+int ocfs_wait_for_vote (ocfs_super * osb,
+		    ub8 LockId,
+		    ub4 LockType,
+		    ub4 Flags,
+		    ub8 VoteMap,
+		    ub4 TimeToWait, ub8 LockSeqNum, ocfs_lock_res * LockResource)
+{
+	int status = -EAGAIN;
+	ub4 timewaited = 0;
+	ub8 gotvotemap = 0;
+	ub8 fileopenmap = 0;
+
+	LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x.0x%08x, %u, "
+			"%u.%u, 0x%08x)\n", osb, HI (LockId), LO (LockId),
+			LockType, Flags, HI (VoteMap), LO (VoteMap),
+			TimeToWait, HI (LockSeqNum), LO (LockSeqNum),
+			LockResource);
+
+#define WAIT_FOR_VOTE_INCREMENT  200
+	/* Create a separate thread which should set the event of the */
+	/* resource after N retries. */
+	while (TimeToWait > timewaited) {
+		/* ub8 mask: an int "1 << n" overflows/sign-extends at n == 31 */
+		ub8 thisnode = ((ub8) 1) << osb->node_num;
+
+		ocfs_sleep (WAIT_FOR_VOTE_INCREMENT);
+
+		gotvotemap |= thisnode;
+
+		/* GetVoteOnComm */
+		status =
+		    ocfs_get_vote_on_disk (osb, LockId, LockType, Flags, &gotvotemap,
+				    VoteMap, LockSeqNum, &fileopenmap);
+		if (status < 0) {
+			LOG_ERROR_ARGS
+			    ("ocfs_get_vote_on_disk failed with gotmap 0x%08x.0x%08x, "
+			     "lockid %u.%u votemap 0x%08x.0x%08x\n",
+			     HI (gotvotemap), LO (gotvotemap), HI (LockId),
+			     LO (LockId), HI (VoteMap), LO (VoteMap));
+			goto bail;
+		}
+
+		if (!(gotvotemap & thisnode)) {
+			/* We need to bail out... */
+			LOG_ERROR_ARGS
+			    ("ocfs_get_vote_on_disk failed with gotmap did not have "
+			     "this node num RETRY 0x%08x.0x%08x, lockid %u.%u, "
+			     "votemap 0x%08x.0x%08x\n", HI (gotvotemap),
+			     LO (gotvotemap), HI (LockId), LO (LockId),
+			     HI (VoteMap), LO (VoteMap));
+			status = -EAGAIN;
+			goto bail;
+		}
+
+		VoteMap |= thisnode;
+		if (VoteMap == gotvotemap) {
+			if ((Flags & FLAG_FILE_EXTEND)
+			    || (Flags & FLAG_FILE_UPDATE))
+				LockResource->oin_openmap = fileopenmap;
+
+			LOG_TRACE_ARGS ("ocfs_get_vote_on_disk SUCCESS 0x%08x.0x%08x, "
+					"lockid %u.%u, fileOpenMap 0x%08x.0x%08x\n",
+					HI (gotvotemap), LO (gotvotemap),
+					HI (LockId), LO (LockId),
+					HI (fileopenmap), LO (fileopenmap));
+			status = 0;
+			goto bail;
+		}
+		timewaited += WAIT_FOR_VOTE_INCREMENT;
+	}
+
+	/* Timed out before every requested node voted: let the caller retry */
+	status = -EAGAIN;
+
+      bail:
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_wait_for_vote */
+
+/*
+ * ocfs_prime_voting()
+ *
+ */
+int ocfs_prime_voting (ocfs_super * osb, ub8 LockId, ub4 LockType, ub8 VoteMap)
+{
+	int status = 0;
+
+	LOG_ENTRY ();
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_prime_voting */
+
+/*
+ * ocfs_reset_voting()
+ *
+ */
+int ocfs_reset_voting (ocfs_super * osb, ub8 LockId, ub4 LockType, ub8 VoteMap)
+{
+	int status;
+
+	LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, 0x%08x.0x%08x)\n", osb,
+			HI (LockId), LO (LockId), LockType, HI (VoteMap),
+			LO (VoteMap));
+
+	/* COMM reset VOTING */
+
+	status = ocfs_disk_reset_voting (osb, LockId, LockType);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_reset_voting */
+
+/*
+ * ocfs_request_vote()
+ *
+ */
+int ocfs_request_vote (ocfs_super * osb,
+		   ub8 LockId, ub4 LockType, ub4 Flags, ub8 VoteMap, ub8 * LockSeqNo)
+{
+	int status;
+
+	LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x.0x%08x, 0x%08x)\n", osb,
+			HI (LockId), LO (LockId), LockType, Flags,
+			HI (VoteMap), LO (VoteMap), LockSeqNo);
+
+	status =
+	    ocfs_disk_request_vote (osb, LockId, LockType, Flags, VoteMap, LockSeqNo);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/*
ocfs_request_vote */
+
+/*
+ * ocfs_comm_request_vote()
+ *
+ */
+int ocfs_comm_request_vote (ocfs_super * osb,
+			ub8 LockId,
+			ub4 LockType, ub4 Flags, ocfs_file_entry * FileEntry)
+{
+	ub8 votemap;
+	ocfs_lock_res *lockres;
+	int status = 0;
+
+	LOG_ENTRY ();
+
+	status = ocfs_lookup_sector_node (osb, LockId, &lockres);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto bail;
+	}
+
+	if (((Flags & FLAG_FILE_DELETE) || (Flags & FLAG_FILE_RENAME)) &&
+	    (!(Flags & FLAG_DIR)) &&
+	    (DISK_LOCK_CURRENT_MASTER (FileEntry) == osb->node_num))
+		votemap = DISK_LOCK_OIN_MAP (FileEntry);
+	else
+		votemap = osb->publ_map;
+
+	/* ub8 mask so the complement keeps the full width of the map */
+	votemap &= ~(((ub8) 1) << osb->node_num);
+
+	status = 0;
+	if (votemap != 0)
+		status =
+		    ocfs_send_dlm_request_msg (osb, LockId, LockType, Flags,
+					   lockres, votemap,
+					   OCFS_DISK_VOTE_REQUEST);
+	if (status == -ETIMEDOUT) {
+		LOG_TRACE_STR ("IPC voting timed out");
+		status = -EFAIL;
+		goto bail;
+	} else {
+		LOG_TRACE_STR ("Got vote on comm in 1sec, what to do now?!?!");
+		if (lockres->vote_status >= 0) {
+			lockres->lock_type = (ub1) LockType;
+			lockres->master_node_num = osb->node_num;
+
+			status = ocfs_update_disk_lock (osb, lockres,
+						DLOCK_FLAG_MASTER |
+						DLOCK_FLAG_LOCK, FileEntry);
+			if (status < 0) {
+				LOG_ERROR_STATUS (status);
+				goto bail;
+			}
+		}
+		status = lockres->vote_status;
+		goto bail;
+	}
+
+      bail:
+	LOG_EXIT_STATUS (status);
+	return (status);
+}				/* ocfs_comm_request_vote */
+
+/*
+ * ocfs_init_dlm_msg()
+ *
+ */
+void ocfs_init_dlm_msg (ocfs_super * osb, ocfs_dlm_msg * DlmMesg, ub4 MsgSize)
+{
+	LOG_ENTRY ();
+
+	DlmMesg->magic = OCFS_DLM_MSG_MAGIC;
+	DlmMesg->msg_len = MsgSize;
+
+	memcpy (DlmMesg->vol_id, osb->vol_layout.id, MAX_VOL_ID_LENGTH);
+
+	DlmMesg->src_node = osb->node_num;
+
+	LOG_EXIT ();
+	return;
+}				/* ocfs_init_dlm_msg */
+
+/*
+ * ocfs_send_dlm_request_msg()
+ *
+ * Builds a DLM message of type MesgType (OCFS_REQUEST_MAKE_MASTER or
+ * OCFS_DISK_VOTE_REQUEST) for lock LockId, marks LockResource as being
+ * in voting, broadcasts the message to the nodes in VoteMap and then
+ * waits up to 1000 ms on LockResource->voted_event.
+ *
+ * LockResource->voted_event is allocated on first use.  The lock resource
+ * is held only while its voting state is being set up and is released on
+ * every path, including the allocation-failure path.
+ *
+ * Returns the status of ocfs_wait(), or -ENOMEM if the message or the
+ * wait queue head cannot be allocated.
+ */
+int ocfs_send_dlm_request_msg (ocfs_super * osb,
+			   ub8 LockId,
+			   ub4 LockType,
+			   ub4 Flags,
+			   ocfs_lock_res * LockResource, ub8 VoteMap, ub4 MesgType)
+{
+	int status = 0;
+	ocfs_dlm_msg *DlmMesg = NULL;
+	ub4 msgSize;
+
+	LOG_ENTRY ();
+
+	msgSize = sizeof (ocfs_dlm_msg) - 1 + sizeof (ocfs_dlm_req_master);
+
+	DlmMesg = ocfs_malloc (msgSize);
+	if (DlmMesg == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto finally;
+	}
+
+	/* If the lock resource has a master this is a simple request */
+	/* to the master */
+	ocfs_acquire_lockres (LockResource);
+
+	if (LockResource->voted_event == NULL) {
+		LockResource->voted_event =
+		    ocfs_malloc (sizeof (wait_queue_head_t));
+		if (LockResource->voted_event == NULL) {
+			/* do not leave the lock resource held on failure */
+			ocfs_release_lockres (LockResource);
+			LOG_ERROR_STATUS (status = -ENOMEM);
+			goto finally;
+		}
+	}
+
+	init_waitqueue_head (LockResource->voted_event);
+	if (LockResource->master_node_num == -1) {
+		LockResource->lock_state |= LOCK_STATE_IN_VOTING;
+		LockResource->req_vote_map = VoteMap;
+		LockResource->got_vote_map = 0;
+	}
+
+	LockResource->lock_state |= LOCK_STATE_IN_VOTING;
+	ocfs_release_lockres (LockResource);
+
+	ocfs_init_dlm_msg (osb, DlmMesg, msgSize);
+
+	DlmMesg->msg_type = MesgType;
+
+	if (MesgType == OCFS_REQUEST_MAKE_MASTER) {
+		ocfs_dlm_req_master *req =
+		    (ocfs_dlm_req_master *) DlmMesg->msg_buf;
+		req->lock_id = LockId;
+		req->flags = Flags;
+		req->lock_seq_num = LockResource->last_upd_seq_num;
+	} else if (MesgType == OCFS_DISK_VOTE_REQUEST) {
+		ocfs_dlm_disk_vote_req *req =
+		    (ocfs_dlm_disk_vote_req *) DlmMesg->msg_buf;
+		req->lock_id = LockId;
+		req->flags = Flags;
+		req->lock_seq_num = LockResource->last_upd_seq_num;
+	}
+
+	ocfs_send_bcast (osb, VoteMap, DlmMesg);
+	status = ocfs_wait (LockResource->voted_event, false, 1000);
+
+      finally:
+	ocfs_safefree (DlmMesg);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_send_dlm_request_msg */
+
+/*
+ * ocfs_comm_make_lock_master()
+ *
+ */
+int ocfs_comm_make_lock_master (ocfs_super * osb,
+			    ub8 LockId,
+			    ub4 LockType,
+			    ub4 Flags,
+			    ocfs_lock_res * LockResource,
+			    ocfs_file_entry * FileEntry, ub8 VoteMap)
+{
+	int status
= 0;
+	ocfs_dlm_msg *DlmMesg = NULL;
+	bool UpdateDisk = true;
+
+	LOG_ENTRY ();
+
+	status =
+	    ocfs_send_dlm_request_msg (osb, LockId, LockType, Flags, LockResource,
+				   VoteMap, OCFS_REQUEST_MAKE_MASTER);
+	if (status == -ETIMEDOUT) {
+		LOG_TRACE_STR ("IPC voting timed out");
+		status = -EFAIL;
+	} else {
+		LOG_TRACE_STR ("Got vote on comm in 1sec, updating disk");
+
+		if (UpdateDisk && LockResource->vote_status >= 0) {
+			LockResource->lock_type = (ub1) LockType;
+			LockResource->master_node_num = osb->node_num;
+
+			status = ocfs_update_disk_lock (osb, LockResource,
+						DLOCK_FLAG_MASTER |
+						DLOCK_FLAG_LOCK, FileEntry);
+			if (status < 0) {
+				LOG_ERROR_STATUS (status);
+				/* This is really bad and we need to DISABLE our ocfs_nm_thread and */
+				/* let the other nodes know we are away from this volume... */
+				goto finally;
+			}
+		}
+		status = LockResource->vote_status;
+	}
+
+      finally:
+	ocfs_safefree (DlmMesg);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}
+
+/*
+ * ocfs_make_lock_master()
+ *
+ * Tries to make this node the master of lock LockId.
+ *
+ * The vote map is osb->publ_map, or the on-disk open map when a
+ * non-directory file that this node already masters is being deleted or
+ * renamed.  Voting is first attempted over the comm layer
+ * (ocfs_comm_make_lock_master); if that fails and no other node is in the
+ * map, this node simply writes itself to disk as master; otherwise it
+ * falls back to disk voting (ocfs_request_vote + ocfs_wait_for_vote, 5
+ * seconds) before updating the disk lock.
+ *
+ * ocfs_reset_voting() is called on every exit path; if it fails its
+ * status replaces the one computed so far.
+ *
+ * Returns 0 or a positive status on success, a negative status otherwise.
+ */
+int ocfs_make_lock_master (ocfs_super * osb,
+		       ub8 LockId,
+		       ub4 LockType,
+		       ub4 Flags,
+		       ocfs_lock_res * LockResource, ocfs_file_entry * FileEntry)
+{
+	ub8 voteMap = 0;
+	ub8 tempMap = 0;
+	int status = 0;
+	int tmpstat;
+	ub8 lockSeqNo = 0;
+
+	LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x, 0x%08x)\n", osb,
+			HI (LockId), LO (LockId), LockType, Flags, LockResource,
+			FileEntry);
+
+	voteMap = osb->publ_map;
+
+	if (((Flags & FLAG_FILE_DELETE) || (Flags & FLAG_FILE_RENAME)) &&
+	    (!(Flags & FLAG_DIR)) &&
+	    (DISK_LOCK_CURRENT_MASTER (FileEntry) == osb->node_num)) {
+		voteMap = DISK_LOCK_OIN_MAP (FileEntry);
+	}
+
+	/* Vote over comm */
+	/* ub8 one: an int "1 << n" overflows/sign-extends at n == 31 */
+	tempMap = ((ub8) 1) << osb->node_num;
+	voteMap |= (tempMap);
+
+	status = ocfs_comm_make_lock_master (osb, LockId, LockType, Flags, LockResource,
+				 FileEntry, voteMap);
+	if (status >= 0)	/* if succ, return. Else continue. */
+		goto finito;
+
+	tempMap = ((ub8) 1) << osb->node_num;
+	voteMap &= (~tempMap);
+
+	if (voteMap == 0) {
+		/* This is the only node alive */
+		/* Make this node the master of this lock */
+		if (LockResource->lock_type <= LockType)
+			LockResource->lock_type = (ub1) LockType;
+
+		LockResource->master_node_num = osb->node_num;
+
+		/* Write that we now are the master to the disk */
+		status = ocfs_update_disk_lock (osb, LockResource,
+					DLOCK_FLAG_MASTER | DLOCK_FLAG_LOCK,
+					FileEntry);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			/* This is really bad and we need to DISABLE our ocfs_nm_thread and */
+			/* let the other nodes know we are away from this volume... TODO */
+			goto finito;
+		}
+
+		LOG_TRACE_ARGS ("Votemap did NOT go to disk 0x%08x.0x%08x, "
+				"lockid %u.%u masternode (%d)\n",
+				HI (voteMap), LO (voteMap), HI (LockId),
+				LO (LockId),
+				DISK_LOCK_CURRENT_MASTER (FileEntry));
+		goto finito;
+	}
+
+	LOG_TRACE_ARGS
+	    ("Votemap was 0x%08x.0x%08x, lockid %u.%u masternode (%d)\n",
+	     HI (voteMap), LO (voteMap), HI (LockId), LO (LockId),
+	     DISK_LOCK_CURRENT_MASTER (FileEntry));
+
+	status = ocfs_prime_voting (osb, LockId, LockType, voteMap);
+	if (status < 0) {
+		/* Lock up Volume... */
+		LOG_ERROR_STATUS (status);
+		goto finito;
+	}
+
+	/* Call Comm layer to broadcast to all nodes alive, that this node */
+	/* wants exclusive access to the lock. */
+	status =
+	    ocfs_request_vote (osb, LockId, LockType, Flags, voteMap, &lockSeqNo);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finito;
+	}
+
+	status = ocfs_wait_for_vote (osb, LockId, LockType, Flags, voteMap,
+				 5000 /* 5 secs */ , lockSeqNo, LockResource);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finito;
+	}
+
+	/* Make this node the master of this lock */
+	if (LockResource->lock_type <= LockType)
+		LockResource->lock_type = (ub1) LockType;
+
+	LockResource->master_node_num = osb->node_num;
+
+	/* Write that we now are the master to the disk */
+	status = ocfs_update_disk_lock (osb, LockResource,
+				DLOCK_FLAG_MASTER | DLOCK_FLAG_LOCK |
+				DLOCK_FLAG_OPEN_MAP, FileEntry);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		/* This is really bad and we need to DISABLE our ocfs_nm_thread and */
+		/* let the other nodes know we are away from this volume... TODO */
+		goto finito;
+	}
+
+      finito:
+	tmpstat = ocfs_reset_voting (osb, LockId, LockType, voteMap);
+	if (tmpstat < 0) {
+		status = tmpstat;
+		LOG_ERROR_STATUS (status);
+	}
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_make_lock_master */
+
+/*
+ * ocfs_acquire_lockres()
+ *
+ */
+void ocfs_acquire_lockres (ocfs_lock_res * LockResource)
+{
+	int mypid;
+
+	LOG_ENTRY_ARGS ("(0x%08x)\n", LockResource);
+
+	mypid = ocfs_getpid ();
+
+	while (1) {
+		spin_lock (&LockResource->lock_mutex);
+
+		if (LockResource->in_use) {
+			/* someone else has it */
+			if (LockResource->thread_id != mypid) {
+				spin_unlock (&LockResource->lock_mutex);
+				ocfs_sleep (1);	/* 1 ms, dangerously short!
*/
+			}
+			/* I acquired it in this callstack already */
+			else {
+				LockResource->in_use++;
+				spin_unlock (&LockResource->lock_mutex);
+				break;
+			}
+		} else {	/* no owner, take it */
+
+			LockResource->in_use = 1;
+			LockResource->thread_id = mypid;
+			spin_unlock (&LockResource->lock_mutex);
+			break;
+		}
+	}
+
+	LOG_EXIT ();
+	return;
+}				/* ocfs_acquire_lockres */
+
+/*
+ * ocfs_release_lockres()
+ *
+ * Drops one hold on LockResource taken by ocfs_acquire_lockres().  When
+ * the hold count reaches zero the owning thread id is cleared.  Releasing
+ * a resource whose count is already zero is traced and otherwise ignored.
+ *
+ * in_use and thread_id are updated under lock_mutex, the same spinlock
+ * ocfs_acquire_lockres() takes when it inspects and modifies them.
+ */
+void ocfs_release_lockres (ocfs_lock_res * LockResource)
+{
+	LOG_ENTRY_ARGS ("(0x%08x)\n", LockResource);
+
+	spin_lock (&LockResource->lock_mutex);
+
+	if (LockResource->in_use == 0) {
+		LOG_TRACE_ARGS("Releasing lock resource: %p inuse was zero, thread=%d\n",
+			       LockResource, LockResource->thread_id);
+		LockResource->thread_id = 0;
+		LockResource->in_use = 0;
+	} else {
+		LockResource->in_use--;
+		if (LockResource->in_use == 0) {
+			LockResource->thread_id = 0;
+		}
+	}
+
+	spin_unlock (&LockResource->lock_mutex);
+
+	LOG_EXIT ();
+	return;
+}				/* ocfs_release_lockres */
+
+/*
+ * ocfs_update_disk_lock()
+ *
+ * Re-reads the file entry at LockResource->sector_num, copies the fields
+ * of LockResource selected by Flags (DLOCK_FLAG_MASTER, DLOCK_FLAG_LOCK,
+ * DLOCK_FLAG_OPEN_MAP, DLOCK_FLAG_SEQ_NUM) into its disk lock and writes
+ * the entry back to the same sector.
+ *
+ * If FileEntry is NULL a temporary entry is obtained with
+ * ocfs_get_file_entry() and released before returning; otherwise the
+ * caller's buffer is used (and overwritten by the read).
+ *
+ * Returns the status of the last disk operation; negative on failure.
+ */
+int ocfs_update_disk_lock (ocfs_super * osb,
+		       ocfs_lock_res * LockResource,
+		       ub4 Flags, ocfs_file_entry * FileEntry)
+{
+	int status = 0;
+	ub8 offset = 0;
+	ocfs_file_entry *fe = NULL;
+
+	LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u, 0x%08x)\n", osb, LockResource,
+			Flags, FileEntry);
+
+	/*
+	 * The write-back below must target the lock's own sector on both
+	 * paths; previously offset was left at 0 when FileEntry was NULL.
+	 */
+	offset = LockResource->sector_num;
+
+	if (FileEntry == NULL)
+		status = ocfs_get_file_entry (osb, &fe, LockResource->sector_num);
+	else {
+		fe = FileEntry;
+		status =
+		    ocfs_read_disk (osb, (void *) fe, (ub4) osb->sect_size,
+				 offset);
+	}
+
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	if (Flags & DLOCK_FLAG_MASTER)
+		DISK_LOCK_CURRENT_MASTER (fe) = LockResource->master_node_num;
+
+	if (Flags & DLOCK_FLAG_LOCK)
+		DISK_LOCK_FILE_LOCK (fe) = LockResource->lock_type;
+
+	if (Flags & DLOCK_FLAG_OPEN_MAP)
+		DISK_LOCK_OIN_MAP (fe) = LockResource->oin_openmap;
+
+	if (Flags & DLOCK_FLAG_SEQ_NUM)
+		DISK_LOCK_SEQNUM (fe) = LockResource->last_upd_seq_num;
+
+	status = ocfs_write_disk (osb, fe, osb->sect_size, offset);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+      finally:
+	if ((fe != FileEntry) && (fe))
+		ocfs_release_file_entry (fe);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_update_disk_lock */
+
+/*
+ * ocfs_update_master_on_open()
+ *
+ */
+int ocfs_update_master_on_open (ocfs_super * osb, ocfs_lock_res * LockResource)
+{
+	int status = -EAGAIN;
+
+	LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", osb, LockResource);
+
+	//Just for the time being , we are going to fix it .
+	LOG_EXIT ();
+	return 0;
+
+	while (status == -EAGAIN) {
+		if (!IS_NODE_ALIVE
+		    (osb->publ_map, LockResource->master_node_num,
+		     OCFS_MAXIMUM_NODES)) {
+			LOG_TRACE_ARGS
+			    ("Master is dead, lockid %u.%u, master(%d)\n",
+			     HI (LockResource->sector_num),
+			     LO (LockResource->sector_num),
+			     LockResource->master_node_num);
+			status = 0;
+			goto bail;
+		}
+
+		ocfs_acquire_lockres (LockResource);
+
+		if (LockResource->master_node_num == osb->node_num) {
+			LOG_TRACE_ARGS
+			    ("Added this node to map 0x%08x.0x%08x, lockid %u.%u\n",
+			     HI (LockResource->oin_openmap),
+			     LO (LockResource->oin_openmap),
+			     HI (LockResource->sector_num),
+			     LO (LockResource->sector_num));
+
+			/* Write the node map to the disk...
*/
+			LockResource->oin_openmap |= (1 << osb->node_num);
+
+			status =
+			    ocfs_update_disk_lock (osb, LockResource,
+					    DLOCK_FLAG_OPEN_MAP, NULL);
+			if (status < 0) {
+				LOG_ERROR_STATUS (status);
+				goto bail;
+			}
+			ocfs_release_lockres (LockResource);
+		} else {
+			LOG_TRACE_ARGS
+			    ("ocfs_update_lock_state added this node to map "
+			     "0x%08x.0x%08x, lockid %u.%u\n",
+			     HI (LockResource->oin_openmap),
+			     LO (LockResource->oin_openmap),
+			     HI (LockResource->sector_num),
+			     LO (LockResource->sector_num));
+
+			/* TODO we need to handle the case when thee master node is not alive */
+			status =
+			    ocfs_update_lock_state (osb, LockResource,
+					     FLAG_ADD_OIN_MAP);
+			if (status < 0) {
+				LOG_ERROR_ARGS
+				    ("ocfs_update_lock_state failed map 0x%08x.0x%08x, "
+				     "lockid %u.%u, status 0x%08x\n",
+				     HI (LockResource->oin_openmap),
+				     LO (LockResource->oin_openmap),
+				     HI (LockResource->sector_num),
+				     LO (LockResource->sector_num), status);
+				ocfs_release_lockres (LockResource);
+				if (status == -EAGAIN)
+					continue;
+				goto bail;
+			}
+			LOG_TRACE_ARGS
+			    ("ocfs_update_lock_state SUCCESS map 0x%08x.0x%08x, "
+			     "lockid %u.%u, status 0x%08x\n",
+			     HI (LockResource->oin_openmap),
+			     LO (LockResource->oin_openmap),
+			     HI (LockResource->sector_num),
+			     LO (LockResource->sector_num), status);
+			ocfs_release_lockres (LockResource);
+		}
+	}
+
+      bail:
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_update_master_on_open */
+
+/*
+ * ocfs_init_lockres()
+ *
+ */
+void ocfs_init_lockres (ocfs_super * osb, ocfs_lock_res * LockResource, ub8 LockId)
+{
+	LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u.%u)\n", osb, LockResource,
+			HI (LockId), LO (LockId));
+
+	LockResource->signature = 0x55AA;
+	LockResource->lock_type = OCFS_DLM_NO_LOCK;
+	LockResource->ref_cnt = 0;
+	LockResource->master_node_num = OCFS_INVALID_NODE_NUM;
+	LockResource->last_upd_seq_num = 0;
+	LockResource->oin_openmap = 0;
+	LockResource->sector_num = LockId;
+	LockResource->in_use = 0;
+	LockResource->oin = NULL;
+	LockResource->lock_state = 0;
+	LockResource->voted_event = NULL;
+	LockResource->in_cache_list = false;
+	spin_lock_init(&LockResource->lock_mutex);
+
+	/* For read/write caching */
+	LockResource->last_read_time = 0;
+	LockResource->last_write_time = 0;
+	LockResource->writer_node_num = OCFS_INVALID_NODE_NUM;
+	LockResource->reader_node_num = OCFS_INVALID_NODE_NUM;
+
+	LOG_EXIT ();
+	return;
+}				/* ocfs_init_lockres */
+
+/*
+ * ocfs_create_update_lock()
+ *
+ * Binds oin to the lock resource for LockId.  An existing resource found
+ * by ocfs_lookup_sector_node() is reused (and detached from any previous
+ * oin); otherwise a new one is allocated from the lockres cache,
+ * initialised and inserted in the sector hash (and in the cache list when
+ * Flags has OCFS_OIN_CACHE_UPDATE).  In both cases
+ * ocfs_wait_for_lock_release() is called with a 30 second limit.
+ *
+ * For non-directories with a valid master, ocfs_update_master_on_open()
+ * is called when this node is not in the open map or the resource is
+ * flagged FLAG_ALWAYS_UPDATE_OPEN.
+ *
+ * Every ocfs_acquire_lockres() in this function is paired with exactly
+ * one ocfs_release_lockres(), including on the error paths.
+ *
+ * Returns a negative status on failure (-EFAIL for a corrupt resource or
+ * oin, -ENOMEM if allocation fails).
+ */
+int ocfs_create_update_lock (ocfs_super * osb, ocfs_inode * oin, ub8 LockId, ub4 Flags)
+{
+	int status = 0;
+	ocfs_lock_res *lockres = NULL;
+	bool IsDir = false;
+	bool update_master = false;
+
+	LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u.%u, %u)\n", osb, oin,
+			HI (LockId), LO (LockId), Flags);
+
+	IsDir = (Flags & OCFS_OIN_DIRECTORY) ? true : false;
+
+	/* Check the lock state on the disk / in our resource map */
+	status = ocfs_lookup_sector_node (osb, LockId, &lockres);
+	if (status >= 0) {
+		if (lockres->signature != 0x55AA) {
+			LOG_ERROR_STATUS(status = -EFAIL);
+			goto bail;
+		}
+
+		ocfs_acquire_lockres (lockres);
+		if (lockres->oin) {
+			if (lockres->oin->obj_id.type != OCFS_TYPE_OIN) {
+				/* drop the hold taken above before bailing out */
+				ocfs_release_lockres (lockres);
+				LOG_ERROR_STATUS(status = -EFAIL);
+				goto bail;
+			}
+			else {
+				lockres->oin->lock_res = NULL;
+			}
+		}
+
+		lockres->oin = oin;
+		oin->oin_flags |= Flags;
+		oin->lock_res = lockres;
+
+		ocfs_release_lockres (lockres);
+	}
+
+	if (status < 0) {
+		/* Create a resource and insert in the hash */
+		lockres = kmem_cache_alloc (OcfsGlobalCtxt.lockres_cache, GFP_KERNEL);
+		if (lockres == NULL) {
+			LOG_ERROR_STATUS (status = -ENOMEM);
+			goto bail;
+		}
+
+		ocfs_init_lockres (osb, lockres, LockId);
+
+		/* Update lock resource */
+		if (IsDir) {
+			status =
+			    ocfs_wait_for_lock_release (osb, LockId,
+						 30000 /* 30 sec */ ,
+						 lockres,
+						 OCFS_DLM_EXCLUSIVE_LOCK);
+		} else {
+			status =
+			    ocfs_wait_for_lock_release (osb, LockId,
+						 30000 /* 30 sec */ ,
+						 lockres, OCFS_DLM_NO_LOCK);
+		}
+
+		if (status < 0) {
+			/*
+			 ** The lock owner is alive and is taking too long to do the
+			 ** release for the lock
+			 ** Notify him of our existence/ kick him out???
+			 ** Retry lock...
+			 */
+			LOG_ERROR_STR
+			    ("Lock owner is alive and taking too much time");
+		}
+
+		ocfs_insert_sector_node (osb, lockres);
+		if (status < 0) {
+			/* Failure LOCK up volume operation ** TODO */
+			LOG_ERROR_STR ("Lock up volume");
+		}
+
+		if (Flags & OCFS_OIN_CACHE_UPDATE) {
+			ocfs_insert_cache_link (osb, lockres);
+			if (status < 0) {
+				/* Failure LOCK up volume operation TODO */
+				LOG_ERROR_STR ("Lock up volume");
+			}
+		}
+	} else {
+		if (IsDir) {
+			status =
+			    ocfs_wait_for_lock_release (osb, LockId,
+						 30000 /* 30 sec */ ,
+						 lockres,
+						 OCFS_DLM_EXCLUSIVE_LOCK);
+		} else {
+			status =
+			    ocfs_wait_for_lock_release (osb, LockId,
+						 30000 /* 30 sec */ ,
+						 lockres, OCFS_DLM_NO_LOCK);
+		}
+	}
+
+	ocfs_acquire_lockres (lockres);
+
+	lockres->oin = oin;
+	oin->oin_flags |= Flags;
+	oin->lock_res = lockres;
+
+	LOG_TRACE_ARGS ("MasterNode=%d, ThisNode=%d\n",
+			lockres->master_node_num, osb->node_num);
+
+	/* Decide under the hold whether the master must hear about this open */
+	update_master =
+	    ((!IsDir) && (lockres->master_node_num != OCFS_INVALID_NODE_NUM) &&
+	     ((!IS_NODE_ALIVE (lockres->oin_openmap, osb->node_num,
+			       OCFS_MAXIMUM_NODES)) ||
+	      (lockres->lock_state & FLAG_ALWAYS_UPDATE_OPEN)));
+
+	/*
+	 * Release exactly once.  The resource used to be released before the
+	 * call below and then a second time after it returned successfully.
+	 */
+	ocfs_release_lockres (lockres);
+
+	if (update_master) {
+		/* Send a message to master so that he can send the oin update to */
+		/* this node also. If u are the master then update File_entry */
+		/* and set the bit that this node has a open */
+		status = ocfs_update_master_on_open (osb, lockres);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto bail;
+		}
+	}
+
+      bail:
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_create_update_lock */
+
+
+/*
+ * ocfs_get_x_for_del()
+ *
+ */
+int ocfs_get_x_for_del (ocfs_super * osb,
+		   ub8 LockId,
+		   ub4 LockType,
+		   ub4 Flags, ocfs_lock_res * LockResource,
+		   ocfs_file_entry * FileEntry)
+{
+	bool LockResourceAcquired = false;	// true;
+	int status = -EFAIL;
+
+	LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x, 0x%08x)\n", osb,
+			HI (LockId), LO (LockId), LockType, Flags, LockResource,
+			FileEntry);
+
+	while (1) {
+		if (LockResourceAcquired == false) {
+			ocfs_acquire_lockres (LockResource);
+			LockResourceAcquired = true;
+		}
+
+		/* If I am master and I am the only one in the oin node map */
+		/* update the disk */
+		status =
+		    ocfs_make_lock_master (osb, LockId, LockType, Flags, LockResource,
+				    FileEntry);
+		if (status >= 0) {
+			/* RELEASE_LOCK */
+			if (LockResourceAcquired) {
+				ocfs_release_lockres (LockResource);
+				LockResourceAcquired = false;
+			}
+			/* We got the lock */
+			status = 0;
+			goto finally;
+		} else if (status == -EAGAIN) {
+			if (LockResourceAcquired) {
+				ocfs_release_lockres (LockResource);
+				LockResourceAcquired = false;
+			}
+
+			status =
+			    ocfs_disk_update_resource (osb, LockResource, FileEntry);
+			if (status < 0) {
+				/* This should never fail so lock up the volume */
+				osb->vol_state = VOLUME_DISABLED;
+				goto finally;
+			}
+			continue;
+		} else {
+			if (LockResourceAcquired) {
+				ocfs_release_lockres (LockResource);
+				LockResourceAcquired = false;
+			}
+			goto finally;
+		}
+	}
+
+      finally:
+	if (LockResourceAcquired) {
+		ocfs_release_lockres (LockResource);
+		LockResourceAcquired = false;
+	}
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_get_x_for_del */
+
+/*
+ * ocfs_acquire_lock()
+ *
+ */
+int
ocfs_acquire_lock (ocfs_super * osb, + ub8 LockId, + ub4 LockType, + ub4 Flags, + ocfs_lock_res ** LockResource, ocfs_file_entry * LockSector) +{ + int status = -EFAIL; + ocfs_lock_res *lockres = NULL; + bool lockres_acq = false; + ocfs_file_entry *disklock = NULL; + ub4 updated = 0; + + LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x, 0x%08x)\n", osb, + HI (LockId), LO (LockId), LockType, Flags, LockResource, + LockSector); + + /* If the caller passed in memory read it and update resource */ + /* from the entry */ + if (LockSector) + disklock = LockSector; + else { + /* Allocate the entry */ + disklock = ocfs_allocate_file_entry (); + if (disklock == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + } + + status = ocfs_find_update_res (osb, LockId, &lockres, disklock, &updated); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + *LockResource = lockres; + + switch (LockType) { + case OCFS_DLM_SHARED_LOCK: + + if (!(Flags & FLAG_DIR)) { + status = 0; + goto finally; + } + + ocfs_acquire_lockres (lockres); + lockres_acq = true; + + if (lockres->lock_type == OCFS_DLM_NO_LOCK) { + lockres->lock_type = OCFS_DLM_SHARED_LOCK; + } + if ((lockres->lock_type == OCFS_DLM_ENABLE_CACHE_LOCK) && + (lockres->master_node_num != osb->node_num)) { + LOG_TRACE_ARGS + ("Called for type OCFS_DLM_SHARED_LOCK, " + "calling ocfs_break_cache_lock (master=%u, this=%u)\n", + lockres->master_node_num, osb->node_num); + ocfs_break_cache_lock (osb, lockres); + } + + lockres->ref_cnt++; + + if (lockres_acq) { + ocfs_release_lockres (lockres); + lockres_acq = false; + } + + status = 0; + goto finally; + break; + + case OCFS_DLM_EXCLUSIVE_LOCK: + case OCFS_DLM_ENABLE_CACHE_LOCK: + /* This will be called for vol, allocation, file and directory */ + /* from create modify */ + while (1) { + ocfs_acquire_lockres (lockres); + lockres_acq = true; +#ifdef NOT_USED + /* If I am master update disk */ + if(LockId == OCFS_BITMAP_LOCK_OFFSET) + { + 
LOG_TRACE_ARGS("Called for BITMAP Exclusive lock, " \ + "Owner is node (%u), my Node num is (%u)\n", + lockres->master_node_num,osb->node_num); + } +#endif + + if (lockres->master_node_num == osb->node_num) { +#ifdef NOT_USED + if((lockres->lock_type == OCFS_DLM_ENABLE_CACHE_LOCK) && + (LockId == OCFS_BITMAP_LOCK_OFFSET)) + { + if(lockres_acq) + { + ocfs_release_lockres(lockres); + lockres_acq = false; + } + /* We got the lock */ + try_return( status = 0); + } +#endif + + /* Make lock Master */ + if ((Flags & FLAG_FILE_DELETE) + || (Flags & FLAG_FILE_RENAME) + || (Flags & FLAG_FILE_TRUNCATE)) { + if (!updated) { + /* Read the fileEntry from the disk... */ + status = + ocfs_read_file_entry (osb, + disklock, + LockId); + if (status < 0) { + LOG_ERROR_STATUS + (status); + goto finally; + } + } + + /* If this is a directory being deleted ask */ + /* everybody... */ + /* If this is a file and we are master and the */ + /* nodemap is just this node acquire a local lock */ + /* and set a flag in our resource to indicate that */ + /* the file has been deleted, a change master */ + /* / update master on open will now return */ + /* failure to the caller preventing them from */ + /* opening this file. */ + + status = + ocfs_get_x_for_del (osb, LockId, + LockType, Flags, + lockres, disklock); + + if (lockres_acq) { + ocfs_release_lockres + (lockres); + lockres_acq = false; + } + goto finally; + } +// if( lockres->lock_type >= OCFS_DLM_EXCLUSIVE_LOCK) +// { +// } + + if (!updated) { + /* Read the fileEntry from the disk... */ + status = + ocfs_read_file_entry (osb, + disklock, + LockId); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + if (lockres->master_node_num != + DISK_LOCK_CURRENT_MASTER + (disklock)) { + /* Update our state... 
*/ + lockres->master_node_num = + DISK_LOCK_CURRENT_MASTER + (disklock); + lockres->lock_type = + DISK_LOCK_FILE_LOCK + (disklock); + lockres->oin_openmap = + DISK_LOCK_OIN_MAP + (disklock); + + if (lockres_acq) { + ocfs_release_lockres + (lockres); + lockres_acq = false; + } + continue; + } + } else { + /* The file entry was already read in */ + /* ocfs_find_update_res */ + } + + DISK_LOCK_CURRENT_MASTER (disklock) = + osb->node_num; + + if (DISK_LOCK_FILE_LOCK (disklock) < + OCFS_DLM_EXCLUSIVE_LOCK) { + DISK_LOCK_FILE_LOCK (disklock) = + LockType; + + if (LockType == + OCFS_DLM_ENABLE_CACHE_LOCK) { +// ub8 dwOffset; +// ub4 SectorSize = osb->sect_size; + +// dwOffset.QuadPart = LockId; + status = + ocfs_write_force_disk (osb, + disklock, + osb-> + sect_size, + LockId); + if (status < 0) { + LOG_ERROR_STATUS + (status); + goto finally; + } + } + } + + status = + ocfs_write_file_entry (osb, disklock, + LockId); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + lockres->lock_type = LockType; + + /* RELEASE_LOCK */ + if (lockres_acq) { + ocfs_release_lockres (lockres); + lockres_acq = false; + } + + /* We got the lock */ + status = 0; + goto finally; + } else { + bool bMakeLockMaster = false; + +// if(updated == 0) + { + /* Read the fileEntry from the disk... */ + status = + ocfs_read_file_entry (osb, + disklock, + LockId); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + /* Update our state... 
*/ + lockres->master_node_num = + DISK_LOCK_CURRENT_MASTER + (disklock); + lockres->lock_type = + DISK_LOCK_FILE_LOCK (disklock); + lockres->oin_openmap = + DISK_LOCK_OIN_MAP (disklock); + } + + if (lockres->master_node_num != + OCFS_INVALID_NODE_NUM) { + if (!IS_VALID_NODE_NUM + (lockres->master_node_num)) { + LOG_ERROR_STATUS(status = + -EINVAL); + goto finally; + } + } + + if (lockres->master_node_num == + OCFS_INVALID_NODE_NUM) { + bMakeLockMaster = true; + } else + if (!IS_NODE_ALIVE + (osb->publ_map, + lockres->master_node_num, + OCFS_MAXIMUM_NODES)) { + bMakeLockMaster = true; + + /* RELEASE_LOCK */ + if (lockres_acq) { + ocfs_release_lockres + (lockres); + lockres_acq = false; + } + + /* Recovery */ + LOG_TRACE_ARGS + ("Calling ocfs_recover_vol for NodeNum (%d)\n", + lockres->master_node_num); + status = + ocfs_recover_vol (osb, + lockres-> + master_node_num); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + ocfs_acquire_lockres (lockres); + lockres_acq = true; + } + + if (bMakeLockMaster) { + /* I am not master, master is dead or not there. 
*/ + /* If lock was owned we need to do recovery */ + /* otherwise we need to arbitrate for the lock */ + + /* ArbitrateLock */ + if (lockres_acq) { + ocfs_release_lockres + (lockres); + lockres_acq = false; + } + + status = + ocfs_make_lock_master (osb, LockId, + LockType, Flags, + lockres, + disklock); + if (status >= 0) { + /* release lock */ + if (lockres_acq) { + ocfs_release_lockres + (lockres); + lockres_acq = false; + } + + if (LockType == + OCFS_DLM_ENABLE_CACHE_LOCK) + { + DISK_LOCK_FILE_LOCK + (disklock) = + LockType; + status = + ocfs_write_force_disk + (osb, disklock, + osb->sect_size, + LockId); + if (status < 0) { + LOG_ERROR_STATUS + (status); + goto finally; + } + } + + DISK_LOCK_CURRENT_MASTER + (disklock) = + osb->node_num; + DISK_LOCK_FILE_LOCK + (disklock) = LockType; + + status = + ocfs_write_file_entry (osb, + disklock, + LockId); + if (status < 0) { + LOG_ERROR_STATUS + (status); + goto finally; + } + + /* We got the lock */ + status = 0; + goto finally; + } else if (status == -EAGAIN) { + if (lockres_acq) { + ocfs_release_lockres + (lockres); + lockres_acq = false; + } + + status = + ocfs_disk_update_resource (osb, + lockres, + disklock); + if (status < 0) { + /* should never fail... lock up vol */ + LOG_ERROR_STR + ("ocfs_disk_update_resource failed - Disabling Volume"); + osb->vol_state = + VOLUME_DISABLED; + goto finally; + } + continue; + } else { + if (lockres_acq) { + ocfs_release_lockres + (lockres); + lockres_acq = false; + } + goto finally; + } + } else { + /* MasterNode is alive and it is not this node */ + /* If the lock is acquired already by the master */ + /* wait for release else change master. */ + +/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */ +/* !!! this next line is very questionable. !!! */ +/* !!! since lock_type is unsigned and cannot !!! */ +/* !!! be < 0, the else case will *never* be !!! */ +/* !!! executed. is that the intent?! !!! */ +/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
*/ + +#if 0 + // commenting to avoid compile warning + if (lockres->lock_type >= 0) +#else + if (true) +#endif + { + if ((Flags & + FLAG_FILE_DELETE) + || (Flags & + FLAG_FILE_RENAME) + || (Flags & + FLAG_FILE_TRUNCATE)) + { + /* If this is a directory being */ + /* deleted ask everybody... */ + /* If this is a file and we are */ + /* master and the nodemap is just */ + /* this node acquire a local lock */ + /* and set a flag in our resource */ + /* to indicate that the file has */ + /* been deleted, a change master */ + /* / update master on open will */ + /* now return failure to the */ + /* caller preventing them from */ + /* opening this file. */ + + status = + ocfs_get_x_for_del (osb, + LockId, + LockType, + Flags, + lockres, + disklock); + if (lockres_acq) { + ocfs_release_lockres + (lockres); + lockres_acq + = false; + } + goto finally; + } + + /* Change Lock Master */ + status = + ocfs_update_lock_state (osb, + lockres, + FLAG_CHANGE_MASTER); + if (lockres_acq) { + ocfs_release_lockres + (lockres); + lockres_acq = false; + } + + if (status < 0) { + if (status == + -EAGAIN) + continue; + goto finally; + } + + { + status = + ocfs_read_file_entry + (osb, disklock, + LockId); + if (status < 0) { + LOG_ERROR_STATUS + (status); + goto finally; + } + + DISK_LOCK_CURRENT_MASTER + (disklock) = + osb->node_num; + DISK_LOCK_FILE_LOCK + (disklock) = + LockType; + status = + ocfs_write_file_entry + (osb, disklock, + LockId); + if (status < 0) { + LOG_ERROR_STATUS + (status); + goto finally; + } + + /* Update our state... 
*/
+							lockres->master_node_num =
+							    DISK_LOCK_CURRENT_MASTER (disklock);
+							lockres->lock_type =
+							    DISK_LOCK_FILE_LOCK (disklock);
+							lockres->oin_openmap =
+							    DISK_LOCK_OIN_MAP (disklock);
+						}
+						goto finally;
+					} else {
+						/* Wait for lock release */
+						if (lockres_acq) {
+							ocfs_release_lockres (lockres);
+							lockres_acq = false;
+						}
+
+						if (Flags & FLAG_DIR) {
+							status =
+							    ocfs_wait_for_lock_release (osb,
+								LockId, 30000,
+								lockres,
+								OCFS_DLM_SHARED_LOCK);
+						} else {
+							status =
+							    ocfs_wait_for_lock_release (osb,
+								LockId, 30000,
+								lockres,
+								OCFS_DLM_NO_LOCK);
+						}
+
+						if (status < 0) {
+							if (status == -ETIMEDOUT)
+								continue;
+							else
+								goto finally;
+						}
+						/* Try and acquire the lock again */
+						continue;
+					}
+				}
+			}
+		}
+		break;
+
+	    default:
+		break;
+	}
+
+      finally:
+	if ((LockSector == NULL) && (disklock))
+		ocfs_release_file_entry (disklock);
+
+	if (lockres_acq) {
+		ocfs_release_lockres (lockres);
+		lockres_acq = false;
+	}
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_acquire_lock */
+
+/*
+ * ocfs_disk_release_lock()
+ *
+ * Reads the file entry for LockId, checks that this node is the recorded
+ * master and, when FLAG_FILE_UPDATE_OIN is set and other live nodes appear
+ * in the vote map, runs a vote round (comm layer first, disk vote as the
+ * fallback).  On the way out the disk lock is reset to OCFS_DLM_NO_LOCK and
+ * written back, unless this node still holds it as a cache lock.
+ *
+ * Returns 0 or a positive vote status on success, a negative status on failure.
+ */
+int ocfs_disk_release_lock (ocfs_super * osb,
+		     ub8 LockId,
+		     ub4 LockType, ub4 Flags, ocfs_lock_res * LockResource)
+{
+	ub4 votemap = 0;
+	ub4 tempmap = 0;
+	ub4 i;
+	int status = 0;
+	int tmpstat;
+	ocfs_file_entry *fe = NULL;
+	ub8 lockseqno;
+	bool cachelock = false;
+
+	LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x)\n", osb, HI (LockId),
+			LO (LockId), LockType, Flags, LockResource);
+
+	status = ocfs_get_file_entry (osb, &fe, LockId);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finito;
+	}
+
+	if (!IS_VALID_NODE_NUM (DISK_LOCK_CURRENT_MASTER (fe))) {
+		LOG_ERROR_STATUS (status = -EINVAL);
+		goto finito;
+	}
+
+	if (DISK_LOCK_CURRENT_MASTER (fe) != osb->node_num) {
+		/* How is this happening??? */
+		/* NOTE(review): finito still resets and rewrites the disk */
+		/* lock in this case - confirm that is intended. */
+		LOG_ERROR_ARGS ("Current master is NOT this NODE (%d)\n",
+				DISK_LOCK_CURRENT_MASTER (fe));
+		status = 0;
+		goto finito;
+	}
+
+	/* Unsigned constant: shifting a signed 1 into bit 31 is undefined */
+	for (i = 0; i < OCFS_MAXIMUM_NODES; i++) {
+		if (((1U << i) & DISK_LOCK_OIN_MAP (fe)) &&
+		    IS_NODE_ALIVE (osb->publ_map, i, OCFS_MAXIMUM_NODES)) {
+			votemap |= (1U << i);
+		}
+	}
+
+	LOG_TRACE_ARGS ("ocfs_update_lock_state SUCCESS oin map 0x%08x.0x%08x, "
+			"lockid %u.%u flags 0x%08x\n",
+			HI (DISK_LOCK_OIN_MAP (fe)),
+			LO (DISK_LOCK_OIN_MAP (fe)),
+			HI (LockId), LO (LockId), Flags);
+
+	/* Send an update to all nodes alive, can be optimized later TODO */
+	if (Flags & FLAG_FILE_RENAME)
+		votemap = (ub4) (osb->publ_map);
+
+#ifdef ALLOW_INODE_UPDATES_WITH_NO_OIN
+	/* TODO: figure out how to properly handle inode updates w/no oin */
+	votemap = (ub4) (osb->publ_map);	// temporary hack, forces broadcast
+#endif
+
+	/* This node never votes on its own request */
+	tempmap = (1U << osb->node_num);
+
+	votemap &= (~tempmap);
+
+	if (votemap != 0) {
+		/* Call Comm layer to broadcast to all nodes alive, that this node */
+		/* wants exclusive access to the lock. */
+		if (Flags & FLAG_FILE_UPDATE_OIN) {
+			LOG_TRACE_ARGS
+			    ("Votemap was not 0, update oin votemap 0x%08x, "
+			     "lockid %u.%u, flags 0x%08x\n", votemap,
+			     HI (LockId), LO (LockId), Flags);
+
+			status = -EAGAIN;
+			while (status == -EAGAIN) {
+				/*
+				 ** Do the real retry for getting the vote
+				 ** kick in the vote thread so that if the other guy is waiting
+				 ** on us check???
+				 ** in the ocfs_request_vote when reading all publish sectors
+				 */
+				status =
+				    ocfs_comm_request_vote (osb, LockId, LockType,
+							    Flags, fe);
+				if (status >= 0) {
+					LOG_TRACE_STR
+					    ("ocfs_comm_request_vote succeeded!");
+					break;
+				}
+#ifdef ALLOW_INODE_UPDATES_WITH_NO_OIN
+				status =
+				    ocfs_request_vote (osb, LockId, LockType, Flags,
+						       votemap, &lockseqno);
+#else
+				status =
+				    ocfs_request_vote (osb, LockId, LockType, Flags,
+						       DISK_LOCK_OIN_MAP (fe),
+						       &lockseqno);
+#endif
+				if (status == -EAGAIN) {
+					/* Check if there was somebody else who came in and changed our */
+					LOG_TRACE_ARGS
+					    ("ocfs_request_vote retrying votemap 0x%08x "
+					     "lockid %u.%u, status 0x%08x\n",
+					     votemap, HI (LockId), LO (LockId), status);
+					continue;
+				}
+
+				if (status < 0) {
+					/* Disable the vol */
+					LOG_TRACE_ARGS
+					    ("ocfs_request_vote failed votemap 0x%08x, "
+					     "lockid %u.%u, status 0x%08x\n",
+					     votemap, HI (LockId), LO (LockId), status);
+					goto finito;
+				}
+#ifdef ALLOW_INODE_UPDATES_WITH_NO_OIN
+				status =
+				    ocfs_wait_for_vote (osb, LockId, LockType,
+							FLAG_FILE_UPDATE_OIN, votemap,
+							5000, lockseqno, LockResource);
+#else
+				status =
+				    ocfs_wait_for_vote (osb, LockId, LockType,
+							FLAG_FILE_UPDATE_OIN,
+							DISK_LOCK_OIN_MAP (fe), 5000,
+							lockseqno, LockResource);
+#endif
+				if (status < 0) {
+					if (status == -EAGAIN) {
+						LOG_TRACE_ARGS
+						    ("ocfs_wait_for_vote retrying votemap 0x%08x, "
+						     "lockid %u.%u, flags 0x%08x\n",
+						     votemap, HI (LockId), LO (LockId), Flags);
+						continue;
+					}
+					goto finito;
+				}
+			}
+
+			status =
+			    ocfs_reset_voting (osb, LockId, LockType,
+					       DISK_LOCK_OIN_MAP (fe));
+			if (status < 0) {
+				LOG_ERROR_STATUS (status);
+			}
+		}
+	}
+
+	LOG_TRACE_ARGS ("Votemap was 0, votemap 0x%08x, "
+			"lockid %u.%u, flags 0x%08x\n",
+			votemap, HI (LockId), LO (LockId), Flags);
+
+      finito:
+	/*
+	 * fe may still be NULL here when ocfs_get_file_entry() failed; in
+	 * that case there is no disk lock to reset and nothing to release.
+	 */
+	if (fe != NULL) {
+		if (Flags & FLAG_FILE_RELEASE_MASTER)
+			DISK_LOCK_CURRENT_MASTER (fe) = OCFS_INVALID_NODE_NUM;
+
+		if ((DISK_LOCK_FILE_LOCK (fe) == OCFS_DLM_ENABLE_CACHE_LOCK) &&
+		    (DISK_LOCK_CURRENT_MASTER (fe) == osb->node_num))
+			cachelock = true;
+		else
+			DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK;
+
+		/* Reset the lock on the disk */
+		if (!cachelock) {
+			tmpstat = ocfs_write_file_entry (osb, fe, LockId);
+			if (tmpstat < 0) {
+				status = tmpstat;
+				LOG_ERROR_STATUS (status);
+			}
+		}
+
+		ocfs_release_file_entry (fe);
+	}
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_disk_release_lock */
+
+/*
+ * ocfs_release_lock()
+ *
+ */
+int ocfs_release_lock (ocfs_super * osb,
+		ub8 LockId,
+		ub4 LockType, ub4 Flags, ocfs_lock_res * LockResource)
+{
+	int status = 0;
+	bool lock_acq = false;
+
+	LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x)\n", osb, HI (LockId),
+			LO (LockId), LockType, Flags, LockResource);
+
+	ocfs_acquire_lockres (LockResource);
+	lock_acq = true;
+
+	switch (LockType) {
+	    case OCFS_DLM_SHARED_LOCK:
+		LockResource->ref_cnt--;
+		if (LockResource->lock_type == OCFS_DLM_SHARED_LOCK) {
+			if (LockResource->ref_cnt == 0) {
+				LockResource->lock_type = OCFS_DLM_NO_LOCK;
+			}
+		}
+		status = 0;
+		goto finally;
+
+	    case OCFS_DLM_EXCLUSIVE_LOCK:
+		break;
+	}
+	/*
+	 ** Change Flags based on which kind of lock we are releasing
+	 ** For directory we need special handling of oin updates when the release
+	 ** is for XBcast
+	 ** For file we need to update oin's
+	 ** For Shared we need to update the lock state locally only
+	 */
+
+	/* OcfsRelease */
+
+	/* CommReleaseLock */
+	if (Flags & FLAG_FILE_DELETE) {
+		LockResource->lock_type = OCFS_DLM_NO_LOCK;
+		LockResource->master_node_num = OCFS_INVALID_NODE_NUM;
+		status = 0;
+		goto finally;
+	}
+//    if(LockId != OCFS_BITMAP_LOCK_OFFSET)
+	{
+		if ((LockResource->lock_type == OCFS_DLM_ENABLE_CACHE_LOCK) &&
+		    (LockResource->master_node_num == osb->node_num)) {
+			status = 0;
+			goto finally;
+		}
+	}
+
+	if (LockId == OCFS_BITMAP_LOCK_OFFSET) {
+		LOG_TRACE_ARGS ("Bitmap lock state is (%d)\n",
+				LockResource->lock_type);
+	}
+
+	LockResource->lock_type =
OCFS_DLM_NO_LOCK;
+	if (Flags & FLAG_FILE_RELEASE_MASTER)
+		LockResource->master_node_num = OCFS_INVALID_NODE_NUM;
+
+	if (lock_acq) {
+		ocfs_release_lockres (LockResource);
+		lock_acq = false;
+	}
+
+	status = ocfs_disk_release_lock (osb, LockId, LockType, Flags, LockResource);
+	if (status < 0) {
+		/* Disable the vol */
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+      finally:
+	if (lock_acq) {
+		ocfs_release_lockres (LockResource);
+		lock_acq = false;
+	}
+
+	LOG_EXIT_STATUS (status);
+	return (status);
+}				/* ocfs_release_lock */
+
+/*
+ * ocfs_init_dlm()
+ *
+ * Marks the global IPC context as neither initialized nor re-initialized.
+ * Always returns 0.
+ */
+int ocfs_init_dlm (void)
+{
+	LOG_ENTRY ();
+
+	OcfsIpcCtxt.init = false;
+	OcfsIpcCtxt.re_init = false;
+
+	LOG_EXIT_STATUS (0);
+	return 0;
+}				/* ocfs_init_dlm */
+
+/*
+ * ocfs_add_lock_to_recovery()
+ *
+ * Placeholder: only traces entry/exit and returns 0.
+ */
+int ocfs_add_lock_to_recovery (void)
+{
+	LOG_ENTRY ();
+
+	LOG_EXIT ();
+	return 0;
+}				/* ocfs_add_lock_to_recovery */
+
+/*
+ * ocfs_create_log_extent_map()
+ *
+ * Splits the disk range [diskOffset, diskOffset + ByteCount) against
+ * osb->metadata_map into metadata runs and data runs, then gives every
+ * metadata run a slot at the current end of the log file, records that
+ * mapping in osb->trans_map and returns the resulting runs.
+ *
+ * On success *PTransRuns points to an array holding *PNumTransRuns runs.
+ * On failure a negative status is returned, *PTransRuns is not written
+ * and all memory allocated here is released.
+ */
+int ocfs_create_log_extent_map (ocfs_super * osb,
+			ocfs_io_runs ** PTransRuns,
+			ub4 * PNumTransRuns, ub8 diskOffset, ub8 ByteCount)
+{
+	int status = 0;
+	sb8 tempVbo = 0;
+	sb8 tempLbo = 0;
+	ub4 tempSize = 0;
+	ub4 numDataRuns = 0;
+	ub4 numTransRuns;
+	ub4 i;
+	ub4 numMetaDataRuns = 0;
+	ub4 ioRunSize;
+	ocfs_io_runs *IoDataRuns = NULL;
+	ocfs_io_runs *IoMetaDataRuns = NULL;
+	ocfs_io_runs *IoTransRuns = NULL;
+	ocfs_io_runs *TransRuns = NULL;
+	ub8 fileSize;
+	ub8 remainingLength;
+	bool bRet;
+	ub4 RunsInExtentMap = 0;
+	ub4 ExtentMapIndex;
+	ub4 length;
+	sb8 diskOffsetToFind = 0;
+	sb8 foundFileOffset = 0;
+	sb8 foundDiskOffset = 0;
+
+	LOG_ENTRY ();
+
+	ioRunSize = (OCFS_MAX_DATA_EXTENTS * sizeof (ocfs_io_runs));
+
+	IoTransRuns = ocfs_malloc (ioRunSize);
+	if (IoTransRuns == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto bail;
+	}
+
+	IoMetaDataRuns = ocfs_malloc (ioRunSize);
+	if (IoMetaDataRuns == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto bail;
+	}
+
+	IoDataRuns = ocfs_malloc (ioRunSize);
+	if (IoDataRuns == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto bail;
+	}
+
+	TransRuns = ocfs_malloc (ioRunSize);
+	if (TransRuns == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto bail;
+	}
+
+	diskOffsetToFind = diskOffset;
+	remainingLength = ByteCount;
+
+	ocfs_down_sem (&(osb->map_lock), true);
+
+	RunsInExtentMap = ocfs_extent_map_get_count (&osb->metadata_map);
+
+	for (ExtentMapIndex = 0; ExtentMapIndex < RunsInExtentMap;
+	     ExtentMapIndex++) {
+		ub4 tempLen;
+
+		if (!ocfs_get_next_extent_map_entry
+		    (osb, &osb->metadata_map, ExtentMapIndex, &foundFileOffset,
+		     &foundDiskOffset, &tempLen))
+			continue;
+
+		length = tempLen;
+
+		if (foundDiskOffset >= (diskOffsetToFind + remainingLength))
+			break;
+
+		if (diskOffsetToFind >= (foundDiskOffset + length)) {
+			/* This meta data run is before the relevant stf */
+			continue;
+		} else {
+			if ((diskOffsetToFind >= foundDiskOffset) &&
+			    ((diskOffsetToFind + remainingLength) <=
+			     (foundDiskOffset + length))) {
+				/* It is only metadata */
+				IoMetaDataRuns[numMetaDataRuns].offset =
+				    diskOffsetToFind;
+				IoMetaDataRuns[numMetaDataRuns].disk_off =
+				    diskOffsetToFind;
+				IoMetaDataRuns[numMetaDataRuns].byte_cnt =
+				    remainingLength;
+				remainingLength -=
+				    IoMetaDataRuns[numMetaDataRuns].byte_cnt;
+				diskOffsetToFind +=
+				    IoMetaDataRuns[numMetaDataRuns].byte_cnt;
+				numMetaDataRuns++;
+				break;
+			} else if ((diskOffsetToFind < foundDiskOffset) &&
+				   ((diskOffsetToFind + remainingLength) >
+				    foundDiskOffset)) {
+				/* We have a data run and a metadata run */
+				IoDataRuns[numDataRuns].offset =
+				    diskOffsetToFind;
+				IoDataRuns[numDataRuns].disk_off =
+				    diskOffsetToFind;
+				IoDataRuns[numDataRuns].byte_cnt =
+				    foundDiskOffset - diskOffsetToFind;
+				remainingLength -=
+				    IoDataRuns[numDataRuns].byte_cnt;
+				diskOffsetToFind +=
+				    IoDataRuns[numDataRuns].byte_cnt;
+				numDataRuns++;
+
+				IoMetaDataRuns[numMetaDataRuns].offset =
+				    foundDiskOffset;
+				IoMetaDataRuns[numMetaDataRuns].disk_off =
+				    foundDiskOffset;
+				IoMetaDataRuns[numMetaDataRuns].byte_cnt =
+				    (remainingLength >
+				     length) ? length : remainingLength;
+
+				remainingLength -=
+				    IoMetaDataRuns[numMetaDataRuns].byte_cnt;
+				diskOffsetToFind +=
+				    IoMetaDataRuns[numMetaDataRuns].byte_cnt;
+				numMetaDataRuns++;
+				if (remainingLength > 0)
+					continue;
+				else
+					break;
+			} else if ((diskOffsetToFind >= foundDiskOffset) &&
+				   ((diskOffsetToFind + remainingLength) >
+				    (foundDiskOffset + length))) {
+				/* Meta data and as yet unknown data */
+				IoMetaDataRuns[numMetaDataRuns].offset =
+				    diskOffsetToFind;
+				IoMetaDataRuns[numMetaDataRuns].disk_off =
+				    diskOffsetToFind;
+				IoMetaDataRuns[numMetaDataRuns].byte_cnt =
+				    length - (diskOffsetToFind -
+					      foundDiskOffset);
+				remainingLength -=
+				    IoMetaDataRuns[numMetaDataRuns].byte_cnt;
+				diskOffsetToFind +=
+				    IoMetaDataRuns[numMetaDataRuns].byte_cnt;
+				numMetaDataRuns++;
+				continue;
+			}
+		}
+	}
+
+	ocfs_up_sem (&(osb->map_lock));
+
+	numTransRuns = *PNumTransRuns = 0;
+
+	/* Create new extent map from real runs */
+
+	for (i = 0; i < numMetaDataRuns; i++) {
+		if (osb->log_disk_off == 0)
+			ocfs_create_meta_log_files (osb);
+
+		fileSize = osb->log_file_size;
+
+		if (fileSize > (10 * ONE_MEGA_BYTE))
+			OCFS_BREAKPOINT ();
+
+		tempVbo = IoMetaDataRuns[i].disk_off;	/* Actual Disk Offset */
+		tempLbo = fileSize + osb->log_disk_off;	/* Log file disk Offset */
+		tempSize = IoMetaDataRuns[i].byte_cnt;	/* Length of run */
+
+		osb->log_file_size = (fileSize + tempSize);
+
+		/* Add the Extent to extent map list */
+		ocfs_down_sem (&(osb->map_lock), true);
+		LOG_TRACE_STR ("Acquired map_lock");
+
+		bRet =
+		    ocfs_add_extent_map_entry (osb, &osb->trans_map, tempVbo,
+					   tempLbo, tempSize);
+		if (!bRet) {
+			/* Drop whatever is already mapped there and retry once */
+			ocfs_remove_extent_map_entry (osb, &osb->trans_map, tempVbo,
+						  tempSize);
+			bRet =
+			    ocfs_add_extent_map_entry (osb, &osb->trans_map,
+						   tempVbo, tempLbo, tempSize);
+		}
+
+		ocfs_up_sem (&(osb->map_lock));
+		LOG_TRACE_STR ("Released map_lock");
+
+		if (!bRet) {
+			LOG_ERROR_STATUS(status = -EFAIL);
+			goto bail;
+		}
+
+		TransRuns[numTransRuns].offset = tempVbo;
+		TransRuns[numTransRuns].disk_off = tempLbo;
+		TransRuns[numTransRuns].byte_cnt = tempSize;
+		numTransRuns++;
+	}
+
+	fileSize = osb->log_file_size;
+
+	if (fileSize > (10 * ONE_MEGA_BYTE))
+		OCFS_BREAKPOINT ();
+
+	if (fileSize >= (2 * ONE_MEGA_BYTE))
+		osb->needs_flush = true;
+
+	*PNumTransRuns = numTransRuns;
+	*PTransRuns = TransRuns;
+
+      bail:
+	/*
+	 * The scratch arrays are released on every path.  TransRuns is
+	 * handed to the caller on success and released only on failure.
+	 */
+	ocfs_safefree (IoTransRuns);
+	ocfs_safefree (IoMetaDataRuns);
+	ocfs_safefree (IoDataRuns);
+	if (status < 0)
+		ocfs_safefree (TransRuns);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_create_log_extent_map */
+
+/*
+ * ocfs_lookup_cache_link()
+ *
+ */
+int ocfs_lookup_cache_link (ocfs_super * osb,
+		     ub1 * Buffer, ub8 actual_disk_off, ub8 Length)
+{
+	int status = 0;
+	ocfs_file_entry *fe = NULL;
+	ocfs_lock_res *lockres = NULL;
+	ub8 offset = 0;
+	struct list_head *iterentry;
+
+	LOG_ENTRY ();
+
+	offset = actual_disk_off;
+
+	list_for_each (iterentry, &(osb->cache_lock_list)) {
+		lockres = list_entry (iterentry, ocfs_lock_res, cache_list);
+		if (lockres == NULL) {
+			LOG_ERROR_STATUS(status = -EFAIL);
+			goto bail;
+		}
+
+		ocfs_acquire_lockres (lockres);
+
+		if ((lockres->sector_num >= actual_disk_off) &&
+		    (lockres->sector_num < (actual_disk_off + Length))) {
+			LOG_TRACE_ARGS
+			    ("ocfs_lookup_cache_link has a valid entry in cache "
+			     "link for DiskOffset %u.%u\n",
+			     HI (lockres->sector_num),
+			     LO (lockres->sector_num));
+
+			/* Change Lock type */
+			fe = (ocfs_file_entry *) ((ub1 *) Buffer +
+						  (lockres->sector_num -
+						   actual_disk_off));
+
+			/* Flush */
+			if (lockres->oin != NULL) {
+				ocfs_inode *oin;
+
+				oin = lockres->oin;
+				oin->cache_enabled = false;
+
+				/* If the Open Handle Count is zero , then release the */
+				/* lock and no need to flush as the data must already */
+				/* be flushed */
+
+				if (!(oin->oin_flags & OCFS_OIN_DIRECTORY)) {
+					if (oin->open_hndl_cnt == 0)
+						lockres->lock_type =
+						    DISK_LOCK_FILE_LOCK (fe) =
+						    OCFS_DLM_NO_LOCK;
+					else
+						ocfs_flush_cache (osb);
+				}
+			} else {
+				/*
Release the lock, as there will be no open handle if there is */
+				/* no oin, and so we don't need to keep the lock state to caching */
+				lockres->lock_type = DISK_LOCK_FILE_LOCK (fe) =
+				    OCFS_DLM_NO_LOCK;
+			}
+
+			lockres->in_cache_list = false;
+			list_del (iterentry);
+		}
+		ocfs_release_lockres (lockres);
+	}
+
+      bail:
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_lookup_cache_link */
+
+/*
+ * ocfs_process_log_file()
+ *
+ * This is recovery. It will read the log file based on trans extent map and
+ * do the actual disk writes of meta data at right disk offset.
+ *
+ * The per-node metadata system file is read as an array of ocfs_offset_map
+ * records.  For each record the logged bytes are read from log_disk_off
+ * and written to actual_disk_off; when Flag is set the cache lock list is
+ * first updated through ocfs_lookup_cache_link().
+ *
+ * Returns 0 on success or the first negative status encountered.
+ */
+int ocfs_process_log_file (ocfs_super * osb, bool Flag)
+{
+	int status = 0;
+	ub1 *pMetaDataBuffer = NULL;
+	ub1 *pLTempBuffer = NULL;
+	ub4 size;
+	ub4 i = 0;
+	ocfs_offset_map *pMapBuffer;
+	ub8 metaFileSize;
+	ub8 metaAllocSize;
+
+	LOG_ENTRY ();
+
+	metaAllocSize = 0;
+	status = ocfs_get_system_file_size (osb,
+					(OCFS_FILE_VOL_META_DATA + osb->node_num),
+					&metaFileSize, &metaAllocSize);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	size = OCFS_ALIGN (metaFileSize, osb->vol_layout.cluster_size);
+	pMetaDataBuffer = ocfs_malloc (size);
+	if (pMetaDataBuffer == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto finally;
+	}
+
+	status = ocfs_read_system_file (osb, (OCFS_FILE_VOL_META_DATA + osb->node_num),
+				 pMetaDataBuffer, size, 0);
+	if (status < 0) {
+		/* Report the read failure itself, not a made-up -ENOMEM */
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	size = 0;
+	i = 0;
+
+	/*
+	 * Stop once less than one whole record is left, so a file size that
+	 * is not a multiple of the record size cannot wrap the counter.
+	 */
+	while (metaFileSize >= sizeof (ocfs_offset_map)) {
+		ub8 tmpOffset;
+
+		pMapBuffer = (ocfs_offset_map *) (pMetaDataBuffer +
+						  (i *
+						   sizeof (ocfs_offset_map)));
+
+		if ((pMapBuffer->length % OCFS_SECTOR_SIZE) ||
+		    (pMapBuffer->actual_disk_off % OCFS_SECTOR_SIZE)) {
+			LOG_ERROR_STR
+			    ("length or actual_disk_off is Unaligned");
+		}
+
+		/* Grow the bounce buffer only when this record needs more room */
+		if (size < pMapBuffer->length) {
+			ocfs_safefree (pLTempBuffer);
+
+			size = OCFS_ALIGN (pMapBuffer->length, PAGE_SIZE);
+
+			pLTempBuffer = ocfs_malloc (size);
+			if (pLTempBuffer == NULL) {
+				LOG_ERROR_STATUS (status = -ENOMEM);
+				goto finally;
+			}
+		}
+
+		tmpOffset = pMapBuffer->log_disk_off;
+
+		status =
+		    ocfs_read_force_disk (osb, pLTempBuffer, pMapBuffer->length,
+				      tmpOffset);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto finally;
+		}
+
+		tmpOffset = pMapBuffer->actual_disk_off;
+
+		if (Flag) {
+			status = ocfs_lookup_cache_link (osb, pLTempBuffer,
+						     pMapBuffer->
+						     actual_disk_off,
+						     pMapBuffer->length);
+			if (status < 0) {
+				LOG_ERROR_STATUS (status);
+				goto finally;
+			}
+		}
+
+		status =
+		    ocfs_write_force_disk (osb, pLTempBuffer, pMapBuffer->length,
+				       tmpOffset);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto finally;
+		}
+
+		metaFileSize -= sizeof (ocfs_offset_map);
+		i++;
+	}
+
+      finally:
+	ocfs_safefree (pMetaDataBuffer);
+	ocfs_safefree (pLTempBuffer);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_process_log_file */
+
+/*
+ * ocfs_break_cache_lock()
+ *
+ * Asks the current master of LockRes to give up its cache lock by running
+ * a FLAG_FILE_RELEASE_CACHE vote against that single node, retrying while
+ * the vote calls return -EAGAIN.  Voting is reset on every exit path.
+ *
+ * Returns 0 on success (also when the master is found dead), otherwise the
+ * first negative status from the vote or from resetting the vote.
+ */
+int ocfs_break_cache_lock (ocfs_super * osb, ocfs_lock_res * LockRes)
+{
+	int status = -EAGAIN;
+	int tmpstat;
+	ub4 votemap;
+	ub8 lockseqno = 0;
+
+	LOG_ENTRY ();
+
+	/* Ask the node with cache to flush and revert to write thru on this file */
+	votemap = (1 << LockRes->master_node_num);
+
+	while (status == -EAGAIN) {
+		if (!IS_NODE_ALIVE (osb->publ_map, LockRes->master_node_num,
+				    OCFS_MAXIMUM_NODES)) {
+			/* sector_num is 64 bit: print it as HI.LO like the other traces */
+			LOG_ERROR_ARGS
+			    ("master is dead lock id %u.%u, master node %u\n",
+			     HI (LockRes->sector_num), LO (LockRes->sector_num),
+			     LockRes->master_node_num);
+			/* TODO recovery needs to be done here .....and then become master */
+			status = 0;
+			goto finito;
+		}
+
+		status =
+		    ocfs_request_vote (osb, LockRes->sector_num, LockRes->lock_type,
+				     FLAG_FILE_RELEASE_CACHE, votemap, &lockseqno);
+		if (status == -EAGAIN)
+			continue;
+		else if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto finito;
+		}
+
+		status =
+		    ocfs_wait_for_vote (osb, LockRes->sector_num, LockRes->lock_type,
+				     FLAG_FILE_RELEASE_CACHE, votemap,
+				     15000 /* 15sec */ , lockseqno, LockRes);
+		if (status == -EAGAIN)
+			continue;
+		else if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto finito;
+		}
+		break;
+	}
+
+	LockRes->lock_type = (ub1) OCFS_DLM_NO_LOCK;
+	LOG_TRACE_STR ("okie dokie... ocfs_break_cache_lock done\n");
+
+      finito:
+	/*
+	 * Keep an earlier vote failure: the reset result is reported only
+	 * when nothing failed before it.
+	 */
+	tmpstat =
+	    ocfs_reset_voting (osb, LockRes->sector_num, LockRes->lock_type, votemap);
+	if (tmpstat < 0) {
+		LOG_ERROR_STATUS (tmpstat);
+		if (status >= 0)
+			status = tmpstat;
+	}
+
+	LOG_EXIT_STATUS (status);
+	return (status);
+}				/* ocfs_break_cache_lock */
diff -urNp ocfs/fs/ocfs/Common/ocfsgenmisc.c 2.4.20pre5aa2/fs/ocfs/Common/ocfsgenmisc.c
--- ocfs/fs/ocfs/Common/ocfsgenmisc.c Thu Jan 1 01:00:00 1970
+++ 2.4.20pre5aa2/fs/ocfs/Common/ocfsgenmisc.c Fri Sep 6 01:46:16 2002
@@ -0,0 +1,1061 @@
+/*
+ * ocfsgenmisc.c
+ *
+ * Miscellaneous.
+ *
+ * Copyright (C) 2002 Oracle Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have recieved a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran,
+ * Manish Singh, Wim Coekaerts
+ */
+
+#include
+
+/* Tracing */
+#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_MISC
+
+/* Global Sequence number for error log.
*/
+
+ub4 OcfsErrorLogSequence = 0;
+
+/*
+ * ocfs_delete_name()
+ *
+ * Currently a no-op that only traces entry and exit.
+ */
+void ocfs_delete_name (ocfs_inode * oin)
+{
+	LOG_ENTRY ();
+
+	/* DO NOTHING  -Manish */
+
+	LOG_EXIT ();
+	return;
+}				/* ocfs_delete_name */
+
+/*
+ * ocfs_create_meta_log_files()
+ *
+ * Makes sure this node's log file and metadata file have disk space and
+ * caches their starting disk offsets in osb->log_disk_off and
+ * osb->log_meta_disk_off.  If the log file already has an allocation only
+ * the offsets are looked up; otherwise both files are extended first
+ * (10 MB log, 1 MB metadata).
+ *
+ * Returns 0 on success or a negative status.
+ */
+int ocfs_create_meta_log_files (ocfs_super * osb)
+{
+	int status = 0;
+	ub8 fileSize = 0;
+	ub8 allocSize = 0;
+	ub8 log_disk_off = 0;
+	ub4 logFileId;
+	ub4 metaFileId;
+
+	LOG_ENTRY ();
+
+	logFileId = (OCFS_FILE_VOL_LOG_FILE + osb->node_num);
+	metaFileId = (OCFS_FILE_VOL_META_DATA + osb->node_num);
+
+	status = ocfs_get_system_file_size (osb, logFileId, &fileSize, &allocSize);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto bail;
+	}
+
+	if (allocSize != 0) {
+		/* Space is already there: only the offsets need to be cached */
+		log_disk_off = ocfs_file_to_disk_off (osb, logFileId, 0);
+		if (log_disk_off == 0) {
+			LOG_ERROR_STATUS(status = -EFAIL);
+			goto bail;
+		}
+
+		osb->log_disk_off = log_disk_off;
+
+		log_disk_off = ocfs_file_to_disk_off (osb, metaFileId, 0);
+		if (log_disk_off == 0) {
+			LOG_ERROR_STATUS(status = -EFAIL);
+			goto bail;
+		}
+		osb->log_meta_disk_off = log_disk_off;
+		goto bail;
+	}
+
+	status = ocfs_extend_system_file (osb, logFileId, (ONE_MEGA_BYTE * 10));
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto bail;
+	}
+
+	/* NOTE(review): result of the zero-size extend is ignored - confirm intended */
+	ocfs_extend_system_file (osb, logFileId, 0);
+
+	log_disk_off = ocfs_file_to_disk_off (osb, logFileId, 0);
+	if (log_disk_off == 0) {
+		LOG_ERROR_STATUS(status = -EFAIL);
+		goto bail;
+	}
+
+	osb->log_disk_off = log_disk_off;
+
+	status = ocfs_extend_system_file (osb, metaFileId, ONE_MEGA_BYTE);
+	if (status < 0) {
+		/* Leave through bail like every other error so the exit is traced */
+		LOG_ERROR_STATUS (status);
+		goto bail;
+	}
+
+	ocfs_extend_system_file (osb, metaFileId, 0);
+
+	log_disk_off = ocfs_file_to_disk_off (osb, metaFileId, 0);
+	if (log_disk_off == 0) {
+		LOG_ERROR_STATUS(status = -EFAIL);
+		goto bail;
+	}
+
+	osb->log_meta_disk_off = log_disk_off;
+
+      bail:
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_create_meta_log_files */
+
+/*
+ * ocfs_create_new_oin()
+ *
+ * Create a new oin.
+ *
+ * The new oin (or NULL on allocation failure) is stored in *Returnedoin.
+ * EndOfFile and FileObject are not used here.
+ * Returns 0 on success or -ENOMEM.
+ */
+int ocfs_create_new_oin (ocfs_inode ** Returnedoin,
+		  ub8 * alloc_size,
+		  ub8 * EndOfFile, struct file *FileObject, ocfs_super * osb)
+{
+	int status = 0;
+	ocfs_inode *oin = NULL;
+
+	LOG_ENTRY ();
+
+	/*
+	 ** Don't do OCFS_ASSERT for FileObject, as it is possible and
+	 ** OK if FileObject is NULL
+	 */
+
+	OCFS_ASSERT (osb);
+
+	/* Allocate memory for a new oin */
+	oin = ocfs_allocate_oin ();
+	*Returnedoin = oin;
+
+	if (oin == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto finally;
+	}
+
+	/* Initialize the main_res semaphore and flag it as initialized. */
+	ocfs_init_sem (&(oin->main_res));
+	OCFS_SET_FLAG (oin->oin_flags, OCFS_INITIALIZED_MAIN_RESOURCE);
+
+	/* Initialize the alloc size value here, file size will come later in i_size */
+	oin->alloc_size = *(alloc_size);
+
+	/* Insert the pointer to osb in the oin and also Initialize */
+	/* the OFile list */
+	oin->osb = osb;
+	INIT_LIST_HEAD (&(oin->next_ofile));
+
+      finally:
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_create_new_oin */
+
+/* ocfs_create_root_dir_node()
+ *
+ */
+int ocfs_create_root_dir_node (ocfs_super * osb)
+{
+	int status = 0, tempstat;
+	ocfs_dir_node *NewDirNode = NULL;
+	ub8 bitmapOffset, numClustersAlloc, fileOffset = 0;
+	ub4 size, i;
+	ocfs_vol_disk_hdr *volDiskHdr = NULL;
+	ocfs_lock_res *LockResource;
+	bool LockAcquired = false;
+
+	LOG_ENTRY ();
+
+	/* Acquire volume Lock ...
*/
+	status =
+	    ocfs_acquire_lock (osb, OCFS_VOLUME_LOCK_OFFSET,
+			     OCFS_DLM_EXCLUSIVE_LOCK, FLAG_FILE_CREATE,
+			     &LockResource, NULL);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto bail;
+	}
+
+	LockAcquired = true;
+
+	size = (ub4) osb->vol_layout.dir_node_size;
+	NewDirNode = ocfs_malloc (size);
+	if (NewDirNode == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto bail;
+	}
+
+	memset (NewDirNode, 0, size);
+
+	size = (ONE_MEGA_BYTE > osb->vol_layout.cluster_size) ?
+	    ONE_MEGA_BYTE : osb->vol_layout.cluster_size;
+
+	status = ocfs_find_contiguous_space_from_bitmap (osb, size, &bitmapOffset,
+						 &numClustersAlloc, false);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto bail;
+	}
+
+	osb->vol_layout.root_int_off = (bitmapOffset *
+					osb->vol_layout.cluster_size) +
+	    osb->vol_layout.data_start_off;
+
+	/* Create Files in root for directory, file node allocations */
+	/* Create the dir alloc file now that we have a internal */
+	for (i = 0; i < OCFS_MAXIMUM_NODES; i++) {
+		ocfs_init_system_file (osb, OCFS_FILE_VOL_META_DATA + i);
+		ocfs_init_system_file (osb, OCFS_FILE_VOL_LOG_FILE + i);
+		ocfs_init_system_file (osb, OCFS_FILE_DIR_ALLOC + i);
+		ocfs_init_system_file (osb, OCFS_FILE_DIR_ALLOC_BITMAP + i);
+		ocfs_init_system_file (osb, OCFS_FILE_FILE_ALLOC + i);
+		ocfs_init_system_file (osb, OCFS_FILE_FILE_ALLOC_BITMAP + i);
+		ocfs_init_system_file (osb, LOG_FILE_BASE_ID + i);
+		ocfs_init_system_file (osb, CLEANUP_FILE_BASE_ID + i);
+#ifdef LOCAL_ALLOC
+		ocfs_init_system_file (osb, OCFS_VOL_BITMAP_FILE + (2*i));
+#endif
+	}
+
+	status = ocfs_alloc_node_block (osb, osb->vol_layout.dir_node_size,
+				  &bitmapOffset, &fileOffset,
+				  &numClustersAlloc, osb->node_num,
+				  DISK_ALLOC_DIR_NODE);
+	if (status < 0) {
+		status = -EFAIL;
+		goto bail;
+	}
+
+	osb->vol_layout.root_start_off = bitmapOffset;
+
+	ocfs_initialize_dir_node (osb, NewDirNode, bitmapOffset, fileOffset,
+			     osb->node_num);
+	NewDirNode->dir_node_flags |= DIR_NODE_FLAG_ROOT;
+
+	status =
+	    ocfs_write_dir_node (osb, NewDirNode, -1);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto bail;
+	}
+
+	size = OCFS_SECTOR_SIZE;
+
+	/* Read the first sector bytes from the target device */
+	if ((volDiskHdr = ocfs_malloc (PAGE_SIZE)) == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto bail;
+	}
+
+	status = ocfs_read_disk (osb, (sb1 *) volDiskHdr, size, 0);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto bail;
+	}
+
+	volDiskHdr->root_off = osb->vol_layout.root_start_off;
+	volDiskHdr->internal_off = osb->vol_layout.root_int_off;
+
+	status = ocfs_write_disk (osb, (sb1 *) volDiskHdr, size, 0);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto bail;
+	}
+
+//    status = ocfs_create_meta_log_files(osb);
+//    if(status < 0)
+//    {
+//        LOG_ERROR_STATUS(status);
+//        goto bail;
+//    }
+
+      bail:
+	if (status < 0) {
+		LOG_ERROR_STR ("Disabling Volume");
+		osb->vol_state = VOLUME_DISABLED;
+	}
+
+	/* Release Volume Lock */
+	if (LockAcquired) {
+		tempstat =
+		    ocfs_release_lock (osb, OCFS_VOLUME_LOCK_OFFSET,
+				     OCFS_DLM_EXCLUSIVE_LOCK, 0, LockResource);
+		if (tempstat < 0) {
+			osb->vol_state = VOLUME_DISABLED;
+			status = tempstat;
+		}
+	}
+	ocfs_safefree (NewDirNode);
+	ocfs_safefree (volDiskHdr);
+	NewDirNode = NULL;
+	volDiskHdr = NULL;
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_create_root_dir_node */
+
+/* ocfs_create_root_oin()
+ *
+ * Creates the in-memory oin for the root directory and stores it in
+ * osb->oin_root_dir.  When the volume layout has no root offset yet, the
+ * volume header is re-read under the volume lock and
+ * ocfs_create_root_dir_node() is called; otherwise only the per-node
+ * log/metadata files are set up.
+ *
+ * Returns 0 on success or a negative status.
+ */
+int ocfs_create_root_oin (ocfs_super * osb)
+{
+	int status = 0;
+	int tmpstat;
+	ub8 allocSize = 0, endofFile = 0;
+	ocfs_inode *oin;
+	ocfs_vol_disk_hdr *volDiskHdr = NULL;
+	ocfs_lock_res *LockResource;
+
+	LOG_ENTRY ();
+
+	if (osb->vol_layout.root_start_off == 0) {
+		volDiskHdr = ocfs_malloc (PAGE_SIZE);
+		if (volDiskHdr == NULL) {
+			LOG_ERROR_STR ("ocfs_malloc failed");
+			status = -ENOMEM;
+			goto finally;
+		}
+
+		ocfs_wait_for_disk_lock_release (osb, OCFS_VOLUME_LOCK_OFFSET,
+					     10000 /* 10 sec */ , OCFS_DLM_NO_LOCK);
+		status =
+		    ocfs_acquire_lock (osb, OCFS_VOLUME_LOCK_OFFSET,
+				     OCFS_DLM_EXCLUSIVE_LOCK, FLAG_FILE_CREATE,
+				     &LockResource, NULL);
+		if (status < 0) {
+			LOG_ERROR_STR ("ocfs_acquire_lock failed");
+			goto finally;
+		}
+
+		status =
+		    ocfs_read_disk (osb, (sb1 *) volDiskHdr, OCFS_SECTOR_SIZE, 0);
+		if (status < 0)
+			LOG_ERROR_STR ("ocfs_read_disk failed");
+
+		/*
+		 * Drop the volume lock before looking at the read result, so
+		 * a failed read no longer leaves the lock held.
+		 */
+		tmpstat =
+		    ocfs_release_lock (osb, OCFS_VOLUME_LOCK_OFFSET,
+				     OCFS_DLM_EXCLUSIVE_LOCK, 0,
+				     LockResource);
+		if (tmpstat < 0) {
+			LOG_ERROR_STR
+			    ("ocfs_release_lock failed!  disabling volume!!!!");
+			osb->vol_state = VOLUME_DISABLED;
+		}
+
+		if (status < 0)
+			goto finally;
+
+		if (volDiskHdr->root_off != 0) {
+			ocfs_sleep (3000);	/* 3 sec */
+			ocfs_wait_for_disk_lock_release (osb, OCFS_VOLUME_LOCK_OFFSET,
+						     30000 /* 30 sec */ ,
+						     OCFS_DLM_NO_LOCK);
+			osb->vol_layout.root_start_off = volDiskHdr->root_off;
+			osb->vol_layout.root_int_off = volDiskHdr->internal_off;
+		}
+
+		/* NOTE(review): runs even when root_off was already set on */
+		/* disk above - confirm a root node should be created then. */
+		status = ocfs_create_root_dir_node (osb);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto finally;
+		}
+		/* if it fails, Release the memory for the OFile we allocated above */
+	} else {
+		status = ocfs_create_meta_log_files (osb);
+		if (status < 0) {
+			LOG_ERROR_STR ("ocfs_create_meta_log_files failed");
+			goto finally;
+		}
+	}
+
+	/*  Create the root directory oin. This is done either here or in */
+	/* FindNewoin's if it fails, Release the memory for the OFile we */
+	/* allocated above */
+	status = ocfs_create_new_oin (&oin, &allocSize, &endofFile, NULL, osb);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	/*  This is for root . */
+	ocfs_initialize_oin (oin, osb,
+			   OCFS_OIN_DIRECTORY | OCFS_OIN_ROOT_DIRECTORY, NULL,
+			   0, osb->vol_layout.root_start_off);
+
+	// oin->Parentoin = NULL;  /*  Root has no parent */
+
+	/*  Set the Rootdirectories root Dir Node */
+
+	osb->oin_root_dir = oin;
+
+	oin->dir_disk_off = osb->vol_layout.root_start_off;
+
+      finally:
+	ocfs_safefree (volDiskHdr);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_create_root_oin */
+
+/*
+ * ocfs_allocate_ofile()
+ *
+ * Returns a zeroed ocfs_file from the ofile cache with its object id
+ * filled in, or NULL when the allocation fails.
+ */
+ocfs_file *ocfs_allocate_ofile (void)
+{
+	ocfs_file *OFile = NULL;
+	int status;
+
+	LOG_ENTRY ();
+
+	/* Allocate from the ofile cache; the result is NULL when */
+	/* the allocation fails */
+	OFile = kmem_cache_alloc (OcfsGlobalCtxt.ofile_cache, GFP_KERNEL);
+
+	if (OFile == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto finally;
+	}
+	memset (OFile, 0, sizeof (ocfs_file));
+	OFile->obj_id.type = OCFS_TYPE_OFILE;
+	OFile->obj_id.size = sizeof (ocfs_file);
+
+      finally:
+
+	LOG_EXIT_PTR (OFile);
+	return (OFile);
+}				/* ocfs_allocate_ofile */
+
+/*
+ * ocfs_allocate_oin()
+ *
+ * Returns a zeroed ocfs_inode from the oin cache with its object id
+ * filled in, or NULL when the allocation fails.
+ */
+ocfs_inode *ocfs_allocate_oin (void)
+{
+	ocfs_inode *oin = NULL;
+	int status;
+
+	LOG_ENTRY ();
+
+	oin = kmem_cache_alloc (OcfsGlobalCtxt.oin_cache, GFP_KERNEL);
+	if (oin == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto finally;
+	}
+	memset (oin, 0, sizeof (ocfs_inode));
+	oin->obj_id.type = OCFS_TYPE_OIN;
+	oin->obj_id.size = sizeof (ocfs_inode);
+
+      finally:
+
+	LOG_EXIT_PTR (oin);
+	return oin;
+}				/* ocfs_allocate_oin */
+
+
+/*
+ * ocfs_allocate_file_entry()
+ *
+ */
+ocfs_file_entry *ocfs_allocate_file_entry (void)
+{
+	ocfs_file_entry *FileEntry = NULL;
+	int status = 0;
+
+	LOG_ENTRY ();
+
+	FileEntry = kmem_cache_alloc (OcfsGlobalCtxt.fe_cache, GFP_KERNEL);
+	if (FileEntry == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto bail;
+	}
+
+	/* kmem_cache inited to give SECTOR_SIZE */
+	memset (FileEntry, 0, OCFS_SECTOR_SIZE);
+
+      bail:
+	LOG_EXIT_PTR
(FileEntry); + return FileEntry; +} /* ocfs_allocate_file_entry */ + + +/* + * ocfs_release_file_entry() + * + */ +void ocfs_release_file_entry (ocfs_file_entry * FileEntry) +{ + LOG_ENTRY (); + + OCFS_ASSERT (FileEntry); + kmem_cache_free (OcfsGlobalCtxt.fe_cache, FileEntry); + FileEntry = NULL; + + LOG_EXIT (); + return; +} /* ocfs_release_file_entry */ + + +/* + * ocfs_release_ofile() + * + */ +void ocfs_release_ofile (ocfs_file * OFile) +{ + OCFS_ASSERT (OFile); + + LOG_ENTRY (); + + /* Free the directory name allocated in dir */ + if (OFile->curr_dir_buf != NULL) { + ocfs_safefree (OFile->curr_dir_buf); + OFile->curr_dir_buf = NULL; + } + + kmem_cache_free (OcfsGlobalCtxt.ofile_cache, OFile); + OFile = NULL; + + LOG_EXIT (); + return; +} /* ocfs_release_ofile */ + +/* + * ocfs_delete_all_extent_maps() + * + */ +void ocfs_delete_all_extent_maps (ocfs_inode * oin) +{ + ub4 RunsInExtentMap = 0, ExtentMapIndex, ByteCount = 0; + sb8 Vbo; + sb8 Lbo; + + LOG_ENTRY (); + + RunsInExtentMap = ocfs_extent_map_get_count (&oin->map); + + for (ExtentMapIndex = 0; ExtentMapIndex < RunsInExtentMap; + ExtentMapIndex++) { + if (ocfs_get_next_extent_map_entry + (oin->osb, &oin->map, ExtentMapIndex, &Vbo, &Lbo, + &ByteCount)) { + ocfs_remove_extent_map_entry (oin->osb, &oin->map, Vbo, + ByteCount); + } + } + + LOG_EXIT (); + return; +} /* ocfs_delete_all_extent_maps */ + + +/* + * ocfs_release_oin() + * + */ +void ocfs_release_oin (ocfs_inode * oin, bool FreeMemory) +{ + bool LockResourceAcquired = false; + ocfs_lock_res *lockResource = NULL; + ocfs_super *osb; + struct inode *inode; + + LOG_ENTRY_ARGS ("(0x%08x, %u)\n", oin, FreeMemory); + + if (!oin || !oin->osb) + goto bail; + + osb = oin->osb; + lockResource = oin->lock_res; + + if (lockResource != NULL) { + ocfs_acquire_lockres (lockResource); + LockResourceAcquired = true; + if (lockResource->oin == oin) { + lockResource->oin = NULL; + } + + if (LockResourceAcquired) { + ocfs_release_lockres (lockResource); + 
LockResourceAcquired = false; + } + } + + inode = (struct inode *) oin->inode; + + if (inode) { + ub8 savedOffset = oin->file_disk_off; + + SET_INODE_OIN (inode, NULL); + SET_INODE_OFFSET (inode, savedOffset); + LOG_TRACE_ARGS ("inode oin cleared / flags: %d / offset: %u.%u\n", + inode->i_flags, savedOffset); + } + + ocfs_extent_map_destroy (&oin->map); + + /* Delete the ocfs_sem objects */ + if (oin->oin_flags & OCFS_INITIALIZED_MAIN_RESOURCE) { + ocfs_del_sem (&(oin->main_res)); + OCFS_CLEAR_FLAG (oin->oin_flags, OCFS_INITIALIZED_MAIN_RESOURCE); + } + if (oin->oin_flags & OCFS_INITIALIZED_PAGING_IO_RESOURCE) { + ocfs_del_sem (&(oin->paging_io_res)); + OCFS_CLEAR_FLAG (oin->oin_flags, + OCFS_INITIALIZED_PAGING_IO_RESOURCE); + } + + if (FreeMemory) { + kmem_cache_free (OcfsGlobalCtxt.oin_cache, oin); + oin = NULL; + } +bail: + LOG_EXIT (); + return; +} /* ocfs_release_oin */ + +/* + * ocfs_initialize_osb() + * + */ +int ocfs_initialize_osb (ocfs_super * osb, + ocfs_vol_disk_hdr * VolDiskHdr, + ocfs_vol_label * VolLabel, ub4 sect_size) +{ + int status = 0; + ocfs_publish *PublishSect = NULL; + ub4 BitMapSize, length; + void *BitMapBuffer, *buffer = NULL; + ub8 startOffset; + + LOG_ENTRY (); + + if (osb == NULL) { + status = -EFAIL; + goto finally; + } + + OCFS_CLEAR_FLAG (osb->osb_flags, OCFS_OSB_FLAGS_SHUTDOWN); + + osb->vol_layout.cluster_size = (ub4) (VolDiskHdr->cluster_size); + osb->obj_id.type = OCFS_TYPE_OSB; + osb->obj_id.size = sizeof (ocfs_super); + + if (!ocfs_hash_create (&(osb->root_sect_node), 12)) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + + ocfs_init_sem (&(osb->osb_res)); + ocfs_init_sem (&(osb->map_lock)); + ocfs_init_sem (&(osb->log_lock)); + ocfs_init_sem (&(osb->recovery_lock)); +#ifdef PARANOID_LOCKS + ocfs_init_sem (&(osb->dir_alloc_lock)); + ocfs_init_sem (&(osb->file_alloc_lock)); +#endif + ocfs_init_sem (&(osb->vol_alloc_lock)); + + osb->node_recovering = OCFS_INVALID_NODE_NUM; + + osb->needs_flush = false; + 
osb->commit_cache_exec = false; + osb->log_disk_off = 0; + osb->log_meta_disk_off = 0; + osb->trans_in_progress = false; + + ocfs_extent_map_init (&osb->metadata_map); + ocfs_extent_map_init (&osb->trans_map); + + INIT_LIST_HEAD (&(osb->cache_lock_list)); + osb->sect_size = sect_size; + osb->oin_root_dir = NULL; + osb->node_num = OCFS_INVALID_NODE_NUM; + + memcpy (osb->vol_layout.mount_point, VolDiskHdr->mount_point, strlen (VolDiskHdr->mount_point)); + osb->vol_layout.serial_num = VolDiskHdr->serial_num; + osb->vol_layout.size = (ub8) (VolDiskHdr->device_size); + osb->vol_layout.start_off = VolDiskHdr->start_off; + osb->vol_layout.bitmap_off = (ub8) VolDiskHdr->bitmap_off; + osb->vol_layout.publ_sect_off = VolDiskHdr->publ_off; + osb->vol_layout.vote_sect_off = VolDiskHdr->vote_off; + osb->vol_layout.root_bitmap_off = VolDiskHdr->root_bitmap_off; + osb->vol_layout.root_start_off = VolDiskHdr->root_off; + osb->vol_layout.root_int_off = VolDiskHdr->internal_off; + osb->vol_layout.root_size = VolDiskHdr->root_size; + osb->vol_layout.cluster_size = (ub4) VolDiskHdr->cluster_size; + osb->vol_layout.num_nodes = (ub4) VolDiskHdr->num_nodes; + osb->vol_layout.data_start_off = VolDiskHdr->data_start_off; + osb->vol_layout.root_bitmap_size = VolDiskHdr->root_bitmap_size; + osb->vol_layout.num_clusters = VolDiskHdr->num_clusters; + osb->vol_layout.dir_node_size = VolDiskHdr->dir_node_size; + osb->vol_layout.file_node_size = VolDiskHdr->file_node_size; + osb->vol_layout.node_cfg_off = VolDiskHdr->node_cfg_off; + osb->vol_layout.node_cfg_size = VolDiskHdr->node_cfg_size; + osb->vol_layout.new_cfg_off = VolDiskHdr->new_cfg_off; + osb->vol_layout.prot_bits = VolDiskHdr->prot_bits; + osb->vol_layout.uid = VolDiskHdr->uid; + osb->vol_layout.gid = VolDiskHdr->gid; + + memcpy (osb->vol_layout.id, VolLabel->id, MAX_VOL_ID_LENGTH); + + if (osb->vol_layout.dir_node_size == 0) + osb->vol_layout.dir_node_size = OCFS_DEFAULT_DIR_NODE_SIZE; + + if (osb->vol_layout.file_node_size == 0) + 
osb->vol_layout.file_node_size = OCFS_DEFAULT_FILE_NODE_SIZE; + + osb->max_dir_node_ent = (ub4) (osb->vol_layout.dir_node_size / OCFS_SECTOR_SIZE) - 2; + BitMapSize = (ub4) osb->vol_layout.num_clusters; + + /* In the start one sector is for Volume header and second sector */ + /* is for Global sequence Number and Directoy Entry. */ + { + ub4 sz = OCFS_ALIGN ((BitMapSize + 7) / 8, PAGE_SIZE); + + if ((BitMapBuffer = vmalloc_32 (sz)) == NULL) { + LOG_ERROR_STR ("vmalloc failed\n"); + LOG_ERROR_STATUS (status -ENOMEM); + goto finally; + } + } + ocfs_initialize_bitmap (&osb->cluster_bitmap, (ub4 *) BitMapBuffer, + BitMapSize); + + status = ocfs_get_config (osb); + if (status < 0) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + + if ((PublishSect = ocfs_malloc (osb->sect_size)) == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + + startOffset = osb->vol_layout.publ_sect_off + + (osb->node_num * osb->sect_size); + + /* Read the Publish Sector of local Node */ + status = ocfs_read_force_disk (osb, PublishSect, + osb->sect_size, startOffset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + /* Zero out the time stamp to write a new value */ + PublishSect->time = 0; + OcfsQuerySystemTime (&PublishSect->time); + + status = + ocfs_write_disk (osb, PublishSect, osb->sect_size, startOffset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + length = (osb->num_cfg_nodes * osb->sect_size); + + if ((buffer = ocfs_malloc (length)) == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + + /* Read disk for all Publish Sectors */ + status = ocfs_read_force_disk (osb, buffer, length, + osb->vol_layout.publ_sect_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + ocfs_update_publish_map (osb, (ub1 *) buffer, true); + + /* We might need to add a variable in Global List of osb to */ + /* delay any creation, if any other node is already creating a file */ + + /* Link 
this osb onto the global linked list of all osb structures. */ + /* The Global Link List is mainted for the whole driver . */ + ocfs_down_sem (&(OcfsGlobalCtxt.res), true); + list_add_tail (&(osb->osb_next), &(OcfsGlobalCtxt.osb_next)); + ocfs_up_sem (&(OcfsGlobalCtxt.res)); + + /* Mark the fact that this osb structure is initialized. */ + OCFS_SET_FLAG (osb->osb_flags, OCFS_OSB_FLAGS_OSB_INITIALIZED); + + finally: + ocfs_safefree (PublishSect); + ocfs_safefree (buffer); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_initialize_osb */ + +/* + * ocfs_verify_volume() + * + */ +int ocfs_verify_volume (ocfs_vol_disk_hdr * VolDiskHdr) +{ + int status = 0; + + LOG_ENTRY (); + + if (VolDiskHdr == NULL) { + status = -EFAIL; + goto bail; + } + + /* Compare the Signature with the one we read from disk */ + if (memcmp (VolDiskHdr->signature, OCFS_VOLUME_SIGNATURE, + strlen (OCFS_VOLUME_SIGNATURE)) != 0) { + status = -EINVAL; + goto bail; + } + + /* Check the Volume Length and the ClusterSize. */ + if (VolDiskHdr->device_size == 0) { + status = -EINVAL; + goto bail; + } + + if (VolDiskHdr->cluster_size == 0) { + status = -EINVAL; + goto bail; + } + + if (VolDiskHdr->major_version != OCFS_MAJOR_VERSION) { + LOG_ERROR_ARGS ("Version number not compatible: %u.%u\n", + VolDiskHdr->major_version, + VolDiskHdr->minor_version); + status = -EINVAL; + goto bail; + } + + /* Verify if mount point and volume size are valid */ + /* Read the root directory and make sure it is valid */ + /* Check to see who else is alive. */ + /* Kick in the NM i/f to start writing time stamps to the disk */ + + bail: + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_verify_volume */ + +/* + * ocfs_vol_member_reconfig() + * + */ +int ocfs_vol_member_reconfig (ocfs_super * osb) +{ + int status = 0; + + LOG_ENTRY (); + + /* Start out with the highest multiple.... 
*/ + osb->hbm = DISK_HBEAT_COMM_ON; + + /* Trigger the NM on this node to init the VolMap based on the info */ + /* on the disk currently and advertise to other nodes our existance. */ + ocfs_nm_heart_beat (osb, HEARTBEAT_METHOD_DISK, 0); + + /* Send a mesg to force the nm on all other nodes to process this */ + /* volume, this should allow for them to detect our existance. */ +// ocfs_nm_heart_beat(osb, HEARTBEAT_METHOD_IPC, 0); + + /* WE should do an actual join of the cluster with the NM, IMPORTANT */ + /* NmJoinCluster(osb); */ + osb->publ_map |= (1 << osb->node_num); + + /* what's happening here? your guess is as good as mine :) */ + osb->hbt = 20 + jiffies; + ocfs_sleep (5000); /* 5 sec */ + osb->hbt = 20 + jiffies; + ocfs_sleep (5000); /* 5 sec */ + + /* Create the Rootdirectory oin. */ + osb->vol_state = VOLUME_INIT; + + status = ocfs_create_root_oin (osb); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + osb->vol_state = VOLUME_ENABLED; + + finally: + + LOG_EXIT_STATUS (status); + return (status); +} /* ocfs_vol_member_reconfig */ + +/* + * ocfs_check_volume() + * + */ +int ocfs_check_volume (ocfs_super * osb) +{ + int status = 0; + ub8 startOffset = 0; + ub1 *buffer = NULL; + ub4 length; + ocfs_publish *PublishSect; + bool osb_resAcquired = false; + + LOG_ENTRY (); + + length = sizeof (ocfs_file_entry); + length = (length > PAGE_SIZE) ? 
length : PAGE_SIZE; + + if ((buffer = ocfs_malloc (length)) == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + + /* Read the publish map */ + startOffset = osb->vol_layout.publ_sect_off + + (osb->node_num * osb->sect_size); + + length = osb->sect_size; + + /* Read Publish Sector of the node which died */ + status = ocfs_read_force_disk (osb, buffer, length, startOffset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + PublishSect = (ocfs_publish *) buffer; + + LOG_TRACE_STR ("putting publishsector dirty check back in..."); + if (PublishSect->dirty) { + LOG_TRACE_STR ("publishsector IS dirty!!!"); + /* Acquire the osb lock */ + LOG_TRACE_STR ("Acquiring osb lock"); + + ocfs_down_sem (&(osb->osb_res), true); + osb_resAcquired = true; + + LOG_TRACE_STR ("Acquired osb lock"); + + status = ocfs_recover_vol (osb, osb->node_num); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + } + + finally: + if (osb_resAcquired) { + LOG_TRACE_STR ("Release osb Lock"); + ocfs_up_sem (&(osb->osb_res)); + osb_resAcquired = false; + } + ocfs_safefree (buffer); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_check_volume */ + + +/* + * ocfs_delete_osb() + * + * The routine gets called from dismount or close whenever a dismount on + * volume is requested and the osb open count becomes 1. + * It will remove the osb from the global list and also free up all the + * initialized resources and fileobject. + */ +void ocfs_delete_osb (ocfs_super * osb) +{ + LOG_ENTRY (); + + /* This function assumes that the caller has the main osb resource */ + + /* Remove the osb from the global linked list of all osb structures. 
*/ + /* The Global Link List is mainted for the whole driver */ + ocfs_down_sem (&(OcfsGlobalCtxt.res), true); + list_del (&(osb->osb_next)); + ocfs_up_sem (&(OcfsGlobalCtxt.res)); + + ocfs_del_sem (&(osb->osb_res)); + ocfs_del_sem (&(osb->log_lock)); + ocfs_del_sem (&(osb->recovery_lock)); + ocfs_del_sem (&(osb->map_lock)); + ocfs_extent_map_destroy (&osb->metadata_map); + ocfs_extent_map_destroy (&osb->trans_map); + memset (osb, 0, sizeof (ocfs_super)); + + LOG_EXIT (); + return; +} /* ocfs_delete_osb */ + +/* + * ocfs_commit_cache() + * + */ +int ocfs_commit_cache (ocfs_super * osb, bool Flag) +{ + int status = 0; + + LOG_ENTRY (); + + ocfs_flush_cache (osb); + + ocfs_down_sem (&(osb->map_lock), true); + + status = ocfs_write_map_file (osb); + if (status >= 0) { + ocfs_process_log_file (osb, Flag); + + status = + ocfs_extend_system_file (osb, + (OCFS_FILE_VOL_LOG_FILE + osb->node_num), + 0); + osb->log_file_size = 0; + + status = + ocfs_extend_system_file (osb, + (OCFS_FILE_VOL_META_DATA + osb->node_num), + 0); + + ocfs_extent_map_destroy (&osb->metadata_map); + ocfs_extent_map_destroy (&osb->trans_map); + ocfs_extent_map_init (&osb->metadata_map); + ocfs_extent_map_init (&osb->trans_map); + } + + ocfs_up_sem (&(osb->map_lock)); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_commit_cache */ diff -urNp ocfs/fs/ocfs/Common/ocfsgennm.c 2.4.20pre5aa2/fs/ocfs/Common/ocfsgennm.c --- ocfs/fs/ocfs/Common/ocfsgennm.c Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/ocfsgennm.c Fri Sep 6 01:46:16 2002 @@ -0,0 +1,1449 @@ +/* + * ocfsgennm.c + * + * Keeps track of alive nodes in the cluster. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#include + +/* Tracing */ +#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_NM + +/* + * ocfs_flush_data() + * + */ +int ocfs_flush_data (ocfs_inode * oin) +{ + int status = 0; + bool acq_oin = false; + + LOG_ENTRY (); + + if (oin->oin_flags & OCFS_OIN_DIRECTORY) + goto bail; + + ocfs_down_sem (&(oin->main_res), true); + acq_oin = true; + + oin->cache_enabled = false; + + ocfs_flush_cache (oin->osb); + + /* Grab and release PagingIo to serialize ourselves with the lazy writer. */ + /* This will work to ensure that all IO has completed on the cached */ + /* data and we will succesfully tear away the cache section. */ + ocfs_down_sem (&(oin->paging_io_res), true); + ocfs_up_sem (&(oin->paging_io_res)); + + ocfs_purge_cache_section (oin, NULL, 0); + + if (acq_oin) { + ocfs_up_sem (&(oin->main_res)); + acq_oin = false; + } + + bail: + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_flush_data */ + +/* + * ocfs_update_publish_map() + * + * @osb: ocfs super block for the volume + * @buffer: publish sectors read in the last round + * @first_time: if true, the buffer needs to be initialized + * + * Reads the publish sectors and compares the timestamp of each node + * to the one it read in the last round. As long as the timestamp keeps + * changing, the node is marked alive. Conversely, if the timestamp does + * not change over time, the node is marked dead. 
The function marks all + * the live nodes in the publishmap. + * + */ +void ocfs_update_publish_map (ocfs_super * osb, void *buffer, bool first_time) +{ + ocfs_publish *publish; + ocfs_vol_node_map *node_map; + ub8 curr_time = 0; + ub4 i; + ub4 num_nodes; + ub4 miss_cnt_val = MISS_COUNT_VALUE; + ub1 *p; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u)\n", osb, buffer, first_time); + + /* Total number of nodes */ + num_nodes = osb->num_cfg_nodes; + + node_map = &(osb->vol_node_map); + + /* Read the time stamp of all nodes and compare them with the time */ + /* stamp in memory for that node */ + OcfsQuerySystemTime (&curr_time); + + /* If this function is called for the first time and the local node */ + /* doesn't have any Timestamp of any node in local memory, then we */ + /* just update everybody's last time stamp in local memory and return. */ + /* Also we don't update publ_map at this time. */ + if (first_time) { + for (i = 0, p = (ub1 *) buffer; i < num_nodes; + i++, p += osb->sect_size) { + publish = (ocfs_publish *) p; + node_map->time[i] = publish->time; + node_map->scan_rate[i] = publish->hbm[i]; + node_map->scan_time[i] = curr_time; + } + goto bail; /* exit */ + } + + for (i = 0, p = (ub1 *) buffer; i < num_nodes; i++, p += osb->sect_size) { + publish = (ocfs_publish *) p; + + /* Check if the node is hung or not by comparing the disk */ + /* and memory timestamp values */ + if (node_map->time[i] == publish->time) { + /* If we are connected to this node and it is not */ + /* hearbeating on this volume it is time to send a */ + /* message to ask it to start heartbeating to the */ + /* volume. 
Check to see we expected a rate change */ + if (node_map->exp_scan_rate[i] != DISK_HBEAT_INVALID) { + if (node_map->scan_time[i] > + node_map->exp_rate_chng_time[i]) { + ub8 time1; + ub8 time2; + + time1 = (ub8) (curr_time - + node_map->scan_time[i]); + time2 = (ub8) (ONE_MILLI_SEC * 200 * + node_map->exp_scan_rate[i]); + + if (time1 > time2) + (node_map->miss_cnt[i])++; + } + + switch (node_map->exp_scan_rate[i]) { + case DISK_HBEAT_COMM_ON: + /* + * We are probably switching the node + * to larger from smaller rate. The + * comm establishment should have + * enabled the longer hbt on this. If + * some other node was asking for a + * smaller hbeat, and the actual + * hbeat is smaller we should not + * have hit this case, the stamp + * should have changed. Increment the + * misscount, the node is probably + * dead/hung. + */ + miss_cnt_val = MISS_COUNT_VALUE; + break; + + case DISK_HBEAT_NO_COMM: + miss_cnt_val = MISS_COUNT_VALUE; + break; + } + } else { + (node_map->miss_cnt[i])++; + } + + /* This will clear the bit for the osb publish map */ + /* for the node which is not updating its timestamp */ + if (node_map->miss_cnt[i] > MISS_COUNT_VALUE) { + UPDATE_PUBLISH_MAP (osb->publ_map, i, 0, + num_nodes); + /* + * If we are in fact connected to this node and + * it is not hearbeating on this volume it is + * time to send a message to disable volume + * access on this node and ask it to remount the + * volume now... + * Should we check to do some recovery here for + * the hung node, esp. releasing the locks? 
+ */ + } + } else { + /* This will set the bit for the osb publish map for */ + /* the node which is up or new addition */ + node_map->miss_cnt[i] = 0; + node_map->time[i] = publish->time; + UPDATE_PUBLISH_MAP (osb->publ_map, i, 1, num_nodes); + + /* Update the multiple the other node wants us to hbeat */ + if ((publish->hbm[osb->node_num] != DISK_HBEAT_INVALID) + && (osb->hbm > publish->hbm[osb->node_num])) { + /* Go to the lowest multiplier any of the nodes */ + /* alive want us to heartbeat with. */ + osb->hbm = publish->hbm[osb->node_num]; + + if (osb->hbm == 0) { + LOG_ERROR_STR ("Heartbeat was 0"); + osb->hbm = DISK_HBEAT_NO_COMM; + } + + if (OcfsGlobalCtxt.hbm > osb->hbm) { + if (osb->hbm) + OcfsGlobalCtxt.hbm = osb->hbm; + } + + if (OcfsGlobalCtxt.hbm == 0) { + LOG_ERROR_STR ("Global heartbeat was 0"); + OcfsGlobalCtxt.hbm = DISK_HBEAT_NO_COMM; + } + } + + /* CALL IPC for a reconfig if this node was a joining node. */ + /* Also, delay the join somewhat... */ + } + node_map->scan_time[i] = curr_time; + } + + bail: + LOG_EXIT (); + return; +} /* ocfs_update_publish_map */ + +/* + * ocfs_nm_heart_beat() + * + * @osb: ocfs super block for the volume + * @flag: type of heart beat + * @op: + * + * Updates the timestamp in the nodes publish sector. + * + * Returns 0 if success, < 0 if error. + */ +int ocfs_nm_heart_beat (ocfs_super * osb, ub4 flag, ub1 op) +{ + ocfs_publish *publish = NULL; + int status = 0; + ub8 node_publ_off = 0; + + LOG_ENTRY_ARGS ("(0x%08x, %u, %u)\n", osb, flag, op); + + if (flag & HEARTBEAT_METHOD_DISK) { + publish = ocfs_malloc (osb->sect_size); + if (publish == NULL) { + status = -ENOMEM; + LOG_ERROR_STATUS (status); + goto finally; + } + + node_publ_off = osb->vol_layout.publ_sect_off + + (osb->node_num * osb->sect_size); + +//SM??? 
This function can be optimized by removing this read as this function +// is called only from ocfs_nm_thread() after that function has read the +// Publish Sectors for all the nodes + /* Read the publish sector */ + status = ocfs_read_force_disk (osb, publish, osb->sect_size, + node_publ_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + OcfsQuerySystemTime (&publish->time); + + publish->hbm[osb->node_num] = osb->hbm; + + node_publ_off = osb->vol_layout.publ_sect_off + + (osb->node_num * osb->sect_size); + + /* Write the current time in local node's Publish sector */ + status = ocfs_write_force_disk (osb, publish, osb->sect_size, + node_publ_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + LOG_TRACE_ARGS ("Updating timestamp for node (%u)\n", + osb->node_num); + } + + if (flag & HEARTBEAT_METHOD_IPC) { + /* Plug this in later... */ + } + + finally: + ocfs_safefree (publish); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_nm_heart_beat */ + +#if !defined(DLM_THREAD_PER_VOLUME) +/* + * ocfs_polling_thread() + * + */ +int ocfs_polling_thread (void *unused) +{ + struct list_head *iterEntry; + bool bGlobalResourceAcquired = false; + IPC_DLM_CONFIG IpcDlmConfig; + + LOG_ENTRY (); + + IpcDlmConfig.Version = OCFS_IPC_DLM_VERSION; + IpcDlmConfig.MsgSize = OCFS_DLM_MAX_MSG_SIZE; + + init_waitqueue_head (&OcfsGlobalCtxt.FlushEvent); + + ocfs_daemonize ("ocfsdlm"); + /* Acquire DLMThreadMonitor */ + ocfs_down_sem (&(OcfsGlobalCtxt.DLMThreadMonitor), true); + + /* The delay changes based on multiplier */ + while (!(OcfsGlobalCtxt.OcfsFlags & OCFS_FLAG_SHUTDOWN_VOL_THREAD)) { + if (OcfsGlobalCtxt.hbm == 0) { + LOG_ERROR_STR ("OcfsGlobalCtxt Heartbeat was 0"); + OcfsGlobalCtxt.hbm = DISK_HBEAT_NO_COMM; + } + + ocfs_wait (&OcfsGlobalCtxt.FlushEvent, false, 2000); /* ms */ + + init_waitqueue_head (&OcfsGlobalCtxt.FlushEvent); + + ocfs_nm_thread (NULL); + } + + /* Relinquish semaphore to signal main thread */ + 
ocfs_up_sem (&(OcfsGlobalCtxt.DLMThreadMonitor)); + + LOG_EXIT_LONG (0); + return 0; +} /* ocfs_polling_thread */ +#endif /* !defined(DLM_THREAD_PER_VOLUME) */ + +/* + * ocfs_nm_join_cluster() + * + */ +int ocfs_nm_join_cluster (ocfs_super * osb) +{ + return -EFAIL; +} /* ocfs_nm_join_cluster */ + +/* + * ocfs_disk_update_resource() + * + * @osb: ocfs super block for the volume + * @LockResource: lockres to be updated + * @FileEntry: corresponding file entry + * + * Updates the in memory lock resource from the disklock info + * stored in the file entry on disk. + * + * Returns 0 if success, < 0 if error. + */ +int ocfs_disk_update_resource (ocfs_super * osb, ocfs_lock_res * lock_res, + ocfs_file_entry * file_ent) +{ + int status = 0; + ocfs_file_entry *fe = NULL; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, 0x%08x)\n", osb, lock_res, + file_ent); + + LOG_TRACE_ARGS ("Reading sector: %u.%u\n", HI (lock_res->sector_num), + LO (lock_res->sector_num)); + + if (file_ent) { + fe = file_ent; + status = ocfs_read_file_entry (osb, fe, lock_res->sector_num); + } else { + status = ocfs_get_file_entry (osb, &fe, lock_res->sector_num); + } + + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + ocfs_acquire_lockres (lock_res); + + lock_res->lock_type = DISK_LOCK_FILE_LOCK (fe); + lock_res->master_node_num = DISK_LOCK_CURRENT_MASTER (fe); + lock_res->oin_openmap = DISK_LOCK_OIN_MAP (fe); + + ocfs_release_lockres (lock_res); + + finally: + if (file_ent == NULL) { + if (fe) + ocfs_release_file_entry (fe); + } + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_disk_update_resource */ + +/* + * ocfs_find_update_res() + * + * @osb: ocfs super block for the volume + * @LockId: sector number of the resource to be locked + * @LockResource: lockres of the resource + * @FileEntry: corresponding file entry + * @Updated: set to 1 if lockres is refreshed from disk + * + * Searches for the lockres for the given lockid in the hashtable. 
+ * If not found, it allocates a lockres for the lockid, and adds + * it to the hashtable. If found and it's master node is not the + * same as the current node, the lockres is refreshed from the disk. + * + * Returns 0 if success, < 0 if error. + */ +int ocfs_find_update_res (ocfs_super * osb, + ub8 LockId, + ocfs_lock_res ** LockResource, + ocfs_file_entry * FileEntry, ub4 * Updated) +{ + int status = 0; + ocfs_lock_res *lockres = NULL; + + LOG_ENTRY_ARGS ("(0x%08x, %u.%u, 0x%08x, 0x%08x, 0x%08x)\n", osb, + HI (LockId), LO (LockId), LockResource, FileEntry, + Updated); + + status = ocfs_lookup_sector_node (osb, LockId, &lockres); + if (status < 0) { + /* Create a resource and insert in the hash */ + lockres = kmem_cache_alloc (OcfsGlobalCtxt.lockres_cache, GFP_KERNEL); + + if (lockres == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + + /* Initialize Resource */ + ocfs_init_lockres (osb, lockres, LockId); + + /* Update from the disk */ + status = ocfs_disk_update_resource (osb, lockres, FileEntry); + if (status < 0) { + /* This should never fail so lock up the volume */ + LOG_ERROR_STR ("Disabling Volume"); + osb->vol_state = VOLUME_DISABLED; + goto finally; + } + + if (LockId != lockres->sector_num) { + LOG_ERROR_STATUS(status = -EFAIL); + goto finally; + } + + if (Updated) + *Updated = 1; + + ocfs_insert_sector_node (osb, lockres); + if (status < 0) { + /* Failure LOCK up volume operation */ + LOG_ERROR_STR ("Disabling Volume"); + osb->vol_state = VOLUME_DISABLED; + goto finally; + } + } else { + + if (lockres->signature != 0x55AA) { + LOG_ERROR_STATUS(status = -EFAIL); + goto finally; + } + + if (LockId != lockres->sector_num) { + LOG_ERROR_STATUS(status = -EFAIL); + goto finally; + } + + if (lockres->master_node_num != (sb4) osb->node_num) { + /* Update from the disk */ + status = ocfs_disk_update_resource (osb, lockres, FileEntry); + if (status < 0) { + /* This should never fail so lock up the volume */ + LOG_ERROR_STR ("Disabling 
Volume"); + osb->vol_state = VOLUME_DISABLED; + goto finally; + } + + if (Updated) + *Updated = 1; + } + } + + *LockResource = lockres; + + finally: + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_find_update_res */ + +/* + * ocfs_vote_for_del_ren() + * + */ +int ocfs_vote_for_del_ren (ocfs_super * osb, ocfs_publish * PublishToVote, + ub4 NodeAskingVote, ocfs_vote * VoteSector, + ocfs_lock_res ** LockResource) +{ + int status = 0; + ub4 Flags = 0; + ub4 retryCount = 0; + bool acq_oin = false; + ocfs_file_entry *pFileEntry = NULL; + ocfs_lock_res *lockres; + bool rls_oin = true; + ocfs_sem *oin_sem = NULL; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u, 0x%08x, 0x%08x)\n", osb, + PublishToVote, NodeAskingVote, VoteSector, + LockResource); + + lockres = *LockResource; + + Flags = PublishToVote->vote_type; + + ocfs_acquire_lockres (lockres); + + if (lockres->oin) { + UPDATE_OIN (lockres->oin); + } + + LOG_TRACE_ARGS ("For node (%u) for lockid %u.%u and seq %u.%u\n", + NodeAskingVote, + HI (PublishToVote->dir_ent), + LO (PublishToVote->dir_ent), + HI (PublishToVote->seq_num), + LO (PublishToVote->seq_num)); + + /* Check for oin */ + if (lockres->oin != NULL) { + ocfs_inode *oin; + + oin = lockres->oin; + + oin_sem = &(oin->main_res); + ocfs_down_sem (oin_sem, true); + acq_oin = true; + + /* If OIN_IN_USE is set we should go back and retry */ + while ((oin->oin_flags & OCFS_OIN_IN_USE) && (retryCount < 5)) { + if (acq_oin) { + ocfs_up_sem (oin_sem); + acq_oin = false; + } + + ocfs_sleep (20); /* 20 ms */ + retryCount++; + + if (!acq_oin) { + ocfs_down_sem (oin_sem, true); + acq_oin = true; + } + } + +// if((lockResource->oin->ReferenceCount == 0) && + if ((lockres->oin->open_hndl_cnt == 0) && + (!(oin->oin_flags & OCFS_OIN_IN_USE))) { + if (!(oin->oin_flags & OCFS_OIN_IN_TEARDOWN)) { +//KASEY... Could we please make the code below more confusing? 
+ { + if (acq_oin) { + ocfs_up_sem (oin_sem); + acq_oin = false; + } + + ocfs_delete_name (oin); + + rls_oin = false; + + if (!acq_oin) { + ocfs_down_sem (oin_sem, true); + acq_oin = true; + } + + OCFS_SET_FLAG (oin->oin_flags, + OCFS_OIN_NEEDS_DELETION); + } + + if (acq_oin) { + ocfs_up_sem (oin_sem); + acq_oin = false; + } + + ocfs_release_lockres (lockres); + + if (oin && oin->inode) { + struct inode *inode = oin->inode; + + //inode->i_nlink--; + inode->i_nlink = 0; + d_prune_aliases (inode); + } + + if (rls_oin) { + ocfs_release_cached_oin (osb, oin); + ocfs_release_oin (oin, true); + } else { + ocfs_down_sem (&(oin->paging_io_res), + true); + ocfs_purge_cache_section (oin, NULL, 0); + ocfs_up_sem (&(oin->paging_io_res)); + } + lockres = *LockResource = NULL; + } + + VoteSector->vote[NodeAskingVote] = FLAG_VOTE_NODE; + goto finito; + } else { + LOG_TRACE_ARGS + ("Returned in use (%u) for lockid %u.%u and seq %u.%u\n", + NodeAskingVote, HI (PublishToVote->dir_ent), + LO (PublishToVote->dir_ent), + HI (PublishToVote->seq_num), + HI (PublishToVote->seq_num)); + VoteSector->vote[NodeAskingVote] = + FLAG_VOTE_OIN_ALREADY_INUSE; + goto finito; + } + } else { + LOG_TRACE_ARGS + ("Voted to del/ren (%u) for lockid %u.%u and seq %u.%u\n", + NodeAskingVote, HI (PublishToVote->dir_ent), + LO (PublishToVote->dir_ent), HI (PublishToVote->seq_num), + HI (PublishToVote->seq_num)); + VoteSector->vote[NodeAskingVote] = FLAG_VOTE_NODE; + goto finito; + } + + finito: + /* Set the always update master on open flag */ + if (lockres) { + lockres->lock_state |= FLAG_ALWAYS_UPDATE_OPEN; + lockres->last_upd_seq_num = PublishToVote->seq_num; + + if (lockres->master_node_num != OCFS_INVALID_NODE_NUM) { + if (!IS_NODE_ALIVE + (osb->publ_map, lockres->master_node_num, + OCFS_MAXIMUM_NODES)) { + lockres->master_node_num = NodeAskingVote; + } + } else { + lockres->master_node_num = NodeAskingVote; + } + + /* Change the master if there is no lock */ + if ((lockres->master_node_num == 
osb->node_num) && + (lockres->lock_state <= OCFS_DLM_SHARED_LOCK)) { + ub8 tmp = PublishToVote->dir_ent; + + /* Change the lock ownership to the node asking for vote */ + /* and write new master on the disk */ + status = + ocfs_get_file_entry (osb, &pFileEntry, + PublishToVote->dir_ent); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + DISK_LOCK_CURRENT_MASTER (pFileEntry) = NodeAskingVote; + status = + ocfs_write_disk (osb, pFileEntry, osb->sect_size, + tmp); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + lockres->master_node_num = NodeAskingVote; + } + } + + finally: + if (pFileEntry) + ocfs_release_file_entry (pFileEntry); + + if (acq_oin && oin_sem) { + ocfs_up_sem (oin_sem); + acq_oin = false; + } + + if (lockres) + ocfs_release_lockres (lockres); + + LOG_EXIT_STATUS (status); + return (status); +} /* ocfs_vote_for_del_ren */ + +/* + * ocfs_process_update_inode_request() + * + * get an inode just long enough to dump its pages + */ +int ocfs_process_update_inode_request (ocfs_super * osb, + ocfs_vote * VoteSector, + ocfs_publish * PublishToVote, + ocfs_lock_res * LockResource, + ub4 NodeAskingVote) +{ + ub8 fileoff; + struct inode *inode = NULL; + ocfs_file_entry *pFileEntry; + int status; + + LOG_ENTRY (); + + if (LockResource && LockResource->oin) { + LOG_ERROR_STR ("should not be called if there exists an " \ + "oin for this inode!\n"); + return -EFAIL; + } + + fileoff = PublishToVote->dir_ent; + + status = ocfs_get_file_entry (osb, &pFileEntry, fileoff); + if (status >= 0) { + ocfs_find_inode_args args; + + args.offset = pFileEntry->this_sector; + args.entry = pFileEntry; + inode = iget4 (osb->sb, (ub4) LO (fileoff), + (find_inode_t) ocfs_find_inode, + (void *) (&args)); + if (inode == NULL || is_bad_inode (inode)) { + if (inode) + iput (inode); + inode = NULL; + } else { + truncate_inode_pages (inode->i_mapping, 0); + iput (inode); + inode = NULL; + } + ocfs_release_file_entry (pFileEntry); + pFileEntry = 
NULL;
+	}
+
+	if (LockResource)
+		ocfs_hash_del (&(osb->root_sect_node),
+			       &(LockResource->sector_num), sizeof (ub8));
+
+	VoteSector->dir_ent = PublishToVote->dir_ent;
+	VoteSector->seq_num = PublishToVote->seq_num;
+	VoteSector->vote[NodeAskingVote] = FLAG_VOTE_OIN_UPDATED;
+	LOG_TRACE_STR ("now doing a FLAG_VOTE_OIN_UPDATED vote!\n");
+
+	LOG_EXIT ();
+	return 0;
+}				/* ocfs_process_update_inode_request */
+
+/*
+ * ocfs_process_vote()
+ *
+ * Cast this node's vote on the request that node NodeAskingVote
+ * published in PublishToVote.
+ *
+ * Reads this node's vote sector, fills in vote[NodeAskingVote]
+ * according to PublishToVote->vote_type and the state of the matching
+ * lock resource, and writes the sector back unless an error occurred.
+ *
+ * Returns 0 or a positive status on success, a negative status on
+ * failure.
+ */
+int ocfs_process_vote (ocfs_super * osb, ocfs_publish * PublishToVote,
+		       ub4 NodeAskingVote)
+{
+	int status = 0;
+	ocfs_lock_res *lockres = NULL;
+	ub4 Flags;
+	ub4 num_nodes;
+	ub4 i;
+	ub8 nodeVoteOffset;
+	bool acq_oin = false;
+	ocfs_file_entry *pFileEntry = NULL;
+	ocfs_vote *PVoteSector = NULL;
+
+	LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u)\n", osb, PublishToVote,
+			NodeAskingVote);
+
+	LOG_TRACE_ARGS ("Called from node (%u) for lockid %u.%u and " \
+			"seq %u.%u\n", NodeAskingVote,
+			HI (PublishToVote->dir_ent),
+			LO (PublishToVote->dir_ent),
+			HI (PublishToVote->seq_num),
+			LO (PublishToVote->seq_num));
+
+	num_nodes = osb->num_cfg_nodes;
+	Flags = PublishToVote->vote_type;
+
+	PVoteSector = ocfs_malloc (osb->sect_size);
+	if (PVoteSector == NULL) {
+		/*
+		 * Nothing has been acquired yet, so leave directly:
+		 * the finito path fills in and traces PVoteSector and
+		 * would dereference the NULL pointer.
+		 */
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		LOG_EXIT_STATUS (status);
+		return status;
+	}
+
+	/* Each node owns one sector in the vote area, indexed by node number */
+	nodeVoteOffset = osb->vol_layout.vote_sect_off +
+	    (osb->node_num * osb->sect_size);
+
+	status = ocfs_read_force_disk (osb, PVoteSector, osb->sect_size,
+				       nodeVoteOffset);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finito;
+	}
+
+	LOG_TRACE_ARGS
+	    ("Called from node (%u) for lockid %u.%u and seq %u.%u\n",
+	     NodeAskingVote, HI (PublishToVote->dir_ent),
+	     LO (PublishToVote->dir_ent), HI (PublishToVote->seq_num),
+	     LO (PublishToVote->seq_num));
+
+	/* Exclusive vote for */
+	status = ocfs_find_update_res (osb, PublishToVote->dir_ent, &lockres,
+				       NULL, NULL);
+	if (status < 0) {
+#ifdef ALLOW_INODE_UPDATES_WITH_NO_OIN
+		/* if this is an update oin request, check if there is a
+		 * matching inode which
has no lock resource yet */
+		if (Flags & FLAG_FILE_UPDATE_OIN) {
+			/* dir_ent is 64 bits wide: print it as hi.lo like every other trace */
+			LOG_TRACE_ARGS ("Update request received for %u.%u. "
+					"Checking for inode.\n",
+					HI (PublishToVote->dir_ent),
+					LO (PublishToVote->dir_ent));
+			status = ocfs_process_update_inode_request (osb,
+					PVoteSector, PublishToVote,
+					lockres, NodeAskingVote);
+		} else	/* ! FLAG_FILE_UPDATE_OIN */
+#endif
+		{
+			LOG_ERROR_STATUS (status);
+		}
+		goto finito;
+	}
+
+	/* Zero out the vote for everybody, if any already set and hung */
+	for (i = 0; i < num_nodes; i++)
+		PVoteSector->vote[i] = 0;
+
+	/*
+	 * i doubles as the retry counter of the "wait for the running
+	 * transaction" loops further down.  Restart it here, otherwise
+	 * the change-master loop starts counting at num_nodes and polls
+	 * fewer than 10 times (or not at all).
+	 */
+	i = 0;
+
+	/*
+	 ** Check if we have the lock on resource here.
+	 ** ocfs_acquire_lockres(lockResource);
+	 */
+	if ((Flags & FLAG_FILE_DELETE) || (Flags & FLAG_FILE_RENAME)) {
+		status = ocfs_vote_for_del_ren (osb, PublishToVote,
+					NodeAskingVote, PVoteSector, &lockres);
+		goto finito;
+	}
+
+	if (Flags & FLAG_FILE_RELEASE_CACHE) {
+		ocfs_file_entry *pTempEntry = NULL;
+
+		i = 0;
+		LOG_TRACE_STR ("Called for FLAG_FILE_RELEASE_CACHE");
+
+		if (!osb->commit_cache_exec) {
+			osb->needs_flush = true;
+			/* Give a running transaction up to 10 * 100ms to finish */
+			while ((osb->trans_in_progress) && (i < 10)) {
+				ocfs_sleep (100);	/* in ms */
+				i++;
+			}
+
+			/* Stays -EFAIL (no vote written) if the transaction is still running */
+			status = -EFAIL;
+
+			if (osb->trans_in_progress == false) {
+				osb->commit_cache_exec = true;
+				ocfs_commit_cache (osb, true);
+				osb->needs_flush = false;
+				osb->commit_cache_exec = false;
+
+				if (lockres->oin != NULL) {
+					ocfs_flush_data (lockres->oin);
+					lockres->lock_type = OCFS_DLM_NO_LOCK;
+				}
+
+				status = ocfs_get_file_entry (osb, &pTempEntry,
+							PublishToVote->dir_ent);
+				if (status < 0) {
+					LOG_ERROR_STATUS (status);
+					goto finito;
+				}
+
+				/* At this stage there is nothing in disk, so */
+				/* no need to update cache, as there is */
+				/* nothing there */
+				if (DISK_LOCK_FILE_LOCK (pTempEntry) >
+				    OCFS_DLM_NO_LOCK) {
+					ub8 tmp = PublishToVote->dir_ent;
+
+					DISK_LOCK_FILE_LOCK (pTempEntry) =
+					    OCFS_DLM_NO_LOCK;
+
+					status = ocfs_write_force_disk (osb,
+							pTempEntry,
+							osb->sect_size,
+							tmp);
+					if (status < 0) {
+						LOG_ERROR_STATUS (status);
+						/* pTempEntry is local to this block: free it before bailing out */
+						ocfs_safefree (pTempEntry);
+						goto finito;
+					}
+
lockres->lock_type = OCFS_DLM_NO_LOCK; + } + + ocfs_safefree (pTempEntry); + + PVoteSector->vote[NodeAskingVote] = + FLAG_VOTE_NODE; + status = 0; + } + + LOG_TRACE_STR ("Exiting for FLAG_FILE_RELEASE_CACHE"); + goto finito; + } + + LOG_TRACE_STR ("Exiting for FLAG_FILE_RELEASE_CACHE"); + } + + if (PublishToVote->vote_type & FLAG_FILE_UPDATE_OIN) { + ocfs_inode *oin; + + LOG_TRACE_ARGS + ("Got update oin from node (%u) for lockid %u.%u " + "and seq %u.%u\n", NodeAskingVote, + HI (PublishToVote->dir_ent), LO (PublishToVote->dir_ent), + HI (PublishToVote->seq_num), LO (PublishToVote->seq_num)); + + /* + ** Set the verify oin flag on the oin....??? Assumption is that + ** we have Lock resource or oin lock + */ + if (lockres->oin != NULL) { + oin = lockres->oin; + ocfs_down_sem (&(oin->main_res), true); + acq_oin = true; + + /* + ** Get the main resource too + */ + LOG_TRACE_STR ("Now calling UPDATE_OIN()!"); + UPDATE_OIN (lockres->oin); + + if (acq_oin) { + ocfs_up_sem (&(oin->main_res)); + acq_oin = false; + } + PVoteSector->dir_ent = PublishToVote->dir_ent; + PVoteSector->seq_num = PublishToVote->seq_num; + PVoteSector->vote[NodeAskingVote] = + FLAG_VOTE_OIN_UPDATED; + } +#ifdef ALLOW_INODE_UPDATES_WITH_NO_OIN + else { /* lockResource->oin == NULL */ + + LOG_TRACE_STR ("a lock resource exists, but no oin. 
" + "calling ocfs_process_update_inode_request.\n"); + status = + ocfs_process_update_inode_request (osb, PVoteSector, + PublishToVote, lockres, + NodeAskingVote); + } +#endif + goto finito; + } + + /* + ** If there is a masternode and it is alive ask the node asking for + ** vote to update its state + */ + if (lockres->master_node_num != OCFS_INVALID_NODE_NUM) { + if (lockres->master_node_num == osb->node_num) { + if (Flags & FLAG_CHANGE_MASTER) { + osb->needs_flush = true; + LOG_TRACE_ARGS + ("Got Change Master from Node (%u) for Lock " + "0x%08x.0x%08x, Seq %u.%u\n", + NodeAskingVote, + HI (PublishToVote->dir_ent), + LO (PublishToVote->dir_ent), + HI (PublishToVote->seq_num), + LO (PublishToVote->seq_num)); + + while ((osb->trans_in_progress) && (i < 10)) { + ocfs_sleep (100); /* in ms */ + i++; + } + + status = -EFAIL; + + if (osb->trans_in_progress == false) { + ub8 tmp = PublishToVote->dir_ent; + + ocfs_commit_cache (osb, true); + lockres->master_node_num = + NodeAskingVote; + osb->needs_flush = false; + + if (lockres->oin != NULL) { + ocfs_flush_data (lockres->oin); + lockres->lock_type = + OCFS_DLM_NO_LOCK; + } + + status = + ocfs_get_file_entry (osb, &pFileEntry, + PublishToVote->dir_ent); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finito; + } + + LOG_TRACE_ARGS + ("Got change master from node (%u) for " + "lockid %u.%u and seq %u.%u\n", + NodeAskingVote, + HI (PublishToVote->dir_ent), + LO (PublishToVote->dir_ent), + HI (PublishToVote->seq_num), + LO (PublishToVote->seq_num)); + + if (lockres->oin) + DISK_LOCK_OIN_MAP (pFileEntry) + |= (1 << osb->node_num); + + DISK_LOCK_CURRENT_MASTER (pFileEntry) = + NodeAskingVote; + + /* Write new master on the disk */ + status = + ocfs_write_disk (osb, pFileEntry, + osb->sect_size, tmp); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finito; + } + + lockres->master_node_num = + NodeAskingVote; + PVoteSector->vote[NodeAskingVote] = + FLAG_VOTE_NODE; + status = 0; + } + } else if (Flags & 
FLAG_ADD_OIN_MAP) { + status = + ocfs_get_file_entry (osb, &pFileEntry, + PublishToVote->dir_ent); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finito; + } + + LOG_TRACE_ARGS + ("Got add oin map from node (%u) for " + "lockid %u.%u and seq %u.%u\n", + NodeAskingVote, + HI (PublishToVote->dir_ent), + LO (PublishToVote->dir_ent), + HI (PublishToVote->seq_num), + LO (PublishToVote->seq_num)); + + if ((pFileEntry-> + sync_flags & OCFS_SYNC_FLAG_NAME_DELETED) + || + (!(pFileEntry-> + sync_flags & OCFS_SYNC_FLAG_VALID))) { + PVoteSector->vote[NodeAskingVote] = + FLAG_VOTE_FILE_DEL; + PVoteSector->open_handle = false; + } else { + ub8 tmp = PublishToVote->dir_ent; + + DISK_LOCK_OIN_MAP (pFileEntry) |= + (1 << NodeAskingVote); + + /* Write new map on the disk */ + status = + ocfs_write_disk (osb, pFileEntry, + osb->sect_size, tmp); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finito; + } + + /* Add this node to the oin map on the file entry */ + lockres->oin_openmap = + DISK_LOCK_OIN_MAP (pFileEntry); + PVoteSector->vote[NodeAskingVote] = + FLAG_VOTE_NODE; + } + } + } else { + if (IS_NODE_ALIVE + (osb->publ_map, lockres->master_node_num, + OCFS_MAXIMUM_NODES)) { + LOG_TRACE_ARGS + ("Got master req from node (%u) for " + "lockid %u.%u and seq %u.%u currmaster is (%u)\n", + NodeAskingVote, + HI (PublishToVote->dir_ent), + LO (PublishToVote->dir_ent), + HI (PublishToVote->seq_num), + LO (PublishToVote->seq_num), + lockres->master_node_num); + + /* + ** We have no business voting on this lock + */ + PVoteSector->vote[NodeAskingVote] = + FLAG_VOTE_UPDATE_RETRY; + PVoteSector->open_handle = false; + } else { + /* + ** Master Node is dead and a vote is needed to create a new master + */ + PVoteSector->open_handle = false; + + LOG_TRACE_ARGS + ("Got master req from node (%u) for lockid %u.%u " + "and seq %u.%u current master is dead\n", + NodeAskingVote, + HI (PublishToVote->dir_ent), + LO (PublishToVote->dir_ent), + HI (PublishToVote->seq_num), + LO 
(PublishToVote->seq_num)); + + PVoteSector->vote[NodeAskingVote] = + FLAG_VOTE_NODE; + + if ((!(Flags & FLAG_DIR)) && + ((Flags & FLAG_FILE_EXTEND) + || (Flags & FLAG_FILE_UPDATE))) { + LOG_TRACE_ARGS + ("EXTEND/UPDATE: master_node_num = %u\n", + lockres->master_node_num); + if (lockres->oin) { + PVoteSector->open_handle = true; + } + } + } + } + } else { + /* Vote for the node */ + + LOG_TRACE_ARGS + ("Got master req from node (%u) for lockid %u.%u " + "and seq %u.%u no current master\n", NodeAskingVote, + HI (PublishToVote->dir_ent), LO (PublishToVote->dir_ent), + HI (PublishToVote->seq_num), LO (PublishToVote->seq_num)); + + PVoteSector->vote[NodeAskingVote] = FLAG_VOTE_NODE; + PVoteSector->open_handle = false; + + if ((!(Flags & FLAG_DIR)) && + ((Flags & FLAG_FILE_EXTEND) || (Flags & FLAG_FILE_UPDATE))) + { + LOG_TRACE_STR ("EXTEND/UPDATE: no master"); + if (lockres->oin) { + PVoteSector->open_handle = true; + } + } + } + + finito: + PVoteSector->dir_ent = PublishToVote->dir_ent; + PVoteSector->seq_num = PublishToVote->seq_num; + + LOG_TRACE_ARGS ("Voting for node (%u) for lockid %u.%u and seq %u.%u " + "vote %u openhandle %u\n", NodeAskingVote, + HI (PVoteSector->dir_ent), LO (PVoteSector->dir_ent), + HI (PVoteSector->seq_num), LO (PVoteSector->seq_num), + PVoteSector->vote[NodeAskingVote], + PVoteSector->open_handle); + + if (status >= 0) { + /* Write the vote sector */ + nodeVoteOffset = osb->vol_layout.vote_sect_off + + (osb->node_num * osb->sect_size); + + status = + ocfs_write_disk (osb, PVoteSector, osb->sect_size, nodeVoteOffset); + if (status < 0) { + LOG_ERROR_STATUS (status); + } + } + + if (pFileEntry) + ocfs_release_file_entry (pFileEntry); + + ocfs_safefree (PVoteSector); + + LOG_TRACE_ARGS + ("Exited from node (%u) for lockid %u.%u and seq %u.%u\n", + NodeAskingVote, HI (PublishToVote->dir_ent), + LO (PublishToVote->dir_ent), HI (PublishToVote->seq_num), + LO (PublishToVote->seq_num)); + + LOG_EXIT_STATUS (status); + return status; +} /* 
ocfs_process_vote */ + +/* + * ocfs_nm_thread() + * + */ +int ocfs_nm_thread (ocfs_super * mount_osb) +{ + int status; +#if !defined(DLM_THREAD_PER_VOLUME) + struct list_head *iterEntry; +#endif + ub1 *allocBuffer = NULL; + ub1 *buffer = NULL; + ub1 *cfgBuffer = NULL; + ocfs_super *osb = NULL; + ocfs_publish *publish; + ocfs_publish *publish_to_vote = NULL; + ub4 length = 0; + ub4 i; + ub4 j; + ub4 highestVoteNode = 0; + ub4 maxAllocLen = 0; + ub8 startOffset = 0; + ub4 num_nodes = 0; + sb4 voteNode = -1; + bool bGlobalResourceAcquired = false; + int ret = 0; + ocfs_node_config_hdr *pNodeCfgHdr = NULL; + ub1 *p; + + LOG_ENTRY (); + + /* For each mounted volume reiterate the time stamp on the publish sector */ +#if defined(DLM_THREAD_PER_VOLUME) + if (!mount_osb) { + LOG_ERROR_STATUS (status = -EFAIL); + goto finally; + } +#endif + +#if !defined(DLM_THREAD_PER_VOLUME) + ocfs_down_sem (&(OcfsGlobalCtxt.res), true); + bGlobalResourceAcquired = true; +#endif + + /* Step thru each osb */ +#if defined(DLM_THREAD_PER_VOLUME) + for (osb = mount_osb, j = 0; j < 1; ++j) +#else + list_for_each (iterEntry, &(OcfsGlobalCtxt.osb_next)) +#endif + { +#if !defined(DLM_THREAD_PER_VOLUME) + if (bGlobalResourceAcquired) { + ocfs_up_sem (&(OcfsGlobalCtxt.res)); + bGlobalResourceAcquired = false; + } + + /* Retrieve the next osb */ + osb = list_entry (iterEntry, ocfs_super, osb_next); + OCFS_ASSERT (osb); +#endif + + /* Ensure that the volume is valid ... */ + if (osb->obj_id.type != OCFS_TYPE_OSB) + continue; + + /* ... 
and that it is mounted */ + if (osb->osb_flags & OCFS_OSB_FLAGS_BEING_DISMOUNTED) { + /* TODO LINUX We need to remove this volume from this list */ + continue; + } + + if (!time_after (jiffies, osb->hbt)) + continue; + + if (osb->vol_state == VOLUME_MOUNTED) { + if (osb->needs_flush) { + i = 0; + while ((osb->trans_in_progress) && (i < 10)) { + ocfs_sleep (100); /* in ms */ + i++; + } + + if (osb->trans_in_progress == false) { + ocfs_commit_cache (osb, false); + osb->needs_flush = false; + } + } + } + + length = (osb->num_cfg_nodes + 4) * osb->sect_size; + length = OCFS_ALIGN (length, PAGE_SIZE); + + if ((allocBuffer == NULL) || (length > maxAllocLen)) { + ocfs_safefree (allocBuffer); + + allocBuffer = ocfs_malloc (length); + if (allocBuffer == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + maxAllocLen = length; + } + + cfgBuffer = allocBuffer; + + /* Get the Publish Sector start Offset */ + startOffset = osb->vol_layout.new_cfg_off; + + /* Read disk for Publish Sectors of all nodes */ + status = + ocfs_read_force_disk (osb, cfgBuffer, length, startOffset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } +// /* If some node is asking us to add it to the config... */ +// /* ocfs_add_to_disk_config */ +// if(osb->bConfigUpdateNeeded) +// { +// ocfs_update_node_config(osb); +// osb->bConfigUpdateNeeded = false; +// } + + /* If another node was added to the config read and update the cfg */ + pNodeCfgHdr = (ocfs_node_config_hdr *) (cfgBuffer + osb->sect_size); + + if ((osb->cfg_seq_num != pNodeCfgHdr->seq_num) || + (osb->num_cfg_nodes != pNodeCfgHdr->num_nodes)) { + status = ocfs_chk_update_config (osb); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + } + + /* re read the publish based on the new config info... 
*/
+		if (length < ((osb->num_cfg_nodes + 4) * osb->sect_size)) {
+			length = (osb->num_cfg_nodes + 4) * osb->sect_size;
+			length = OCFS_ALIGN (length, PAGE_SIZE);
+
+			if ((allocBuffer == NULL) || (length > maxAllocLen)) {
+				ocfs_safefree (allocBuffer);
+
+				allocBuffer = ocfs_malloc (length);
+				if (allocBuffer == NULL) {
+					LOG_ERROR_STATUS (status = -ENOMEM);
+					goto finally;
+				}
+				maxAllocLen = length;
+				cfgBuffer = allocBuffer;
+			}
+
+			startOffset = osb->vol_layout.new_cfg_off;
+
+			/* Read disk for Publish Sectors of all nodes */
+			status =
+			    ocfs_read_force_disk (osb, cfgBuffer, length,
+						  startOffset);
+			if (status < 0) {
+				LOG_ERROR_STATUS (status);
+				goto finally;
+			}
+		}
+
+		/* The publish sectors follow the first four sectors of the area just read */
+		buffer = allocBuffer + (4 * osb->sect_size);
+
+		/* Total number of nodes in this volume */
+		num_nodes = osb->num_cfg_nodes;
+
+		LOG_TRACE_ARGS ("Number of nodes (%d)\n", num_nodes);
+
+		/* Update the Publish Map */
+		ocfs_update_publish_map (osb, buffer, false);
+
+		LOG_TRACE_ARGS ("Updated PublishMap: 0x%08x.0x%08x\n",
+				HI (osb->publ_map), LO (osb->publ_map));
+
+		/* Update the timestamp on disk to indicate that it is alive */
+		ocfs_nm_heart_beat (osb, HEARTBEAT_METHOD_DISK, 0);
+
+		/*
+		 * Vote requests are per volume.  Forget whatever was found
+		 * on a previous osb: publish_to_vote points into allocBuffer,
+		 * which has been re-read (and possibly reallocated) for this
+		 * volume, and a stale voteNode would make us vote on a
+		 * request that was never published here.
+		 */
+		voteNode = -1;
+		publish_to_vote = NULL;
+
+		/* Check for the highest node looking for a vote, if anybody is looking */
+		for (i = 0, p = buffer; i < num_nodes; i++, p += osb->sect_size) {
+			publish = (ocfs_publish *) p;
+			if ((publish->vote == 0x1) &&
+			    (publish->vote_map & (1 << osb->node_num))) {
+				LOG_TRACE_ARGS
+				    ("Node(%u): Vote=%s Dirty=%s Type=%u\n", i,
+				     publish->vote ? "yes" : "no",
+				     publish->dirty ? "yes" : "no",
+				     publish->vote_type);
+
+				highestVoteNode = i;
+
+				/* Check if the node is alive or not */
+				if (IS_NODE_ALIVE
+				    (osb->publ_map, highestVoteNode,
+				     num_nodes)) {
+					voteNode = highestVoteNode;
+					publish_to_vote = publish;
+				} else {
+					LOG_TRACE_ARGS
+					    ("Calling ocfs_recover_vol for NodeNum (%u)\n",
+					     highestVoteNode);
+					/* Recovery...
*/ + status = + ocfs_recover_vol (osb, + highestVoteNode); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + } + } + } + + if ((voteNode >= 0) && (voteNode != osb->node_num)) { + LOG_TRACE_ARGS ("Highest Node asking for Vote (%d) " + "local nodenum is (%d) node map is 0x%08x.0x%08x\n", + voteNode, osb->node_num, + HI (osb->publ_map), LO (osb->publ_map)); + + publish = (ocfs_publish *) (buffer + + (osb->node_num * + osb->sect_size)); + + if (publish->vote) { + LOG_TRACE_ARGS + ("This node %d wanted to create but was not the " + "highest number node %d\n", osb->node_num, + voteNode); + publish->vote = 0; + } + ocfs_process_vote (osb, publish_to_vote, voteNode); + } + osb->hbt = 50 + jiffies; + } + + finally: + if (bGlobalResourceAcquired) { + ocfs_up_sem (&(OcfsGlobalCtxt.res)); + bGlobalResourceAcquired = false; + } + ocfs_safefree (allocBuffer); + + LOG_EXIT_STATUS (ret); + return ret; +} /* ocfs_nm_thread() */ diff -urNp ocfs/fs/ocfs/Common/ocfsgensysfile.c 2.4.20pre5aa2/fs/ocfs/Common/ocfsgensysfile.c --- ocfs/fs/ocfs/Common/ocfsgensysfile.c Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/ocfsgensysfile.c Fri Sep 6 01:46:16 2002 @@ -0,0 +1,1073 @@ +/* + * ocfsgensysfile.c + * + * Initialize, read, write, etc. system files. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran,
+ *          Manish Singh, Wim Coekaerts
+ */
+
+#ifdef __KERNEL__
+#include
+#else
+#include
+#endif
+
+/* Tracing */
+#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_MISC
+
+/*
+ * ocfs_init_system_file()
+ *
+ * Create the on-disk file entry of system file FileId.
+ *
+ * The entry name is chosen from the id range FileId falls into, the
+ * entry is set up to use local extents and is written to its slot at
+ * (FileId * sect_size) + root_int_off.
+ *
+ * Returns 0 on success, -ENOMEM if no file entry could be allocated,
+ * or the status of the failed disk write.
+ */
+int ocfs_init_system_file (ocfs_super * osb, ub4 FileId)
+{
+	int status = 0;
+	ub8 diskOffset = 0;
+	ocfs_file_entry *fe = NULL;
+	ub4 length = 0;
+	char fileName[OCFS_MAX_FILENAME_LENGTH];
+
+	LOG_ENTRY_ARGS ("(FileId = %u)\n", FileId);
+
+	/*
+	 * Clear the whole name buffer.  sizeof (OCFS_MAX_FILENAME_LENGTH)
+	 * is the size of an integer constant, not of the array.
+	 */
+	memset (fileName, 0, sizeof (fileName));
+
+	/* Each kind of system file owns a range of 32 ids */
+	if ((FileId >= OCFS_FILE_DIR_ALLOC) &&
+	    (FileId < (OCFS_FILE_DIR_ALLOC + 32))) {
+		sprintf (fileName, "%s%d", OCFS_DIR_FILENAME, FileId);
+	} else if ((FileId >= OCFS_FILE_DIR_ALLOC_BITMAP) &&
+		   (FileId < (OCFS_FILE_DIR_ALLOC_BITMAP + 32))) {
+		sprintf (fileName, "%s%d", OCFS_DIR_BITMAP_FILENAME, FileId);
+	} else if ((FileId >= OCFS_FILE_FILE_ALLOC) &&
+		   (FileId < (OCFS_FILE_FILE_ALLOC + 32))) {
+		sprintf (fileName, "%s%d", OCFS_FILE_EXTENT_FILENAME, FileId);
+	} else if ((FileId >= OCFS_FILE_FILE_ALLOC_BITMAP) &&
+		   (FileId < (OCFS_FILE_FILE_ALLOC_BITMAP + 32))) {
+		sprintf (fileName, "%s%d", OCFS_FILE_EXTENT_BITMAP_FILENAME,
+			 FileId);
+	} else if ((FileId >= LOG_FILE_BASE_ID)
+		   && (FileId < (LOG_FILE_BASE_ID + 32))) {
+		sprintf (fileName, "%s%d", OCFS_RECOVER_LOG_FILENAME, FileId);
+	} else if ((FileId >= CLEANUP_FILE_BASE_ID) &&
+		   (FileId < (CLEANUP_FILE_BASE_ID + 32))) {
+		sprintf (fileName, "%s%d", OCFS_CLEANUP_LOG_FILENAME, FileId);
+	} else if ((FileId >= OCFS_FILE_VOL_META_DATA) &&
+		   (FileId < (OCFS_FILE_VOL_META_DATA + 32))) {
+		sprintf (fileName, "%s", "VolMetaDataFile");
+	} else if ((FileId >= OCFS_FILE_VOL_LOG_FILE) &&
+		   (FileId < (OCFS_FILE_VOL_LOG_FILE +
32))) { + sprintf (fileName, "%s", "VolMetaDataLogFile"); +#ifdef LOCAL_ALLOC + } else if ((FileId >= OCFS_VOL_BITMAP_FILE) && + (FileId < (OCFS_FILE_VOL_LOG_FILE + 64))) { + sprintf (fileName, "%s", "VolBitMapFile"); +#endif + } else { + sprintf (fileName, "%s", "UKNOWNSysFile"); + } + + diskOffset = (FileId * osb->sect_size) + osb->vol_layout.root_int_off; + + length = osb->sect_size; + + fe = ocfs_allocate_file_entry (); + if (fe == NULL) { + status = -ENOMEM; + goto leave; + } + + /* Set the Flag to use the Local Extents */ + fe->local_ext = true; + fe->granularity = -1; + + strcpy (fe->signature, OCFS_FILE_ENTRY_SIGNATURE); + fe->next_free_ext = 0; + + /* Add a file Name */ + memcpy (fe->filename, fileName, strlen (fileName)); + (fe->filename)[strlen (fileName)] = '\0'; + + /* Set the Valid bit here */ + SET_VALID_BIT (fe->sync_flags); + fe->sync_flags &= ~(OCFS_SYNC_FLAG_CHANGE); + fe->this_sector = diskOffset; + fe->last_ext_ptr = 0; + + status = ocfs_write_disk (osb, (void *) fe, osb->sect_size, diskOffset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + leave: + if (fe) { + ocfs_release_file_entry (fe); + } + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_init_system_file */ + +/* + * ocfs_read_system_file() + * + */ +int ocfs_read_system_file (ocfs_super * osb, + ub4 FileId, void *Buffer, ub8 Length, ub8 Offset) +{ + int status = 0; + ocfs_file_entry *fe = NULL; + void *extentBuffer = NULL; + ub4 numExts = 0, i; + ocfs_io_runs *IoRuns = NULL; + ub8 templength; + ub4 *tempBuffer; + ub8 tempOffset = 0; + bool bWriteThru = false; + + LOG_ENTRY_ARGS ("(FileId = %u)\n", FileId); + + if ((FileId == (ub4) (OCFS_FILE_VOL_LOG_FILE + osb->node_num)) || + (FileId == (ub4) (OCFS_FILE_VOL_META_DATA + osb->node_num))) { + bWriteThru = true; + } + + /* Read the File Entry corresponding to File Id */ + status = ocfs_force_get_file_entry (osb, &fe, + (FileId * osb->sect_size) + + osb->vol_layout.root_int_off, + bWriteThru); + if (status < 
0) {
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+
+	if (!IS_VALID_FILE_ENTRY (fe)) {
+		LOG_ERROR_STATUS(status = -EINVAL);
+		goto leave;
+	}
+
+	status = ocfs_find_extents_of_system_file (osb, Offset, Length,
+						 fe, &extentBuffer, &numExts);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+
+	OCFS_ASSERT (extentBuffer);
+
+	IoRuns = (ocfs_io_runs *) extentBuffer;
+	tempOffset = 0;
+	templength = 0;
+	tempBuffer = Buffer;
+
+	for (i = 0; i < numExts; i++) {
+		/*
+		 * Step past the bytes read by the previous run.  byte_cnt
+		 * is a byte count while tempBuffer is a ub4 pointer, so the
+		 * addition has to be done on a byte pointer; adding it to
+		 * tempBuffer directly would skip four times too far.
+		 */
+		tempBuffer = (ub4 *) ((ub1 *) tempBuffer + templength);
+		/* ?? need to align both the length and buffer and also */
+		/* offset ( at least the starting one) */
+		tempOffset = IoRuns[i].disk_off;
+		templength = IoRuns[i].byte_cnt;
+
+		if (bWriteThru) {
+			status =
+			    ocfs_read_disk (osb, (void *) tempBuffer,
+					    (ub4) templength, tempOffset);
+			if (status < 0) {
+				LOG_ERROR_STATUS (status);
+				goto leave;
+			}
+		} else {
+			status =
+			    ocfs_read_metadata (osb, (void *) tempBuffer,
+						(ub4) templength, tempOffset);
+			if (status < 0) {
+				LOG_ERROR_STATUS (status);
+				goto leave;
+			}
+		}
+	}
+
+      leave:
+	if (fe) {
+		ocfs_release_file_entry (fe);
+	}
+	/* The run list was allocated by ocfs_find_extents_of_system_file() */
+	ocfs_safefree (extentBuffer);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_read_system_file */
+
+
+/*
+ * ocfs_write_system_file()
+ *
+ * Write Length bytes from Buffer to system file FileId, starting at
+ * file offset Offset, one disk run at a time.
+ *
+ * This node's own volume log and volume metadata files are written
+ * with ocfs_write_force_disk(), every other system file with
+ * ocfs_write_metadata().
+ *
+ * Returns 0 or a positive status on success, a negative status on
+ * failure.
+ */
+int ocfs_write_system_file (ocfs_super * osb,
+		 ub4 FileId, void *Buffer, ub8 Length, ub8 Offset)
+{
+	int status = 0;
+	ocfs_file_entry *fe = NULL;
+	void *extentBuffer = NULL;
+	ub4 numExts = 0, i;
+	ocfs_io_runs *IoRuns = NULL;
+	ub8 templength;
+	ub4 *tempBuffer;
+	ub8 tempOffset = 0;
+	bool bWriteThru = false;
+
+	LOG_ENTRY_ARGS ("(FileId = %u)\n", FileId);
+
+	if ((FileId == (OCFS_FILE_VOL_LOG_FILE + osb->node_num)) ||
+	    (FileId == (OCFS_FILE_VOL_META_DATA + osb->node_num))) {
+		bWriteThru = true;
+	}
+
+	/* Read the File Entry corresponding to File Id */
+	status = ocfs_force_get_file_entry (osb, &fe,
+					    (FileId * osb->sect_size) +
+					    osb->vol_layout.root_int_off,
+					    bWriteThru);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+
goto leave;
+	}
+	if (!IS_VALID_FILE_ENTRY (fe)) {
+		LOG_ERROR_STATUS(status = -EINVAL);
+		goto leave;
+	}
+
+	status = ocfs_find_extents_of_system_file (osb, Offset, Length,
+						 fe, &extentBuffer, &numExts);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+
+	OCFS_ASSERT (extentBuffer);
+
+	IoRuns = (ocfs_io_runs *) extentBuffer;
+	tempOffset = 0;
+	templength = 0;
+	tempBuffer = Buffer;
+
+	for (i = 0; i < numExts; i++) {
+		/*
+		 * Step past the bytes written by the previous run.  byte_cnt
+		 * is a byte count while tempBuffer is a ub4 pointer, so the
+		 * addition has to be done on a byte pointer; adding it to
+		 * tempBuffer directly would skip four times too far.
+		 */
+		tempBuffer = (ub4 *) ((ub1 *) tempBuffer + templength);
+		/* ?? need to align both the length and buffer and also */
+		/* offset ( at least the starting one) */
+		tempOffset = IoRuns[i].disk_off;
+		templength = IoRuns[i].byte_cnt;
+		/* ?? Also need to read the data from the start of sector */
+		/* and then munge it . */
+		if (bWriteThru) {
+			status =
+			    ocfs_write_force_disk (osb, (void *) tempBuffer,
+						   (ub4) templength, tempOffset);
+			if (status < 0) {
+				LOG_ERROR_STATUS (status);
+				goto leave;
+			}
+		} else {
+			status =
+			    ocfs_write_metadata (osb, (void *) tempBuffer,
+						 (ub4) templength, tempOffset);
+			if (status < 0) {
+				LOG_ERROR_STATUS (status);
+				goto leave;
+			}
+		}
+	}
+
+      leave:
+	if (fe) {
+		ocfs_release_file_entry (fe);
+	}
+	/* The run list was allocated by ocfs_find_extents_of_system_file() */
+	ocfs_safefree (extentBuffer);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_write_system_file */
+
+/*
+ * ocfs_file_to_disk_off()
+ *
+ * Translate file offset Offset of system file FileId into a disk
+ * offset: the start of the first run that backs it.
+ *
+ * Returns the disk offset, or 0 if the file entry or its extents
+ * could not be read.
+ */
+ub8 ocfs_file_to_disk_off (ocfs_super * osb, ub4 FileId, ub8 Offset)
+{
+	int status = 0;
+	ub8 StartOffset = 0;
+	void *Buffer = NULL;
+	ocfs_file_entry *fe = NULL;
+	ocfs_io_runs *IoRuns;
+	ub4 NumExts = 0;
+	bool bWriteThru = false;
+
+	LOG_ENTRY_ARGS ("(FileId = %u)\n", FileId);
+
+	if ((FileId == (OCFS_FILE_VOL_LOG_FILE + osb->node_num)) ||
+	    (FileId == (OCFS_FILE_VOL_META_DATA + osb->node_num))) {
+		bWriteThru = true;
+	}
+
+	/* Read the File Entry corresponding to File Id */
+	status = ocfs_force_get_file_entry (osb, &fe,
+					    (FileId * osb->sect_size) +
+					    osb->vol_layout.root_int_off,
+					    bWriteThru);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+	if
(!IS_VALID_FILE_ENTRY (fe)) { + LOG_ERROR_STATUS(status = -EINVAL); + goto leave; + } + + status = ocfs_find_extents_of_system_file (osb, Offset, osb->sect_size, + fe, &Buffer, &NumExts); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + OCFS_ASSERT (Buffer); + + IoRuns = (ocfs_io_runs *) Buffer; + /* Return the disk offset of first run . */ + StartOffset = (IoRuns[0].disk_off); + + leave: + if (fe) { + ocfs_release_file_entry (fe); + } + ocfs_safefree (Buffer); + + LOG_EXIT_ARGS ("%u.%u\n", HI (StartOffset), LO (StartOffset)); + return StartOffset; +} /* ocfs_file_to_disk_off */ + + +/* + * ocfs_get_system_file_size() + * + */ +int ocfs_get_system_file_size (ocfs_super * osb, ub4 FileId, ub8 * Length, ub8 * AllocSize) +{ + int status = 0; + ocfs_file_entry *fe = NULL; + bool bWriteThru = false; + + LOG_ENTRY_ARGS ("(FileId = %u)\n", FileId); + + if ((FileId == (OCFS_FILE_VOL_LOG_FILE + osb->node_num)) || + (FileId == (OCFS_FILE_VOL_META_DATA + osb->node_num))) { + bWriteThru = true; + } + *AllocSize = *Length = 0; + + status = ocfs_force_get_file_entry (osb, &fe, + (FileId * osb->sect_size) + + osb->vol_layout.root_int_off, + bWriteThru); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + if (!IS_VALID_FILE_ENTRY (fe)) { + LOG_ERROR_STATUS(status = -EINVAL); + goto leave; + } + + *Length = (ub8) (fe->file_size); + *AllocSize = (ub8) (fe->alloc_size); + + leave: + if (fe) { + ocfs_release_file_entry (fe); + } + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_get_system_file_size */ + +#ifdef __KERNEL__ +/* + * ocfs_extend_system_file() + * + */ +int ocfs_extend_system_file (ocfs_super * osb, ub4 FileId, ub8 FileSize) +{ + int status = 0; + ub8 actualDiskOffset = 0, actualLength = 0; + ocfs_file_entry *fe = NULL; + bool bWriteThru = false; + + LOG_ENTRY_ARGS ("(FileId = %u, Size = %u.%u)\n", FileId, HI (FileSize), + LO (FileSize)); + + if ((FileId == (OCFS_FILE_VOL_LOG_FILE + osb->node_num)) || + (FileId == 
(OCFS_FILE_VOL_META_DATA + osb->node_num))) { + bWriteThru = true; + } + OCFS_ASSERT (osb); + + status = ocfs_force_get_file_entry (osb, &fe, + (FileId * osb->sect_size) + + osb->vol_layout.root_int_off, + bWriteThru); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + if (!IS_VALID_FILE_ENTRY (fe)) { + LOG_ERROR_STATUS (status = -EINVAL); + goto leave; + } + + if (FileSize <= fe->alloc_size) { + fe->file_size = FileSize; + } else { + /* We need to allocate from bitmap */ + ub8 numClusterAlloc = 0, BitmapOffset = 0; + + status = + ocfs_find_contiguous_space_from_bitmap (osb, + FileSize - fe->alloc_size, + &BitmapOffset, + &numClusterAlloc, true); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + actualDiskOffset = + (BitmapOffset * osb->vol_layout.cluster_size) + + osb->vol_layout.data_start_off; + actualLength = + (ub8) (numClusterAlloc * osb->vol_layout.cluster_size); + + status = ocfs_allocate_extent (osb, NULL, fe, actualDiskOffset, + actualLength); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + fe->alloc_size += actualLength; + fe->file_size = FileSize; + } + + if (!bWriteThru) { + DISK_LOCK_CURRENT_MASTER (fe) = osb->node_num; + DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_ENABLE_CACHE_LOCK; + } + status = ocfs_force_put_file_entry (osb, fe, bWriteThru); + + leave: + if (fe) { + ocfs_release_file_entry (fe); + } + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_extend_system_file */ + +#endif /* __KERNEL__ */ + +/* + * ocfs_find_extents_of_system_file() + * + * Searches for the extents in the file entry passed starting from + * file offset up to the length specified. 
+ */ +int ocfs_find_extents_of_system_file (ocfs_super * osb, + ub8 file_off, + ub8 Length, + ocfs_file_entry * fe, void **Buffer, ub4 * NumEntries) +{ + int status; + ub4 allocSize = 0, size; + ub1 *buffer = NULL; + ub4 k = 0, j; + ub4 Runs, Runoffset; + ub4 length; + ocfs_extent_group *pOcfsExtent = NULL, *pOcfsExtentHeader = NULL; + ocfs_io_runs *IoRuns; + ub8 newOffset = 0, searchVbo, remainingLength = 0; + + LOG_ENTRY (); + + OCFS_ASSERT (osb); + + if (!IS_VALID_FILE_ENTRY (fe)) { + LOG_ERROR_STATUS(status = -EFAIL); + goto leave; + } + + /* ??? need to allocate accoordingly ...as number of runs can be more */ + size = (OCFS_MAX_DATA_EXTENTS * sizeof (ocfs_io_runs)); + size = OCFS_ALIGN (size, PAGE_SIZE); + + IoRuns = ocfs_malloc (size); + if (IoRuns == NULL) { + status = -ENOMEM; + LOG_ERROR_STATUS (status); + goto leave; + } + + memset (IoRuns, 0, size); + + remainingLength = Length; + Runs = 1; + Runoffset = 0; + newOffset = file_off; + + status = -EFAIL; + if (fe->local_ext) { + for (j = 0; j < OCFS_MAX_FILE_ENTRY_EXTENTS; j++) { + if ((fe->extents[j].file_off + + fe->extents[j].num_clusters) > newOffset) { + IoRuns[Runoffset].disk_off = + fe->extents[j].disk_off + + (newOffset - fe->extents[j].file_off); + IoRuns[Runoffset].byte_cnt = + (ub4) ((fe->extents[j].file_off + + fe->extents[j].num_clusters) - + newOffset); + if (IoRuns[Runoffset].byte_cnt >= + remainingLength) { + IoRuns[Runoffset].byte_cnt = + (ub4) remainingLength; + status = 0; + break; + } else { + newOffset += IoRuns[Runoffset].byte_cnt; + remainingLength -= + IoRuns[Runoffset].byte_cnt; + Runs++; + Runoffset++; + } + } + } + + *NumEntries = Runs; + *Buffer = IoRuns; + goto leave; + } else { + /* Extents are branched and we are no longer using Local Extents */ + /* for this File Entry. 
*/ + + allocSize = (NUM_SECTORS_IN_LEAF_NODE + fe->granularity) * + OCFS_SECTOR_SIZE; + + length = (ub4) OCFS_ALIGN (allocSize, PAGE_SIZE); + + buffer = ocfs_malloc (length); + if (buffer == NULL) { + status = -ENOMEM; + LOG_ERROR_STATUS (status); + goto leave; + } + + while (1) { + /* Keep going downwards looking for the Entry, till we hit */ + /* the last Data entry */ + for (k = 0; k < OCFS_MAX_FILE_ENTRY_EXTENTS; k++) { + if ((sb8) (fe->extents[k].file_off + + fe->extents[k].num_clusters) > + newOffset) { + break; + } + } + + if (k == OCFS_MAX_FILE_ENTRY_EXTENTS) { + OCFS_BREAKPOINT (); + } + + memset (buffer, 0, length); + + if (fe->extents[k].disk_off == 0) { + OCFS_BREAKPOINT (); + } + + status = + ocfs_read_metadata (osb, (void *) buffer, allocSize, + fe->extents[k].disk_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + pOcfsExtent = (ocfs_extent_group *) buffer; + + while (pOcfsExtent->type != OCFS_EXTENT_DATA) { + ub8 diskoffset; + if (!IS_VALID_EXTENT_HEADER (pOcfsExtent)) { + LOG_ERROR_STATUS(status = -EFAIL); + goto leave; + } + + OCFS_GET_EXTENT ((sb8) newOffset, pOcfsExtent, + k); + if (k == OCFS_MAX_DATA_EXTENTS) { + OCFS_BREAKPOINT (); + } + + if (pOcfsExtent->extents[k].disk_off == 0) { + OCFS_BREAKPOINT (); + } + diskoffset = pOcfsExtent->extents[k].disk_off; + + memset (buffer, 0, length); + + status = + ocfs_read_metadata (osb, (void *) buffer, + allocSize, + diskoffset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + pOcfsExtent = (ocfs_extent_group *) buffer; + } + + pOcfsExtentHeader = (ocfs_extent_group *) buffer; + + searchVbo = newOffset; + + OCFS_ASSERT (pOcfsExtentHeader->type == + OCFS_EXTENT_DATA); + + if (!IS_VALID_EXTENT_DATA (pOcfsExtentHeader)) { + LOG_ERROR_STATUS(status = -EFAIL); + goto leave; + } + + { + for (j = 0; j < OCFS_MAX_DATA_EXTENTS; j++) { + if ((pOcfsExtent->extents[j].file_off + + pOcfsExtent->extents[j]. 
+ num_clusters) > newOffset) { + IoRuns[Runoffset].disk_off = + pOcfsExtent->extents[j]. + disk_off + (newOffset - + pOcfsExtent-> + extents[j]. + file_off); + IoRuns[Runoffset].byte_cnt = + (ub4) ((pOcfsExtent-> + extents[j]. + file_off + + pOcfsExtent-> + extents[j]. + num_clusters) - + newOffset); + + if (IoRuns[Runoffset]. + byte_cnt >= + remainingLength) { + IoRuns[Runoffset]. + byte_cnt = (ub4) + remainingLength; + status = 0; + break; + } else { + newOffset += + IoRuns[Runoffset]. + byte_cnt; + remainingLength -= + IoRuns[Runoffset]. + byte_cnt; + Runs++; + Runoffset++; + if (Runs >= + OCFS_MAX_DATA_EXTENTS) + { + LOG_ERROR_STR + ("No of Runs are more"); + } + } + } + } + + if (j == OCFS_MAX_DATA_EXTENTS) { + continue; + } else { + *NumEntries = Runs; + *Buffer = IoRuns; + goto leave; + } + } + } + } + + leave: + /* Don't free the IoRuns Memory here */ + ocfs_safefree (buffer); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_find_extents_of_system_file */
+
+/*
+ * ocfs_free_file_extents()
+ *
+ * Describe every allocation referenced by the file entry fe in
+ * LOG_FREE_BITMAP cleanup records and write them, through
+ * ocfs_write_node_log(), to the LOG_CLEANUP log of node LogNodeNum.
+ *
+ * One DISK_ALLOC_VOLUME update is recorded per extent; its length is
+ * num_clusters / cluster_size and its file_off is
+ * (disk_off - data_start_off) / cluster_size. When the extents are not
+ * local to fe, the chain of extent groups is walked from
+ * fe->last_ext_ptr through each group's last_ext_ptr, and one
+ * DISK_ALLOC_EXTENT_NODE update is recorded per group in addition.
+ *
+ * While walking the chain the record is flushed whenever it already
+ * holds FREE_LOG_SIZE updates; whatever is pending at the end is
+ * written once more.
+ *
+ * Returns 0 on success, -ENOMEM when a buffer cannot be allocated,
+ * -EFAIL when an extent group has an unexpected type or fails
+ * validation, or the negative status returned by ocfs_read_metadata()
+ * or ocfs_write_node_log().
+ */
+int ocfs_free_file_extents (ocfs_super * osb, ocfs_file_entry * fe, sb4 LogNodeNum)
+{
+	int status = 0;
+	ub4 i, size, numUpdt = 0;
+	ub4 numBitsAllocated = 0, bitmapOffset = 0;
+	ocfs_cleanup_record *pCleanupLogRec = NULL;
+	ocfs_extent_group *PAllocExtent = NULL;
+
+	LOG_ENTRY ();
+
+	size = sizeof (ocfs_cleanup_record);
+	size = (ub4) OCFS_ALIGN (size, PAGE_SIZE);
+
+	pCleanupLogRec = ocfs_malloc (size);
+	if (pCleanupLogRec == NULL) {
+		status = -ENOMEM;
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+
+	/*
+	 * The record is handed to ocfs_write_node_log() as a whole, so make
+	 * sure the slots that are never filled do not carry stale heap data.
+	 */
+	memset (pCleanupLogRec, 0, size);
+
+	pCleanupLogRec->rec.free.num_free_upds = 0;
+	pCleanupLogRec->log_id = osb->curr_trans_id;
+	pCleanupLogRec->log_type = LOG_FREE_BITMAP;
+
+	if (fe->local_ext) {
+		/* All extents live in the file entry itself */
+		for (i = 0; i < fe->next_free_ext; i++) {
+			numBitsAllocated = (ub4) (fe->extents[i].num_clusters /
+						  (osb->vol_layout.cluster_size));
+
+			bitmapOffset =
+			    (ub4) ((fe->extents[i].disk_off -
+				    osb->vol_layout.data_start_off) /
+				   (osb->vol_layout.cluster_size));
+
+			numUpdt = pCleanupLogRec->rec.free.num_free_upds;
+
+			pCleanupLogRec->rec.free.free_bitmap[numUpdt].length =
+			    numBitsAllocated;
+			pCleanupLogRec->rec.free.free_bitmap[numUpdt].file_off =
+			    bitmapOffset;
+			pCleanupLogRec->rec.free.free_bitmap[numUpdt].type =
+			    DISK_ALLOC_VOLUME;
+			pCleanupLogRec->rec.free.free_bitmap[numUpdt].node_num =
+			    -1;
+
+			(pCleanupLogRec->rec.free.num_free_upds)++;
+		}
+	} else {
+		ub8 diskOffset = 0;
+
+		size = OCFS_ALIGN (sizeof (ocfs_extent_group), PAGE_SIZE);
+
+		PAllocExtent = ocfs_malloc (size);
+		if (PAllocExtent == NULL) {
+			status = -ENOMEM;
+			LOG_ERROR_STATUS (status);
+			goto leave;
+		}
+
+		memset (PAllocExtent, 0, size);
+
+		/* Walk the extent group chain until a zero link is found */
+		diskOffset = fe->last_ext_ptr;
+
+		while (diskOffset != 0) {
+			status =
+			    ocfs_read_metadata (osb, PAllocExtent,
+						(ub4) osb->sect_size, diskOffset);
+			if (status < 0) {
+				LOG_ERROR_STATUS (status);
+				goto leave;
+			}
+
+			if ((PAllocExtent->type != OCFS_EXTENT_DATA) &&
+			    (PAllocExtent->type != OCFS_EXTENT_HEADER)) {
+				LOG_ERROR_STATUS(status = -EFAIL);
+				goto leave;
+			}
+
+			if (PAllocExtent->type == OCFS_EXTENT_DATA) {
+
+				if (!IS_VALID_EXTENT_DATA (PAllocExtent)) {
+					LOG_ERROR_STATUS(status = -EFAIL);
+					goto leave;
+				}
+
+				for (i = 0; i < PAllocExtent->next_free_ext;
+				     i++) {
+					numBitsAllocated =
+					    (ub4) (PAllocExtent->extents[i].
+						   num_clusters /
+						   (osb->vol_layout.
+						    cluster_size));
+					bitmapOffset =
+					    (ub4) ((PAllocExtent->extents[i].
+						    disk_off -
+						    osb->vol_layout.
+						    data_start_off) /
+						   (osb->vol_layout.
+						    cluster_size));
+
+					numUpdt =
+					    pCleanupLogRec->rec.free.
+					    num_free_upds;
+					if (numUpdt >= FREE_LOG_SIZE) {
+						/* Record is full: flush it and start over */
+						status = ocfs_write_node_log (osb,
+									(ocfs_log_record *)
+									pCleanupLogRec,
+									LogNodeNum,
+									LOG_CLEANUP);
+						if (status < 0) {
+							LOG_ERROR_STATUS
+							    (status);
+							goto leave;
+						}
+						numUpdt =
+						    pCleanupLogRec->rec.free.
+						    num_free_upds = 0;
+					}
+
+					pCleanupLogRec->rec.free.
+					    free_bitmap[numUpdt].length =
+					    numBitsAllocated;
+					pCleanupLogRec->rec.free.
+					    free_bitmap[numUpdt].file_off =
+					    bitmapOffset;
+					pCleanupLogRec->rec.free.
+					    free_bitmap[numUpdt].type =
+					    DISK_ALLOC_VOLUME;
+					pCleanupLogRec->rec.free.
+					    free_bitmap[numUpdt].node_num = -1;
+					(pCleanupLogRec->rec.free.
+					 num_free_upds)++;
+				}
+
+				/*
+				 * Re-read the fill level. Inside the loop
+				 * numUpdt is the slot that was just written,
+				 * so reusing it here would overwrite the last
+				 * extent's update and leave an unset slot
+				 * behind the incremented count.
+				 */
+				numUpdt =
+				    pCleanupLogRec->rec.free.num_free_upds;
+				if (numUpdt >= FREE_LOG_SIZE) {
+					status = ocfs_write_node_log (osb,
+								(ocfs_log_record *)
+								pCleanupLogRec,
+								LogNodeNum,
+								LOG_CLEANUP);
+					if (status < 0) {
+						LOG_ERROR_STATUS (status);
+						goto leave;
+					}
+					numUpdt =
+					    pCleanupLogRec->rec.free.
+					    num_free_upds = 0;
+				}
+
+				/* The extent group block itself */
+				pCleanupLogRec->rec.free.free_bitmap[numUpdt].
+				    length = 1;
+				pCleanupLogRec->rec.free.free_bitmap[numUpdt].
+				    file_off = PAllocExtent->alloc_file_off;
+				pCleanupLogRec->rec.free.free_bitmap[numUpdt].
+				    type = DISK_ALLOC_EXTENT_NODE;
+				pCleanupLogRec->rec.free.free_bitmap[numUpdt].
+				    node_num = PAllocExtent->alloc_node;
+				(pCleanupLogRec->rec.free.num_free_upds)++;
+
+				if (PAllocExtent->alloc_file_off == 0) {
+					LOG_ERROR_ARGS
+					    ("File offset was 0 for file %s "
+					     "type DISK_ALLOC_EXTENT_NODE blk 200\n",
+					     fe->filename);
+				}
+			} else {
+				OCFS_ASSERT (PAllocExtent->type ==
+					     OCFS_EXTENT_HEADER);
+
+				if (!IS_VALID_EXTENT_HEADER (PAllocExtent)) {
+					LOG_ERROR_STATUS(status = -EFAIL);
+					goto leave;
+				}
+
+				numUpdt =
+				    pCleanupLogRec->rec.free.num_free_upds;
+				if (numUpdt >= FREE_LOG_SIZE) {
+					status = ocfs_write_node_log (osb,
+								(ocfs_log_record *)
+								pCleanupLogRec,
+								LogNodeNum,
+								LOG_CLEANUP);
+					if (status < 0) {
+						LOG_ERROR_STATUS (status);
+						goto leave;
+					}
+					numUpdt =
+					    pCleanupLogRec->rec.free.
+					    num_free_upds = 0;
+				}
+
+				pCleanupLogRec->rec.free.free_bitmap[numUpdt].
+				    length = 1;
+
+				if (PAllocExtent->alloc_file_off == 0) {
+					OCFS_BREAKPOINT ();
+				}
+
+				pCleanupLogRec->rec.free.free_bitmap[numUpdt].
+				    file_off = PAllocExtent->alloc_file_off;
+				pCleanupLogRec->rec.free.free_bitmap[numUpdt].
+				    type = DISK_ALLOC_EXTENT_NODE;
+				pCleanupLogRec->rec.free.free_bitmap[numUpdt].
+				    node_num = PAllocExtent->alloc_node;
+				(pCleanupLogRec->rec.free.num_free_upds)++;
+			}
+
+			diskOffset = PAllocExtent->last_ext_ptr;
+		}
+	}
+
+	/* Write whatever is still pending in the record */
+	if (pCleanupLogRec->rec.free.num_free_upds > 0) {
+		status =
+		    ocfs_write_node_log (osb, (ocfs_log_record *) pCleanupLogRec,
+					 LogNodeNum, LOG_CLEANUP);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto leave;
+		}
+	}
+
+      leave:
+	ocfs_safefree (PAllocExtent);
+	ocfs_safefree (pCleanupLogRec);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_free_file_extents */
+
+#if !defined(DEBUGOCFS) +/* + * ocfs_write_map_file() + * + */ +int ocfs_write_map_file (ocfs_super * osb) +{ + int status; + ub4 RunsInMap; + ub4 MapIndex; + ub4 length; + ocfs_offset_map *pMapBuffer = NULL; + ub8 fileSize; + ub8 allocSize; + ub8 neededSize; + sb8 foundVolOffset; + sb8 foundlogOffset; + ub4 numRec; + ocfs_file_entry *fe = NULL; + + LOG_ENTRY (); + + RunsInMap = ocfs_extent_map_get_count (&osb->trans_map); + + LOG_TRACE_ARGS ("NumRuns in trans_map (%u)\n", RunsInMap); + + if (RunsInMap == 0) { + status = -EFAIL; + goto leave; + } + + neededSize = + OCFS_ALIGN ((RunsInMap * sizeof (ocfs_offset_map)), PAGE_SIZE); + + status = ocfs_get_system_file_size (osb, + (OCFS_FILE_VOL_META_DATA + osb->node_num), + &fileSize, &allocSize); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + if (allocSize < neededSize) { + LOG_TRACE_STR ("allocSize < neededSize"); + ocfs_extend_system_file (osb, + (OCFS_FILE_VOL_META_DATA + osb->node_num), + neededSize); + } + + pMapBuffer = ocfs_malloc (neededSize); + if (pMapBuffer == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto leave; + } + + numRec = 0; + for (MapIndex = 0; MapIndex < RunsInMap; MapIndex++) { + if (!ocfs_get_next_extent_map_entry (osb, &osb->trans_map, MapIndex, + &foundVolOffset, + &foundlogOffset, &length)) { + /* It means this is a hole */ + continue; + } + +
pMapBuffer[numRec].length = length; + pMapBuffer[numRec].actual_disk_off = foundVolOffset; + pMapBuffer[numRec].log_disk_off = foundlogOffset; + numRec++; + } + + status = ocfs_force_get_file_entry (osb, &fe, + ((OCFS_FILE_VOL_META_DATA + + osb->node_num) * osb->sect_size) + + osb->vol_layout.root_int_off, true); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + if (!IS_VALID_FILE_ENTRY (fe)) { + LOG_ERROR_STR ("Invalid File Entry"); + status = -EINVAL; + goto leave; + } + + ocfs_write_force_disk (osb, (void *) pMapBuffer, neededSize, + osb->log_meta_disk_off); + + fe->file_size = (numRec * sizeof (ocfs_offset_map)); + + status = ocfs_force_put_file_entry (osb, fe, true); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + leave: + if (fe) + ocfs_release_file_entry (fe); + + ocfs_safefree (pMapBuffer); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_write_map_file */ + +#endif /* ! DEBUGOCFS */ diff -urNp ocfs/fs/ocfs/Common/ocfsgentrans.c 2.4.20pre5aa2/fs/ocfs/Common/ocfsgentrans.c --- ocfs/fs/ocfs/Common/ocfsgentrans.c Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/ocfsgentrans.c Fri Sep 6 01:46:16 2002 @@ -0,0 +1,1201 @@ +/* + * ocfsgentrans.c + * + * Logging and recovery for file system structures. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#include + +/* Tracing */ +#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_TRANS + +/* + * ocfs_free_disk_bitmap() + * + * + * + * called by: ocfs_process_record() + */ +int ocfs_free_disk_bitmap (ocfs_super * osb, ocfs_cleanup_record * log_rec) +{ + int status = 0; + ub4 num_upd; + ub4 i; + ub4 node_num; + ocfs_free_log *free_dir_node[OCFS_MAXIMUM_NODES]; + ocfs_free_log *free_ext_node[OCFS_MAXIMUM_NODES]; + ocfs_free_log *free_vol_bits; + ocfs_lock_res *dirnode_lockres[OCFS_MAXIMUM_NODES]; + ocfs_lock_res *extnode_lockres[OCFS_MAXIMUM_NODES]; + ocfs_lock_res *vol_lockres; + ocfs_free_log *tmp_log; + ocfs_free_log *free_log; + ub4 tmp_indx; + ub8 lock_id; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", osb, log_rec); + + /* init */ + free_vol_bits = NULL; + for (i = 0; i < OCFS_MAXIMUM_NODES; i++) { + free_dir_node[i] = NULL; + free_ext_node[i] = NULL; + } + + free_log = &(log_rec->rec.free); + + /* alloc memory */ + num_upd = free_log->num_free_upds; + for (i = 0; i < num_upd; i++) { + switch (free_log->free_bitmap[i].type) { + case DISK_ALLOC_DIR_NODE: + node_num = free_log->free_bitmap[i].node_num; + if (free_dir_node[node_num] == NULL) { + free_dir_node[node_num] = + ocfs_malloc (sizeof (ocfs_free_log)); + if (free_dir_node[node_num] == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + free_dir_node[node_num]->num_free_upds = 0; + } + tmp_log = free_dir_node[node_num]; + break; + + case DISK_ALLOC_EXTENT_NODE: + node_num = free_log->free_bitmap[i].node_num; + if (free_ext_node[node_num] == NULL) { + free_ext_node[node_num] = + ocfs_malloc (sizeof (ocfs_free_log)); + if (free_ext_node[node_num] == NULL) { + 
LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + free_ext_node[node_num]->num_free_upds = 0; + } + tmp_log = free_ext_node[node_num]; + break; + + case DISK_ALLOC_VOLUME: + if (free_vol_bits == NULL) { + free_vol_bits = + ocfs_malloc (sizeof (ocfs_free_log)); + if (free_vol_bits == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + free_vol_bits->num_free_upds = 0; + } + tmp_log = free_vol_bits; + break; + + default: + tmp_log = NULL; + break; + } + + if (tmp_log) { + ocfs_free_bitmap *fb1, *fb2; + + tmp_indx = tmp_log->num_free_upds; + + fb1 = &(tmp_log->free_bitmap[tmp_indx]); + fb2 = &(free_log->free_bitmap[i]); + + fb1->length = fb2->length; + fb1->file_off = fb2->file_off; + fb1->type = fb2->type; + fb1->node_num = fb2->node_num; + + tmp_log->num_free_upds++; + } + } + + /* Get all locks */ + if (free_vol_bits != NULL) { + lock_id = OCFS_BITMAP_LOCK_OFFSET; + status = ocfs_acquire_lock (osb, lock_id, + OCFS_DLM_EXCLUSIVE_LOCK, + FLAG_FILE_CREATE, &vol_lockres, NULL); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + } + + lock_id = (OCFS_FILE_DIR_ALLOC_BITMAP * osb->sect_size) + + osb->vol_layout.root_int_off; + for (i = 0; i < OCFS_MAXIMUM_NODES; i++, lock_id += osb->sect_size) { + if (free_dir_node[i] != NULL) { + status = ocfs_acquire_lock (osb, lock_id, + OCFS_DLM_EXCLUSIVE_LOCK, + FLAG_FILE_CREATE, + &(dirnode_lockres[i]), NULL); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + } + } + + lock_id = (OCFS_FILE_FILE_ALLOC_BITMAP * osb->sect_size) + + osb->vol_layout.root_int_off; + for (i = 0; i < OCFS_MAXIMUM_NODES; i++, lock_id += osb->sect_size) { + if (free_ext_node[i] != NULL) { + status = ocfs_acquire_lock (osb, lock_id, + OCFS_DLM_EXCLUSIVE_LOCK, + FLAG_FILE_CREATE, + &(extnode_lockres[i]), NULL); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + } + } + + /* free vol block */ + if (free_vol_bits != NULL) + ocfs_free_vol_block (osb, free_vol_bits, -1, 
DISK_ALLOC_VOLUME); + + /* We can potentiallly loose some allocation for dirNodes or extent */ + /* nodes but they should not be much... */ + for (i = 0; i < OCFS_MAXIMUM_NODES; i++) { + if (free_dir_node[i] != NULL) + ocfs_free_vol_block (osb, free_dir_node[i], i, + DISK_ALLOC_DIR_NODE); + + if (free_ext_node[i] != NULL) + ocfs_free_vol_block (osb, free_ext_node[i], i, + DISK_ALLOC_EXTENT_NODE); + } + + /* release all locks */ + if (free_vol_bits != NULL) { + lock_id = OCFS_BITMAP_LOCK_OFFSET; + status = ocfs_release_lock (osb, lock_id, + OCFS_DLM_EXCLUSIVE_LOCK, + FLAG_FILE_CREATE, vol_lockres); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + } + + lock_id = (OCFS_FILE_DIR_ALLOC_BITMAP * osb->sect_size) + + osb->vol_layout.root_int_off; + for (i = 0; i < OCFS_MAXIMUM_NODES; i++, lock_id += osb->sect_size) { + if (free_dir_node[i] != NULL) { + status = ocfs_release_lock (osb, lock_id, + OCFS_DLM_EXCLUSIVE_LOCK, + FLAG_FILE_CREATE, + dirnode_lockres[i]); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + } + } + + lock_id = (OCFS_FILE_FILE_ALLOC_BITMAP * osb->sect_size) + + osb->vol_layout.root_int_off; + for (i = 0; i < OCFS_MAXIMUM_NODES; i++, lock_id += osb->sect_size) { + if (free_ext_node[i] != NULL) { + status = ocfs_release_lock (osb, lock_id, + OCFS_DLM_EXCLUSIVE_LOCK, + FLAG_FILE_CREATE, + extnode_lockres[i]); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + } + + } + + finally: + for (i = 0; i < OCFS_MAXIMUM_NODES; i++) { + ocfs_safefree (free_dir_node[i]); + ocfs_safefree (free_ext_node[i]); + } + + ocfs_safefree (free_vol_bits); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_free_disk_bitmap */ + +/* + * ocfs_process_record() + * + * + * called by: ocfs_process_log() + */ +int ocfs_process_record (ocfs_super * osb, void *buffer) +{ + int status = 0; + ocfs_log_record *log_rec; + ocfs_cleanup_record *clean_rec; + ocfs_file_entry *fe = NULL; + ocfs_dir_node *lock_node = NULL; + 
ub1 *read_buf = NULL; + ub4 node_num; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", osb, buffer); + + log_rec = (ocfs_log_record *) buffer; + clean_rec = (ocfs_cleanup_record *) buffer; + + switch (log_rec->log_type) { + case LOG_TYPE_DISK_ALLOC: + { + switch (log_rec->rec.alloc.type) { + case DISK_ALLOC_DIR_NODE: + case DISK_ALLOC_EXTENT_NODE: + status = ocfs_free_node_block (osb, + log_rec->rec.alloc.file_off, + log_rec->rec.alloc.length, + log_rec->rec.alloc.node_num, + log_rec->rec.alloc.type); + break; + default: + break; + } + } + break; + + case LOG_DELETE_NEW_ENTRY: + status = ocfs_get_file_entry (osb, &fe, + log_rec->rec.del.ent_del); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + status = ocfs_get_file_entry (osb, + (ocfs_file_entry **) (&lock_node), + log_rec->rec.del.parent_dirnode_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + node_num = log_rec->rec.del.node_num; + + /* + ** Lock on directory shd be held by the node which either + ** died or this node... + */ + status = ocfs_del_file_entry (osb, fe, lock_node); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + break; + + case LOG_DELETE_ENTRY: + /* + ** Delete the entry from the dir node it was associated + ** with. Now it can be reused. + */ + status = ocfs_get_file_entry (osb, &fe, + clean_rec->rec.del.ent_del); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + status = ocfs_get_file_entry (osb, + (ocfs_file_entry **) (&lock_node), + clean_rec->rec.del.parent_dirnode_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + node_num = clean_rec->rec.del.node_num; + + /* + ** Lock on directory shd be held by the node which + ** either died or this node... 
+ */ + status = ocfs_del_file_entry (osb, fe, lock_node); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + break; + + case LOG_MARK_DELETE_ENTRY: + status = ocfs_get_file_entry (osb, &fe, + log_rec->rec.del.ent_del); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + if (log_rec->rec.del.flags & FLAG_RESET_VALID) { + OCFS_SET_FLAG (fe->sync_flags, OCFS_SYNC_FLAG_VALID); + status = ocfs_write_file_entry (osb, fe, + log_rec->rec.del.ent_del); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + /* We are done... */ + status = 0; + goto finally; + } + + /* + ** Read in the entry to be deleted. We are doing + ** recovery on another node? + ** What if we were in abort trans for this node??? + */ + node_num = log_rec->rec.del.node_num; + + /* This is recovery for a dead node */ + if (fe->sync_flags & OCFS_SYNC_FLAG_VALID) { + /* No recovery needed for the entry, let it stay */ + status = 0; + goto finally; + } else { + status = ocfs_delete_file_entry (osb, fe, + log_rec->rec.del.parent_dirnode_off, + node_num); + goto finally; + } + break; + + case LOG_FREE_BITMAP: + status = ocfs_free_disk_bitmap (osb, buffer); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + break; + + case LOG_UPDATE_EXTENT: + { + ub4 index; + ocfs_extent_group *alloc_ext; + ub8 disk_off = 0; + + /* + ** Make sure we have the file lock here + */ + read_buf = ocfs_malloc (osb->sect_size); + if (read_buf == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + + disk_off = log_rec->rec.extent.disk_off; + + status = ocfs_read_disk (osb, read_buf, + (ub4) osb->sect_size, disk_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + alloc_ext = (ocfs_extent_group *) read_buf; + + index = log_rec->rec.extent.index; + + alloc_ext->extents[index].file_off = 0; + alloc_ext->extents[index].num_clusters = 0; + alloc_ext->extents[index].disk_off = 0; + + disk_off = 
log_rec->rec.extent.disk_off; + + status = ocfs_write_disk (osb, read_buf, + (ub4) osb->sect_size, disk_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + } + break; + + case LOG_TYPE_DIR_NODE: + status = ocfs_recover_dir_node (osb, + log_rec->rec.dir.orig_off, + log_rec->rec.dir.saved_off); + if (status < 0) { + /* + ** Bad one. We should disable this volume and try + ** and let somebody else do the recovery... + */ + } + break; + + case LOG_TYPE_RECOVERY: + /* + ** This node was recovering another node and died. + ** All locks in the function need to be recursive... + */ + node_num = osb->node_recovering; + + status = ocfs_recover_vol (osb, + log_rec->rec.recovery.node_num); + if (status < 0) { + /* + ** Bad one. We should disable this volume and try + ** and let somebody else do the recovery... + */ + } + osb->node_recovering = node_num; + break; + + case LOG_TYPE_TRANS_START: + /* We are back to the record till which we needed to */ + /* roll back. Check to ensure the file size for recovery */ + /* log is 1 rec long */ + status = 0; + break; + + case LOG_CLEANUP_LOCK: + { + ub4 num_upd; + ub4 i; + ub8 lock_id; + ocfs_lock_res *lock_res; + ocfs_lock_res *lock_res_array[LOCK_UPDATE_LOG_SIZE]; + + num_upd = clean_rec->rec.lock.num_lock_upds; + + for (i = 0; i < num_upd; i++) { + lock_id = clean_rec->rec.lock.lock_upd[i].orig_off; + lock_res_array[i] = NULL; + + status = ocfs_lookup_sector_node (osb, lock_id, + &lock_res); + if (status >= 0) { + /* + ** We should not be doing anything with + ** this resource. 
Can lock the resource + ** but then we need to look at the + ** locking hierarchy between hash and + ** the resource mutex + */ + + if (lock_res->signature != 0x55AA) { + LOG_ERROR_STATUS(status = -EFAIL); + goto finally; + } + + ocfs_hash_del (&(osb->root_sect_node), + &(lock_res->sector_num), + sizeof (ub8)); + lock_res->sector_num = + clean_rec->rec.lock.lock_upd[i].new_off; + lock_res_array[i] = lock_res; + } else { + /* We don't have the resource so don't */ + /* bother with it */ + } + } + + for (i = 0; i < num_upd; i++) { + lock_res = lock_res_array[i]; + if (lock_res) { + /* Reinsert with new ID */ + ocfs_insert_sector_node (osb, lock_res); + } + } + } + break; + + default: + break; + } + + finally: + if (fe) + ocfs_release_file_entry (fe); + + if (lock_node) + ocfs_release_file_entry ((ocfs_file_entry *) lock_node); + + ocfs_safefree (read_buf); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_process_record */ + +/* + * ocfs_process_log() + * + * + * called by: ocfs_commit_trans(), ocfs_abort_trans(), ocfs_recover_vol() + */ +int ocfs_process_log (ocfs_super * osb, ub8 trans_id, ub4 node_num, ub4 * type) +{ + int status = 0; + ub8 file_size; + ub8 offset; + ub8 alloc_size; + ub4 log_type; + ub4 log_rec_size; + ub4 size; + ub4 log_file_id; + ocfs_log_record *log_rec = NULL; + + LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, 0x%08x)\n", osb, HI (trans_id), + LO (trans_id), node_num, type); + + log_type = *type; + + if (log_type == LOG_RECOVER) { + log_file_id = (LOG_FILE_BASE_ID + node_num); + log_rec_size = osb->sect_size; + } else if (log_type == LOG_CLEANUP) { + log_file_id = (CLEANUP_FILE_BASE_ID + node_num); + log_rec_size = sizeof (ocfs_cleanup_record); + log_rec_size = (ub4) OCFS_ALIGN (log_rec_size, osb->sect_size); + } else { + /* Shouldn't be happening */ + goto finally; + } + + size = log_rec_size; + size = (ub4) OCFS_ALIGN (size, PAGE_SIZE); + + if ((log_rec = ocfs_malloc (size)) == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + 
} + + status = ocfs_get_system_file_size (osb, log_file_id, &file_size, &alloc_size); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + if (file_size <= 0) { + if (log_type == LOG_RECOVER) + *type = LOG_CLEANUP; + goto finally; + } else { + if (log_type == LOG_RECOVER) { + /* + ** This helps in bdcast recovery by having other nodes just set + ** the event and not process cleanup log + */ + status = ocfs_extend_system_file (osb, + (CLEANUP_FILE_BASE_ID + node_num), 0); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + } + } + + while (file_size != 0) { + /* Recover from the log file */ + /* Read in the last record */ + offset = file_size - log_rec_size; + status = ocfs_read_system_file (osb, log_file_id, log_rec, + log_rec_size, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + status = ocfs_process_record (osb, log_rec); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + status = ocfs_extend_system_file (osb, log_file_id, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + status = ocfs_get_system_file_size (osb, log_file_id, + &file_size, &alloc_size); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + } + + finally: + ocfs_safefree (log_rec); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_process_log() */
+
+/*
+ * ocfs_start_trans()
+ *
+ * Opens a transaction on osb: copies vol_node_map.largest_seq_num into
+ * curr_trans_id, polls in 100 ms sleeps until needs_flush is clear and
+ * then sets trans_in_progress. Always returns 0.
+ *
+ * called by: ocfs_create_modify_file(), ocfs_set_rename_information()
+ */
+int ocfs_start_trans (ocfs_super * osb)
+{
+	LOG_ENTRY_ARGS ("(0x%08x)\n", osb);
+
+	osb->curr_trans_id = osb->vol_node_map.largest_seq_num;
+
+	if (osb->needs_flush) {
+		while (osb->needs_flush)
+			ocfs_sleep (100);	/* in ms */
+	}
+
+	osb->trans_in_progress = true;
+
+	LOG_EXIT_STATUS (0);
+	return 0;
+}				/* ocfs_start_trans */
+
+/*
+ * ocfs_commit_trans()
+ *
+ * Commits the current transaction of this node. The recovery log
+ * (LOG_FILE_BASE_ID + node_num) is set to size 0, the cleanup log is
+ * replayed through ocfs_process_log() with LOG_CLEANUP, and the cleanup
+ * log (CLEANUP_FILE_BASE_ID + node_num) is then set to size 0 too.
+ * NOTE(review): ocfs_extend_system_file() with an offset of 0 presumably
+ * truncates the file - confirm against its definition.
+ *
+ * trans_id is only traced here and forwarded to ocfs_process_log().
+ * curr_trans_id is reset to -1 only when every step succeeded;
+ * trans_in_progress is cleared on every path.
+ *
+ * Returns 0 on success or the negative status of the failing step.
+ *
+ * called by: ocfs_create_modify_file(), ocfs_set_rename_information()
+ */
+int ocfs_commit_trans (ocfs_super * osb, ub8 trans_id)
+{
+	int status = 0;
+	ub8 offset = 0;
+	ub4 log_type;
+
+	LOG_ENTRY_ARGS ("(0x%08x, %u.%u)\n", osb, HI (trans_id), LO (trans_id));
+
+	/* Log to the file for multiple transactions... */
+	status = ocfs_extend_system_file (osb,
+				(LOG_FILE_BASE_ID + osb->node_num), offset);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	log_type = LOG_CLEANUP;
+
+	status = ocfs_process_log (osb, trans_id, osb->node_num, &log_type);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	status = ocfs_extend_system_file (osb,
+				(CLEANUP_FILE_BASE_ID + osb->node_num), offset);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	osb->curr_trans_id = -1;
+
+      finally:
+	osb->trans_in_progress = false;	/* cleared on success and on failure */
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_commit_trans */
+
+/*
+ * ocfs_abort_trans()
+ *
+ * Aborts the current transaction of this node. The recovery log is
+ * replayed through ocfs_process_log() with LOG_RECOVER, after which the
+ * recovery log and the cleanup log are both set to size 0.
+ * NOTE(review): ocfs_extend_system_file() with an offset of 0 presumably
+ * truncates the file - confirm against its definition.
+ *
+ * trans_id is only traced here and forwarded to ocfs_process_log().
+ * curr_trans_id is reset to -1 only when every step succeeded;
+ * trans_in_progress is cleared on every path.
+ *
+ * Returns 0 on success or the negative status of the failing step.
+ *
+ * called by: ocfs_create_modify_file(), ocfs_set_rename_information()
+ */
+int ocfs_abort_trans (ocfs_super * osb, ub8 trans_id)
+{
+	int status = 0;
+	ub8 offset = 0;
+	ub4 log_type;
+
+	LOG_ENTRY_ARGS ("(0x%08x, %u.%u)\n", osb, HI (trans_id), LO (trans_id));
+
+	/* Read the log file and free up stf... */
+	log_type = LOG_RECOVER;
+
+	status = ocfs_process_log (osb, trans_id, osb->node_num, &log_type);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	status = ocfs_extend_system_file (osb,
+				(LOG_FILE_BASE_ID + osb->node_num), offset);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	status = ocfs_extend_system_file (osb,
+				(CLEANUP_FILE_BASE_ID + osb->node_num), offset);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	osb->curr_trans_id = -1;
+
+      finally:
+	osb->trans_in_progress = false;	/* cleared on success and on failure */
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_abort_trans */
+
+/* + * ocfs_reset_publish() + * + * + * called by: ocfs_recover_vol() + * + * NOTE: This function is very similar to ocfs_disk_reset_voting().
+ * This function should replace the other one. + */ +int ocfs_reset_publish (ocfs_super * osb, ub8 node_num) +{ + int status = 0; + ocfs_publish *publish = NULL; + ub8 node_publ_off = 0; + + LOG_ENTRY_ARGS ("(0x%08x, %u.%u)\n", osb, HI (node_num), LO (node_num)); + + if ((publish = ocfs_malloc (osb->sect_size)) == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + + node_publ_off = osb->vol_layout.publ_sect_off + + (node_num * osb->sect_size); + + /* Read the publish sector */ + status = ocfs_read_disk (osb, publish, osb->sect_size, node_publ_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + publish->dirty = 0; + publish->vote = 0; + publish->vote_type = 0; + + /* Write the publish sector */ + status = ocfs_write_disk (osb, publish, osb->sect_size, node_publ_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + finally: + ocfs_safefree (publish); + + LOG_EXIT_STATUS (status); + return (status); +} /* ocfs_reset_publish */
+
+/*
+ * ocfs_recover_vol()
+ *
+ * Run log recovery on behalf of node node_num.
+ *
+ * Under the local recovery_lock semaphore the node's publish sector is
+ * reset with ocfs_reset_publish() and the sizes of its recovery log
+ * (LOG_FILE_BASE_ID + node_num) and cleanup log
+ * (CLEANUP_FILE_BASE_ID + node_num) are read. When both are empty there
+ * is nothing to do. Otherwise the volume is marked VOLUME_IN_RECOVERY,
+ * the exclusive cluster lock on the node's log file is acquired, a
+ * LOG_TYPE_RECOVERY record is written to this node's own log when
+ * another node is being recovered, and the logs are replayed through
+ * ocfs_process_log().
+ *
+ * Returns 0 on success (this includes the "nothing to do" case and the
+ * case where node_num is already being recovered), -EINVAL when
+ * node_num is not a valid node number, -ENOMEM when the log record
+ * cannot be allocated, or the negative status of the failing helper. A
+ * failure to release the cluster lock overrides an earlier status.
+ *
+ * called by: ocfs_process_record(), ocfs_disk_request_vote(),
+ *            ocfs_acquire_lock(), ocfs_check_volume(), ocfs_nm_thread()
+ */
+int ocfs_recover_vol (ocfs_super * osb, ub8 node_num)
+{
+	int status = 0;
+	int tmpstat;
+	bool recovery_lock = false;
+	bool lock_acq = false;
+	ub8 lock_id = 0;
+	ub8 file_size = 0;
+	ub8 alloc_size = 0;
+	ocfs_lock_res *lock_res = NULL;
+	ocfs_log_record *log_rec = NULL;
+	ub4 size;
+	ub4 log_type;
+	ub8 trans_id = 0;
+	ub8 cleanup_file_size = 0;
+	ub4 file_id;
+
+	LOG_ENTRY_ARGS ("(0x%08x, %u.%u)\n", osb, HI (node_num), LO (node_num));
+
+	/*
+	 * node_num is unsigned, so only the upper bound can be violated.
+	 * The per-node tables and lock ids in this file are indexed
+	 * 0 .. OCFS_MAXIMUM_NODES - 1, hence ">=".
+	 */
+	if (node_num >= OCFS_MAXIMUM_NODES) {
+		LOG_ERROR_STATUS (status = -EINVAL);
+		goto finally;
+	}
+
+	LOG_TRACE_ARGS ("Called for node %u.%u\n", HI (node_num), LO (node_num));
+
+	/* Grab the local recovery resource to ensure no other thread comes */
+	/* in from this node for recovery */
+	ocfs_down_sem (&(osb->recovery_lock), true);
+	recovery_lock = true;
+
+	/* Recovery of this node is already under way: nothing to add */
+	if (osb->node_recovering == node_num) {
+		goto finally;
+	}
+
+	/* Now reset the publish sector to have the dirty bit not set... */
+	status = ocfs_reset_publish (osb, node_num);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	file_id = (ub4) (LOG_FILE_BASE_ID + node_num);
+
+	/* Read in the size of the recovery log */
+	status = ocfs_get_system_file_size (osb, file_id, &file_size,
+					    &alloc_size);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	/* ... and the size of the cleanup log */
+	file_id = (ub4) (CLEANUP_FILE_BASE_ID + node_num);
+	status = ocfs_get_system_file_size (osb, file_id, &cleanup_file_size,
+					    &alloc_size);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	if ((file_size == 0) && (cleanup_file_size == 0)) {
+		/* Both logs are empty, so there is nothing to recover */
+		osb->node_recovering = OCFS_INVALID_NODE_NUM;
+		status = 0;
+		goto finally;
+	}
+
+	osb->node_recovering = node_num;
+	osb->vol_state = VOLUME_IN_RECOVERY;
+
+	/*
+	 ** Grab the lock on the log file for the node which needs recovery,
+	 ** this ensures nobody else in the cluster process the recovery
+	 */
+	lock_id = ((LOG_FILE_BASE_ID + node_num) * osb->sect_size) +
+	    osb->vol_layout.root_int_off;
+
+	status = ocfs_acquire_lock (osb, lock_id, OCFS_DLM_EXCLUSIVE_LOCK,
+				    FLAG_FILE_CREATE, &lock_res, NULL);
+	if (status < 0) {
+		goto finally;
+	}
+
+	lock_acq = true;
+
+	if (node_num != osb->node_num) {
+		/*
+		 ** Write a log entry indicating this node is doing recovery
+		 ** for nodenum, if this node now dies during recovery.
+		 ** The node doing recovery for this node will know it needs
+		 ** to recover the vol for node node num too...
+		 */
+		size = max(sizeof (ocfs_log_record),
+			   sizeof (ocfs_cleanup_record));
+		size = (ub4) OCFS_ALIGN (size, PAGE_SIZE);
+
+		if ((log_rec = ocfs_malloc (size)) == NULL) {
+			LOG_ERROR_STATUS (status = -ENOMEM);
+			goto finally;
+		}
+
+		/*
+		 * ocfs_write_log() writes a full sector from this buffer;
+		 * only three fields are set below, so clear the rest rather
+		 * than putting stale heap contents on disk.
+		 */
+		memset (log_rec, 0, size);
+
+		log_rec->log_id = osb->curr_trans_id;
+		log_rec->log_type = LOG_TYPE_RECOVERY;
+		log_rec->rec.recovery.node_num = node_num;
+
+		status = ocfs_write_log (osb, log_rec, LOG_RECOVER);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto finally;
+		}
+	}
+
+	log_type = LOG_RECOVER;
+
+	/*
+	 * NOTE(review): the log replayed here is the one of osb->node_num,
+	 * not of node_num, although the lock taken above is on node_num's
+	 * log file - confirm this is intended.
+	 */
+	status = ocfs_process_log (osb, trans_id, osb->node_num, &log_type);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	/*
+	 ** If a cleanup file exists we should just reset the file size
+	 ** if we aborted the transaction otherwise process the cleanup file....
+	 */
+	if (log_type == LOG_CLEANUP) {
+		status = ocfs_process_log (osb, trans_id, osb->node_num,
+					   &log_type);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto finally;
+		}
+	}
+
+	/* Recovery is complete: no node is being recovered any more */
+	osb->node_recovering = OCFS_INVALID_NODE_NUM;
+
+	/* The vol state might have to turn into flags... */
+	osb->vol_state = VOLUME_ENABLED;
+
+      finally:
+	/* The semaphore is dropped before the cluster lock, as before */
+	if (recovery_lock) {
+		ocfs_up_sem (&(osb->recovery_lock));
+		recovery_lock = false;
+	}
+
+	if (lock_acq) {
+		tmpstat = ocfs_release_lock (osb, lock_id,
+					     OCFS_DLM_EXCLUSIVE_LOCK,
+					     FLAG_FILE_CREATE, lock_res);
+		if (tmpstat < 0)
+			status = tmpstat;
+	}
+
+	/* The record is only borrowed by ocfs_write_log(); release it here */
+	ocfs_safefree (log_rec);
+
+	LOG_EXIT_STATUS (status);
+	return (status);
+}				/* ocfs_recover_vol */
+
+/* + * ocfs_write_log() + * + * called by: ocfs_recover_vol(), ocfs_del_file(), ocfs_alloc_node_block() + */ +int ocfs_write_log (ocfs_super * osb, ocfs_log_record * log_rec, ub4 type) +{ + int status = 0; + int tmpstat; + sb4 log_file_id = -1; + ub8 lock_id = 0; + ub8 file_size = 0; + ub8 offset = 0; + ub8 log_rec_size = 0; + ub8 alloc_size = 0; + ocfs_lock_res *lock_res; + bool log_lock = false; + bool lock_acq = false; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u)\n", osb, log_rec, type); + + /* Get the log lock */ + ocfs_down_sem (&(osb->log_lock), true); + log_lock = true; + + if (type == LOG_RECOVER) { + log_file_id = (LOG_FILE_BASE_ID + osb->node_num); + log_rec_size = osb->sect_size; + } else if (type == LOG_CLEANUP) { + log_file_id = (CLEANUP_FILE_BASE_ID + osb->node_num); + log_rec_size = sizeof (ocfs_cleanup_record); + log_rec_size = OCFS_ALIGN (log_rec_size, osb->sect_size); + } else { + /* Shouldn't be happening */ + goto finally; + } + + /* + ** Always log to the end of the file after taking a file lock + ** and a log lock + */ + lock_id = (log_file_id * osb->sect_size) + osb->vol_layout.root_int_off; + + status = ocfs_acquire_lock (osb, lock_id, OCFS_DLM_EXCLUSIVE_LOCK, + FLAG_FILE_CREATE, &lock_res, NULL); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + lock_acq = true; + + status = ocfs_get_system_file_size (osb, log_file_id, &file_size, + &alloc_size); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + offset = file_size; + + if
(alloc_size < (file_size + log_rec_size)) { + file_size += ONE_MEGA_BYTE; + status = ocfs_extend_system_file (osb, log_file_id, file_size); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + } + + status = ocfs_write_system_file (osb, log_file_id, log_rec, + log_rec_size, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + status = ocfs_extend_system_file (osb, log_file_id, + (offset + log_rec_size)); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + finally: + if (log_lock) { + ocfs_up_sem (&(osb->log_lock)); + log_lock = false; + } + + if (lock_acq) { + tmpstat = ocfs_release_lock (osb, lock_id, + OCFS_DLM_EXCLUSIVE_LOCK, + FLAG_FILE_CREATE, lock_res); + if (tmpstat < 0) + status = tmpstat; + } + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_write_log */ + +/* + * ocfs_write_node_log() + * + * called by: ocfs_free_extents_for_truncate(), log_lock), true); + log_lock = true; + + if (type == LOG_RECOVER) { + log_file_id = (LOG_FILE_BASE_ID + node_num); + log_rec_size = osb->sect_size; + } else if (type == LOG_CLEANUP) { + log_file_id = (CLEANUP_FILE_BASE_ID + node_num); + log_rec_size = sizeof (ocfs_cleanup_record); + log_rec_size = OCFS_ALIGN (log_rec_size, osb->sect_size); + } else { + /* Shouldn't be happening */ + goto finally; + } + + /* Always log to the eof after taking a file lock and a log lock */ + lock_id = (log_file_id * osb->sect_size) + osb->vol_layout.root_int_off; + + status = ocfs_acquire_lock (osb, lock_id, OCFS_DLM_EXCLUSIVE_LOCK, + FLAG_FILE_CREATE, &lock_res, NULL); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + lock_acq = true; + + status = ocfs_get_system_file_size (osb, log_file_id, &file_size, + &alloc_size); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + offset = file_size; + + if (alloc_size < (file_size + log_rec_size)) { + file_size += ONE_MEGA_BYTE; + status = ocfs_extend_system_file (osb, log_file_id, 
file_size); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + } + + status = ocfs_write_system_file (osb, log_file_id, log_rec, + log_rec_size, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + status = ocfs_extend_system_file (osb, log_file_id, + (offset + log_rec_size)); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + finally: + if (log_lock) { + ocfs_up_sem (&(osb->log_lock)); + log_lock = false; + } + + if (lock_acq) { + tmpstat = + ocfs_release_lock (osb, lock_id, OCFS_DLM_EXCLUSIVE_LOCK, + FLAG_FILE_CREATE, lock_res); + if (tmpstat < 0) + status = tmpstat; + } + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_write_node_log */ diff -urNp ocfs/fs/ocfs/Common/ocfsgenutil.c 2.4.20pre5aa2/fs/ocfs/Common/ocfsgenutil.c --- ocfs/fs/ocfs/Common/ocfsgenutil.c Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/ocfsgenutil.c Fri Sep 6 01:46:16 2002 @@ -0,0 +1,65 @@ +/* + * ocfsgenutil.c + * + * Generic utilities + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#include + +/* Tracing */ +#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_UTIL + +/* + * ocfs_debug_print() + * + * UNUSED... to be deleted. + */ +void ocfs_debug_print (ub4 Context, ub4 Level, char *FormatStr, ...) +{ + char buf[256]; + va_list va; + + if ((OcfsDebugCtxt & Context) && (OcfsDebugLevel & Level)) { + va_start (va, FormatStr); + vsprintf (buf, FormatStr, va); + va_end (va); + printk ("(%d) %s\n", current->pid, buf); + } + return; +} /* ocfs_debug_print */ + +int ocfs_compare_qstr (struct qstr * s1, struct qstr * s2) +{ + int s = strncmp ((const char *) s1->name, (const char *) s2->name, + s1->len < s2->len ? s1->len : s2->len); + + if (s != 0) + return s; + if (s1->len > s2->len) + return 1; + else if (s1->len < s2->len) + return -1; + else + return s; +} /* ocfs_compare_qstr */ + diff -urNp ocfs/fs/ocfs/Common/ocfsgenvolcfg.c 2.4.20pre5aa2/fs/ocfs/Common/ocfsgenvolcfg.c --- ocfs/fs/ocfs/Common/ocfsgenvolcfg.c Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/ocfsgenvolcfg.c Fri Sep 6 01:46:16 2002 @@ -0,0 +1,868 @@ +/* + * ocfsgenvolcfg.c + * + * Auto configuration, namely, node number. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#include + +/* Tracing */ +#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_VOLCFG + +/* + * ocfs_worker() + * + * This function reiterates the lock on the disk from this node once + * it has obtained it. + */ +void ocfs_worker (void *Arg) +{ + ub4 length; + char *buffer; + int status; + ocfs_super *osb; + ub8 offset; + ocfs_cfg_task *cfg_task; + + LOG_ENTRY (); + + cfg_task = (ocfs_cfg_task *) Arg; + + /* Obtain the volume for which we need to reiterate the lock */ + osb = cfg_task->osb; + buffer = cfg_task->buffer; + length = osb->sect_size; + offset = cfg_task->lock_off; + + /* Write the sector back */ + status = ocfs_write_disk (osb, buffer, length, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + /* deliberate no exit jump here */ + } + + if (osb->lock_stop) { + LOG_TRACE_ARGS ("Last Lock written : %d\n", jiffies); + wake_up (&osb->lock_event); + } else { + LOG_TRACE_ARGS ("Lock written : %d\n", jiffies); + mod_timer (&osb->lock_timer, jiffies + OCFS_VOLCFG_LOCK_ITERATE); + } + + LOG_EXIT (); + return; +} /* ocfs_worker */ + +/* + * ocfs_assert_lock_owned() + * + * Routine called by a timer to reiterate the disk lock. 
+ */
+void ocfs_assert_lock_owned (ub4 Arg)
+{
+	ocfs_cfg_task *cfg_task;
+
+	LOG_ENTRY ();
+
+	/*
+	 * Arg carries the ocfs_cfg_task pointer stored in the timer's data.
+	 * NOTE(review): the pointer travels through a ub4; if ub4 is
+	 * narrower than a pointer on some target it is truncated here -
+	 * confirm before building for such a target.
+	 */
+	cfg_task = (ocfs_cfg_task *) Arg;
+
+	/* initialize the task */
+	INIT_TQUEUE (&(cfg_task->task), ocfs_worker, cfg_task);
+
+	/* submit it; ocfs_worker() does the actual disk write */
+	schedule_task (&cfg_task->task);
+
+	LOG_EXIT ();
+	return ;
+}				/* ocfs_assert_lock_owned */
+
+/*
+ * ocfs_add_to_disk_config()
+ *
+ * Adds NodeCfgInfo to the node configuration area of the volume:
+ *   - reads the whole area (osb->vol_layout.node_cfg_size bytes at
+ *     osb->vol_layout.node_cfg_off) and validates its header signature
+ *     and version,
+ *   - picks the first slot whose node_name is empty; slots are one
+ *     sector each and start at the third sector of the area,
+ *   - bumps seq_num and num_nodes in the header and writes the first two
+ *     sectors back,
+ *   - writes one sector of NodeCfgInfo into the chosen slot,
+ *   - writes a copy of the header to the second sector of NewCfg.
+ *
+ * Returns 0 on success; -EINVAL for a bad header, -ENOMEM when no slot
+ * is free, or the error returned by the failing disk read/write.
+ */
+int ocfs_add_to_disk_config (ocfs_super * osb, ocfs_disk_node_config_info * NodeCfgInfo)
+{
+	int status = 0;
+	ub8 Offset;
+	ocfs_node_config_hdr *Hdr = NULL;
+	ocfs_disk_node_config_info *Node = NULL;
+	ub1 *buffer = NULL;
+	ub1 *p;
+	ub4 node;
+	ub4 sect_size = osb->sect_size;
+
+	LOG_ENTRY ();
+
+	/* Read the nodecfg from disk */
+	Offset = osb->vol_layout.node_cfg_off;
+	status =
+	    ocfs_read_disk_ex (osb, (void **) &buffer,
+			       osb->vol_layout.node_cfg_size,
+			       osb->vol_layout.node_cfg_size, Offset);
+	if (status < 0) {
+		/* report the real failure, not a blanket -ENOMEM */
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	Hdr = (ocfs_node_config_hdr *) buffer;
+
+	/* Sanity check */
+	if ((strncmp
+	     (Hdr->signature, NODE_CONFIG_HDR_SIGN, NODE_CONFIG_SIGN_LEN))
+	    || ((Hdr->version < NODE_MIN_SUPPORTED_VER)
+		|| (Hdr->version > NODE_CONFIG_VER))) {
+		LOG_ERROR_STATUS (status = -EINVAL);
+		goto finally;
+	}
+
+	/* Find an empty slot in nodecfg */
+	p = buffer + (2 * sect_size);
+	for (node = 0; node < OCFS_MAXIMUM_NODES; ++node, p += sect_size) {
+		Node = (ocfs_disk_node_config_info *) p;
+		if (Node->node_name[0] == '\0')
+			break;
+	}
+
+	/* If no free slots */
+	if (node >= OCFS_MAXIMUM_NODES) {
+		LOG_ERROR_STR
+		    ("Unable to allocate node number as no slots available");
+		status = -ENOMEM;
+		goto finally;
+	}
+
+	/* Increment the seq_num to signal all nodes to refresh */
+	Hdr->seq_num++;
+	Hdr->num_nodes++;
+
+	/* Write the updated nodecfg hdr */
+	Offset = osb->vol_layout.node_cfg_off;
+	status = ocfs_write_disk (osb, (void *) Hdr, (2 * sect_size), Offset);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	/* Copy the new nodecfg into the memory buffer */
+	p = buffer + ((node + 2) * sect_size);
+	memcpy (p, NodeCfgInfo, sect_size);
+
+	/* Write the new node details on disk */
+	Offset = osb->vol_layout.node_cfg_off + ((node + 2) * sect_size);
+	Node = (ocfs_disk_node_config_info *) p;
+	status = ocfs_write_disk (osb, (void *) Node, sect_size, Offset);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	/* Write the NodeCfgHdr into the second sector of NewCfg. */
+	/* We do so so that we can read the NodeCfgHdr easily when we */
+	/* read the PublishSector, for e.g. in ocfs_nm_thread() */
+	Offset = osb->vol_layout.new_cfg_off + sect_size;
+	status = ocfs_write_disk (osb, (void *) Hdr, sect_size, Offset);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+      finally:
+	/* buffer is still NULL here if the initial read failed */
+	ocfs_safefree (buffer);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_add_to_disk_config */
+
+/*
+ * ocfs_config_with_disk_lock()
+ *
+ * This function tries to obtain the lock on the disk for the volume
+ * specified. The logic for obtaining a disk lock is as follows :
+ * Read the disk and check to see if somebody else owns the disk if so
+ * wait for OCFS_VOLCFG_LOCK_TIME which is 2 sec currently, after which try
+ * and break if not write the nodename as a lock and set the lock owned
+ * bit. Now wait for OCFS_VOLCFG_LOCK_TIME and read the sector back if the
+ * nodename is still ours we own the lock. A timer with DPC is now kicked
+ * in every OCFS_VOLCFG_ITERATE_TIME currently 100 ms to reiterate we own the
+ * lock. If a node had the lock and crashed we will wait for 2+2 seconds
+ * and try and break the lock. If somebody else owned the lock we will
+ * do the same.
+ *
+ * Returns 0 if success, < 0 if error.
+ */
+int ocfs_config_with_disk_lock (ocfs_super * osb, ub8 LockOffset, ub1 * Buffer)
+{
+	int status = 0;
+	char *rd_buf = NULL;
+	char *lock_buf = NULL;
+	bool TriedAcquire = false;
+	bool BreakLock = false;
+	ocfs_disk_lock *DiskLock;
+	ocfs_cfg_task *cfg_task = NULL;
+	ub4 sect_size = osb->sect_size;
+
+	/*
+	 * Buffer is one sector holding this node's config; it doubles as
+	 * the lock record written to LockOffset. On success the node config
+	 * has been added through ocfs_add_to_disk_config() and the disk
+	 * lock has been released again before returning.
+	 */
+
+	LOG_ENTRY ();
+
+	/* Allocate buffer for reading the disk */
+	rd_buf = ocfs_malloc (sect_size);
+	if (rd_buf == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto finito;
+	}
+
+	cfg_task = ocfs_malloc (sizeof (ocfs_cfg_task));
+	if (cfg_task == NULL)
+	{
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto finito;
+	}
+
+	lock_buf = ocfs_malloc (sect_size);
+	if (lock_buf == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto finito;
+	}
+
+	/* initialize cfg_task with info reqd to reiterate the volcfg lock */
+	/*
+	 * NOTE(review): lock_off is the sector after new_cfg_off, while the
+	 * callers seen here pass new_cfg_off itself as LockOffset - confirm
+	 * the reiteration is meant to hit a different sector than the lock.
+	 */
+	cfg_task->osb = osb;
+	cfg_task->buffer = lock_buf;
+	cfg_task->lock_off = osb->vol_layout.new_cfg_off + sect_size;
+
+	/* Initialize the kernel timer */
+	init_timer(&osb->lock_timer);
+	osb->lock_timer.function = ocfs_assert_lock_owned;
+	osb->lock_timer.expires = 0;
+	osb->lock_timer.data = (ub4) cfg_task;
+	init_waitqueue_head (&osb->lock_event);
+	osb->lock_stop = 0;
+
+	while (1) {
+		/* Read the starting sector for the volume */
+		status = ocfs_read_disk (osb, rd_buf, sect_size, LockOffset);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto finito;
+		}
+
+		/* Check to see if the lock is currently owned or if we want to break */
+		/* the lock. The first time around we will not break the lock after */
+		/* which we always break it. */
+		DiskLock = (ocfs_disk_lock *) rd_buf;
+		if ((DiskLock->file_lock == 0) || (BreakLock)) {
+			if (DiskLock->file_lock != 0)
+				LOG_TRACE_STR ("Breaking the Node Config Lock");
+			else
+				LOG_TRACE_STR ("Locking Node Config");
+
+			/* Wait for Phase 1 is the O.S.latency between read/write. */
+			/* Copy our name into the lock, and set the owned byte. */
+			DiskLock = (ocfs_disk_lock *) Buffer;
+			DiskLock->file_lock = 1;
+
+			/* Write to the disk... */
+			status =
+			    ocfs_write_disk (osb, Buffer, sect_size, LockOffset);
+			if (status < 0) {
+				LOG_ERROR_STATUS (status);
+				goto finito;
+			}
+
+			/* Set the TriedAcquire, this determines if we need to do a */
+			/* second read or just wait. */
+			TriedAcquire = true;
+		}
+
+		ocfs_sleep (OCFS_VOLCFG_LOCK_TIME);	/* in ms */
+
+		/* Read the Disk... */
+		status = ocfs_read_disk (osb, rd_buf, sect_size, LockOffset);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto finito;
+		}
+
+		/* If we tried to acquire and we still own it we take it... */
+		if ((TriedAcquire) && (memcmp (rd_buf, Buffer, sect_size) == 0)) {
+			/* lock_buf is what ocfs_worker() keeps rewriting */
+			memcpy (lock_buf, Buffer, sect_size);
+
+			/* Set timer to reiterate lock every OCFS_VOLCFG_LOCK_ITERATE jiffies */
+			/* jiffies is an unsigned long, hence %lu */
+			LOG_TRACE_ARGS ("Start Timer: %lu\n", jiffies);
+			osb->lock_timer.expires = jiffies + OCFS_VOLCFG_LOCK_ITERATE;
+			add_timer(&osb->lock_timer);
+
+			/* Write the Config info into the config table */
+			/* file_lock is cleared so the stored config is not marked locked */
+			DiskLock = (ocfs_disk_lock *) Buffer;
+			DiskLock->file_lock = 0;
+			status = ocfs_add_to_disk_config (osb,
+					 (ocfs_disk_node_config_info *) Buffer);
+			if (status < 0)
+				LOG_ERROR_STATUS (status);
+			goto finito;
+		} else {
+			/* Either somebody got the lock or somebody already had it wait */
+			/* and then try to break it */
+			LOG_TRACE_STR
+			    ("Could not get lock for 2 sec, breaking other guys lock");
+			ocfs_sleep (OCFS_VOLCFG_LOCK_TIME);	/* in ms */
+			BreakLock = true;
+		}
+	}
+
+      finito:
+	/*
+	 * Release only what we took. If we never wrote our lock record the
+	 * reiterate timer was never started (and, on an early allocation
+	 * failure, neither the timer nor the wait queue has been set up),
+	 * and the lock sector on disk is not ours to clear.
+	 */
+	if (TriedAcquire)
+		ocfs_release_disk_lock (osb, LockOffset);
+
+	ocfs_safefree (rd_buf);
+	ocfs_safefree (lock_buf);
+	ocfs_safefree (cfg_task);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_config_with_disk_lock */
+
+/*
+ * ocfs_release_disk_lock()
+ *
+ * This function Cancels the timer to reiterate we own the disk lock and
+ * then frees it by writing the sector for the disk lock.
+ *
+ * Returns 0 if success, < 0 if error.
+ */ +int ocfs_release_disk_lock (ocfs_super * osb, ub8 LockOffset) +{ + int status = 0; + sb1 *buffer = NULL; + ub4 sect_size = osb->sect_size; + + LOG_ENTRY (); + + buffer = ocfs_malloc (sect_size); + if (buffer == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + + /* reset lock... */ + memset (buffer, 0, sect_size); + + /* Cancel the timer so that we don't reiterate the lock anymore */ + osb->lock_stop = 1; + LOG_TRACE_STR ("Waiting for osb->lock_event\n"); + ocfs_wait (&osb->lock_event, false, 0); + del_timer_sync(&osb->lock_timer); + + /* sleep with the hope that any unfinished lock iteration is over */ + ocfs_sleep (100); + + /* Release the lock */ + status = ocfs_write_disk (osb, buffer, sect_size, LockOffset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + +finally: + ocfs_safefree (buffer); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_release_disk_lock */ + +/* + * ocfs_cfg_worker() + * + */ +void ocfs_cfg_worker (ocfs_super * osb) +{ + int status = 0; + ocfs_disk_node_config_info *disk; + void *buffer = NULL; + ub8 offset; + ub4 i; + ub4 sect_size = osb->sect_size; + + LOG_ENTRY (); + + buffer = ocfs_malloc (sect_size); + if (buffer == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto bail; + } else + memset (buffer, 0, sect_size); + + disk = (ocfs_disk_node_config_info *) buffer; + + /* Dump stuff read from the local config file into the allocated mem */ + strncpy (disk->node_name, OcfsGlobalCtxt.node_name, + MAX_NODE_NAME_LENGTH); + disk->node_name[MAX_NODE_NAME_LENGTH] = '\0'; + + disk->num_interfaces = OcfsGlobalCtxt.num_ipc; + + for (i = 0; i < OcfsGlobalCtxt.num_ipc; i++) { + if (OcfsGlobalCtxt.comm_info[i].valid) { + disk->ipc_config[i].addr = + OcfsGlobalCtxt.comm_info[i].addr; + disk->ipc_config[i].port = + OcfsGlobalCtxt.comm_info[i].port; + disk->ipc_config[i].state = OCFS_IPC_STATE_CONFIG; + disk->ipc_config[i].mask = + OcfsGlobalCtxt.comm_info[i].mask; + disk->ipc_config[i].active = 
+ OcfsGlobalCtxt.comm_info[i].active; + disk->ipc_config[i].type = + OcfsGlobalCtxt.comm_info[i].type; + } + } + + /* Write this nodes config onto disk */ + offset = osb->vol_layout.new_cfg_off; + status = ocfs_config_with_disk_lock (osb, offset, (ub1 *) disk); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto bail; + } + + status = ocfs_chk_update_config (osb); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto bail; + } + + bail: + ocfs_safefree (buffer); + + LOG_EXIT (); + return; +} /* ocfs_cfg_worker */ + +/* + * ocfs_add_upd_ipc_cfg() + * + */ +int ocfs_add_upd_ipc_cfg (ocfs_node_config_info ** node, + ocfs_disk_node_config_info * disk) +{ + int status = 0; + ub4 i; + + LOG_ENTRY (); + + if (*node == NULL) { + *node = (ocfs_node_config_info *) + ocfs_malloc (sizeof (ocfs_node_config_info)); + if (*node == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto bail; + } + memset (*node, 0, sizeof (ocfs_node_config_info)); + } + + strncpy ((*node)->node_name, disk->node_name, MAX_NODE_NAME_LENGTH); + (*node)->node_name[MAX_NODE_NAME_LENGTH] = '\0'; + + for (i = 0; i < disk->num_interfaces; i++) { + (*node)->ipc_config[i].addr = disk->ipc_config[i].addr; + (*node)->ipc_config[i].port = disk->ipc_config[i].port; + (*node)->ipc_config[i].mask = disk->ipc_config[i].mask; + (*node)->ipc_config[i].state = disk->ipc_config[i].state; + (*node)->ipc_config[i].type = disk->ipc_config[i].type; + (*node)->ipc_config[i].active = disk->ipc_config[i].active; + + if ((*node)->ipc_config[i].active) + (*node)->primary_comm = i; + } + + (*node)->num_interfaces = disk->num_interfaces; + + bail: + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_add_upd_ipc_cfg */ + +/* + * ocfs_update_node_config() + * + * TODO.... We should be locking the nodecfg in the function. + * We will be able to implement that when we make ocfs_config_with_disk_lock() + * more flexible. 
+ */ +int ocfs_update_node_config (ocfs_super * osb) +{ + int status = 0; + ocfs_disk_node_config_info *node; + ub1 *buffer = NULL; + ub8 offset; + ub4 i; + ocfs_node_config_hdr *hdr; + ub4 sect_size = osb->sect_size; + + LOG_ENTRY (); + + buffer = ocfs_malloc (2 * sect_size); + if (buffer == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + + /* Use first sector to r/w hdr and second for node */ + hdr = (ocfs_node_config_hdr *) buffer; + node = (ocfs_disk_node_config_info *) (buffer + sect_size); + + /* Clear buffer */ + memset ((void *) node, 0, sect_size); + + /* Move the config info into the buffer */ + strncpy (node->node_name, OcfsGlobalCtxt.node_name, + MAX_NODE_NAME_LENGTH); + node->node_name[MAX_NODE_NAME_LENGTH] = '\0'; + + node->num_interfaces = OcfsGlobalCtxt.num_ipc; + + for (i = 0; i < OcfsGlobalCtxt.num_ipc; i++) { + if (OcfsGlobalCtxt.comm_info[i].valid) { + node->ipc_config[i].addr = + OcfsGlobalCtxt.comm_info[i].addr; + node->ipc_config[i].port = + OcfsGlobalCtxt.comm_info[i].port; + node->ipc_config[i].state = OCFS_IPC_STATE_CONFIG; + node->ipc_config[i].mask = + OcfsGlobalCtxt.comm_info[i].mask; + node->ipc_config[i].active = + OcfsGlobalCtxt.comm_info[i].active; + node->ipc_config[i].type = + OcfsGlobalCtxt.comm_info[i].type; + } else + node->ipc_config[i].state = OCFS_IPC_NOT_CONFIG; + } + + /* Write the node details */ + offset = osb->vol_layout.node_cfg_off + + ((osb->node_num + 2) * sect_size); + status = ocfs_write_disk (osb, (void *) node, sect_size, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + /* Update the header */ + offset = osb->vol_layout.node_cfg_off; + + /* Read the nodecfg header */ + status = ocfs_read_disk (osb, (void *) hdr, sect_size, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + /* Increment the seq# */ + hdr->seq_num++; + + /* Write the nodecfg header */ + status = ocfs_write_disk (osb, (void *) hdr, sect_size, offset); + if (status 
< 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + /* Write the pNodeCfgHdr into the second sector of NewCfg. */ + /* We do so so that we can read the NodeCfgHdr easily when we */ + /* read the PublishSector, for e.g. in ocfs_nm_thread() */ + offset = osb->vol_layout.new_cfg_off + osb->sect_size; + status = ocfs_write_disk (osb, (void *) hdr, sect_size, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + finally: + ocfs_safefree (buffer); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_update_node_config */ + +/* + * ocfs_chk_update_config() + * + */ +int ocfs_chk_update_config (ocfs_super * osb) +{ + int status = 0; + ocfs_node_config_hdr *hdr = NULL; + ocfs_disk_node_config_info *disk = NULL; + ub1 *buffer = NULL; + ub8 offset; + sb4 i; + ub4 sect_size = osb->sect_size; + ub1 *p; + + LOG_ENTRY (); + + /* Read in the config on the disk */ + offset = osb->vol_layout.node_cfg_off; + status = + ocfs_read_disk_ex (osb, (void **) &buffer, + osb->vol_layout.node_cfg_size, + osb->vol_layout.node_cfg_size, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + /* 1st block in buffer is the NodeCfgHdr */ + hdr = (ocfs_node_config_hdr *) buffer; + + if ((strncmp + (hdr->signature, NODE_CONFIG_HDR_SIGN, NODE_CONFIG_SIGN_LEN)) + || (hdr->version < NODE_MIN_SUPPORTED_VER) + || (hdr->version > NODE_CONFIG_VER)) { + LOG_ERROR_STATUS (status = -EINVAL); + goto finally; + } + + if ((!osb->cfg_initialized) || (osb->cfg_seq_num != hdr->seq_num) || + (osb->num_cfg_nodes != hdr->num_nodes)) { + /* Add each node which is not in the global ctxt to it, also */ + /* add the node num and config ptr in the osb */ + osb->num_cfg_nodes = hdr->num_nodes; + + /* Find our Volume Node number if present else configure this node */ + /* and obtain the same also find the Ip address, port if present */ + + /* NodeCfgInfo starts from the 3rd sector of NodeCfg */ + p = buffer + (2 * sect_size); + + /* Read the nodecfg for all 
possible nodes */ + for (i = 0; i < OCFS_MAXIMUM_NODES; i++, p += sect_size) { + disk = (ocfs_disk_node_config_info *) p; + if (disk->node_name[0] != '\0') { + status = + ocfs_add_upd_ipc_cfg (&osb->node_cfg_info[i], + disk); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + /* If node_num is not set, set it if the node_name matches */ + if (osb->node_num == OCFS_INVALID_NODE_NUM) { + if (!strncmp + (OcfsGlobalCtxt.node_name, + disk->node_name, + MAX_NODE_NAME_LENGTH)) + osb->node_num = i; + } + } + } + + osb->cfg_initialized = true; + osb->cfg_seq_num = hdr->seq_num; + } + + LOG_TRACE_ARGS ("Num of configured nodes (%u)\n", osb->num_cfg_nodes); + + /* Dump the node cfg infos for all nodes */ + ocfs_show_all_node_cfgs (osb); + + finally: + ocfs_safefree (buffer); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_chk_update_config */ + +/* + * ocfs_add_node_to_config() + * + */ +int ocfs_add_node_to_config (ocfs_super * osb) +{ + int status = 0; + + LOG_ENTRY (); + + ocfs_cfg_worker (osb); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_add_node_to_config */ + +/* + * ocfs_get_config() + * + */ +int ocfs_get_config (ocfs_super * osb) +{ + int status = 0; + + LOG_ENTRY (); + + /* Update our config info for this volume from the disk */ + ocfs_chk_update_config (osb); + + if (osb->node_num == OCFS_INVALID_NODE_NUM) { + status = ocfs_add_node_to_config (osb); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto bail; + } + } else { + /* Checks if the info in osb->node_cfg_info[osb->node_num] is */ + /* the same as in OcfsGlobalCtxt.comm_info. 
If not, it updates it */ + /* in osb */ + if (!ocfs_is_node_config_ok (osb)) { + status = ocfs_refresh_node_config (osb); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto bail; + } + } + } + + LOG_TRACE_ARGS ("Node Num: %d\n", osb->node_num); + + bail: + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_get_config */ + +/* + * ocfs_is_node_config_ok() + * + */ +bool ocfs_is_node_config_ok (ocfs_super * osb) +{ + ocfs_node_config_info *node; + ocfs_ipc_config_info *ipc; + ocfs_comm_info *g_ipc; + bool OK = true; + ub4 i; + + LOG_ENTRY (); + + node = osb->node_cfg_info[osb->node_num]; + ipc = node->ipc_config; + g_ipc = OcfsGlobalCtxt.comm_info; + + if (OcfsGlobalCtxt.num_ipc != node->num_interfaces) + OK = false; + + if (OK) { + for (i = 0; i < OcfsGlobalCtxt.num_ipc; ++i) { + if ((ipc[i].addr != g_ipc[i].addr) || + (ipc[i].port != g_ipc[i].port) || + (ipc[i].mask != g_ipc[i].mask) || + (ipc[i].type != g_ipc[i].type)) { + OK = false; + break; + } + } + } + + LOG_EXIT_LONG (OK); + return OK; +} /* ocfs_is_node_config_ok */ + +/* + * ocfs_refresh_node_config() + * + */ +int ocfs_refresh_node_config (ocfs_super * osb) +{ + ocfs_node_config_info *node; + ocfs_ipc_config_info *ipc; + ocfs_comm_info *g_ipc; + ub4 i; + int status = 0; + + LOG_ENTRY (); + + node = osb->node_cfg_info[osb->node_num]; + ipc = node->ipc_config; + g_ipc = OcfsGlobalCtxt.comm_info; + + for (i = 0; i < OcfsGlobalCtxt.num_ipc; ++i) { + if (g_ipc[i].valid) { + ipc[i].addr = g_ipc[i].addr; + ipc[i].port = g_ipc[i].port; + ipc[i].mask = g_ipc[i].mask; + ipc[i].active = g_ipc[i].active; + ipc[i].type = g_ipc[i].type; + ipc[i].state = OCFS_IPC_STATE_CONFIG; + if (ipc[i].active) + node->primary_comm = i; + } + } + + node->num_interfaces = OcfsGlobalCtxt.num_ipc; + + /* Update the NodeCfg on disk with the new info */ + status = ocfs_update_node_config (osb); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto bail; + } + + bail: + LOG_EXIT_STATUS (status); + return status; +} /* 
ocfs_refresh_node_config */ + +/* + * ocfs_show_all_node_cfgs() + * + */ +void ocfs_show_all_node_cfgs (ocfs_super * osb) +{ + ocfs_node_config_info *node; + ub4 i; + ub4 j; + + LOG_ENTRY (); + + for (i = 0; i < OCFS_MAXIMUM_NODES; i++) { + node = osb->node_cfg_info[i]; + + if (!node) + continue; + + if (node->node_name[0] == '\0') + continue; + + LOG_TRACE_ARGS + ("Node (%u) has name (%s) and has (%u) interconnects\n", i, + node->node_name, node->num_interfaces); + + for (j = 0; j < node->num_interfaces; j++) { + if (node->ipc_config[j].active) { + LOG_TRACE_ARGS + ("%d. ip=%d.%d.%d.%d, mask=%d.%d.%d.%d, " + "port=%d, state=%u\n", j, + NIPQUAD (node->ipc_config[j].addr), + NIPQUAD (node->ipc_config[j].mask), + node->ipc_config[j].port, + node->ipc_config[j].state); + } + } + } + + return; +} /* ocfs_show_all_node_cfgs */ diff -urNp ocfs/fs/ocfs/Common/ocfsgenvote.c 2.4.20pre5aa2/fs/ocfs/Common/ocfsgenvote.c --- ocfs/fs/ocfs/Common/ocfsgenvote.c Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Common/ocfsgenvote.c Fri Sep 6 01:46:16 2002 @@ -0,0 +1,890 @@ +/* + * ocfsgenvote.c + * + * IPC based DLM + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#include + +/* Tracing */ +#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_VOTE + +/* + * ocfs_send_vote_reply() + * + */ +int ocfs_send_vote_reply (ocfs_super * osb, + ocfs_dlm_msg * DlmMesg, ub4 VoteStatus, bool bHandleOpen) +{ + ub4 msgSize; + ocfs_dlm_req_master *DlmReqMaster; + ocfs_dlm_reply_master *ReplyDlmMesg; + ocfs_dlm_msg *SendDlmMesg; + ub8 VoteMap; + int status = 0; + + LOG_ENTRY (); + + DlmReqMaster = (ocfs_dlm_req_master *) DlmMesg->msg_buf; + +//SM??? why -1? + msgSize = sizeof (ocfs_dlm_msg) - 1 + sizeof (ocfs_dlm_reply_master); + + SendDlmMesg = ocfs_malloc (msgSize); + if (SendDlmMesg == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto bail; + } + + ocfs_init_dlm_msg (osb, SendDlmMesg, msgSize); + + SendDlmMesg->msg_type = OCFS_REPLY_MAKE_MASTER; + + ReplyDlmMesg = (ocfs_dlm_reply_master *) SendDlmMesg->msg_buf; + + ReplyDlmMesg->lock_id = DlmReqMaster->lock_id; + ReplyDlmMesg->status = VoteStatus; + ReplyDlmMesg->lock_seq_num = DlmReqMaster->lock_seq_num; + + VoteMap = (1 << DlmMesg->src_node); + ocfs_send_bcast (osb, VoteMap, SendDlmMesg); + + bail: + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_send_vote_reply */ + +/* + * ocfs_comm_vote_for_del_ren() + * + */ +int ocfs_comm_vote_for_del_ren (ocfs_super * osb, + ocfs_lock_res ** LockResource, ocfs_dlm_msg * DlmMesg) +{ + int status = 0; + ocfs_dlm_req_master *DlmReqMaster; + ub4 NodeAskingVote; + ub4 flags; + ub4 retryCount = 0; + bool acq_oin = false; + ocfs_file_entry *FileEntry = NULL; + ocfs_lock_res *pLockResource; + ocfs_sem *oin_sem = NULL; + + LOG_ENTRY (); + + pLockResource = *LockResource; + DlmReqMaster = (ocfs_dlm_req_master *) DlmMesg->msg_buf; + + flags = DlmReqMaster->flags; + NodeAskingVote = DlmMesg->src_node; + + if (pLockResource->oin) { + UPDATE_OIN (pLockResource->oin); + } + + LOG_TRACE_ARGS ("Vote for del ren for node (%u) for lock 0x%08x.0x%08x " 
+			" and seq %u.%u\n", NodeAskingVote,
+			HI (DlmReqMaster->lock_id), LO (DlmReqMaster->lock_id),
+			HI (DlmReqMaster->lock_seq_num),
+			LO (DlmReqMaster->lock_seq_num));
+
+	/* Check for oin */
+	if (pLockResource->oin != NULL) {
+		ocfs_inode *oin;
+
+		oin = pLockResource->oin;
+
+		oin_sem = &(oin->main_res);
+		ocfs_down_sem (oin_sem, true);
+		acq_oin = true;
+
+		/* If OIN_IN_USE is set we should go back and retry */
+		/* The semaphore is dropped across each 20 ms sleep; at most 5 retries. */
+		while ((oin->oin_flags & OCFS_OIN_IN_USE) && (retryCount < 5)) {
+			if ((acq_oin)) {
+				ocfs_up_sem (oin_sem);
+				acq_oin = false;
+			}
+
+			ocfs_sleep (20);	/* in ms */
+			retryCount++;
+
+			if (!acq_oin) {
+				ocfs_down_sem (oin_sem, true);
+				acq_oin = true;
+			}
+		}
+
+		/* Unreferenced and idle: the delete/rename may proceed */
+		if ((pLockResource->oin->ref_cnt == 0) &&
+		    (!(oin->oin_flags & OCFS_OIN_IN_USE))) {
+			if (!(oin->oin_flags & OCFS_OIN_IN_TEARDOWN)) {
+				if (acq_oin) {
+					ocfs_up_sem (oin_sem);
+					acq_oin = false;
+				}
+
+				ocfs_release_lockres (pLockResource);
+
+				ocfs_release_cached_oin (osb, oin);
+				ocfs_release_oin (oin, true);
+				/*
+				 * Only the local copy is cleared, which skips the
+				 * bookkeeping at 'finito' and the release at
+				 * 'finally'; *LockResource in the caller is left
+				 * unchanged.
+				 */
+				pLockResource = NULL;
+			}
+			ocfs_send_vote_reply (osb, DlmMesg, OCFS_DLM_VOTE_OK, false);
+			goto finito;
+		} else {
+			LOG_TRACE_ARGS
+			    ("Vote for del ren returned in use (%u) for "
+			     "Lock 0x%08x.0x%08x and Seq %u.%u\n",
+			     NodeAskingVote, HI (DlmReqMaster->lock_id),
+			     LO (DlmReqMaster->lock_id),
+			     HI (DlmReqMaster->lock_seq_num),
+			     LO (DlmReqMaster->lock_seq_num));
+			ocfs_send_vote_reply (osb, DlmMesg,
+					      OCFS_DLM_VOTE_OIN_ALREADY_INUSE, false);
+			/*
+			 * NOTE(review): pLockResource stays non-NULL, so it is
+			 * released again at 'finally' - confirm that
+			 * ocfs_release_lockres() tolerates a second release.
+			 */
+			ocfs_release_lockres (pLockResource);
+			goto finito;
+		}
+	} else {
+		LOG_TRACE_ARGS
+		    ("Vote for del ren voted to del/ren (%u) for lock "
+		     "0x%08x.0x%08x and Seq %u.%u\n", NodeAskingVote,
+		     HI (DlmReqMaster->lock_id), LO (DlmReqMaster->lock_id),
+		     HI (DlmReqMaster->lock_seq_num),
+		     LO (DlmReqMaster->lock_seq_num));
+		ocfs_send_vote_reply (osb, DlmMesg, OCFS_DLM_VOTE_OK, false);
+		/* NOTE(review): released here and again at 'finally' - see above. */
+		ocfs_release_lockres (pLockResource);
+		goto finito;
+	}
+
+      finito:
+	/* Set the always update master on open flag */
+	if (pLockResource) {
+		pLockResource->lock_state |= FLAG_ALWAYS_UPDATE_OPEN;
+		pLockResource->last_upd_seq_num = DlmReqMaster->lock_seq_num;
+
+		/* Record the requester as master when none is known or the known one is dead */
+		if (pLockResource->master_node_num != OCFS_INVALID_NODE_NUM) {
+			if (!IS_NODE_ALIVE
+			    (osb->publ_map, pLockResource->master_node_num,
+			     OCFS_MAXIMUM_NODES)) {
+				pLockResource->master_node_num = NodeAskingVote;
+			}
+		} else {
+			pLockResource->master_node_num = NodeAskingVote;
+		}
+
+		/* Change the master if there is no lock */
+		if ((pLockResource->master_node_num == osb->node_num) &&
+		    (pLockResource->lock_state <= OCFS_DLM_SHARED_LOCK)) {
+			ub8 tmp = DlmReqMaster->lock_id;	/* lock id is also passed as the disk offset below */
+
+			/* Change the lock ownership to the node asking for vote */
+			status =
+			    ocfs_get_file_entry (osb, &FileEntry,
+						 DlmReqMaster->lock_id);
+			if (status < 0) {
+				LOG_ERROR_STATUS (status);
+				goto finally;
+			}
+
+			/* Write new master on the disk */
+			DISK_LOCK_CURRENT_MASTER (FileEntry) = NodeAskingVote;
+
+			status =
+			    ocfs_write_disk (osb, FileEntry, osb->sect_size, tmp);
+			if (status < 0) {
+				LOG_ERROR_STATUS (status);
+				goto finally;
+			}
+			pLockResource->master_node_num = NodeAskingVote;
+		}
+	}
+
+      finally:
+	/* Common cleanup: file entry, oin semaphore (if still held), lock resource */
+	if (FileEntry)
+		ocfs_release_file_entry (FileEntry);
+
+	if (acq_oin && oin_sem) {
+		ocfs_up_sem (oin_sem);
+		acq_oin = false;
+	}
+
+	if (pLockResource)
+		ocfs_release_lockres (pLockResource);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_comm_vote_for_del_ren */
+
+/*
+ * ocfs_find_lockres()
+ *
+ * Look up the lock resource for LockId with ocfs_lookup_sector_node().
+ * *LockResource is set to the resource when the lookup status is >= 0 and
+ * to NULL otherwise.  Returns the lookup status unchanged.
+ */
+int ocfs_find_lockres (ocfs_super * osb, ub8 LockId, ocfs_lock_res ** LockResource)
+{
+	int status = -ENOENT;
+	ocfs_lock_res *lockResource = NULL;
+
+	LOG_ENTRY ();
+
+	*LockResource = NULL;
+
+	status = ocfs_lookup_sector_node (osb, LockId, &lockResource);
+	if (status >= 0) {
+		*LockResource = lockResource;
+	}
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_find_lockres */
+
+/*
+ * ocfs_check_ipc_msg()
+ *
+ * Sanity-check the header of a received DLM packet.
+ * Returns true when the packet is acceptable, false otherwise.
+ */
+bool ocfs_check_ipc_msg (ub1 * Mesg, ub4 Length)
+{
+	bool bret = false;
+	ocfs_dlm_msg *DlmMesg;
+
+	LOG_ENTRY ();
+
+	DlmMesg =
(ocfs_dlm_msg *) Mesg;
+
+	if (DlmMesg == NULL)
+		goto bail;
+
+	/*
+	 * Reject packets too short to hold a message header before any header
+	 * field is read.  The bound uses the same "sizeof - 1" header size
+	 * that ocfs_send_vote_reply() uses when it builds a message.
+	 */
+	if (Length < sizeof (ocfs_dlm_msg) - 1) {
+		LOG_ERROR_ARGS ("message too short: %u bytes\n", Length);
+		goto bail;
+	}
+
+	/* Verify the magic number (no checksum is computed here) */
+	if (DlmMesg->magic != OCFS_DLM_MSG_MAGIC) {
+		LOG_ERROR_ARGS ("magic number did not match: %d != %d\n",
+				DlmMesg->magic, OCFS_DLM_MSG_MAGIC);
+		goto bail;
+	}
+
+	/*
+	 * The source node is later used as a bit index (1 << src_node) and
+	 * with IS_NODE_ALIVE (..., OCFS_MAXIMUM_NODES), so it must be a valid
+	 * node number, i.e. strictly below OCFS_MAXIMUM_NODES.
+	 */
+	if ((DlmMesg->src_node < 0) ||
+	    (DlmMesg->src_node >= OCFS_MAXIMUM_NODES)) {
+		LOG_ERROR_ARGS ("source node was invalid: %d\n",
+				DlmMesg->src_node);
+		goto bail;
+	}
+
+	bret = true;
+
+      bail:
+	LOG_EXIT_LONG (bret);
+	return bret;
+}				/* ocfs_check_ipc_msg */
+
+/*
+ * ocfs_find_osb()
+ *
+ * Walk the OcfsGlobalCtxt.osb_next list, holding OcfsGlobalCtxt.res, and
+ * return in *osb the first ocfs_super whose vol_layout.id matches the
+ * first MAX_VOL_ID_LENGTH bytes of VolumeID.  *osb is set to NULL when no
+ * entry matches.  The semaphore is released before returning on both paths.
+ */
+void ocfs_find_osb (sb1 * VolumeID, ocfs_super ** osb)
+{
+	bool GlobalResourceAcquired = false;
+	struct list_head *iterEntry;
+	ocfs_super *tmp_osb = NULL;
+
+	LOG_ENTRY ();
+
+	ocfs_down_sem (&(OcfsGlobalCtxt.res), true);
+	GlobalResourceAcquired = true;
+
+	list_for_each (iterEntry, &(OcfsGlobalCtxt.osb_next)) {
+		tmp_osb = list_entry (iterEntry, ocfs_super, osb_next);
+
+		if (memcmp (tmp_osb->vol_layout.id, VolumeID, MAX_VOL_ID_LENGTH)
+		    == 0) {
+			*osb = tmp_osb;
+
+			if (GlobalResourceAcquired) {
+				ocfs_up_sem (&(OcfsGlobalCtxt.res));
+				GlobalResourceAcquired = false;
+			}
+			goto bail;
+		}
+	}
+
+	if (GlobalResourceAcquired) {
+		ocfs_up_sem (&(OcfsGlobalCtxt.res));
+		GlobalResourceAcquired = false;
+	}
+
+	*osb = NULL;
+
+      bail:
+	LOG_EXIT ();
+	return;
+}				/* ocfs_find_osb */
+
+/*
+ * ocfs_find_create_lockres()
+ *
+ * Return in *LockResource the lock resource for LockId.  If
+ * ocfs_lookup_sector_node() does not find one, a new resource is allocated
+ * from OcfsGlobalCtxt.lockres_cache, initialised, flagged LOCK_STATE_INIT
+ * and inserted with ocfs_insert_sector_node().
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails (in which case
+ * *LockResource is NULL).
+ */
+int ocfs_find_create_lockres (ocfs_super * osb, ub8 LockId, ocfs_lock_res ** LockResource)
+{
+	int status = 0;
+	ocfs_lock_res *pLockResource = NULL;
+
+	LOG_ENTRY ();
+
+	*LockResource = NULL;
+
+	status = ocfs_lookup_sector_node (osb, LockId, &pLockResource);
+	if (status >= 0) {
+		*LockResource = pLockResource;
+	} else {
+		/* A failed lookup is not an error here: create the resource */
+		status = 0;
+		pLockResource = kmem_cache_alloc (OcfsGlobalCtxt.lockres_cache, GFP_KERNEL);
+		if (pLockResource == NULL) {
+			LOG_ERROR_STATUS (status = -ENOMEM);
+			goto bail;
+		}
+
+		/* Init Resource */
+		ocfs_init_lockres (osb, pLockResource, LockId);
+
OCFS_SET_FLAG (pLockResource->lock_state, LOCK_STATE_INIT);
+
+		ocfs_insert_sector_node (osb, pLockResource);
+		*LockResource = pLockResource;
+	}
+
+      bail:
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_find_create_lockres */
+
+/*
+ * ocfs_comm_process_vote()
+ *
+ * Handle a vote request (DlmMesg, payload ocfs_dlm_req_master) received
+ * for volume osb and send the vote back with ocfs_send_vote_reply().
+ * The request flags are examined in this order:
+ *
+ *   - lock resource freshly created (LOCK_STATE_INIT), or this node is both
+ *     master and requester: vote OK
+ *   - FLAG_FILE_DELETE / FLAG_FILE_RENAME: ocfs_comm_vote_for_del_ren()
+ *   - FLAG_FILE_RELEASE_CACHE: flush the cache, drop the on-disk file lock
+ *     to OCFS_DLM_NO_LOCK, vote OK
+ *   - FLAG_FILE_UPDATE_OIN: UPDATE_OIN on the attached oin, vote OK
+ *   - otherwise: master arbitration based on master_node_num, node
+ *     liveness and LOCK_STATE_IN_VOTING
+ *
+ * Returns 0 or a negative status from the helpers that read/write the
+ * file entry or send the reply.
+ *
+ * NOTE(review): several paths call ocfs_release_lockres() explicitly and
+ * then reach 'finally', which releases LockResource again - confirm that
+ * ocfs_release_lockres() tolerates repeated release.
+ */
+int ocfs_comm_process_vote (ocfs_super * osb, ocfs_dlm_msg * DlmMesg)
+{
+	int status = 0;
+	ocfs_lock_res *LockResource = NULL;
+	ocfs_dlm_req_master *DlmReqMaster;
+	ub4 NodeAskingVote = OCFS_INVALID_NODE_NUM;
+	ub4 flags;
+	bool acq_oin = false;
+	ub8 offset;
+	ub4 length;
+	ocfs_file_entry *FileEntry = NULL;
+
+	LOG_ENTRY ();
+
+	DlmReqMaster = (ocfs_dlm_req_master *) DlmMesg->msg_buf;
+
+	LOG_TRACE_ARGS ("Called from node (%u) for Lock 0x%08x.0x%08x, "
+			"Seq %u.%u\n", DlmMesg->src_node,
+			HI (DlmReqMaster->lock_id), LO (DlmReqMaster->lock_id),
+			HI (DlmReqMaster->lock_seq_num),
+			LO (DlmReqMaster->lock_seq_num));
+
+	/* Find the resource */
+	/* If resource is not found create one and set the init state on it */
+	status = ocfs_find_create_lockres (osb, DlmReqMaster->lock_id, &LockResource);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	/* If there is no resource we don't care about the resource so vote */
+	ocfs_acquire_lockres (LockResource);
+
+	if (LockResource->lock_state & LOCK_STATE_INIT) {
+		/* We are done, build a packet to return success to the caller */
+		status = ocfs_send_vote_reply (osb, DlmMesg, OCFS_DLM_VOTE_OK, false);
+		ocfs_release_lockres (LockResource);
+		goto finally;
+	}
+
+	/* Request came from this node and this node is already the master */
+	if ((LockResource->master_node_num == osb->node_num) &&
+	    (DlmMesg->src_node == osb->node_num)) {
+		status = ocfs_send_vote_reply (osb, DlmMesg, OCFS_DLM_VOTE_OK, false);
+		ocfs_release_lockres (LockResource);
+		goto finally;
+	}
+
+	flags = DlmReqMaster->flags;
+	NodeAskingVote = DlmMesg->src_node;
+
+	/* If there is a resource check the state */
+
+	if ((flags & FLAG_FILE_DELETE) || (flags & FLAG_FILE_RENAME)) {
+		/*
+		 * NOTE(review): the callee releases the lock resource (and may
+		 * release the oin) but never updates LockResource, so
+		 * 'finally' releases it once more.
+		 */
+		status = ocfs_comm_vote_for_del_ren (osb, &LockResource, DlmMesg);
+		goto finally;
+	}
+
+	/* Cache handling */
+	if (flags & FLAG_FILE_RELEASE_CACHE) {
+		ocfs_file_entry *TempEntry = NULL;
+		ub4 i;
+
+		i = 0;
+
+		LOG_TRACE_STR ("Called for FLAG_FILE_RELEASE_CACHE");
+
+		if (!osb->commit_cache_exec) {
+			osb->needs_flush = true;
+
+			/* Wait up to 10 x 100 ms for a running transaction to end */
+			while ((osb->trans_in_progress) && (i < 10)) {
+				ocfs_sleep (100);	/* in ms */
+				i++;
+			}
+
+			if (osb->trans_in_progress == false) {
+				osb->commit_cache_exec = true;
+				ocfs_commit_cache (osb, true);
+				osb->needs_flush = false;
+				osb->commit_cache_exec = false;
+			}
+
+			{
+				length = osb->sect_size;
+				offset = DlmReqMaster->lock_id;
+
+				status =
+				    ocfs_get_file_entry (osb, &TempEntry,
+							 DlmReqMaster->lock_id);
+				if (status < 0) {
+					LOG_ERROR_STATUS (status);
+					goto finally;
+				}
+
+				/* Drop any on-disk file lock to NO_LOCK */
+				if (DISK_LOCK_FILE_LOCK (TempEntry) >
+				    OCFS_DLM_NO_LOCK) {
+					DISK_LOCK_FILE_LOCK (TempEntry) =
+					    OCFS_DLM_NO_LOCK;
+
+					status =
+					    ocfs_write_force_disk (osb, TempEntry,
+								   length, offset);
+					if (status < 0) {
+						LOG_ERROR_STATUS (status);
+						/*
+						 * TempEntry is local to this
+						 * scope; free it here or it is
+						 * lost once we jump to 'finally'.
+						 */
+						ocfs_safefree (TempEntry);
+						goto finally;
+					}
+
+					LockResource->lock_type =
+					    OCFS_DLM_NO_LOCK;
+				}
+
+				if (TempEntry) {
+					ocfs_safefree (TempEntry);
+					TempEntry = NULL;
+				}
+			}
+
+			ocfs_send_vote_reply (osb, DlmMesg, OCFS_DLM_VOTE_OK, false);
+			ocfs_release_lockres (LockResource);
+			LOG_TRACE_STR ("Exiting for FLAG_FILE_RELEASE_CACHE");
+			status = 0;
+			goto finally;
+		}
+		/* A commit is already executing: fall through to the checks below */
+		LOG_TRACE_ARGS ("Exiting for FLAG_FILE_RELEASE_CACHE");
+	}
+	/* end cache handling */
+
+	if (flags & FLAG_FILE_UPDATE_OIN) {
+		ocfs_inode *oin;
+
+		LOG_TRACE_ARGS
+		    ("Got update oin from node (%u) for Lock 0x%08x.0x%08x, "
+		     "Seq %u.%u\n", DlmMesg->src_node,
+		     HI (DlmReqMaster->lock_id), LO (DlmReqMaster->lock_id),
+		     HI (DlmReqMaster->lock_seq_num),
+		     LO (DlmReqMaster->lock_seq_num));
+
+		/* Set the verify oin flag on the oin....??? */
+		/* Assumption is that we have Lock resource or oin lock */
+
+		if (LockResource->oin != NULL) {
+			oin = LockResource->oin;
+
+			ocfs_down_sem (&(oin->main_res), true);
+			acq_oin = true;
+
+			/* Get the main resource too */
+			UPDATE_OIN (LockResource->oin);
+
+			if (acq_oin) {
+				ocfs_up_sem (&(oin->main_res));
+				acq_oin = false;
+			}
+		}
+		ocfs_send_vote_reply (osb, DlmMesg, OCFS_DLM_VOTE_OK, false);
+		ocfs_release_lockres (LockResource);
+		goto finally;
+	}
+
+	/* If there is a masternode and it is alive ask the node */
+	/* asking for vote to update its state */
+	if (LockResource->master_node_num != OCFS_INVALID_NODE_NUM) {
+		if (LockResource->master_node_num == osb->node_num) {
+			/*
+			 * This node is the master.
+			 * NOTE(review): if neither FLAG_CHANGE_MASTER nor
+			 * FLAG_ADD_OIN_MAP is set, no reply is sent at all.
+			 */
+			if (flags & FLAG_CHANGE_MASTER) {
+				ub8 tmp = DlmReqMaster->lock_id;
+
+				ocfs_commit_cache (osb, true);
+				status =
+				    ocfs_get_file_entry (osb, &FileEntry,
+							 DlmReqMaster->lock_id);
+				if (status < 0) {
+					LOG_ERROR_STATUS (status);
+					goto finally;
+				}
+
+				LOG_TRACE_ARGS
+				    ("Got change Master from Node (%u) for "
+				     "Lock 0x%08x.0x%08x, Seq %u.%u\n",
+				     NodeAskingVote, HI (DlmReqMaster->lock_id),
+				     LO (DlmReqMaster->lock_id),
+				     HI (DlmReqMaster->lock_seq_num),
+				     LO (DlmReqMaster->lock_seq_num));
+
+				/* Keep this node in the open map if it has the file open */
+				if (LockResource->oin) {
+					/* 64-bit one: avoids int sign-extension for node 31 */
+					DISK_LOCK_OIN_MAP (FileEntry) |=
+					    ((ub8) 1 << osb->node_num);
+				}
+
+				DISK_LOCK_CURRENT_MASTER (FileEntry) =
+				    NodeAskingVote;
+
+				/* Write new master on the disk */
+				status =
+				    ocfs_write_disk (osb, FileEntry,
+						     osb->sect_size, tmp);
+				if (status < 0) {
+					LOG_ERROR_STATUS (status);
+					goto finally;
+				}
+
+				LockResource->master_node_num = NodeAskingVote;
+				ocfs_release_lockres (LockResource);
+				ocfs_send_vote_reply (osb, DlmMesg, OCFS_DLM_VOTE_OK,
+						      false);
+				goto finally;
+			} else if (flags & FLAG_ADD_OIN_MAP) {
+				ub8 tmp = DlmReqMaster->lock_id;
+
+				status =
+				    ocfs_get_file_entry (osb, &FileEntry,
+							 DlmReqMaster->lock_id);
+				if (status < 0) {
+					LOG_ERROR_STATUS (status);
+					goto finally;
+				}
+
+				LOG_TRACE_ARGS
+				    ("Got add oin map from Node (%x) for "
+				     "Lock 0x%08x.0x%08x, Seq %u.%u\n",
+				     NodeAskingVote, HI (DlmReqMaster->lock_id),
+				     LO (DlmReqMaster->lock_id),
+				     HI (DlmReqMaster->lock_seq_num),
+				     LO (DlmReqMaster->lock_seq_num));
+
+				/* Entry deleted or no longer valid: tell the requester */
+				if ((FileEntry->
+				     sync_flags & OCFS_SYNC_FLAG_NAME_DELETED)
+				    ||
+				    (!(FileEntry->
+				       sync_flags & OCFS_SYNC_FLAG_VALID))) {
+					ocfs_release_lockres (LockResource);
+					ocfs_send_vote_reply (osb, DlmMesg,
+							      OCFS_DLM_VOTE_FILE_DEL,
+							      false);
+					goto finally;
+				} else {
+					/* 64-bit one: avoids int sign-extension for node 31 */
+					DISK_LOCK_OIN_MAP (FileEntry) |=
+					    ((ub8) 1 << NodeAskingVote);
+
+					/* Write new map on the disk */
+					status =
+					    ocfs_write_disk (osb, FileEntry,
+							     osb->sect_size, tmp);
+					if (status < 0) {
+						LOG_ERROR_STATUS (status);
+						goto finally;
+					}
+
+					/* Add this node to the oin map on the file entry */
+					LockResource->oin_openmap =
+					    DISK_LOCK_OIN_MAP (FileEntry);
+					ocfs_release_lockres (LockResource);
+					ocfs_send_vote_reply (osb, DlmMesg,
+							      OCFS_DLM_VOTE_OK, false);
+					goto finally;
+				}
+			}
+		} else {
+			/* Some other node is recorded as master */
+			if (IS_NODE_ALIVE
+			    (osb->publ_map, LockResource->master_node_num,
+			     OCFS_MAXIMUM_NODES)) {
+				LOG_TRACE_ARGS
+				    ("Got Master Req from Node (%u) for "
+				     "Lock 0x%08x.0x%08x, Seq %u.%u, Currmaster is (%u)\n",
+				     NodeAskingVote, HI (DlmReqMaster->lock_id),
+				     LO (DlmReqMaster->lock_id),
+				     HI (DlmReqMaster->lock_seq_num),
+				     LO (DlmReqMaster->lock_seq_num),
+				     LockResource->master_node_num);
+
+				/* We have no business voting on this lock */
+				ocfs_send_vote_reply (osb, DlmMesg,
+						      OCFS_DLM_VOTE_UPDATE_RETRY,
+						      false);
+			} else {
+				bool OpenHandle = false;
+
+				/* Master Node is dead and a vote is needed to create a new master */
+				LOG_TRACE_ARGS
+				    ("Got Master Req from Node (%u) for Lock "
+				     "0x%08x.0x%08x, Seq %u.%u, Current master is dead\n",
+				     NodeAskingVote, HI (DlmReqMaster->lock_id),
+				     LO (DlmReqMaster->lock_id),
+				     HI (DlmReqMaster->lock_seq_num),
+				     LO (DlmReqMaster->lock_seq_num));
+
+				if ((LockResource->
+				     lock_state & LOCK_STATE_IN_VOTING)
+				    && (NodeAskingVote < osb->node_num)) {
+					/* If our node number is > his we win so send a mesg to */
+					/* him to retry */
+					ocfs_send_vote_reply (osb, DlmMesg,
+							      OCFS_DLM_VOTE_UPDATE_RETRY,
+							      OpenHandle);
+					goto finally;
+				} else {
+					/* Report an open handle for file extend/update requests */
+					if ((!(flags & FLAG_DIR))
+					    && ((flags & FLAG_FILE_EXTEND)
+						|| (flags & FLAG_FILE_UPDATE))) {
+						if (LockResource->oin) {
+							OpenHandle = true;
+						}
+					}
+					ocfs_send_vote_reply (osb, DlmMesg,
+							      OCFS_DLM_VOTE_UPDATE_RETRY,
+							      OpenHandle);
+				}
+			}
+		}
+	} else {
+		/* No master is recorded for this lock */
+		bool OpenHandle = false;
+
+		if ((LockResource->lock_state & LOCK_STATE_IN_VOTING) &&
+		    (NodeAskingVote < osb->node_num)) {
+			/* If our node number is > his we win so send a mesg to him to retry */
+			ocfs_send_vote_reply (osb, DlmMesg, OCFS_DLM_VOTE_UPDATE_RETRY,
+					      OpenHandle);
+			goto finally;
+		} else {
+			/* Vote for the node */
+			LOG_TRACE_ARGS
+			    ("Got Master Req from Node (%u) for Lock "
+			     "0x%08x.0x%08x, Seq %u.%u, No current master\n",
+			     NodeAskingVote, HI (DlmReqMaster->lock_id),
+			     LO (DlmReqMaster->lock_id),
+			     HI (DlmReqMaster->lock_seq_num),
+			     LO (DlmReqMaster->lock_seq_num));
+
+			/* Report an open handle for file extend/update requests */
+			if ((!(flags & FLAG_DIR))
+			    && ((flags & FLAG_FILE_EXTEND)
+				|| (flags & FLAG_FILE_UPDATE))) {
+				if (LockResource->oin) {
+					OpenHandle = true;
+				}
+			}
+
+			LOG_TRACE_ARGS
+			    ("Voting for Node (%u) for Lock 0x%08x.0x%08x, "
+			     "Seq %u.%u, Vote %d, Openhandle %d\n",
+			     NodeAskingVote, HI (DlmReqMaster->lock_id),
+			     LO (DlmReqMaster->lock_id),
+			     HI (DlmReqMaster->lock_seq_num),
+			     LO (DlmReqMaster->lock_seq_num), OCFS_DLM_VOTE_OK,
+			     OpenHandle);
+
+			ocfs_send_vote_reply (osb, DlmMesg, OCFS_DLM_VOTE_OK,
+					      OpenHandle);
+			goto finally;
+		}
+	}
+
+      finally:
+	/* Common cleanup for every path above */
+	if (FileEntry)
+		ocfs_release_file_entry (FileEntry);
+
+	if (LockResource)
+		ocfs_release_lockres (LockResource);
+
+	LOG_TRACE_ARGS
+	    ("Exited from Node (%u) for Lock 0x%08x.0x%08x, Seq %u.%u\n",
+	     NodeAskingVote, HI (DlmReqMaster->lock_id),
+	     LO (DlmReqMaster->lock_id), HI (DlmReqMaster->lock_seq_num),
+	     LO (DlmReqMaster->lock_seq_num));
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_comm_process_vote */
+
+/*
+ * ocfs_comm_process_vote_reply()
+ *
+ * Record a vote reply on the lock resource it refers to and wake the
+ * waiter on the resource's voted_event when appropriate.
+ */
+int
ocfs_comm_process_vote_reply (ocfs_super * osb, ocfs_dlm_msg * dlm_msg)
+{
+	int status = 0;
+	ocfs_lock_res *lockres = NULL;
+	ocfs_dlm_reply_master *reply;
+
+	LOG_ENTRY ();
+
+	reply = (ocfs_dlm_reply_master *) dlm_msg->msg_buf;
+
+	/* Search for the resource on the recv list and */
+	/* based on the vote do appropriate work */
+	status = ocfs_find_lockres (osb, reply->lock_id, &lockres);
+	if (status < 0)
+		goto bail;	/* leave through the common exit so LOG_EXIT_STATUS runs */
+
+	/* Only replies for the vote currently in progress are counted */
+	if ((lockres->lock_state & LOCK_STATE_IN_VOTING) &&
+	    (lockres->last_upd_seq_num == reply->lock_seq_num)) {
+		if (reply->status == OCFS_DLM_VOTE_OK) {
+			/* Accumulate all the votes... */
+			lockres->vote_status = OCFS_DLM_VOTE_OK;
+			/* 64-bit one: avoids int sign-extension for node 31 */
+			lockres->got_vote_map |= ((ub8) 1 << dlm_msg->src_node);
+
+			LOG_TRACE_ARGS ("OCFS_DLM_VOTE_OK Src Node %u\n",
+					dlm_msg->src_node);
+
+			/* Every requested node has answered OK: wake the waiter */
+			if (lockres->got_vote_map == lockres->req_vote_map) {
+				/*
+				 * The format has two %u conversions; pass both
+				 * halves of the map, as done for other 64-bit
+				 * values in this file.
+				 */
+				LOG_TRACE_ARGS ("OCFS_DLM_VOTE_OK Vote from "
+						"all Node (%u.%u)\n",
+						HI (lockres->got_vote_map),
+						LO (lockres->got_vote_map));
+				lockres->vote_status = 0;
+				wake_up ((wait_queue_head_t *) lockres->voted_event);
+			}
+		} else {
+			LOG_TRACE_ARGS ("OCFS_DLM_VOTE_NOT OK status (0x%08x\n",
+					reply->status);
+			/* switch on status and determine the action to take */
+			lockres->vote_status = reply->status;
+			wake_up ((wait_queue_head_t *) lockres->voted_event);
+		}
+	} else {
+		/* Reply does not match a vote in progress on this resource */
+		OCFS_BREAKPOINT ();
+	}
+
+      bail:
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_comm_process_vote_reply */
+
+/*
+ * ocfs_dlm_recv_msg()
+ *
+ * Completion handler for one received packet.  Arg is an
+ * ocfs_recv_comp_context; the packet is validated and dispatched, its
+ * receive slot is marked free, and the context is freed.
+ */
+void ocfs_dlm_recv_msg (void *Arg)
+{
+	ocfs_recv_context *recv_ctxt;
+	ub1 *DlmPacket;
+	ub4 freeIndex;
+	sb4 numPosted;
+	ocfs_recv_comp_context *recv_comp_ctxt = (ocfs_recv_comp_context *) Arg;
+
+	LOG_ENTRY ();
+
+	atomic_dec (&(recv_comp_ctxt->recv_ctxt->num_posted));
+	numPosted = atomic_read (&(recv_comp_ctxt->recv_ctxt->num_posted));
+
+	/* Posted-receive count fell below the low-water mark: wake the waiter on event */
+	if (numPosted < OCFS_LOW_MARK_UDP)
+		wake_up (recv_comp_ctxt->recv_ctxt->event);
+
+	DlmPacket = (ub1 *)
+	    recv_comp_ctxt->recv_ctxt->recv_packet[recv_comp_ctxt->index];
+
+	if (recv_comp_ctxt->status
>= 0) {
+		if (ocfs_check_ipc_msg (DlmPacket, recv_comp_ctxt->recvd_len))
+			ocfs_comm_process_msg (DlmPacket);
+		else
+			LOG_ERROR_STR ("Received an Invalid Packet");
+	}
+
+	recv_ctxt = recv_comp_ctxt->recv_ctxt;
+	freeIndex = recv_comp_ctxt->index;
+
+	/* Hand the receive slot back under free_lock */
+	ocfs_down_sem (recv_ctxt->free_lock, true);
+	recv_ctxt->free[freeIndex] = true;
+	recv_ctxt->num_used--;
+	ocfs_up_sem (recv_ctxt->free_lock);
+
+	ocfs_safefree (recv_comp_ctxt->work_item);
+	ocfs_safefree (recv_comp_ctxt);
+
+	LOG_EXIT ();
+	return;
+}				/* ocfs_dlm_recv_msg */
+
+/*
+ * ocfs_comm_process_msg()
+ *
+ * Dispatch a received DLM message to its handler.  The target volume is
+ * found from the message's vol_id; vote requests go to
+ * ocfs_comm_process_vote() and vote replies to
+ * ocfs_comm_process_vote_reply().
+ *
+ * Returns the handler's status.  Returns 0 when no volume matches the
+ * vol_id or the message type is not one of the four handled here.
+ */
+int ocfs_comm_process_msg (ub1 * Mesg)
+{
+	int status = 0;
+	ocfs_super *osb;
+	ocfs_dlm_msg *DlmMesg;
+
+	LOG_ENTRY ();
+
+	DlmMesg = (ocfs_dlm_msg *) Mesg;
+
+	ocfs_find_osb (DlmMesg->vol_id, &osb);
+
+	/* Message is for a volume that is not known on this node */
+	if (osb == NULL)
+		goto bail;
+
+	/* Record the fact that we received a message from a particular node */
+	/* so that we know which node is sending on which comm channel... */
+
+	/* Keep each handler's result so the exit trace and caller see failures */
+	switch (DlmMesg->msg_type) {
+	    case OCFS_REQUEST_MAKE_MASTER:
+		    LOG_TRACE_STR ("Called OCFS_REQUEST_MAKE_MASTER");
+		    status = ocfs_comm_process_vote (osb, DlmMesg);
+		    break;
+	    case OCFS_DISK_VOTE_REQUEST:
+		    LOG_TRACE_STR ("Called OCFS_DISK_VOTE_REQUEST");
+		    status = ocfs_comm_process_vote (osb, DlmMesg);
+		    break;
+
+	    case OCFS_REPLY_MAKE_MASTER:
+		    LOG_TRACE_STR ("Called OCFS_REPLY_MAKE_MASTER");
+		    status = ocfs_comm_process_vote_reply (osb, DlmMesg);
+		    break;
+	    case OCFS_DISK_VOTE_REPLY:
+		    LOG_TRACE_STR ("Called OCFS_DISK_VOTE_REPLY");
+		    status = ocfs_comm_process_vote_reply (osb, DlmMesg);
+		    break;
+
+	    default:
+		    break;
+	}
+
+      bail:
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_comm_process_msg */
diff -urNp ocfs/fs/ocfs/Common/ocfsver.c 2.4.20pre5aa2/fs/ocfs/Common/ocfsver.c
--- ocfs/fs/ocfs/Common/ocfsver.c Thu Jan 1 01:00:00 1970
+++ 2.4.20pre5aa2/fs/ocfs/Common/ocfsver.c Fri Sep 6 01:46:16 2002
@@ -0,0 +1,11 @@
+#include
+
+#include
+
+#define OCFS_VERSION_STR "2"
+#define OCFS_BUILD_STR "14225afeef205e279b9191562d9df555"
+
+void
ocfs_version_print (void)	/* print the version/build banner at KERN_INFO */
+{
+	printk(KERN_INFO "Oracle Cluster FileSystem " OCFS_VERSION_STR " (build " OCFS_BUILD_STR ")\n");	/* '\n' ends the log line */
+}
diff -urNp ocfs/fs/ocfs/Linux/inc/ocfsbitmap.h 2.4.20pre5aa2/fs/ocfs/Linux/inc/ocfsbitmap.h
--- ocfs/fs/ocfs/Linux/inc/ocfsbitmap.h Thu Jan 1 01:00:00 1970
+++ 2.4.20pre5aa2/fs/ocfs/Linux/inc/ocfsbitmap.h Fri Sep 6 01:46:16 2002
@@ -0,0 +1,39 @@
+/*
+ * ocfsbitmap.h
+ *
+ * Function prototypes for related 'C' file.
+ *
+ * Copyright (C) 2002 Oracle Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSBITMAP_H_ +#define _OCFSBITMAP_H_ + +void ocfs_initialize_bitmap (ocfs_alloc_bm * bitmap, void *buf, ub4 sz); +int ocfs_find_clear_bits (ocfs_alloc_bm * bitmap, ub4 numBits, ub4 offset, ub4 sysonly); +int ocfs_count_bits (ocfs_alloc_bm * bitmap); + +#if defined(__KERNEL__) +void ocfs_set_bits (ocfs_alloc_bm * bitmap, ub4 start, ub4 num); +void ocfs_clear_bits (ocfs_alloc_bm * bitmap, ub4 start, ub4 num); +#endif + +#endif /* _OCFSBITMAP_H_ */ diff -urNp ocfs/fs/ocfs/Linux/inc/ocfsclose.h 2.4.20pre5aa2/fs/ocfs/Linux/inc/ocfsclose.h --- ocfs/fs/ocfs/Linux/inc/ocfsclose.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Linux/inc/ocfsclose.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,32 @@ +/* + * ocfsclose.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSCLOSE_H_ +#define _OCFSCLOSE_H_ + +int ocfs_common_close (struct inode *inode, struct file *file); + +#endif /* _OCFSCLOSE_H_ */ diff -urNp ocfs/fs/ocfs/Linux/inc/ocfsconf.h 2.4.20pre5aa2/fs/ocfs/Linux/inc/ocfsconf.h --- ocfs/fs/ocfs/Linux/inc/ocfsconf.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Linux/inc/ocfsconf.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,95 @@ +/* + * ocfsconf.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +/* Format of the ocfs.conf file: + * + * # This is a comment + * + * ipcdlm: + * ip_address = + * ip_port = + * subnet_mask = + * type = udp + * hostname = + * active = yes + * + * ipcdlm: + * ip_address = + * ip_port = + * subnet_mask = + * type = udp + * hostname = + * active = no + * + * ipcdlm: + * ip_address = + * ip_port = + * subnet_mask = + * type = udp + * hostname = + * active = no + */ + +#ifndef _OCFSCONF_H_ +#define _OCFSCONF_H_ + +typedef enum _ocfs_conf_toktypes +{ + T_UNKNOWN, T_COMMENT, T_PARAMETER, T_COLON, T_EQUAL +} +ocfs_conf_toktypes; + +typedef enum _ocfs_conf_tokstates +{ + S_BEG, S_SECTION, S_IGNORE +} +ocfs_conf_tokstates; + +typedef struct _ocfs_conf_process +{ + char *section; + void *(*sectionfn) (void *sect, char *lhv, int lhvlen, char *rhv, + int rhvlen); +} +ocfs_conf_process; + +ocfs_conf_toktypes ocfs_get_next_token (char *buf, + int eol, + char **tokstr, + int *toklen, + char **newbuf); + +int ocfs_read_conf (krnl_file * fp); + +void *ocfs_conf_ipc_dlm (void *sect, + char *lhv, + int lhvlen, + char *rhv, + int rhvlen); + +int ocfs_read_node_info (void); + +#endif /* _OCFSCONF_H_ */ diff -urNp ocfs/fs/ocfs/Linux/inc/ocfsdlmp.h 2.4.20pre5aa2/fs/ocfs/Linux/inc/ocfsdlmp.h --- ocfs/fs/ocfs/Linux/inc/ocfsdlmp.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Linux/inc/ocfsdlmp.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,36 @@ +/* + * ocfsdlmp.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSDLMP_H_ +#define _OCFSDLMP_H_ + +int ocfs_insert_sector_node (ocfs_super * osb, ocfs_lock_res * lock_res); + +int ocfs_lookup_sector_node (ocfs_super * osb, ub8 lock_id, ocfs_lock_res ** lock_res); + +int ocfs_volume_thread (void *arg); + +#endif /* _OCFSDLMP_H_ */ diff -urNp ocfs/fs/ocfs/Linux/inc/ocfsfile.h 2.4.20pre5aa2/fs/ocfs/Linux/inc/ocfsfile.h --- ocfs/fs/ocfs/Linux/inc/ocfsfile.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Linux/inc/ocfsfile.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,36 @@ +/* + * ocfsfile.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSFILE_H_ +#define _OCFSFILE_H_ + +int ocfs_set_disposition_information (struct inode *dir, struct dentry *dentry); + +int ocfs_set_rename_information (struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry); + +#endif /* _OCFSFILE_H_ */ diff -urNp ocfs/fs/ocfs/Linux/inc/ocfsfilp.h 2.4.20pre5aa2/fs/ocfs/Linux/inc/ocfsfilp.h --- ocfs/fs/ocfs/Linux/inc/ocfsfilp.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Linux/inc/ocfsfilp.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,49 @@ +/* + * ocfsfilp.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSFILP_H_ +#define _OCFSFILP_H_ + +#define BUFFER_ALIGN 512 + +typedef struct _krnl_file +{ + struct file *filp; /* file ptr */ + char *buf; /* read buffer */ + int bufsz; /* size of buffer */ + int readsz; /* size read */ + int loc; /* processed index */ + int eof; /* 1 when EOF, else 0 */ +} +krnl_file; + +krnl_file *ocfs_open_file (const char *pathname, int flags, mode_t mode); + +void ocfs_close_file (krnl_file * fp); + +int ocfs_read_file (krnl_file * fp, char *s, int size); + +#endif /* _OCFSFILP_H_ */ diff -urNp ocfs/fs/ocfs/Linux/inc/ocfshash.h 2.4.20pre5aa2/fs/ocfs/Linux/inc/ocfshash.h --- ocfs/fs/ocfs/Linux/inc/ocfshash.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Linux/inc/ocfshash.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,135 @@ +/* + * ocfshash.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSHASH_H_ +#define _OCFSHASH_H_ + +/* Data structures */ +typedef struct _HASHBUCKET +{ + void *key; + ub4 keylen; + void *val; + ub4 vallen; + struct _HASHBUCKET *next; +} +HASHBUCKET; + +typedef struct +{ + ub4 size; + ub4 mask; + ub4 entries; + ub4 inithash; + ub4 newbuckets; /* Used for statistics */ + ub4 reusedbuckets; /* Used for statistics */ + ocfs_sem hashlock; + HASHBUCKET *lastfree; + HASHBUCKET *freelist; + HASHBUCKET *buckets; +} +HASHTABLE; + +/* Function prototypes */ +int ocfs_hash_create (HASHTABLE * ht, ub4 noofbits); + +void ocfs_hash_destroy (HASHTABLE * ht, void (*freefn) (const void *p)); + +int ocfs_hash_add (HASHTABLE * ht, void *key, ub4 keylen, void *val, ub4 vallen); + +int ocfs_hash_del (HASHTABLE * ht, void *key, ub4 keylen); + +int ocfs_hash_get (HASHTABLE * ht, void *key, ub4 keylen, void **val, ub4 * vallen); + +void ocfs_hash_stat (HASHTABLE * ht, char *data, ub4 datalen); + +#define HASHSTAT_BUFLEN 2048 + +#define hashsize(n) ((ub4)1<<(n)) +#define hashmask(n) (hashsize(n)-1) + +#define HASHTABLE_DESTROYED(h) (((HASHTABLE *)h)->buckets==NULL) + +/* + * -------------------------------------------------------------------- + * mix -- mix 3 32-bit values reversibly. + * For every delta with one or two bits set, and the deltas of all three + * high bits or all three low bits, whether the original value of a,b,c + * is almost all zero or is uniformly distributed. + * If mix() is run forward or backward, at least 32 bits in a,b,c + * have at least 1/4 probability of changing. + * If mix() is run forward, every bit of c will change between 1/3 and + * 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.) + * mix() takes 36 machine instructions, but only 18 cycles on a superscalar + * machine (like a Pentium or a Sparc). No faster mixer seems to work, + * that's the result of my brute-force search. 
There were about 2^^68 + * hashes to choose from. I only tested about a billion of those. + * -------------------------------------------------------------------- + * */ +#define mix(a,b,c) \ +{ \ + a -= b; a -= c; a ^= (c>>13); \ + b -= c; b -= a; b ^= (a<<8); \ + c -= a; c -= b; c ^= (b>>13); \ + a -= b; a -= c; a ^= (c>>12); \ + b -= c; b -= a; b ^= (a<<16); \ + c -= a; c -= b; c ^= (b>>5); \ + a -= b; a -= c; a ^= (c>>3); \ + b -= c; b -= a; b ^= (a<<10); \ + c -= a; c -= b; c ^= (b>>15); \ +} + +/* + * -------------------------------------------------------------------- + * hash() -- hash a variable-length key into a 32-bit value + * k : the key (the unaligned variable-length array of bytes) + * len : the length of the key, counting by bytes + * initval : can be any 4-byte value + * + * Returns a 32-bit value. Every bit of the key affects every bit of + * the return value. Every 1-bit and 2-bit delta achieves avalanche. + * About 6*len+35 instructions. + * + * The best hash table sizes are powers of 2. There is no need to do + * mod a prime (mod is sooo slow!). If you need less than 32 bits, + * use a bitmask. For example, if you need only 10 bits, do + * h = (h & hashmask(10)); + * In which case, the hash table should have hashsize(10) elements. + * + * If you are hashing n strings (ub1 **)k, do it like this: + * for (i=0, h=0; i= LinuxVersionCode(2,4,10) +static int ocfs_direct_IO (int rw, + struct inode *inode, + struct kiobuf *iobuf, unsigned long blocknr, int blocksize); +#endif + +#endif /* OCFSMAIN_PRIVATE_DECLS */ + +#endif /* _OCFSMAIN_H_ */ diff -urNp ocfs/fs/ocfs/Linux/inc/ocfsmount.h 2.4.20pre5aa2/fs/ocfs/Linux/inc/ocfsmount.h --- ocfs/fs/ocfs/Linux/inc/ocfsmount.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Linux/inc/ocfsmount.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,36 @@ +/* + * ocfsmount.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSMOUNT_H_ +#define _OCFSMOUNT_H_ + +int ocfs_read_disk_header (ub1 ** buffer, struct super_block *sb); + +int ocfs_mount_volume (struct super_block *sb); + +int ocfs_dismount_volume (struct super_block *sb); + +#endif /* _OCFSMOUNT_H_ */ diff -urNp ocfs/fs/ocfs/Linux/inc/ocfsport.h 2.4.20pre5aa2/fs/ocfs/Linux/inc/ocfsport.h --- ocfs/fs/ocfs/Linux/inc/ocfsport.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Linux/inc/ocfsport.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,83 @@ +/* + * ocfsport.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSPORT_H_ +#define _OCFSPORT_H_ + +void ocfs_init_sem (ocfs_sem * res); + +bool ocfs_down_sem (ocfs_sem * res, bool wait); + +void ocfs_up_sem (ocfs_sem * res); + +int ocfs_del_sem (ocfs_sem * res); + +int ocfs_wait (void *Object, bool Alertable, ub4 Timeout); + +void ocfs_daemonize (char *name); + +bool ocfs_get_task (pid_t pid, struct task_struct **task); + +int ocfs_sleep (ub4 ms); + +void ocfs_print_qstr (struct qstr *x); + +void ocfs_extent_map_init (ocfs_extent_map * map); + +void ocfs_extent_map_destroy (ocfs_extent_map * map); + +ub4 ocfs_extent_map_get_count (ocfs_extent_map * map); + +bool ocfs_extent_map_add (ocfs_extent_map * map, + sb8 virtual, sb8 physical, sb8 sectorcount); + +void ocfs_extent_map_remove (ocfs_extent_map * map, sb8 virtual, sb8 sectorcount); + +bool ocfs_extent_map_lookup (ocfs_extent_map * map, sb8 virtual, + sb8 * physical, sb8 * sectorcount, ub4 * index); + +bool ocfs_extent_map_next_entry (ocfs_extent_map * map, ub4 runindex, + sb8 * virtual, sb8 * physical, sb8 * sectorcount); + +void *ocfs_linux_dbg_alloc (int Size, char *file, int line); + +void ocfs_linux_dbg_free (const void *Buffer); + +bool ocfs_linux_get_inode_offset (struct inode *inode, ub8 * off, + ocfs_inode ** oin); + +bool ocfs_linux_get_dir_entry_offset (ocfs_super * osb, + ub8 * off, + ub8 parentOff, + struct qstr * fileName, + ocfs_file_entry ** fileEntry); + +void ocfs_flush_cache (ocfs_super * osb); + +bool ocfs_purge_cache_section (ocfs_inode * oin, ub8 * file_off, ub4 Length); + + +#endif /* _OCFSPORT_H_ */ diff -urNp ocfs/fs/ocfs/Linux/inc/ocfsproc.h 2.4.20pre5aa2/fs/ocfs/Linux/inc/ocfsproc.h --- 
ocfs/fs/ocfs/Linux/inc/ocfsproc.h Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Linux/inc/ocfsproc.h Fri Sep 6 01:46:16 2002 @@ -0,0 +1,66 @@ +/* + * ocfsproc.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSPROC_H_ +#define _OCFSPROC_H_ + +int ocfs_proc_init (void); + +void ocfs_proc_deinit (void); + +void ocfs_proc_add_volume (ocfs_super * osb); + +void ocfs_proc_remove_volume (ocfs_super * osb); + +#ifdef OCFSPROC_PRIVATE_DECLS +static int ocfs_proc_calc_metrics (char *page, + char **start, off_t off, int count, int *eof, int len); + +#ifdef OCFS_LINUX_MEM_DEBUG +static int ocfs_proc_memallocs (char *page, + char **start, off_t off, int count, int *eof, void *data); +#endif + +static int ocfs_proc_version (char *page, + char **start, off_t off, int count, int *eof, void *data); + +static int ocfs_proc_nodenum (char *page, + char **start, off_t off, int count, int *eof, void *data); + +static int ocfs_proc_nodename (char *page, + char **start, off_t off, int count, int *eof, void *data); + +static int ocfs_proc_mountpoint (char *page, + char **start, off_t off, int count, int *eof, void *data); + +static int ocfs_proc_statistics (char *page, + char **start, off_t off, int count, int *eof, void *data); + +static int ocfs_proc_hash_stats (char *page, + char **start, off_t off, int count, int *eof, void *data); +#endif /* OCFSPROC_PRIVATE_DECLS */ + +#endif /* _OCFSPROC_H_ */ diff -urNp ocfs/fs/ocfs/Linux/ocfsbitmap.c 2.4.20pre5aa2/fs/ocfs/Linux/ocfsbitmap.c --- ocfs/fs/ocfs/Linux/ocfsbitmap.c Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Linux/ocfsbitmap.c Fri Sep 6 01:46:16 2002 @@ -0,0 +1,160 @@ +/* + * ocfsbitmap.c + * + * Bitmap infrastructure code + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have recieved a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran,
+ *          Manish Singh, Wim Coekaerts
+ */
+
+#if defined(__KERNEL__)
+#include
+extern struct list_head item_list;
+#else
+#include
+#include
+#endif
+
+/* Tracing */
+#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_PORT
+
+/*
+ * BITCOUNT(x) evaluates to the number of set bits in x.
+ * BX_(x) leaves a per-nibble bit count in every nibble; the outer
+ * expression folds neighbouring nibbles into per-byte counts and the
+ * "% 255" adds those bytes together.
+ * Note that x is expanded several times, so it must be free of side effects.
+ */
+#define BITCOUNT(x) (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255)
+#define BX_(x) ((x) - (((x)>>1)&0x77777777) \
+		- (((x)>>2)&0x33333333) \
+		- (((x)>>3)&0x11111111))
+
+/*
+ * ocfs_initialize_bitmap()
+ *
+ * Points the bitmap descriptor at buf, records sz as its size and resets
+ * the failed / ok_retries counters. The size is a bit count: the search in
+ * ocfs_find_clear_bits() uses it as the bit limit and ocfs_count_bits()
+ * shifts it right by 3 to get bytes. buf is stored, not copied.
+ */
+void ocfs_initialize_bitmap (ocfs_alloc_bm * bitmap, void *buf, ub4 sz)
+{
+	LOG_ENTRY ();
+
+	bitmap->buf = buf;
+	bitmap->size = sz;
+	bitmap->failed = 0;
+	bitmap->ok_retries = 0;
+
+	LOG_EXIT ();
+	return;
+} /* ocfs_initialize_bitmap */
+
+/*
+ * ocfs_find_clear_bits()
+ *
+ * sysonly is passed # bits in bitmap that are reserved for system file space in case we have
+ * a disk full.
+ */
+int ocfs_find_clear_bits (ocfs_alloc_bm * bitmap, ub4 numBits, ub4 offset, ub4 sysonly)
+{
+	/* first_zero starts as (ub4) -1, which is also the "not found" result */
+	ub4 next_zero, off, count, size, first_zero = -1;
+	void *buffer;
+
+	LOG_ENTRY ();
+
+	buffer = bitmap->buf;
+	/* the last sysonly bits of the bitmap are left out of the search */
+	size = bitmap->size - sysonly;
+	count = 0;
+	off = offset;
+
+	/*
+	 * count is the length of the run of clear bits that ends just before
+	 * off, first_zero is where that run starts. The loop stops when the
+	 * bits left (plus the current run) cannot reach numBits any more, or
+	 * when find_next_zero_bit() returns size, which is treated here as
+	 * "no clear bit left".
+	 * NOTE(review): all of this is unsigned arithmetic; sysonly > size or
+	 * offset > size would wrap around - confirm callers never pass that.
+	 */
+	while ((size - off + count >= numBits) && (next_zero = find_next_zero_bit (buffer, size, off)) != size) {
+		if (next_zero != off) {
+			/* a set bit interrupted the run: start over at next_zero */
+			first_zero = next_zero;
+			off = next_zero + 1;
+			count = 0;
+		} else {
+			/* the bit at off is clear: the current run grows */
+			off++;
+			if (count == 0)
+				first_zero = next_zero;
+		}
+
+		count++;
+
+		if (count == numBits) {
+			/* found numBits consecutive clear bits starting at first_zero */
+			goto bail;
+		}
+	}
+	first_zero = -1;
+
+      bail:
+	LOG_EXIT_LONG (first_zero);
+	return first_zero;
+} /* ocfs_find_clear_bits */
+
+/*
+ * ocfs_count_bits()
+ *
+ * Returns the number of set bits in the first (bitmap->size >> 3) bytes
+ * of the bitmap buffer. Bits beyond the last whole byte are not looked at.
+ */
+int ocfs_count_bits (ocfs_alloc_bm * bitmap)
+{
+	ub4 size, count = 0, off = 0;
+	unsigned char tmp;
+	ub1 *buffer;
+
+	buffer = bitmap->buf;
+
+	LOG_ENTRY ();
+
+	/* number of whole bytes to scan */
+	size = (bitmap->size >> 3);
+
+	while (off < size) {
+		/* one byte at a time */
+		memcpy (&tmp, buffer, 1);
+		count += BITCOUNT (tmp);
+		off++;
+		buffer++;
+	}
+
+	LOG_EXIT_ULONG (count);
+	return count;
+} /* ocfs_count_bits */
+
+#ifdef __KERNEL__
+/*
+ * ocfs_set_bits()
+ *
+ * Sets num consecutive bits in bitmap->buf, starting at bit start,
+ * with one set_bit() call per bit. No range check against bitmap->size.
+ */
+void ocfs_set_bits (ocfs_alloc_bm * bitmap, ub4 start, ub4 num)
+{
+	LOG_ENTRY ();
+
+	while (num--)
+		set_bit (start++, bitmap->buf);
+
+	LOG_EXIT ();
+	return;
+} /* ocfs_set_bits */
+
+/*
+ * ocfs_clear_bits()
+ *
+ * Clears num consecutive bits in bitmap->buf, starting at bit start,
+ * with one clear_bit() call per bit. No range check against bitmap->size.
+ */
+void ocfs_clear_bits (ocfs_alloc_bm * bitmap, ub4 start, ub4 num)
+{
+	LOG_ENTRY ();
+
+	while (num--)
+		clear_bit (start++, bitmap->buf);
+
+	LOG_EXIT ();
+	return;
+} /* ocfs_clear_bits */
+
+#endif /* __KERNEL__ */
diff -urNp ocfs/fs/ocfs/Linux/ocfsconf.c 2.4.20pre5aa2/fs/ocfs/Linux/ocfsconf.c
--- ocfs/fs/ocfs/Linux/ocfsconf.c Thu Jan 1 01:00:00 1970
+++ 2.4.20pre5aa2/fs/ocfs/Linux/ocfsconf.c Fri Sep 6 01:46:16 2002
@@ -0,0 +1,269 @@
+/*
+ * ocfsconf.c
+ *
+ * Reads the config file ocfs.conf.
+ *
+ * Copyright (C) 2002 Oracle Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have recieved a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran,
+ *          Manish Singh, Wim Coekaerts
+ */
+
+#include
+
+/* Tracing */
+#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_VOLCFG
+
+/*
+ * Section name -> section handler table used by ocfs_read_conf().
+ * The list is terminated by the {NULL, NULL} entry.
+ */
+ocfs_conf_process confprocs[] = { {"ipcdlm", ocfs_conf_ipc_dlm},
+				  {NULL, NULL} };
+
+/*
+ * ocfs_get_next_token()
+ *
+ * @buf: string to extract the token from
+ * @eol: if 1, returns string till eol as one token
+ * @tokstr: returns ptr to the first char of the token
+ * @toklen: token length
+ * @newbuf: returns ptr to the string still to be processed
+ *
+ * Returns the next token with the token type. For parameter values, set
+ * eol to 1, as the value is in free-format.
+ */
+ocfs_conf_toktypes ocfs_get_next_token (char *buf, int eol, char **tokstr, int *toklen,
+					char **newbuf)
+{
+	ocfs_conf_toktypes kind;
+	char *tok = buf;
+	int len;
+
+	/* leading white space never belongs to a token */
+	while (isspace (*tok))
+		tok++;
+
+	/* start out with "rest of the line, type unknown" and narrow it down */
+	kind = T_UNKNOWN;
+	len = strlen (tok);
+
+	if (!eol) {
+		if (*tok == '#') {
+			/* a comment swallows the rest of the line */
+			kind = T_COMMENT;
+		} else if (*tok == ':') {
+			kind = T_COLON;
+			len = 1;
+		} else if (*tok == '=') {
+			kind = T_EQUAL;
+			len = 1;
+		} else if (isalnum (*tok)) {
+			/* a name: one alphanumeric, then alphanumerics or '_' */
+			char *end = tok;
+
+			do {
+				++end;
+			} while (isalnum (*end) || *end == '_');
+
+			kind = T_PARAMETER;
+			len = end - tok;
+		}
+	}
+
+	*tokstr = tok;
+	*toklen = len;
+
+	/* the caller may not care where the token ends */
+	if (newbuf)
+		*newbuf = tok + len;
+
+	return kind;
+} /* ocfs_get_next_token */
+
+/*
+ * ocfs_read_conf()
+ *
+ * Reads the ocfs.conf file and passes the various parameter-value pairs
+ * to the section function. The specific section function processes the
+ * parameter-value pairs.
+ *
+ * Returns 1 on success, 0 on error.
+ */
+int ocfs_read_conf (krnl_file * fp)
+{
+	ocfs_conf_tokstates tokstate;
+	ocfs_conf_toktypes toktype;
+	char *tokstr;
+	int toklen;
+	char buf[1000];
+	char tmpbuf[1000];	/* NUL terminated copy of the last name seen */
+	int tmplen;
+	int len;
+	char *p;
+	void *sect;
+	void *(*sectionfn) (void *sect, char *lhv, int lhvlen, char *rhv,
+			    int rhvlen);
+
+	tokstate = S_BEG;
+	sectionfn = NULL;
+	sect = NULL;
+
+	/* one line per iteration; 0 means end of file, < 0 is an error code */
+	while ((len = ocfs_read_file (fp, buf, sizeof (buf))) > 0) {
+		/*
+		 * ocfs_read_file() keeps the eol in the buffer. Drop it, but
+		 * only if it is really there: the last line of the file, or
+		 * a line longer than the buffer, comes back without one and
+		 * must not lose its last character.
+		 */
+		if (buf[len - 1] == '\n')
+			buf[len - 1] = '\0';
+
+		toktype = ocfs_get_next_token (buf, 0, &tokstr, &toklen, &p);
+
+		switch (toktype) {
+		case T_COMMENT:
+			break;
+
+		case T_PARAMETER:
+			/*
+			 * Remember the name, tokstr/toklen are overwritten by
+			 * the next call. toklen is always smaller than
+			 * sizeof (buf), so the copy fits into tmpbuf.
+			 */
+			strncpy (tmpbuf, tokstr, toklen);
+			tmpbuf[toklen] = '\0';
+			tmplen = toklen;
+
+			toktype =
+			    ocfs_get_next_token (p, 0, &tokstr, &toklen, &p);
+			if (toktype == T_COLON) {
+				/* "name:" opens a new section */
+				int i;
+
+				/*
+				 * Forget the previous section first, else the
+				 * parameters of an unknown section would be
+				 * handed to the handler of the previous one.
+				 */
+				sectionfn = NULL;
+				sect = NULL;
+
+				for (i = 0; confprocs[i].section; ++i) {
+					/*
+					 * The whole name has to match, not
+					 * just its first tmplen characters.
+					 */
+					if ((int) strlen (confprocs[i].section) == tmplen &&
+					    !strnicmp
+					    (confprocs[i].section, tmpbuf,
+					     tmplen)) {
+						sectionfn =
+						    confprocs[i].sectionfn;
+						/* all-NULL call: handler creates the section */
+						sect =
+						    sectionfn (NULL, NULL, 0, NULL, 0);
+						break;
+					}
+				}
+
+				if (sectionfn && sect)
+					tokstate = S_SECTION;
+				else
+					tokstate = S_IGNORE;
+			} else if (toktype == T_EQUAL) {
+				/* "name = value", only valid inside a known section */
+				if (tokstate == S_SECTION) {
+					/* eol=1: rest of the line is the value */
+					ocfs_get_next_token (p, 1, &tokstr, &toklen, &p);
+					if (toklen)
+						sectionfn (sect, tmpbuf, tmplen, tokstr, toklen);
+				}
+			}
+			break;
+
+		default:
+			/* anything else is silently skipped */
+			break;
+		}
+	}
+
+	if (len < 0) {
+		LOG_ERROR_ARGS ("errno=%d\n", len);
+		return 0;
+	}
+
+	return 1;
+} /* ocfs_read_conf */
+
+/*
+ * ocfs_conf_ipc_dlm()
+ *
+ * @sect: NULL = new section, NOTNULL = address of section
+ * @lhv: parameter name
+ * @lhvlen: parameter name len
+ * @rhv: value
+ * @rhvlen: value len
+ *
+ * The conf function handler for the ipcdlm section.
+ */
+void *ocfs_conf_ipc_dlm (void *sect, char *lhv, int lhvlen, char *rhv, int rhvlen)
+{
+	ocfs_comm_info *comminfo;
+	char tmpstr[255];
+
+	if (sect) {
+		int vallen = rhvlen;
+
+		/* Add the parameter to the section */
+		comminfo = (ocfs_comm_info *) sect;
+
+		/*
+		 * The value comes straight from the config file and can be
+		 * as long as a line of it, so clamp it to what tmpstr can
+		 * hold instead of writing past the end of the stack buffer.
+		 */
+		if (vallen < 0)
+			vallen = 0;
+		else if (vallen > (int) sizeof (tmpstr) - 1)
+			vallen = sizeof (tmpstr) - 1;
+
+		strncpy (tmpstr, rhv, vallen);
+		tmpstr[vallen] = '\0';
+
+		/*
+		 * Only the first lhvlen characters are compared, so a name
+		 * that is a prefix of a keyword selects that keyword too and
+		 * the first match in this chain wins.
+		 */
+		if (!strnicmp (lhv, OCFS_IP_ADDR, lhvlen)) {
+			comminfo->addr = in_aton (tmpstr);
+		} else if (!strnicmp (lhv, OCFS_IP_PORT, lhvlen)) {
+			comminfo->port = simple_strtol (tmpstr, NULL, 10);
+		} else if (!strnicmp (lhv, OCFS_IP_MASK, lhvlen)) {
+			comminfo->mask = in_aton (tmpstr);
+		} else if (!strnicmp (lhv, OCFS_IP_HOST, lhvlen)) {
+			/* accepted, but the value is not stored anywhere */
+		} else if (!strnicmp (lhv, OCFS_COMM_TYPE, lhvlen)) {
+			if (!strnicmp (tmpstr, "udp", vallen))
+				comminfo->type = 1;	/* TODO */
+		} else if (!strnicmp (lhv, OCFS_COMM_ACTIVE, lhvlen)) {
+			if (!strnicmp (tmpstr, "yes", vallen))
+				comminfo->active = 1;
+			else
+				comminfo->active = 0;
+			/* the entry becomes valid once the active flag was seen */
+			comminfo->valid = 1;
+		}
+	} else {
+		/*
+		 * Create a new section: take the next free comm_info slot.
+		 * NOTE(review): num_ipc is not checked against the number of
+		 * comm_info slots, whose declaration is not visible here -
+		 * confirm that a config file with many sections cannot
+		 * overrun the array.
+		 */
+		comminfo = &(OcfsGlobalCtxt.comm_info[OcfsGlobalCtxt.num_ipc]);
+		++OcfsGlobalCtxt.num_ipc;
+		memset (comminfo, 0, sizeof (ocfs_comm_info));
+	}
+
+	return (void *) comminfo;
+} /* ocfs_conf_ipc_dlm */
+
+/*
+ * ocfs_read_node_info()
+ *
+ * Resets OcfsGlobalCtxt.num_ipc, then opens OCFSCONF_FILE read-only and
+ * feeds it to ocfs_read_conf(). Returns 0 on success and -EFAIL if the
+ * file cannot be opened or ocfs_read_conf() reports an error.
+ */
+int ocfs_read_node_info (void)
+{
+	int status = 0;
+	krnl_file *fp;
+
+	LOG_ENTRY ();
+
+	OcfsGlobalCtxt.num_ipc = 0;
+
+	/* Open the file */
+	fp = ocfs_open_file (OCFSCONF_FILE, O_RDONLY, 0600);
+	if (!fp) {
+		LOG_ERROR_ARGS ("errno=%d, file=%s\n", errno, OCFSCONF_FILE);
+		status = -EFAIL;
+		goto bail;
+	}
+
+	/* Read the file */
+	if (!ocfs_read_conf (fp))
+		status = -EFAIL;
+
+	/* Close the file */
+	ocfs_close_file (fp);
+
+      bail:
+	LOG_EXIT_STATUS (status);
+	return status;
+} /* ocfs_read_node_info */
diff -urNp ocfs/fs/ocfs/Linux/ocfsdlm.c 2.4.20pre5aa2/fs/ocfs/Linux/ocfsdlm.c
--- ocfs/fs/ocfs/Linux/ocfsdlm.c Thu Jan 1 01:00:00 1970
+++ 2.4.20pre5aa2/fs/ocfs/Linux/ocfsdlm.c Fri Sep 6 01:46:16 2002
@@ -0,0 +1,138 @@
+/*
+ * ocfsdlm.c
+ *
+ * Allows
one dlm thread per mounted volume instead of one
+ * for all volumes.
+ *
+ * Copyright (C) 2002 Oracle Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have recieved a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran,
+ *          Manish Singh, Wim Coekaerts
+ */
+
+#include
+
+/* Tracing */
+#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_DLM
+
+/*
+ * ocfs_insert_sector_node()
+ *
+ * Adds lock_res to the osb->root_sect_node hash, keyed by the 8 bytes of
+ * lock_res->sector_num. The hash stores the lock_res pointer itself; the
+ * value length is recorded as sizeof (ocfs_lock_res *).
+ *
+ * Returns 0 on success, -EFAIL if the hash has been destroyed or
+ * ocfs_hash_add() returns 0.
+ */
+int ocfs_insert_sector_node (ocfs_super * osb, ocfs_lock_res * lock_res)
+{
+	int status = 0;
+
+	LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", osb, lock_res);
+
+	/* buckets == NULL, see HASHTABLE_DESTROYED() */
+	if (HASHTABLE_DESTROYED (&(osb->root_sect_node))) {
+		LOG_ERROR_STR ("root_sect_node has already been destroyed!");
+		status = -EFAIL;
+		goto bail;
+	}
+
+	if (!ocfs_hash_add (&(osb->root_sect_node), &(lock_res->sector_num),
+			    sizeof (ub8), lock_res, sizeof (ocfs_lock_res *))) {
+		LOG_ERROR_STATUS(status = -EFAIL);
+		goto bail;
+	}
+
+	LOG_TRACE_ARGS ("HashAdd: %u.%u, 0x%08x\n", HI (lock_res->sector_num),
+			LO (lock_res->sector_num), lock_res);
+
+      bail:
+	LOG_EXIT_STATUS (status);
+	return status;
+} /* ocfs_insert_sector_node */
+
+/*
+ * ocfs_lookup_sector_node()
+ *
+ * Looks lock_id up in the osb->root_sect_node hash and hands the stored
+ * pointer back through lock_res.
+ *
+ * Returns 0 if an entry was found and its recorded length is that of a
+ * pointer, -ENOENT if there is no (usable) entry and -EFAIL if the hash
+ * has been destroyed.
+ */
+int ocfs_lookup_sector_node (ocfs_super * osb, ub8 lock_id, ocfs_lock_res ** lock_res)
+{
+	/* "not found" unless the lookup below says otherwise */
+	int status = -ENOENT;
+	ub4 len = 0;
+
+	LOG_ENTRY_ARGS ("(0x%08x, %u.%u, 0x%08x)\n", osb, HI (lock_id),
+			LO (lock_id), lock_res);
+
+	if (HASHTABLE_DESTROYED (&(osb->root_sect_node))) {
+		LOG_ERROR_STR ("root_sect_node has already been destroyed!");
+		status = -EFAIL;
+		goto bail;
+	}
+
+	if (ocfs_hash_get (&(osb->root_sect_node), &(lock_id), sizeof (ub8),
+			   (void **) lock_res, &len)) {
+		/* only accept what ocfs_insert_sector_node() has stored */
+		status = ((len == sizeof (ocfs_lock_res *)) ? 0 : -ENOENT);
+		LOG_TRACE_ARGS ("HashGet: %u.%u, 0x%08x\n", HI (lock_id),
+				LO (lock_id), *lock_res);
+	}
+
+      bail:
+	LOG_EXIT_STATUS (status);
+	return status;
+} /* ocfs_lookup_sector_node */
+
+#if defined(DLM_THREAD_PER_VOLUME)
+/*
+ * ocfs_volume_thread()
+ *
+ * Called by OcfsMountVolume(). This function is executed as a kernel thread
+ * for each mounted ocfs volume.
+ *
+ * arg is the ocfs_super of the volume. The thread calls ocfs_nm_thread()
+ * after every ocfs_sleep (500) until the global
+ * OCFS_FLAG_SHUTDOWN_VOL_THREAD flag or the volume's
+ * OCFS_OSB_FLAGS_BEING_DISMOUNTED flag is set, then signals osb->complete.
+ * Always returns 0.
+ */
+int ocfs_volume_thread (void *arg)
+{
+	ocfs_super *osb;
+	char proc[100];
+	int status = 0;
+
+	LOG_ENTRY ();
+
+	osb = (ocfs_super *) arg;
+
+	/* the thread is named after the volume id */
+	sprintf (proc, "ocfsnm - %d", osb->osb_id);
+	ocfs_daemonize (proc);
+
+	/* The delay changes based on multiplier */
+	while (!(OcfsGlobalCtxt.flags & OCFS_FLAG_SHUTDOWN_VOL_THREAD) &&
+	       !(osb->osb_flags & OCFS_OSB_FLAGS_BEING_DISMOUNTED)) {
+		/* a heartbeat multiplier of 0 is replaced by DISK_HBEAT_NO_COMM */
+		if (OcfsGlobalCtxt.hbm == 0) {
+			LOG_ERROR_STR ("OcfsGlobalCtxt Heartbeat was 0");
+			OcfsGlobalCtxt.hbm = DISK_HBEAT_NO_COMM;
+		}
+
+		ocfs_sleep (500);
+
+		/* the flags may have changed while we were asleep */
+		if ((OcfsGlobalCtxt.flags & OCFS_FLAG_SHUTDOWN_VOL_THREAD) ||
+		    (osb->osb_flags & OCFS_OSB_FLAGS_BEING_DISMOUNTED))
+			break;
+
+		status = ocfs_nm_thread (osb);
+		if (status < 0) {
+			/* an error only ends the loop during a dismount */
+			if (osb->osb_flags & OCFS_OSB_FLAGS_BEING_DISMOUNTED)
+				break;
+		}
+	}
+
+	/* let whoever waits on osb->complete know that the thread is done */
+	complete (&(osb->complete));
+	LOG_EXIT_LONG (0);
+	return 0;
+} /* ocfs_volume_thread */
+#endif /* defined(DLM_THREAD_PER_VOLUME) */
diff -urNp ocfs/fs/ocfs/Linux/ocfsfile.c 2.4.20pre5aa2/fs/ocfs/Linux/ocfsfile.c
--- ocfs/fs/ocfs/Linux/ocfsfile.c Thu Jan 1 01:00:00 1970
+++ 2.4.20pre5aa2/fs/ocfs/Linux/ocfsfile.c Fri Sep 6 01:46:16 2002
@@ -0,0 +1,315 @@
+/*
+ * ocfsfile.c
+ *
+ *
+ *
+ * Copyright (C) 2002 Oracle
Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have recieved a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran,
+ *          Manish Singh, Wim Coekaerts
+ */
+
+#include
+
+/* Tracing */
+#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_FILEINFO
+
+/*
+ * ocfs_set_disposition_information()
+ *
+ * Called during file deletion. If the inode carries an OIN, the OIN is
+ * marked OCFS_OIN_DELETE_ON_CLOSE (the root directory is refused with
+ * -EPERM). The file is then deleted on disk by calling
+ * ocfs_create_modify_file() with FLAG_FILE_DELETE and, on success, the OIN
+ * is dropped with ocfs_release_cached_oin(). If the on-disk delete fails,
+ * the DELETE_ON_CLOSE and IN_USE flags of the OIN are cleared again.
+ *
+ * @dir: directory the entry lives in
+ * @dentry: entry to delete, must have an inode
+ *
+ * Returns 0 on success (also when the OIN was already marked for delete),
+ * a negative error code otherwise.
+ */
+int ocfs_set_disposition_information (struct inode *dir, struct dentry *dentry)
+{
+	int status = 0;
+	bool main_resAcquired = false;
+	ocfs_inode *OIN = NULL;
+	ocfs_super *osb = NULL;
+	struct inode *inode;
+	/*
+	 * The results of the offset helpers below are not checked, so start
+	 * from defined values: a file offset of -1 is what the code further
+	 * down treats as "no offset".
+	 */
+	ub8 parentOff = 0, fileOff = (ub8) -1;
+
+	LOG_ENTRY ();
+
+	if (!dentry->d_inode) {
+		LOG_ERROR_STR ("Bad inode");
+		status = -EFAIL;
+		goto finally;
+	}
+	inode = dentry->d_inode;
+	osb = ((ocfs_super *)(dir->i_sb->u.generic_sbp));
+
+	if (inode_data_is_oin (inode)) {
+		OIN = ((ocfs_inode *)inode->u.generic_ip);
+		if (OIN == NULL) {
+			LOG_ERROR_STR ("Bad oin");
+			status = -EFAIL;
+			goto finally;
+		}
+
+		ocfs_down_sem (&(OIN->main_res), true);
+		main_resAcquired = true;
+
+		/*
+		 ** Check if the user wants to delete the file or not delete the file.
+		 ** Do some checking to see if the file can even be deleted.
+		 */
+		if (OIN->oin_flags & OCFS_OIN_DELETE_ON_CLOSE) {
+			/* already being deleted, nothing left to do (status stays 0) */
+			LOG_ERROR_STR ("OCFS_OIN_DELETE_ON_CLOSE set");
+			goto finally;
+		}
+
+		if (OIN->oin_flags & OCFS_OIN_ROOT_DIRECTORY) {
+			LOG_ERROR_STR ("OCFS_OIN_ROOT_DIRECTORY set");
+			status = -EPERM;
+			goto finally;
+		}
+
+		OCFS_SET_FLAG (OIN->oin_flags, OCFS_OIN_DELETE_ON_CLOSE);
+
+		/* main_res is not held across the disk operation below */
+		ocfs_up_sem (&(OIN->main_res));
+		main_resAcquired = false;
+	}
+
+	/* Call CreateModify with delete flag to free up the bitmap etc. */
+	ocfs_linux_get_inode_offset (dir, &parentOff, NULL);
+
+	if (S_ISDIR (inode->i_mode))
+		ocfs_linux_get_dir_entry_offset (osb, &fileOff, parentOff,
+						 &(dentry->d_name), NULL);
+	else
+		ocfs_linux_get_inode_offset (inode, &fileOff, NULL);
+
+	status = -EFAIL;
+	if (fileOff != -1)
+		status =
+		    ocfs_create_modify_file (osb, parentOff, NULL, NULL, 0,
+					     &fileOff, FLAG_FILE_DELETE, NULL, NULL);
+
+	if (status < 0) {
+		/*
+		 * We probably don't need to do that, as it will be a good idea to
+		 * let the OIN get released even if the operation failed, so that
+		 * we can create new OIN next time. ????
+		 */
+		LOG_ERROR_STATUS(status);
+		if (OIN) {
+			/* take back the delete mark set above */
+			ocfs_down_sem (&(OIN->main_res), true);
+			OCFS_CLEAR_FLAG (OIN->oin_flags,
+					 OCFS_OIN_DELETE_ON_CLOSE);
+			OCFS_CLEAR_FLAG (OIN->oin_flags, OCFS_OIN_IN_USE);
+			ocfs_up_sem (&(OIN->main_res));
+		}
+		goto finally;
+	}
+
+	if (OIN)
+		ocfs_release_cached_oin (osb, OIN);
+
+      finally:
+	/* only the early exits above get here with main_res still held */
+	if (main_resAcquired) {
+		ocfs_up_sem (&(OIN->main_res));
+		main_resAcquired = false;
+	}
+
+	LOG_EXIT_STATUS (status);
+	return (status);
+} /* ocfs_set_disposition_information */
+
+/*
+ * ocfs_set_rename_information()
+ *
+ * Renames old_dentry (in old_dir) to new_dentry (in new_dir).
+ *
+ * An existing target entry is deleted first, together with its OIN if it
+ * has a live one. Within one directory the entry is then renamed in place
+ * with ocfs_rename_file(). Across directories the old entry is read,
+ * deleted from the source directory and created again, with the new name
+ * and fresh lock state and times, in the target directory. The disk
+ * updates run inside a transaction that is committed on success and
+ * aborted on failure.
+ *
+ * Returns 0 (or the non-negative result of the last step) on success, a
+ * negative error code otherwise.
+ */
+int ocfs_set_rename_information (struct inode *old_dir,
+				 struct dentry *old_dentry,
+				 struct inode *new_dir, struct dentry *new_dentry)
+{
+	int status = 0;
+	ocfs_inode *oldOIN = NULL;
+	ocfs_inode *newOIN = NULL;	/* OIN of the entry being overwritten */
+	ocfs_file_entry *newfe = NULL;
+	ocfs_file_entry *oldfe = NULL;
+	ub8 oldOffset;
+	ub8 newDirOff;
+	ub8 oldDirOff;
+	ocfs_super *osb = NULL;
+	bool DeleteTargetOin = false;
+	ub8 t;
+
+	LOG_ENTRY ();
+
+	newfe = ocfs_allocate_file_entry ();
+	if (newfe == NULL) {
+		LOG_ERROR_STR ("Could not allocate mem for newfe");
+		status = -ENOMEM;
+		goto finally;
+	}
+
+	oldfe = ocfs_allocate_file_entry ();
+	if (oldfe == NULL) {
+		LOG_ERROR_STR ("Could not allocate mem for oldfe");
+		status = -ENOMEM;
+		goto finally;
+	}
+
+	osb = ((ocfs_super *)(old_dir->i_sb->u.generic_sbp));
+
+	/* old parent dir offset */
+	ocfs_linux_get_inode_offset (old_dir, &oldDirOff, NULL);
+
+	/* old file offset */
+	ocfs_linux_get_inode_offset (old_dentry->d_inode, &oldOffset, &oldOIN);
+	if (S_ISDIR (old_dentry->d_inode->i_mode)) {
+		/* overwrite oldOffset to get ptr to OCFS_FILE_ENTRY not DIR_NODE */
+		ocfs_linux_get_dir_entry_offset (osb, &oldOffset, oldDirOff, &(old_dentry->d_name), NULL);
+	}
+
+	/* new parent dir offset */
+	if (inode_data_is_oin (new_dir))
+		newDirOff = ((ocfs_inode *)new_dir->u.generic_ip)->dir_disk_off;
+	else
+		newDirOff = GET_INODE_OFFSET (new_dir);
+
+	/* Don't ever take the main resource for the OIN before this as */
+	/* Locking hierarchy will be broken */
+	if (new_dentry->d_inode != NULL &&
+	    inode_data_is_oin (new_dentry->d_inode)) {
+		/* overwriting an existing inode */
+		newOIN = ((ocfs_inode *)new_dentry->d_inode->u.generic_ip);
+
+		if (!(newOIN->oin_flags & OCFS_OIN_IN_TEARDOWN) &&
+		    !(newOIN->oin_flags & OCFS_OIN_DELETE_ON_CLOSE)) {
+			/* OIN exists and it's not marked for deletion! */
+			ocfs_down_sem (&(newOIN->main_res), true);
+			OCFS_SET_FLAG (newOIN->oin_flags, OCFS_OIN_IN_USE);
+			ocfs_up_sem (&(newOIN->main_res));
+			/*
+			 * NOTE(review): this result is overwritten by the
+			 * lookup right below without being looked at -
+			 * confirm that a failed verify may be ignored.
+			 */
+			status = ocfs_verify_update_oin (osb, newOIN);
+			DeleteTargetOin = true;
+		}
+	}
+
+	status = ocfs_find_files_on_disk (osb, newDirOff,
+					  &(new_dentry->d_name), newfe, NULL);
+
+	if ((status < 0) && (status != -ENOENT)) {
+		/* If we cannot find the file specified we should just */
+		/* return the error... */
+		LOG_ERROR_STATUS (status);
+		goto finally;
+	}
+
+	ocfs_start_trans (osb);
+
+	if (status >= 0) {
+		/* Try and delete the file we found. */
+		/* Call CreateModify with delete flag as we need to free up */
+		/* the bitmap etc. */
+		status = ocfs_del_file (osb, newDirOff, FLAG_RESET_VALID, newfe->this_sector);
+
+		if (status < 0) {
+			/* Delete this file entry, createmodify will create a new */
+			/* one with the changed attributes. */
+			/* This is dangerous as we can potentially fail in */
+			/* CreateModify and we have no file left?? */
+			/* TODO we should make this transactional such that */
+			/* either we get the new file or the old file stays. */
+			/* Also, we need to ensure nobody has the file open currently. */
+			LOG_ERROR_STATUS (status);
+			goto finally;
+		}
+		/*
+		 * Delete the Oin of the target if one exists. This has to be
+		 * the OIN of the entry that was just overwritten, not the
+		 * OIN of the file being renamed: oldOIN may be NULL and is
+		 * still used further down.
+		 */
+		if (DeleteTargetOin) {
+			ocfs_release_cached_oin (osb, newOIN);
+			ocfs_release_oin (newOIN, true);
+		}
+	}
+
+	if (old_dir != new_dir) {
+		/* Delete the file Entry only on the source directory */
+
+		LOG_TRACE_STR ("Source & Target Directories are different");
+
+		status = ocfs_read_file_entry (osb, oldfe, oldOffset);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto finally;
+		}
+
+		status = ocfs_del_file (osb, oldDirOff, FLAG_DEL_NAME, oldOffset);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto finally;
+		}
+
+		/* recycle the entry just read as the new entry */
+		oldfe->sync_flags &= ~OCFS_SYNC_FLAG_VALID;
+		/*
+		 * NOTE(review): the name length is not checked against the
+		 * size of oldfe->filename, which is declared elsewhere -
+		 * confirm that the VFS limit is not larger than that field.
+		 */
+		strncpy(oldfe->filename, new_dentry->d_name.name, new_dentry->d_name.len);
+		oldfe->filename[new_dentry->d_name.len] = '\0';
+		oldfe->filename_len = new_dentry->d_name.len;
+
+		OcfsQuerySystemTime (&t);
+
+		/* Initialize the lock state */
+
+		// DISK_LOCK_SEQNUM(oldfe) = changeSeqNum;
+		DISK_LOCK_CURRENT_MASTER (oldfe) = osb->node_num;
+		DISK_LOCK_FILE_LOCK (oldfe) = OCFS_DLM_ENABLE_CACHE_LOCK;
+		DISK_LOCK_LAST_WRITE (oldfe) = t;
+		DISK_LOCK_LAST_READ (oldfe) = t;
+		DISK_LOCK_READER_NODE (oldfe) = osb->node_num;
+		DISK_LOCK_WRITER_NODE (oldfe) = osb->node_num;
+		oldfe->modify_time = t;
+		oldfe->create_time = t;
+
+		status = ocfs_create_file (osb, newDirOff, oldfe);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto finally;
+		}
+
+		ocfs_commit_trans (osb, osb->curr_trans_id);
+
+		/* the OIN of the source still describes the deleted entry */
+		if (oldOIN) {
+			OCFS_SET_FLAG (oldOIN->oin_flags,
+				       OCFS_OIN_DELETE_ON_CLOSE);
+		}
+	} else {
+		/* Write the new file name to disk */
+		LOG_TRACE_STR ("Source & Target Directories are same");
+		status = ocfs_rename_file (osb, oldDirOff, &(new_dentry->d_name), oldOffset);
+		if (status < 0) {
+			LOG_ERROR_STATUS (status);
+			goto finally;
+		}
+
+		ocfs_commit_trans (osb, osb->curr_trans_id);
+	}
+
+      finally:
+	/* osb is still NULL if an allocation at the very top has failed */
+	if (status < 0 && osb && osb->trans_in_progress)
+		ocfs_abort_trans (osb, osb->curr_trans_id);
+	if (newfe)
+		ocfs_release_file_entry (newfe);
+	if (oldfe)
+		ocfs_release_file_entry (oldfe);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+} /* ocfs_set_rename_information */
diff -urNp ocfs/fs/ocfs/Linux/ocfsfilp.c 2.4.20pre5aa2/fs/ocfs/Linux/ocfsfilp.c
--- ocfs/fs/ocfs/Linux/ocfsfilp.c Thu Jan 1 01:00:00 1970
+++ 2.4.20pre5aa2/fs/ocfs/Linux/ocfsfilp.c Fri Sep 6 01:46:16 2002
@@ -0,0 +1,152 @@
+/*
+ * ocfsfilp.c
+ *
+ * Provides userspace style open/read/write files in the kernel.
+ *
+ * Copyright (C) 2002 Oracle Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have recieved a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran,
+ *          Manish Singh, Wim Coekaerts
+ */
+
+#include
+
+/*
+ * ocfs_open_file()
+ *
+ * Behaves similar to open(). Check out the manpages for the details
+ * on the arguments.
+ *
+ * Returns a zeroed krnl_file wrapping the struct file from filp_open(),
+ * or NULL with errno set to a negative error code. The result has to be
+ * given back with ocfs_close_file().
+ */
+krnl_file *ocfs_open_file (const char *pathname, int flags, mode_t mode)
+{
+	krnl_file *fp;
+
+	if ((fp = kmalloc (sizeof (krnl_file), GFP_NOFS)) == NULL) {
+		errno = -ENOMEM;
+		return NULL;
+	} else
+		memset (fp, 0, sizeof (krnl_file));
+
+	fp->filp = filp_open (pathname, flags, mode);
+	if (IS_ERR (fp->filp)) {
+		/* filp_open() reports failure as an error pointer */
+		errno = PTR_ERR (fp->filp);
+		kfree (fp);
+		return NULL;
+	}
+
+	return fp;
+} /* ocfs_open_file */
+
+/*
+ * ocfs_close_file()
+ *
+ * Closes the open file.
+ */ +void ocfs_close_file (krnl_file * fp) +{ + if (fp) { + if (fp->filp) + filp_close (fp->filp, NULL); + + if (fp->buf) + kfree (fp->buf); + + kfree (fp); + } + + return; +} /* ocfs_close_file */ + +/* + * ocfs_read_file() + * + * Behaves similar to fgets(). Reads till the eol or (size - 1). + * Returned buffer is always null terminated. eol is not overwritten by null. + * Return value if >= 0 denotes the size of the returned buffer excluding + * terminating null. Return value < 0 denotes the errno. + */ +int ocfs_read_file (krnl_file * fp, char *s, int size) +{ + mm_segment_t old_fs; + int ind; + + ssize_t (*read) (struct file *, char *, size_t, loff_t *); + + if (!fp || !fp->filp) + return errno = -EBADF; + + if (!s || !size || !fp->filp->f_op + || ((read = fp->filp->f_op->read) == NULL)) + return errno = -EINVAL; + + if (fp->eof && fp->loc >= fp->readsz) + return 0; + + if (!fp->buf) { + fp->readsz = 0; + fp->loc = 0; + fp->bufsz = size + ((size % BUFFER_ALIGN) ? + (BUFFER_ALIGN - (size % BUFFER_ALIGN)) : 0); + if ((fp->buf = kmalloc (fp->bufsz, GFP_NOFS)) == NULL) + return errno = -ENOMEM; + } + + ind = 0; + + while (1) { + if (fp->loc < fp->readsz) { + for (; fp->loc < fp->readsz && ind < size - 1; + ++fp->loc, ++ind) { + if ((s[ind] = fp->buf[fp->loc]) == '\n') { + int fac = (ind < size - 1) ? 
1 : 0; + + fp->loc += fac; + ind += fac; + s[ind] = '\0'; + return ind; + } + } + + if (ind == size - 1) { + s[ind] = '\0'; + return ind; + } + } + + if (fp->eof) { + s[ind] = '\0'; + return ind; + } else { + old_fs = get_fs (); + set_fs (get_ds ()); + fp->readsz = + read (fp->filp, fp->buf, fp->bufsz, + &fp->filp->f_pos); + set_fs (old_fs); + + if (fp->readsz == 0) + fp->eof = 1; + else if (fp->readsz < 0) + return errno = fp->readsz; + + fp->loc = 0; + } + } + + return 0; +} /* ocfs_read_file */ diff -urNp ocfs/fs/ocfs/Linux/ocfshash.c 2.4.20pre5aa2/fs/ocfs/Linux/ocfshash.c --- ocfs/fs/ocfs/Linux/ocfshash.c Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Linux/ocfshash.c Fri Sep 6 01:46:16 2002 @@ -0,0 +1,498 @@ +/* + * ocfshash.c + * + * Allows for creation and destruction of a hash table which one + * can use to read, write and delete data. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ *
+ * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran,
+ *          Manish Singh, Wim Coekaerts
+ */
+
+#include <ocfs.h>
+
+/* Tracing */
+#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_HASH
+
+/*
+ * ocfs_hash_key_match()
+ *
+ * Returns non-zero when the bucket is in use and holds exactly this key.
+ * The stored length is compared first so that memcmp() never runs past
+ * the end of a shorter stored key, and a longer stored key that merely
+ * shares a prefix is not mistaken for a match.
+ */
+static int ocfs_hash_key_match (HASHBUCKET *bucket, void *key, ub4 keylen)
+{
+	return bucket->key && bucket->keylen == keylen &&
+	       !memcmp (bucket->key, key, keylen);
+}				/* ocfs_hash_key_match */
+
+/*
+ * ocfs_hash_create()
+ *
+ * @ht: hash table to initialize
+ * @noofbits: number of hash bits; the table gets hashsize(noofbits) slots
+ *
+ * Returns 1 on success, 0 on a bad argument or allocation failure.  On
+ * failure ht->buckets is NULL, so the other ocfs_hash_* calls reject the
+ * table instead of walking an uninitialized pointer.
+ */
+int ocfs_hash_create (HASHTABLE *ht, ub4 noofbits)
+{
+	int ret = 0;
+
+	LOG_ENTRY ();
+
+	if (!ht) {
+		LOG_ERROR_STR ("Invalid hash table");
+		goto bail;
+	}
+
+	ht->buckets = NULL;
+
+	/*
+	 * A 32-bit slot count cannot hold 2^32, so 32 bits is rejected too.
+	 * NOTE(review): assumes hashsize(n) is a 32-bit (1 << n) -- confirm.
+	 */
+	if (noofbits > 31 || noofbits < 1) {
+		LOG_ERROR_STR ("Error in noofbits");
+		goto bail;
+	}
+
+	ht->size = hashsize (noofbits);
+	ht->mask = hashmask (noofbits);
+	ht->inithash = 0x10325476;
+	ht->entries = 0;
+	ht->newbuckets = 0;
+	ht->reusedbuckets = 0;
+	ht->freelist = NULL;
+	ht->lastfree = NULL;
+
+	ocfs_init_sem (&(ht->hashlock));
+
+	ht->buckets =
+	    (HASHBUCKET *) ocfs_malloc ((ht->size * sizeof (HASHBUCKET)));
+	if (!ht->buckets) {
+		LOG_ERROR_STR ("Error in mem alloc");
+		goto bail;
+	}
+
+	memset (ht->buckets, 0, (ht->size * sizeof (HASHBUCKET)));
+	ret = 1;
+
+      bail:
+	LOG_EXIT_LONG (ret);
+	return ret;
+}				/* ocfs_hash_create */
+
+/*
+ * ocfs_hash_destroy()
+ *
+ * @ht: ptr to the hash table
+ * @freefn: if not null, uses function to free bucket->val
+ *
+ * Frees every chained bucket, the free list and the slot array, then
+ * resets the bookkeeping pointers so nothing dangles afterwards.
+ */
+void ocfs_hash_destroy (HASHTABLE *ht, void (*freefn) (const void *p))
+{
+	HASHBUCKET *bucket;
+	HASHBUCKET *nxtbucket;
+	ub4 slot;
+
+	LOG_ENTRY ();
+
+	if (!ht || !ht->buckets)
+		goto bail;
+
+	for (slot = 0; slot < ht->size; slot++) {
+		/* The head bucket lives in the array: release only its value */
+		bucket = &(ht->buckets[slot]);
+		if (freefn && bucket->key && bucket->val)
+			freefn (bucket->val);
+
+		/* Chained buckets were allocated one at a time */
+		bucket = bucket->next;
+		while (bucket) {
+			if (freefn && bucket->key && bucket->val)
+				freefn (bucket->val);
+			nxtbucket = bucket->next;
+			ocfs_safefree (bucket);
+			bucket = nxtbucket;
+		}
+	}
+
+	bucket = ht->freelist;
+	while (bucket) {
+		nxtbucket = bucket->next;
+		ocfs_safefree (bucket);
+		bucket = nxtbucket;
+	}
+
+	ocfs_safefree (ht->buckets);
+	ht->buckets = NULL;
+	ht->freelist = NULL;
+	ht->lastfree = NULL;
+	ht->entries = 0;
+
+      bail:
+	LOG_EXIT ();
+	return;
+}				/* ocfs_hash_destroy */
+
+/*
+ * ocfs_hash_add()
+ *
+ * @ht: ptr to the hash table
+ * @key: key
+ * @keylen: length of key
+ * @val: value
+ * @vallen: length of value
+ *
+ * Stores the key and value pointers as given; nothing is copied.
+ * Returns 1 when the entry was added, 0 on a bad argument, a duplicate
+ * key or an allocation failure.
+ */
+int ocfs_hash_add (HASHTABLE * ht, void *key, ub4 keylen, void *val, ub4 vallen)
+{
+	HASHBUCKET *bucket;
+	HASHBUCKET *lastbucket = NULL;
+	HASHBUCKET *emptybucket = NULL;
+	ub4 slot;
+	int ret = 0;
+	int lockacqrd = false;
+
+	LOG_ENTRY ();
+
+	/* A NULL key cannot be stored: NULL marks a bucket as empty */
+	if (!ht || !ht->buckets || !key)
+		goto bail;
+
+	slot = ocfs_hash (key, keylen, ht->inithash) & ht->mask;
+
+	/* Acquire Lock */
+	ocfs_down_sem (&(ht->hashlock), true);
+	lockacqrd = true;
+
+	/*
+	 * Walk the whole chain before picking a bucket.  Filling the first
+	 * empty bucket straight away would miss a duplicate stored further
+	 * down the chain.
+	 */
+	for (bucket = &(ht->buckets[slot]); bucket; bucket = bucket->next) {
+		if (ocfs_hash_key_match (bucket, key, keylen)) {
+			/* return error if key already exists */
+			LOG_ERROR_STR ("Duplicate key");
+			goto bail;
+		}
+		if (!bucket->key && !emptybucket)
+			emptybucket = bucket;
+		lastbucket = bucket;
+	}
+
+	if (emptybucket) {
+		/* Reuse the empty bucket in place; its ->next stays intact */
+		bucket = emptybucket;
+	} else if (ht->freelist) {
+		/* Take the first free bucket and detach it from the freelist */
+		bucket = ht->freelist;
+		if (ht->lastfree == ht->freelist)
+			ht->freelist = ht->lastfree = NULL;
+		else
+			ht->freelist = ht->freelist->next;
+
+		bucket->next = NULL;
+		lastbucket->next = bucket;
+		ht->reusedbuckets++;
+	} else {
+		/* Create a new bucket and add to the end of list */
+		bucket = (HASHBUCKET *) ocfs_malloc (sizeof (HASHBUCKET));
+		if (bucket == NULL) {
+			LOG_ERROR_STR ("Error in mem alloc");
+			goto bail;
+		}
+
+		bucket->next = NULL;
+		lastbucket->next = bucket;
+		ht->newbuckets++;
+	}
+
+	bucket->key = key;
+	bucket->keylen = keylen;
+	bucket->val = val;
+	bucket->vallen = vallen;
+
+	/* Increment the number of entries */
+	ht->entries++;
+	ret = 1;
+
+      bail:
+	/* Release Lock */
+	if (lockacqrd)
+		ocfs_up_sem (&(ht->hashlock));
+
+	LOG_EXIT_LONG (ret);
+	return ret;
+}				/* ocfs_hash_add */
+
+/*
+ * ocfs_hash_del()
+ *
+ * @ht: ptr to hash table
+ * @key: key to be deleted
+ * @keylen: length of key
+ *
+ * Returns 1 when the key was found and removed, 0 otherwise.  Neither
+ * the key nor the value is freed here.
+ */
+int ocfs_hash_del (HASHTABLE * ht, void *key, ub4 keylen)
+{
+	HASHBUCKET *bucket;
+	HASHBUCKET *prvbucket = NULL;
+	ub4 slot;
+	int ret = 0;
+	int lockacqrd = false;
+
+	LOG_ENTRY ();
+
+	if (!ht || !ht->buckets || !key)
+		goto bail;
+
+	slot = ocfs_hash (key, keylen, ht->inithash) & ht->mask;
+
+	/* Acquire Lock */
+	ocfs_down_sem (&(ht->hashlock), true);
+	lockacqrd = true;
+
+	for (bucket = &(ht->buckets[slot]); bucket;
+	     prvbucket = bucket, bucket = bucket->next) {
+		if (!ocfs_hash_key_match (bucket, key, keylen))
+			continue;
+
+		/* Found it */
+		if (!prvbucket) {
+			/* The head bucket is part of the array: just clear it */
+			bucket->key = NULL;
+		} else {
+			/* Detach the bucket from the list ... */
+			prvbucket->next = bucket->next;
+
+			/* ... clear it ... */
+			bucket->key = NULL;
+			bucket->next = NULL;
+
+			/* ... and attach to the end of the free list */
+			if (ht->lastfree) {
+				ht->lastfree->next = bucket;
+				ht->lastfree = bucket;
+			} else {
+				ht->lastfree = ht->freelist = bucket;
+			}
+		}
+
+		/* Decrement the number of entries and exit */
+		ht->entries--;
+		ret = 1;
+		break;
+	}
+
+      bail:
+	/* Release Lock */
+	if (lockacqrd)
+		ocfs_up_sem (&(ht->hashlock));
+
+	LOG_EXIT_LONG (ret);
+	return ret;
+}				/* ocfs_hash_del */
+
+/*
+ * ocfs_hash_get()
+ *
+ * @ht: ptr to hash table
+ * @key: key to look up
+ * @keylen: length of key
+ * @val: if not null, receives the stored value pointer
+ * @vallen: if not null, receives the stored value length
+ *
+ * Returns 1 when the key was found, 0 otherwise.
+ */
+int ocfs_hash_get (HASHTABLE * ht, void *key, ub4 keylen, void **val, ub4 * vallen)
+{
+	HASHBUCKET *bucket;
+	ub4 slot;
+	int ret = 0;
+	int lockacqrd = false;
+
+	LOG_ENTRY ();
+
+	if (!ht || !ht->buckets || !key)
+		goto bail;
+
+	slot = ocfs_hash (key, keylen, ht->inithash) & ht->mask;
+
+	/* Acquire Lock */
+	ocfs_down_sem (&(ht->hashlock), true);
+	lockacqrd = true;
+
+	for (bucket = &(ht->buckets[slot]); bucket; bucket = bucket->next) {
+		if (ocfs_hash_key_match (bucket, key, keylen)) {
+			/* found it */
+			if (val)
+				*val = bucket->val;
+			if (vallen)
+				*vallen = bucket->vallen;
+			ret = 1;
+			break;
+		}
+	}
+
+      bail:
+	/* Release Lock */
+	if (lockacqrd)
+		ocfs_up_sem (&(ht->hashlock));
+
+	LOG_EXIT_LONG (ret);
+	return ret;
+}				/* ocfs_hash_get */
+
+/*
+ * ocfs_hash_stat()
+ *
+ * @ht: ptr to hash table
+ * @data: output buffer for the report
+ * @datalen: size of data; the report is truncated to fit and is always
+ *           null terminated
+ *
+ * Writes a histogram of chain occupancy (number of slots holding
+ * 0, 1, ... 8 and 9-or-more keys) followed by the new/reused bucket
+ * counters.
+ */
+void ocfs_hash_stat (HASHTABLE * ht, char *data, ub4 datalen)
+{
+	HASHBUCKET *bucket;
+	ub4 slot;
+	ub4 i;
+	/*
+	 * NOTE(review): the report below can need up to 187 bytes including
+	 * the null -- confirm HASHSTAT_BUFLEN is at least that large.
+	 */
+	char tmp[HASHSTAT_BUFLEN];
+	char *p;
+	ub4 len = 0;
+	ub4 stats[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+	int lockacqrd = false;
+
+	LOG_ENTRY ();
+
+	if (!ht || !ht->buckets)
+		goto bail;
+
+	if (!data || !datalen)
+		goto bail;
+
+	/* Acquire Lock */
+	ocfs_down_sem (&(ht->hashlock), true);
+	lockacqrd = true;
+
+	for (slot = 0; slot < ht->size; ++slot) {
+		bucket = &(ht->buckets[slot]);
+		i = 0;
+
+		while (bucket) {
+			if (bucket->key)
+				++i;
+			bucket = bucket->next;
+		}
+
+		/* Chains of nine or more keys share the last counter */
+		if (i < 9)
+			stats[i]++;
+		else
+			stats[9]++;
+	}
+
+	for (i = 0, p = tmp; i < 10; ++i, p += len)
+		len = sprintf (p, "%2u: %u\n", i, stats[i]);
+
+	sprintf (p, "New: %u, Reused: %u\n", ht->newbuckets, ht->reusedbuckets);
+
+	strncpy (data, tmp, datalen - 1);
+	data[datalen - 1] = '\0';
+
+      bail:
+	/* Release Lock */
+	if (lockacqrd)
+		ocfs_up_sem (&(ht->hashlock));
+
+	LOG_EXIT ();
+	return;
+}				/* ocfs_hash_stat */
+
+/*
+ * --------------------------------------------------------------------
+ * hash() -- hash a variable-length key into a 32-bit value
+ *   k       : the key (the unaligned variable-length array of bytes)
+ *   len     : the length of the key, counting by bytes
+ *   initval : can be any 4-byte value
+ *
+ * Returns a 32-bit value.  Every bit of the key affects every bit of
+ * the return value.  Every 1-bit and 2-bit delta achieves avalanche.
+ * About 6*len+35 instructions.
+ *
+ * The best hash table sizes are powers of 2.  There is no need to do
+ * mod a prime (mod is sooo slow!).  If you need less than 32 bits,
+ * use a bitmask.  For example, if you need only 10 bits, do
+ *   h = (h & hashmask(10));
+ * In which case, the hash table should have hashsize(10) elements.
+ * + * If you are hashing n strings (ub1 **)k, do it like this: + * for (i=0, h=0; i= 12) { + a += (k[0] + ((ub4) k[1] << 8) + ((ub4) k[2] << 16) + + ((ub4) k[3] << 24)); + b += (k[4] + ((ub4) k[5] << 8) + ((ub4) k[6] << 16) + + ((ub4) k[7] << 24)); + c += (k[8] + ((ub4) k[9] << 8) + ((ub4) k[10] << 16) + + ((ub4) k[11] << 24)); + mix (a, b, c); + k += 12; + len -= 12; + } + + /*------------------------------------- handle the last 11 bytes */ + c += length; + switch (len) { /* all the case statements fall through */ + case 11: + c += ((ub4) k[10] << 24); + case 10: + c += ((ub4) k[9] << 16); + case 9: + c += ((ub4) k[8] << 8); + /* the first byte of c is reserved for the length */ + case 8: + b += ((ub4) k[7] << 24); + case 7: + b += ((ub4) k[6] << 16); + case 6: + b += ((ub4) k[5] << 8); + case 5: + b += k[4]; + case 4: + a += ((ub4) k[3] << 24); + case 3: + a += ((ub4) k[2] << 16); + case 2: + a += ((ub4) k[1] << 8); + case 1: + a += k[0]; + /* case 0: nothing left to add */ + } + mix (a, b, c); + /*-------------------------------------------- report the result */ + return c; +} /* hash */ diff -urNp ocfs/fs/ocfs/Linux/ocfsioctl.c 2.4.20pre5aa2/fs/ocfs/Linux/ocfsioctl.c --- ocfs/fs/ocfs/Linux/ocfsioctl.c Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Linux/ocfsioctl.c Fri Sep 6 01:46:16 2002 @@ -0,0 +1,84 @@ +/* + * ocfsioctl.c + * + * ocfs' ioctl interface + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran,
+ *          Manish Singh, Wim Coekaerts
+ */
+
+#include <ocfs.h>
+
+/* Tracing */
+#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_IOCTL
+
+/*
+ * ocfs_ioctl()
+ *
+ * @inode: inode the ioctl was issued on
+ * @filp: open file the ioctl was issued on
+ * @cmd: ioctl command; must carry OCFS_IOC_MAGIC as its type
+ * @arg: user-space address of the command's argument structure
+ *
+ * OCFS_IOC_GETTYPE copies a struct ocfs_ioc (name, version, node name,
+ * node number) to user space.  OCFS_IOC_CDSL_MODIFY reads an ocfs_cdsl
+ * from user space and hands it to ocfs_create_delete_cdsl().
+ *
+ * Returns 0 on success, -ENOTTY for a foreign or unimplemented command,
+ * -EFAULT when the user buffer cannot be accessed, and -EINVAL when the
+ * CDSL operation fails.
+ */
+int ocfs_ioctl (struct inode *inode,
+		struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	struct ocfs_ioc data;
+	extern char *OcfsVersion;
+	int ret = 0;
+	ocfs_super *osb;
+	ocfs_cdsl cdsl;
+
+	LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u, %lu)\n", inode, filp, cmd, arg);
+
+	if (_IOC_TYPE (cmd) != OCFS_IOC_MAGIC) {
+		ret = -ENOTTY;
+		goto exit_ioctl;
+	}
+
+	switch (cmd) {
+	case OCFS_IOC_GETTYPE:
+		/*
+		 * Zero the whole structure first: the bounded copies below
+		 * leave the last byte of each field alone, so this both
+		 * terminates the strings and keeps stale kernel stack
+		 * contents from reaching user space.
+		 */
+		memset (&data, 0, sizeof (data));
+		strncpy (data.name, OCFS_NAME, sizeof (data.name) - 1);
+		strncpy (data.version, OcfsVersion,
+			 sizeof (data.version) - 1);
+		strncpy (data.nodename, OcfsGlobalCtxt.node_name,
+			 sizeof (data.nodename) - 1);
+		// data.nodenum = OcfsGlobalCtxt.node_num; TODO
+		data.nodenum = 999;	/* TODO */
+
+		/* copy_to_user() returns the number of bytes NOT copied */
+		if (copy_to_user ((struct ocfs_ioc *) arg, &data,
+				  sizeof (struct ocfs_ioc)))
+			ret = -EFAULT;
+		break;
+	case OCFS_IOC_CDSL_MODIFY:
+		osb = ((ocfs_super *)(inode->i_sb->u.generic_sbp));
+
+		/* copy_from_user() returns the number of bytes NOT copied */
+		if (copy_from_user (&cdsl, (void *) arg, sizeof (ocfs_cdsl))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		if (ocfs_create_delete_cdsl (inode, filp, osb, &cdsl) < 0)
+			ret = -EINVAL;
+		break;
+	case OCFS_IOC_CDSL_GETINFO:	/* TODO: implement */
+	default:
+		ret = -ENOTTY;
+		break;
+	}
+
+      exit_ioctl:
+	LOG_EXIT_LONG (ret);
+	return ret;
+}				/* ocfs_ioctl */
diff -urNp ocfs/fs/ocfs/Linux/ocfsiosup.c 2.4.20pre5aa2/fs/ocfs/Linux/ocfsiosup.c
--- ocfs/fs/ocfs/Linux/ocfsiosup.c Thu Jan 1 01:00:00 1970
+++ 2.4.20pre5aa2/fs/ocfs/Linux/ocfsiosup.c Fri Sep 6 01:46:16 2002
@@ -0,0 +1,528 @@
+/*
+ * ocfsiosup.c
+ *
+ * Read and write to disk
+ *
+ * Copyright (C) 2002 Oracle Corporation.
All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#include "ocfs.h" +#include + +/* Tracing */ +#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_IOSUP + +/* + * ocfs_write_sector() + * + */ +int ocfs_write_sector (ocfs_super * osb, void *Buffer, ub8 Offset) +{ + int ret; + + LOG_ENTRY (); + + ret = ocfs_write_disk (osb, Buffer, OCFS_SECTOR_SIZE, Offset); + + LOG_EXIT_STATUS (ret); + return ret; +} /* ocfs_write_sector */ + +/* + * ocfs_read_sector() + * + */ +int ocfs_read_sector (ocfs_super * osb, void *Buffer, ub8 Offset) +{ + int ret; + + LOG_ENTRY (); + + ret = ocfs_read_disk (osb, Buffer, OCFS_SECTOR_SIZE, Offset); + + LOG_EXIT_STATUS (ret); + return ret; +} /* ocfs_read_sector */ + +/* + * LinuxWriteForceDisk() + * + */ +int LinuxWriteForceDisk (ocfs_super * osb, + void *Buffer, ub4 Length, ub8 Offset, bool Cached) +{ + int status = 0; + struct super_block *sb; + ub8 blocknum; + kdev_t dev; + struct buffer_head *bh; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u, %u.%u)\n", osb, Buffer, Length, + HI (Offset), LO (Offset)); + + if (osb == NULL || osb->sb == NULL) { + status = -EFAIL; + LOG_TRACE_STR ("Bad osb or superblock"); + goto bail; + } + + sb = osb->sb; + dev 
= sb->s_dev; + + blocknum = Offset >> sb->s_blocksize_bits; + + if (blocknum == 0) { + LOG_TRACE_STR ("Blocknum is zero!!!"); + } + + do { + if (Length < sb->s_blocksize) + bh = bread (dev, blocknum++, Length); + else + bh = getblk (dev, blocknum++, sb->s_blocksize); + + if (bh == NULL) { + status = -EFAIL; + goto bail; + } + + memcpy (bh->b_data, Buffer, + Length < sb->s_blocksize ? Length : sb->s_blocksize); + mark_buffer_dirty (bh); + ll_rw_block (WRITE, 1, &bh); + if ((osb->cache_fs || Cached) && /* either type of cache */ + Offset > osb->vol_layout.data_start_off) + { + //wait_on_buffer (bh); + brelse (bh); + } else { + wait_on_buffer (bh); + bforget(bh); + bh = NULL; + } + + Buffer = (ub1 *) Buffer + sb->s_blocksize; + Length -= sb->s_blocksize; + } while (Length > 0); + + bail: + LOG_EXIT_STATUS (status); + return status; +} /* LinuxWriteForceDisk */ + +/* + * LinuxReadForceDisk() + * + */ +int LinuxReadForceDisk (ocfs_super * osb, + void *Buffer, ub4 Length, ub8 Offset, bool Cached) +{ + int status = 0; + struct super_block *sb; + ub4 numbuffers; + ub8 blocknum; + kdev_t dev; + struct buffer_head *bh; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u, %u.%u)\n", osb, Buffer, Length, + HI (Offset), LO (Offset)); + + if (osb == NULL || osb->sb == NULL) { + status = -EFAIL; + LOG_TRACE_STR ("Bad osb or superblock"); + goto bail; + } + + sb = osb->sb; + dev = sb->s_dev; + + if (Length > sb->s_blocksize) + numbuffers = Length >> sb->s_blocksize_bits; + else if (Length > 0) + numbuffers = 1; + else + numbuffers = 0; + + blocknum = Offset >> sb->s_blocksize_bits; + + if (numbuffers < 1) { + LOG_TRACE_STR ("No buffers will be read!!!"); + LOG_TRACE_ARGS + ("Len=%u Off=%u.%u numbuffers=%u blocknum=%u.%u\n", Length, + HI (Offset), LO (Offset), numbuffers, HI (blocknum), + LO (blocknum)); + } + + while (numbuffers--) { + bh = getblk (dev, blocknum++, sb->s_blocksize); + if (bh == NULL) { + status = -EFAIL; + LOG_TRACE_ARGS + ("getblk failed for block %u.%u, size %u\n", + HI 
(blocknum - 1), LO (blocknum - 1), + sb->s_blocksize); + goto bail; + } + if ((!(osb->cache_fs || Cached)) || + Offset < osb->vol_layout.data_start_off) + { + mark_buffer_uptodate(bh, false); + ll_rw_block (READ, 1, &bh); + wait_on_buffer (bh); + memcpy (Buffer, bh->b_data, sb->s_blocksize); + bforget(bh); + bh = NULL; + } else { + //mark_buffer_uptodate(bh, false); + ll_rw_block (READ, 1, &bh); + wait_on_buffer (bh); + memcpy (Buffer, bh->b_data, sb->s_blocksize); + brelse (bh); + } + + Buffer = (ub1 *) Buffer + sb->s_blocksize; + } + + bail: + LOG_EXIT_STATUS (status); + return status; + +} /* LinuxReadForceDisk */ + +/* + * ocfs_write_metadata() + * + */ +int ocfs_write_metadata (ocfs_super * osb, void *Buffer, ub4 Length, ub8 Offset) +{ + int status = 0; + sb8 tempVbo = 0; + sb8 tempLbo = 0; + bool bRet = false; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u, %u.%u)\n", osb, Buffer, Length, + HI (Offset), LO (Offset)); + + tempLbo = tempVbo = Offset; + + { + int i = 0; + + while (((osb->needs_flush)) && (i < 3000) + && (!osb->trans_in_progress)) { + ocfs_sleep (100); /* 100ms */ + i++; + } + } + + ocfs_down_sem (&(osb->map_lock), true); + bRet = + ocfs_add_extent_map_entry (osb, &osb->metadata_map, tempVbo, tempLbo, + (ub4) Length); + if (!bRet) { + ocfs_remove_extent_map_entry (osb, &osb->metadata_map, tempVbo, + Length); + bRet = + ocfs_add_extent_map_entry (osb, &osb->metadata_map, tempVbo, + tempLbo, (ub4) Length); + } + ocfs_up_sem (&(osb->map_lock)); + + status = LinuxWriteForceDisk (osb, Buffer, Length, Offset, true); + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_write_metadata */ + + +/* + * ocfs_read_metadata() + * + */ +int ocfs_read_metadata (ocfs_super * osb, void *Buffer, ub4 Length, ub8 Offset) +{ + int status = 0; + ub4 RunsInExtentMap = 0, ExtentMapIndex; + sb8 diskOffsetToFind = 0, foundFileOffset = 0; + sb8 foundDiskOffset = 0; + ub4 tempLen = 0, numMetaDataRuns = 0, numDataRuns = 0; + ub4 remainingLength, length, ioRunSize, i = 0; + 
ocfs_io_runs *IoDataRuns = NULL, *IoMetaDataRuns = NULL; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u, %u.%u)\n", osb, Buffer, Length, + HI (Offset), LO (Offset)); + + ioRunSize = (OCFS_MAX_DATA_EXTENTS * sizeof (ocfs_io_runs)); + ioRunSize = OCFS_ALIGN (ioRunSize, PAGE_SIZE); + + IoDataRuns = ocfs_malloc (ioRunSize); + if (IoDataRuns == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + + IoMetaDataRuns = ocfs_malloc (ioRunSize); + if (IoMetaDataRuns == NULL) { + LOG_ERROR_STATUS (status -ENOMEM); + goto finally; + } + + remainingLength = Length; + length = 0; + diskOffsetToFind = Offset; + + { + int i = 0; + + while (((osb->needs_flush)) && (i < 3000) + && (!osb->trans_in_progress)) { + ocfs_sleep (100); /* 100ms */ + i++; + } + } + + ocfs_down_sem (&(osb->map_lock), true); + + RunsInExtentMap = ocfs_extent_map_get_count (&osb->metadata_map); + for (ExtentMapIndex = 0; ExtentMapIndex < RunsInExtentMap; + ExtentMapIndex++) { + if (!ocfs_get_next_extent_map_entry + (osb, &osb->metadata_map, ExtentMapIndex, &foundFileOffset, + &foundDiskOffset, &tempLen)) { + /* It means this is a hole */ + continue; + } + + length = tempLen; + + /* |***TO*FIND***| */ + /* |---FOUND---| */ + if (foundDiskOffset >= (diskOffsetToFind + remainingLength)) { + break; + } + + /* |***TO*FIND***| */ + /* |---FOUND---| */ + if (diskOffsetToFind >= (foundDiskOffset + length)) { + continue; + } else { + /* |***TO*FIND***| */ + /* |---FOUND-------------------| */ + if ((diskOffsetToFind >= foundDiskOffset) && + ((diskOffsetToFind + remainingLength) <= + (foundDiskOffset + length))) { + + IoMetaDataRuns[numMetaDataRuns].offset = + diskOffsetToFind; + IoMetaDataRuns[numMetaDataRuns].disk_off = + diskOffsetToFind; + IoMetaDataRuns[numMetaDataRuns].byte_cnt = + remainingLength; + remainingLength -= + IoMetaDataRuns[numMetaDataRuns].byte_cnt; + diskOffsetToFind += + IoMetaDataRuns[numMetaDataRuns].byte_cnt; + numMetaDataRuns++; + break; + + } + /* |***TO*FIND***|***or****| */ + /* 
|---FOUND---| */ + else if ((diskOffsetToFind < foundDiskOffset) + && ((diskOffsetToFind + remainingLength) > + foundDiskOffset)) { + IoDataRuns[numDataRuns].offset = + diskOffsetToFind; + IoDataRuns[numDataRuns].disk_off = + diskOffsetToFind; + IoDataRuns[numDataRuns].byte_cnt = + foundDiskOffset - diskOffsetToFind; + remainingLength -= + IoDataRuns[numDataRuns].byte_cnt; + diskOffsetToFind += + IoDataRuns[numDataRuns].byte_cnt; + numDataRuns++; + + IoMetaDataRuns[numMetaDataRuns].offset = + foundDiskOffset; + IoMetaDataRuns[numMetaDataRuns].disk_off = + foundDiskOffset; + IoMetaDataRuns[numMetaDataRuns].byte_cnt = + (remainingLength > + length) ? length : remainingLength; + + remainingLength -= + IoMetaDataRuns[numMetaDataRuns].byte_cnt; + diskOffsetToFind += + IoMetaDataRuns[numMetaDataRuns].byte_cnt; + numMetaDataRuns++; + + if (remainingLength > 0) { + continue; + } else { + break; + } + + } + /* |***TO*FIND***| */ + /* |---FOUND---| */ + else if ((diskOffsetToFind >= foundDiskOffset) && + ((diskOffsetToFind + remainingLength) > + (foundDiskOffset + length))) { + IoMetaDataRuns[numMetaDataRuns].offset = + diskOffsetToFind; + IoMetaDataRuns[numMetaDataRuns].disk_off = + diskOffsetToFind; + IoMetaDataRuns[numMetaDataRuns].byte_cnt = + length - (diskOffsetToFind - + foundDiskOffset); + remainingLength -= + IoMetaDataRuns[numMetaDataRuns].byte_cnt; + diskOffsetToFind += + IoMetaDataRuns[numMetaDataRuns].byte_cnt; + numMetaDataRuns++; + continue; + + } + } + } + + ocfs_up_sem (&(osb->map_lock)); + + if (remainingLength > 0) { + IoDataRuns[numDataRuns].offset = diskOffsetToFind; + IoDataRuns[numDataRuns].disk_off = diskOffsetToFind; + IoDataRuns[numDataRuns].byte_cnt = remainingLength; + numDataRuns++; + } + + /* look for the specified offset in the map .if it exists then */ + /* do the read from cache, else go to disk. 
*/ + for (i = 0; i < numDataRuns; i++) { + ub8 newOffset = 0; + ub4 newLength = IoDataRuns[i].byte_cnt; + ub4 diff; + + newOffset = IoDataRuns[i].disk_off; + diff = (ub4) (newOffset - Offset); + + status = + ocfs_read_force_disk (osb, (void *) ((ub1 *) Buffer + diff), + newLength, newOffset); + if (status < 0) { + goto finally; + } + } + + for (i = 0; i < numMetaDataRuns; i++) { + ub8 newOffset = 0; + ub4 diff; + ub4 newLength = IoMetaDataRuns[i].byte_cnt; + + newOffset = IoMetaDataRuns[i].disk_off; + + diff = (ub4) (newOffset - Offset); + + LinuxReadForceDisk (osb, (void *) ((ub1 *) Buffer + diff), + newLength, newOffset, true); + } + + finally: + if (IoDataRuns) { + ocfs_free (IoDataRuns); + IoDataRuns = NULL; + } + + if (IoMetaDataRuns) { + ocfs_free (IoMetaDataRuns); + IoMetaDataRuns = NULL; + } + + LOG_EXIT_STATUS (status); + return (status); +} /* ocfs_read_metadata */ + +/* + * ocfs_write_force_disk() + * + */ +int ocfs_write_force_disk (ocfs_super * osb, void *Buffer, ub4 Length, ub8 Offset) +{ + return LinuxWriteForceDisk (osb, Buffer, Length, Offset, false); +} /* ocfs_write_force_disk */ + +/* + * ocfs_write_disk() + * + */ +int ocfs_write_disk (ocfs_super * osb, void *Buffer, ub4 Length, ub8 Offset) +{ + return LinuxWriteForceDisk (osb, Buffer, Length, Offset, false); +} /* ocfs_write_disk */ + +/* + * ocfs_read_force_disk() + * + */ +int ocfs_read_force_disk (ocfs_super * osb, void *Buffer, ub4 Length, ub8 Offset) +{ + return LinuxReadForceDisk (osb, Buffer, Length, Offset, false); +} /* ocfs_read_force_disk */ + +/* + * ocfs_read_disk() + * + */ +int ocfs_read_disk (ocfs_super * osb, void *Buffer, ub4 Length, ub8 Offset) +{ + if (Offset < osb->vol_layout.bitmap_off) + return LinuxReadForceDisk (osb, Buffer, Length, Offset, false); + return ocfs_read_metadata (osb, Buffer, Length, Offset); +} /* ocfs_read_disk */ + +/* + * ocfs_read_disk_ex() + * + */ +int ocfs_read_disk_ex (ocfs_super * osb, + void **Buffer, ub4 AllocLen, ub4 ReadLen, ub8 Offset) +{ + 
int status = 0; + + LOG_ENTRY (); + + /* No buffer supplied: allocate AllocLen bytes on the caller's behalf. */ + if (*Buffer == NULL) { + *Buffer = ocfs_malloc (AllocLen); + if (*Buffer == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto bail; + } + } + + /* Read ReadLen bytes from disk offset Offset into the buffer. */ + status = ocfs_read_disk (osb, *Buffer, ReadLen, Offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto bail; + } + + /* Nothing is freed here: a buffer allocated above stays in *Buffer even on failure. */ + bail: + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_read_disk_ex */ diff -urNp ocfs/fs/ocfs/Linux/ocfsipc.c 2.4.20pre5aa2/fs/ocfs/Linux/ocfsipc.c --- ocfs/fs/ocfs/Linux/ocfsipc.c Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Linux/ocfsipc.c Fri Sep 6 01:46:16 2002 @@ -0,0 +1,732 @@ +/* + * ocfsipc.c + * + * IPC infrastructure to support ipcdlm + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. 
+ * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#include + +/* Tracing */ +#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_IPC + +ocfs_ipc_ctxt OcfsIpcCtxt; + +/* + * ocfs_cleanup_ipc() + * + */ +int ocfs_cleanup_ipc (void) +{ + return 0; +} /* ocfs_cleanup_ipc */ + +/* + * ocfs_init_ipc() + * + */ +int ocfs_init_ipc (void) +{ + return 0; +} /* ocfs_init_ipc */ + +/* + * ocfs_recv_thread() + * + */ +int ocfs_recv_thread (void *Context) +{ + int status; + ub4 length = 0; + ub4 i; + ub4 commId = (ub4) Context; + ocfs_recv_context *RecvCtxt = NULL; + ocfs_recv_comp_context *RecvCompContext; + ocfs_comm_info *IpcConfigInfo; + + LOG_ENTRY (); + + ocfs_daemonize ("ocfsrecvthread"); + + RecvCtxt = ocfs_malloc (sizeof (ocfs_recv_context)); + if (RecvCtxt == NULL) { + LOG_ERROR_STATUS (-ENOMEM); + goto bail; + } + + IpcConfigInfo = &(OcfsGlobalCtxt.comm_info[commId]); + + atomic_set (&RecvCtxt->num_posted, 0); + + RecvCtxt->event = ocfs_malloc (sizeof (wait_queue_head_t)); + if (RecvCtxt->event == NULL) { + LOG_ERROR_STATUS (-ENOMEM); + goto bail; + } + + init_waitqueue_head (RecvCtxt->event); + + RecvCtxt->free_lock = ocfs_malloc (sizeof (ocfs_sem)); + if (RecvCtxt->free_lock == NULL) { + LOG_ERROR_STATUS (-ENOMEM); + goto bail; + } + + ocfs_init_sem (RecvCtxt->free_lock); + + RecvCtxt->next_free = 0; + RecvCtxt->num_used = 0; + + /* Allocate low bnd packets for processing recv's */ + + for (i = 0; i < MAX_UDP_PACKETS; i++) { + RecvCtxt->recv_packet[i] = NULL; + RecvCtxt->free[i] = true; + } + + for (i = 0; i < OCFS_HIGH_MARK_UDP; i++) { + RecvCtxt->recv_packet[i] = ocfs_malloc (OCFS_MAX_DLM_PKT_SIZE); + if (RecvCtxt->recv_packet[i] == NULL) { + LOG_ERROR_STATUS (-ENOMEM); + } + } + + while (1) { + length = 0; + + ocfs_down_sem (RecvCtxt->free_lock, true); + + while ((atomic_read (&RecvCtxt->num_posted) > OCFS_LOW_MARK_UDP) + && (RecvCtxt->num_used > OCFS_HIGH_MARK_UDP)) { + ocfs_up_sem (RecvCtxt->free_lock); + 
ocfs_wait (RecvCtxt->event, false, 1000); /* in ms */ + ocfs_down_sem (RecvCtxt->free_lock, true); + } + + for (i = 0; i < MAX_UDP_PACKETS; i++) { + if (RecvCtxt->free[i]) { + if (RecvCtxt->recv_packet[i] == NULL) { + RecvCtxt->recv_packet[i] = + ocfs_malloc (OCFS_MAX_DLM_PKT_SIZE); + if (RecvCtxt->recv_packet[i] == NULL) { + LOG_ERROR_STATUS (-ENOMEM); + continue; + } + } + RecvCtxt->free[i] = false; + break; + } + } + + if (i == MAX_UDP_PACKETS) { + ocfs_up_sem (RecvCtxt->free_lock); + continue; + } + + RecvCtxt->num_used++; + ocfs_up_sem (RecvCtxt->free_lock); + + /* Allocate a context to pass to the completion routine... */ + RecvCompContext = ocfs_malloc (sizeof (ocfs_recv_comp_context)); + if (RecvCompContext == NULL) { + LOG_ERROR_STATUS (-ENOMEM); + RecvCtxt->free[i] = true; + RecvCtxt->num_used--; + ocfs_up_sem (RecvCtxt->free_lock); + continue; + } + + RecvCompContext->recv_ctxt = RecvCtxt; + RecvCompContext->index = i; + + atomic_inc (&RecvCtxt->num_posted); + + length = OCFS_DLM_MAX_MSG_SIZE; + status = ocfs_recv_udp_msg (commId, RecvCtxt->recv_packet[i], &length, + NULL, RecvCompContext); + if (status < 0) { + if (status != -EBADF) { + ocfs_down_sem (RecvCtxt->free_lock, true); + RecvCtxt->free[i] = true; + RecvCtxt->num_used--; + ocfs_up_sem (RecvCtxt->free_lock); + ocfs_safefree (RecvCompContext); + LOG_ERROR_STATUS (status); + /* Delay for a while to the stack stabilize on startup of machine */ + /* BUGBUG : we might need to free up the recv_packet memory */ + /* in case of an Error. */ + } else { + /* Thread is being killed. 
*/ + goto finally; + } + } + } + + finally: + if (OcfsIpcCtxt.send_sock) { + sock_release (OcfsIpcCtxt.send_sock); + OcfsIpcCtxt.send_sock = NULL; + } + + if (OcfsIpcCtxt.recv_sock) { + sock_release (OcfsIpcCtxt.recv_sock); + OcfsIpcCtxt.recv_sock = NULL; + } + + /* Flush all scheduled tasks */ + flush_scheduled_tasks (); + + /* signal main thread of ipcdlm's exit */ + complete (&(OcfsIpcCtxt.complete)); + + bail: + LOG_EXIT (); + return 0; +} /* ocfs_recv_thread */ + +/* + * ocfs_init_udp() + * + */ +int ocfs_init_udp (void) +{ + int status = 0; + ub4 i; + int child_pid; + + LOG_ENTRY (); + + /* Start the receive threads */ + for (i = 0; i < OCFS_MAX_IPC; i++) { + /* Create receive thread only for active network transports */ + if (!OcfsGlobalCtxt.comm_info[i].active) + continue; + + status = ocfs_init_udp_sock (i, &OcfsIpcCtxt.send_sock, + &OcfsIpcCtxt.recv_sock); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto bail; + } + + child_pid = kernel_thread (ocfs_recv_thread, (void *) i, + CLONE_FS | CLONE_FILES | + CLONE_SIGHAND); + if (child_pid < 0) { + status = -EFAIL; + LOG_ERROR_ARGS ("unable to launch ipcdlm thread (%d)\n", + child_pid); + goto bail; + } else { + init_completion (&(OcfsIpcCtxt.complete)); + if (!ocfs_get_task (child_pid, &(OcfsIpcCtxt.task))) + OcfsIpcCtxt.task = NULL; + } + } + + bail: + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_init_udp */ + +/* + * ocfs_init_ipc_dlm() + * + */ +int ocfs_init_ipc_dlm (ocfs_ipc_dlm_config * IpcDlmConfig, ocfs_protocol Protocol) +{ + int status = 0; + + LOG_ENTRY (); + + ocfs_init_ipc (); + + OcfsIpcCtxt.dlm_msg_size = OCFS_DLM_MAX_MSG_SIZE; + OcfsIpcCtxt.version = OCFS_IPC_DLM_VERSION; + + switch (Protocol) { + case OCFS_TCP: + break; + + case OCFS_UDP: + ocfs_init_udp (); + break; + + default: + return -EINVAL; + break; + } + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_init_ipc_dlm */ + +/* + * ocfs_send_udp_msg() + * + */ +int ocfs_send_udp_msg (ocfs_ipc_config_info * 
SendIpcCfgInfo, + ocfs_ipc_config_info * LocalIpcCfgInfo, + void *Mesg, ub4 Length, wait_queue_head_t * Event) +{ + struct sockaddr_in sin; + int status; + + LOG_ENTRY (); + + memset (&sin, 0, sizeof (sin)); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = SendIpcCfgInfo->addr; + sin.sin_port = htons (SendIpcCfgInfo->port); + + LOG_TRACE_ARGS ("about to send to %d.%d.%d.%d:%u\n", + NIPQUAD (sin.sin_addr.s_addr), ntohs (sin.sin_port)); + status = + ocfs_send_to (OcfsIpcCtxt.send_sock, (struct sockaddr *) &sin, + sizeof (sin), Mesg, Length); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto bail; + } + + bail: + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_send_udp_msg */ + +/* + * ocfs_recv_completion() + * + */ +int ocfs_recv_completion (ocfs_recv_comp_context * RecvCompContext) +{ + LOG_ENTRY (); + + if (RecvCompContext) { + /* Queue the work Item in order to do the real work for the receive */ + schedule_task (RecvCompContext->work_item); + } else { + LOG_ERROR_STR("error in ocfs_recv_completion"); + } + + LOG_EXIT (); + return 0; +} /* ocfs_recv_completion */ + +/* + * ocfs_recv_udp_msg() + * + */ +int ocfs_recv_udp_msg (ub4 myCommIndex, + void *Mesg, + ub4 * Length, + wait_queue_head_t * Event, ocfs_recv_comp_context * RecvCompContext) +{ + struct sockaddr_in sin; + int sinlen; + int status = -EFAIL; + struct tq_struct *work_item = NULL; + + LOG_ENTRY (); + + work_item = ocfs_malloc (sizeof (struct tq_struct)); + if (work_item == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto bail; + } else + memset (work_item, 0, sizeof (struct tq_struct)); + + /* Initialize the workitem with our worker routine and Q it. 
*/ + INIT_TQUEUE (work_item, ocfs_dlm_recv_msg, RecvCompContext); + + RecvCompContext->work_item = work_item; + + sinlen = sizeof (struct sockaddr_in); + + memset (&sin, 0, sizeof (sin)); + + status = + ocfs_recv_from (OcfsIpcCtxt.recv_sock, + (struct sockaddr *) &sin, &sinlen, Mesg, (int *) Length); + if (status < 0) { + if (status == -EBADF) + goto bail; + else { + LOG_ERROR_STATUS (status); + goto bail; + } + } + + LOG_TRACE_ARGS ("Received packet from: %d.%d.%d.%d\n", + NIPQUAD (sin.sin_addr.s_addr)); + + if (status == 0) + ocfs_recv_completion (RecvCompContext); + + bail: + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_recv_udp_msg */ + +/* + * ocfs_send_bcast() + * + */ +int ocfs_send_bcast (ocfs_super * osb, ub8 VoteMap, ocfs_dlm_msg * DlmMesg) +{ + int status = 0; + ub8 nodeMap; + ub4 nodeNum; + ub4 primeIpc; + ub4 commIndex; + ocfs_node_config_info *Node; + ub8 curr_tick = 0; + bool bSentOnSec = false; + ub4 timeIncr; + ub4 i; + + LOG_ENTRY_ARGS ("(votemap=0x%x)\n", VoteMap); + + timeIncr = 100000; + + /* Look at votemap and send to each node we need to sent to */ + for (nodeMap = VoteMap, nodeNum = 0; nodeMap != 0; + nodeMap >>= 1, nodeNum++) { + bSentOnSec = false; + + if (nodeMap & 0x1) { + /* Figure out which interconnect we want to send on */ + Node = osb->node_cfg_info[nodeNum]; + + LOG_TRACE_ARGS + ("Sending msg to node=%u, name=%s, interface=%u, " + "prim=0x%x\n", nodeNum, Node->node_name, + Node->num_interfaces, Node->primary_comm); + + if (Node->num_interfaces == 0) + continue; + +/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */ +/* !!! first comparison is pointless !!! */ +/* !!! since primary_comm is unsigned !!! */ +/* !!! removing it !!! */ +/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
*/ + + // if((Node->primary_comm >= 0) && (Node->primary_comm < OCFS_MAX_IPC) && + if ((Node->primary_comm < OCFS_MAX_IPC) && + (Node->primary_comm < Node->num_interfaces)) { + primeIpc = Node->primary_comm; + + Node->ipc_config[primeIpc].state |= + OCFS_IPC_STATE_CONFIG | + OCFS_IPC_STATE_PRIMARY; + + if ((Node->ipc_config[primeIpc]. + state & OCFS_IPC_STATE_CONFIG) + && (Node->ipc_config[primeIpc]. + state & OCFS_IPC_STATE_PRIMARY)) { + ocfs_dlm_send_msg (osb, + &Node-> + ipc_config[primeIpc], + DlmMesg); + curr_tick = jiffies; + + if (curr_tick > + Node->exp_recv[primeIpc]) { + /* Set the IPC as INactive */ + OCFS_CLEAR_FLAG (Node-> + ipc_config + [primeIpc].state, + OCFS_IPC_STATE_ACTIVE); + OCFS_SET_FLAG (Node-> + ipc_config + [primeIpc].state, + OCFS_IPC_STATE_INACTIVE); + } + + Node->exp_recv[primeIpc] = + curr_tick + + (10 * 1000 * 1000) / timeIncr; + + OCFS_SET_FLAG (Node->ipc_config[primeIpc]. + state, + OCFS_IPC_STATE_ACTIVE); + + if (Node->ipc_config[primeIpc]. + state & OCFS_IPC_STATE_ACTIVE) + continue; + } + } + + for (i = 0; i < Node->num_interfaces; i++) { + commIndex = + (Node->last_comm_indx % + Node->num_interfaces); + curr_tick = jiffies; + + if (curr_tick > Node->exp_recv[commIndex]) { + /* Set the IPC as INactive */ + OCFS_CLEAR_FLAG (Node-> + ipc_config[commIndex]. + state, + OCFS_IPC_STATE_ACTIVE); + OCFS_SET_FLAG (Node-> + ipc_config[commIndex]. + state, + OCFS_IPC_STATE_INACTIVE); + } + + if ((Node->ipc_config[commIndex]. + state & OCFS_IPC_STATE_CONFIG) + && (Node->ipc_config[commIndex]. + state & OCFS_IPC_STATE_ACTIVE) + && + (!(Node->ipc_config[commIndex]. 
+ state & OCFS_IPC_STATE_PRIMARY))) { + ocfs_dlm_send_msg (osb, + &Node-> + ipc_config[commIndex], + DlmMesg); + Node->exp_recv[commIndex] = + curr_tick + + (10 * 1000 * 1000) / timeIncr; + Node->last_comm_indx = commIndex; + bSentOnSec = true; + break; + } + } + + if (!bSentOnSec) { + commIndex = + ((Node->last_comm_indx + + 1) % Node->num_interfaces); + Node->last_comm_indx = commIndex; + + if ((Node->ipc_config[commIndex]. + state & OCFS_IPC_STATE_CONFIG) + && + (!(Node->ipc_config[commIndex]. + state & OCFS_IPC_STATE_PRIMARY))) { + ocfs_dlm_send_msg (osb, + &Node-> + ipc_config[commIndex], + DlmMesg); + Node->exp_recv[commIndex] = + curr_tick + + (10 * 1000 * 1000) / timeIncr; + Node->last_comm_indx = commIndex; + } + } + } + } + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_send_bcast */ + +/* + * ocfs_dlm_send_msg() + * + */ +void ocfs_dlm_send_msg (ocfs_super * osb, + ocfs_ipc_config_info * IpcCfgInfo, ocfs_dlm_msg * DlmMesg) +{ + ocfs_node_config_info *Node; + ub4 i; + + LOG_ENTRY (); + + Node = osb->node_cfg_info[osb->node_num]; + + /* Match the mask to determine which comm interface to send over */ + for (i = 0; i < Node->num_interfaces; i++) { + if (IpcCfgInfo->mask == Node->ipc_config[i].mask) { + ocfs_send_udp_msg (IpcCfgInfo, &Node->ipc_config[i], DlmMesg, + DlmMesg->msg_len, NULL); + return; + } + } + + ocfs_send_udp_msg (IpcCfgInfo, &Node->ipc_config[Node->primary_comm], + DlmMesg, DlmMesg->msg_len, NULL); + + LOG_EXIT (); + return; +} /* ocfs_dlm_send_msg */ + +/* + * ocfs_init_udp_sock() + * + */ +int ocfs_init_udp_sock (int commid, + struct socket **send_sock, struct socket **recv_sock) +{ + struct sockaddr_in sin; + int error; + int status = -EFAIL; + ocfs_comm_info *Comm; + + LOG_ENTRY (); + + /* Create Send Socket */ + error = sock_create (PF_INET, SOCK_DGRAM, IPPROTO_UDP, send_sock); + if (error < 0) { + LOG_ERROR_ARGS ("unable to create send_socket, error=%d\n", + error); + goto bail; + } + + /* Bind Send Socket */ + memset (&sin, 
0, sizeof (sin)); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl (INADDR_ANY); + sin.sin_port = htons (0); + + error = (*send_sock)->ops->bind (*send_sock, (struct sockaddr *) &sin, + sizeof (sin)); + if (error < 0) { + LOG_ERROR_ARGS ("unable to bind send_socket, error=%d\n", + error); + goto bail; + } + + /* Create Receive Socket */ + error = sock_create (PF_INET, SOCK_DGRAM, IPPROTO_UDP, recv_sock); + if (error < 0) { + LOG_ERROR_ARGS ("unable to create recv_socket, error=%d\n", + error); + goto bail; + } + + Comm = &(OcfsGlobalCtxt.comm_info[commid]); + + /* Bind Receive Socket */ + memset (&sin, 0, sizeof (sin)); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl (INADDR_ANY); + sin.sin_port = htons (Comm->port); + + error = (*recv_sock)->ops->bind (*recv_sock, (struct sockaddr *) &sin, + sizeof (sin)); + if (error < 0) { + LOG_ERROR_ARGS ("unable to bind recv_socket, error=%d\n", + error); + goto bail; + } + + status = 0; + + bail: + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_init_udp_sock */ + +/* + * ocfs_send_to() + * + */ +int ocfs_send_to (struct socket *sock, + struct sockaddr *addr, int addrlen, char *buf, int buflen) +{ + int error; + struct msghdr msg; + struct iovec iov; + mm_segment_t oldfs; + int status = 0; + + LOG_ENTRY (); + + iov.iov_base = buf; + iov.iov_len = buflen; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_name = addr; + msg.msg_namelen = addrlen; + msg.msg_flags = 0; + + oldfs = get_fs (); + set_fs (get_ds ()); + error = sock_sendmsg (sock, &msg, buflen); + set_fs (oldfs); + + if (error < 0) { + LOG_ERROR_ARGS ("unable to sendmsg, error=%d\n", error); + status = -EFAIL; + } + + LOG_EXIT_STATUS (status); + return status; +} /* OcfsSentTo */ + +/* + * ocfs_recv_from() + * + */ +int ocfs_recv_from (struct socket *sock, + struct sockaddr *addr, int *addrlen, char *buf, int *buflen) +{ + struct iovec iov; + struct msghdr msg; + int error; + 
mm_segment_t oldfs; + int status = -EFAIL; + + LOG_ENTRY (); + + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_iovlen = 1; + msg.msg_iov = &iov; + iov.iov_len = *buflen; + iov.iov_base = buf; + msg.msg_name = addr; + msg.msg_namelen = *addrlen; + msg.msg_flags = 0; + + oldfs = get_fs (); + set_fs (get_ds ()); + error = sock_recvmsg (sock, &msg, *buflen, msg.msg_flags); + set_fs (oldfs); + if (error < 0) { + if (error == -ERESTARTSYS) { + status = -EBADF; + LOG_TRACE_STR ("Shutting down ipcdlm\n"); + goto bail; + } else { + LOG_ERROR_ARGS ("unable to recvmsg, error=%d\n", error); + goto bail; + } + } else { + if (msg.msg_namelen) { + *addrlen = msg.msg_namelen; + *buflen = iov.iov_len; + status = 0; + goto bail; + } + } + + bail: + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_recv_from */ diff -urNp ocfs/fs/ocfs/Linux/ocfsmain.c 2.4.20pre5aa2/fs/ocfs/Linux/ocfsmain.c --- ocfs/fs/ocfs/Linux/ocfsmain.c Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Linux/ocfsmain.c Fri Sep 6 01:46:16 2002 @@ -0,0 +1,3401 @@ +/* + * ocfsmain.c + * + * ocfs file system driver entry point + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#define OCFSMAIN_PRIVATE_DECLS + +#include +#include +#include + +/* Tracing */ +#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_INIT + +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,18) +#define free_kiovec_sz(nr, buf, bh) free_kiovec(nr, buf) +#define alloc_kiovec_sz(nr, buf, bh) alloc_kiovec(nr, buf) +#endif + +/* +** Globals +*/ +ocfs_global_ctxt OcfsGlobalCtxt; +ub4 OcfsDebugCtxt = OCFS_DEBUG_CONTEXT_INIT; +ub4 OcfsDebugLevel = OCFS_DEBUG_LEVEL_ERROR; +char *OcfsVersion = "0.6"; + +spinlock_t ProtectOSBId; +ub4 GlobalOSBId; /* Keeps track of next available OSB Id */ +spinlock_t ProtectMountCount; +ub4 GlobalMountCount; /* Number of volumes currently mounted */ + +struct list_head item_list; + +int ocfs_get_block2 (struct inode *inode, long iblock, long *oblock); +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,10) +static int ocfs_direct_IO (int rw, + struct inode *inode, + struct kiobuf *iobuf, unsigned long blocknr, int blocksize); +#endif + +#define ocfs_rmdir ocfs_unlink + +/* +** ops +*/ +static struct dentry_operations ocfs_dentry_ops = { + d_revalidate:ocfs_dentry_revalidate // let's test it out! 
+}; + +static struct super_operations ocfs_sops = { + statfs:ocfs_statfs, + put_inode:ocfs_put_inode, + clear_inode:ocfs_clear_inode, + //put_inode: force_delete, + //delete_inode: ocfs_delete_inode, + read_inode:ocfs_read_inode, + read_inode2:ocfs_read_inode2, + put_super:ocfs_put_super, + remount_fs:ocfs_remount, + +}; + +static struct address_space_operations ocfs_aops = { + readpage:ocfs_readpage, + writepage:ocfs_writepage, + prepare_write:ocfs_prepare_write, + commit_write:ocfs_commit_write, +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,10) + direct_IO:ocfs_direct_IO +#endif +}; + +static struct file_operations ocfs_fops = { + read:ocfs_file_read, + write:ocfs_file_write, + mmap:generic_file_mmap, + fsync:ocfs_sync_file, + flush:ocfs_flush, + release:ocfs_file_release, + open:ocfs_file_open, + ioctl:ocfs_ioctl +}; + +static struct file_operations ocfs_dops = { + read:generic_read_dir, + readdir:ocfs_readdir, + fsync:ocfs_sync_file, + release:ocfs_file_release, + //open: ocfs_file_open, + ioctl:ocfs_ioctl +}; + +static struct inode_operations ocfs_dir_iops = { + create:ocfs_create, + lookup:ocfs_lookup, + link:ocfs_link, + unlink:ocfs_unlink, + symlink:ocfs_symlink, + mkdir:ocfs_mkdir, + rmdir:ocfs_rmdir, + mknod:ocfs_mknod, + rename:ocfs_rename, +// revalidate: ocfs_inode_revalidate, + setattr:ocfs_setattr, + getattr:ocfs_getattr, +}; + +static struct inode_operations ocfs_file_iops = { + setattr:ocfs_setattr, + getattr:ocfs_getattr, +// revalidate: ocfs_inode_revalidate, +}; + +char *name = NULL; +ub4 dbgctxt = 0; +ub4 dbglvl = 0; + +MODULE_PARM (name, "s"); +MODULE_PARM (dbgctxt, "l"); +MODULE_PARM (dbglvl, "l"); + +/* + * ocfs_parse_options() + * + */ +static int ocfs_parse_options (char *options, uid_t * uid, gid_t * gid, bool * cache) +{ + char *c; + char *value; + int ret = 1; + + LOG_ENTRY (); + + *cache = false; + if (!options) { + ret = 0; + goto bail; + } + + for (c = strtok (options, ","); c != NULL; c = strtok (NULL, ",")) { + if ((value = 
strchr (c, '=')) != NULL) + *value++ = 0; + if (!strcmp (c, "gid")) { + if (!value || !*value) { + LOG_ERROR_STR + ("gid option requires an argument"); + goto bail; + } + *gid = simple_strtoul (value, &value, 0); + if (*value) { + LOG_ERROR_ARGS ("Invalid gid option: %s\n", + value); + goto bail; + } + } else if (!strcmp (c, "uid")) { + if (!value || !*value) { + LOG_ERROR_STR + ("ERROR: uid option requires an argument"); + goto bail; + } + *uid = simple_strtoul (value, &value, 0); + if (*value) { + LOG_ERROR_ARGS ("Invalid uid option: %s\n", + value); + goto bail; + } + } else if (!strcmp (c, "cache")) { + *cache = true; + } else if (!strcmp (c, "nocache")) { + *cache = false; + } else { + LOG_ERROR_ARGS ("Invalid mount option: %s\n", c); + goto bail; + } + } + ret = 0; + + bail: + LOG_EXIT_LONG (ret); + return ret; +} /* ocfs_parse_options */ + +/* + * ocfs_read_super() + * + */ +static struct super_block *ocfs_read_super (struct super_block *sb, void *data, + int silent) +{ + struct dentry *root; + int status; + struct inode *inode = NULL; + uid_t uid = current->fsuid; + gid_t gid = current->fsgid; + bool c; + ocfs_super *osb; + + LOG_ENTRY (); + + MOD_INC_USE_COUNT; + + if (ocfs_parse_options (data, &uid, &gid, &c) != 0) { + LOG_ERROR_STR ("ocfs_read_super: bad mount option"); + return NULL; + } + + /* TODO: fix this */ + sb->s_blocksize = 512; + sb->s_blocksize_bits = 9; + set_blocksize (sb->s_dev, 512); + + sb->s_magic = OCFS_MAGIC; + sb->s_op = &ocfs_sops; + + /* this is needed to support O_LARGE_FILE */ + sb->s_maxbytes = OCFS_LINUX_MAX_FILE_SIZE; + + status = ocfs_mount_volume (sb); + osb = ((ocfs_super *)(sb->u.generic_sbp)); + if (status < 0 || !osb) + goto read_super_error; + osb->cache_fs = c; /* set caching type */ + + inode = iget4 (sb, OCFS_ROOT_INODE_NUMBER, 0, NULL); + if (!inode) { + LOG_ERROR_STATUS (status); + goto read_super_error; + } + inode->i_mode = osb->vol_layout.prot_bits; + inode->i_mode |= S_IFDIR; + inode->i_uid = 
osb->vol_layout.uid; + inode->i_gid = osb->vol_layout.gid; + + root = d_alloc_root (inode); + if (!root) { + LOG_ERROR_STATUS (status); + /* the root inode is released exactly once, at read_super_error */ + goto read_super_error; + } + + sb->s_root = root; + + LOG_EXIT_PTR (sb); + return sb; + + read_super_error: + /* TODO: cleanup */ + /* undo the MOD_INC_USE_COUNT taken on entry and drop the root inode */ + MOD_DEC_USE_COUNT; + if (inode != NULL) { + iput (inode); + inode = NULL; + } + + LOG_EXIT_PTR (0); + return NULL; +} /* ocfs_read_super */ + +static DECLARE_FSTYPE (ocfs_fs_type, "ocfs", ocfs_read_super, FS_REQUIRES_DEV); + +/* + * ocfs_driver_entry() + * + * Driver entry point. Called on insmod. + */ +static int __init ocfs_driver_entry (void) +{ + int status = 0; + + ocfs_version_print (); + + /* Read the debug parameters passed during insmod */ + OcfsDebugCtxt = dbgctxt; + OcfsDebugLevel = dbglvl; + + LOG_ENTRY (); + + memset (&OcfsGlobalCtxt, 0, sizeof (ocfs_global_ctxt)); + memset (&OcfsIpcCtxt, 0, sizeof (ocfs_ipc_ctxt)); + + /* Read /etc/ocfs.conf */ + status = ocfs_read_node_info (); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + if (name) { + if (strlen (name) < MAX_NODE_NAME_LENGTH) { + OcfsGlobalCtxt.node_name = name; + LOG_TRACE_ARGS ("Node name: '%s'\n", + OcfsGlobalCtxt.node_name); + } else { + status = -EINVAL; + LOG_ERROR_STR ("'name' too long"); + goto leave; + } + } else { + status = -EINVAL; + LOG_ERROR_STR ("'name' not set during insmod"); + goto leave; + } + + INIT_LIST_HEAD (&(OcfsGlobalCtxt.osb_next)); + + /* Initialize some required fields */ + OcfsGlobalCtxt.obj_id.type = OCFS_TYPE_GLOBAL_DATA; + OcfsGlobalCtxt.obj_id.size = sizeof (ocfs_global_ctxt); + + /* Initialize the global data resource */ + ocfs_init_sem (&(OcfsGlobalCtxt.res)); + OCFS_SET_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_GLBL_CTXT_RESOURCE_INITIALIZED); + + /* Initialize the memory slabs for oin, ofile, and file entry */ + status = ocfs_initialize_mem_lists (); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + /* Initialize the DLM */ + status = 
ocfs_init_dlm (); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + OcfsGlobalCtxt.hbm = DISK_HBEAT_COMM_ON; + + INIT_LIST_HEAD (&item_list); + + spin_lock_init (&ProtectOSBId); + spin_lock (&ProtectOSBId); + GlobalOSBId = 0; + spin_unlock (&ProtectOSBId); + + spin_lock_init (&ProtectMountCount); + spin_lock (&ProtectMountCount); + GlobalMountCount = 0; + spin_unlock (&ProtectMountCount); + +#if !defined(DLM_THREAD_PER_VOLUME) + /* Initialize DLMThreadMonitor */ + ocfs_init_sem (&(OcfsGlobalCtxt.DLMThreadMonitor)); + + /* Launch one DLMThread for all volumes */ + child_pid = kernel_thread (ocfs_polling_thread, NULL, + CLONE_FS | CLONE_FILES | CLONE_SIGHAND); + if (child_pid < 0) { + status = -EFAIL; + LOG_ERROR_ARGS ("unable to launch dlm thread (%d)\n", + child_pid); + goto leave; + } +#endif + + /* Initialize the proc interface */ + ocfs_proc_init (); + + leave: + if (status < 0) { + /* Free up lookaside lists */ + if (OcfsGlobalCtxt.flags & OCFS_FLAG_MEM_LISTS_INITIALIZED) + ocfs_free_mem_lists (); + + /* Delete the global context resource */ + if (OcfsGlobalCtxt. 
+ flags & OCFS_FLAG_GLBL_CTXT_RESOURCE_INITIALIZED) + OCFS_CLEAR_FLAG (OcfsGlobalCtxt.flags, + OCFS_FLAG_GLBL_CTXT_RESOURCE_INITIALIZED); + } + + LOG_EXIT_STATUS (status); + + if (status >= 0) + return register_filesystem (&ocfs_fs_type); + else + return -1; +} /* ocfs_driver_entry */ + +#ifdef OCFS_LINUX_MEM_DEBUG +typedef struct _ocfs_mem_object +{ + char name[30]; + ocfs_obj_id objid; +} +ocfs_mem_object; + +ocfs_mem_object mobj[] = { + {"ocfs_file", {OCFS_TYPE_OFILE, sizeof (ocfs_file)} + } + , + {"ocfs_inode", {OCFS_TYPE_OIN, sizeof (ocfs_inode)} + } + , + {"ocfs_super", {OCFS_TYPE_OSB, sizeof (ocfs_super)} + } + , + {"ocfs_global_ctxt", {OCFS_TYPE_GLOBAL_DATA, + sizeof (ocfs_global_ctxt)} + } + , + {"ocfs_dir_node", {-1, (128 * 1024)} + } + , + {"ocfs_alloc_ext", {-1, sizeof (ocfs_alloc_ext)} + } + , + {"COMP_CONTEXT", {-1, sizeof (COMP_CONTEXT)} + } + , + {"ocfs_index_node", {-1, sizeof (ocfs_index_node)} + } + , + {"ocfs_index_hdr", {-1, sizeof (ocfs_index_hdr)} + } + , + //{"INDOUBT_ENTRY", { -1, sizeof(INDOUBT_ENTRY) } }, + {"IPC_DLM_CONFIG", {-1, sizeof (IPC_DLM_CONFIG)} + } + , + {"ocfs_extent_map", {-1, sizeof (ocfs_extent_map)} + } + , + {"NODE_CONFIG_INFO", {-1, sizeof (NODE_CONFIG_INFO)} + } + , + {"ocfs_extent_group", {-1, sizeof (ocfs_extent_group)} + } + , + {"OCFS_ALLOC_LOG", {-1, sizeof (OCFS_ALLOC_LOG)} + } + , + {"OCFS_BCAST_REL_LOG", {-1, sizeof (OCFS_BCAST_REL_LOG)} + } + , + {"OCFS_CLEANUP_RECORD", {-1, sizeof (OCFS_CLEANUP_RECORD)} + } + , + {"OCFS_DELETE_LOG", {-1, sizeof (OCFS_DELETE_LOG)} + } + , + {"OCFS_DIR_LOG", {-1, sizeof (OCFS_DIR_LOG)} + } + , + {"ocfs_disk_entry", {-1, sizeof (ocfs_disk_entry)} + } + , + {"ocfs_disk_lock", {-1, sizeof (ocfs_disk_lock)} + } + , + {"ocfs_file_entry", {-1, sizeof (ocfs_file_entry)} + } + , + {"OCFS_FREE_BIMAP", {-1, sizeof (OCFS_FREE_BIMAP)} + } + , + {"OCFS_FREE_EXTENT_LOG", {-1, sizeof (OCFS_FREE_EXTENT_LOG)} + } + , + {"OCFS_FREE_LOG", {-1, sizeof (OCFS_FREE_LOG)} + } + , + {"OCFS_IO_RUNS", 
{-1, sizeof (OCFS_IO_RUNS)} + } + , + {"ocfs_ipc_ctxt", {-1, sizeof (ocfs_ipc_ctxt)} + } + , + {"OCFS_LOCK_LOG", {-1, sizeof (OCFS_LOCK_LOG)} + } + , + {"ocfs_lock_res", {-1, sizeof (ocfs_lock_res)} + } + , + {"OCFS_LOCK_UPDATE", {-1, sizeof (OCFS_LOCK_UPDATE)} + } + , + {"OCFS_LOG_RECORD", {-1, sizeof (OCFS_LOG_RECORD)} + } + , + {"ocfs_obj_id", {-1, sizeof (ocfs_obj_id)} + } + , + {"OCFS_RECOVERY_LOG", {-1, sizeof (OCFS_RECOVERY_LOG)} + } + , + {"ocfs_publish", {-1, sizeof (ocfs_publish)} + } + , + {"ocfs_alloc_bm", {-1, sizeof (ocfs_alloc_bm)} + } + , + {"ocfs_vol_layout", {-1, sizeof (ocfs_vol_layout)} + } + , + {"ocfs_vol_node_map", {-1, sizeof (ocfs_vol_node_map)} + } + , + {"ocfs_vol_disk_hdr", {-1, sizeof (ocfs_vol_disk_hdr)} + } + , + {"ocfs_vol_label", {-1, sizeof (ocfs_vol_label)} + } + , + {"ocfs_vote", {-1, sizeof (ocfs_vote)} + } + , + {"alloc_item", {-1, sizeof (alloc_item)} + } + , + {"ocfs_extent", {-1, sizeof (ocfs_extent)} + } + , +}; + +/* + * ocfs_memcheck() + * + */ +static void ocfs_memcheck (void) +{ + struct list_head *iter; + int i; + alloc_item *item; + char maybe[1000]; + + if ((OcfsDebugCtxt & OCFS_DEBUG_CONTEXT_MEM) && + (OcfsDebugLevel & OCFS_DEBUG_LEVEL_TRACE)) { + list_for_each (iter, &item_list) { + maybe[0] = '\0'; + item = list_entry (iter, alloc_item, list); + + LOG_ERROR_ARGS + ("unfreed mem %x: size=%d tag='%s'\n", + item->address, item->length, item->tag); + + for (i = 0; + i < (sizeof (mobj) / sizeof (ocfs_mem_object)); + i++) { + /* nice clean match */ + if (item->length >= sizeof (ub4) && + mobj[i].objid.Size == item->length && + mobj[i].objid.Type == + *((ub4 *) item->address)) { + LOG_ERROR_ARGS + ("object is a (%s)\n", + mobj[i].name); + + switch (mobj[i].objid.Type) { + case OCFS_TYPE_OIN: + LOG_ERROR_ARGS ("ObjectName: %s\n", + ocfs_print_string (& + ((ocfs_inode + *) + item-> + address)-> +ObjectName)); + break; + case OCFS_TYPE_OSB: + break; + default: + break; + } + break; + } + + /* the type seems to match OR the 
sizeof is right */ + if ((item->length >= sizeof (ub4) && + mobj[i].objid.Type == + *((ub4 *) item->address)) + || (item->length == mobj[i].objid.Size + && mobj[i].objid.Type == -1)) { + if (item->length == (1024 * 128)) { + ocfs_dir_node *dir = + (ocfs_dir_node *) item-> + address; + + LOG_TRACE_ARGS + ("ocfs_dir_node: node_disk_off = %u.%u\n", + HI (dir->node_disk_off), + LO (dir->node_disk_off)); + break; + } + strcat (maybe, mobj[i].name); + strcat (maybe, " "); + } + } + if (maybe[0]) { + LOG_ERROR_ARGS + ("object may be one of (%s)\n", + maybe); + } + } + } +} /* ocfs_memcheck */ +#endif /* OCFS_LINUX_MEM_DEBUG */ + +/* + * ocfs_driver_exit() + * + * Called on rmmod. Flags every known volume for shutdown, commits its + * cache and unlinks it from the global list, then frees the memory + * lists and tears down the proc and filesystem registration. + */ +static void __exit ocfs_driver_exit (void) +{ + ocfs_super *osb = NULL; + struct list_head *iterEntry; + struct list_head *iterNext; + ub4 i = 0; +#if !defined(DLM_THREAD_PER_VOLUME) + ub4 timeout; +#endif + + LOG_ENTRY (); + + /* Signal DLM thread to exit */ + ocfs_down_sem (&(OcfsGlobalCtxt.res), true); + OCFS_SET_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_SHUTDOWN_VOL_THREAD); + + /* entries are unlinked inside the loop, so use the iterator that + * fetches the next element before the body runs */ + list_for_each_safe (iterEntry, iterNext, &(OcfsGlobalCtxt.osb_next)) { + osb = list_entry (iterEntry, ocfs_super, osb_next); + + ocfs_down_sem (&osb->osb_res, true); + OCFS_SET_FLAG (osb->osb_flags, OCFS_OSB_FLAGS_SHUTDOWN); + ocfs_up_sem (&osb->osb_res); + + osb->needs_flush = true; + + /* give each volume its own 10 x 100 wait for running transactions */ + i = 0; + while ((osb->trans_in_progress) && (i < 10)) { + ocfs_sleep (100); + i++; + } + + ocfs_commit_cache (osb, true); + + osb->needs_flush = false; + + list_del (&osb->osb_next); + } + + if (OcfsGlobalCtxt.flags & OCFS_FLAG_MEM_LISTS_INITIALIZED) + ocfs_free_mem_lists (); + + ocfs_up_sem (&(OcfsGlobalCtxt.res)); + + +#if !defined(DLM_THREAD_PER_VOLUME) + /* Wait for the DLM thread to exit */ + ocfs_down_sem (&(OcfsGlobalCtxt.DLMThreadMonitor), true); + ocfs_up_sem (&(OcfsGlobalCtxt.DLMThreadMonitor)); + + /* Wait 3 sec for the dlm thread to completely exit */ + timeout = 3 * HZ; + while (timeout) { + set_current_state (TASK_INTERRUPTIBLE); + timeout = schedule_timeout (timeout); + } 
+#endif + + /* Deinit the proc interface */ + ocfs_proc_deinit (); + + unregister_filesystem (&ocfs_fs_type); + +#ifdef OCFS_LINUX_MEM_DEBUG + ocfs_memcheck (); +#endif + + LOG_EXIT (); + return; +} /* ocfs_driver_exit */ + +/* + * here's how inodes get read from disk: + * iget4 -> find_inode -> OCFS_FIND_INODE + * found? : return the in-memory inode + * not found? : get_new_inode -> OCFS_READ_INODE2 + */ + +/* + * ocfs_find_inode() + * + * This is the iget4 helper function + */ +int ocfs_find_inode (struct inode *inode, unsigned long ino, void *opaque) +{ + ocfs_find_inode_args *args; + ocfs_inode *oin; + int ret = 0; + ub8 fileOff, parentOff; + mode_t mode; + + LOG_ENTRY_ARGS ("(0x%08x, %u, 0x%08x)\n", inode, ino, opaque); + + if (opaque == NULL || inode == NULL) + goto bail; + args = (ocfs_find_inode_args *) opaque; + + if (ino != inode->i_ino) { + goto bail; + } + + /* why do i need the parent offset? isn't the file offset unique? */ + parentOff = 0; + if (!ocfs_linux_get_inode_offset (inode, &fileOff, NULL)) { + LOG_TRACE_STR ("error getting inode offset"); + goto bail; + } + + if (S_ISDIR (inode->i_mode)) { + LOG_TRACE_STR ("find_inode -> S_ISDIR\n"); + if (args->entry->extents[0].disk_off != fileOff) { + LOG_TRACE_ARGS + ("DIR : inode number same but full offset does not match: %u.%u != %u.%u\n", + args->entry->extents[0].disk_off, fileOff); + goto bail; + } + } else if (args->offset != fileOff) { + LOG_TRACE_ARGS + ("FILE : inode number same but full offset does not match: %u.%u != %u.%u\n", + args->offset, fileOff); + goto bail; + } + + /* not sure if this is appropriate, but we have the most + * current file entry so why not use it? 
*/ + mode = args->entry->prot_bits; + + switch (args->entry->attribs & (~OCFS_ATTRIB_FILE_CDSL)) { + case OCFS_ATTRIB_DIRECTORY: + mode |= S_IFDIR; + break; + case OCFS_ATTRIB_CHAR: + mode |= S_IFCHR; + inode->i_rdev = + MKDEV (args->entry->dev_major, args->entry->dev_minor); + break; + case OCFS_ATTRIB_BLOCK: + mode |= S_IFBLK; + inode->i_rdev = + MKDEV (args->entry->dev_major, args->entry->dev_minor); + break; + case OCFS_ATTRIB_FIFO: + mode |= S_IFIFO; + break; + case OCFS_ATTRIB_SYMLINK: + mode |= S_IFLNK; + break; + case OCFS_ATTRIB_SOCKET: + mode |= S_IFSOCK; + break; + case OCFS_ATTRIB_REG: + default: + mode |= S_IFREG; + break; + } + oin = NULL; /* set it back to our current OIN if we have one */ + if (inode_data_is_oin (inode)) + oin = ((ocfs_inode *)inode->u.generic_ip); + ocfs_populate_inode (inode, + S_ISDIR (mode) ? args->entry->extents[0]. + disk_off : args->entry->this_sector, mode, + args->entry->uid, args->entry->gid, + args->entry->file_size, args->entry->modify_time, + args->entry->modify_time, args->entry->create_time, + (void *) oin); + ret = 1; + bail: + LOG_EXIT_LONG (ret); + return ret; +} /* ocfs_find_inode */ + +/* + * ocfs_populate_inode() + * + */ +static void ocfs_populate_inode (struct inode *inode, ub8 offset, + umode_t mode, uid_t uid, gid_t gid, loff_t size, + time_t atime, time_t mtime, time_t ctime, void *genptr) +{ + struct super_block *sb; + ocfs_super *osb; + + LOG_ENTRY_ARGS ("(0x%08x, %u, size:%u)\n", inode, mode, size); + + sb = inode->i_sb; + osb = ((ocfs_super *)(sb->u.generic_sbp)); + + inode->i_dev = sb->s_dev; + inode->i_mode = mode; + inode->i_uid = uid; + inode->i_gid = gid; + inode->i_blksize = (ub4) osb->vol_layout.cluster_size; // sb->s_blocksize; + inode->i_blocks = (size + sb->s_blocksize) >> sb->s_blocksize_bits; + inode->i_mapping->a_ops = &ocfs_aops; + inode->i_atime = atime; + inode->i_mtime = mtime; + inode->i_ctime = ctime; + + if (genptr) + SET_INODE_OIN (inode, genptr); + else + SET_INODE_OFFSET (inode, 
offset); + + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + inode->i_fop = &ocfs_fops; + inode->i_op = &ocfs_file_iops; + inode->i_size = size; + inode->i_rdev = sb->s_dev; + break; + case S_IFDIR: + inode->i_op = &ocfs_dir_iops; + inode->i_fop = &ocfs_dops; + inode->i_size = OCFS_DEFAULT_DIR_NODE_SIZE; + inode->i_rdev = sb->s_dev; + break; + case S_IFLNK: + inode->i_op = &page_symlink_inode_operations; + //inode->i_fop = &ocfs_fops; + inode->i_size = size; + inode->i_rdev = sb->s_dev; + break; + default: + init_special_inode (inode, mode, inode->i_rdev); + break; + } + + LOG_EXIT (); + return; +} /* ocfs_populate_inode */ + +/* + * ocfs_read_inode2() + * + * by this point, i_sb, i_dev, i_ino are filled in + * + */ +static void ocfs_read_inode2 (struct inode *inode, void *opaque) +{ + struct super_block *sb; + ocfs_find_inode_args *args; + ocfs_super *osb; + ocfs_inode *newoin; + umode_t mode; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", inode, opaque); + + if (inode == NULL || inode->i_sb == NULL) { + LOG_ERROR_STR ("bad inode"); + goto bail; + } + sb = inode->i_sb; + osb = ((ocfs_super *)(sb->u.generic_sbp)); + if (inode->i_ino == OCFS_ROOT_INODE_NUMBER) { + inode->i_mode = S_IFDIR | 0755; + inode->i_blksize = 512; /* TODO: fix */ + inode->i_blocks = 0; + inode->i_rdev = inode->i_dev; /* is this correct?! 
*/ + inode->i_mapping->a_ops = &ocfs_aops; + inode->i_atime = CURRENT_TIME; + inode->i_mtime = CURRENT_TIME; + inode->i_ctime = CURRENT_TIME; + inode->i_op = &ocfs_dir_iops; + inode->i_fop = &ocfs_dops; + SET_INODE_OIN (inode, osb->oin_root_dir); + goto bail; + } + + if (opaque == NULL) { + make_bad_inode (inode); + goto bail; + } + + args = (ocfs_find_inode_args *) opaque; + newoin = NULL; + + mode = args->entry->prot_bits; + + switch (args->entry->attribs & (~OCFS_ATTRIB_FILE_CDSL)) { + case OCFS_ATTRIB_DIRECTORY: + mode |= S_IFDIR; + break; + case OCFS_ATTRIB_CHAR: + inode->i_rdev = + MKDEV (args->entry->dev_major, args->entry->dev_minor); + mode |= S_IFCHR; + break; + case OCFS_ATTRIB_BLOCK: + inode->i_rdev = + MKDEV (args->entry->dev_major, args->entry->dev_minor); + mode |= S_IFBLK; + break; + case OCFS_ATTRIB_FIFO: + mode |= S_IFIFO; + break; + case OCFS_ATTRIB_SYMLINK: + mode |= S_IFLNK; + break; + case OCFS_ATTRIB_SOCKET: + mode |= S_IFSOCK; + break; + case OCFS_ATTRIB_REG: + default: + mode |= S_IFREG; + break; + } + ocfs_populate_inode (inode, + S_ISDIR (mode) ? args->entry->extents[0]. 
+ disk_off : args->entry->this_sector, mode, + args->entry->uid, args->entry->gid, + args->entry->file_size, args->entry->modify_time, + args->entry->modify_time, args->entry->create_time, + (void *) newoin); + + bail: + LOG_EXIT (); + return; +} /* ocfs_read_inode2 */ + +/* + * ocfs_read_inode() + * + */ +static void ocfs_read_inode (struct inode *inode) +{ + make_bad_inode (inode); +} /* ocfs_read_inode() */ + +/* + * ocfs_lookup() + * + */ +static struct dentry *ocfs_lookup (struct inode *dir, struct dentry *dentry) +{ + int status; + ocfs_find_inode_args args; + ocfs_file_entry *fe = NULL; + ocfs_inode *parentOin = NULL; + ub8 parentOffset; + struct inode *inode = NULL; + struct super_block *sb = dir->i_sb; + struct dentry *ret; + ocfs_super *osb = ((ocfs_super *)(sb->u.generic_sbp)); + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", dir, dentry); + + atomic_inc (&dir->i_count); + + if (dentry->d_name.len > OCFS_MAX_FILENAME_LENGTH) { + ret = ERR_PTR (-ENAMETOOLONG); + goto bail; + } + + if (!ocfs_linux_get_inode_offset (dir, &parentOffset, &parentOin)) { + LOG_ERROR_STR ("bad offset in parent inode"); + ret = ERR_PTR (-EACCES); + goto bail; + } + + fe = ocfs_allocate_file_entry(); + if (!fe) { + ret = ERR_PTR (-ENOMEM); + goto bail; + } + + status = ocfs_find_files_on_disk (osb, parentOffset, &(dentry->d_name), fe, NULL); + if (status >= 0) { + args.offset = fe->this_sector; + args.entry = fe; + inode = + iget4 (sb, LO (fe->this_sector), + (find_inode_t) ocfs_find_inode, (void *) (&args)); + if (inode == NULL) { + ret = ERR_PTR (-EACCES); + goto bail; + } + if (is_bad_inode (inode)) { + iput (inode); + inode = NULL; + ret = ERR_PTR (-EACCES); + goto bail; + } + } + dentry->d_op = &ocfs_dentry_ops; + d_add (dentry, inode); + ret = NULL; + + bail: + if (fe) + ocfs_release_file_entry (fe); + + atomic_dec (&dir->i_count); + LOG_EXIT_PTR (ret); + return ret; +} /* ocfs_lookup */ + +/* + * ocfs_statfs() + * + */ +static int ocfs_statfs (struct super_block *sb, struct statfs 
*buf)
+{
+	ocfs_super *osb = NULL;
+	ub4 size, countbits = 0;
+	int status = 0;
+	int ret = 0;
+	ocfs_alloc_bm *buffer = NULL;
+
+	LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", sb, buf);
+
+	osb = ((ocfs_super *)(sb->u.generic_sbp));
+	size = (ub4) OCFS_SECTOR_ALIGN ((osb->cluster_bitmap.size) / 8);
+
+	/*
+	 * Two allocations: the bitmap descriptor and the raw bitmap bytes.
+	 * Both are released at bail, on success and on every failure path.
+	 */
+	buffer = vmalloc (sizeof (ocfs_alloc_bm));
+	if (buffer == NULL) {
+		ret = -ENOMEM;
+		goto bail;
+	}
+	buffer->buf = vmalloc (sizeof (char) * size + sizeof (ocfs_alloc_bm));
+	if (buffer->buf == NULL) {
+		ret = -ENOMEM;
+		goto bail;
+	}
+	buffer->size = osb->cluster_bitmap.size;
+
+	/* Read the on-disk cluster bitmap; do not count bits in a buffer */
+	/* that was never filled in. */
+	status =
+	    ocfs_read_force_disk (osb, buffer->buf, size,
+				  osb->vol_layout.bitmap_off);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		ret = -EIO;
+		goto bail;
+	}
+	countbits = ocfs_count_bits (buffer);
+
+	buf->f_type = OCFS_MAGIC;
+	buf->f_bsize = sb->s_blocksize;
+	buf->f_namelen = OCFS_MAX_FILENAME_LENGTH;
+	/* Block counts are expressed in 512-byte units (cluster_size >> 9). */
+	buf->f_blocks =
+	    (unsigned long) ((unsigned long) (osb->cluster_bitmap.size) *
+			     (unsigned long) (osb->vol_layout.
+					      cluster_size >> 9));
+	buf->f_bfree =
+	    (unsigned long) buf->f_blocks -
+	    (unsigned long) (countbits * (osb->vol_layout.cluster_size >> 9));
+	buf->f_bavail = buf->f_bfree;
+	buf->f_files = (unsigned long) (osb->cluster_bitmap.size);
+	buf->f_ffree = (unsigned long) (osb->cluster_bitmap.size) - countbits;
+
+ bail:
+	if (buffer != NULL) {
+		if (buffer->buf != NULL)
+			vfree (buffer->buf);
+		vfree (buffer);
+	}
+
+	LOG_EXIT_LONG (ret);
+	return ret;
+}				/* ocfs_statfs */
+
+
+/*
+ * ocfs_block_symlink()
+ *
+ */
+static int ocfs_block_symlink (struct inode *inode, const char *symname, int len)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct page *page = grab_cache_page (mapping, 0);
+	int err = -ENOMEM;
+	char *kaddr;
+
+	if (!page)
+		goto fail;
+	err = mapping->a_ops->prepare_write (NULL, page, 0, len - 1);
+	if (err)
+		goto fail_map;
+	kaddr = page_address (page);
+	memset (kaddr, 0, PAGE_CACHE_SIZE);
+	memcpy (kaddr, symname, len - 1);
+	mapping->a_ops->commit_write (NULL, page, 0, len - 1);
+	err = mapping->a_ops->readpage (NULL, page);
+	wait_on_page (page);
+	page_cache_release (page);
+	if (err < 0)
+		goto fail;
+	mark_inode_dirty (inode);
+	return 0;
+
+ fail_map:
+	UnlockPage (page);
+
page_cache_release (page); + fail: + return err; +} /* ocfs_block_symlink */ + +/* + * ocfs_symlink_get_block() + * + */ +int ocfs_symlink_get_block (struct inode *inode, + long iblock, struct buffer_head *bh_result, int create) +{ + int err = -EIO; + ocfs_super *osb; + int status; + ocfs_file_entry *fe = NULL; + ub8 entryOffset; + + LOG_ENTRY_ARGS ("(0x%08x, %d, 0x%08x, %d)\n", inode, iblock, bh_result, + create); + + if (!inode) { + LOG_ERROR_STR ("bad inode"); + goto bail; + } + osb = ((ocfs_super *)(inode->i_sb->u.generic_sbp)); + + if ((iblock << 9) > PATH_MAX + 1) { + LOG_ERROR_ARGS ("file offset > PATH_MAX: %u.%u\n", iblock << 9); + goto bail; + } + + fe = ocfs_allocate_file_entry (); + if (fe == NULL) { + status = -ENOMEM; + LOG_ERROR_STATUS (status); + goto bail; + } + if (!ocfs_linux_get_inode_offset (inode, &entryOffset, NULL)) { + LOG_ERROR_STR ("could not get inode offset!"); + goto bail; + } + status = ocfs_read_file_entry (osb, fe, entryOffset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto bail; + } + if (!IS_VALID_FILE_ENTRY (fe)) { + LOG_ERROR_STR ("FileEntry Read from disk is invalid"); + goto bail; + } + if ((iblock << 9) >= (sb8) fe->alloc_size) { + LOG_ERROR_ARGS + ("file offset is outside the allocated size: %u.%u\n", + iblock << 9); + goto bail; + } + + bh_result->b_dev = inode->i_dev; + bh_result->b_blocknr = fe->extents[0].disk_off >> 9; + bh_result->b_blocknr += iblock; + bh_result->b_state |= (1UL << BH_Mapped); + err = 0; + + bail: + if (fe != NULL) + ocfs_release_file_entry (fe); + LOG_EXIT_LONG (err); + return err; +} /* ocfs_symlink_get_block */ + +/* + * ocfs_get_block() + * + */ +int ocfs_get_block (struct inode *inode, + long iblock, struct buffer_head *bh_result, int create) +{ + int err = -EIO; + ocfs_inode *oin; + ocfs_super *osb; + sb8 vbo; + sb8 lbo; + ub4 len, numExts; + int status; + void *ioRuns = NULL; + + if (S_ISLNK (inode->i_mode)) + return ocfs_symlink_get_block (inode, iblock, bh_result, + create); + + 
LOG_ENTRY_ARGS ("(0x%08x, %d, 0x%08x, %d)\n", inode, iblock, bh_result, + create); + + if (!inode || !inode_data_is_oin (inode)) { + LOG_ERROR_STR ("bad inode or inode has no oin"); + goto bail; + } + oin = ((ocfs_inode *)inode->u.generic_ip); + osb = (ocfs_super *) oin->osb; + + vbo = (sb8) iblock << inode->i_sb->s_blocksize_bits; + + len = 1; + status = ocfs_lookup_file_allocation (osb, oin, vbo, &lbo, + len, &numExts, &ioRuns); + if (status < 0) { + LOG_ERROR_STR ("ocfs_lookup_file_allocation failed"); + goto bail; + } + + if (create) { + /* TODO */ + LOG_TRACE_ARGS ("CREATE: offset: %d -> block#: %d\n", iblock, + lbo >> inode->i_sb->s_blocksize_bits); + /* goto bail; */ + } + bh_result->b_dev = inode->i_dev; + bh_result->b_blocknr = lbo >> inode->i_sb->s_blocksize_bits; + bh_result->b_state |= (1UL << BH_Mapped); + err = 0; + + bail: + if (ioRuns != NULL) { + ocfs_safefree (ioRuns); + } + LOG_EXIT_LONG (err); + return err; +} /* ocfs_get_block */ + +/* + * ocfs_get_block2() + * + */ +int ocfs_get_block2 (struct inode *inode, long iblock, long *oblock) +{ + int err = 0; + ocfs_inode *oin; + ocfs_super *osb; + sb8 vbo; + sb8 lbo; + ub4 len, numExts; + int status; + void *ioRuns = NULL; + + LOG_ENTRY_ARGS ("(0x%08x, %d)\n", inode, iblock); + + if (!inode || !inode_data_is_oin (inode)) { + LOG_ERROR_STR ("bad inode or inode has no oin"); + goto bail; + } + oin = ((ocfs_inode *)inode->u.generic_ip); + osb = (ocfs_super *) oin->osb; + + vbo = (sb8) iblock << inode->i_sb->s_blocksize_bits; + + len = 1; + status = ocfs_lookup_file_allocation (osb, oin, vbo, &lbo, + len, &numExts, &ioRuns); + if (status < 0) { + err = -1; + LOG_ERROR_STR ("ocfs_lookup_file_allocation failed"); + goto bail; + } + + bail: + if (ioRuns != NULL) { + ocfs_safefree (ioRuns); + } + LOG_EXIT_LONG (err); + *oblock = lbo >> inode->i_sb->s_blocksize_bits; + return err; +} /* ocfs_get_block2 */ + +/* + * ocfs_file_write() + * + */ +static int ocfs_file_write (struct file *filp, const char *buf, 
size_t count, + loff_t * ppos) +{ + int ret = 0; + int saAcquired = false, acquired = false, writingAtEOF = false; + ocfs_inode *oin = NULL; + ocfs_super *osb = NULL; + struct inode *inode = filp->f_dentry->d_inode; + int status; + ub8 newsize; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %d)\n", filp, buf, count); + + /* happy write of zero bytes */ + if (count == 0) { + ret = 0; + goto bail; + } + + if (!inode || !inode_data_is_oin (inode)) { + LOG_TRACE_STR ("bad inode or inode has no oin"); + ret = -EIO; + goto bail; + } + oin = ((ocfs_inode *)inode->u.generic_ip); + osb = (ocfs_super *) oin->osb; + + if ((oin->lock_res->master_node_num != osb->node_num) || + (oin->lock_res->lock_state != OCFS_DLM_ENABLE_CACHE_LOCK)) { + oin->cache_enabled = false; + } else { + oin->cache_enabled = true; + } + + if (osb->osb_flags & OCFS_OSB_FLAGS_SHUTDOWN) { + LOG_TRACE_STR ("Volume has already started shutdown"); + ret = -EIO; + goto bail; + } + + if (filp->f_flags & O_DIRECT) { + /* anything special for o_direct? */ + LOG_TRACE_STR ("O_DIRECT"); + } else { + ocfs_down_sem (&(oin->main_res), true); + acquired = true; + + LOG_TRACE_ARGS ("non O_DIRECT write, fileopencount=%d\n", + oin->open_hndl_cnt); + if (oin->open_hndl_cnt > 1) { + if (oin->open_for_write) { + LOG_TRACE_STR + ("uh oh! 
someone else is doing non O_DIRECT writes!\n"); + ret = -EIO; + goto bail; + } else { + LOG_TRACE_STR + ("there are other readers, but you're the first writer\n"); + saAcquired = true; + oin->open_for_write = true; + } + } + if (!osb->cache_fs) { + LOG_TRACE_STR ("Plugging in O_SYNC for you"); + filp->f_flags |= O_SYNC; + } + } + + if (!acquired) { + ocfs_down_sem (&(oin->main_res), true); + acquired = true; + } + + if (OIN_NEEDS_VERIFICATION (oin)) { + LOG_TRACE_STR ("OIN_NEEDS_VERIFICATION"); + status = ocfs_verify_update_oin (osb, oin); + if (status < 0) { + LOG_TRACE_STR ("ocfs_verify_update_oin failed"); + LOG_TRACE_STR ("TODO: disable volume"); + ret = -EIO; + goto bail; + } + } + if (filp->f_flags & O_APPEND) { + writingAtEOF = true; + newsize = count + inode->i_size; + } else + newsize = count + *ppos; + + if (newsize > inode->i_size) + writingAtEOF = true; + + if (writingAtEOF) + LOG_TRACE_STR ("Writing at EOF"); + + LOG_TRACE_ARGS ("ppos=%u.%u newsize=%u.%u cursize=%u.%u\n", + HI (*ppos), LO (*ppos), HI (newsize), LO (newsize), + HI (inode->i_size), LO (inode->i_size)); + + if (newsize > oin->alloc_size) { + LOG_TRACE_ARGS + ("Will need more allocation: have=%u.%u, need=%u.%u\n", + HI (oin->alloc_size), LO (oin->alloc_size), HI (newsize), + LO (newsize)); + + if (acquired) { + ocfs_up_sem (&(oin->main_res)); + acquired = false; + } + + status = + ocfs_create_modify_file (osb, oin->parent_dirnode_off, oin, + NULL, newsize, &oin->file_disk_off, + FLAG_FILE_EXTEND, NULL, NULL); + if (status < 0) { + LOG_TRACE_ARGS + ("Failed to extend file from %u.%u to %u.%u !!!\n", + HI (*ppos), LO (*ppos), HI (newsize), + LO (newsize)); + ret = -ENOSPC; + goto bail; + } + + ocfs_down_sem (&(oin->main_res), true); + acquired = true; + } +#if LINUX_VERSION_CODE < LinuxVersionCode(2,4,10) + if (filp->f_flags & O_DIRECT) { + ret = ocfs_rw_direct (WRITE, filp, (char *) buf, count, ppos); + } else +#endif + ret = generic_file_write (filp, buf, count, ppos); + if (writingAtEOF 
&& ret >= 0) { + struct iattr attr; + + LOG_TRACE_STR + ("Generic_file_write ok, asking for OIN update now"); + inode->i_size = newsize; + if (acquired) { + ocfs_up_sem (&(oin->main_res)); + acquired = false; + } + memset (&attr, 0, sizeof (struct iattr)); + attr.ia_valid |= ATTR_SIZE; + attr.ia_size = newsize; + status = + ocfs_create_modify_file (osb, oin->parent_dirnode_off, oin, + NULL, newsize, + &oin->file_disk_off, FLAG_FILE_UPDATE, + NULL, &attr); + if (status < 0) { + /* + * WE NEED TO MAKE SURE THIS ALWAYS WORKS OR + * WE NEED TO DO MORE HERE!!!! + */ + LOG_TRACE_STR + ("Big problem, failed to update oin after writing!"); + ret = -EIO; + } + } + + bail: + if (acquired) { + if (saAcquired) { + oin->open_for_write = false; + } + ocfs_up_sem (&(oin->main_res)); + acquired = false; + } + + LOG_EXIT_LONG (ret); + return ret; +} /* ocfs_file_write */ + +/* + * ocfs_file_read() + * + */ +static int ocfs_file_read (struct file *filp, char *buf, size_t count, loff_t * ppos) +{ + int ret = 0; + int acquired = false; + ocfs_inode *oin = NULL; + ocfs_super *osb = NULL; + struct inode *inode = filp->f_dentry->d_inode; + int status; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %d)\n", filp, buf, count); + + if (filp->f_flags & O_DIRECT) { + /* anything special for o_direct? 
*/ + LOG_TRACE_STR ("O_DIRECT"); + } + + if (!inode || !inode_data_is_oin (inode)) { + LOG_ERROR_STR ("Bad inode or inode has no oin"); + ret = -EINVAL; + goto bail; + } + oin = ((ocfs_inode *)inode->u.generic_ip); + osb = (ocfs_super *) oin->osb; + + ocfs_down_sem (&(oin->main_res), true); + acquired = true; + + if (OIN_NEEDS_VERIFICATION (oin)) { + status = ocfs_verify_update_oin (osb, oin); + if (status < 0) { + LOG_TRACE_STR ("ocfs_verify_update_oin failed"); + LOG_TRACE_STR ("TODO: disable volume"); + ret = -EIO; + goto bail; + } + } + + ocfs_up_sem (&(oin->main_res)); + acquired = false; + +#if LINUX_VERSION_CODE < LinuxVersionCode(2,4,10) + if (filp->f_flags & O_DIRECT) { + ret = ocfs_rw_direct (READ, filp, buf, count, ppos); + } else +#endif + { + ret = generic_file_read (filp, buf, count, ppos); + } + + if (ret == -EINVAL) + LOG_TRACE_STR ("Generic_file_read returned -EINVAL"); + + bail: + if (acquired) + ocfs_up_sem (&(oin->main_res)); + + LOG_EXIT_LONG (ret); + return ret; +} /* ocfs_file_read */ + +/* + * ocfs_readpage() + * + */ +static int ocfs_readpage (struct file *file, struct page *page) +{ + int ret; + + LOG_ENTRY_ARGS ("(0x%08x, %u)\n", file, (page ? 
page->index : 0)); + + ret = block_read_full_page (page, ocfs_get_block); + + LOG_EXIT_LONG (ret); + return ret; +} /* ocfs_readpage */ + +/* + * ocfs_writepage() + * + */ +static int ocfs_writepage (struct page *page) +{ + int ret; + + LOG_ENTRY_ARGS ("(0x%08x)\n", page); + + ret = block_write_full_page (page, ocfs_get_block); + + LOG_EXIT_LONG (ret); + return ret; +} /* ocfs_writepage */ + +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,10) +/* + * ocfs_direct_IO() + * + */ +static int ocfs_direct_IO (int rw, + struct inode *inode, + struct kiobuf *iobuf, unsigned long blocknr, int blocksize) +{ + int ret; + + LOG_ENTRY (); + + ret = generic_direct_IO (rw, inode, iobuf, blocknr, blocksize, + ocfs_get_block); + + LOG_EXIT_LONG (ret); + return ret; +} /* ocfs_direct_IO */ +#endif + +/* + * ocfs_prepare_write() + * + */ +static int ocfs_prepare_write (struct file *file, + struct page *page, unsigned from, unsigned to) +{ + int ret; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u, %u)\n", file, page, from, to); + + ret = block_prepare_write (page, from, to, ocfs_get_block); + + LOG_EXIT_LONG (ret); + return ret; +} /* ocfs_prepare_write */ + +#define SECTOR_BITS 9 +#define SECTOR_SIZE (1U << SECTOR_BITS) +#define SECTOR_MASK (SECTOR_SIZE - 1) + +#if LINUX_VERSION_CODE < LinuxVersionCode(2,4,10) +/* + * ocfs_rw_direct() + * + */ +static ssize_t ocfs_rw_direct (int rw, struct file *filp, char *buf, size_t size, + loff_t * offp) +{ + struct kiobuf *iobuf; + int err = 0; + unsigned long blocknr, blocks; + size_t transferred; + int iosize; + int i; + struct inode *inode = filp->f_dentry->d_inode; + int max_sectors; + int nbhs; + int sector_size, sector_bits, sector_mask; + int ret = 0; + + nbhs = (size >> SECTOR_BITS); + if (nbhs > KIO_MAX_SECTORS) + nbhs = KIO_MAX_SECTORS; + err = alloc_kiovec_sz (1, &iobuf, &nbhs); + if (err) + goto out; + + sector_size = 512; + sector_bits = SECTOR_BITS; + sector_mask = 511; + max_sectors = KIO_MAX_SECTORS >> (sector_bits - SECTOR_BITS); + + 
err = -EINVAL; + if ((*offp & 511) || (size & 511)) + goto out; + err = 0; + + if (size) + err = -ENXIO; + + /* Split the IO into KIO_MAX_SECTORS chunks, mapping and */ + /* unmapping the single kiobuf as we go to perform each chunk of IO. */ + + transferred = 0; + blocknr = *offp >> SECTOR_BITS; + while (size > 0) { + blocks = size >> SECTOR_BITS; + if (blocks > max_sectors) + blocks = max_sectors; + if (!blocks) + break; + iosize = blocks << SECTOR_BITS; + + err = map_user_kiobuf (rw, iobuf, (unsigned long) buf, iosize); + if (err) { + break; + } + + for (i = 0; i < blocks; i++) { + ret = + ocfs_get_block2 (inode, blocknr++, + &iobuf->blocks[i]); + // iobuf->blocks[i] = ocfs_get_block2(inode, blocknr++); + if (ret == -1) // || blocknr * 512 >= inode->i_size) + { + err = 0; + unmap_kiobuf (iobuf); + goto out; + } + } + err = + brw_kiovec (rw, 1, &iobuf, inode->i_dev, iobuf->blocks, + 512); + if (err >= 0) { + transferred += err; + size -= err; + buf += err; + } + + unmap_kiobuf (iobuf); + + if (err != iosize) + break; + } + + if (transferred) { + *offp += transferred; + err = transferred; + } + + out: + free_kiovec_sz (1, &iobuf, &nbhs); + + return err; +} /* ocfs_rw_direct */ +#endif /* LINUX_VERSION_CODE < LinuxVersionCode(2,4,10) */ + +/* + * ocfs_commit_write() + * + */ +static int ocfs_commit_write (struct file *file, + struct page *page, unsigned from, unsigned to) +{ + int ret; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u, %u)\n", file, page, from, to); + + ret = generic_commit_write (file, page, from, to); + + LOG_EXIT_LONG (ret); + return ret; +} /* ocfs_commit_write */ + +/* + * ocfs_create_or_open_file() + * + */ +static int ocfs_create_or_open_file (struct inode *inode, + struct inode *dir, struct dentry *dentry, + int mode, ocfs_file ** newofile, int dev) +{ + int create = (inode == NULL); + int status = 0; + ocfs_super *osb = NULL; + ocfs_file *OFile = NULL; + ocfs_inode *ParentOin = NULL; + ocfs_inode *NewOIN = NULL; + ocfs_inode *DirEnt = NULL; + bool 
bAcquiredOSB = false; + bool bAcquiredOIN = false; + bool bClearInUse = false; + ocfs_file_entry *fe = NULL; + ub8 ParentDirNodeOffset; + ub8 parentDirCluster; + struct file *NewFileObject = NULL; + ub8 allocSize = 0; + ub8 endofFile = 0; + ocfs_sem *oin_sem = NULL; + + LOG_ENTRY (); + + OCFS_ASSERT (dir->i_sb); + OCFS_ASSERT (newofile); + osb = ((ocfs_super *)(dir->i_sb->u.generic_sbp)); + OCFS_ASSERT (osb); + OCFS_ASSERT (osb->obj_id.type == OCFS_TYPE_OSB); + + if (!ocfs_linux_get_inode_offset (dir, &ParentDirNodeOffset, &ParentOin)) { + LOG_ERROR_STR ("parent inode has bad oin"); + status = -ENOENT; + goto leave; + } + parentDirCluster = ParentDirNodeOffset; + + LOG_TRACE_STR ("Acquire OSB lock"); + ocfs_down_sem (&(osb->osb_res), true); + bAcquiredOSB = true; + + /* If the volume has been shutdown, fail the request */ + if (osb->osb_flags & OCFS_OSB_FLAGS_SHUTDOWN) { + LOG_ERROR_STR ("Volume has been shutdown"); + status = -EACCES; + goto leave; + } + ocfs_up_sem (&(osb->osb_res)); + bAcquiredOSB = false; + + if ((fe = ocfs_allocate_file_entry ()) == NULL) { + LOG_ERROR_STR ("Could not allocate mem for fileEntry"); + status = -ENOMEM; + goto leave; + } + + if (create) + status = -ENOENT; + else { + /* kch - for an open request we are already given the + * inode, and therefore we are given the oin too */ + DirEnt = NULL; + if (inode_data_is_oin (inode)) + DirEnt = ((ocfs_inode *)inode->u.generic_ip); + status = -EFAIL; + if (DirEnt != NULL) { + if (!(DirEnt->oin_flags & OCFS_OIN_IN_TEARDOWN) && + !(DirEnt->oin_flags & OCFS_OIN_DELETE_ON_CLOSE)) { + OCFS_SET_FLAG (DirEnt->oin_flags, + OCFS_OIN_IN_USE); + status = 0; + } + if (status < 0) { + LOG_TRACE_ARGS + ("oin (%p) is not in teardown and not being deleted\n", + DirEnt); + } + } else { + /* now it IS possible to have an inode but no OIN attached yet + * must be loaded now to open file */ + status = -ENOENT; + } + } + + if (status < 0) { + if (status != -ENOENT) { + /* This is an error case */ + goto leave; + } 
+ + /* Look on the disk now ... */ + status = ocfs_find_files_on_disk (osb, ParentDirNodeOffset, &(dentry->d_name), + fe, NULL); + if (status >= 0) { + DirEnt = NULL; + ocfs_down_sem (&(osb->osb_res), true); + bAcquiredOSB = true; + status = ocfs_create_oin_from_entry (osb, fe, &DirEnt, + parentDirCluster, NULL); + ocfs_up_sem (&(osb->osb_res)); + bAcquiredOSB = false; + + if (status >= 0) { + /* Set Oin in Use... */ + bClearInUse = true; + } + + if (status < 0) { + if (status != -ENOENT) { + /* This is an error case */ + goto leave; + } + } + } else if (!create) { + LOG_ERROR_STR + ("Open request made for nonexistent file!"); + status = -ENOENT; + goto leave; + } + } + + if (status < 0) { /* not found on disk or in mem */ + if (!create || status != -ENOENT) { + /* This is an error case */ + goto leave; + } + } else { + bClearInUse = true; + } + + if (status < 0) { /* the CREATE case */ + ub8 fileEntry = 0; + ocfs_file_entry *tempFileEnt; + + if ((tempFileEnt = ocfs_allocate_file_entry ()) == NULL) { + status = -ENOMEM; + LOG_ERROR_STATUS (status); + goto leave; + } + + /* must pass a partially filled FILE_ENTRY to set */ + /* linux-only fields */ + memset (tempFileEnt, 0, sizeof (ocfs_file_entry)); + tempFileEnt->uid = current->fsuid; + tempFileEnt->gid = current->fsgid; + tempFileEnt->prot_bits = mode & 0007777; + if (S_ISCHR (mode) || S_ISBLK (mode)) { + tempFileEnt->dev_major = MAJOR (dev); + tempFileEnt->dev_minor = MINOR (dev); + } else { + tempFileEnt->dev_major = MAJOR (dir->i_sb->s_dev); + tempFileEnt->dev_minor = MINOR (dir->i_sb->s_dev); + } + + if (S_ISLNK (mode)) + tempFileEnt->attribs |= OCFS_ATTRIB_SYMLINK; + else if (S_ISCHR (mode)) + tempFileEnt->attribs |= OCFS_ATTRIB_CHAR; + else if (S_ISBLK (mode)) + tempFileEnt->attribs |= OCFS_ATTRIB_BLOCK; + else if (S_ISFIFO (mode)) + tempFileEnt->attribs |= OCFS_ATTRIB_FIFO; + else if (S_ISSOCK (mode)) + tempFileEnt->attribs |= OCFS_ATTRIB_SOCKET; + else if (S_ISDIR (mode)) + tempFileEnt->attribs |= 
OCFS_ATTRIB_DIRECTORY; + else + tempFileEnt->attribs |= OCFS_ATTRIB_REG; + + OCFS_ASSERT (create); + /* We need the dir_node for the file being created. */ + /* Create the file here using the new algorithm... */ + fileEntry = 0; + + status = + ocfs_create_modify_file (osb, parentDirCluster, NULL, &(dentry->d_name), + 0, &fileEntry, + S_ISDIR (mode) ? FLAG_FILE_CREATE_DIR : + FLAG_FILE_CREATE, tempFileEnt, NULL); + + ocfs_release_file_entry (tempFileEnt); + + if (status < 0) { + LOG_ERROR_ARGS ("create file failed for %s " + "with status (0x%08x)\n", + S_ISDIR (mode) ? "dir" : "file", + status); + goto leave; + } + + status = ocfs_find_files_on_disk (osb, parentDirCluster, &(dentry->d_name), + fe, NULL); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } + + fileEntry = fe->this_sector; + + /* Create a new ofile here ... */ + OFile = ocfs_allocate_ofile (); + if (OFile == NULL) { + LOG_ERROR_STR ("ocfs_allocate_ofile failed to allocate " + "memory for OFile"); + status = -ENOMEM; + goto leave; + } + + OFile->k_file = NewFileObject; + + status = ocfs_create_new_oin (&OFile->oin, &allocSize, &endofFile, + NewFileObject, osb); + if (status < 0) { + /* Release the memory for the OFile we allocated above */ + ocfs_release_ofile (OFile); + + LOG_ERROR_STATUS (status); + goto leave; + } + + ocfs_initialize_oin (OFile->oin, osb, + OCFS_OIN_CACHE_UPDATE | (S_ISDIR (mode) ? + OCFS_OIN_DIRECTORY : + 0), NewFileObject, + fileEntry, + S_ISDIR (mode) ? fe->extents[0]. 
+ disk_off : fileEntry); + + if (ParentOin) + OCFS_CLEAR_FLAG (ParentOin->oin_flags, OCFS_OIN_IN_USE); + + if (OFile->oin->lock_res != NULL) { + OFile->oin->lock_res->master_node_num = + DISK_LOCK_CURRENT_MASTER (fe); + OFile->oin->lock_res->lock_state = + DISK_LOCK_FILE_LOCK (fe); + } + + /* Insert the OFile on the OIN list */ + NewOIN = OFile->oin; + + NewOIN->chng_seq_num = DISK_LOCK_SEQNUM (fe); + NewOIN->parent_dirnode_off = parentDirCluster; + + if (S_ISDIR (mode)) + NewOIN->dir_disk_off = fe->extents[0].disk_off; + status = 0; + *newofile = OFile; + goto leave; + } else { /* the OPEN case */ + + oin_sem = &(DirEnt->main_res); + ocfs_down_sem (oin_sem, true); + bAcquiredOIN = true; + + if (DirEnt->oin_flags & OCFS_OIN_DELETE_ON_CLOSE) { + LOG_TRACE_STR + ("oin has DELETE_ON_CLOSE set, returning DELETE_PENDING"); + status = -ENOENT; + goto leave; + } + + /* only call ocfs_verify_update_oin if there's a good inode */ + if (DirEnt->inode == inode) { + status = ocfs_verify_update_oin (osb, DirEnt); + if (status < 0) { + /* disable VOLUME TODO */ + goto leave; + } + } + + if (DirEnt->open_hndl_cnt > 0) { + /* The OIN is currently in use by some thread. */ + /* We must check whether the requested access/share access */ + /* conflicts with the existing open operations. */ + + LOG_TRACE_ARGS ("DirEnt->open_hndl_cnt > 0! 
: %u\n", + DirEnt->open_hndl_cnt); +#if 0 + if (mode & O_DIRECT) { + LOG_TRACE_STR ("O_DIRECT: open ok"); + status = 0; + } else if (mode & O_RDONLY) { + LOG_TRACE_STR ("O_RDONLY: open ok"); + status = 0; + } else if (mode & O_WRONLY || mode & O_RDWR) { + LOG_TRACE_STR + ("tried opening for write, but file is " + "already open!"); + status = -EFAIL; + } else +#endif + { + LOG_TRACE_STR ("I don't know!?"); + status = 0; + } + + if (status < 0) { + goto leave; + } + } else { + ocfs_delete_all_extent_maps (DirEnt); + } + + /* Allocate a new OFile */ + OFile = ocfs_allocate_ofile (); + if (OFile == NULL) { + LOG_ERROR_STR ("ocfs_allocate_ofile failed to allocate " + "memory for OFile"); + status = -ENOMEM; + goto leave; + } + + /* Setup the OFile and insert it on the oin list */ + OFile->k_file = NewFileObject; + OFile->oin = DirEnt; + OFile->oin->ref_cnt++; + OFile->oin->open_hndl_cnt++; + + /* We should clear the in use now as we are safe from the case */ + /* where the voting thread can vote and we have an open in */ + /* progress */ + OCFS_CLEAR_FLAG (OFile->oin->oin_flags, OCFS_OIN_IN_USE); + if (ParentOin) + OCFS_CLEAR_FLAG (ParentOin->oin_flags, OCFS_OIN_IN_USE); + + OCFS_ASSERT (OFile->oin); + + if (bAcquiredOIN) { + ocfs_up_sem (oin_sem); + bAcquiredOIN = false; + } + + ocfs_down_sem (&(osb->osb_res), true); + bAcquiredOSB = true; + (osb->file_open_cnt)++; + ocfs_up_sem (&(osb->osb_res)); + bAcquiredOSB = false; + + *newofile = OFile; + DirEnt->inode = inode; + SET_INODE_OIN (inode, DirEnt); + status = 0; + goto leave; + } + + leave: + + if (bClearInUse) { + if (!bAcquiredOIN && oin_sem) { + ocfs_down_sem (oin_sem, true); + bAcquiredOIN = true; + } + + OCFS_CLEAR_FLAG (DirEnt->oin_flags, OCFS_OIN_IN_USE); + + if (bAcquiredOIN && oin_sem) { + ocfs_up_sem (oin_sem); + bAcquiredOIN = false; + } + } + + if (bAcquiredOIN && oin_sem) { + ocfs_up_sem (oin_sem); + bAcquiredOIN = false; + } + + if (bAcquiredOSB) { + LOG_TRACE_STR ("Release OSB lock"); + ocfs_up_sem 
(&(osb->osb_res));
+		bAcquiredOSB = false;
+	}
+
+	if (fe != NULL) {
+		ocfs_release_file_entry (fe);
+	}
+
+	LOG_EXIT_STATUS (status);
+	return (status);
+}				/* ocfs_create_or_open_file */
+
+/*
+ * ocfs_file_open()
+ *
+ * Open callback.  Obtains the ocfs_file for @file through
+ * ocfs_create_or_open_file() and stores it in file->private_data.
+ *
+ * A reference is taken on the parent directory inode for the duration of
+ * the open.  It is dropped here when the open fails; on success it stays
+ * held (ocfs_file_release() drops it on its do_parent_dec path).  The
+ * parent lookup may come up empty, in which case "parent" stays NULL and
+ * no reference is taken or dropped -- the same guard ocfs_file_release()
+ * applies before its atomic_dec().
+ *
+ * Returns 0 on success, -ENOENT or -ENOMEM when the helper reports them,
+ * and -EACCES for every other failure.
+ */
+static int ocfs_file_open (struct inode *inode, struct file *file)
+{
+	struct dentry *dentry;
+	struct inode *parent = NULL;
+	ocfs_file *ofile = NULL;
+	int status;
+	int ret, err = 0;
+
+	LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", inode, file);
+
+	dentry = file->f_dentry;
+	if (dentry && dentry->d_parent && dentry->d_parent->d_inode)
+		parent = dentry->d_parent->d_inode;
+
+	/* parent is still NULL if the lookup above failed: do not touch it */
+	if (parent)
+		atomic_inc (&parent->i_count);
+
+	status = ocfs_create_or_open_file (inode, parent, dentry, file->f_flags,
+					   &ofile, NODEV);
+	if (status < 0) {
+		/* only three codes are passed through; the rest become -EACCES */
+		switch (status) {
+		case -ENOENT:
+			LOG_ERROR_STR ("File not found while opening file");
+			ret = -ENOENT;
+			break;
+		case -ENOMEM:
+			LOG_ERROR_STR ("Out of memory while opening file");
+			ret = -ENOMEM;
+			break;
+		case -EACCES:
+			LOG_ERROR_STR ("Access denied while opening file");
+			ret = -EACCES;
+			break;
+		default:
+			LOG_ERROR_STATUS (status);
+			ret = -EACCES;
+			break;
+		}
+		goto bail;
+	}
+
+	file->private_data = (void *) ofile;
+	ofile->k_file = file;
+	ofile->f_iobuf_lock = 0;
+	ofile->f_iobuf = NULL;
+	ret = 0;
+
+      bail:
+	/* undo the parent reference only if one was taken and the open failed */
+	if (ret != 0 && parent)
+		atomic_dec (&parent->i_count);
+
+	/* the trace walks dentry->d_inode, so skip it when that chain is broken */
+	if (dentry && dentry->d_inode) {
+		LOG_TRACE_ARGS
+		    ("exiting file_open: file=%p dentry=%p inode=%p oin=%p kiovec=%d\n",
+		     file, file->f_dentry, file->f_dentry->d_inode,
+		     ((ocfs_inode *)file->f_dentry->d_inode->u.generic_ip), err);
+	}
+	LOG_EXIT_LONG (ret);
+	return ret;
+}				/* ocfs_file_open */
+
+/*
+ * ocfs_mknod()
+ *
+ */
+static int ocfs_mknod (struct inode *dir, struct dentry *dentry, int mode, int dev)
+{
+	int status;
+	struct inode *inode;
+	ocfs_file *newofile = NULL;
+	int error = -EACCES;
+
+	LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %d, %d)\n", dir, dentry, mode, dev);
+
+	atomic_inc (&dir->i_count);
+
+	status =
+	    ocfs_create_or_open_file (NULL, dir, dentry,
mode, &newofile, dev); + if (status >= 0) { + ocfs_inode *oin; + + if (newofile == NULL || newofile->oin == NULL) { + LOG_ERROR_STR ("OFile returned is bad. TODO: cleanup!"); + goto bail; + } + + oin = newofile->oin; + inode = new_inode (dir->i_sb); + error = PTR_ERR (inode); + if (!IS_ERR (inode)) { + inode->i_rdev = dev; + inode->i_ino = LO (oin->file_disk_off); + oin->inode = inode; + ocfs_populate_inode (inode, (ub8) 0, mode, + current->fsuid, current->fsgid, + 0, CURRENT_TIME, + CURRENT_TIME, CURRENT_TIME, + (void *) oin); + insert_inode_hash (inode); + d_instantiate (dentry, inode); + error = 0; + } else { + LOG_ERROR_ARGS ("new_inode failed!!! error=%d\n", + error); + } + } else if (status == -ENOSPC) { + LOG_ERROR_STR ("Disk is full. TODO: cleanup"); + error = -ENOSPC; + } else { + LOG_ERROR_STR + ("ocfs_create_or_open_file failed! TODO: cleanup!"); + } + + bail: + atomic_dec (&dir->i_count); + + /* uh, hmmm... */ + if (newofile != NULL) + ocfs_release_ofile (newofile); + + LOG_EXIT_LONG (error); + return error; +} /* ocfs_mknod */ + +/* + * ocfs_mkdir() + * + */ +static int ocfs_mkdir (struct inode *dir, struct dentry *dentry, int mode) +{ + int ret; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %d)\n", dir, dentry, mode); + + ret = ocfs_mknod (dir, dentry, mode | S_IFDIR, NODEV); + + LOG_EXIT_LONG (ret); + return ret; +} /* ocfs_mkdir */ + +/* + * ocfs_create() + * + */ +static int ocfs_create (struct inode *dir, struct dentry *dentry, int mode) +{ + int ret; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %d)\n", dir, dentry, mode); + + ret = ocfs_mknod (dir, dentry, mode | S_IFREG, NODEV); + + LOG_EXIT_LONG (ret); + return ret; +} /* ocfs_create */ + +/* + * ocfs_link() + * + */ +static int ocfs_link (struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) +{ + int status; + ub8 off; + ocfs_file_entry *fe; + struct inode *inode; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, 0x%08x)\n", old_dentry, dir, dentry); + + inode = old_dentry->d_inode; + fe = 
ocfs_allocate_file_entry (); + if (fe == NULL) { + status = -ENOMEM; + LOG_ERROR_STATUS (status); + goto bail; + } + if (!ocfs_linux_get_inode_offset (inode, &off, NULL)) { + LOG_ERROR_STR ("could not get inode offset!"); + goto bail; + } + + + status = ocfs_read_file_entry (((ocfs_super *)(inode->i_sb->u.generic_sbp)), fe, off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto bail; + } + if (!IS_VALID_FILE_ENTRY (fe)) { + LOG_ERROR_STR ("fe Read from disk is invalid"); + goto bail; + } + + bail: + if (fe) { + ocfs_release_file_entry (fe); + } + + LOG_EXIT_LONG (0); + return -EPERM; +} /* ocfs_link */ + +/* + * ocfs_positive() + * + */ +static inline int ocfs_positive (struct dentry *dentry) +{ + int ret; + + LOG_ENTRY_ARGS ("(0x%08x)\n", dentry); + + ret = dentry->d_inode && !d_unhashed (dentry); + + LOG_EXIT_LONG (ret); + return ret; +} /* ocfs_positive */ + +/* + * ocfs_empty() + * + */ +static int ocfs_empty (struct dentry *dentry) +{ + struct list_head *list; + int ret; + + LOG_ENTRY_ARGS ("(0x%08x)\n", dentry); + + spin_lock (&dcache_lock); + list = dentry->d_subdirs.next; + + while (list != &dentry->d_subdirs) { + struct dentry *de = list_entry (list, struct dentry, d_child); + + if (de->d_inode && !d_unhashed (de)) { + spin_unlock (&dcache_lock); + ret = 0; + goto bail; + } + list = list->next; + } + spin_unlock (&dcache_lock); + ret = 1; + + bail: + LOG_EXIT_LONG (ret); + return ret; +} /* ocfs_empty */ + +/* + * ocfs_unlink() + * + */ +static int ocfs_unlink (struct inode *dir, struct dentry *dentry) +{ + int status; + struct inode *inode; + ocfs_inode *oin = NULL; + int retval = -ENOTEMPTY; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", dir, dentry); + + + inode = dentry->d_inode; + + if ((atomic_read (&inode->i_count) > 1) + || (atomic_read (&dentry->d_count) > 2)) { + retval = -EBUSY; + goto bail; + } + + if (ocfs_empty (dentry)) { + status = ocfs_set_disposition_information (dir, dentry); + if (status < 0) { + LOG_ERROR_STR 
("ocfs_set_disposition_information failed!"); + retval = -EBUSY; + goto bail; + } + + if (inode_data_is_oin (inode) + && (oin = ((ocfs_inode *)inode->u.generic_ip)) == NULL) { + LOG_ERROR_STR ("inode has bad oin"); + retval = -EIO; + goto bail; + } + + if (oin) + ocfs_release_oin (oin, true); + + inode->i_nlink--; + /* This probably is not needed, leavign it in as comment */ + /* if we ever hit bug in dcache again, it's something else */ + /* but this should take care of the BUG() in dcache : prune_dcache() */ + /* dput(dentry); */ + retval = 0; + } else + LOG_ERROR_STR ("dentry is not empty!"); + + bail: + LOG_EXIT_LONG (retval); + return retval; +} /* ocfs_unlink */ + +/* + * ocfs_rename() + * + */ +static int ocfs_rename (struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + int status; + struct inode *old_inode = old_dentry->d_inode; + struct inode *new_inode = new_dentry->d_inode; + int error = 0; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", + old_dir, old_dentry, new_dir, new_dentry); + + if ((atomic_read (&old_inode->i_count) > 1) + || (atomic_read (&old_dentry->d_count) > 1)) { + error = -EBUSY; + goto bail; + } + + if (new_inode) { + if (S_ISDIR (old_inode->i_mode) && !ocfs_empty (new_dentry)) { + error = -ENOTEMPTY; + LOG_TRACE_STR ("New (directory) dentry NOT empty!"); + goto bail; + } + + status = ocfs_set_rename_information (old_dir, old_dentry, new_dir, + new_dentry); + if (status < 0) { + error = -ENOENT; + LOG_ERROR_STATUS (status); + goto bail; + } + old_inode->i_nlink++; + new_dir->i_mtime = new_dir->i_ctime = CURRENT_TIME; + if (S_ISDIR (old_inode->i_mode)) + new_inode->i_nlink--; + new_inode->i_nlink--; + } else { +// if (S_ISDIR(old_inode->i_mode)) +// { +// error = -EMLINK; +// if (new_dir->i_nlink >= OCFS_LINK_MAX) +// goto bail; +// } + status = ocfs_set_rename_information (old_dir, old_dentry, new_dir, + new_dentry); + if (status < 0) { + error = -ENOENT; + LOG_ERROR_STATUS 
(status);
+			goto bail;
+		}
+		old_inode->i_nlink++;
+		if (S_ISDIR (old_inode->i_mode))
+			new_dir->i_nlink++;
+	}
+	old_inode->i_nlink--;
+	if (S_ISDIR (old_inode->i_mode)) {
+		new_dir->i_mtime = new_dir->i_ctime = CURRENT_TIME;
+		old_dir->i_nlink--;
+	}
+//	mark_inode_dirty(new_dir);
+//	mark_inode_dirty(old_dir);
+//	mark_inode_dirty(new_inode);
+//	mark_inode_dirty(old_inode);
+
+      bail:
+	LOG_EXIT_LONG (error);
+	return error;
+}				/* ocfs_rename */
+
+/*
+ * ocfs_symlink()
+ *
+ * Create the symbolic link @dentry in directory @dir pointing at @symname.
+ *
+ * The link inode is created with ocfs_mknod() (S_IFLNK | S_IRWXUGO), the
+ * new file is extended to strlen(symname) bytes through
+ * ocfs_create_modify_file(FLAG_FILE_EXTEND), and the target string is
+ * then written with ocfs_block_symlink().
+ *
+ * A reference on @dir is held for the duration of the call.
+ *
+ * Returns 0 on success or a negative errno: -EIO when the parent inode or
+ * an offset cannot be obtained or the oin fails verification, -ENOSPC
+ * when the extend fails, otherwise whatever ocfs_mknod() or
+ * ocfs_block_symlink() return.
+ */
+static int ocfs_symlink (struct inode *dir, struct dentry *dentry, const char *symname)
+{
+	int error;
+	ocfs_super *osb = NULL;
+	ocfs_inode *oin = NULL;
+	struct inode *inode, *parentInode = NULL;
+	ub8 fileOff, parentOff;
+	ub8 newsize;
+	int status;
+
+	LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, 0x%08x)\n", dir, dentry, symname);
+
+	atomic_inc (&dir->i_count);
+	if (!dentry->d_parent || !dentry->d_parent->d_inode) {
+		LOG_ERROR_STR ("failed to get parent inode!");
+		error = -EIO;
+		goto bail;
+	}
+	parentInode = dentry->d_parent->d_inode;
+
+	/*
+	 * Take the osb from the superblock up front.  It used to be set only
+	 * when the new inode carried an oin, leaving a NULL osb to be handed
+	 * to ocfs_create_modify_file() otherwise.
+	 */
+	osb = (ocfs_super *) dir->i_sb->u.generic_sbp;
+
+	error = ocfs_mknod (dir, dentry, S_IFLNK | S_IRWXUGO, NODEV);
+	if (!error) {
+		/* l counts the terminating NUL, newsize does not */
+		int l = strlen (symname) + 1;
+
+		newsize = l - 1;
+		/* now that d_inode points to something */
+		inode = dentry->d_inode;
+
+		if (!ocfs_linux_get_inode_offset (parentInode, &parentOff, NULL)) {
+			LOG_ERROR_STR ("failed to get parent offset!");
+			error = -EIO;
+			goto bail;
+		}
+
+		if (ocfs_linux_get_inode_offset (inode, &fileOff, &oin)
+		    && oin != NULL) {
+			osb = (ocfs_super *) oin->osb;
+			ocfs_down_sem (&(oin->main_res), true);
+			if (OIN_NEEDS_VERIFICATION (oin)) {
+				LOG_TRACE_STR ("OIN_NEEDS_VERIFICATION");
+				status = ocfs_verify_update_oin (osb, oin);
+				if (status < 0) {
+					LOG_TRACE_STR
+					    ("ocfs_verify_update_oin failed");
+					LOG_TRACE_STR ("TODO: disable volume");
+					ocfs_up_sem
(&(oin->main_res)); + error = -EIO; + goto bail; + } + } + ocfs_up_sem (&(oin->main_res)); + } + + status = + ocfs_create_modify_file (osb, parentOff, oin, NULL, newsize, + &fileOff, FLAG_FILE_EXTEND, NULL, NULL); + + if (status < 0) { + LOG_TRACE_ARGS ("Failed to extend file to %u.%u !!!\n", + HI (newsize), LO (newsize)); + error = -ENOSPC; + goto bail; + } + + if (oin != NULL) { + ocfs_down_sem (&(oin->main_res), true); + inode->i_size = newsize; + ocfs_up_sem (&(oin->main_res)); + } + error = ocfs_block_symlink (inode, symname, l); + } + + bail: + atomic_dec (&dir->i_count); + LOG_EXIT_LONG (error); + return error; +} /* ocfs_symlink */ + +/* + * ocfs_file_release() + * + */ +static int ocfs_file_release (struct inode *inode, struct file *file) +{ + //int nbhs = KIO_MAX_SECTORS; + ocfs_file *ofile = NULL; + ocfs_super * osb; + ocfs_inode *oin; + struct dentry *dentry; + struct inode *parent; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", inode, file); + + if (file->private_data) + ofile = (ocfs_file *) file->private_data; + + + /* dir */ + if (S_ISDIR (inode->i_mode)) { + /* fix all this - need a real open/close for directories */ + if (ofile) { + if (ofile->curr_dir_buf) { + ocfs_safefree (ofile->curr_dir_buf); + ofile->curr_dir_buf = NULL; + } + // hmm + // if (ofile->f_iobuf) + // free_kiovec_sz(1, &ofile->f_iobuf, &nbhs); + ocfs_release_ofile (ofile); + } + goto bail; + } + + /* file */ + osb = ((ocfs_super *)(inode->i_sb->u.generic_sbp)); + oin = NULL; + + if (ofile == NULL) + goto do_parent_dec; + + oin = ofile->oin; + OCFS_ASSERT (oin); + + ocfs_down_sem (&(oin->main_res), true); + ocfs_release_ofile (ofile); + ocfs_down_sem (&(osb->osb_res), true); + osb->file_open_cnt--; + oin->ref_cnt--; + oin->open_hndl_cnt--; + ocfs_up_sem (&(osb->osb_res)); + + if (oin->oin_flags & OCFS_OIN_ROOT_DIRECTORY) { + ocfs_up_sem (&(oin->main_res)); + goto do_parent_dec; + } + + LOG_TRACE_ARGS ("openhandles: %d / osbfiles: %d / refcount: %d\n", + oin->open_hndl_cnt, 
osb->file_open_cnt, oin->ref_cnt); + + if (oin->ref_cnt == 0) { + if (oin->oin_flags & OCFS_OIN_NEEDS_DELETION || + oin->oin_flags & OCFS_OIN_IN_USE) { + ocfs_up_sem (&(oin->main_res)); + goto do_parent_dec; + } + ocfs_up_sem (&(oin->main_res)); + ocfs_release_oin (oin, true); + } else { + ocfs_up_sem (&(oin->main_res)); + ocfs_release_cached_oin (osb, oin); + } + +do_parent_dec: + dentry = file->f_dentry; + if (dentry && dentry->d_parent && + dentry->d_parent->d_inode) { + parent = dentry->d_parent->d_inode; + if (parent) + atomic_dec (&parent->i_count); + } + +bail: + LOG_EXIT_LONG (0); + return 0; +} /* ocfs_file_release */ + +/* + * ocfs_flush() + * + */ +static int ocfs_flush (struct file *file) +{ + LOG_ENTRY_ARGS ("(0x%08x)\n", file); + + LOG_EXIT_LONG (0); + return 0; +} /* ocfs_flush */ + +/* + * ocfs_sync_file() + * + */ +static int ocfs_sync_file (struct file *file, struct dentry *dentry, int datasync) +{ + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %d)\n", file, dentry, datasync); + + LOG_EXIT_LONG (0); + return 0; +} /* ocfs_sync_file */ + +/* + * ocfs_put_super() + * + */ +static void ocfs_put_super (struct super_block *sb) +{ + LOG_ENTRY_ARGS ("(0x%08x)\n", sb); + + fsync_no_super (sb->s_dev); + LOG_TRACE_STR ("put super... do nothing! 
DONE!!!!"); + MOD_DEC_USE_COUNT; + + LOG_EXIT (); + return; +} /* ocfs_put_super */ + +/* + * ocfs_readdir() + * + */ +static int ocfs_readdir (struct file *filp, void *dirent, filldir_t filldir) +{ + int pos; + struct inode *inode; + struct super_block *sb; + ocfs_super *osb; + ocfs_inode *oin; + ocfs_file *ofile; + ocfs_file_entry *entry = NULL; + ub8 rootOff; + int ret = 0; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", filp, dirent); + + if (!filp || + !filp->f_dentry || + !filp->f_dentry->d_inode || !filp->f_dentry->d_inode->i_sb) { + LOG_TRACE_STR ("Bad file pointer"); + goto bail; + } + pos = filp->f_pos; + inode = filp->f_dentry->d_inode; + sb = inode->i_sb; + if (!sb->u.generic_sbp) { + LOG_TRACE_STR ("Invalid OSB"); + goto bail; + } + osb = ((ocfs_super *)(sb->u.generic_sbp)); + + if (!ocfs_linux_get_inode_offset (inode, &rootOff, &oin)) { + LOG_TRACE_STR ("Inode has no OIN"); + goto bail; + } + + if (!S_ISDIR (inode->i_mode)) { + LOG_TRACE_STR ("Not a dir"); + ret = -ENOTDIR; + goto bail; + } + + switch (pos) { + case 0: + { + if (filldir (dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) + break; + pos++; + filp->f_pos++; + break; + } + case 1: + { + if (filldir (dirent, "..", 2, 1, + filp->f_dentry->d_parent->d_inode->i_ino, + DT_DIR) < 0) + break; + pos++; + filp->f_pos++; + break; + } + /* case 2: */ + default: + { + if (pos == 2) { + ocfs_safefree (filp->private_data); + filp->private_data = (void *) ocfs_allocate_ofile (); + if (filp->private_data == NULL) { + LOG_TRACE_STR ("Failed to allocate OFile"); + ret = -ENOMEM; + goto bail; + } + } + + entry = ocfs_allocate_file_entry (); + if (entry == NULL) { + LOG_TRACE_STR ("Failed to allocate file entry"); + ret = -ENOMEM; + goto bail; + } + ofile = (ocfs_file *) filp->private_data; + while (1) { + if (ocfs_find_files_on_disk (osb, rootOff, NULL, entry, ofile) < 0) + break; + + if (filldir + (dirent, entry->filename, + strlen (entry->filename), filp->f_pos, + LO (entry->this_sector), DT_UNKNOWN) < 0) { + goto 
bail;
+			}
+			pos++;
+			filp->f_pos++;
+		}
+
+		break;
+	    }
+	}
+
+      bail:
+	if (entry)
+		ocfs_release_file_entry (entry);
+	LOG_EXIT_LONG (ret);
+	return ret;
+}				/* ocfs_readdir */
+
+/*
+ * ocfs_put_inode()
+ *
+ * put_inode callback.  When the inode carries an oin and i_count reads 1,
+ * the extent maps cached on that oin are dropped with
+ * ocfs_delete_all_extent_maps().  Nothing else is released here.
+ */
+static void ocfs_put_inode (struct inode *inode)
+{
+	LOG_ENTRY_ARGS ("(0x%08x)\n", inode);
+	/* i_count is an atomic_t: read it with atomic_read() before formatting */
+	LOG_TRACE_ARGS ("put_inode: count=%d\n", atomic_read (&inode->i_count));
+	if (inode_data_is_oin(inode) && (atomic_read (&inode->i_count) == 1) ) {
+		ocfs_inode *oin;
+		oin = ((ocfs_inode *)inode->u.generic_ip);
+		ocfs_delete_all_extent_maps (oin);
+	}
+	LOG_EXIT ();
+	return;
+}				/* ocfs_put_inode */
+
+/*
+ * ocfs_clear_inode()
+ *
+ */
+static void ocfs_clear_inode (struct inode *inode)
+{
+	LOG_ENTRY();
+
+	if (inode) {
+		if (inode_data_is_oin (inode)) {
+			ocfs_inode *oin;
+			ocfs_super *osb;
+			ocfs_file *ofile;
+			struct list_head *iter;
+			struct list_head *next;
+
+			LOG_TRACE_STR ("inode with oin : clear inode\n");
+
+			oin = ((ocfs_inode *)inode->u.generic_ip);
+			osb = (ocfs_super *) oin->osb;
+			if (oin == osb->oin_root_dir) {
+				LOG_TRACE_STR
+				    ("this is the root inode, doing cleanup now!");
+				fsync_no_super (inode->i_sb->s_dev);
+				LOG_TRACE_STR ("syncing past root inode");
+				LOG_TRACE_STR ("calling dismount");
+				ocfs_dismount_volume (inode->i_sb);
+				goto bail;
+			}
+
+			/*
+			 * Each entry is released inside the loop, so the
+			 * successor is fetched before the body runs (the
+			 * _safe variant) instead of being read back from an
+			 * entry that has just been released.
+			 */
+			list_for_each_safe (iter, next, &(oin->next_ofile)) {
+				ofile =
+				    list_entry (iter, ocfs_file, next_ofile);
+				ocfs_release_ofile (ofile);
+			}
+
+			ocfs_delete_all_extent_maps (oin);
+
+			ocfs_release_cached_oin (osb, oin);
+			ocfs_release_oin (oin, true);
+			oin = NULL;
+			LOG_TRACE_STR ("yeah!
done with deallocs!"); + } else { + ub8 fileOff; + ub4 len; + ocfs_lock_res *FoundResource; + ocfs_super *osb; + + osb = ((ocfs_super *)(inode->i_sb->u.generic_sbp)); + + if (ocfs_linux_get_inode_offset (inode, &fileOff, NULL)) { + if (!HASHTABLE_DESTROYED + (&(osb->root_sect_node))) { + if (ocfs_hash_get + (&(osb->root_sect_node), &(fileOff), + sizeof (ub8), + (void **) &FoundResource, &len)) { + FoundResource->oin = NULL; + LOG_TRACE_STR ("found lock"); + ocfs_hash_del (& + (osb-> + root_sect_node), + &(fileOff), + sizeof (ub8)); + kmem_cache_free (OcfsGlobalCtxt.lockres_cache, FoundResource); + } + } else { + LOG_TRACE_STR + ("hashtable has already been destroyed. skipping."); + } + } else { + LOG_TRACE_STR ("Could not find offset"); + } + } + } + bail: + LOG_EXIT (); + return; +} /* ocfs_clear_inode */ + +#if 0 +/* + * ocfs_delete_inode() + * + */ +static void ocfs_delete_inode (struct inode *inode) +{ + LOG_ENTRY (); + + LOG_TRACE_STR ("Inode being junked, need to do cleanup here"); + + LOG_EXIT (); + return; +} /* ocfs_delete_inode */ +#endif + +/* + * ocfs_setattr() + * + */ +static int ocfs_setattr (struct dentry *dentry, struct iattr *attr) +{ + struct inode *parentInode; + struct inode *inode = dentry->d_inode; + int error = 0; + ub8 newsize; + int status; + struct inode savedInode; + ocfs_inode *oin = NULL; + ocfs_super *osb = NULL; + ub8 parentOff, fileOff; + + LOG_ENTRY_ARGS ("(0x%08x)\n", dentry); + + osb = ((ocfs_super *)(inode->i_sb->u.generic_sbp)); + + if (!dentry->d_parent || !dentry->d_parent->d_inode) { + LOG_ERROR_STR ("bad inode or root inode"); + goto bail2; + } + parentInode = dentry->d_parent->d_inode; + atomic_inc (&parentInode->i_count); + newsize = attr->ia_size; + + if (attr->ia_valid & ATTR_MODE) + LOG_TRACE_ARGS ("mode change: %d\n", attr->ia_mode); + if (attr->ia_valid & ATTR_UID) + LOG_TRACE_ARGS ("uid change: %d\n", attr->ia_uid); + if (attr->ia_valid & ATTR_GID) + LOG_TRACE_ARGS ("gid change: %d\n", attr->ia_gid); + if 
(attr->ia_valid & ATTR_SIZE) + LOG_TRACE_STR ("size change..."); + if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME)) + LOG_TRACE_STR ("time change..."); + + if (!(attr->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | + ATTR_SIZE | ATTR_GID | ATTR_UID | ATTR_MODE))) { + LOG_TRACE_STR + ("can only change mode, uid, gid, size and time. exiting!"); + goto bail; + } + + error = inode_change_ok (inode, attr); + if (error) + goto bail; + + /* get the file and parent offsets, and the file oin if present */ + if (!ocfs_linux_get_inode_offset (inode, &fileOff, &oin) || + !ocfs_linux_get_inode_offset (parentInode, &parentOff, NULL)) { + LOG_ERROR_STR ("error getting inode offset"); + goto bail; + } + + if (attr->ia_valid & ATTR_SIZE) { + if (oin != NULL) { + ocfs_down_sem (&(oin->main_res), true); + if (OIN_NEEDS_VERIFICATION (oin)) { + LOG_TRACE_STR ("OIN_NEEDS_VERIFICATION"); + status = ocfs_verify_update_oin (osb, oin); + if (status < 0) { + LOG_TRACE_STR + ("ocfs_verify_update_oin failed"); + LOG_TRACE_STR ("TODO: disable volume"); + ocfs_up_sem (&(oin->main_res)); + error = -EIO; + goto bail; + } + } + ocfs_up_sem (&(oin->main_res)); + } + + status = + ocfs_create_modify_file (osb, parentOff, oin, NULL, newsize, + &fileOff, FLAG_FILE_EXTEND, NULL, NULL); + + if (status < 0) { + LOG_TRACE_ARGS ("Failed to extend file to %u.%u !!!\n", + HI (newsize), LO (newsize)); + error = -ENOSPC; + goto bail; + } + + if (oin != NULL) { + ocfs_down_sem (&(oin->main_res), true); + inode->i_size = newsize; + ocfs_up_sem (&(oin->main_res)); + } + } + + savedInode = *inode; + inode_setattr (inode, attr); + + /* if directory, put FILE_ENTRY ptr into fileOff */ + if (S_ISDIR (inode->i_mode)) + ocfs_linux_get_dir_entry_offset (osb, &fileOff, parentOff, &(dentry->d_name), NULL); + + status = -EFAIL; + if (fileOff != -1) + status = + ocfs_create_modify_file (osb, parentOff, NULL, NULL, newsize, + &fileOff, FLAG_FILE_UPDATE, NULL, attr); + if (status < 0) { + LOG_ERROR_STR + 
("OcfsSetAttr failed! restoring inode and aborting transaction");
+		*inode = savedInode;
+		//      mark_inode_dirty(inode);
+		error = -EIO;
+	}
+
+      bail:
+	atomic_dec (&parentInode->i_count);
+      bail2:
+	LOG_EXIT_LONG (error);
+	return error;
+}				/* ocfs_setattr */
+
+/*
+ * ocfs_getattr()
+ *
+ * Refresh the in-memory oin behind @dentry by calling
+ * ocfs_verify_update_oin().  Dentries without an inode, inodes that do
+ * not carry an oin, and the volume's root directory oin are left alone.
+ *
+ * @attr is only logged.  The result of ocfs_verify_update_oin() is
+ * ignored: this function always returns 0.
+ */
+static int ocfs_getattr (struct dentry *dentry, struct iattr *attr)
+{
+	ocfs_inode *oin;
+	struct inode *inode;
+	struct super_block *sb;
+
+	LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", dentry, attr);
+
+	inode = dentry->d_inode;
+	if (inode == NULL || !inode_data_is_oin (inode))
+		goto bail;
+
+	/* only reach through the inode once it is known to be non-NULL */
+	sb = inode->i_sb;
+
+	oin = ((ocfs_inode *)inode->u.generic_ip);
+	if (oin == ((ocfs_super *)(sb->u.generic_sbp))->oin_root_dir)
+		goto bail;
+	if (oin != NULL)
+		ocfs_verify_update_oin (oin->osb, oin);
+
+      bail:
+	LOG_EXIT_LONG (0);
+	return 0;
+}				/* ocfs_getattr */
+
+/*
+ * ocfs_dentry_revalidate()
+ *
+ */
+static int ocfs_dentry_revalidate (struct dentry *dentry, int flags)
+{
+	int ret = 0;
+
+	return 1;
+
+	LOG_ENTRY_ARGS ("(0x%08x, %d)\n", dentry, flags);
+#if 0
+	ocfs_file_entry *fe;
+	struct inode *inode;
+	ocfs_inode *oin;
+	ocfs_super *osb;
+	ub8 off;
+	ocfs_find_inode_args args;
+
+	if ((inode = dentry->d_inode) != NULL) {
+		osb = ((ocfs_super *)(inode->i_sb->u.generic_sbp);
+		if (inode_data_is_oin (inode)) {
+			oin = ((ocfs_inode *)inode->u.generic_ip);
+			ocfs_down_sem (&(oin->main_res), true);
+			oin->bNeedsVerification = true;
+			ocfs_up_sem (&(oin->main_res));
+			UPDATE_OIN (oin);
+			ret = 1;
+			goto bail;
+		} else if (S_ISDIR (inode->i_mode)) {
+			ub8 parentOff;
+
+			ret = 0;
+			if (osb->oin_root_dir->inode == inode)
+				goto bail;
+
+			if (ocfs_linux_get_inode_offset
+			    (dentry->d_parent->d_inode, &parentOff, NULL)) {
+				if (ocfs_linux_get_dir_entry_offset
+				    (osb, &off, parentOff, &(dentry->d_name), &fe)) {
+					args.offset = fe->this_sector;
+					args.entry = fe;
+					ocfs_read_inode2 (inode,
+							  (void *) &args);
+					ocfs_release_file_entry (fe);
+					ret = 1;
+				}
+			}
+			goto bail;
+		} else {
+			if
(ocfs_linux_get_inode_offset (inode, &off, NULL)) { + ret = 0; + fe = ocfs_allocate_file_entry (); + if (fe) { + if (ocfs_read_file_entry + ((ocfs_super *)(inode->i_sb->u.generic_sbp), + fe, + off) >= 0) { + args.offset = fe->this_sector; + args.entry = fe; + ocfs_read_inode2 (inode, + (void *) + &args); + ret = 1; + } + ocfs_release_file_entry (fe); + } + goto bail; + } + } + } + + ret = 0; /* just return false */ + + bail: + return ret; +#endif + LOG_EXIT_LONG (ret); + return ret; +} /* ocfs_dentry_revalidate */ + +#if 0 +/* + * ocfs_inode_revalidate() + * + */ +static int ocfs_inode_revalidate (struct dentry *dentry) +{ + int ret; /* -ESTALE */ + struct inode *inode; + ocfs_inode *oin; + int status; + + LOG_ENTRY_ARGS ("(0x%08x)\n", dentry); + + ret = 0; + inode = dentry->d_inode; + if (inode == NULL || !inode_data_is_oin (inode)) + goto bail; + oin = ((ocfs_inode *)inode->u.generic_ip); + if (oin == (ocfs_super *)(dentry->d_inode->i_sb->u.generic_sbp)->oin_root_dir) + goto bail; + if (oin != NULL) +// if (OIN_NEEDS_VERIFICATION(oin)) + { + LOG_TRACE_STR ("OIN needs verification"); + status = ocfs_verify_update_oin (oin->osb, oin); + if (status < 0) { + LOG_ERROR_STR ("ocfs_verify_update_oin failed!"); + ret = -ESTALE; + goto bail; + } + } + + bail: + LOG_EXIT_LONG (ret); + return ret; +} /* ocfs_inode_revalidate */ +#endif + +/* + * ocfs_release_cached_oin() + * + */ +void ocfs_release_cached_oin (ocfs_super * osb, ocfs_inode * oin) +{ + bool bAcquiredOIN = false; + ocfs_lock_res *lockResource = NULL; + static spinlock_t lamelock = SPIN_LOCK_UNLOCKED; + + LOG_ENTRY_ARGS ("(oin = 0x%08x)\n", oin); + + spin_lock (&lamelock); /* TODO: figure out if this helps ;-) */ + if (oin == NULL) + goto bail; + + ocfs_down_sem (&(oin->main_res), true); + bAcquiredOIN = true; + + if (oin->open_hndl_cnt != 0 || oin->oin_flags & OCFS_OIN_IN_USE) { + if (bAcquiredOIN) { + ocfs_up_sem (&(oin->main_res)); + bAcquiredOIN = false; + } + goto bail; + } else { + OCFS_SET_FLAG 
(oin->oin_flags, OCFS_OIN_IN_TEARDOWN);
+
+		ocfs_delete_name (oin);
+
+		if (bAcquiredOIN) {
+			ocfs_up_sem (&(oin->main_res));
+			bAcquiredOIN = false;
+		}
+
+
+		lockResource = (ocfs_lock_res *) oin->lock_res;
+		if (lockResource != NULL) {
+			if (lockResource->signature != 0x55AA) {
+				LOG_ERROR_STR("invalid lock resource");
+				goto finito;
+			}
+
+			if (lockResource->sector_num != 0) {
+				if (lockResource->oin == oin) {
+					lockResource->oin = NULL;
+					if (lockResource->in_cache_list) {
+						list_del (&
+							  (lockResource->
+							   cache_list));
+						lockResource->in_cache_list =
+						    false;
+					}
+
+					ocfs_safefree (lockResource->voted_event);
+
+					if (!HASHTABLE_DESTROYED
+					    (&(osb->root_sect_node))) {
+						ocfs_hash_del (&
+							       (osb->
+								root_sect_node),
+							       &(lockResource->
+								 sector_num),
+							       sizeof (ub8));
+					} else {
+						LOG_TRACE_STR
+						    ("hashtable already destroyed! continuing.");
+					}
+					kmem_cache_free (OcfsGlobalCtxt.lockres_cache, lockResource);
+
+					oin->lock_res = NULL;
+				}
+			}
+		}
+	      finito:
+		;	/* a label must be followed by a statement */
+	}
+
+      bail:
+	spin_unlock (&lamelock);
+
+	LOG_EXIT ();
+	return;
+}				/* ocfs_release_cached_oin */
+
+/*
+ * ocfs_initialize_mem_lists()
+ *
+ * Create the four slab caches kept in OcfsGlobalCtxt (oin_cache,
+ * ofile_cache, lockres_cache, fe_cache) and, once all of them exist, set
+ * OCFS_FLAG_MEM_LISTS_INITIALIZED.
+ *
+ * Returns 0 on success.  If any cache cannot be created, the caches made
+ * so far are destroyed again, their pointers are reset to NULL, the flag
+ * is left clear and -ENOMEM is returned.
+ */
+int ocfs_initialize_mem_lists (void)
+{
+	OcfsGlobalCtxt.oin_cache =
+	    kmem_cache_create ("oin_cache",
+			       sizeof (ocfs_inode) + OCFS_POINTER_SIZE,
+			       0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (OcfsGlobalCtxt.oin_cache == NULL)
+		goto fail_oin;
+
+	OcfsGlobalCtxt.ofile_cache =
+	    kmem_cache_create ("ofile_cache",
+			       sizeof (ocfs_file) + OCFS_POINTER_SIZE,
+			       0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (OcfsGlobalCtxt.ofile_cache == NULL)
+		goto fail_ofile;
+
+	OcfsGlobalCtxt.lockres_cache =
+	    kmem_cache_create ("lockres_cache",
+			       sizeof (ocfs_lock_res) + OCFS_POINTER_SIZE,
+			       0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (OcfsGlobalCtxt.lockres_cache == NULL)
+		goto fail_lockres;
+
+	OcfsGlobalCtxt.fe_cache = kmem_cache_create ("fileentry_cache",
+						     OCFS_SECTOR_SIZE,
+						     0, SLAB_HWCACHE_ALIGN,
+						     NULL, NULL);
+	if (OcfsGlobalCtxt.fe_cache == NULL)
+		goto fail_fe;
+
+	OCFS_SET_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_MEM_LISTS_INITIALIZED);
+	return 0;
+
+	/* unwind in reverse order of creation */
+      fail_fe:
+	kmem_cache_destroy (OcfsGlobalCtxt.lockres_cache);
+	OcfsGlobalCtxt.lockres_cache = NULL;
+      fail_lockres:
+	kmem_cache_destroy (OcfsGlobalCtxt.ofile_cache);
+	OcfsGlobalCtxt.ofile_cache = NULL;
+      fail_ofile:
+	kmem_cache_destroy (OcfsGlobalCtxt.oin_cache);
+	OcfsGlobalCtxt.oin_cache = NULL;
+      fail_oin:
+	return -ENOMEM;
+}				/* ocfs_initialize_mem_lists */
+
+/*
+ * ocfs_free_mem_lists()
+ *
+ */
+void ocfs_free_mem_lists (void)
+{
+	kmem_cache_destroy (OcfsGlobalCtxt.oin_cache);
+	kmem_cache_destroy
(OcfsGlobalCtxt.ofile_cache);
+	kmem_cache_destroy (OcfsGlobalCtxt.fe_cache);
+	kmem_cache_destroy (OcfsGlobalCtxt.lockres_cache);
+	OCFS_CLEAR_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_MEM_LISTS_INITIALIZED);
+}				/* ocfs_free_mem_lists */
+
+/*
+ * ocfs_set_exclusive_mount_flag()
+ *
+ * Store @val in the excl_mount field of the volume header, which is read
+ * from block 0 (512 bytes) of sb->s_dev, and write that block back
+ * synchronously (ll_rw_block + wait_on_buffer).
+ *
+ * The function cannot report failure: if the header block cannot be read
+ * the error is logged and the on-disk flag is left unchanged.
+ */
+static void ocfs_set_exclusive_mount_flag (struct super_block *sb, int val)
+{
+	struct buffer_head *bh;
+	ocfs_vol_disk_hdr *hdr;
+
+	LOG_ENTRY ();
+
+	bh = bread (sb->s_dev, 0, 512);
+	if (bh == NULL) {
+		/* bread() returns NULL on I/O error; nothing to update then */
+		LOG_ERROR_STR ("failed to read volume header block");
+		goto bail;
+	}
+
+	hdr = (ocfs_vol_disk_hdr *) bh->b_data;
+	hdr->excl_mount = val;
+	mark_buffer_dirty (bh);
+	ll_rw_block (WRITE, 1, &bh);
+	wait_on_buffer (bh);
+	bforget (bh);
+
+      bail:
+	LOG_EXIT ();
+}				/* ocfs_set_exclusive_mount_flag */
+
+/*
+ * ocfs_remount()
+ *
+ */
+int ocfs_remount (struct super_block *sb, int *flags, char *data)
+{
+	int status;
+	ocfs_lock_res *lr;
+	ocfs_super *osb;
+	ub1 *buffer = NULL;
+	int ret = 0;
+	uid_t uid = current->fsuid;
+	gid_t gid = current->fsgid;
+	int length;
+	bool c;
+
+	LOG_ENTRY ();
+
+	ocfs_parse_options (data, &uid, &gid, &c);
+	osb = (ocfs_super *)(sb->u.generic_sbp);
+
+	if (!c) {
+		osb->cache_fs = false;
+		ocfs_set_exclusive_mount_flag (sb, NOT_MOUNTED_EXCLUSIVE);
+		fsync_no_super (sb->s_dev);
+		LOG_ERROR_STR ("remounted with nocache");
+		ret = 0;
+		goto bail;
+	}
+
+	length = (OCFS_MAXIMUM_NODES * osb->sect_size);
+	buffer = ocfs_malloc (length);
+	if (buffer == NULL) {
+		LOG_ERROR_STR ("could not allocate memory!");
+		ret = -ENOMEM;
+		goto bail;
+	}
+
+	status =
+	    ocfs_acquire_lock (osb, OCFS_VOLUME_LOCK_OFFSET,
+			       OCFS_DLM_EXCLUSIVE_LOCK, FLAG_FILE_CREATE, &lr,
+			       NULL);
+	if (status < 0) {
+		LOG_ERROR_STR ("failed to get lock on OCFS_VOLUME_LOCK_OFFSET");
+		/* the bail path does not free the buffer, so do it here */
+		ocfs_safefree (buffer);
+		ret = -EBUSY;
+		goto bail;
+	}
+
+	memset (buffer, 0, length);
+	{
+		bool save = osb->cache_fs;
+
+		osb->cache_fs = false;
+		status =
+		    ocfs_read_disk (osb, buffer, length,
+				    osb->vol_layout.publ_sect_off);
+		osb->cache_fs = save;
+	}
+	if (status >= 0) {
+		int i;
+
+		ocfs_down_sem (&(osb->osb_res), true);
+
ocfs_update_publish_map (osb, buffer, false);
+		ocfs_up_sem (&(osb->osb_res));
+
+		for (i = 0; i < OCFS_MAXIMUM_NODES; i++) {
+			if (IS_NODE_ALIVE
+			    (osb->publ_map, i, OCFS_MAXIMUM_NODES))
+				LOG_ERROR_ARGS ("node #%d is alive\n", i);
+		}
+		LOG_TRACE_ARGS ("publishmap = %u.%u\n", osb->publ_map);
+		if (osb->publ_map == (1 << osb->node_num)) {
+			/* this node is the only one in the publish map */
+			ocfs_set_exclusive_mount_flag (sb, (osb->node_num));
+			fsync_no_super (sb->s_dev);
+			LOG_ERROR_STR ("remount synced device");
+			ret = 0;
+		} else {
+			/*
+			 * Other nodes are alive.  Report the failure so the
+			 * "ret == 0" test below does not switch caching on.
+			 */
+			LOG_ERROR_STR ("failed to remount device");
+			ret = -EBUSY;
+		}
+	} else {
+		/* the publish sectors could not be read: must not succeed */
+		LOG_ERROR_STATUS (status);
+		ret = -EIO;
+	}
+
+	ocfs_safefree (buffer);
+	status =
+	    ocfs_release_lock (osb, OCFS_VOLUME_LOCK_OFFSET,
+			       OCFS_DLM_EXCLUSIVE_LOCK, 0, lr);
+	/* caching is enabled only after a successful exclusive remount */
+	if (ret == 0) {
+		osb->cache_fs = true;
+	}
+
+      bail:
+	LOG_EXIT_LONG (ret);
+	return ret;
+}				/* ocfs_remount */
+
+module_init (ocfs_driver_entry);
+module_exit (ocfs_driver_exit);
+MODULE_LICENSE ("GPL");
diff -urNp ocfs/fs/ocfs/Linux/ocfsmount.c 2.4.20pre5aa2/fs/ocfs/Linux/ocfsmount.c
--- ocfs/fs/ocfs/Linux/ocfsmount.c	Thu Jan  1 01:00:00 1970
+++ 2.4.20pre5aa2/fs/ocfs/Linux/ocfsmount.c	Fri Sep  6 01:46:16 2002
@@ -0,0 +1,421 @@
+/*
+ * ocfsmount.c
+ *
+ * Mount and dismount volume
+ *
+ * Copyright (C) 2002 Oracle Corporation.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran,
+ *          Manish Singh, Wim Coekaerts
+ */
+
+#include
+
+/* Tracing */
+#define OCFS_DEBUG_CONTEXT    OCFS_DEBUG_CONTEXT_MOUNT
+
+static bool is_exclusive_node_alive (struct super_block *sb,
+				     ocfs_vol_disk_hdr * hdr);
+
+extern spinlock_t ProtectOSBId;
+extern ub4 GlobalOSBId;		/* Keeps track of next available OSB Id */
+extern spinlock_t ProtectMountCount;
+extern ub4 GlobalMountCount;	/* Count of mounted volumes */
+static int errno;
+
+/*
+ * ocfs_read_disk_header()
+ *
+ * Reads 512-byte blocks 0 and 1 of sb->s_dev into a freshly allocated
+ * 1024-byte buffer that is handed back through *buffer.
+ *
+ * Returns 0 on success and -ENOMEM when buffer is NULL, when the
+ * allocation fails or when either block cannot be read.  After a failed
+ * read *buffer still points at the allocation, so the caller has to free
+ * *buffer whatever the return value is.
+ */
+int ocfs_read_disk_header (ub1 ** buffer, struct super_block *sb)
+{
+	int status = 0;
+	struct buffer_head *bh = NULL;
+
+	LOG_ENTRY ();
+
+	if (buffer == NULL) {
+		status = -ENOMEM;
+		goto leave;
+	}
+
+	/* Read the first sector bytes from the target device */
+	if ((*buffer = ocfs_malloc (1024)) == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto leave;
+	}
+
+	bh = bread (sb->s_dev, 0, 512);
+	if (!bh) {
+		status = -ENOMEM;
+		goto leave;
+	}
+	memcpy (*buffer, bh->b_data, 512);
+	bforget (bh);
+
+	bh = bread (sb->s_dev, 1, 512);
+	if (!bh) {
+		status = -ENOMEM;
+		goto leave;
+	}
+	memcpy ((void *) (*buffer + 512), bh->b_data, 512);
+	bforget (bh);
+
+      leave:
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_read_disk_header */
+
+
+/*
+ * is_exclusive_node_alive()
+ *
+ * Samples the time field of the publish sector selected by
+ * hdr->excl_mount, sleeps five seconds and samples it again.
+ *
+ * Returns true when the two samples differ (the node is still
+ * publishing) and also when the sector cannot be read; returns false
+ * when the time field did not change.
+ */
+static bool is_exclusive_node_alive (struct super_block *sb,
+				     ocfs_vol_disk_hdr * hdr)
+{
+	struct buffer_head *bh = NULL;
+	bool ret = false;
+	ub8 off;
+	ub8 ts;
+	ocfs_publish *pub;
+
+	/* get the blocknum of the publish sector in question */
+	off = (hdr->publ_off >> (ub8) 9);
+	off += (ub8) hdr->excl_mount;
+
+	/* get the timestamp from the publish sector */
+	bh = bread (sb->s_dev, (ub4) off, 512);
+	if (!bh) {
+		LOG_ERROR_ARGS ("failed to read block: %u\n", (ub4) off);
+		return true;
+	}
+	pub = (ocfs_publish *) bh->b_data;
+	ts = pub->time;
+	bforget (bh);
+
+	/* wait... */
+	LOG_ERROR_STR ("sorry to have to do this, but you'll have to "
+		       "wait a bit while I check the other node...\n");
+	ocfs_sleep (5000);	/* 5 seconds */
+
+	/* get the timestamp from the publish sector */
+	bh = bread (sb->s_dev, (ub4) off, 512);
+	if (!bh) {
+		LOG_ERROR_ARGS ("failed to read block: %u\n", (ub4) off);
+		return true;
+	}
+	pub = (ocfs_publish *) bh->b_data;
+
+	if (ts != pub->time) {
+		/* aha! she's still there! */
+		LOG_ERROR_ARGS
+		    ("timestamp still changing, the node is alive!: %u.%u -> %u.%u\n",
+		     HI (ts), LO (ts), HI (pub->time), LO (pub->time));
+		ret = true;
+	} else {
+		LOG_ERROR_ARGS
+		    ("timestamp NOT changing, the node is DEAD!: %u.%u -> %u.%u\n",
+		     HI (ts), LO (ts), HI (pub->time), LO (pub->time));
+		ret = false;
+	}
+
+	bforget (bh);
+	return ret;
+}				/* is_exclusive_node_alive */
+
+
+/*
+ * ocfs_mount_volume()
+ *
+ * Reads and verifies the volume header, refuses the mount when the
+ * header records an exclusive mount, allocates and initialises the
+ * ocfs_super for sb, registers the /proc entries, starts the ipcdlm on
+ * the first mount and finally runs ocfs_vol_member_reconfig() and
+ * ocfs_check_volume().
+ *
+ * Returns 0 on success or a negative status.
+ *
+ * NOTE(review): when a step fails after the ocfs_super was allocated,
+ * nothing here frees it, detaches it from sb->u.generic_sbp or rolls
+ * back GlobalMountCount -- presumably the caller tears the volume down;
+ * confirm.
+ */
+int ocfs_mount_volume (struct super_block *sb)
+{
+	int status = 0;
+	ocfs_super *osb;
+	ub1 *buffer = NULL;
+	ocfs_vol_disk_hdr *volDiskHdr;
+	ocfs_vol_label *volLabel;
+	int sectsize;
+
+#if defined(DLM_THREAD_PER_VOLUME)
+	int child_pid;
+#endif
+
+	LOG_ENTRY ();
+
+	/* TODO: not using this yet, EVERYTHING assumes 512! */
+	sectsize = 512;
+
+	status = ocfs_read_disk_header (&buffer, sb);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+
+	volDiskHdr = (ocfs_vol_disk_hdr *) buffer;
+
+	if (volDiskHdr->excl_mount != NOT_MOUNTED_EXCLUSIVE) {
+		if (is_exclusive_node_alive (sb, volDiskHdr)) {
+			LOG_ERROR_ARGS
+			    ("Cannot mount. Another node (%d) has this volume mounted exclusive.\n",
+			     volDiskHdr->excl_mount);
+			status = -EACCES;
+			goto leave;
+		} else {
+			LOG_ERROR_ARGS
+			    ("Cannot mount. Node %d mounted this volume exclusive, but has DIED! Please recover.\n",
+			     volDiskHdr->excl_mount);
+			status = -EACCES;
+			goto leave;
+		}
+	}
+
+	/* We found a volume with our signature on it. Now go ahead and read */
+	/* the root directory and other filesystem structures.... */
+
+	LOG_TRACE_STR ("ocfs_verify_volume...");
+	status = ocfs_verify_volume (volDiskHdr);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+
+	/* 2nd sector */
+	volLabel = (ocfs_vol_label *) (buffer + sectsize);
+
+	/* Check if the cluster name on the disk matches the one in the registry */
+#ifdef ENABLE_CLUSTER_NAME_CHECK	/* TODO */
+	if (OcfsGlobalCtxt.ClusterName == NULL ||
+	    volLabel->ClusterNameLength < 1 ||
+	    volLabel->ClusterName[0] == '\0' ||
+	    memcmp (OcfsGlobalCtxt.ClusterName, volLabel->ClusterName,
+		    volLabel->ClusterNameLength) != 0) {
+		LOG_ERROR_ARGS
+		    ("expected cluster name: '%s' volume cluster name: '%s'\n",
+		     OcfsGlobalCtxt.ClusterName, volLabel->ClusterName);
+		status = -EINVAL;
+		goto leave;
+	}
+#endif
+
+	if ((osb = ocfs_malloc (sizeof (ocfs_super))) == NULL) {
+		LOG_ERROR_STATUS (status = -ENOMEM);
+		goto leave;
+	}
+	memset(osb, 0, sizeof(ocfs_super));
+	sb->u.generic_sbp = (void *)osb;
+	osb->sb = sb;
+
+	status = ocfs_initialize_osb (osb, volDiskHdr, volLabel, sectsize);
+	if (status < 0) {
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+
+	osb->sect_size = sectsize;
+
+	spin_lock (&ProtectOSBId);
+	osb->osb_id = GlobalOSBId;
+	if (GlobalOSBId < ULONG_MAX)
+		GlobalOSBId++;
+	else {
+		spin_unlock (&ProtectOSBId);
+		LOG_ERROR_STR ("Too many volumes mounted");
+		status = -ENOMEM;
+		goto leave;
+	}
+	spin_unlock (&ProtectOSBId);
+
+#if defined(DLM_THREAD_PER_VOLUME)
+	ocfs_down_sem (&(osb->osb_res), true);
+	/* dismount waits on this completion after signalling the thread, */
+	/* so it has to be initialised before the thread can run */
+	init_completion (&osb->complete);
+	/* Launch the DLM thread for the mounted volume */
+	child_pid = kernel_thread (ocfs_volume_thread, osb,
+				   CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
+	if (child_pid < 0) {
+		LOG_ERROR_ARGS ("unable to launch dlm thread (%d)\n",
+				child_pid);
+		/* do not leave with osb_res still held */
+		ocfs_up_sem (&(osb->osb_res));
+		status = -EFAIL;
+		goto leave;
+	}
+	if (!ocfs_get_task (child_pid, &(osb->dlm_task)))
+		osb->dlm_task = NULL;
+	ocfs_up_sem (&(osb->osb_res));
+#endif
+
+	/* Add proc entry for this volume */
+	ocfs_proc_add_volume (osb);
+
+	/* GlobalMountCount */
+	spin_lock (&ProtectMountCount);
+	GlobalMountCount++;
+	if (GlobalMountCount == 1) {
+		/* Start the ipcdlm */
+		ocfs_init_ipc_dlm (NULL, OCFS_UDP);
+		OcfsIpcCtxt.init = true;
+	}
+	spin_unlock (&ProtectMountCount);
+
+	/* Join or Form the cluster... */
+	LOG_TRACE_STR ("ocfs_vol_member_reconfig...");
+	ocfs_down_sem (&(osb->osb_res), true);
+	status = ocfs_vol_member_reconfig (osb);
+	ocfs_up_sem (&(osb->osb_res));
+	if (status < 0) {
+		LOG_ERROR_STR ("ocfs_vol_member_reconfig failed");
+		goto leave;
+	}
+
+	/* Read the publish sector for this node and cleanup dirent being */
+	/* modified when we crashed. */
+	LOG_TRACE_STR ("ocfs_check_volume...");
+	ocfs_down_sem (&(osb->osb_res), true);
+	status = ocfs_check_volume (osb);
+	if (status < 0) {
+		ocfs_up_sem (&(osb->osb_res));
+		LOG_ERROR_STATUS (status);
+		goto leave;
+	}
+	ocfs_up_sem (&(osb->osb_res));
+
+	osb->vol_state = VOLUME_MOUNTED;
+
+      leave:
+	/* Delete Device on Failure */
+	ocfs_safefree (buffer);
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_mount_volume */
+
+/*
+ * ocfs_dismount_volume()
+ *
+ * Drops the global mount count (stopping the ipcdlm thread when it
+ * reaches zero), then, with osb_res held, syncs the device, releases the
+ * root oin, destroys the sector hash, removes the /proc entries and
+ * frees the ocfs_super attached to sb.
+ *
+ * Returns 0 on success, -EFAIL when sb carries no ocfs_super and -EBUSY
+ * when files are still open on the volume.
+ *
+ * NOTE(review): the mount count is decremented before the open-file
+ * check, so an -EBUSY return leaves it one too low -- confirm whether
+ * callers retry the dismount.
+ */
+int ocfs_dismount_volume (struct super_block *sb)
+{
+	int status = 0;
+	bool AcquiredOSB = false;
+	bool stop_ipcdlm = false;
+	ocfs_super *osb = NULL;
+	ocfs_inode *rootoin;
+	int i;			/* used by the nodecfg loop in every configuration */
+
+	LOG_ENTRY_ARGS ("(0x%08x)\n", sb);
+
+	if (sb == NULL || sb->u.generic_sbp == NULL) {
+		status = -EFAIL;
+		goto leave;
+	}
+
+	osb = (ocfs_super *)(sb->u.generic_sbp);
+	rootoin = osb->oin_root_dir;
+
+	/* GlobalMountCount */
+	spin_lock (&ProtectMountCount);
+	GlobalMountCount--;
+	if (GlobalMountCount == 0 && OcfsIpcCtxt.task)
+		stop_ipcdlm = true;
+	spin_unlock (&ProtectMountCount);
+
+	/* Shutdown ipcdlm.  wait_for_completion() sleeps, so this is done */
+	/* only after the spinlock has been dropped. */
+	if (stop_ipcdlm) {
+		LOG_TRACE_STR ("Waiting for ipcdlm to exit....");
+		send_sig (SIGINT, OcfsIpcCtxt.task, 0);
+		wait_for_completion (&(OcfsIpcCtxt.complete));
+		OcfsIpcCtxt.task = NULL;
+	}
+
+	LOG_TRACE_STR ("Acquire OSB lock");
+	ocfs_down_sem (&(osb->osb_res), true);
+	AcquiredOSB = true;
+
+	if (osb->file_open_cnt > 0) {
+		LOG_ERROR_ARGS ("Dismount failed... file_open_cnt(%d) > 0\n",
+				osb->file_open_cnt);
+		LOG_ERROR_STR
+		    ("WARNING!!! Need to uncomment this when file opens are correct!\n");
+/* commenting this out for now until we deal with open files properly */
+		status = -EBUSY;
+		goto leave;
+	}
+
+	/* the offset is printed as two 32-bit halves, like the other */
+	/* "%u.%u" traces in this file */
+	LOG_TRACE_ARGS ("osb=0x%08x rootoin=0x%08x offset=%u.%u\n", osb,
+			rootoin, HI (rootoin->file_disk_off),
+			LO (rootoin->file_disk_off));
+
+	fsync_no_super (sb->s_dev);
+
+	ocfs_release_oin (rootoin, true);
+
+	/* Destroy the Hash table */
+	ocfs_hash_destroy (&(osb->root_sect_node), ocfs_free);
+
+	/* Remove the proc element for this volume */
+	ocfs_proc_remove_volume (osb);
+
+	/* Dismount */
+	OCFS_SET_FLAG (osb->osb_flags, OCFS_OSB_FLAGS_BEING_DISMOUNTED);
+	osb->vol_state = VOLUME_BEING_DISMOUNTED;
+
+#if defined(DLM_THREAD_PER_VOLUME)
+	/* Wait for this volume's NM thread to exit */
+	if (osb->dlm_task) {
+		LOG_TRACE_STR ("Waiting for nmthread to exit....");
+		send_sig (SIGINT, osb->dlm_task, 0);
+		wait_for_completion (&(osb->complete));
+		osb->dlm_task = NULL;
+	}
+#endif
+
+	ocfs_down_sem (&(OcfsGlobalCtxt.res), true);
+	vfree (osb->cluster_bitmap.buf);
+//      list_del(&osb->osb_next);  /* this has been moved into ocfs_delete_osb */
+	ocfs_up_sem (&(OcfsGlobalCtxt.res));
+
+	osb->vol_state = VOLUME_DISMOUNTED;
+	if (AcquiredOSB) {
+		LOG_TRACE_STR ("Release OSB lock");
+		ocfs_up_sem (&(osb->osb_res));
+		AcquiredOSB = false;
+	}
+
+	/* Free all nodecfgs */
+	for (i = 0; i < OCFS_MAXIMUM_NODES; ++i) {
+		ocfs_node_config_info *p;
+
+		p = osb->node_cfg_info[i];
+		ocfs_safefree (p);
+	}
+
+	ocfs_delete_osb (osb);
+	ocfs_safefree (osb);
+	sb->s_dev = 0;
+
+      leave:
+	if (AcquiredOSB) {
+		LOG_TRACE_STR ("Release OSB lock");
+		ocfs_up_sem (&(osb->osb_res));
+		AcquiredOSB = false;
+	}
+
+	LOG_EXIT_STATUS (status);
+	return status;
+}				/* ocfs_dismount_volume */
diff -urNp ocfs/fs/ocfs/Linux/ocfsport.c 2.4.20pre5aa2/fs/ocfs/Linux/ocfsport.c
--- ocfs/fs/ocfs/Linux/ocfsport.c Thu Jan 1 
01:00:00 1970
+++ 2.4.20pre5aa2/fs/ocfs/Linux/ocfsport.c Fri Sep 6 01:46:16 2002
@@ -0,0 +1,849 @@
+/*
+ * ocfsport.c
+ *
+ * Linux specific utilities
+ *
+ * Copyright (C) 2002 Oracle Corporation.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran,
+ *          Manish Singh, Wim Coekaerts
+ */
+
+#ifdef __KERNEL__
+#include
+#else
+#include
+#endif
+
+/* Tracing */
+#define OCFS_DEBUG_CONTEXT      OCFS_DEBUG_CONTEXT_PORT
+
+#ifndef DEBUGOCFS
+extern struct list_head item_list;
+static int get_overlap_type (ub8 new, ub8 newend, ub8 exist, ub8 existend);
+static bool OcfsCoalesceExtentMapEntry (ocfs_extent_map * map,
+				sb8 virtual, sb8 physical, sb8 sectorcount);
+
+/*
+ * ocfs_init_sem()
+ *
+ * Zeroes *res, initialises the embedded semaphore as an unlocked mutex
+ * and stamps the structure with OCFS_SEM_MAGIC so that it is accepted
+ * by ocfs_down_sem() and ocfs_up_sem().
+ */
+void ocfs_init_sem (ocfs_sem * res)
+{
+	LOG_ENTRY_ARGS ("(0x%08x)\n", res);
+
+	/* owner pid and nesting count both start out as zero */
+	memset (res, 0, sizeof (*res));
+	init_MUTEX (&res->sem);
+	res->magic = OCFS_SEM_MAGIC;
+
+	LOG_EXIT ();
+}				/* ocfs_init_sem */
+
+/*
+ * ocfs_down_sem()
+ *
+ * Counter layer atop the sem. If a process which already owns the sem,
+ * attempts to re-acquire it, ocfs_down_sem() increments the
+ * count by 1. If however, a different process attempts to acquire that
+ * sem, it blocks waiting for the sem to be released.
+ * ocfs_up_sem() decrements the count by 1, if the owning
+ * process releases the sem. The sem is released when the counter hits 0.
+ *
+ * Returns true once the calling process owns the sem.  Returns false
+ * when res does not carry OCFS_SEM_MAGIC, or when wait is false and
+ * another pid is recorded as the owner.
+ */
+bool ocfs_down_sem (ocfs_sem * res, bool wait)
+{
+	bool ret = true;
+
+	LOG_ENTRY_ARGS ("(0x%08x, %u)\n", res, wait);
+
+	if (res->magic != OCFS_SEM_MAGIC) {
+		ret = false;
+		goto bail;
+	}
+
+	if (res->pid == 0) {
+		down (&(res->sem));
+		res->pid = current->pid;
+		res->count = 1;
+	} else {
+		if (res->pid == current->pid) {
+			res->count++;
+		} else {
+			if (wait) {
+				/* wait till acquire */
+				down (&(res->sem));
+				res->pid = current->pid;
+				res->count = 1;
+			} else {
+				ret = false;
+				goto bail;
+			}
+		}
+	}
+
+      bail:
+	LOG_EXIT_ULONG (ret);
+	return ret;
+}				/* ocfs_down_sem */
+
+/*
+ * ocfs_up_sem()
+ *
+ * ocfs_up_sem() decrements the count by 1, if the owning
+ * process releases the sem. The sem is released when the counter hits 0.
+ *
+ * A nested ocfs_down_sem() only bumps the count and never calls down(),
+ * so up() must be called exactly once, by the release that takes the
+ * count to zero.  Calls from a non-owner, or on a structure without
+ * OCFS_SEM_MAGIC, are ignored.
+ */
+void ocfs_up_sem (ocfs_sem * res)
+{
+	LOG_ENTRY_ARGS ("(0x%08x)\n", res);
+
+	if (res->magic != OCFS_SEM_MAGIC)
+		goto bail;
+
+	if (res->count && current->pid == res->pid) {
+		res->count--;
+		if (!res->count) {
+			/* outermost release: give up ownership, then the sem */
+			res->pid = 0;
+			up (&(res->sem));
+		}
+	}
+
+      bail:
+	LOG_EXIT ();
+	return;
+}				/* ocfs_up_sem */
+
+/*
+ * ocfs_del_sem()
+ *
+ * Marks res as deleted by overwriting its magic, after which
+ * ocfs_down_sem() and ocfs_up_sem() refuse it.  Always returns 0.
+ */
+int ocfs_del_sem (ocfs_sem * res)
+{
+	LOG_ENTRY_ARGS ("(0x%08x)\n", res);
+
+	res->magic = OCFS_SEM_DELETED;
+
+	LOG_EXIT ();
+	return 0;
+}				/* ocfs_del_sem */
+
+/*
+ * ocfs_ms_to_jiffies()
+ *
+ * Converts milliseconds to jiffies using HZ instead of assuming 10ms
+ * per jiffy.  Rounds down, but never returns less than one jiffy.  The
+ * seconds and the remainder are scaled separately so that ms * HZ
+ * cannot overflow.
+ */
+static inline ub4 ocfs_ms_to_jiffies (ub4 ms)
+{
+	ub4 ticks = (ms / 1000) * HZ + ((ms % 1000) * HZ) / 1000;
+
+	return (ticks < 1) ? 1 : ticks;
+}				/* ocfs_ms_to_jiffies */
+
+/*
+ * ocfs_wait()
+ *
+ * Sleeps interruptibly on the wait queue head passed as Object.
+ * Timeout is in ms; zero means no timeout.  Alertable is not used.
+ *
+ * Returns -ETIMEDOUT when the whole timeout elapsed, 0 otherwise.
+ */
+int ocfs_wait (void *Object, bool Alertable, ub4 Timeout)
+{
+	ub4 remjiffies;
+	ub4 numjiffies = 0;
+	int status = 0;
+
+	if (Timeout) {
+		numjiffies = ocfs_ms_to_jiffies (Timeout);
+
+		remjiffies =
+		    interruptible_sleep_on_timeout ((wait_queue_head_t *)
+						    Object, numjiffies);
+		if (remjiffies == 0)
+			status = -ETIMEDOUT;
+	} else {
+		interruptible_sleep_on ((wait_queue_head_t *) Object);
+	}
+
+	return status;
+}				/* ocfs_wait */
+
+/*
+ * ocfs_daemonize()
+ *
+ * Names the current task, detaches it with daemonize() and blocks every
+ * signal that is not part of SHUTDOWN_SIGS.
+ */
+void ocfs_daemonize (char *name)
+{
+	/* name is data, not a format string, and must not overrun comm[] */
+	snprintf (current->comm, sizeof (current->comm), "%s", name);
+	daemonize ();
+
+	/* Block all signals except SIGKILL, SIGSTOP, SIGHUP and SIGINT */
+	spin_lock_irq (&current->sigmask_lock);
+	siginitsetinv (&current->blocked, SHUTDOWN_SIGS);
+	recalc_sigpending (current);
+	spin_unlock_irq (&current->sigmask_lock);
+
+	return;
+}				/* ocfs_daemonize */
+
+/*
+ * ocfs_get_task()
+ *
+ * Looks up the task with the given pid and stores it in *task.
+ * Returns true when found, false (leaving *task untouched) otherwise.
+ *
+ * NOTE(review): no reference is taken on the task, so the pointer is
+ * only as good as the caller's knowledge that the task is still alive.
+ */
+bool ocfs_get_task (pid_t pid, struct task_struct ** task)
+{
+	struct task_struct *p;
+	bool found = false;
+
+	/* the task list may only be walked with tasklist_lock held */
+	read_lock (&tasklist_lock);
+	for_each_task (p) {
+		if (p->pid == pid) {
+			*task = p;
+			found = true;
+			break;
+		}
+	}
+	read_unlock (&tasklist_lock);
+
+	return found;
+}				/* ocfs_get_task */
+
+
+/*
+ * ocfs_sleep()
+ *
+ * The interval time is in milliseconds.  Keeps rescheduling until the
+ * whole interval has been slept.  Always returns 0.
+ */
+int ocfs_sleep (ub4 ms)
+{
+	ub4 numJiffies;
+
+	LOG_ENTRY ();
+
+	numJiffies = ocfs_ms_to_jiffies (ms);
+
+	while (numJiffies) {
+		set_current_state (TASK_INTERRUPTIBLE);
+		numJiffies = schedule_timeout (numJiffies);
+	}
+
+	LOG_EXIT ();
+	return 0;
+}				/* ocfs_sleep */
+
+/*
+ * ocfs_print_qstr()
+ *
+ * Prints at most PATH_MAX bytes of the name held in x.  The length is
+ * handed to printk as a precision, so no PATH_MAX sized copy has to live
+ * on the kernel stack.
+ */
+void ocfs_print_qstr (struct qstr *x)
+{
+	int len;
+
+	len = (x->len > PATH_MAX) ? PATH_MAX : x->len;
+	printk ("%.*s", len, x->name);
+}				/* ocfs_print_qstr */
+
+#ifdef OCFS_LINUX_MEM_DEBUG
+#define SUPER_VERBOSE_MEM_DEBUG  1
+#endif
+
+/*
+ * ocfs_linux_dbg_alloc()
+ *
+ * kmalloc()s Size bytes with GFP_KERNEL.  With OCFS_LINUX_MEM_DEBUG the
+ * allocation is also recorded on item_list, tagged "line:file".
+ *
+ * Returns the memory or NULL.  In the debug build NULL is also returned
+ * when the tracking record cannot be allocated, because
+ * ocfs_linux_dbg_free() refuses to free memory it has no record of.
+ */
+void *ocfs_linux_dbg_alloc (int Size, char *file, int line)
+{
+	void *m;
+
+	m = kmalloc (Size, GFP_KERNEL);
+#ifdef OCFS_LINUX_MEM_DEBUG
+	if (m == NULL) {
+		LOG_ERROR_ARGS ("failed! (size=%d)\n", Size);
+	} else {
+		alloc_item *new;
+
+		new = kmalloc (sizeof (alloc_item), GFP_KERNEL);
+		if (new == NULL) {
+			LOG_ERROR_ARGS ("failed! (size=%d)\n", Size);
+			kfree (m);
+			return NULL;
+		}
+		new->address = m;
+		new->length = Size;
+		snprintf (new->tag, 30, "%d:%s", line, file);
+		new->tag[29] = '\0';
+		list_add (&new->list, &item_list);
+#ifdef SUPER_VERBOSE_MEM_DEBUG
+		LOG_TRACE_ARGS (" + %x (%d, '%s')\n", m, Size, new->tag);
+#endif
+	}
+#endif
+	return m;
+}				/* ocfs_linux_dbg_alloc */
+
+/*
+ * ocfs_linux_dbg_free()
+ *
+ * kfree()s Buffer.  With OCFS_LINUX_MEM_DEBUG the buffer is freed only
+ * when it is found on item_list (its record is dropped as well);
+ * anything else is reported and left alone.
+ */
+void ocfs_linux_dbg_free (const void *Buffer)
+{
+
+#ifdef OCFS_LINUX_MEM_DEBUG
+	struct list_head *iter;
+
+	list_for_each (iter, &item_list) {
+		alloc_item *item = list_entry (iter, alloc_item, list);
+
+		if (item->address == Buffer) {
+#ifdef SUPER_VERBOSE_MEM_DEBUG
+			LOG_TRACE_ARGS (" - %x (%d, '%s')\n", Buffer,
+					item->length, item->tag);
+#endif
+			kfree (Buffer);
+			list_del (&item->list);
+			kfree (item);
+			return;
+		}
+	}
+	LOG_ERROR_ARGS ("tried to free mem never allocated: %x\n", Buffer);
+#endif
+#ifndef OCFS_LINUX_MEM_DEBUG
+	kfree (Buffer);
+#endif
+}				/* ocfs_linux_dbg_free */
+
+
+/*
+ * ocfs_linux_get_inode_offset()
+ *
+ * Stores the disk offset belonging to inode in *off.  When the inode
+ * carries an oin the offset comes from it (dir_disk_off for
+ * directories, file_disk_off otherwise) and the oin is handed back
+ * through *oin if oin is not NULL; otherwise GET_INODE_OFFSET() is used
+ * and *oin is set to NULL.
+ *
+ * Returns false when off is NULL or when the resulting offset is -1.
+ */
+bool ocfs_linux_get_inode_offset (struct inode * inode, ub8 * off, ocfs_inode ** oin)
+{
+	if (off == NULL)
+		return false;
+
+	if (oin != NULL)
+		*oin = NULL;
+
+	if (inode_data_is_oin (inode)) {
+		ocfs_inode *f = ((ocfs_inode *)inode->u.generic_ip);
+
+		if (f == NULL) {
+			LOG_ERROR_STR ("bad inode oin");
+			*off = -1;
+			return false;
+		} else {
+			if (oin != NULL)
+				*oin = f;
+			if (S_ISDIR (inode->i_mode))
+				*off = f->dir_disk_off;
+			else
+				*off = f->file_disk_off;
+		}
+	} else {
+		*off = GET_INODE_OFFSET (inode);
+	}
+	return (*off != -1);
+}				/* ocfs_linux_get_inode_offset */
+
+
+/*
+ * ocfs_linux_get_dir_entry_offset()
+ *
+ * Looks fileName up below parentOff with ocfs_find_files_on_disk() and
+ * stores the this_sector value of the entry found in *off (-1 when
+ * nothing was found or no file entry could be allocated).
+ *
+ * When fileEntry is not NULL and a file entry could be allocated, the
+ * entry is handed to the caller -- even if the lookup failed -- and the
+ * caller has to free it.  Returns true when *off is not -1.
+ */
+bool ocfs_linux_get_dir_entry_offset (ocfs_super * osb, ub8 * off, ub8 parentOff,
+			struct qstr * fileName, ocfs_file_entry ** fileEntry)
+{
+	int status;
+	ocfs_file_entry *ent;
+
+	if (off == NULL)
+		return false;
+
+	*off = -1;
+	ent = ocfs_allocate_file_entry ();
+	if (ent != NULL) {
+		status = ocfs_find_files_on_disk (osb, parentOff, fileName, ent, NULL);
+		if (status >= 0)
+			*off = ent->this_sector;
+
+		/* if the caller wants the file entry let him free it */
+		if (fileEntry)
+			*fileEntry = ent;
+		else
+			ocfs_safefree (ent);
+	}
+	return (*off != -1);
+}				/* ocfs_linux_get_dir_entry_offset */
+
+
+/*
+ * ocfs_flush_cache()
+ *
+ * Syncs the buffers of the device the volume lives on.
+ */
+void ocfs_flush_cache (ocfs_super * osb)
+{
+	fsync_no_super (osb->sb->s_dev);
+}				/* ocfs_flush_cache */
+
+
+/*
+ * ocfs_purge_cache_section()
+ *
+ * Syncs the buffers of the inode behind oin, if there is one.  file_off
+ * and Length are not used.  Always returns true.
+ */
+bool ocfs_purge_cache_section (ocfs_inode * oin, ub8 * file_off, ub4 Length)
+{
+	if (oin != NULL && oin->inode != NULL) {
+		fsync_inode_buffers (oin->inode);
+	}
+	return true;
+}				/* ocfs_purge_cache_section */
+
+/* prefetch has been declared to allow to build in debug mode */
+#ifdef DEBUG
+#ifndef ARCH_HAS_PREFETCH
+inline void prefetch (const void *x)
+{;
+}
+#endif
+#endif
+
+#endif				/* !DEBUGOCFS */
+
+/* Crazy wacky extent map stuff */
+/* works ok in userland debugocfs stuff too */
+
+#define GET_EXTENT_MAP_ENTRY(map, i)   ((ocfs_extent *) ((ub1 *)map->buf + \
+					 ((i) * sizeof(ocfs_extent))))
+
+/*
+ * ocfs_extent_map_init()
+ *
+ * Puts map into the empty state: lock initialised, no buffer, zero
+ * capacity and count, flagged as initialized.
+ */
+void ocfs_extent_map_init (ocfs_extent_map * map)
+{
+	LOG_ENTRY ();
+
+	OCFS_ASSERT (map != NULL);
+	spin_lock_init(&(map->lock));
+	map->capacity = 0;
+	map->count = 0;
+	map->initialized = true;
+	map->buf = NULL;
+
+	LOG_EXIT ();
+	return;
+}				/* ocfs_extent_map_init */
+
+/*
+ * ocfs_extent_map_destroy()
+ *
+ * Frees the entry buffer of an initialized map and flags the map as no
+ * longer initialized.  Does nothing for a map that is not initialized.
+ */
+void ocfs_extent_map_destroy (ocfs_extent_map * map)
+{
+	LOG_ENTRY ();
+
+	OCFS_ASSERT (map != NULL);
+	if (map->initialized) {
+		spin_lock(&(map->lock));
+		map->capacity = 0;
+		map->count = 0;
+		ocfs_safefree (map->buf);
+		map->initialized = false;
+		spin_unlock(&(map->lock));
+	}
+
+	LOG_EXIT ();
+	return;
+}				/* ocfs_extent_map_destroy */
+
+/*
+ * ocfs_extent_map_get_count()
+ *
+ * Returns the number of entries in map, read under the map lock.
+ */
+ub4 ocfs_extent_map_get_count (ocfs_extent_map * map)
+{
+	ub4 ret;
+
+	LOG_ENTRY ();
+
+	OCFS_ASSERT (map != NULL);
+	spin_lock(&(map->lock));
+	ret = map->count;
+	spin_unlock(&(map->lock));
+
+	LOG_EXIT_ULONG (ret);
+	return ret;
+}				/* ocfs_extent_map_get_count */
+
+/* How a new range [new, newend) lies relative to an existing one */
+enum
+{
+	LEFT_NO_OVERLAP,
+	LEFT_ADJACENT,
+	LEFT_OVERLAP,
+	FULLY_CONTAINED,
+	FULLY_CONTAINING,
+	RIGHT_OVERLAP,
+	RIGHT_ADJACENT,
+	RIGHT_NO_OVERLAP
+};
+
+/*
+ * get_overlap_type()
+ *
+ * Classifies the range [new, newend) against [exist, existend) and
+ * returns one of the overlap constants above.  Both ranges must be
+ * non-empty.  Identical ranges are reported as FULLY_CONTAINED.
+ */
+static int get_overlap_type (ub8 new, ub8 newend, ub8 exist, ub8 existend)
+{
+	OCFS_ASSERT (newend > new);
+	OCFS_ASSERT (existend > exist);
+
+	if (new < exist) {
+		if (newend < exist)
+			return LEFT_NO_OVERLAP;
+		else if (newend == exist)
+			return LEFT_ADJACENT;
+		else if (newend >= existend)	/* && newend > exist */
+			return FULLY_CONTAINING;
+		else		/* newend < existend && newend > exist */
+			return LEFT_OVERLAP;
+	} else if (new > exist) {
+		if (new > existend)
+			return RIGHT_NO_OVERLAP;
+		else if (new == existend)
+			return RIGHT_ADJACENT;
+		else if (newend > existend)	/* && new < existend */
+			return RIGHT_OVERLAP;
+		else		/* newend <= existend && new < existend */
+			return FULLY_CONTAINED;
+	} else if (newend > existend)	/* && new == exist */
+		return FULLY_CONTAINING;
+	else			/* newend <= existend && new == exist */
+		return FULLY_CONTAINED;
+}				/* get_overlap_type */
+
+/*
+ * OcfsCoalesceExtentMapEntry()
+ *
+ * Must call this with spinlock already held!
+ *
+ * Tries to merge the run (virtual, physical, sectorcount) into the
+ * entries already in map.  Every entry the run touches in the same way
+ * in virtual and in physical space is absorbed; the merged run is then
+ * written to the first absorbed slot and the other absorbed slots are
+ * filled from the tail of the map.
+ *
+ * Returns true when the run is now represented in the map, false when
+ * nothing could be merged (the caller then appends a new entry) or when
+ * the map is not initialized.
+ */
+static bool OcfsCoalesceExtentMapEntry (ocfs_extent_map * map,
+				sb8 virtual, sb8 physical, sb8 sectorcount)
+{
+	ocfs_extent *tmp, *tmp2;
+	int i, voverlap, loverlap, newIdx;
+	bool ret = false;
+
+	LOG_ENTRY ();
+
+	if (!map->initialized) {
+		LOG_ERROR_STR ("ExtentMap is not initialized");
+		goto bail;
+	}
+
+	/* attempt to coalesce this into an existing entry */
+
+	/*
+	 * NOTE: if we are successful in coalescing this entry with an entry from somewhere
+	 *       in the list, we still need to check the rest of the list in case this entry
+	 *       ends up filling one or more holes
+	 *                       |---- this ----|
+	 *           |-- found --|              |-- another entry --|
+	 *                         |---|  <--- yet another entry
+	 */
+
+	newIdx = -1;
+	for (i = 0; i < map->count; i++) {
+		tmp = GET_EXTENT_MAP_ENTRY (map, i);
+		voverlap =
+		    get_overlap_type (virtual, virtual + sectorcount,
+				      tmp->virtual,
+				      tmp->virtual + tmp->sectors);
+		loverlap =
+		    get_overlap_type (physical, physical + sectorcount,
+				      tmp->physical,
+				      tmp->physical + tmp->sectors);
+
+		/* first off, if the virtual range and real range don't */
+		/* overlap in the same way it definitely can't be coalesced */
+		if (voverlap != loverlap)
+			continue;
+
+		switch (voverlap) {
+		case FULLY_CONTAINED:	/* already fully accounted for, done */
+			ret = true;
+			goto bail;
+			break;
+
+		case LEFT_ADJACENT:	/* add new left part to found entry */
+			sectorcount += tmp->sectors;
+			tmp->sectors = 0;	/* mark for deletion */
+			ret = true;
+			break;
+
+		case RIGHT_ADJACENT:	/* add new right part to found entry */
+			virtual = tmp->virtual;
+			physical = tmp->physical;
+			sectorcount += tmp->sectors;
+			tmp->sectors = 0;	/* mark for deletion */
+			ret = true;
+			break;
+
+		case FULLY_CONTAINING:	/* completely take over this entry */
+			tmp->sectors = 0;	/* mark for deletion */
+			ret = true;
+			break;
+
+		case LEFT_OVERLAP:	/* should begin at new physical/virtual, end at old end */
+			if ((tmp->virtual - virtual) == (tmp->physical - physical))
+			{
+				/* must be same distance from edge */
+				sectorcount =
+				    tmp->sectors + (tmp->virtual - virtual);
+				tmp->sectors = 0;	/* mark for deletion */
+				ret = true;
+			}
+			break;
+
+		case RIGHT_OVERLAP:	/* should begin at old physical/virtual, end at new end */
+			if ((virtual - tmp->virtual) ==
+			    (physical - tmp->physical)) {
+				/* grow the length by our offset into the old */
+				/* entry while virtual still holds the new start */
+				sectorcount += virtual - tmp->virtual;
+				virtual = tmp->virtual;
+				physical = tmp->physical;
+				tmp->sectors = 0;	/* mark for deletion */
+				ret = true;
+			}
+			break;
+
+		case LEFT_NO_OVERLAP:	/* keep looking */
+		case RIGHT_NO_OVERLAP:
+			break;
+		}
+
+		if (tmp->sectors == 0) {
+			if (newIdx == -1)	/* first time thru, this is where we */
+				/* will put the coalesced entry */
+				newIdx = i;
+			else {
+				/* otherwise swap the tail with the current... */
+				tmp2 = GET_EXTENT_MAP_ENTRY (map, map->count - 1);
+				tmp->virtual = tmp2->virtual;
+				tmp->physical = tmp2->physical;
+				tmp->sectors = tmp2->sectors;
+				tmp2->sectors = 0;
+				map->count--;	/* ...and dump the tail */
+				/* the entry now sitting in slot i has not been */
+				/* compared yet, so visit this slot once more */
+				i--;
+			}
+		}
+	}
+
+	if (newIdx != -1) {	/* finally, stick the coalesced thing into newIdx */
+		tmp = GET_EXTENT_MAP_ENTRY (map, newIdx);
+		tmp->virtual = virtual;
+		tmp->physical = physical;
+		tmp->sectors = sectorcount;
+	}
+
+      bail:
+
+	LOG_EXIT_ULONG (ret);
+	return ret;
+}				/* OcfsCoalesceExtentMapEntry */
+
+/*
+ * ocfs_extent_map_add()
+ *
+ * Records the run (virtual, physical, sectorcount) in map, merging it
+ * with existing entries where possible and growing the entry buffer
+ * when it is full.
+ *
+ * Returns true when the run is in the map afterwards, false when the
+ * map is not initialized or the buffer could not be grown.
+ *
+ * NOTE(review): the buffer is grown with ocfs_malloc() while map->lock
+ * is held; if ocfs_malloc() can sleep (ocfs_linux_dbg_alloc() allocates
+ * with GFP_KERNEL) this needs restructuring -- confirm.
+ */
+bool ocfs_extent_map_add (ocfs_extent_map * map, sb8 virtual, sb8 physical,
+		   sb8 sectorcount)
+{
+	ocfs_extent *tmp;
+	void *newpool;
+	ub4 newmax;
+	bool ret = false;
+
+	LOG_ENTRY ();
+
+	OCFS_ASSERT (map != NULL);
+
+	if (!map->initialized) {
+		LOG_ERROR_STR ("ExtentMap is not initialized");
+		goto bail;
+	}
+	spin_lock(&(map->lock));
+
+	if ((ret =
+	     OcfsCoalesceExtentMapEntry (map, virtual, physical,
+					 sectorcount))) {
+		LOG_TRACE_STR ("Successfully coalesced map entry");
+		goto release_spinlock;
+	}
+
+	/* if extra allocation needed, do it now */
+	if (map->count >= map->capacity) {
+		/* TODO: come up with some better algorithm, */
+		/* for now: first-double size, second-just one more */
+		newmax =
+		    (map->capacity >
+		     0) ? map->capacity * 2 : INITIAL_EXTENT_MAP_SIZE;
+		newpool = ocfs_malloc (newmax * sizeof (ocfs_extent));
+		if (newpool == NULL && newmax != INITIAL_EXTENT_MAP_SIZE) {
+			newmax = map->capacity + 1;
+			newpool = ocfs_malloc (newmax * sizeof (ocfs_extent));
+		}
+		if (newpool == NULL) {
+			LOG_ERROR_STR ("failed to alloc new additional pool");
+			goto release_spinlock;
+		}
+		if (map->buf && map->capacity)
+			memcpy (newpool, map->buf,
+				map->capacity * sizeof (ocfs_extent));
+		ocfs_safefree (map->buf);
+		map->buf = newpool;
+		map->capacity = newmax;
+	}
+
+	tmp = GET_EXTENT_MAP_ENTRY (map, map->count);
+	tmp->virtual = virtual;
+	tmp->physical = physical;
+	tmp->sectors = sectorcount;
+	map->count++;
+	ret = true;
+
+release_spinlock:
+	spin_unlock(&(map->lock));
+
+bail:
+	LOG_EXIT_ULONG (ret);
+	return ret;
+}				/* ocfs_extent_map_add */
+
+/*
+ * ocfs_extent_map_remove()
+ *
+ * Removes the entry that covers exactly the virtual range
+ * [virtual, virtual + sectorcount).  The freed slot is filled with the
+ * last entry of the map.  Ranges that only partly match an entry are
+ * left alone, since entries are never split.
+ */
+void ocfs_extent_map_remove (ocfs_extent_map * map, sb8 virtual, sb8 sectorcount)
+{
+	ocfs_extent *tmp;
+	ub4 i;
+	int voverlap;
+
+	LOG_ENTRY ();
+
+	OCFS_ASSERT (map != NULL);
+
+	if (!map->initialized)
+		goto bail;
+	spin_lock(&(map->lock));
+	for (i = 0; i < map->count; i++) {
+		tmp = GET_EXTENT_MAP_ENTRY (map, i);
+		voverlap =
+		    get_overlap_type (virtual, virtual + sectorcount,
+				      tmp->virtual,
+				      tmp->virtual + tmp->sectors);
+		switch (voverlap) {
+		case FULLY_CONTAINED:
+			/* for now, don't allow splitting of entries */
+			if (virtual == tmp->virtual
+			    && sectorcount == tmp->sectors) {
+				if (i != map->count - 1)
+					memcpy ((void *) tmp, (void *)
+						GET_EXTENT_MAP_ENTRY (map,
+								      (map->count - 1)),
+						sizeof (ocfs_extent));
+				map->count--;
+				goto release_spinlock;
+			}
+			break;
+		default:	/* all others would be an error */
+			break;
+		}
+	}
+
+release_spinlock:
+	spin_unlock(&(map->lock));
+bail:
+
+	LOG_EXIT ();
+	return;
+}				/* ocfs_extent_map_remove */
+
+/*
+ * ocfs_extent_map_lookup()
+ *
+ * Finds the entry whose virtual range contains virtual.  On a hit
+ * *physical receives the matching physical position, *sectorcount the
+ * number of sectors left in that entry from there on, and *index the
+ * position of the entry after the hit.  On a miss *index is the entry
+ * count (0 when the map is not initialized).
+ *
+ * Returns true on a hit.
+ */
+bool ocfs_extent_map_lookup (ocfs_extent_map *map, sb8 virtual, sb8 *physical,
+		      sb8 *sectorcount, ub4 *index)
+{
+	ocfs_extent *tmp;
+	bool ret = false;
+	ub4 idx = 0;
+
+	LOG_ENTRY ();
+
+	OCFS_ASSERT (map != NULL);
+
+	if (!map->initialized) {
+		LOG_ERROR_STR ("BUG! Uninitialized ExtentMap!");
+		goto bail;
+	}
+	spin_lock(&(map->lock));
+
+	for (idx = 0; idx < map->count; idx++) {
+		sb8 hi, lo, delta;
+
+		tmp = GET_EXTENT_MAP_ENTRY (map, idx);
+
+		lo = tmp->virtual;
+		hi = lo + tmp->sectors;
+		delta = virtual - lo;
+
+		if (virtual >= lo && virtual < hi) {
+			*physical = tmp->physical + delta;
+			*sectorcount = tmp->sectors - delta;
+			idx++;
+			ret = true;
+			break;
+		}
+	}
+	spin_unlock(&(map->lock));
+
+bail:
+	*index = idx;
+
+	LOG_EXIT_ULONG (ret);
+	return ret;
+}				/* ocfs_extent_map_lookup */
+
+/*
+ * ocfs_extent_map_next_entry()
+ *
+ * Copies entry number runindex of map into *virtual, *physical and
+ * *sectorcount.  Returns false, leaving the outputs untouched, when the
+ * map is not initialized or runindex is past the last entry.
+ */
+bool ocfs_extent_map_next_entry (ocfs_extent_map *map, ub4 runindex,
+			 sb8 *virtual, sb8 *physical, sb8 *sectorcount)
+{
+	ocfs_extent *tmp;
+	bool ret = false;
+
+	LOG_ENTRY ();
+
+	OCFS_ASSERT (map != NULL);
+
+	if (!map->initialized)
+		goto bail;
+	spin_lock(&(map->lock));
+	if (runindex >= map->count)
+		goto release_spinlock;
+	tmp = GET_EXTENT_MAP_ENTRY (map, runindex);
+	*virtual = tmp->virtual;
+	*physical = tmp->physical;
+	*sectorcount = tmp->sectors;
+	ret = true;
+
+release_spinlock:
+	spin_unlock(&(map->lock));
+bail:
+
+	LOG_EXIT_ULONG (ret);
+	return ret;
+}				/* ocfs_extent_map_next_entry */
diff -urNp ocfs/fs/ocfs/Linux/ocfsproc.c 2.4.20pre5aa2/fs/ocfs/Linux/ocfsproc.c
--- ocfs/fs/ocfs/Linux/ocfsproc.c Thu Jan 1 01:00:00 1970
+++ 2.4.20pre5aa2/fs/ocfs/Linux/ocfsproc.c Fri Sep 6 01:46:16 2002
@@ -0,0 +1,402 @@
+/*
+ * ocfsproc.c
+ *
+ * ocfs proc interface
+ *
+ * Copyright (C) 2002 Oracle Corporation.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran,
+ *          Manish Singh, Wim Coekaerts
+ */
+
+#define OCFSPROC_PRIVATE_DECLS
+
+#include
+
+/* Tracing */
+#define OCFS_DEBUG_CONTEXT    OCFS_DEBUG_CONTEXT_PROC
+
+#ifdef OCFS_LINUX_MEM_DEBUG
+extern struct list_head item_list;
+#endif
+
+/*
+ * ocfs_proc_init()
+ *
+ * Creates the /proc "ocfs" directory and the driver-wide read-only
+ * entries below it: version, nodename and, with OCFS_LINUX_MEM_DEBUG,
+ * memallocs.  Failures to create an entry are not reported; the
+ * function always returns 0.
+ */
+int ocfs_proc_init (void)
+{
+	static struct
+	{
+		char *name;
+		char *data;
+		int (*read_proc) (char *, char **, off_t, int, int *, void *);
+	}
+	*p, ProcList[] =
+	{
+		{ "ocfs/version", NULL, ocfs_proc_version },
+		{ "ocfs/nodename", NULL, ocfs_proc_nodename },
+#ifdef OCFS_LINUX_MEM_DEBUG
+		{ "ocfs/memallocs", NULL, ocfs_proc_memallocs },
+#endif
+		{ NULL, }
+	};
+
+	LOG_ENTRY ();
+
+	proc_mkdir ("ocfs", 0);
+
+	/* the table ends with an entry whose name is NULL */
+	for (p = ProcList; p->name; p++)
+		create_proc_read_entry (p->name, 0, NULL, p->read_proc,
+					p->data);
+
+	LOG_EXIT_LONG (0);
+	return 0;
+}				/* ocfs_proc_init */
+
+/*
+ * ocfs_proc_deinit()
+ *
+ * Removes what ocfs_proc_init() created: the entries first, then the
+ * "ocfs" directory itself.
+ */
+void ocfs_proc_deinit (void)
+{
+	LOG_ENTRY ();
+
+	remove_proc_entry ("ocfs/version", NULL);
+	remove_proc_entry ("ocfs/nodename", NULL);
+#ifdef OCFS_LINUX_MEM_DEBUG
+	/* only exists when ocfs_proc_init() was built with it */
+	remove_proc_entry ("ocfs/memallocs", NULL);
+#endif
+	remove_proc_entry ("ocfs", NULL);
+
+	LOG_EXIT ();
+	return;
+}				/* ocfs_proc_deinit */
+
+/*
+ * 
ocfs_proc_calc_metrics()
+ *
+ * Common tail of the read_proc handlers.  page holds len bytes of
+ * output; the reader wants count bytes starting at offset off.  Points
+ * *start at the requested offset, sets *eof when the request reaches
+ * the end of the output and returns the number of bytes available from
+ * off (between 0 and count).
+ */
+static int ocfs_proc_calc_metrics (char *page, char **start, off_t off,
+				   int count, int *eof, int len)
+{
+	LOG_ENTRY ();
+
+	if (len <= off + count)
+		*eof = 1;
+
+	*start = page + off;
+
+	len -= off;
+
+	if (len > count)
+		len = count;
+
+	if (len < 0)
+		len = 0;
+
+	LOG_EXIT ();
+	return len;
+}				/* ocfs_proc_calc_metrics */
+
+#ifdef OCFS_LINUX_MEM_DEBUG
+/*
+ * ocfs_proc_memallocs()
+ *
+ * read_proc handler listing every allocation recorded on item_list as
+ * "pointer, size, line:file".  The listing is cut off once 4096 bytes
+ * of page have been filled.
+ */
+static int ocfs_proc_memallocs (char *page, char **start, off_t off,
+				int count, int *eof, void *data)
+{
+	int ret;
+	struct list_head *iter;
+	alloc_item *item;
+	int len = 0;
+
+	LOG_ENTRY ();
+
+	ret = sprintf ((char *) (page + len), "Pointer \tSize\tLine:File\n");
+	len += ret;
+
+	list_for_each (iter, &item_list) {
+		int room = 4096 - len;
+
+		if (room <= 0) {
+			LOG_ERROR_STR ("proc file truncated!\n");
+			break;
+		}
+		item = list_entry (iter, alloc_item, list);
+		ret =
+		    snprintf ((char *) (page + len), room,
+			      "%x\t%d\t%s\n", item->address, item->length,
+			      item->tag);
+		if (ret < 0) {
+			LOG_ERROR_STR ("uh oh failed to sprintf!\n");
+			break;
+		}
+		if (ret >= room) {
+			/* the line did not fit: count only the bytes that */
+			/* were stored, so len never points past the page */
+			len += room - 1;
+			LOG_ERROR_STR ("proc file truncated!\n");
+			break;
+		}
+		len += ret;
+	}
+
+	ret = ocfs_proc_calc_metrics (page, start, off, count, eof, len);
+
+	LOG_EXIT_LONG (ret);
+	return ret;
+}				/* ocfs_proc_memallocs */
+#endif
+
+/*
+ * ocfs_proc_version()
+ *
+ * read_proc handler printing OcfsVersion followed by a newline.
+ */
+static int ocfs_proc_version (char *page, char **start, off_t off,
+			      int count, int *eof, void *data)
+{
+	extern char *OcfsVersion;
+	int len;
+	int ret;
+
+	LOG_ENTRY ();
+
+	strcpy (page, OcfsVersion);
+	strcat (page, "\n");
+	len = strlen (page);
+
+	ret = ocfs_proc_calc_metrics (page, start, off, count, eof, len);
+
+	LOG_EXIT_LONG (ret);
+	return ret;
+}				/* ocfs_proc_version */
+
+/*
+ * ocfs_proc_nodenum()
+ *
+ * read_proc handler printing the node number of the volume whose
+ * ocfs_super is passed as data.
+ */
+static int ocfs_proc_nodenum (char *page, char **start, off_t off,
+			      int count, int *eof, void *data)
+{
+	int len;
+	int ret;
+	ocfs_super *osb;
+
+	LOG_ENTRY ();
+
+	osb = (ocfs_super *) data;
+	sprintf (page, "%d\n", osb->node_num);
+	len = strlen (page);
+
+	ret = ocfs_proc_calc_metrics (page, start, off, count, eof, len);
+
+	LOG_EXIT_LONG (ret);
+	return ret;
+}				/* ocfs_proc_nodenum */
+
+/*
+ * ocfs_proc_nodename()
+ *
+ * read_proc handler printing OcfsGlobalCtxt.node_name followed by a
+ * newline.
+ */
+static int ocfs_proc_nodename (char *page, char **start, off_t off,
+			       int count, int *eof, void *data)
+{
+	int len;
+	int ret;
+
+	LOG_ENTRY ();
+
+	strcpy (page, OcfsGlobalCtxt.node_name);
+	strcat (page, "\n");
+	len = strlen (page);
+
+	ret = ocfs_proc_calc_metrics (page, start, off, count, eof, len);
+
+	LOG_EXIT_LONG (ret);
+	return ret;
+}				/* ocfs_proc_nodename */
+
+/*
+ * ocfs_proc_add_volume()
+ *
+ * Creates the directory "ocfs/<osb_id>" in /proc and, below it, the
+ * entries nodenum, mountpoint, statistics and hashstat for the volume.
+ *
+ * The entry table is static and its data pointers are rewritten on
+ * every call, so this function is not reentrant.
+ */
+void ocfs_proc_add_volume (ocfs_super * osb)
+{
+	char newdir[20];
+	char tmp[50];
+	static struct
+	{
+		char *name;
+		char *data;
+		int (*read_proc) (char *, char **, off_t, int, int *, void *);
+	}
+	*p, ProcList[] =
+	{
+		{ "nodenum", NULL, ocfs_proc_nodenum },
+		{ "mountpoint", NULL, ocfs_proc_mountpoint },
+		{ "statistics", NULL, ocfs_proc_statistics },
+		{ "hashstat", NULL, ocfs_proc_hash_stats },
+		{ NULL, }
+	};
+
+	LOG_ENTRY ();
+
+	ProcList[0].data = (char *) osb;
+	ProcList[1].data = osb->vol_layout.mount_point;
+	ProcList[2].data = (char *) osb;
+	ProcList[3].data = (char *) osb;
+
+	sprintf (newdir, "ocfs/%-d", osb->osb_id);
+	proc_mkdir (newdir, 0);
+
+	for (p = ProcList; p->name; p++) {
+		sprintf (tmp, "%s/%s", newdir, p->name);
+		create_proc_read_entry (tmp, 0, NULL, p->read_proc, p->data);
+	}
+
+	LOG_EXIT ();
+	return;
+}				/* ocfs_proc_add_volume */
+
+/*
+ * ocfs_proc_remove_volume()
+ *
+ * Removes every /proc entry that ocfs_proc_add_volume() created for the
+ * volume, then the "ocfs/<osb_id>" directory itself.
+ */
+void ocfs_proc_remove_volume (ocfs_super * osb)
+{
+	char tmp[50];
+
+	LOG_ENTRY ();
+
+	sprintf (tmp, "ocfs/%-d/nodenum", osb->osb_id);
+	remove_proc_entry (tmp, NULL);
+
+	sprintf (tmp, "ocfs/%-d/mountpoint", osb->osb_id);
+	remove_proc_entry (tmp, NULL);
+
+	sprintf (tmp, "ocfs/%-d/statistics", osb->osb_id);
+	remove_proc_entry (tmp, NULL);
+
+	/* hashstat is created together with the others and has to go */
+	/* before its directory is removed */
+	sprintf (tmp, "ocfs/%-d/hashstat", osb->osb_id);
+	remove_proc_entry (tmp, NULL);
+
+	sprintf (tmp, "ocfs/%-d", osb->osb_id);
+	remove_proc_entry (tmp, NULL);
+
+	LOG_EXIT ();
+	return;
+}				/* ocfs_proc_remove_volume */
+
+/*
+ 
* ocfs_proc_mountpoint()
+ *
+ * read_proc handler: data is the NUL-terminated string registered for the
+ * "mountpoint" entry by ocfs_proc_add_volume().  It is copied into page
+ * followed by a newline; the return value is the result of
+ * ocfs_proc_calc_metrics() for that length.
+ */
+static int ocfs_proc_mountpoint (char *page, char **start, off_t off,
+				 int count, int *eof, void *data)
+{
+	int len;
+	int ret;
+
+	LOG_ENTRY ();
+
+	strcpy (page, data);
+	strcat (page, "\n");
+	len = strlen (page);
+
+	ret = ocfs_proc_calc_metrics (page, start, off, count, eof, len);
+
+	LOG_EXIT_LONG (ret);
+	return ret;
+}				/* ocfs_proc_mountpoint */
+
+/*
+ * ocfs_proc_statistics()
+ *
+ * read_proc handler: data is the ocfs_super of the volume.  Formats the
+ * open count, the list of bits set in publ_map, the volume layout sizes
+ * and the cluster bitmap allocation counters into page and returns the
+ * result of ocfs_proc_calc_metrics() for the formatted length.
+ */
+static int ocfs_proc_statistics (char *page, char **start, off_t off,
+				 int count, int *eof, void *data)
+{
+	int len;
+	char pubmap[100];
+	ocfs_super *osb;
+	ocfs_vol_layout *VolLayout;
+	int ret, i;
+	char *ptr;
+
+	LOG_ENTRY ();
+
+	osb = (ocfs_super *) data;
+	VolLayout = &(osb->vol_layout);
+
+	/*
+	 * Build a space separated list of the bit numbers set in publ_map.
+	 * Start from an empty string: with no bit set nothing is written
+	 * below and "%s" would otherwise print uninitialised stack.
+	 * Worst case (all 32 bits) is 86 characters, so 100 bytes suffice.
+	 */
+	pubmap[0] = '\0';
+	ptr = pubmap;
+	for (i = 0; i < 32; i++) {
+		/* 1U: shifting a signed 1 into bit 31 is undefined */
+		if (osb->publ_map & (1U << i))
+			ptr += sprintf (ptr, "%d ", i);
+	}
+	if (pubmap != ptr)
+		*(ptr - 1) = '\0';	/* drop the trailing space */
+
+#define PROC_STATS \
+	"File open count : %d.%u\n" \
+	"Publish map : %s\n" \
+	"Number of nodes : %u\n" \
+	"Cluster size : %u\n" \
+	"Volume size : %u.%u\n" \
+	"Dir node size : %u.%u\n" \
+	"File node size : %u.%u\n" \
+	"Failed Large Allocs : %u\n" \
+	"Retry Large Allocs : %u\n"
+
+	/*
+	 * Format straight into page.  The same bytes used to be built in a
+	 * 1k buffer on the stack and then copied here, so the space needed
+	 * in page is unchanged while the stack footprint shrinks.
+	 */
+	len = sprintf (page, PROC_STATS, HI (osb->file_open_cnt),
+		       LO (osb->file_open_cnt), pubmap, VolLayout->num_nodes,
+		       VolLayout->cluster_size, HI (VolLayout->size),
+		       LO (VolLayout->size), HI (VolLayout->dir_node_size),
+		       LO (VolLayout->dir_node_size), HI (VolLayout->file_node_size),
+		       LO (VolLayout->file_node_size), osb->cluster_bitmap.failed,
+		       osb->cluster_bitmap.ok_retries);
+
+	ret = ocfs_proc_calc_metrics (page, start, off, count, eof, len);
+
+	LOG_EXIT_LONG (ret);
+	return ret;
+}				/* ocfs_proc_statistics */
+
+/*
+ * ocfs_proc_hash_stats()
+ *
+ */
+static int ocfs_proc_hash_stats (char *page, char **start, off_t off,
+				 int count, int *eof, void *data)
+{
+	int len;
+	int ret;
+	ocfs_super *osb;
+	char tmp[HASHSTAT_BUFLEN];
+
+	LOG_ENTRY
(); + + osb = (ocfs_super *) data; + + ocfs_hash_stat (&(osb->root_sect_node), tmp, HASHSTAT_BUFLEN); + + strcpy (page, tmp); + len = strlen (page); + + ret = ocfs_proc_calc_metrics (page, start, off, count, eof, len); + + LOG_EXIT_LONG (ret); + return ret; +} /* ocfs_proc_hash_stats */ diff -urNp ocfs/fs/ocfs/Makefile 2.4.20pre5aa2/fs/ocfs/Makefile --- ocfs/fs/ocfs/Makefile Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Makefile Fri Sep 6 01:46:16 2002 @@ -0,0 +1,74 @@ +O_TARGET := ocfs.o + +VERFILE = Common/ocfsver.c + +CPARTNER = \ + Common/ocfsgenalloc.c \ + Common/ocfsgensysfile.c \ + Common/ocfsgencreate.c \ + Common/ocfsgenutil.c \ + Common/ocfsgenmisc.c \ + Common/ocfsgentrans.c \ + Common/ocfsgendirnode.c \ + Common/ocfsgenvolcfg.c \ + Linux/ocfsiosup.c \ + Linux/ocfsmount.c \ + Linux/ocfsfile.c \ + Linux/ocfsmain.c \ + Linux/ocfsport.c \ + Linux/ocfsbitmap.c \ + Linux/ocfsproc.c \ + Linux/ocfsioctl.c \ + Linux/ocfshash.c \ + Linux/ocfsconf.c \ + Linux/ocfsfilp.c \ + Linux/ocfsipc.c \ + Common/ocfsgennm.c \ + Common/ocfsgenvote.c \ + Common/ocfsgendlm.c + +CALONE = Linux/ocfsdlm.c + +SUPPORT = Support/divdi3.c + +CFILES = $(CPARTNER) $(CALONE) $(SUPPORT) + +HTEMP1 = $(subst .c,.h,$(CPARTNER)) +HTEMP2 = $(subst Common,Common/inc,$(HTEMP1)) +HPARTNER = $(subst Linux,Linux/inc,$(HTEMP2)) + +HALONE = \ + Common/inc/ocfs.h \ + Common/inc/ocfsdef.h \ + Common/inc/ocfserr.h \ + Common/inc/ocfstrace.h \ + Common/inc/ocfsvol.h \ + Common/inc/ocfscom.h \ + Common/inc/ocfsdisk.h \ + Common/inc/ocfstrans.h \ + Common/inc/ocfsconst.h \ + Common/inc/ocfsdlm.h \ + Common/inc/ocfsver.h \ + Linux/inc/ocfsdlmp.h + +HFILES = $(HPARTNER) $(HALONE) + +OBJS = $(subst .c,.o,$(CFILES)) +VEROBJ = $(subst .c,.o,$(VERFILE)) + +obj-y := $(OBJS) $(VEROBJ) +obj-m := $(O_TARGET) + +EXTRA_CFLAGS += -DALLOW_INODE_UPDATES_WITH_NO_OIN +EXTRA_CFLAGS += -DPARANOID_LOCKS +EXTRA_CFLAGS += -ICommon/inc -ILinux/inc + +include $(TOPDIR)/Rules.make + +$(CFILES): $(HFILES) + +$(VERFILE): 
ocfsver.sh $(CFILES) $(HFILES) + @/bin/sh ./ocfsver.sh $@ $(VERSION) $(CFILES) $(HFILES) + +clean: + rm -f Common/*.o Linux/*.o Common/*.p Linux/*.p Common/*.s Linux/*.s $(VERFILE) diff -urNp ocfs/fs/ocfs/Support/divdi3.c 2.4.20pre5aa2/fs/ocfs/Support/divdi3.c --- ocfs/fs/ocfs/Support/divdi3.c Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/Support/divdi3.c Fri Sep 6 01:46:16 2002 @@ -0,0 +1,593 @@ +/* Ripped from gcc-2.95.1/gcc/libgcc2.c */ + +/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you link this library with other files, + some of which are compiled with GCC, to produce an executable, + this library does not by itself cause the resulting executable + to be covered by the GNU General Public License. + This exception does not however invalidate any other reasons why + the executable file might be covered by the GNU General Public License. 
*/ + +typedef unsigned int UQItype __attribute__ ((mode (QI))); +typedef int SItype __attribute__ ((mode (SI))); +typedef unsigned int USItype __attribute__ ((mode (SI))); +typedef int DItype __attribute__ ((mode (DI))); +typedef unsigned int UDItype __attribute__ ((mode (DI))); + +typedef int word_type __attribute__ ((mode (__word__))); + +/* Make sure that we don't accidentally use any normal C language built-in + type names in the first part of this file. Instead we want to use *only* + the type names defined above. The following macro definitions insure + that if we *do* accidentally use some normal C language built-in type name, + we will get a syntax error. */ + +#define char bogus_type +#define short bogus_type +#define int bogus_type +#define long bogus_type +#define unsigned bogus_type +#define float bogus_type +#define double bogus_type + +/* DIstructs are pairs of SItype values in the order determined by + little/big ENDIAN. */ + +#ifdef __i386__ + struct DIstruct {SItype low, high;}; +#endif +#ifdef __powerpc__ + struct DIstruct {SItype high, low;}; +#endif +#ifdef __s390__ + struct DIstruct {SItype high, low;}; +#endif + + +/* We need this union to unpack/pack DImode values, since we don't have + any arithmetic yet. Incoming DImode parameters are stored into the + `ll' field, and the unpacked result is read from the struct `s'. 
*/ + +typedef union +{ + struct DIstruct s; + DItype ll; +} DIunion; + + +/* From gcc-2.95.1/gcc/longlong.h */ + +#ifndef SI_TYPE_SIZE +#define SI_TYPE_SIZE 32 +#endif + +#define __BITS4 (SI_TYPE_SIZE / 4) +#define __ll_B (1L << (SI_TYPE_SIZE / 2)) +#define __ll_lowpart(t) ((USItype) (t) % __ll_B) +#define __ll_highpart(t) ((USItype) (t) / __ll_B) + +#ifdef __i386__ +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subl %5,%1 + sbbl %3,%0" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "0" ((USItype) (ah)), \ + "g" ((USItype) (bh)), \ + "1" ((USItype) (al)), \ + "g" ((USItype) (bl))) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mull %3" \ + : "=a" ((USItype) (w0)), \ + "=d" ((USItype) (w1)) \ + : "%0" ((USItype) (u)), \ + "rm" ((USItype) (v))) +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divl %4" \ + : "=a" ((USItype) (q)), \ + "=d" ((USItype) (r)) \ + : "0" ((USItype) (n0)), \ + "1" ((USItype) (n1)), \ + "rm" ((USItype) (d))) +#define count_leading_zeros(count, x) \ + do { \ + USItype __cbtmp; \ + __asm__ ("bsrl %1,%0" \ + : "=r" (__cbtmp) : "rm" ((USItype) (x))); \ + (count) = __cbtmp ^ 31; \ + } while (0) +#endif /* __i386__ */ + +#ifdef __powerpc__ +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (ah) && (ah) == 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "r" ((USItype) (bh)), \ + "rI" ((USItype) (al)), \ + "r" ((USItype) (bl))); \ + else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "r" ((USItype) (bh)), \ + "rI" ((USItype) (al)), \ + "r" ((USItype) (bl))); \ + else if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "r" ((USItype) (ah)), \ + "rI" ((USItype) (al)), \ + 
"r" ((USItype) (bl))); \ + else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "r" ((USItype) (ah)), \ + "rI" ((USItype) (al)), \ + "r" ((USItype) (bl))); \ + else \ + __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "r" ((USItype) (ah)), \ + "r" ((USItype) (bh)), \ + "rI" ((USItype) (al)), \ + "r" ((USItype) (bl))); \ + } while (0) +#define count_leading_zeros(count, x) \ + __asm__ ("{cntlz|cntlzw} %0,%1" \ + : "=r" ((USItype) (count)) \ + : "r" ((USItype) (x))) +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhwu %0,%1,%2" \ + : "=r" ((USItype) ph) \ + : "%r" (__m0), \ + "r" (__m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#endif /* __powerpc__ */ + +#ifdef __s390__ +#define sub_ddmmss(sh, sl, ah, al, bh, bl) ({ \ + USItype __sh = (ah); \ + USItype __sl = (al); \ + __asm__ (" slr %1,%3\n" \ + " brc 3,0f\n" \ + " ahi %0,-1\n" \ + "0: slr %0,%2" \ + : "+&d" (__sh), "+d" (__sl) \ + : "d" (bh), "d" (bl) : "cc" ); \ + (sh) = __sh; \ + (sl) = __sl; \ +}) +#define umul_ppmm(wh, wl, u, v) ({ \ + USItype __wh = (u); \ + USItype __wl = (v); \ + __asm__ (" ltr 1,%0\n" \ + " mr 0,%1\n" \ + " jnm 0f\n" \ + " alr 0,%1\n" \ + "0: ltr %1,%1\n" \ + " jnm 1f\n" \ + " alr 0,%0\n" \ + "1: lr %0,0\n" \ + " lr %1,1\n" \ + : "+d" (__wh), "+d" (__wl) \ + : : "0", "1", "cc" ); \ + (wh) = __wh; \ + (wl) = __wl; \ +}) +static const UQItype __clz_tab[] = +{ + 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +}; +#define count_leading_zeros(count, x) \ + do { \ + USItype __xr = (x); \ + USItype __a; \ + \ + if (SI_TYPE_SIZE <= 32) \ + { \ + __a = __xr < ((USItype)1<<2*__BITS4) \ + ? (__xr < ((USItype)1<<__BITS4) ? 0 : __BITS4) \ + : (__xr < ((USItype)1<<3*__BITS4) ? 2*__BITS4 : 3*__BITS4); \ + } \ + else \ + { \ + for (__a = SI_TYPE_SIZE - 8; __a > 0; __a -= 8) \ + if (((__xr >> __a) & 0xff) != 0) \ + break; \ + } \ + \ + (count) = SI_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \ + } while (0) +#endif + +/* If this machine has no inline assembler, use C macros. */ + +/* Define this unconditionally, so it can be used for debugging. */ +#define __udiv_qrnnd_c(q, r, n1, n0, d) \ + do { \ + USItype __d1, __d0, __q1, __q0; \ + USItype __r1, __r0, __m; \ + __d1 = __ll_highpart (d); \ + __d0 = __ll_lowpart (d); \ + \ + __r1 = (n1) % __d1; \ + __q1 = (n1) / __d1; \ + __m = (USItype) __q1 * __d0; \ + __r1 = __r1 * __ll_B | __ll_highpart (n0); \ + if (__r1 < __m) \ + { \ + __q1--, __r1 += (d); \ + if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\ + if (__r1 < __m) \ + __q1--, __r1 += (d); \ + } \ + __r1 -= __m; \ + \ + __r0 = __r1 % __d1; \ + __q0 = __r1 / __d1; \ + __m = (USItype) __q0 * __d0; \ + __r0 = __r0 * __ll_B | __ll_lowpart (n0); \ + if (__r0 < __m) \ + { \ + __q0--, __r0 += (d); \ + if (__r0 >= (d)) \ + if (__r0 < __m) \ + __q0--, __r0 += (d); \ + } \ + __r0 -= __m; \ + \ + (q) = (USItype) __q1 * __ll_B | __q0; \ + (r) = __r0; \ + } while (0) + +/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. 
*/ +#if !defined (udiv_qrnnd) +#define UDIV_NEEDS_NORMALIZATION 1 +#define udiv_qrnnd __udiv_qrnnd_c +#endif + +/* End of gcc-2.95.1/gcc/longlong.h */ + + +DItype __divdi3 (DItype u, DItype v); +DItype __moddi3 (DItype u, DItype v); +UDItype __umoddi3 (UDItype u, UDItype v); +UDItype __udivdi3 (UDItype n, UDItype d); + + +static inline +DItype +__negdi2 (DItype u) +{ + DIunion w; + DIunion uu; + + uu.ll = u; + + w.s.low = -uu.s.low; + w.s.high = -uu.s.high - ((USItype) w.s.low > 0); + + return w.ll; +} + + +static inline +UDItype +__udivmoddi4 (UDItype n, UDItype d, UDItype *rp) +{ + DIunion ww; + DIunion nn, dd; + DIunion rr; + USItype d0, d1, n0, n1, n2; + USItype q0, q1; + USItype b, bm; + + nn.ll = n; + dd.ll = d; + + d0 = dd.s.low; + d1 = dd.s.high; + n0 = nn.s.low; + n1 = nn.s.high; + +#if !UDIV_NEEDS_NORMALIZATION + if (d1 == 0) + { + if (d0 > n1) + { + /* 0q = nn / 0D */ + + udiv_qrnnd (q0, n0, n1, n0, d0); + q1 = 0; + + /* Remainder in n0. */ + } + else + { + /* qq = NN / 0d */ + + if (d0 == 0) + d0 = 1 / d0; /* Divide intentionally by zero. */ + + udiv_qrnnd (q1, n1, 0, n1, d0); + udiv_qrnnd (q0, n0, n1, n0, d0); + + /* Remainder in n0. */ + } + + if (rp != 0) + { + rr.s.low = n0; + rr.s.high = 0; + *rp = rr.ll; + } + } + +#else /* UDIV_NEEDS_NORMALIZATION */ + + if (d1 == 0) + { + if (d0 > n1) + { + /* 0q = nn / 0D */ + + count_leading_zeros (bm, d0); + + if (bm != 0) + { + /* Normalize, i.e. make the most significant bit of the + denominator set. */ + + d0 = d0 << bm; + n1 = (n1 << bm) | (n0 >> (SI_TYPE_SIZE - bm)); + n0 = n0 << bm; + } + + udiv_qrnnd (q0, n0, n1, n0, d0); + q1 = 0; + + /* Remainder in n0 >> bm. */ + } + else + { + /* qq = NN / 0d */ + + if (d0 == 0) + d0 = 1 / d0; /* Divide intentionally by zero. */ + + count_leading_zeros (bm, d0); + + if (bm == 0) + { + /* From (n1 >= d0) /\ (the most significant bit of d0 is set), + conclude (the most significant bit of n1 is set) /\ (the + leading quotient digit q1 = 1). 
+ + This special case is necessary, not an optimization. + (Shifts counts of SI_TYPE_SIZE are undefined.) */ + + n1 -= d0; + q1 = 1; + } + else + { + /* Normalize. */ + + b = SI_TYPE_SIZE - bm; + + d0 = d0 << bm; + n2 = n1 >> b; + n1 = (n1 << bm) | (n0 >> b); + n0 = n0 << bm; + + udiv_qrnnd (q1, n1, n2, n1, d0); + } + + /* n1 != d0... */ + + udiv_qrnnd (q0, n0, n1, n0, d0); + + /* Remainder in n0 >> bm. */ + } + + if (rp != 0) + { + rr.s.low = n0 >> bm; + rr.s.high = 0; + *rp = rr.ll; + } + } +#endif /* UDIV_NEEDS_NORMALIZATION */ + + else + { + if (d1 > n1) + { + /* 00 = nn / DD */ + + q0 = 0; + q1 = 0; + + /* Remainder in n1n0. */ + if (rp != 0) + { + rr.s.low = n0; + rr.s.high = n1; + *rp = rr.ll; + } + } + else + { + /* 0q = NN / dd */ + + count_leading_zeros (bm, d1); + if (bm == 0) + { + /* From (n1 >= d1) /\ (the most significant bit of d1 is set), + conclude (the most significant bit of n1 is set) /\ (the + quotient digit q0 = 0 or 1). + + This special case is necessary, not an optimization. */ + + /* The condition on the next line takes advantage of that + n1 >= d1 (true due to program flow). */ + if (n1 > d1 || n0 >= d0) + { + q0 = 1; + sub_ddmmss (n1, n0, n1, n0, d1, d0); + } + else + q0 = 0; + + q1 = 0; + + if (rp != 0) + { + rr.s.low = n0; + rr.s.high = n1; + *rp = rr.ll; + } + } + else + { + USItype m1, m0; + /* Normalize. */ + + b = SI_TYPE_SIZE - bm; + + d1 = (d1 << bm) | (d0 >> b); + d0 = d0 << bm; + n2 = n1 >> b; + n1 = (n1 << bm) | (n0 >> b); + n0 = n0 << bm; + + udiv_qrnnd (q0, n1, n2, n1, d1); + umul_ppmm (m1, m0, q0, d0); + + if (m1 > n1 || (m1 == n1 && m0 > n0)) + { + q0--; + sub_ddmmss (m1, m0, m1, m0, d1, d0); + } + + q1 = 0; + + /* Remainder in (n1n0 - m1m0) >> bm. 
*/ + if (rp != 0) + { + sub_ddmmss (n1, n0, n1, n0, m1, m0); + rr.s.low = (n1 << b) | (n0 >> bm); + rr.s.high = n1 >> bm; + *rp = rr.ll; + } + } + } + } + + ww.s.low = q0; + ww.s.high = q1; + return ww.ll; +} + + +DItype +__divdi3 (DItype u, DItype v) +{ + word_type c = 0; + DIunion uu, vv; + DItype w; + + uu.ll = u; + vv.ll = v; + + if (uu.s.high < 0) + c = ~c, + uu.ll = __negdi2 (uu.ll); + if (vv.s.high < 0) + c = ~c, + vv.ll = __negdi2 (vv.ll); + + w = __udivmoddi4 (uu.ll, vv.ll, (UDItype *) 0); + if (c) + w = __negdi2 (w); + + return w; +} + + +DItype +__moddi3 (DItype u, DItype v) +{ + word_type c = 0; + DIunion uu, vv; + DItype w; + + uu.ll = u; + vv.ll = v; + + if (uu.s.high < 0) + c = ~c, + uu.ll = __negdi2 (uu.ll); + if (vv.s.high < 0) + vv.ll = __negdi2 (vv.ll); + + (void) __udivmoddi4 (uu.ll, vv.ll, &w); + if (c) + w = __negdi2 (w); + + return w; +} + + +UDItype +__umoddi3 (UDItype u, UDItype v) +{ + UDItype w; + + (void) __udivmoddi4 (u, v, &w); + + return w; +} + + +UDItype +__udivdi3 (UDItype n, UDItype d) +{ + return __udivmoddi4 (n, d, (UDItype *) 0); +} + + diff -urNp ocfs/fs/ocfs/ocfs.conf 2.4.20pre5aa2/fs/ocfs/ocfs.conf --- ocfs/fs/ocfs/ocfs.conf Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/ocfs.conf Fri Sep 6 01:46:16 2002 @@ -0,0 +1,32 @@ +# +# ocfs ipcdlm config +# Ensure this file exists in /etc +# Each ipcdlm section contains the information for the local node only. +# Currently, the dlm will pick up up to three local transports... +# will use only the active ones. 
+# + +ipcdlm: + ip_address = + ip_port = + subnet_mask = + type = udp + hostname = + active = yes + +ipcdlm: + ip_address = + ip_port = + subnet_mask = + type = udp + hostname = + active = no + +ipcdlm: + ip_address = + ip_port = + subnet_mask = + type = udp + hostname = + active = no + diff -urNp ocfs/fs/ocfs/ocfsver.sh 2.4.20pre5aa2/fs/ocfs/ocfsver.sh --- ocfs/fs/ocfs/ocfsver.sh Thu Jan 1 01:00:00 1970 +++ 2.4.20pre5aa2/fs/ocfs/ocfsver.sh Fri Sep 6 01:46:16 2002 @@ -0,0 +1,32 @@ +#!/bin/sh + +echo -n "Making version file... " + +VERFILE=$1 +rm -f $VERFILE +shift + +VERSION=$1 +if test "x$VERSION" = "x"; then + echo "ERROR: No version given" + exit 1 +fi +shift + +MD5=`cat $@ | md5sum | sed -e 's/ .*//'` + +cat > $VERFILE < + +#include + +#define OCFS_VERSION_STR "$VERSION" +#define OCFS_BUILD_STR "$MD5" + +void ocfs_version_print (void) +{ + printk(KERN_INFO "Oracle Cluster FileSystem $VERSION (build $MD5)"); +} +OUTCODE + +echo "done"