diff -urNp x-ref/fs/ocfs/Common/inc/ocfs.h x/fs/ocfs/Common/inc/ocfs.h --- x-ref/fs/ocfs/Common/inc/ocfs.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/inc/ocfs.h Mon Oct 21 04:41:18 2002 @@ -29,6 +29,7 @@ /* XXX Hack to avoid warning */ struct mem_dqinfo; +extern inline void prefetch(const void *x); extern inline void mark_info_dirty(struct mem_dqinfo *info); /* @@ -74,8 +75,7 @@ extern inline void mark_info_dirty(struc #include #include -#include - +#include #include #include #include @@ -90,6 +90,7 @@ extern inline void mark_info_dirty(struc #include #include #include +#include #include #include #include @@ -98,7 +99,6 @@ extern inline void mark_info_dirty(struc #include #include -#include #include #include #include diff -urNp x-ref/fs/ocfs/Common/inc/ocfscom.h x/fs/ocfs/Common/inc/ocfscom.h --- x-ref/fs/ocfs/Common/inc/ocfscom.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/inc/ocfscom.h Mon Oct 21 04:41:18 2002 @@ -51,8 +51,6 @@ typedef struct _ocfs_alloc_bm } ocfs_alloc_bm; -#ifndef FORMAT_UTIL - typedef struct _ocfs_sem { long magic; /* OCFS_SEM_MAGIC */ @@ -62,8 +60,6 @@ typedef struct _ocfs_sem } ocfs_sem; -#endif /* FORMAT_UTIL */ - /* convenience macro */ #define ocfs_safefree(x) \ do \ @@ -91,9 +87,6 @@ ocfs_sem; #define ocfs_getpid() getpid() #endif - -#ifndef FORMAT_UTIL - typedef struct _ocfs_extent { sb8 virtual; @@ -114,8 +107,12 @@ ocfs_extent_map; typedef struct _alloc_item { + enum { SLAB_ITEM, KMALLOC_ITEM, VMALLOC_ITEM } type; void *address; - int length; + union { + int length; + void *slab; + } u; struct list_head list; char tag[30]; } @@ -140,9 +137,6 @@ alloc_item; i->u.generic_ip = (void *)o; \ } while (0) - -#endif /* FORMAT_UTIL */ - #define FIRST_FILE_ENTRY(dir) ((char *) ((char *)dir)+OCFS_SECTOR_SIZE) #define FILEENT(dir,idx) (ocfs_file_entry *) ( ((char *)dir) + \ ((dir->index[idx]+1) * OCFS_SECTOR_SIZE)) diff -urNp x-ref/fs/ocfs/Common/inc/ocfsconst.h x/fs/ocfs/Common/inc/ocfsconst.h --- x-ref/fs/ocfs/Common/inc/ocfsconst.h Mon 
Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/inc/ocfsconst.h Mon Oct 21 04:41:18 2002 @@ -31,24 +31,13 @@ #define OCFS_RECOVER_LOG_FILENAME "RecoverLogFile" #define OCFS_CLEANUP_LOG_FILENAME "CleanUpLogFile" -#define ONE_SECOND (10 * 1000 * 1000) - -/* -** The Volume Recognition Sector -** Signature - 128 Bytes -** Volume Label - 64 bytes (MAXIMUM_VOLUME_LABEL_LENGTH) -** Volume Serial Number - 8 Bytes -** Volume Length - 8 Bytes -** Volume Starting Offset - 8 Bytes -** Root Directory Starting Offset - 8 Bytes -*/ - -#define ONE_MILLI_SEC (10 * 1000) /* in 0.1 microsec unit */ -#define ONE_MEGA_BYTE (1 * 1024 * 1024) +#define ONE_SECOND (10 * 1000 * 1000) /* 100 nanosec unit */ +#define ONE_MILLI_SEC (10 * 1000) /* 100 nanosec unit */ +#define ONE_MEGA_BYTE (1 * 1024 * 1024) /* in bytes */ #define MISS_COUNT_VALUE 5 -#define OCFS_DEFAULT_DIR_NODE_SIZE (1024*128) +#define OCFS_DEFAULT_DIR_NODE_SIZE (1024 * 128) #define OCFS_DEFAULT_FILE_NODE_SIZE (512) /* @@ -74,20 +63,30 @@ #define FLAG_FILE_CHANGE_TO_CDSL 0x4000000 #define FLAG_FILE_TRUNCATE 0x8000000 // kch- consider removing this and ocfs_truncate_file -#define OCFS_FILE_VOL_META_DATA 0 -#define OCFS_FILE_VOL_LOG_FILE (OCFS_FILE_VOL_META_DATA + \ - OCFS_MAXIMUM_NODES) -#define OCFS_FILE_DIR_ALLOC (OCFS_FILE_VOL_LOG_FILE + \ - OCFS_MAXIMUM_NODES) -#define OCFS_FILE_DIR_ALLOC_BITMAP (OCFS_FILE_DIR_ALLOC + OCFS_MAXIMUM_NODES) -#define OCFS_FILE_FILE_ALLOC (OCFS_FILE_DIR_ALLOC_BITMAP + \ - OCFS_MAXIMUM_NODES) -#define OCFS_FILE_FILE_ALLOC_BITMAP (OCFS_FILE_FILE_ALLOC + OCFS_MAXIMUM_NODES) -#define LOG_FILE_BASE_ID (OCFS_FILE_FILE_ALLOC_BITMAP + \ - OCFS_MAXIMUM_NODES) -#define CLEANUP_FILE_BASE_ID (LOG_FILE_BASE_ID + OCFS_MAXIMUM_NODES) +enum { + OCFS_INVALID_SYSFILE = -1, + OCFS_VOL_MD_SYSFILE = 0, + OCFS_VOL_MD_LOG_SYSFILE, + OCFS_DIR_SYSFILE, + OCFS_DIR_BM_SYSFILE, + OCFS_FILE_EXTENT_SYSFILE, + OCFS_FILE_EXTENT_BM_SYSFILE, + OCFS_RECOVER_LOG_SYSFILE, + OCFS_CLEANUP_LOG_SYSFILE, #ifdef LOCAL_ALLOC -#define 
OCFS_VOL_BITMAP_FILE (CLEANUP_FILE_BASE_ID + OCFS_MAXIMUM_NODES) + OCFS_VOL_BM_SYSFILE +#endif +}; +#define OCFS_FILE_VOL_META_DATA (OCFS_VOL_MD_SYSFILE * OCFS_MAXIMUM_NODES) +#define OCFS_FILE_VOL_LOG_FILE (OCFS_VOL_MD_LOG_SYSFILE * OCFS_MAXIMUM_NODES) +#define OCFS_FILE_DIR_ALLOC (OCFS_DIR_SYSFILE * OCFS_MAXIMUM_NODES) +#define OCFS_FILE_DIR_ALLOC_BITMAP (OCFS_DIR_BM_SYSFILE * OCFS_MAXIMUM_NODES) +#define OCFS_FILE_FILE_ALLOC (OCFS_FILE_EXTENT_SYSFILE * OCFS_MAXIMUM_NODES) +#define OCFS_FILE_FILE_ALLOC_BITMAP (OCFS_FILE_EXTENT_BM_SYSFILE * OCFS_MAXIMUM_NODES) +#define LOG_FILE_BASE_ID (OCFS_RECOVER_LOG_SYSFILE * OCFS_MAXIMUM_NODES) +#define CLEANUP_FILE_BASE_ID (OCFS_CLEANUP_LOG_SYSFILE * OCFS_MAXIMUM_NODES) +#ifdef LOCAL_ALLOC +#define OCFS_VOL_BITMAP_FILE (OCFS_VOL_BM_SYSFILE * OCFS_MAXIMUM_NODES) #endif @@ -122,14 +121,14 @@ #define OCFS_TYPE_OSB (0x05534643) #define OCFS_TYPE_GLOBAL_DATA (0x07534643) - #define CACHE_LOCK_SLOT_TIME (ONE_SECOND * 10) #define OCFS_DLM_NO_LOCK (0x0) #define OCFS_DLM_SHARED_LOCK (0x1) #define OCFS_DLM_EXCLUSIVE_LOCK (0x2) #define OCFS_DLM_ENABLE_CACHE_LOCK (0x8) -#define OCFS_INVALID_NODE_NUM (-1) + +#define OCFS_INVALID_NODE_NUM ULONG_MAX typedef enum _ocfs_rw_mode { @@ -152,6 +151,9 @@ ocfs_rw_mode; #define OCFS_INITIALIZED_PAGING_IO_RESOURCE (0x00004000) #define OCFS_OIN_INVALID (0x00008000) #define OCFS_OIN_IN_USE (0x00020000) +#define OCFS_OIN_OPEN_FOR_DIRECTIO (0x00100000) +#define OCFS_OIN_OPEN_FOR_WRITE (0x00200000) + #define OCFS_OSB_FLAGS_BEING_DISMOUNTED (0x00000004) #define OCFS_OSB_FLAGS_SHUTDOWN (0x00000008) @@ -171,8 +173,8 @@ ocfs_rw_mode; ** Information on Publish sector of each node */ #define DISK_HBEAT_COMM_ON 20 /* in the order of 5 secs */ -#define DISK_HBEAT_NO_COMM 4 /* in the order of 1 sec */ -#define DISK_HBEAT_INVALID 0 /* in the order of 100ms */ +#define DISK_HBEAT_NO_COMM 4 /* in the order of 1 sec */ +#define DISK_HBEAT_INVALID 0 /* in the order of 100ms */ /* ** Information on Vote sector of each 
node @@ -208,14 +210,12 @@ ocfs_rw_mode; #define OCFS_EXTENT_HEADER_SIGNATURE "EXTHDR2" #define OCFS_EXTENT_DATA_SIGNATURE "EXTDAT1" -#define OCFSCONF_FILE "/etc/ocfs.conf" +#define MAX_IP_ADDR_LEN 32 #define OCFS_IP_ADDR "ip_address" #define OCFS_IP_PORT "ip_port" #define OCFS_IP_MASK "subnet_mask" -#define OCFS_IP_HOST "hostname" #define OCFS_COMM_TYPE "type" -#define OCFS_COMM_ACTIVE "active" #define OCFS_SEM_MAGIC 0xAFABFACE #define OCFS_SEM_DELETED 0x0D0D0D0D @@ -231,10 +231,20 @@ ocfs_rw_mode; #define INITIAL_EXTENT_MAP_SIZE 10 #define OCFS_VOLCFG_LOCK_ITERATE 10 /* in jiffies */ -#define OCFS_VOLCFG_LOCK_TIME 2000 /* in ms */ +#define OCFS_VOLCFG_LOCK_TIME 1000 /* in ms */ +#define OCFS_VOLCFG_HDR_SECTORS 2 /* in sectors */ +#define OCFS_VOLCFG_NEWCFG_SECTORS 4 /* in sectors */ + +#define OCFS_PUBLISH_CLEAR 0 +#define OCFS_PUBLISH_SET 1 + +#define OCFS_NM_HEARTBEAT_TIME 500 /* in ms */ #ifndef O_DIRECT #define O_DIRECT 040000 #endif #define NOT_MOUNTED_EXCLUSIVE (-1) + + +#define IORUN_ALLOC_SIZE (OCFS_MAX_DATA_EXTENTS * sizeof (ocfs_io_runs)) diff -urNp x-ref/fs/ocfs/Common/inc/ocfsdef.h x/fs/ocfs/Common/inc/ocfsdef.h --- x-ref/fs/ocfs/Common/inc/ocfsdef.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/inc/ocfsdef.h Mon Oct 21 04:41:18 2002 @@ -30,14 +30,6 @@ extern bool bDebug; extern ub8 debugOffset; -/* -** DLM_THREAD_PER_VOLUME macro identifies sections of code which -** create one thread DLM thread per volume rather than one DLM thread -** for all volumes. This feature has currently been implemented for the -** LINUX port only. 
-*/ -#define DLM_THREAD_PER_VOLUME - # define UPDATE_OIN(a) do { \ (a)->needs_verification = true; \ ocfs_verify_update_oin((a)->osb, (a)); \ @@ -67,7 +59,7 @@ extern ub8 debugOffset; do { \ for ((k) = 0; (k) < OCFS_MAX_DATA_EXTENTS; (k)++) { \ if((sb8)((extent)->extents[(k)].file_off + \ - (extent)->extents[(k)].num_clusters) > (vbo)) \ + (extent)->extents[(k)].num_bytes) > (vbo)) \ break; \ } \ } while(0) @@ -90,6 +82,22 @@ extern ub8 debugOffset; (k) = ((k) >= 1) ? ((k) - 1) : (k); \ } while(0) +#ifdef LOCAL_ALLOC +#define OCFS_FILE_NUM_TO_SYSFILE_TYPE(num) ( (num >= 0 && num < OCFS_VOL_BITMAP_FILE + OCFS_MAXIMUM_NODES) ? \ + num/OCFS_MAXIMUM_NODES : OCFS_INVALID_SYSFILE ) +#define OCFS_SYSFILE_TYPE_TO_FILE_NUM(type,node) ( (type > OCFS_INVALID_SYSFILE && type <= OCFS_VOL_BM_SYSFILE && \ + node >=0 && node < OCFS_MAXIMUM_NODES) ? \ + (type * OCFS_MAXIMUM_NODES) + node : -1 ) +#else +#define OCFS_FILE_NUM_TO_SYSFILE_TYPE(num) ( (num >= 0 && num < CLEANUP_FILE_BASE_ID + OCFS_MAXIMUM_NODES) ? \ + num/OCFS_MAXIMUM_NODES : OCFS_INVALID_SYSFILE ) +#define OCFS_SYSFILE_TYPE_TO_FILE_NUM(type,node) ( (type > OCFS_INVALID_SYSFILE && type <= OCFS_CLEANUP_LOG_SYSFILE && /* bound is a sysfile type, not a file number */ \ + node >=0 && node < OCFS_MAXIMUM_NODES) ?
\ + (type * OCFS_MAXIMUM_NODES) + node : -1 ) +#endif + + + struct _ocfs_file; struct _ocfs_inode; struct _ocfs_super; @@ -128,7 +136,7 @@ struct _ocfs_super; #define UPDATE_PUBLISH_MAP(pubmap, num, flag, numnodes) \ do { \ ub8 var = 0x1; \ - if(!(flag)) \ + if((flag) == OCFS_PUBLISH_CLEAR) \ (pubmap) &= (~(var << ((num) % (numnodes)))); \ else \ (pubmap) |= (var << ((num) % (numnodes))); \ @@ -166,13 +174,14 @@ ocfs_file; typedef struct _ocfs_inode ocfs_inode; typedef struct _ocfs_super ocfs_super; typedef struct _ocfs_superduper ocfs_superduper; +typedef struct _ocfs_io_runs ocfs_io_runs; typedef struct _ocfs_lock_res { ub4 signature; ub1 lock_type; /* Support only Exclusive & Shared */ ub4 ref_cnt; /* Used in case of Shared resources */ - sb4 master_node_num; /* Master Node */ + ub4 master_node_num; /* Master Node */ ub8 last_upd_seq_num; ub8 last_lock_upd; ub8 sector_num; @@ -184,14 +193,15 @@ typedef struct _ocfs_lock_res ub4 lock_state; ocfs_inode *oin; spinlock_t lock_mutex; - wait_queue_head_t *voted_event; + wait_queue_head_t voted_event; + atomic_t voted_event_woken; ub8 req_vote_map; ub8 got_vote_map; ub4 vote_status; ub8 last_write_time; ub8 last_read_time; - sb4 writer_node_num; - sb4 reader_node_num; + ub4 writer_node_num; + ub4 reader_node_num; } ocfs_lock_res; @@ -211,8 +221,6 @@ struct _ocfs_inode struct _ocfs_super *osb; /* ocfs_inode belongs to this volume */ ub4 oin_flags; struct list_head next_ofile; /* list of all ofile(s) */ - bool open_for_write; - ub4 ref_cnt; /* when = 0, free ocfs_inode */ ub4 open_hndl_cnt; bool needs_verification; bool cache_enabled; @@ -234,12 +242,8 @@ ocfs_vol_state; typedef struct _ocfs_node_config_info { char node_name[MAX_NODE_NAME_LENGTH]; - ocfs_ipc_config_info ipc_config[OCFS_MAX_IPC]; - ub8 exp_recv[OCFS_MAX_IPC]; - ub1 num_interfaces; - ub1 primary_comm; - ub1 state; - ub1 last_comm_indx; + ocfs_guid guid; + ocfs_ipc_config_info ipc_config; } ocfs_node_config_info; @@ -269,7 +273,8 @@ struct _ocfs_super ub8 
cfg_seq_num; bool cfg_initialized; ub4 num_cfg_nodes; - sb4 node_num; + ub4 node_num; + bool reclaim_id; /* reclaim the original node number*/ ub1 hbm; ub4 hbt; ub8 log_disk_off; @@ -295,19 +300,36 @@ struct _ocfs_super #endif /* PARANOID_LOCKS */ ocfs_sem vol_alloc_lock; struct timer_list lock_timer; - bool lock_stop; + atomic_t lock_stop; wait_queue_head_t lock_event; + atomic_t lock_event_woken; bool cache_fs; + ub4 prealloc_lock; + ocfs_io_runs *data_prealloc; + ocfs_io_runs *md_prealloc; + ub1 *cfg_prealloc; + ub4 cfg_len; + ub1 *log_prealloc; +}; + +enum { + OSB_DATA_LOCK, + OSB_MD_LOCK, + OSB_CFG_LOCK, + OSB_LOG_LOCK }; +#define OSB_PREALLOC_LOCK_TEST(osb, l) (osb->prealloc_lock & (1<prealloc_lock |= (1<prealloc_lock &= ~(1< - #ifndef _OCFSDISK_H_ #define _OCFSDISK_H_ typedef struct _ocfs_alloc_ext { - ub8 file_off; /* Starting Cluster on disk */ - ub8 num_clusters; /* No of Clusters used by this alloc */ + ub8 file_off; /* Starting offset within the file */ + ub8 num_bytes; /* Number of bytes used by this alloc */ ub8 disk_off; /* Physical Disk Offset */ } ocfs_alloc_ext; @@ -40,11 +38,11 @@ ocfs_alloc_ext; typedef struct _ocfs_publish { ub8 time; - bool vote; + sb4 vote; bool dirty; ub4 vote_type; ub8 vote_map; - ub8 seq_num; + ub8 publ_seq_num; ub8 dir_ent; ub1 hbm[OCFS_MAXIMUM_NODES]; /* Might be useful to keep track of total number of files and a */ @@ -56,7 +54,7 @@ ocfs_publish; typedef struct _ocfs_vote { ub1 vote[OCFS_MAXIMUM_NODES]; - ub8 seq_num; + ub8 vote_seq_num; ub8 dir_ent; ub1 open_handle; ub1 pad[7]; @@ -151,21 +149,15 @@ typedef struct _ocfs_dir_node } ocfs_dir_node; -/* Second sector on the volume contains this information */ -typedef struct _ocfs_disk_entry -{ - ub8 seq_num; - ub8 dir_ent; -} -ocfs_disk_entry; - typedef struct _ocfs_vol_node_map { ub8 time[OCFS_MAXIMUM_NODES]; ub8 scan_time[OCFS_MAXIMUM_NODES]; ub1 scan_rate[OCFS_MAXIMUM_NODES]; +#ifdef UNUSED ub1 exp_scan_rate[OCFS_MAXIMUM_NODES]; ub8 
exp_rate_chng_time[OCFS_MAXIMUM_NODES]; +#endif ub4 miss_cnt[OCFS_MAXIMUM_NODES]; ub8 largest_seq_num; } @@ -176,9 +168,9 @@ typedef struct _ocfs_vol_layout ub8 start_off; ub4 num_nodes; ub4 cluster_size; - ub1 mount_point[128]; - ub1 id[64]; - ub1 label[64]; + ub1 mount_point[MAX_MOUNT_POINT_LEN]; + ub1 vol_id[MAX_VOL_ID_LENGTH]; + ub1 label[MAX_VOL_LABEL_LEN]; ub4 label_len; ub1 pad[4]; ub8 size; @@ -223,4 +215,10 @@ typedef struct _ocfs_extent_group } ocfs_extent_group; +typedef struct _ocfs_bitmap_lock +{ + ocfs_disk_lock disk_lock; + ub4 used_bits; +} +ocfs_bitmap_lock; #endif /*_OCFSDISK_H_ */ diff -urNp x-ref/fs/ocfs/Common/inc/ocfsdlm.h x/fs/ocfs/Common/inc/ocfsdlm.h --- x-ref/fs/ocfs/Common/inc/ocfsdlm.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/inc/ocfsdlm.h Mon Oct 21 04:41:18 2002 @@ -27,40 +27,36 @@ #ifndef _OCFSDLM_H_ #define _OCFSDLM_H_ -#define MAX_UDP_PACKETS (10) -#define OCFS_LOW_MARK_UDP (4) -#define OCFS_HIGH_MARK_UDP (7) - -typedef struct _ocfs_dlm_req_master -{ - ub8 lock_id; - ub4 flags; - ub8 lock_seq_num; -} -ocfs_dlm_req_master; +#define OCFS_MAX_DLM_PKT_SIZE 256 +#define OCFS_DLM_MAX_MSG_SIZE 256 + +#define OCFS_DLM_VOTE_OK 0 +#define OCFS_DLM_VOTE_OIN_ALREADY_INUSE 1 +#define OCFS_DLM_VOTE_UPDATE_RETRY 2 +#define OCFS_DLM_VOTE_FILE_DEL 3 + +#define OCFS_DLM_MSG_MAGIC 0x79677083 + +typedef struct _ocfs_dlm_msg_hdr +{ + ub8 lock_id; + ub4 flags; + ub8 lock_seq_num; +} ocfs_dlm_msg_hdr; + +typedef ocfs_dlm_msg_hdr ocfs_dlm_req_master; +typedef ocfs_dlm_msg_hdr ocfs_dlm_disk_vote_req; typedef struct _ocfs_dlm_reply_master { - ub8 lock_id; - ub4 flags; - ub8 lock_seq_num; + ocfs_dlm_msg_hdr h; ub4 status; } ocfs_dlm_reply_master; -typedef struct _ocfs_dlm_disk_vote_req -{ - ub8 lock_id; - ub4 flags; - ub8 lock_seq_num; -} -ocfs_dlm_disk_vote_req; - typedef struct _ocfs_dlm_disk_vote_reply { - ub8 lock_id; - ub4 flags; - ub8 lock_seq_num; + ocfs_dlm_msg_hdr h; ub4 status; } ocfs_dlm_disk_vote_reply; @@ -69,7 +65,7 @@ typedef struct 
_ocfs_dlm_msg { ub4 magic; ub4 msg_len; - ub1 vol_id[64]; + ub1 vol_id[MAX_VOL_ID_LENGTH]; ub4 src_node; ub4 dst_node; ub4 msg_type; @@ -78,32 +74,15 @@ typedef struct _ocfs_dlm_msg } ocfs_dlm_msg; -/* -** IPC related structs -*/ -typedef struct _ocfs_recv_context -{ - ocfs_dlm_msg *recv_packet[MAX_UDP_PACKETS]; - bool free[MAX_UDP_PACKETS]; - sb4 next_free; - sb4 num_used; - atomic_t num_posted; - ocfs_sem *free_lock; - wait_queue_head_t *event; -} -ocfs_recv_context; - -typedef struct _ocfs_recv_comp_context +typedef struct _ocfs_recv_ctxt { - ocfs_recv_context *recv_ctxt; - sb4 index; + sb4 msg_len; + ub1 msg[OCFS_MAX_DLM_PKT_SIZE]; int status; - ub4 recvd_len; - struct tq_struct *work_item; + struct tq_struct ipc_tq; } -ocfs_recv_comp_context; +ocfs_recv_ctxt; -#define OCFS_MAX_DLM_PKT_SIZE (256) enum { OCFS_REQUEST_MAKE_MASTER = 1, @@ -112,26 +91,4 @@ enum OCFS_DISK_VOTE_REPLY }; -typedef struct _ocfs_free_buf_ctxt -{ - bool *free; - sb4 *num_used; - ocfs_sem *free_lock; -} -ocfs_free_buf_ctxt; - -typedef struct _ocfs_dlm_comm_work_item -{ - struct tq_struct *work_item; - ocfs_dlm_msg *recv_packet; - ocfs_free_buf_ctxt free_buf_ctxt; -} -ocfs_dlm_comm_work_item; - -#define OCFS_DLM_VOTE_OK 0 -#define OCFS_DLM_VOTE_OIN_ALREADY_INUSE 1 -#define OCFS_DLM_VOTE_UPDATE_RETRY 2 -#define OCFS_DLM_VOTE_FILE_DEL 3 -#define OCFS_DLM_MSG_MAGIC (0x79677083) - #endif /* _OCFSDLM_H_ */ diff -urNp x-ref/fs/ocfs/Common/inc/ocfserr.h x/fs/ocfs/Common/inc/ocfserr.h --- x-ref/fs/ocfs/Common/inc/ocfserr.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/inc/ocfserr.h Mon Oct 21 04:41:18 2002 @@ -71,4 +71,14 @@ */ #define OCFS_ERROR_INTERNAL_ERROR ((ub4)0xE004A001L) +typedef struct _ocfs_errent +{ + int errno; + char *str; +} ocfs_errent; + + +extern ocfs_errent ocfs_error_strings[]; +extern int ocfs_num_err; + #endif /* _OCFSERR_H_ */ diff -urNp x-ref/fs/ocfs/Common/inc/ocfsgenalloc.h x/fs/ocfs/Common/inc/ocfsgenalloc.h --- x-ref/fs/ocfs/Common/inc/ocfsgenalloc.h Mon Oct 21 04:38:41 
2002 +++ x/fs/ocfs/Common/inc/ocfsgenalloc.h Mon Oct 21 04:41:18 2002 @@ -76,9 +76,8 @@ bool ocfs_get_next_extent_map_entry (ocf int ocfs_update_all_headers (ocfs_super * osb, ocfs_extent_group * AllocExtent, ub8 FileSize); -int ocfs_free_extents_for_truncate (ocfs_super * osb, - ocfs_file_entry * FileEntry, - ocfs_inode * oin, sb4 LogNodeNum, ub8 FileSize); + +int ocfs_free_extents_for_truncate (ocfs_super * osb, ocfs_file_entry * FileEntry); int ocfs_get_leaf_extent (ocfs_super * osb, ocfs_file_entry * FileEntry, diff -urNp x-ref/fs/ocfs/Common/inc/ocfsgencreate.h x/fs/ocfs/Common/inc/ocfsgencreate.h --- x-ref/fs/ocfs/Common/inc/ocfsgencreate.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/inc/ocfsgencreate.h Mon Oct 21 04:41:18 2002 @@ -83,9 +83,11 @@ int ocfs_create_delete_cdsl (struct inod int ocfs_find_create_cdsl (ocfs_super * osb, ocfs_file_entry * fe); +#ifdef UNUSED_CODE int ocfs_update_file_entry_slot (ocfs_super * osb, ocfs_inode * oin, ocfs_rw_mode rw_mode); void ocfs_check_lock_state (ocfs_super * osb, ocfs_inode * oin); +#endif int ocfs_delete_cdsl (ocfs_super * osb, ub8 parent_off, ocfs_file_entry * fe); @@ -93,6 +95,6 @@ int ocfs_create_cdsl (ocfs_super * osb, int ocfs_change_to_cdsl (ocfs_super * osb, ub8 parent_off, ocfs_file_entry * fe); -int ocfs_truncate_file (ocfs_super * osb, ocfs_inode * oin, ub8 file_size); +int ocfs_truncate_file (ocfs_super * osb, ub8 file_off, ub8 file_size); #endif /* _OCFSGENCREATE_H_ */ diff -urNp x-ref/fs/ocfs/Common/inc/ocfsgendlm.h x/fs/ocfs/Common/inc/ocfsgendlm.h --- x-ref/fs/ocfs/Common/inc/ocfsgendlm.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/inc/ocfsgendlm.h Mon Oct 21 04:41:18 2002 @@ -35,116 +35,98 @@ typedef struct _ocfs_offset_map } ocfs_offset_map; -int ocfs_insert_cache_link (ocfs_super * osb, ocfs_lock_res * LockResource); +int ocfs_insert_cache_link (ocfs_super * osb, ocfs_lock_res * lockres); -int ocfs_update_lock_state (ocfs_super * osb, ocfs_lock_res * LockResource, ub4 Flags); +int 
ocfs_update_lock_state (ocfs_super * osb, ocfs_lock_res * lockres, + ub4 flags); -int ocfs_disk_request_vote (ocfs_super * osb, - ub8 LockId, - ub4 LockType, ub4 Flags, ub8 VoteMap, ub8 * LockSeqNo); +int ocfs_disk_request_vote (ocfs_super * osb, ub8 lock_id, ub4 lock_type, + ub4 flags, ub8 vote_map, ub8 * lock_seq_num); -int ocfs_wait_for_disk_lock_release (ocfs_super * osb, - ub8 Offset, ub4 TimeToWait, ub4 LockType); +int ocfs_wait_for_disk_lock_release (ocfs_super * osb, ub8 offset, + ub4 time_to_wait, ub4 lock_type); -int ocfs_wait_for_lock_release (ocfs_super * osb, - ub8 Offset, - ub4 TimeToWait, ocfs_lock_res * LockResource, ub4 LockType); +int ocfs_wait_for_lock_release (ocfs_super * osb, ub8 offset, ub4 time_to_wait, + ocfs_lock_res * lockres, ub4 lock_type); -int ocfs_get_vote_on_disk (ocfs_super * osb, - ub8 LockId, - ub4 LockType, - ub4 Flags, - ub8 * GotVoteMap, - ub8 VoteMap, ub8 LockSeqNum, ub8 * oin_open_map); +int ocfs_get_vote_on_disk (ocfs_super * osb, ub8 lock_id, ub4 lock_type, + ub4 flags, ub8 * got_vote_map, ub8 vote_map, + ub8 lock_seq_num, ub8 * oin_open_map); -int ocfs_disk_reset_voting (ocfs_super * osb, ub8 LockId, ub4 LockType); +int ocfs_disk_reset_voting (ocfs_super * osb, ub8 lock_id, ub4 lock_type); -int ocfs_wait_for_vote (ocfs_super * osb, - ub8 LockId, - ub4 LockType, - ub4 Flags, - ub8 VoteMap, - ub4 TimeToWait, ub8 LockSeqNum, ocfs_lock_res * LockResource); +int ocfs_wait_for_vote (ocfs_super * osb, ub8 lock_id, ub4 lock_type, ub4 flags, + ub8 vote_map, ub4 time_to_wait, ub8 lock_seq_num, + ocfs_lock_res * lockres); -int ocfs_prime_voting (ocfs_super * osb, ub8 LockId, ub4 LockType, ub8 VoteMap); /* empty */ +int ocfs_prime_voting (ocfs_super * osb, ub8 lock_id, ub4 lock_type, ub8 vote_map); /* empty */ -int ocfs_reset_voting (ocfs_super * osb, ub8 LockId, ub4 LockType, ub8 VoteMap); +int ocfs_reset_voting (ocfs_super * osb, ub8 lock_id, ub4 lock_type, ub8 vote_map); -int ocfs_request_vote (ocfs_super * osb, - ub8 LockId, ub4 
LockType, ub4 Flags, ub8 VoteMap, ub8 * LockSeqNo); +int ocfs_request_vote (ocfs_super * osb, ub8 lock_id, ub4 lock_type, ub4 flags, + ub8 vote_map, ub8 * lock_seq_num); -int ocfs_comm_request_vote (ocfs_super * osb, - ub8 LockId, - ub4 LockType, ub4 Flags, ocfs_file_entry * FileEntry); +int ocfs_comm_request_vote (ocfs_super * osb, ub8 lock_id, ub4 lock_type, + ub4 flags, ocfs_file_entry * fe); -void ocfs_init_dlm_msg (ocfs_super * osb, ocfs_dlm_msg * DlmMesg, ub4 MsgSize); +void ocfs_init_dlm_msg (ocfs_super * osb, ocfs_dlm_msg * dlm_msg, ub4 msg_len); -int ocfs_send_dlm_request_msg (ocfs_super * osb, - ub8 LockId, - ub4 LockType, - ub4 Flags, - ocfs_lock_res * LockResource, ub8 VoteMap, ub4 MesgType); +int ocfs_send_dlm_request_msg (ocfs_super * osb, ub8 lock_id, ub4 lock_type, + ub4 flags, ocfs_lock_res * lockres, + ub8 vote_map, ub4 msg_type); -int ocfs_comm_make_lock_master (ocfs_super * osb, - ub8 LockId, - ub4 LockType, - ub4 Flags, - ocfs_lock_res * LockResource, - ocfs_file_entry * FileEntry, ub8 VoteMap); +int ocfs_comm_make_lock_master (ocfs_super * osb, ub8 lock_id, ub4 lock_type, + ub4 flags, ocfs_lock_res * lockres, + ocfs_file_entry * fe, ub8 vote_map); -int ocfs_make_lock_master (ocfs_super * osb, - ub8 LockId, - ub4 LockType, - ub4 Flags, - ocfs_lock_res * LockResource, ocfs_file_entry * FileEntry); +int ocfs_make_lock_master (ocfs_super * osb, ub8 lock_id, ub4 lock_type, + ub4 flags, ocfs_lock_res * lockres, + ocfs_file_entry * fe); -void ocfs_acquire_lockres (ocfs_lock_res * LockResource); +void ocfs_acquire_lockres (ocfs_lock_res * lockres); -void ocfs_release_lockres (ocfs_lock_res * LockResource); +void ocfs_release_lockres (ocfs_lock_res * lockres); -int ocfs_update_disk_lock (ocfs_super * osb, - ocfs_lock_res * LockResource, - ub4 Flags, ocfs_file_entry * FileEntry); +int ocfs_update_disk_lock (ocfs_super * osb, ocfs_lock_res * lockres, + ub4 flags, ocfs_file_entry * fe); -int ocfs_update_master_on_open (ocfs_super * osb, ocfs_lock_res * 
lockResource); +int ocfs_update_master_on_open (ocfs_super * osb, ocfs_lock_res * lockres); -void ocfs_init_lockres (ocfs_super * osb, ocfs_lock_res * lockResource, ub8 LockId); +void ocfs_init_lockres (ocfs_super * osb, ocfs_lock_res * lockres, + ub8 lock_id); -int ocfs_create_update_lock (ocfs_super * osb, ocfs_inode * oin, ub8 LockId, ub4 Flags); +int ocfs_create_update_lock (ocfs_super * osb, ocfs_inode * oin, ub8 lock_id, + ub4 flags); -int ocfs_get_x_for_del (ocfs_super * osb, - ub8 LockId, - ub4 LockType, - ub4 Flags, - ocfs_lock_res * lockResource, ocfs_file_entry * FileEntry); +int ocfs_get_x_for_del (ocfs_super * osb, ub8 lock_id, ub4 lock_type, ub4 flags, + ocfs_lock_res * lockres, ocfs_file_entry * fe); -int ocfs_acquire_lock (ocfs_super * osb, - ub8 LockId, - ub4 LockType, - ub4 Flags, - ocfs_lock_res ** LockResource, ocfs_file_entry * LockSector); +int ocfs_try_exclusive_lock (ocfs_super *osb, ocfs_lock_res *lockres, ub4 flags, + ub4 updated, ocfs_file_entry *fe, ub8 lock_id, + ub4 lock_type); -int ocfs_disk_release_lock (ocfs_super * osb, - ub8 LockId, - ub4 LockType, ub4 Flags, ocfs_lock_res * LockResource); +int ocfs_acquire_lock (ocfs_super * osb, ub8 lock_id, ub4 lock_type, ub4 flags, + ocfs_lock_res ** lockres, ocfs_file_entry * lock_fe); -int ocfs_release_lock (ocfs_super * osb, - ub8 LockId, - ub4 LockType, ub4 Flags, ocfs_lock_res * LockResource); +int ocfs_disk_release_lock (ocfs_super * osb, ub8 lock_id, ub4 lock_type, + ub4 flags, ocfs_lock_res * lockres, ocfs_file_entry *fe); + +int ocfs_release_lock (ocfs_super * osb, ub8 lock_id, ub4 lock_type, ub4 flags, + ocfs_lock_res * lockres, ocfs_file_entry *fe); int ocfs_init_dlm (void); int ocfs_add_lock_to_recovery (void); /* unused */ -int ocfs_create_log_extent_map (ocfs_super * osb, /* unused */ - ocfs_io_runs ** TransRuns, - ub4 * PNumTransRuns, ub8 diskOffset, ub8 ByteCount); +int ocfs_create_log_extent_map (ocfs_super * osb, ocfs_io_runs ** TransRuns, + ub4 * PNumTransRuns, ub8 
diskOffset, + ub8 ByteCount); /* unused */ -int ocfs_lookup_cache_link (ocfs_super * osb, - ub1 * Buffer, ub8 ActualDiskOffset, ub8 Length); +int ocfs_lookup_cache_link (ocfs_super * osb, ub1 * buf, ub8 actual_diski_off, + ub8 length); -int ocfs_process_log_file (ocfs_super * osb, bool Flag); +int ocfs_process_log_file (ocfs_super * osb, bool flag); -int ocfs_break_cache_lock (ocfs_super * osb, ocfs_lock_res * LockRes); +int ocfs_break_cache_lock (ocfs_super * osb, ocfs_lock_res * lockres); #endif /* _OCFSGENDLM_H_ */ diff -urNp x-ref/fs/ocfs/Common/inc/ocfsgenmisc.h x/fs/ocfs/Common/inc/ocfsgenmisc.h --- x-ref/fs/ocfs/Common/inc/ocfsgenmisc.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/inc/ocfsgenmisc.h Mon Oct 21 04:41:18 2002 @@ -27,8 +27,6 @@ #ifndef _OCFSGENMISC_H_ #define _OCFSGENMISC_H_ -void ocfs_delete_name (ocfs_inode * oin); - int ocfs_create_meta_log_files (ocfs_super * osb); int ocfs_create_new_oin (ocfs_inode ** Returnedoin, @@ -39,16 +37,6 @@ int ocfs_create_root_dir_node (ocfs_supe int ocfs_create_root_oin (ocfs_super * osb); -ocfs_file *ocfs_allocate_ofile (void); - -ocfs_inode *ocfs_allocate_oin (void); - -ocfs_file_entry *ocfs_allocate_file_entry (void); - -void ocfs_release_file_entry (ocfs_file_entry * FileEntry); - -void ocfs_release_ofile (ocfs_file * OFile); - void ocfs_delete_all_extent_maps (ocfs_inode * oin); void ocfs_release_oin (ocfs_inode * oin, bool FreeMemory); @@ -67,4 +55,103 @@ void ocfs_delete_osb (ocfs_super * osb); int ocfs_commit_cache (ocfs_super * osb, bool Flag); + + +/* sorry about all the macros, but file and line are important */ + +/* lockres macros */ +#ifdef OCFS_MEM_DBG +#define ocfs_allocate_lockres() (ocfs_lock_res *)ocfs_dbg_slab_alloc( \ + OcfsGlobalCtxt.lockres_cache, \ + __FILE__, __LINE__) +#define ocfs_free_lockres(res) ocfs_dbg_slab_free( \ + OcfsGlobalCtxt.lockres_cache, res) +#else /* !OCFS_MEM_DBG */ +#define ocfs_allocate_lockres() (ocfs_lock_res *)kmem_cache_alloc ( \ + OcfsGlobalCtxt.lockres_cache, 
GFP_NOFS) +#define ocfs_free_lockres(res) kmem_cache_free(OcfsGlobalCtxt.lockres_cache, res) +#endif + +/* ofile macros */ +#ifdef OCFS_MEM_DBG +#define ocfs_allocate_ofile() ((ocfs_file *)({ \ + ocfs_file *of = NULL; \ + of = ocfs_dbg_slab_alloc(OcfsGlobalCtxt.ofile_cache, __FILE__, __LINE__); \ + if (of != NULL) { \ + memset (of, 0, sizeof (ocfs_file)); \ + of->obj_id.type = OCFS_TYPE_OFILE; \ + of->obj_id.size = sizeof (ocfs_file); \ + } \ + of; })) + +#define ocfs_release_ofile(of) ({ \ + OCFS_ASSERT (of); \ + ocfs_safefree (of->curr_dir_buf); \ + ocfs_dbg_slab_free (OcfsGlobalCtxt.ofile_cache, of); }) +#else /* !OCFS_MEM_DBG */ +#define ocfs_allocate_ofile() ((ocfs_file *)({ \ + ocfs_file *of = NULL; \ + of = kmem_cache_alloc (OcfsGlobalCtxt.ofile_cache, GFP_NOFS); \ + if (of != NULL) { \ + memset (of, 0, sizeof (ocfs_file)); \ + of->obj_id.type = OCFS_TYPE_OFILE; \ + of->obj_id.size = sizeof (ocfs_file); \ + } \ + of; })) + +#define ocfs_release_ofile(of) ({ \ + OCFS_ASSERT (of); \ + ocfs_safefree (of->curr_dir_buf); \ + kmem_cache_free (OcfsGlobalCtxt.ofile_cache, of); }) +#endif + + +/* file entry macros */ +#ifdef OCFS_MEM_DBG +#define ocfs_allocate_file_entry() ((ocfs_file_entry *)({ \ + ocfs_file_entry *FileEntry = NULL; \ + FileEntry = ocfs_dbg_slab_alloc (OcfsGlobalCtxt.fe_cache, __FILE__, __LINE__); \ + if (FileEntry != NULL) \ + memset (FileEntry, 0, OCFS_SECTOR_SIZE); \ + FileEntry; })) + +#define ocfs_release_file_entry(fe) ({ \ + OCFS_ASSERT (fe); \ + ocfs_dbg_slab_free (OcfsGlobalCtxt.fe_cache, fe); }) +#else /* !OCFS_MEM_DBG */ +#define ocfs_allocate_file_entry() ((ocfs_file_entry *)({ \ + ocfs_file_entry *FileEntry = NULL; \ + FileEntry = kmem_cache_alloc (OcfsGlobalCtxt.fe_cache, GFP_NOFS); \ + if (FileEntry != NULL) \ + memset (FileEntry, 0, OCFS_SECTOR_SIZE); \ + FileEntry; })) + +#define ocfs_release_file_entry(fe) ({ \ + OCFS_ASSERT (fe); \ + kmem_cache_free (OcfsGlobalCtxt.fe_cache, fe); }) +#endif + +/* oin macros - currently the release 
is handled separately */ +#ifdef OCFS_MEM_DBG +#define ocfs_allocate_oin() ((ocfs_inode *)({ \ + ocfs_inode *oin = NULL; \ + oin = ocfs_dbg_slab_alloc (OcfsGlobalCtxt.oin_cache, __FILE__, __LINE__); \ + if (oin != NULL) { \ + memset (oin, 0, sizeof (ocfs_inode)); \ + oin->obj_id.type = OCFS_TYPE_OIN; \ + oin->obj_id.size = sizeof (ocfs_inode); \ + } \ + oin; })) +#else /* !OCFS_MEM_DBG */ +#define ocfs_allocate_oin() ((ocfs_inode *)({ \ + ocfs_inode *oin = NULL; \ + oin = kmem_cache_alloc (OcfsGlobalCtxt.oin_cache, GFP_NOFS); \ + if (oin != NULL) { \ + memset (oin, 0, sizeof (ocfs_inode)); \ + oin->obj_id.type = OCFS_TYPE_OIN; \ + oin->obj_id.size = sizeof (ocfs_inode); \ + } \ + oin; })) +#endif + #endif /* _OCFSGENMISC_H_ */ diff -urNp x-ref/fs/ocfs/Common/inc/ocfsgennm.h x/fs/ocfs/Common/inc/ocfsgennm.h --- x-ref/fs/ocfs/Common/inc/ocfsgennm.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/inc/ocfsgennm.h Mon Oct 21 04:41:18 2002 @@ -29,34 +29,24 @@ int ocfs_flush_data (ocfs_inode * oin); -void ocfs_update_publish_map (ocfs_super * osb, void *buffer, bool first_time); +int ocfs_disk_update_resource (ocfs_super * osb, ocfs_lock_res * lockres, + ocfs_file_entry * file_ent); -int ocfs_nm_heart_beat (ocfs_super * osb, ub4 ChannelFlag, ub1 Operation); +int ocfs_find_update_res (ocfs_super * osb, ub8 lock_id, + ocfs_lock_res ** lockres, ocfs_file_entry * fe, + ub4 * updated); -int ocfs_polling_thread (void *unused); +int ocfs_vote_for_del_ren (ocfs_super * osb, ocfs_publish * publish, + ub4 node_num, ocfs_vote * vote, + ocfs_lock_res ** lockres); -int ocfs_nm_join_cluster (ocfs_super * osb); /* unused */ +struct inode * ocfs_get_inode_from_offset(ocfs_super * osb, ub8 fileoff); -int ocfs_disk_update_resource (ocfs_super * osb, - ocfs_lock_res * LockResource, ocfs_file_entry * FileEntry); +int ocfs_process_update_inode_request (ocfs_super * osb, ocfs_vote * vote, + ocfs_publish * publish, + ocfs_lock_res * lockres, ub4 node_num); -int ocfs_find_update_res (ocfs_super * 
osb, - ub8 LockId, - ocfs_lock_res ** LockResource, - ocfs_file_entry * FileEntry, ub4 * Updated); - -int ocfs_vote_for_del_ren (ocfs_super * osb, - ocfs_publish * PublishToVote, - ub4 NodeAskingVote, - ocfs_vote * VoteSector, ocfs_lock_res ** lockResource); - -int ocfs_process_update_inode_request (ocfs_super * osb, - ocfs_vote * VoteSector, - ocfs_publish * PublishToVote, - ocfs_lock_res * lockResource, ub4 NodeAskingVote); - -int ocfs_process_vote (ocfs_super * osb, - ocfs_publish * PublishToVote, ub4 NodeAskingVote); +int ocfs_process_vote (ocfs_super * osb, ocfs_publish * publish, ub4 node_num); int ocfs_nm_thread (ocfs_super * mount_osb); diff -urNp x-ref/fs/ocfs/Common/inc/ocfsgensysfile.h x/fs/ocfs/Common/inc/ocfsgensysfile.h --- x-ref/fs/ocfs/Common/inc/ocfsgensysfile.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/inc/ocfsgensysfile.h Mon Oct 21 04:41:18 2002 @@ -39,7 +39,7 @@ ub8 ocfs_file_to_disk_off (ocfs_super * int ocfs_get_system_file_size (ocfs_super * osb, ub4 FileId, ub8 * Length, ub8 * AllocSize); -int ocfs_extend_system_file (ocfs_super * osb, ub4 FileId, ub8 FileSize); +int ocfs_extend_system_file (ocfs_super * osb, ub4 FileId, ub8 FileSize, ocfs_file_entry *fe); int ocfs_find_extents_of_system_file (ocfs_super * osb, ub8 file_off, diff -urNp x-ref/fs/ocfs/Common/inc/ocfsgenutil.h x/fs/ocfs/Common/inc/ocfsgenutil.h --- x-ref/fs/ocfs/Common/inc/ocfsgenutil.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/inc/ocfsgenutil.h Mon Oct 21 04:41:18 2002 @@ -27,8 +27,7 @@ #ifndef _OCFSGENUTIL_H_ #define _OCFSGENUTIL_H_ -void ocfs_debug_print (ub4 Context, ub4 Level, char *FormatStr, ...); - int ocfs_compare_qstr (struct qstr * s1, struct qstr * s2); +char *ocfs_strerror(int errno); #endif /* _OCFSGENUTIL_H_ */ diff -urNp x-ref/fs/ocfs/Common/inc/ocfsgenvolcfg.h x/fs/ocfs/Common/inc/ocfsgenvolcfg.h --- x-ref/fs/ocfs/Common/inc/ocfsgenvolcfg.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/inc/ocfsgenvolcfg.h Mon Oct 21 04:41:18 2002 @@ -29,29 +29,45 @@ 
typedef struct _ocfs_cfg_task { - struct tq_struct task; + struct tq_struct cfg_tq; ocfs_super *osb; ub8 lock_off; ub1 *buffer; } ocfs_cfg_task; +typedef enum _ocfs_volcfg_op +{ + OCFS_VOLCFG_ADD, + OCFS_VOLCFG_UPD +} +ocfs_volcfg_op; + void ocfs_worker (void *Arg); void ocfs_assert_lock_owned (ub4 Arg); -int ocfs_add_to_disk_config (ocfs_super * osb, ocfs_disk_node_config_info * NodeCfgInfo); +int ocfs_add_to_disk_config (ocfs_super * osb, ub4 pref_node_num, + ocfs_disk_node_config_info * NodeCfgInfo); + +int ocfs_write_volcfg_header (ocfs_super * osb, ocfs_volcfg_op op); + +void ocfs_volcfg_gblctxt_to_disknode(ocfs_disk_node_config_info *disk); + +void ocfs_volcfg_gblctxt_to_node(ocfs_node_config_info *node); -int ocfs_config_with_disk_lock (ocfs_super * osb, ub8 LockOffset, ub1 * Buffer); +int ocfs_config_with_disk_lock (ocfs_super * osb, ub8 LockOffset, ub1 * Buffer, + ub4 node_num, ocfs_volcfg_op op); int ocfs_release_disk_lock (ocfs_super * osb, ub8 LockOffset); void ocfs_cfg_worker (ocfs_super * osb); -int ocfs_add_upd_ipc_cfg (ocfs_node_config_info ** CfgInfo, +int ocfs_disknode_to_node (ocfs_node_config_info ** CfgInfo, ocfs_disk_node_config_info * NodeCfgInfo); -int ocfs_update_node_config (ocfs_super * osb); +int ocfs_update_disk_config (ocfs_super * osb, ub4 node_num, + ocfs_disk_node_config_info * disk); int ocfs_chk_update_config (ocfs_super * osb); @@ -59,7 +75,7 @@ int ocfs_add_node_to_config (ocfs_super int ocfs_get_config (ocfs_super * osb); -bool ocfs_is_node_config_ok (ocfs_super * osb); +bool ocfs_has_node_config_changed (ocfs_super * osb); int ocfs_refresh_node_config (ocfs_super * osb); diff -urNp x-ref/fs/ocfs/Common/inc/ocfsgenvote.h x/fs/ocfs/Common/inc/ocfsgenvote.h --- x-ref/fs/ocfs/Common/inc/ocfsgenvote.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/inc/ocfsgenvote.h Mon Oct 21 04:41:18 2002 @@ -27,27 +27,27 @@ #ifndef _OCFSGENVOTE_H_ #define _OCFSGENVOTE_H_ -int ocfs_send_vote_reply (ocfs_super * osb, - ocfs_dlm_msg * DlmMesg, ub4 
status, bool HandleOpen); +int ocfs_send_vote_reply (ocfs_super * osb, ocfs_dlm_msg * dlm_msg, + ub4 vote_status, bool inode_open); -int ocfs_comm_vote_for_del_ren (ocfs_super * osb, - ocfs_lock_res ** LockResource, ocfs_dlm_msg * DlmMesg); +int ocfs_comm_vote_for_del_ren (ocfs_super * osb, ocfs_lock_res ** lockres, + ocfs_dlm_msg * dlm_msg); -int ocfs_find_lockres (ocfs_super * osb, ub8 LockId, ocfs_lock_res ** LockResource); +int ocfs_find_lockres (ocfs_super * osb, ub8 lock_id, ocfs_lock_res ** lockres); -bool ocfs_check_ipc_msg (ub1 * Mesg, ub4 Length); +bool ocfs_check_ipc_msg (ub1 * msg, ub4 msg_len); -void ocfs_find_osb (sb1 * VolumeID, ocfs_super ** osb); +void ocfs_find_osb (sb1 * volume_id, ocfs_super ** osb); -int ocfs_find_create_lockres (ocfs_super * osb, - ub8 LockId, ocfs_lock_res ** LockResource); +int ocfs_find_create_lockres (ocfs_super * osb, ub8 lock_id, + ocfs_lock_res ** lockres); -int ocfs_comm_process_vote (ocfs_super * osb, ocfs_dlm_msg * DlmMesg); +int ocfs_comm_process_vote (ocfs_super * osb, ocfs_dlm_msg * dlm_msg); -int ocfs_comm_process_vote_reply (ocfs_super * osb, ocfs_dlm_msg * DlmMesg); +int ocfs_comm_process_vote_reply (ocfs_super * osb, ocfs_dlm_msg * dlm_msg); -void ocfs_dlm_recv_msg (void *Arg); +void ocfs_dlm_recv_msg (void *val); -int ocfs_comm_process_msg (ub1 * DlmMesg); +int ocfs_comm_process_msg (ub1 * msg); #endif /* _OCFSGENVOTE_H_ */ diff -urNp x-ref/fs/ocfs/Common/inc/ocfsheartbeat.h x/fs/ocfs/Common/inc/ocfsheartbeat.h --- x-ref/fs/ocfs/Common/inc/ocfsheartbeat.h Thu Jan 1 01:00:00 1970 +++ x/fs/ocfs/Common/inc/ocfsheartbeat.h Mon Oct 21 04:41:18 2002 @@ -0,0 +1,34 @@ +/* + * ocfsheartbeat.h + * + * Function prototypes for related 'C' file. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#ifndef _OCFSHEARTBEAT_H_ +#define _OCFSHEARTBEAT_H_ + +void ocfs_update_publish_map (ocfs_super * osb, void *buffer, bool first_time); + +int ocfs_nm_heart_beat (ocfs_super * osb, ub4 flag, bool read_publish); + +#endif /* _OCFSHEARTBEAT_H_ */ diff -urNp x-ref/fs/ocfs/Common/inc/ocfstrace.h x/fs/ocfs/Common/inc/ocfstrace.h --- x-ref/fs/ocfs/Common/inc/ocfstrace.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/inc/ocfstrace.h Mon Oct 21 04:41:19 2002 @@ -27,20 +27,21 @@ #ifndef _OCFSTRACE_H_ #define _OCFSTRACE_H_ -extern ub4 OcfsDebugCtxt; -extern ub4 OcfsDebugLevel; +extern ub4 debug_context; +extern ub4 debug_level; #define HI(val) ((ub4)((val) >> 32)) #define LO(val) ((ub4)((val) & 0x00000000FFFFFFFFUL)) /* Tracing Levels */ #define OCFS_DEBUG_LEVEL_ERROR 0x00000001 -#define OCFS_DEBUG_LEVEL_WARNING 0x00000002 -#define OCFS_DEBUG_LEVEL_TRACE 0x00000004 -#define OCFS_DEBUG_LEVEL_ENTRY 0x00000008 -#define OCFS_DEBUG_LEVEL_EXIT 0x00000010 -#define OCFS_DEBUG_LEVEL_VERBOSE 0x00000020 -#define OCFS_DEBUG_LEVEL_ALL 0xffffffff +#define OCFS_DEBUG_LEVEL_TRACE 0x00000002 + +#define OCFS_DEBUG_LEVEL_ENTRY 0x00000010 +#define
OCFS_DEBUG_LEVEL_EXIT 0x00000020 + +#define OCFS_DEBUG_LEVEL_TIMING 0x00000100 +#define OCFS_DEBUG_LEVEL_STACK 0x00000200 /* Tracing Contexts */ #define OCFS_DEBUG_CONTEXT_INIT 0x00000001 /* ocfsgeninit.c,ocfsmain.c */ @@ -52,6 +53,7 @@ extern ub4 OcfsDebugLevel; #define OCFS_DEBUG_CONTEXT_IPC 0x00000080 /* ocfsipc.c */ #define OCFS_DEBUG_CONTEXT_VOLCFG 0x00000100 /* ocfsgenvolcfg.c */ +#define OCFS_DEBUG_CONTEXT_HEARTBEAT 0x00000200 /* ocfsgenheartbeat.c */ #define OCFS_DEBUG_CONTEXT_MOUNT 0x00001000 /* ocfsmount.c */ #define OCFS_DEBUG_CONTEXT_SHUTDOWN 0x00002000 /* ocfsgenshutdn.c */ @@ -74,22 +76,46 @@ extern ub4 OcfsDebugLevel; #define OCFS_DEBUG_CONTEXT_PROC 0x20000000 /* ocfsproc.c */ #define OCFS_DEBUG_CONTEXT_IOSUP 0x40000000 /* ocfsiosup.c */ -#define OCFS_DEBUG_CONTEXT_ALL 0xffffffff -/* OcfsDebugDump */ -# define OcfsDebugDump(Context, Level, fmt, arg...) \ - do { \ - if ((OcfsDebugCtxt & Context) && (OcfsDebugLevel & Level)) \ - printk(fmt, ## arg); \ - } while(0) +#define GET_STACK(s) \ + IF_LEVEL(OCFS_DEBUG_LEVEL_STACK) { \ + sb4 esp; \ + __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : \ + "0" (8191)); \ + esp -= sizeof(struct task_struct); \ + sprintf((s), "[%ld] ", esp); \ + } + +/* privately used macros */ +# define IF_LEVEL(level) \ + if ((debug_context & OCFS_DEBUG_CONTEXT) && (debug_level & level)) + + +# define GET_TIMING(s, hi, lo) \ + do { \ + IF_LEVEL(OCFS_DEBUG_LEVEL_TIMING) { \ + ub4 _lo, _hi; \ + rdtsc (_lo, _hi); \ + if ((s) == NULL) { \ + (hi) = _hi; (lo) = _lo; \ + } else { \ + ub8 _b, _e; \ + _b = hi; _b <<= 32; _b |= lo; \ + _e = _hi; _e <<= 32; _e |= _lo; \ + _e -= _b; \ + sprintf((s), " => [%u.%u]", \ + HI(_e), LO(_e)); \ + } \ + } \ + } while (0) /* IF macro */ -#define IF_TRACE(func) \ - do { \ - if ((OcfsDebugCtxt & OCFS_DEBUG_CONTEXT) && \ - (OcfsDebugLevel & OCFS_DEBUG_LEVEL_TRACE)) \ - func; \ - } while(0) +#define IF_TRACE(func) \ + do { \ + if ((debug_context & OCFS_DEBUG_CONTEXT) && \ + (debug_level & 
OCFS_DEBUG_LEVEL_TRACE)) \ + func; \ + } while (0) /* TRACE disabled. ERROR macros are never disabled. */ #if !defined(TRACE) @@ -107,146 +133,180 @@ extern ub4 OcfsDebugLevel; /* TRACE enabled */ #if defined(TRACE) + /* ENTRY macros */ /* LOG_ENTRY() * */ -# define LOG_ENTRY() \ - OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_ENTRY, \ - "(%d) [%u.%06u] ENTRY: %s()\n", ocfs_getpid(), \ - xtime.tv_sec, xtime.tv_usec, __FUNCTION__) +# define LOG_ENTRY() \ + ub4 _HI = 0, _LO = 0; \ + do { \ + ub1 _t[16]; \ + *_t = '\0'; \ + GET_STACK(_t); \ + GET_TIMING(NULL, _HI, _LO); \ + IF_LEVEL(OCFS_DEBUG_LEVEL_ENTRY) { \ + printk("(%d) %sENTRY: %s() \n", \ + ocfs_getpid (), _t, __FUNCTION__); \ + } \ + } while (0) /* LOG_ENTRY_ARGS() * - * Note: The macro expects the args to be enclosed in parenthesis and - * terminated by a newline. + * Note: The macro expects the args to be terminated by a newline. */ -# define LOG_ENTRY_ARGS(fmt, arg...) \ - do { \ - OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_ENTRY, \ - "(%d) [%u.%06u] ENTRY: %s", ocfs_getpid(), \ - xtime.tv_sec, xtime.tv_usec, __FUNCTION__); \ - OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_ENTRY, \ - fmt, ##arg); \ - } while(0) +# define LOG_ENTRY_ARGS(fmt, arg...) 
\ + ub4 _HI = 0, _LO = 0; \ + do { \ + ub1 _t[16]; \ + *_t = '\0'; \ + GET_STACK(_t); \ + GET_TIMING(NULL, _HI, _LO); \ + IF_LEVEL(OCFS_DEBUG_LEVEL_ENTRY) { \ + printk("(%d) %sENTRY: %s", \ + ocfs_getpid (), _t, __FUNCTION__); \ + printk(fmt, ##arg); \ + } \ + } while (0) /* EXIT macros */ /* LOG_EXIT() * */ -# define LOG_EXIT() \ - OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_EXIT, \ - "(%d) [%u.%06u] EXIT : %s()\n", ocfs_getpid(), \ - xtime.tv_sec, xtime.tv_usec, __FUNCTION__) +# define LOG_EXIT() \ + do { \ + IF_LEVEL(OCFS_DEBUG_LEVEL_EXIT) { \ + ub1 _t[32]; \ + *_t = '\0'; \ + GET_TIMING(_t, _HI, _LO); \ + printk("(%d) EXIT : %s() %s\n", ocfs_getpid (), \ + __FUNCTION__, _t); \ + } \ + } while (0) /* LOG_EXIT_STATUS() * */ -# define LOG_EXIT_STATUS(val) \ - OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_EXIT, \ - "(%d) [%u.%06u] EXIT : %s() = 0x%08x\n", \ - ocfs_getpid(), xtime.tv_sec, xtime.tv_usec, \ - __FUNCTION__, val) +# define LOG_EXIT_STATUS(val) \ + do { \ + IF_LEVEL(OCFS_DEBUG_LEVEL_EXIT) { \ + ub1 _t[32]; \ + *_t = '\0'; \ + GET_TIMING(_t, _HI, _LO); \ + printk("(%d) EXIT : %s() = %d %s\n", \ + ocfs_getpid (), __FUNCTION__, val, _t); \ + } \ + } while (0) /* LOG_EXIT_LONG() * */ -# define LOG_EXIT_LONG(val) \ - OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_EXIT, \ - "(%d) [%u.%06u] EXIT : %s() = %d\n", \ - ocfs_getpid(), xtime.tv_sec, xtime.tv_usec, \ - __FUNCTION__, val) +# define LOG_EXIT_LONG(val) LOG_EXIT_STATUS(val) /* LOG_EXIT_ULONG() * */ -# define LOG_EXIT_ULONG(val) \ - OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_EXIT, \ - "(%d) [%u.%06u] EXIT : %s() = %u\n", \ - ocfs_getpid(), xtime.tv_sec, xtime.tv_usec, \ - __FUNCTION__, val) +# define LOG_EXIT_ULONG(val) \ + do { \ + IF_LEVEL(OCFS_DEBUG_LEVEL_EXIT) { \ + ub1 _t[50]; \ + *_t = '\0'; \ + GET_TIMING(_t, _HI, _LO); \ + printk("(%d) EXIT : %s() = %u %s\n", \ + ocfs_getpid (), __FUNCTION__, val, _t); \ + } \ + } while (0) /* LOG_EXIT_PTR() * */ -# define LOG_EXIT_PTR(val) 
\ - OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_EXIT, \ - "(%d) [%u.%06u] EXIT : %s() = 0x%08x\n", \ - ocfs_getpid(), xtime.tv_sec, xtime.tv_usec, \ - __FUNCTION__, val) +# define LOG_EXIT_PTR(val) \ + do { \ + IF_LEVEL(OCFS_DEBUG_LEVEL_EXIT) { \ + ub1 _t[50]; \ + *_t = '\0'; \ + GET_TIMING(_t, _HI, _LO); \ + printk("(%d) EXIT : %s() = 0x%08x %s\n", \ + ocfs_getpid (), __FUNCTION__, val, _t); \ + } \ + } while (0) /* LOG_EXIT_ARGS() * - * Note: The macro expects the args to be enclosed in parenthesis and - * terminated by a newline. + * Note: The macro expects the args to be terminated by a newline. */ -# define LOG_EXIT_ARGS(fmt, arg...) \ - do { \ - OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_EXIT, \ - "(%d) [%u.%06u] EXIT : %s(), ", \ - ocfs_getpid(), xtime.tv_sec, xtime.tv_usec, \ - __FUNCTION__); \ - OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_EXIT, \ - fmt, ## arg); \ - } while(0) -#endif /* TRACE */ +# define LOG_EXIT_ARGS(fmt, arg...) \ + do { \ + IF_LEVEL(OCFS_DEBUG_LEVEL_EXIT) { \ + ub1 _t[50]; \ + *_t = '\0'; \ + GET_TIMING(_t, _HI, _LO); \ + printk("(%d) EXIT : %s() = ", \ + ocfs_getpid (), __FUNCTION__); \ + printk(fmt, ## arg); \ + printk("%s\n", _t); \ + } \ + } while (0) -/* ERROR macros are not compiled out */ +/* TRACE macros */ -/* LOG_ERROR_STR() +/* LOG_TRACE_STR() * */ -#define LOG_ERROR_STR(str) \ - printk("(%d) ERROR: %s, %s(), %s, %d\n", ocfs_getpid(), \ - str, __FUNCTION__, __FILE__, __LINE__) +# define LOG_TRACE_STR(str) \ + do { \ + IF_LEVEL(OCFS_DEBUG_LEVEL_TRACE) \ + printk("(%d) TRACE: %s() %s\n", ocfs_getpid (), \ + __FUNCTION__, str); \ + } while (0) -/* LOG_ERROR_ARGS() - * - * Note: The macro expects the args to be enclosed in parenthesis and - * terminated by a newline. - */ -#define LOG_ERROR_ARGS(fmt, arg...) 
\ - do { \ - printk("(%d) ERROR: %s(), %s, %d, ", ocfs_getpid(), \ - __FUNCTION__, __FILE__, __LINE__); \ - printk(fmt, ## arg); \ - } while(0) -/* LOG_ERROR_STATUS() +/* LOG_TRACE_ARGS() * + * Note: The macro expects the args to be terminated by a newline. */ -#define LOG_ERROR_STATUS(status) \ - printk("(%d) ERROR: status = 0x%08x, %s(), %s, %d\n", \ - ocfs_getpid(), status, __FUNCTION__, __FILE__, __LINE__) +# define LOG_TRACE_ARGS(fmt, arg...) \ + do { \ + IF_LEVEL(OCFS_DEBUG_LEVEL_TRACE) { \ + printk("(%d) TRACE: %s() ", ocfs_getpid (), \ + __FUNCTION__); \ + printk(fmt, ## arg); \ + } \ + } while (0) +#endif /* TRACE */ -/* TRACE enabled */ -#if defined(TRACE) -/* TRACE macros */ +/* ERROR macros are not compiled out */ -/* LOG_TRACE_STR() +/* LOG_ERROR_STR() * */ -# define LOG_TRACE_STR(str) \ - OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_TRACE, \ - "(%d) TRACE: %s() %s\n", ocfs_getpid(), \ - __FUNCTION__, str) +#define LOG_ERROR_STR(str) \ + do { \ + printk(KERN_ERR "(%d) ERROR: %s, %s, %d\n", ocfs_getpid (), \ + str, __FILE__, __LINE__); \ + } while (0) +/* LOG_ERROR_ARGS() + * + * Note: The macro expects the args to be terminated by a newline. + */ +#define LOG_ERROR_ARGS(fmt, arg...) \ + do { \ + printk(KERN_ERR "(%d) ERROR: %s, %d, ", ocfs_getpid (), \ + __FILE__, __LINE__); \ + printk(fmt, ## arg); \ + } while (0) -/* LOG_TRACE_ARGS() +/* LOG_ERROR_STATUS() * - * Note: The macro expects the args to be enclosed in parenthesis and - * terminated by a newline. */ -# define LOG_TRACE_ARGS(fmt, arg...) 
\ - do { \ - OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_TRACE, \ - "(%d) TRACE: %s() ", ocfs_getpid(), \ - __FUNCTION__); \ - OcfsDebugDump(OCFS_DEBUG_CONTEXT, OCFS_DEBUG_LEVEL_TRACE, \ - fmt, ## arg); \ - } while(0) -#endif /* TRACE */ +#define LOG_ERROR_STATUS(status) \ + do { \ + printk(KERN_ERR "(%d) ERROR: status = %d (%s), %s, %d\n", \ + ocfs_getpid (), status, ocfs_strerror(status), \ + __FILE__, __LINE__); \ + } while (0) #endif /* _OCFSTRACE_H_ */ diff -urNp x-ref/fs/ocfs/Common/inc/ocfsvol.h x/fs/ocfs/Common/inc/ocfsvol.h --- x-ref/fs/ocfs/Common/inc/ocfsvol.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/inc/ocfsvol.h Mon Oct 21 04:41:19 2002 @@ -33,13 +33,15 @@ #define OCFS_MAJOR_VER_STRING "1" #define OCFS_VOLUME_SIGNATURE "OracleCFS" +#define MAX_VOL_SIGNATURE_LEN 128 +#define MAX_MOUNT_POINT_LEN 128 typedef struct _ocfs_vol_disk_hdr { ub4 minor_version; ub4 major_version; - ub1 signature[128]; /* OracleCFS */ - ub1 mount_point[128]; /* Mount point.... for e.g., /mnt/vol1 */ + ub1 signature[MAX_VOL_SIGNATURE_LEN]; /* OracleCFS */ + ub1 mount_point[MAX_MOUNT_POINT_LEN]; /* for e.g., /mnt/vol1 */ ub8 serial_num; ub8 device_size; /* Size of the device in bytes */ ub8 start_off; /* Start of the volume... 
typically 0 */ @@ -78,14 +80,14 @@ ocfs_vol_disk_hdr; typedef struct _ocfs_disk_lock { - sb4 curr_master; /* INVALID_MASTER -1, */ + ub4 curr_master; ub1 file_lock; ub8 last_write_time; ub8 last_read_time; - sb4 writer_node_num; - sb4 reader_node_num; + ub4 writer_node_num; + ub4 reader_node_num; ub8 oin_node_map; - ub8 seq_num; + ub8 dlock_seq_num; } ocfs_disk_lock; @@ -95,19 +97,21 @@ ocfs_disk_lock; #define DISK_LOCK_LAST_READ(x) ( ((ocfs_disk_lock *)x)->last_read_time ) #define DISK_LOCK_LAST_WRITE(x) ( ((ocfs_disk_lock *)x)->last_write_time ) #define DISK_LOCK_READER_NODE(x) ( ((ocfs_disk_lock *)x)->reader_node_num ) -#define DISK_LOCK_SEQNUM(x) ( ((ocfs_disk_lock *)x)->seq_num ) +#define DISK_LOCK_SEQNUM(x) ( ((ocfs_disk_lock *)x)->dlock_seq_num ) #define DISK_LOCK_WRITER_NODE(x) ( ((ocfs_disk_lock *)x)->writer_node_num ) -#define MAX_VOL_ID_LENGTH 64 +#define MAX_VOL_ID_LENGTH 16 +#define MAX_VOL_LABEL_LEN 64 +#define MAX_CLUSTER_NAME_LEN 64 typedef struct _ocfs_vol_label { ocfs_disk_lock disk_lock; - ub1 label[64]; + ub1 label[MAX_VOL_LABEL_LEN]; ub2 label_len; - ub1 id[MAX_VOL_ID_LENGTH]; - ub2 id_len; - unsigned char cluster_name[64]; + ub1 vol_id[MAX_VOL_ID_LENGTH]; + ub2 vol_id_len; + ub1 cluster_name[MAX_CLUSTER_NAME_LEN]; ub2 cluster_name_len; } ocfs_vol_label; @@ -118,45 +122,52 @@ ocfs_vol_label; #define OCFS_IPC_STATE_INACTIVE (0x4) #define OCFS_IPC_STATE_PRIMARY (0x8) -#define OCFS_IPC_DEFAULT_PORT 7001 +#define OCFS_IPC_DEFAULT_PORT 7000 typedef struct _ocfs_ipc_config_info { - ub4 addr; - ub4 port; - ub4 mask; - ub1 state; ub1 type; - ub1 active; - ub1 pad[1]; + char ip_addr[MAX_IP_ADDR_LEN + 1]; + ub4 ip_port; + char ip_mask[MAX_IP_ADDR_LEN + 1]; } ocfs_ipc_config_info; #define OCFS_IPC_DLM_VERSION 0x0201 -#define OCFS_DLM_MAX_MSG_SIZE (256) #define OCFS_NODE_NOT_CONFIG 0 #define OCFS_NODE_STATE_CONFIG 1 -#define OCFS_MAX_IPC 3 +#define GUID_LEN 32 +#define HOSTID_LEN 20 +#define MACID_LEN 12 +/* TODO this structure will break in 64-bit.... 
need to pack */ +typedef union _ocfs_guid +{ + struct + { + char host_id[HOSTID_LEN]; + char mac_id[MACID_LEN]; + } id; + char guid[GUID_LEN]; +} +ocfs_guid; + #define MAX_NODE_NAME_LENGTH 32 typedef struct _ocfs_disk_node_config_info { ocfs_disk_lock disk_lock; char node_name[MAX_NODE_NAME_LENGTH + 1]; - ocfs_ipc_config_info ipc_config[OCFS_MAX_IPC]; - ub1 num_interfaces; - ub1 primary_comm; - ub1 state; - ub1 pad[6]; + ocfs_guid guid; + ocfs_ipc_config_info ipc_config; } ocfs_disk_node_config_info; -#define NODE_CONFIG_HDR_SIGN "NDCFG10" +#define NODE_CONFIG_HDR_SIGN "NODECFG" #define NODE_CONFIG_SIGN_LEN 8 -#define NODE_CONFIG_VER 1 -#define NODE_MIN_SUPPORTED_VER 1 +#define NODE_CONFIG_VER 2 +#define NODE_MIN_SUPPORTED_VER 2 typedef struct _ocfs_node_config_hdr { @@ -165,7 +176,7 @@ typedef struct _ocfs_node_config_hdr ub4 version; ub4 num_nodes; ub4 last_node; - ub8 seq_num; + ub8 cfg_seq_num; } ocfs_node_config_hdr; diff -urNp x-ref/fs/ocfs/Common/ocfsgenalloc.c x/fs/ocfs/Common/ocfsgenalloc.c --- x-ref/fs/ocfs/Common/ocfsgenalloc.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/ocfsgenalloc.c Mon Oct 21 04:41:19 2002 @@ -89,7 +89,7 @@ int ocfs_read_file_entry (ocfs_super * o { int status = 0; - LOG_ENTRY_ARGS ("osb=%p, fileentry=%p, offset=%u.%u\n", osb, FileEntry, + LOG_ENTRY_ARGS ("(osb=%p, fileentry=%p, offset=%u.%u)\n", osb, FileEntry, HI (DiskOffset), LO (DiskOffset)); OCFS_ASSERT (FileEntry); @@ -219,7 +219,7 @@ int ocfs_allocate_new_data_node (ocfs_su k = ExtentHeader->next_free_ext; ExtentHeader->extents[k].file_off = FileEntry->alloc_size; - ExtentHeader->extents[k].num_clusters = actualLength; + ExtentHeader->extents[k].num_bytes = actualLength; ExtentHeader->extents[k].disk_off = physicalOffset; ExtentHeader->next_free_ext++; depth = ExtentHeader->granularity; @@ -245,7 +245,7 @@ int ocfs_allocate_new_data_node (ocfs_su k = FileEntry->next_free_ext; FileEntry->extents[k].file_off = FileEntry->alloc_size; - FileEntry->extents[k].num_clusters = 
actualLength; + FileEntry->extents[k].num_bytes = actualLength; FileEntry->extents[k].disk_off = physicalOffset; FileEntry->next_free_ext++; depth = FileEntry->granularity; @@ -283,7 +283,7 @@ int ocfs_allocate_new_data_node (ocfs_su IterExtentHeader->extents[0].disk_off = (ub8) (physicalOffset + (OCFS_SECTOR_SIZE * (i + 1))); IterExtentHeader->extents[0].file_off = FileEntry->alloc_size; - IterExtentHeader->extents[0].num_clusters = actualLength; + IterExtentHeader->extents[0].num_bytes = actualLength; IterExtentHeader->next_free_ext = 1; IterExtentHeader->alloc_file_off = fileOffset + (OCFS_SECTOR_SIZE * i); @@ -312,7 +312,7 @@ int ocfs_allocate_new_data_node (ocfs_su strcpy (IterExtent->signature, OCFS_EXTENT_DATA_SIGNATURE); IterExtent->extents[0].file_off = FileEntry->alloc_size; - IterExtent->extents[0].num_clusters = actualLength; + IterExtent->extents[0].num_bytes = actualLength; IterExtent->extents[0].disk_off = actualDiskOffset; IterExtent->curr_sect = 1; IterExtent->max_sects = NUM_SECTORS_IN_LEAF_NODE; @@ -356,7 +356,7 @@ int ocfs_allocate_new_data_node (ocfs_su k = ExtentHeader->next_free_ext - 1; - ExtentHeader->extents[k].num_clusters += actualLength; + ExtentHeader->extents[k].num_bytes += actualLength; status = ocfs_write_sector (osb, ExtentHeader, ExtentHeader->this_ext); @@ -365,7 +365,7 @@ int ocfs_allocate_new_data_node (ocfs_su } } k = FileEntry->next_free_ext - 1; - FileEntry->extents[k].num_clusters += actualLength; + FileEntry->extents[k].num_bytes += actualLength; } finally: ocfs_safefree (tempBuf); @@ -435,7 +435,7 @@ int ocfs_add_to_last_data_node (ocfs_sup if (ocfs_check_for_extent_merge (&(OcfsExtent->extents[k]), actualDiskOffset)) { /* We can join the extents, just increase the len of extent */ - OcfsExtent->extents[k].num_clusters += actualLength; + OcfsExtent->extents[k].num_bytes += actualLength; status = 0; goto bail; } @@ -569,7 +569,7 @@ int ocfs_add_to_last_data_node (ocfs_sup /* FileOffset for the new Extent will be equal to 
the previous */ /* allocation size of file */ OcfsExtent->extents[k].file_off = FileEntry->alloc_size; - OcfsExtent->extents[k].num_clusters = actualLength; + OcfsExtent->extents[k].num_bytes = actualLength; OcfsExtent->extents[k].disk_off = actualDiskOffset; OcfsExtent->next_free_ext++; } @@ -601,7 +601,7 @@ int ocfs_add_to_last_data_node (ocfs_sup k = OcfsExtentHeader->next_free_ext - 1; - OcfsExtentHeader->extents[k].num_clusters += + OcfsExtentHeader->extents[k].num_bytes += actualLength; status = ocfs_write_sector (osb, OcfsExtentHeader, @@ -613,7 +613,7 @@ int ocfs_add_to_last_data_node (ocfs_sup k = FileEntry->next_free_ext - 1; - FileEntry->extents[k].num_clusters += actualLength; + FileEntry->extents[k].num_bytes += actualLength; } finally: @@ -732,8 +732,8 @@ int ocfs_grow_extent_tree (ocfs_super * for (k = 0; k < OCFS_MAX_FILE_ENTRY_EXTENTS; k++) { OcfsExtent->extents[k].file_off = FileEntry->extents[k].file_off; - OcfsExtent->extents[k].num_clusters = - FileEntry->extents[k].num_clusters; + OcfsExtent->extents[k].num_bytes = + FileEntry->extents[k].num_bytes; OcfsExtent->extents[k].disk_off = FileEntry->extents[k].disk_off; } @@ -772,7 +772,7 @@ int ocfs_grow_extent_tree (ocfs_super * ExtentHeader->extents[OCFS_MAX_FILE_ENTRY_EXTENTS]. file_off = FileEntry->alloc_size; ExtentHeader->extents[OCFS_MAX_FILE_ENTRY_EXTENTS]. 
- num_clusters = actualLength; + num_bytes = actualLength; ExtentHeader->next_free_ext = OCFS_MAX_FILE_ENTRY_EXTENTS + 1; @@ -789,7 +789,7 @@ int ocfs_grow_extent_tree (ocfs_super * physicalOffset + (OCFS_SECTOR_SIZE * (i + 1)); ExtentHeader->extents[0].file_off = FileEntry->alloc_size; - ExtentHeader->extents[0].num_clusters = actualLength; + ExtentHeader->extents[0].num_bytes = actualLength; ExtentHeader->next_free_ext = 1; ExtentHeader->alloc_file_off = fileOffset + (OCFS_SECTOR_SIZE * (i + 1)); @@ -818,7 +818,7 @@ int ocfs_grow_extent_tree (ocfs_super * strcpy (OcfsExtent->signature, OCFS_EXTENT_DATA_SIGNATURE); OcfsExtent->extents[i].file_off = FileEntry->alloc_size; - OcfsExtent->extents[i].num_clusters = actualLength; + OcfsExtent->extents[i].num_bytes = actualLength; OcfsExtent->extents[i].disk_off = actualDiskOffset; OcfsExtent->curr_sect = 1; OcfsExtent->max_sects = NUM_SECTORS_IN_LEAF_NODE; @@ -858,7 +858,7 @@ int ocfs_grow_extent_tree (ocfs_super * for (i = 0; i < OCFS_MAX_FILE_ENTRY_EXTENTS; i++) { FileEntry->extents[i].file_off = 0; - FileEntry->extents[i].num_clusters = 0; + FileEntry->extents[i].num_bytes = 0; FileEntry->extents[i].disk_off = 0; } @@ -867,7 +867,7 @@ int ocfs_grow_extent_tree (ocfs_super * FileEntry->local_ext = false; FileEntry->extents[0].file_off = 0; - FileEntry->extents[0].num_clusters = FileEntry->alloc_size + + FileEntry->extents[0].num_bytes = FileEntry->alloc_size + actualLength; FileEntry->extents[0].disk_off = physicalOffset; FileEntry->last_ext_ptr = lastExtentPtr; @@ -921,7 +921,7 @@ int ocfs_allocate_extent (ocfs_super * o if (ocfs_check_for_extent_merge (&FileEntry->extents[k], actualDiskOffset)) { /* We can join the extents, just increase the len of extent */ - FileEntry->extents[k].num_clusters += actualLength; + FileEntry->extents[k].num_bytes += actualLength; status = 0; goto finally; } @@ -935,7 +935,7 @@ int ocfs_allocate_extent (ocfs_super * o /* file_off for the new extent will be equal to the previous */ /* 
allocation size of file */ FileEntry->extents[k].file_off = FileEntry->alloc_size; - FileEntry->extents[k].num_clusters = actualLength; + FileEntry->extents[k].num_bytes = actualLength; FileEntry->extents[k].disk_off = actualDiskOffset; FileEntry->next_free_ext++; @@ -1014,7 +1014,7 @@ bool ocfs_check_for_extent_merge (ocfs_a /* joined with the last extent. */ if ((LastExtent->disk_off + - LastExtent->num_clusters) == ActualDiskOffset) { + LastExtent->num_bytes) == ActualDiskOffset) { ret = true; } @@ -1106,23 +1106,23 @@ int ocfs_update_all_headers (ocfs_super if (IS_VALID_EXTENT_HEADER (ExtentHeader)) { for (i = 0; i < AllocExtent->next_free_ext; i++) { if ((AllocExtent->extents[i].file_off + - AllocExtent->extents[i].num_clusters) > + AllocExtent->extents[i].num_bytes) > FileSize) { if (AllocExtent->extents[i].file_off > FileSize) { AllocExtent->extents[i]. file_off = AllocExtent->extents[i]. - num_clusters = 0; + num_bytes = 0; AllocExtent->next_free_ext = i; break; } else { AllocExtent->extents[i]. - num_clusters -= + num_bytes -= ((AllocExtent->extents[i]. file_off + AllocExtent->extents[i]. - num_clusters) - FileSize); + num_bytes) - FileSize); AllocExtent->next_free_ext = i + 1; break; @@ -1140,22 +1140,22 @@ int ocfs_update_all_headers (ocfs_super for (i = 0; i < FileEntry->next_free_ext; i++) { if ((FileEntry->extents[i].file_off + - FileEntry->extents[i].num_clusters) > + FileEntry->extents[i].num_bytes) > FileSize) { if (FileEntry->extents[i].file_off > FileSize) { FileEntry->extents[i].file_off = FileEntry->extents[i]. - num_clusters = 0; + num_bytes = 0; FileEntry->next_free_ext = i; break; } else { FileEntry->extents[i]. - num_clusters -= + num_bytes -= ((FileEntry->extents[i]. file_off + FileEntry->extents[i]. 
- num_clusters) - FileSize); + num_bytes) - FileSize); FileEntry->next_free_ext = i + 1; break; @@ -1172,26 +1172,103 @@ finally: } /* ocfs_update_all_headers */ +static int _write_free_extent_log(ocfs_super *osb, ocfs_cleanup_record *CleanupLogRec, + ub4 len, ub4 fileoff, ub4 nodenum) +{ + ub4 numUpdt; + int status=0; + ocfs_free_bitmap *fb; + + numUpdt = CleanupLogRec->rec.free.num_free_upds; + if (numUpdt >= FREE_LOG_SIZE) { + status = ocfs_write_node_log (osb, (ocfs_log_record *) CleanupLogRec, + osb->node_num, LOG_CLEANUP); + if (status < 0) { + LOG_ERROR_ARGS ("ocfs_write_node_log failed with %d\n", status); + return status; + } + numUpdt = CleanupLogRec->rec.free.num_free_upds = 0; + } + fb = &(CleanupLogRec->rec.free.free_bitmap[numUpdt]); + fb->length = len; + fb->file_off = fileoff; + fb->type = (nodenum == (ub4) -1) ? DISK_ALLOC_VOLUME : DISK_ALLOC_EXTENT_NODE; /* callers pass -1 for volume cluster frees and alloc_node when freeing an extent node */ + fb->node_num = nodenum; + (CleanupLogRec->rec.free.num_free_upds)++; + return status; +} + +static int _squish_extent_entries(ocfs_super *osb, ocfs_alloc_ext *extarr, ub4 *freeExtent, + ocfs_cleanup_record *CleanupLogRec, ub8 FileSize, bool flag) +{ + int status = 0; + bool FirstTime = true; + ocfs_alloc_ext *ext; + ub4 i, csize = osb->vol_layout.cluster_size, + numBitsAllocated = 0, bitmapOffset = 0, + firstfree = *freeExtent; + ub8 bytes, foff, doff, + dstart = osb->vol_layout.data_start_off, + diskOffsetTobeFreed, lengthTobeFreed = 0, + actualSize = 0, origLength = 0; + + firstfree = *freeExtent; + for (i = 0; i < firstfree; i++) { + ext = &(extarr[i]); + bytes = ext->num_bytes; + foff = ext->file_off; + doff = ext->disk_off; + actualSize = (bytes + foff); + if (flag || actualSize > FileSize) { + if (flag || foff >= FileSize) { + if (!flag && FirstTime) { + *freeExtent = i; + FirstTime = false; + } + numBitsAllocated = (ub4) (bytes/csize); + bitmapOffset = (ub4) ((doff - dstart) / csize); + ext->num_bytes = ext->disk_off = ext->file_off = 0; + } else { + if (FirstTime) { + *freeExtent = i + 1; + FirstTime = false; + } + origLength = bytes;
+ ext->num_bytes = bytes = FileSize - foff; + lengthTobeFreed = origLength - bytes; + if (lengthTobeFreed == 0) { + continue; + } + numBitsAllocated = (ub4) (lengthTobeFreed / csize); + diskOffsetTobeFreed = doff + bytes; + bitmapOffset = (ub4) ((diskOffsetTobeFreed - dstart) / csize); + } + status = _write_free_extent_log(osb, CleanupLogRec, numBitsAllocated, + bitmapOffset, -1); + if (status < 0) + break; + } + } + return status; +} + + /* * ocfs_free_extents_for_truncate() * */ -int ocfs_free_extents_for_truncate (ocfs_super * osb, - ocfs_file_entry * FileEntry, - ocfs_inode * oin, sb4 LogNodeNum, ub8 FileSize) +int ocfs_free_extents_for_truncate (ocfs_super * osb, ocfs_file_entry * FileEntry) { int status = 0; - ub4 i, size, numUpdt = 0; - ub4 numBitsAllocated = 0, bitmapOffset = 0; + ub4 size; ocfs_cleanup_record *CleanupLogRec = NULL; ocfs_extent_group *AllocExtent = NULL; - bool FirstTime = true; - ub4 freeExtent = 0; - ub8 actualSize = 0, origLength = 0; - ub8 diskOffsetTobeFreed, lengthTobeFreed = 0; + bool done = false; + ub8 alloc_size; LOG_ENTRY (); + alloc_size = FileEntry->alloc_size; size = sizeof (ocfs_cleanup_record); size = (ub4) OCFS_ALIGN (size, PAGE_SIZE); @@ -1205,497 +1282,110 @@ int ocfs_free_extents_for_truncate (ocfs CleanupLogRec->log_id = osb->curr_trans_id; CleanupLogRec->log_type = LOG_FREE_BITMAP; + /* local extents */ if (FileEntry->local_ext) { - freeExtent = FileEntry->next_free_ext; - for (i = 0; i < FileEntry->next_free_ext; i++) { - actualSize = - (FileEntry->extents[i].num_clusters + - FileEntry->extents[i].file_off); - if (actualSize > FileSize) { - if (FileEntry->extents[i].file_off >= FileSize) { - if (FirstTime) { - freeExtent = i; - FirstTime = false; - } - numBitsAllocated = - (ub4) (FileEntry->extents[i]. - num_clusters / - (osb->vol_layout. - cluster_size)); - - bitmapOffset = - (ub4) ((FileEntry->extents[i]. - disk_off - - osb->vol_layout. - data_start_off) / - (osb->vol_layout. 
- cluster_size)); - - FileEntry->extents[i].num_clusters = - FileEntry->extents[i].disk_off = - FileEntry->extents[i].file_off = 0; - - } else { - if (FirstTime) { - freeExtent = i + 1; - FirstTime = false; - } - - origLength = - FileEntry->extents[i].num_clusters; - FileEntry->extents[i].num_clusters = - (FileSize - - FileEntry->extents[i].file_off); - lengthTobeFreed = - (origLength - - FileEntry->extents[i]. - num_clusters); - - if (lengthTobeFreed == 0) { - continue; - } - - numBitsAllocated = - (ub4) (lengthTobeFreed / - (osb->vol_layout. - cluster_size)); - diskOffsetTobeFreed = - FileEntry->extents[i].disk_off + - FileEntry->extents[i].num_clusters; - - bitmapOffset = - (ub4) ((diskOffsetTobeFreed - - osb->vol_layout. - data_start_off) / - (osb->vol_layout. - cluster_size)); - - } - - numUpdt = CleanupLogRec->rec.free.num_free_upds; - - CleanupLogRec->rec.free.free_bitmap[numUpdt]. - length = numBitsAllocated; - CleanupLogRec->rec.free.free_bitmap[numUpdt]. - file_off = bitmapOffset; - CleanupLogRec->rec.free.free_bitmap[numUpdt]. - type = DISK_ALLOC_VOLUME; - CleanupLogRec->rec.free.free_bitmap[numUpdt]. 
- node_num = -1; - (CleanupLogRec->rec.free.num_free_upds)++; - } - - } - - /* Also Update the next_free_ext */ - FileEntry->next_free_ext = freeExtent; - } else { - freeExtent = 0; - FirstTime = true; + status = _squish_extent_entries(osb, FileEntry->extents, (ub4 *)&FileEntry->next_free_ext, + CleanupLogRec, alloc_size, false); + if (status < 0) { + LOG_ERROR_ARGS ("_squish_extent_entries failed with %d\n", status); + goto finally; + } + goto write_log; + } + + /* non-local extents */ + size = OCFS_ALIGN (sizeof (ocfs_extent_group), PAGE_SIZE); + + AllocExtent = ocfs_malloc (size); + if (AllocExtent == NULL) { + LOG_ERROR_STR ("ocfs_malloc failed for AllocExtent"); + status = -ENOMEM; + goto finally; + } - size = OCFS_ALIGN (sizeof (ocfs_extent_group), PAGE_SIZE); + memset (AllocExtent, 0, size); - AllocExtent = ocfs_malloc (size); - if (AllocExtent == NULL) { - LOG_ERROR_STR - ("ocfs_free_file_extents():ocfs_malloc failed for PAllocExtent"); - status = -ENOMEM; + while (FileEntry->last_ext_ptr != 0) { + status = ocfs_read_metadata (osb, AllocExtent, osb->sect_size, FileEntry->last_ext_ptr); + if (status < 0) { + LOG_ERROR_ARGS ("ocfs_read_metadata failed with %d\n", status); goto finally; } - memset (AllocExtent, 0, size); - - while (FileEntry->last_ext_ptr != 0) { - status = - ocfs_read_metadata (osb, AllocExtent, - (ub4) osb->sect_size, - FileEntry->last_ext_ptr); - if (status < 0) { - LOG_ERROR_ARGS - ("ocfs_read_disk failed with status(%x)", - status); + switch (AllocExtent->type) + { + case OCFS_EXTENT_DATA: + if (!IS_VALID_EXTENT_DATA (AllocExtent)) { + LOG_ERROR_STR ("Invalid Extent Data"); + status = -EFAIL; goto finally; } - if ((AllocExtent->type != OCFS_EXTENT_DATA) && - (AllocExtent->type != OCFS_EXTENT_HEADER)) { + if (AllocExtent->extents[0].file_off >= alloc_size) { + status = _squish_extent_entries(osb, AllocExtent->extents, &AllocExtent->next_free_ext, + CleanupLogRec, alloc_size, true); + if (status < 0) { + LOG_ERROR_ARGS 
("_squish_extent_entries failed with %d\n", status); + goto finally; + } + status = _write_free_extent_log(osb, CleanupLogRec, 1, AllocExtent->alloc_file_off, + AllocExtent->alloc_node); + if (status < 0) { + LOG_ERROR_ARGS ("_write_free_extent_log failed with %d\n", status); + goto finally; + } + } else { + status = _squish_extent_entries(osb, AllocExtent->extents, &AllocExtent->next_free_ext, + CleanupLogRec, alloc_size, false); + if (status < 0) { + LOG_ERROR_ARGS ("_squish_extent_entries failed with %d\n", status); + goto finally; + } + status = _write_free_extent_log(osb, CleanupLogRec, 1, AllocExtent->alloc_file_off, + AllocExtent->alloc_node); + if (status < 0) { + LOG_ERROR_ARGS ("_write_free_extent_log failed with %d\n", status); + goto finally; + } + ocfs_update_all_headers (osb, AllocExtent, alloc_size); + done = true; + } + break; + case OCFS_EXTENT_HEADER: + if (!IS_VALID_EXTENT_HEADER (AllocExtent)) { LOG_ERROR_STR ("Invalid Extent Header"); status = -EFAIL; goto finally; } - if (AllocExtent->extents[0].file_off >= FileSize) { - if (AllocExtent->type == OCFS_EXTENT_DATA) { - - if (!IS_VALID_EXTENT_DATA (AllocExtent)) { - LOG_ERROR_STR - ("Invalid Extent Header"); - status = -EFAIL; - goto finally; - } - - for (i = 0; - i < AllocExtent->next_free_ext; - i++) { - numBitsAllocated = - (ub4) (AllocExtent-> - extents[i]. - num_clusters / - (osb->vol_layout. - cluster_size)); - bitmapOffset = - (ub4) ((AllocExtent-> - extents[i]. - disk_off - - osb->vol_layout. - data_start_off) / - (osb->vol_layout. - cluster_size)); - - numUpdt = - CleanupLogRec->rec.free. - num_free_upds; - if (numUpdt >= FREE_LOG_SIZE) { - status = - ocfs_write_node_log - (osb, - (ocfs_log_record *) - CleanupLogRec, - LogNodeNum, - LOG_CLEANUP); - if (status < 0) { - LOG_ERROR_ARGS - ("ocfs_read_disk failed with status(%x)\n", - status); - goto finally; - } - numUpdt = - CleanupLogRec->rec. - free.num_free_upds = - 0; - } - - AllocExtent->extents[i]. 
- num_clusters = - AllocExtent->extents[i]. - disk_off = - AllocExtent->extents[i]. - file_off = 0; - - CleanupLogRec->rec.free. - free_bitmap[numUpdt]. - length = numBitsAllocated; - CleanupLogRec->rec.free. - free_bitmap[numUpdt]. - file_off = bitmapOffset; - CleanupLogRec->rec.free. - free_bitmap[numUpdt].type = - DISK_ALLOC_VOLUME; - CleanupLogRec->rec.free. - free_bitmap[numUpdt]. - node_num = -1; - (CleanupLogRec->rec.free. - num_free_upds)++; - } - - if (numUpdt >= FREE_LOG_SIZE) { - status = - ocfs_write_node_log (osb, - (ocfs_log_record - *) - CleanupLogRec, - LogNodeNum, - LOG_CLEANUP); - if (status < 0) { - LOG_ERROR_ARGS - ("ocfs_free_extents_for_truncate(): ocfs_read_disk failed with status(%x)\n", - status); - goto finally; - } - numUpdt = - CleanupLogRec->rec.free. - num_free_upds = 0; - } - - CleanupLogRec->rec.free. - free_bitmap[numUpdt].length = 1; - CleanupLogRec->rec.free. - free_bitmap[numUpdt].file_off = - AllocExtent->alloc_file_off; - CleanupLogRec->rec.free. - free_bitmap[numUpdt].type = - DISK_ALLOC_EXTENT_NODE; - CleanupLogRec->rec.free. - free_bitmap[numUpdt].node_num = - AllocExtent->alloc_node; - (CleanupLogRec->rec.free. - num_free_upds)++; - if (AllocExtent->alloc_file_off == 0) { - LOG_ERROR_ARGS - ("File offset was 0 for file %s type DISK_ALLOC_EXTENT_NODE blk 200\n", - FileEntry->filename); - } - - } else { - OCFS_ASSERT (AllocExtent->type == - OCFS_EXTENT_HEADER); - if (!IS_VALID_EXTENT_HEADER - (AllocExtent)) { - LOG_ERROR_STR - ("Invalid Extent Header"); - status = -EFAIL; - goto finally; - } - - numUpdt = - CleanupLogRec->rec.free. - num_free_upds; - if (numUpdt >= FREE_LOG_SIZE) { - status = - ocfs_write_node_log (osb, - (ocfs_log_record - *) - CleanupLogRec, - LogNodeNum, - LOG_CLEANUP); - if (status < 0) { - LOG_ERROR_ARGS - ("ocfs_read_disk failed with status(%x)\n", - status); - goto finally; - } - numUpdt = - CleanupLogRec->rec.free. - num_free_upds = 0; - } - - CleanupLogRec->rec.free. 
- free_bitmap[numUpdt].length = 1; - - if (AllocExtent->alloc_file_off == 0) { - OCFS_BREAKPOINT(); - } - - CleanupLogRec->rec.free. - free_bitmap[numUpdt].file_off = - AllocExtent->alloc_file_off; - - CleanupLogRec->rec.free. - free_bitmap[numUpdt].type = - DISK_ALLOC_EXTENT_NODE; - - CleanupLogRec->rec.free. - free_bitmap[numUpdt].node_num = - AllocExtent->alloc_node; - (CleanupLogRec->rec.free. - num_free_upds)++; - - } + if (AllocExtent->extents[0].file_off >= alloc_size) { + status = _write_free_extent_log(osb, CleanupLogRec, 1, AllocExtent->alloc_file_off, + AllocExtent->alloc_node); + if (status < 0) { + LOG_ERROR_ARGS ("_write_free_extent_log failed with %d\n", status); + goto finally; + } } else { - if (AllocExtent->type == OCFS_EXTENT_DATA) { - freeExtent = AllocExtent->next_free_ext; - - for (i = 0; - i < AllocExtent->next_free_ext; - i++) { - actualSize = - (AllocExtent->extents[i]. - num_clusters + - AllocExtent->extents[i]. - file_off); - if (actualSize > FileSize) { - if (AllocExtent-> - extents[i]. - file_off >= - FileSize) { - if (FirstTime) { - freeExtent - = i; - FirstTime - = - false; - } - numBitsAllocated - = (ub4) - (AllocExtent-> - extents[i]. - num_clusters - / - (osb-> - vol_layout. - cluster_size)); - bitmapOffset = - (ub4) ((AllocExtent->extents[i].disk_off - osb->vol_layout.data_start_off) / (osb->vol_layout.cluster_size)); - - AllocExtent-> - extents[i]. - num_clusters - = - AllocExtent-> - extents[i]. - disk_off = - AllocExtent-> - extents[i]. - file_off = - 0; - - } else { - if (FirstTime) { - freeExtent - = - i + - 1; - FirstTime - = - false; - } - - origLength = - AllocExtent-> - extents[i]. - num_clusters; - AllocExtent-> - extents[i]. - num_clusters - = - (FileSize - - AllocExtent-> - extents[i]. - file_off); - lengthTobeFreed - = - (origLength - - - AllocExtent-> - extents[i]. - num_clusters); - - if (lengthTobeFreed == 0) { - continue; - } - - numBitsAllocated - = (ub4) - (lengthTobeFreed - / - (osb-> - vol_layout. 
- cluster_size)); - diskOffsetTobeFreed - = - AllocExtent-> - extents[i]. - disk_off + - AllocExtent-> - extents[i]. - num_clusters; - - bitmapOffset = - (ub4) ((diskOffsetTobeFreed - osb->vol_layout.data_start_off) / (osb->vol_layout.cluster_size)); - } - - numUpdt = - CleanupLogRec->rec. - free.num_free_upds; - if (numUpdt >= - FREE_LOG_SIZE) { - status = - ocfs_write_node_log - (osb, - (ocfs_log_record - *) - CleanupLogRec, - LogNodeNum, - LOG_CLEANUP); - if (status < 0) { - LOG_ERROR_ARGS - ("ocfs_read_disk failed with status(%x)\n", - status); - goto finally; - } - numUpdt = - CleanupLogRec-> - rec.free. - num_free_upds - = 0; - } - - CleanupLogRec->rec.free. - free_bitmap - [numUpdt].length = - numBitsAllocated; - CleanupLogRec->rec.free. - free_bitmap - [numUpdt].file_off = - bitmapOffset; - CleanupLogRec->rec.free. - free_bitmap - [numUpdt].type = - DISK_ALLOC_VOLUME; - CleanupLogRec->rec.free. - free_bitmap - [numUpdt].node_num = - -1; - (CleanupLogRec->rec. - free.num_free_upds)++; - } - } - - AllocExtent->next_free_ext = freeExtent; - - if (numUpdt >= FREE_LOG_SIZE) { - status = - ocfs_write_node_log (osb, - (ocfs_log_record - *) - CleanupLogRec, - LogNodeNum, - LOG_CLEANUP); - if (status < 0) { - LOG_ERROR_ARGS - ("ocfs_read_disk failed with status(%x)\n", - status); - goto finally; - } - numUpdt = - CleanupLogRec->rec.free. - num_free_upds = 0; - } - - CleanupLogRec->rec.free. - free_bitmap[numUpdt].length = 1; - CleanupLogRec->rec.free. - free_bitmap[numUpdt].file_off = - AllocExtent->alloc_file_off; - CleanupLogRec->rec.free. - free_bitmap[numUpdt].type = - DISK_ALLOC_EXTENT_NODE; - CleanupLogRec->rec.free. - free_bitmap[numUpdt].node_num = - AllocExtent->alloc_node; - (CleanupLogRec->rec.free. 
- num_free_upds)++; - if (AllocExtent->alloc_file_off == 0) { - LOG_ERROR_ARGS - ("File offset was 0 for file %s type DISK_ALLOC_EXTENT_NODE blk 200\n", - FileEntry->filename); - } - } else { - OCFS_BREAKPOINT(); - } + OCFS_BREAKPOINT(); + ocfs_update_all_headers (osb, AllocExtent, alloc_size); + done = true; + } + break; + default: + LOG_ERROR_STR ("Extent is not an OCFS_EXTENT_DATA or OCFS_EXTENT_HEADER"); + status = -EFAIL; + goto finally; + break; + } + if (done) + break; + } /* while (FileEntry->last_ext_ptr != 0) */ - ocfs_update_all_headers (osb, AllocExtent, FileSize); - break; - } - } - } - - /* Write the log. */ +write_log: if (CleanupLogRec->rec.free.num_free_upds > 0) { - status = ocfs_write_node_log (osb, - (ocfs_log_record *) CleanupLogRec, - LogNodeNum, LOG_CLEANUP); + status = ocfs_write_node_log (osb, (ocfs_log_record *) CleanupLogRec, + osb->node_num, LOG_CLEANUP); if (status < 0) { LOG_ERROR_STATUS(status); goto finally; @@ -1703,14 +1393,8 @@ int ocfs_free_extents_for_truncate (ocfs } finally: - if (AllocExtent) { - ocfs_free (AllocExtent); - AllocExtent = NULL; - } - - if (CleanupLogRec) { - ocfs_free (CleanupLogRec); - } + ocfs_safefree (AllocExtent); + ocfs_safefree (CleanupLogRec); LOG_EXIT_ULONG (status); return status; @@ -2047,7 +1731,7 @@ int ocfs_update_extent_map (ocfs_super * for (j = 0; j < FileEntry->next_free_ext; j++) { tempVbo = FileEntry->extents[j].file_off; tempLbo = FileEntry->extents[j].disk_off; - tempSize = FileEntry->extents[j].num_clusters; + tempSize = FileEntry->extents[j].num_bytes; /* Add the Extent to extent map list */ Ret = @@ -2065,11 +1749,11 @@ int ocfs_update_extent_map (ocfs_super * for (j = 0; j < OcfsExtent->next_free_ext; j++) { if ((sb8) (OcfsExtent->extents[j].file_off + - OcfsExtent->extents[j].num_clusters) > + OcfsExtent->extents[j].num_bytes) > (*localVbo)) { tempVbo = OcfsExtent->extents[j].file_off; tempLbo = OcfsExtent->extents[j].disk_off; - tempSize = OcfsExtent->extents[j].num_clusters; + 
tempSize = OcfsExtent->extents[j].num_bytes; /* Add the Extent to extent map list */ Ret = @@ -2160,7 +1844,7 @@ int ocfs_get_leaf_extent (ocfs_super * o for (i = 0; i < FileEntry->next_free_ext; i++) { if ((sb8) (FileEntry->extents[i].file_off + - FileEntry->extents[i].num_clusters) > Vbo) { + FileEntry->extents[i].num_bytes) > Vbo) { childDiskOffset = FileEntry->extents[i].disk_off; break; } @@ -2195,7 +1879,7 @@ int ocfs_get_leaf_extent (ocfs_super * o for (j = 0; j < ExtentHeader->next_free_ext; j++) { if ((sb8) (ExtentHeader->extents[j].file_off + - ExtentHeader->extents[j].num_clusters) > Vbo) + ExtentHeader->extents[j].num_bytes) > Vbo) { childDiskOffset = ExtentHeader->extents[j].disk_off; diff -urNp x-ref/fs/ocfs/Common/ocfsgencreate.c x/fs/ocfs/Common/ocfsgencreate.c --- x-ref/fs/ocfs/Common/ocfsgencreate.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/ocfsgencreate.c Mon Oct 21 04:41:19 2002 @@ -173,7 +173,7 @@ int ocfs_verify_update_oin (ocfs_super * for (j = 0; j < fe->next_free_ext; j++) { tempVbo = fe->extents[j].file_off; tempLbo = fe->extents[j].disk_off; - tempSize = fe->extents[j].num_clusters; + tempSize = fe->extents[j].num_bytes; ocfs_add_extent_map_entry (osb, &oin->map, tempVbo, tempLbo, @@ -239,6 +239,8 @@ int ocfs_find_contiguous_space_from_bitm ub8 startOffset = 0; bool bLockAcquired = false; ocfs_lock_res *pLockResource; + ocfs_file_entry *fe = NULL; + ocfs_bitmap_lock *bm_lock = NULL; LOG_ENTRY (); @@ -247,10 +249,17 @@ int ocfs_find_contiguous_space_from_bitm ocfs_down_sem (&(osb->vol_alloc_lock), true); /* Get the allocation lock here */ + fe = ocfs_allocate_file_entry(); + if (!fe) { + LOG_ERROR_STR ("failed to allocate file entry"); + status = -ENOMEM; + goto leave; + } + bm_lock = (ocfs_bitmap_lock *)fe; + status = ocfs_acquire_lock (osb, OCFS_BITMAP_LOCK_OFFSET, - OCFS_DLM_ENABLE_CACHE_LOCK, 0, &pLockResource, - NULL); + OCFS_DLM_ENABLE_CACHE_LOCK, 0, &pLockResource, fe); if (status < 0) { LOG_ERROR_STATUS (status); goto leave; @@ 
-356,6 +365,16 @@ int ocfs_find_contiguous_space_from_bitm LOG_ERROR_STATUS (status); goto leave; } + + /* write the bitmap size info to the lock sector */ + /* TODO: optimize by making this part of ocfs_release_lock + * for now, it will be back-to-back writes to same sector */ + bm_lock->used_bits = ocfs_count_bits(&osb->cluster_bitmap); + status = ocfs_write_force_disk(osb, bm_lock, OCFS_SECTOR_SIZE, OCFS_BITMAP_LOCK_OFFSET); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } *cluster_off = ByteOffset; *cluster_count = ClusterCount; @@ -366,13 +385,16 @@ int ocfs_find_contiguous_space_from_bitm if (bLockAcquired) { tmpstat = ocfs_release_lock (osb, OCFS_BITMAP_LOCK_OFFSET, - OCFS_DLM_EXCLUSIVE_LOCK, 0, pLockResource); + OCFS_DLM_EXCLUSIVE_LOCK, 0, pLockResource, fe); if (tmpstat < 0) { status = tmpstat; LOG_ERROR_STATUS (status); /* TODO FAIL VOLUME */ } } + + if (fe) + ocfs_release_file_entry(fe); LOG_EXIT_STATUS (status); return status; @@ -441,7 +463,7 @@ ocfs_create_oin_from_entry (ocfs_super * for (j = 0; j < fe->next_free_ext; j++) { tempVbo = fe->extents[j].file_off; tempLbo = fe->extents[j].disk_off; - tempSize = fe->extents[j].num_clusters; + tempSize = fe->extents[j].num_bytes; /* Add the Extent to extent map */ if (!ocfs_add_extent_map_entry (osb, &oin->map, @@ -493,7 +515,7 @@ ocfs_create_oin_from_entry (ocfs_super * pOcfsExtent->extents[j].disk_off; tempSize = pOcfsExtent->extents[j]. 
- num_clusters; + num_bytes; /* Add the Extent to extent map */ bRet = @@ -552,16 +574,24 @@ ocfs_find_files_on_disk (ocfs_super * os bool bRet, bLockAcquired = false; ocfs_lock_res *pLockResource; bool bReadDirNode = true; + ocfs_file_entry *dirfe = NULL; + bool icky_vmalloc = false; LOG_ENTRY_ARGS ("osb=%p, parent=%u.%u, fname=%p, entry=%p, ofile=%p\n", osb, parent_off, file_name, fe, ofile); lockId = parent_off; + dirfe = ocfs_allocate_file_entry(); + if (dirfe==NULL) { + status = -ENOMEM; + LOG_ERROR_STATUS (status); + goto leave; + } + /* Get a shared lock on the directory... */ - tmpstat = - ocfs_acquire_lock (osb, lockId, OCFS_DLM_SHARED_LOCK, FLAG_DIR, - &pLockResource, NULL); + tmpstat = ocfs_acquire_lock (osb, lockId, OCFS_DLM_SHARED_LOCK, + FLAG_DIR, &pLockResource, dirfe); if (tmpstat < 0) { /* Volume should be disabled in this case */ status = tmpstat; @@ -573,6 +603,11 @@ ocfs_find_files_on_disk (ocfs_super * os || ((ofile != NULL) && (ofile->curr_dir_buf == NULL))) { pDirNode = ocfs_malloc (osb->vol_layout.dir_node_size); if (pDirNode == NULL) { + pDirNode = vmalloc (osb->vol_layout.dir_node_size); + icky_vmalloc = true; + } + if (pDirNode == NULL) { + icky_vmalloc = false; status = -ENOMEM; LOG_ERROR_STATUS (status); goto leave; @@ -629,7 +664,7 @@ ocfs_find_files_on_disk (ocfs_super * os { tmpstat = ocfs_release_lock (osb, lockId, OCFS_DLM_SHARED_LOCK, - FLAG_DIR, pLockResource); + FLAG_DIR, pLockResource, dirfe); if (tmpstat < 0) { // status = tmpstat ? 
/* Volume should be disabled in this case */ @@ -637,8 +672,15 @@ ocfs_find_files_on_disk (ocfs_super * os } } - if (ofile == NULL) { - ocfs_safefree (pDirNode); + if (dirfe) + ocfs_release_file_entry(dirfe); + + if (ofile == NULL && pDirNode) { + if (icky_vmalloc) + vfree(pDirNode); + else + ocfs_safefree(pDirNode); + } LOG_EXIT_STATUS (status); @@ -648,10 +690,8 @@ ocfs_find_files_on_disk (ocfs_super * os /* ocfs_initialize_dir_node() * */ -void -ocfs_initialize_dir_node (ocfs_super * osb, - ocfs_dir_node * dir_node, - ub8 bitmap_off, ub8 file_off, ub4 node) +void ocfs_initialize_dir_node (ocfs_super * osb, ocfs_dir_node * dir_node, + ub8 bitmap_off, ub8 file_off, ub4 node) { LOG_ENTRY (); @@ -768,6 +808,7 @@ ocfs_rename_file (ocfs_super * osb, { int status = 0; ocfs_dir_node *pLockNode = NULL; + ocfs_file_entry *dir_fe = NULL; ocfs_file_entry *fe = NULL; ub8 changeSeqNum = 0; bool bAcquiredLock = false; @@ -782,12 +823,21 @@ ocfs_rename_file (ocfs_super * osb, LOG_ENTRY (); + parentLockId = parent_off; parentLockFlags = (FLAG_FILE_CREATE | FLAG_DIR); + + dir_fe = ocfs_allocate_file_entry(); + if (dir_fe == NULL) { + status = -ENOMEM; + LOG_ERROR_STATUS(status); + goto leave; + } + pLockNode = (ocfs_dir_node *)dir_fe; + status = ocfs_acquire_lock (osb, parentLockId, OCFS_DLM_EXCLUSIVE_LOCK, (ub4) parentLockFlags, &pParentLockResource, - NULL); // never used, so why not pass NULL? 
- // (ocfs_file_entry *)pLockNode); + dir_fe); if (status < 0) { LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n", status, HI (parentLockId), LO (parentLockId)); @@ -812,7 +862,7 @@ ocfs_rename_file (ocfs_super * osb, status = ocfs_acquire_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, lockFlags, - &pLockResource, NULL); + &pLockResource, fe); if (status < 0) { LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n", status, HI (lockId), LO (lockId)); @@ -832,9 +882,6 @@ ocfs_rename_file (ocfs_super * osb, SET_VALID_BIT (fe->sync_flags); fe->sync_flags &= ~(OCFS_SYNC_FLAG_CHANGE); - status = - ocfs_get_file_entry (osb, (ocfs_file_entry **) & pLockNode, - fe->dir_node_ptr); pLockNode->index_dirty = 1; pLockNode->bad_off = (fe->this_sector - fe->dir_node_ptr) / osb->sect_size; @@ -852,9 +899,7 @@ ocfs_rename_file (ocfs_super * osb, pLockNode->index[pLockNode->num_ent_used - 1] = pLockNode->bad_off; - status = - ocfs_write_file_entry (osb, (ocfs_file_entry *) pLockNode, - fe->dir_node_ptr); + status = ocfs_write_file_entry (osb, dir_fe, fe->dir_node_ptr); if (status < 0) { LOG_ERROR_STATUS (status); goto leave; @@ -890,7 +935,7 @@ ocfs_rename_file (ocfs_super * osb, if (bAcquiredLock) { status = ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, - lockFlags, pLockResource); + lockFlags, pLockResource, fe); if (status < 0) { LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n", status, HI (lockId), LO (lockId)); @@ -901,7 +946,7 @@ ocfs_rename_file (ocfs_super * osb, if (bParentLockAcquired) { status = ocfs_release_lock (osb, parentLockId, OCFS_DLM_EXCLUSIVE_LOCK, - parentLockFlags, pParentLockResource); + parentLockFlags, pParentLockResource, dir_fe); if (status < 0) { LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n", status, HI (parentLockId), @@ -914,8 +959,8 @@ ocfs_rename_file (ocfs_super * osb, ocfs_release_file_entry (fe); } - if (pLockNode) { - ocfs_release_file_entry ((ocfs_file_entry *) pLockNode); + if (dir_fe) { + ocfs_release_file_entry (dir_fe); } 
LOG_EXIT_STATUS (status); @@ -932,24 +977,17 @@ ocfs_del_file (ocfs_super * osb, ub8 par ocfs_file_entry *fe = NULL; ub4 size = 0; ocfs_dir_node *pLockNode = NULL; - ub4 lockFlags; + ub4 lockFlags=0; bool bAcquiredLock = false; ocfs_lock_res *pLockResource; ocfs_cleanup_record *pCleanupLogRec = NULL; ocfs_log_record *pOcfsLogRec; - ub8 lockId; + ub8 lockId = 0; sb4 log_node_num = -1; LOG_ENTRY (); - fe = ocfs_allocate_file_entry (); - if (fe == NULL) { - status = -ENOMEM; - LOG_ERROR_STATUS (status); - goto leave; - } - - status = ocfs_read_file_entry (osb, fe, file_off); + status = ocfs_get_file_entry (osb, &fe, file_off); if (status < 0) { LOG_ERROR_STATUS (status); goto leave; @@ -966,7 +1004,7 @@ ocfs_del_file (ocfs_super * osb, ub8 par } else { lockId = fe->this_sector; lockFlags = (FLAG_FILE_DELETE); - (ocfs_file_entry *) pLockNode = fe; + pLockNode = (ocfs_dir_node *)fe; } status = ocfs_acquire_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, lockFlags, @@ -1051,6 +1089,20 @@ ocfs_del_file (ocfs_super * osb, ub8 par /* free up fileentry */ leave: + + /* NEW: adding a fake release lock for the dead file entry here */ + /* need this to alert dentry-owners on other nodes */ + /* Release the file lock if we acquired it */ + if (bAcquiredLock && lockFlags!=0 && lockId!=0) { + status = ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, + lockFlags, pLockResource, + (ocfs_file_entry *)pLockNode); + if (status < 0) { + LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n", + status, HI (lockId), LO (lockId)); + } + } + if ((fe != (ocfs_file_entry *) pLockNode) && (pLockNode)) { ocfs_release_file_entry ((ocfs_file_entry *) pLockNode); } @@ -1120,7 +1172,7 @@ int ocfs_extend_file (ocfs_super * osb, lockId = fileEntry->this_sector; lockFlags = FLAG_FILE_EXTEND; bFileLockAcquired = true; - (ocfs_file_entry *) pLockNode = fileEntry; + pLockNode = (ocfs_dir_node *)fileEntry; if ((DISK_LOCK_FILE_LOCK (fileEntry) == OCFS_DLM_ENABLE_CACHE_LOCK) && (DISK_LOCK_CURRENT_MASTER 
(fileEntry) == osb->node_num)) { @@ -1235,7 +1287,7 @@ int ocfs_extend_file (ocfs_super * osb, tmpstat = ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, - lockFlags, pLockResource); + lockFlags, pLockResource, fileEntry); if (tmpstat < 0) { status = tmpstat; LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n", @@ -1301,7 +1353,7 @@ int ocfs_change_file_size (ocfs_super * lockId = fileEntry->this_sector; lockFlags = FLAG_FILE_UPDATE; bFileLockAcquired = true; - (ocfs_file_entry *) pLockNode = fileEntry; + pLockNode = (ocfs_dir_node *)fileEntry; if (bCacheLock) status = @@ -1362,7 +1414,7 @@ int ocfs_change_file_size (ocfs_super * tmpstat = ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, - lockFlags, pLockResource); + lockFlags, pLockResource, fileEntry); if (tmpstat < 0) { status = tmpstat; LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n", @@ -1535,7 +1587,8 @@ int ocfs_create_directory (ocfs_super * if (bAcquiredLock) { status = ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, - lockFlags, pLockResource); + lockFlags, pLockResource, + (ocfs_file_entry *) pLockNode); if (status < 0) { LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n", status, HI (lockId), LO (lockId)); @@ -1645,7 +1698,8 @@ int ocfs_create_file (ocfs_super * osb, if (bAcquiredLock) { status = ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, - lockFlags, pLockResource); + lockFlags, pLockResource, + (ocfs_file_entry *) pLockNode); if (status < 0) { LOG_ERROR_ARGS ("Status 0x%08x for LockId %u.%u\n", status, HI (lockId), LO (lockId)); @@ -1804,7 +1858,7 @@ ocfs_create_modify_file (ocfs_super * os break; case FLAG_FILE_TRUNCATE: - status = ocfs_truncate_file (osb, oin, file_size); + status = ocfs_truncate_file (osb, *file_off, file_size); break; case FLAG_FILE_UPDATE: @@ -1866,7 +1920,6 @@ ocfs_initialize_oin (ocfs_inode * oin, oin->osb = osb; INIT_LIST_HEAD (&(oin->next_ofile)); oin->oin_flags |= flags; - oin->ref_cnt = 0; oin->open_hndl_cnt = 0; oin->file_disk_off 
= file_off; ocfs_extent_map_init (&oin->map); @@ -2040,15 +2093,6 @@ int ocfs_find_create_cdsl (ocfs_super * LOG_ENTRY (); /* Read and see if we have a relevant entry for this node */ - length = (8 * OCFS_MAXIMUM_NODES); - length = OCFS_ALIGN (length, PAGE_SIZE); - - /* Initialize the table with 0 */ - buffer = ocfs_malloc (length); - if (buffer == NULL) { - LOG_ERROR_STATUS (status = -ENOMEM); - goto leave; - } pNewFileEntry = ocfs_allocate_file_entry (); if (pNewFileEntry == NULL) { @@ -2058,16 +2102,18 @@ int ocfs_find_create_cdsl (ocfs_super * memcpy (pNewFileEntry, fe, sizeof (ocfs_file_entry)); - status = - ocfs_read_disk (osb, (sb1 *) buffer, length, fe->extents[0].disk_off); + length = (8 * OCFS_MAXIMUM_NODES); + length = OCFS_ALIGN (length, OCFS_SECTOR_SIZE); + status = ocfs_read_disk_ex (osb, (void **) &buffer, length, length, + fe->extents[0].disk_off); + if (status < 0) { LOG_ERROR_STATUS (status); goto leave; } cdslInfo = (ub8 *) buffer; - - cdslOffset = *(cdslInfo + osb->node_num); + cdslOffset = cdslInfo[osb->node_num]; if (cdslOffset == 0) { ub8 physicalOffset, fileOffset, numSectorsAlloc, bitmapOffset, numClustersAlloc; @@ -2076,10 +2122,9 @@ int ocfs_find_create_cdsl (ocfs_super * LOG_TRACE_STR ("Calling ocfs_alloc_node_block from CDSL"); /* Allocate contiguous blocks on disk */ - status = - ocfs_alloc_node_block (osb, OCFS_SECTOR_SIZE, &physicalOffset, - &fileOffset, (ub8 *) & numSectorsAlloc, - osb->node_num, DISK_ALLOC_EXTENT_NODE); + status = ocfs_alloc_node_block (osb, OCFS_SECTOR_SIZE, + &physicalOffset, &fileOffset, (ub8 *) & numSectorsAlloc, + osb->node_num, DISK_ALLOC_EXTENT_NODE); if (status < 0) { LOG_ERROR_STATUS (status); goto leave; @@ -2094,14 +2139,13 @@ int ocfs_find_create_cdsl (ocfs_super * pNewFileEntry->this_sector = physicalOffset; - *(cdslInfo + osb->node_num) = cdslOffset = physicalOffset; + cdslInfo[osb->node_num] = cdslOffset = physicalOffset; if (pNewFileEntry->attribs & OCFS_ATTRIB_DIRECTORY) { - status = - 
ocfs_alloc_node_block (osb, osb->vol_layout.dir_node_size, - &bitmapOffset, &fileOffset, - &numClustersAlloc, osb->node_num, - DISK_ALLOC_DIR_NODE); + status = ocfs_alloc_node_block (osb, + osb->vol_layout.dir_node_size, &bitmapOffset, + &fileOffset, &numClustersAlloc, osb->node_num, + DISK_ALLOC_DIR_NODE); if (status < 0) { LOG_ERROR_STATUS (status); goto leave; @@ -2124,12 +2168,12 @@ int ocfs_find_create_cdsl (ocfs_super * PNewDirNode = PDirNode; memset (PNewDirNode, 0, osb->vol_layout.dir_node_size); - ocfs_initialize_dir_node (osb, PNewDirNode, bitmapOffset, - fileOffset, osb->node_num); + ocfs_initialize_dir_node (osb, PNewDirNode, + bitmapOffset, fileOffset, osb->node_num); DISK_LOCK_CURRENT_MASTER (PNewDirNode) = osb->node_num; DISK_LOCK_FILE_LOCK (PNewDirNode) = - OCFS_DLM_ENABLE_CACHE_LOCK; + OCFS_DLM_ENABLE_CACHE_LOCK; PNewDirNode->dir_node_flags |= DIR_NODE_FLAG_ROOT; status = ocfs_write_dir_node (osb, PNewDirNode, -1); @@ -2144,49 +2188,40 @@ int ocfs_find_create_cdsl (ocfs_super * pNewFileEntry->file_size = 0; } - status = - ocfs_write_file_entry (osb, pNewFileEntry, - pNewFileEntry->this_sector); + status = ocfs_write_file_entry (osb, pNewFileEntry, + pNewFileEntry->this_sector); if (status < 0) { LOG_ERROR_STATUS (status); goto leave; } - status = - ocfs_write_disk (osb, (sb1 *) buffer, length, - fe->extents[0].disk_off); + status = ocfs_write_disk (osb, (sb1 *) buffer, length, + fe->extents[0].disk_off); if (status < 0) { LOG_ERROR_STATUS (status); goto leave; } - memcpy (fe, pNewFileEntry, OCFS_SECTOR_SIZE); } else { - status = - ocfs_read_disk (osb, (sb1 *) fe, OCFS_SECTOR_SIZE, - cdslOffset); + status = ocfs_read_disk (osb, (sb1 *) fe, OCFS_SECTOR_SIZE, + cdslOffset); if (status < 0) { LOG_ERROR_STATUS (status); goto leave; } - } leave: - if (buffer) { - ocfs_safefree (buffer); - } - - if (pNewFileEntry) { + ocfs_safefree (buffer); + if (pNewFileEntry) ocfs_release_file_entry (pNewFileEntry); - } - LOG_EXIT_STATUS (status); return (status); } /* 
ocfs_find_create_cdsl */ +#ifdef UNUSED_CODE /* * ocfs_update_file_entry_slot() * @@ -2304,6 +2339,8 @@ void ocfs_check_lock_state (ocfs_super * LOG_EXIT (); return; } /* ocfs_check_lock_state */ +#endif /* UNUSED_CODE */ + /* @@ -2315,12 +2352,10 @@ int ocfs_delete_cdsl (ocfs_super * osb, int status = 0; ocfs_file_entry *newfe = NULL; ocfs_dir_node *pLockNode = NULL; - ub4 lockFlags = 0; bool bAcquiredLock = false; ocfs_lock_res *pLockResource; ub8 lockId = 0; - bool bParentLockAcquired = false; ub4 parentLockFlags = 0; ocfs_lock_res *pParentLockResource; @@ -2406,7 +2441,7 @@ int ocfs_delete_cdsl (ocfs_super * osb, if (bAcquiredLock) { status = ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, - lockFlags, pLockResource); + lockFlags, pLockResource, newfe); if (status < 0) { LOG_ERROR_ARGS ("ocfs_release_lock() failed with status(0x%08x) " @@ -2419,7 +2454,8 @@ int ocfs_delete_cdsl (ocfs_super * osb, if (bParentLockAcquired) { status = ocfs_release_lock (osb, parentLockId, OCFS_DLM_EXCLUSIVE_LOCK, - parentLockFlags, pParentLockResource); + parentLockFlags, pParentLockResource, + (ocfs_file_entry *)pLockNode); if (status < 0) { LOG_ERROR_ARGS ("ocfs_release_lock() failed with status(0x%08x) for LockId (%u.%u)\n", @@ -2446,7 +2482,6 @@ int ocfs_change_to_cdsl (ocfs_super * os ocfs_file_entry *pNewFileEntry = NULL; ocfs_dir_node *pLockNode = NULL; ub4 length; - ub8 lockId = 0, bitmapOffset, numClustersAlloc; ocfs_lock_res *pLockResource; ub4 lockFlags = 0; @@ -2529,12 +2564,12 @@ int ocfs_change_to_cdsl (ocfs_super * os pNewFileEntry->extents[0].disk_off = (bitmapOffset * osb->vol_layout.cluster_size) + osb->vol_layout.data_start_off; - pNewFileEntry->extents[0].num_clusters = + pNewFileEntry->extents[0].num_bytes = numClustersAlloc * osb->vol_layout.cluster_size; pNewFileEntry->extents[0].file_off = 0; pNewFileEntry->alloc_size = pNewFileEntry->file_size = - pNewFileEntry->extents[0].num_clusters; + pNewFileEntry->extents[0].num_bytes; pNewFileEntry->attribs 
|= OCFS_ATTRIB_FILE_CDSL; @@ -2602,7 +2637,7 @@ int ocfs_change_to_cdsl (ocfs_super * os if (bAcquiredLock) { tmpstat = ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, - lockFlags, pLockResource); + lockFlags, pLockResource, pNewFileEntry); if (tmpstat < 0) { status = tmpstat; LOG_ERROR_ARGS("ocfs_release_lock failed with " \ @@ -2612,6 +2647,9 @@ int ocfs_change_to_cdsl (ocfs_super * os } } + if (pNewFileEntry) { + ocfs_release_file_entry (pNewFileEntry); + } if (buffer) { ocfs_safefree (buffer); } @@ -2685,12 +2723,12 @@ int ocfs_create_cdsl (ocfs_super * osb, fileEntry->extents[0].disk_off = (bitmapOffset * osb->vol_layout.cluster_size) + osb->vol_layout.data_start_off; - fileEntry->extents[0].num_clusters = + fileEntry->extents[0].num_bytes = numClustersAlloc * osb->vol_layout.cluster_size; fileEntry->extents[0].file_off = 0; fileEntry->alloc_size = fileEntry->file_size = - fileEntry->extents[0].num_clusters; + fileEntry->extents[0].num_bytes; /* Initialize the table with 0 */ buffer = ocfs_malloc (length); @@ -2767,7 +2805,8 @@ int ocfs_create_cdsl (ocfs_super * osb, if (bAcquiredLock) { tmpstat = ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, - lockFlags, pLockResource); + lockFlags, pLockResource, + (ocfs_file_entry *) pLockNode); if (tmpstat < 0) { LOG_ERROR_ARGS ("ocfs_release_lock() failed with status(0x%08x) " @@ -2782,7 +2821,7 @@ int ocfs_create_cdsl (ocfs_super * osb, } if (pLockNode) { - ocfs_safefree (pLockNode); + ocfs_release_file_entry ((ocfs_file_entry *) pLockNode); } if (buffer) { @@ -2798,7 +2837,7 @@ int ocfs_create_cdsl (ocfs_super * osb, * ocfs_truncate_file() * */ -int ocfs_truncate_file (ocfs_super * osb, ocfs_inode * oin, ub8 file_size) +int ocfs_truncate_file (ocfs_super * osb, ub8 file_off, ub8 file_size) { int status = 0, tmpstat; ocfs_file_entry *fe = NULL; @@ -2810,15 +2849,13 @@ int ocfs_truncate_file (ocfs_super * osb ub8 changeSeqNum = 0; bool bCacheLock = false; ocfs_dir_node *pLockNode = NULL; - struct inode 
*inode = NULL; + ub8 new_alloc_size; + ub4 csize = osb->vol_layout.cluster_size; LOG_ENTRY (); - OCFS_ASSERT (oin); - inode = oin->inode; - OCFS_ASSERT (inode); + new_alloc_size = OCFS_ALIGN(file_size, csize); - /* Allocate memory for fileentry */ fe = ocfs_allocate_file_entry (); if (fe == NULL) { LOG_ERROR_STR ("ocfs_allocate_file_entry failed"); @@ -2826,15 +2863,13 @@ int ocfs_truncate_file (ocfs_super * osb goto leave; } - status = ocfs_read_file_entry (osb, fe, oin->file_disk_off); + status = ocfs_read_file_entry (osb, fe, file_off); if (status < 0) { LOG_ERROR_ARGS ("ocfs_read_file_entry failed with status(%x)\n", status); goto leave; } - /* Grab a lock on the entry found if we have more than 1 extents and */ - /* also make this node the master */ lockId = fe->this_sector; lockFlags = FLAG_FILE_TRUNCATE; bFileLockAcquired = true; @@ -2845,8 +2880,7 @@ int ocfs_truncate_file (ocfs_super * osb bCacheLock = true; } - status = - ocfs_acquire_lock (osb, lockId, + status = ocfs_acquire_lock (osb, lockId, bCacheLock ? OCFS_DLM_ENABLE_CACHE_LOCK : OCFS_DLM_EXCLUSIVE_LOCK, lockFlags, &pLockResource, (ocfs_file_entry *) pLockNode); @@ -2860,9 +2894,10 @@ int ocfs_truncate_file (ocfs_super * osb bAcquiredLock = true; - status = - ocfs_free_extents_for_truncate (osb, fe, oin, osb->node_num, - oin->alloc_size); + fe->file_size = file_size; + fe->alloc_size = new_alloc_size; + + status = ocfs_free_extents_for_truncate (osb, fe); if (status < 0) { LOG_ERROR_ARGS ("ocfs_free_extents_for_truncate failed with status (%x)\n", @@ -2875,29 +2910,19 @@ int ocfs_truncate_file (ocfs_super * osb DISK_LOCK_CURRENT_MASTER (fe) = osb->node_num; } - /* no need to do OCFS_SECTOR_ALIGN once the allocation size is correct. */ DISK_LOCK_SEQNUM (fe) = changeSeqNum; - - /* Set the valid bit and reset the change bit here... 
TODO */ SET_VALID_BIT (fe->sync_flags); fe->sync_flags &= ~(OCFS_SYNC_FLAG_CHANGE); - OcfsQuerySystemTime (&fe->modify_time); - fe->file_size = inode->i_size; - fe->alloc_size = oin->alloc_size; - status = ocfs_write_file_entry (osb, fe, fe->this_sector); if (status < 0) { LOG_ERROR_ARGS ("ocfs_write_file_entry failed with status(%x)\n", status); } - /* Update all open oins */ - /* Our local update is done, if somebody had asked for a bdcast lock */ - /* He shd set the state */ - leave: +leave: /* Release the file lock if we acquired it */ if (bAcquiredLock) { if (bFileLockAcquired) { @@ -2906,7 +2931,7 @@ int ocfs_truncate_file (ocfs_super * osb tmpstat = ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, - lockFlags, pLockResource); + lockFlags, pLockResource, fe); if (tmpstat < 0) { // status = tmpstat ? LOG_ERROR_ARGS @@ -2916,10 +2941,8 @@ int ocfs_truncate_file (ocfs_super * osb } /* free up fileEntry */ - if (fe) { + if (fe) ocfs_release_file_entry (fe); - fe = NULL; - } LOG_EXIT_STATUS(status); return status; diff -urNp x-ref/fs/ocfs/Common/ocfsgendirnode.c x/fs/ocfs/Common/ocfsgendirnode.c --- x-ref/fs/ocfs/Common/ocfsgendirnode.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/ocfsgendirnode.c Mon Oct 21 04:41:19 2002 @@ -39,9 +39,7 @@ */ void ocfs_print_file_entry (ocfs_file_entry * fe) { - LOG_ERROR_ARGS ("This fe has name %s\n", fe->filename); - - return; + LOG_TRACE_ARGS ("This fe has name %s\n", fe->filename); } /* ocfs_print_file_entry */ /* @@ -57,20 +55,18 @@ void ocfs_print_dir_node (ocfs_super * o LOG_ERROR_STR ("This is the Root of the BTree.\n"); } - LOG_ERROR_ARGS ("signature: %s\n", DirNode->signature); + LOG_TRACE_ARGS ("signature: %s\n", DirNode->signature); - LOG_ERROR_ARGS ("node_disk_off: %u.%u\n", HI (DirNode->node_disk_off), + LOG_TRACE_ARGS ("node_disk_off: %u.%u\n", HI (DirNode->node_disk_off), LO (DirNode->node_disk_off)); - LOG_ERROR_ARGS ("num_ents: %u, num_ent_used: %u\n", DirNode->num_ents, + LOG_TRACE_ARGS ("num_ents: %u, 
num_ent_used: %u\n", DirNode->num_ents, DirNode->num_ent_used); for (i = 0; i < DirNode->num_ent_used; i++) { pOrigFileEntry = FILEENT (DirNode, i); ocfs_print_file_entry (pOrigFileEntry); } - - return; } /* ocfs_print_dir_node */ /* @@ -104,9 +100,16 @@ int ocfs_alloc_node_block (ocfs_super * ub4 fileId = 0; ub4 extendFileId = 0; ocfs_log_record *pOcfsLogRec = NULL; + ocfs_file_entry *fe = NULL; LOG_ENTRY (); + fe = ocfs_allocate_file_entry(); + if (fe==NULL) { + status = -ENOMEM; + goto leave; + } + #ifdef PARANOID_LOCKS ocfs_down_sem (&(osb->dir_alloc_lock), true); ocfs_down_sem (&(osb->file_alloc_lock), true); @@ -132,7 +135,7 @@ int ocfs_alloc_node_block (ocfs_super * /* Get a lock on the file */ status = ocfs_acquire_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, - FLAG_FILE_CREATE, &pLockResource, NULL); + FLAG_FILE_CREATE, &pLockResource, fe); if (status < 0) { goto leave; } @@ -147,11 +150,9 @@ int ocfs_alloc_node_block (ocfs_super * /* Read in the bitmap file for the dir alloc and look for the required */ /* space, if found */ - status = ocfs_get_system_file_size (osb, fileId, &fileSize, &allocSize); - if (status < 0) { - LOG_ERROR_STATUS (status); - goto leave; - } + fileSize = fe->file_size; + allocSize = fe->alloc_size; + prevFileSize = fileSize; if ((fileSize != 0) && (allocSize != 0)) { @@ -215,7 +216,7 @@ int ocfs_alloc_node_block (ocfs_super * status = ocfs_extend_system_file (osb, (extendFileId), - newFileSize + extent); + newFileSize + extent, NULL); if (status < 0) { goto leave; } @@ -224,7 +225,7 @@ int ocfs_alloc_node_block (ocfs_super * bitMapSize = newFileSize / (blockSize * 8); /* Calculate the new bitmap size */ - status = ocfs_extend_system_file (osb, fileId, bitMapSize); + status = ocfs_extend_system_file (osb, fileId, bitMapSize, fe); if (status < 0) { goto leave; } @@ -233,13 +234,9 @@ int ocfs_alloc_node_block (ocfs_super * vfree (buffer); buffer = NULL; - status = - ocfs_get_system_file_size (osb, fileId, &fileSize, - &allocSize); - if 
(status < 0) { - LOG_ERROR_STATUS (status); - goto leave; - } + fileSize = fe->file_size; + allocSize = fe->alloc_size; + length = OCFS_ALIGN (allocSize, osb->sect_size); length = OCFS_ALIGN (allocSize, PAGE_SIZE); @@ -257,11 +254,13 @@ int ocfs_alloc_node_block (ocfs_super * LOG_ERROR_STATUS (status); goto leave; } + printk("fileid filesize = %u.%u\n", fileSize); ocfs_initialize_bitmap (&DirAllocBitMap, (ub4 *) buffer, (ub4) (fileSize * 8)); foundBit = prevFileSize * 8; + printk("foundbit = %d\n", foundBit); // continue; } @@ -319,8 +318,9 @@ int ocfs_alloc_node_block (ocfs_super * } *file_off = (ub8) ((ub8) foundBit * (ub8) blockSize); + /* this can just fall through */ if (*file_off == 0) { - LOG_ERROR_ARGS + LOG_TRACE_ARGS ("File offset was %u.%u for type (%x) blocksize=%u foundbit=%u\n", *file_off, Type, blockSize, foundBit); } @@ -335,7 +335,7 @@ int ocfs_alloc_node_block (ocfs_super * if (bLockAcquired) { tmpstat = ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, - FLAG_FILE_CREATE, pLockResource); + FLAG_FILE_CREATE, pLockResource, fe); if (tmpstat < 0) status = tmpstat; @@ -346,6 +346,8 @@ int ocfs_alloc_node_block (ocfs_super * vfree (buffer); buffer = NULL; } + if (fe) + ocfs_release_file_entry(fe); ocfs_safefree (pOcfsLogRec); @@ -435,7 +437,8 @@ int ocfs_free_vol_block (ocfs_super * os length = OCFS_ALIGN (allocSize, OCFS_SECTOR_SIZE); length = OCFS_ALIGN (allocSize, PAGE_SIZE); - if ((buffer = ocfs_malloc (length)) == NULL) { + /* !!! vmalloc !!! 
*/ + if ((buffer = vmalloc (length)) == NULL) { LOG_ERROR_STATUS (status = -ENOMEM); goto leave; } @@ -502,7 +505,8 @@ int ocfs_free_vol_block (ocfs_super * os ocfs_up_sem (&(osb->file_alloc_lock)); ocfs_up_sem (&(osb->dir_alloc_lock)); #endif - ocfs_safefree (buffer); + if (buffer!=NULL) + vfree (buffer); LOG_EXIT_STATUS (status); return status; @@ -529,9 +533,15 @@ int ocfs_free_node_block (ocfs_super * o ocfs_lock_res *pLockResource; ub4 fileId = 0; ub4 extendFileId = 0; + ocfs_file_entry *fe = NULL; LOG_ENTRY (); + fe = ocfs_allocate_file_entry(); + if (fe==NULL) { + status = -ENOMEM; + goto leave; + } if (Type == DISK_ALLOC_DIR_NODE) { fileId = OCFS_FILE_DIR_ALLOC_BITMAP + NodeNum; blockSize = (ub4) osb->vol_layout.dir_node_size; @@ -549,7 +559,7 @@ int ocfs_free_node_block (ocfs_super * o /* Get a lock on the file */ status = ocfs_acquire_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, - FLAG_FILE_CREATE, &pLockResource, NULL); + FLAG_FILE_CREATE, &pLockResource, fe); if (status < 0) { goto leave; } @@ -592,13 +602,15 @@ int ocfs_free_node_block (ocfs_super * o if (bLockAcquired) { status = ocfs_release_lock (osb, lockId, OCFS_DLM_EXCLUSIVE_LOCK, - FLAG_FILE_CREATE, pLockResource); + FLAG_FILE_CREATE, pLockResource, fe); if (status < 0) { /* log failure */ } bLockAcquired = false; } ocfs_safefree (buffer); + if (fe) + ocfs_release_file_entry(fe); LOG_EXIT_STATUS (0); return 0; @@ -611,7 +623,8 @@ int ocfs_free_node_block (ocfs_super * o int ocfs_free_directory_block (ocfs_super * osb, ocfs_file_entry * fe, sb4 LogNodeNum) { int status = 0; - ocfs_dir_node *PDirNode = NULL; + ocfs_file_entry *dir_hdr_fe = NULL; + ocfs_dir_node *PDirNode; ub4 size; ub4 numUpdt; ub8 currentDirNode; @@ -633,13 +646,16 @@ int ocfs_free_directory_block (ocfs_supe size = OCFS_SECTOR_SIZE; - status = - ocfs_get_file_entry (osb, (ocfs_file_entry **) (&PDirNode), - currentDirNode); - if (status < 0) { + /* !!! I hate this stuff... 
*/ + status = ocfs_get_file_entry (osb, &dir_hdr_fe, currentDirNode); + if (status < 0 || dir_hdr_fe==NULL) { + if (status >= 0) + status = -EFAIL; LOG_ERROR_STATUS (status); goto leave; } + /* alloc a file entry, but use it as a dir node header. yeah. ok. */ + PDirNode = (ocfs_dir_node *)dir_hdr_fe; pCleanupLogRec->log_id = osb->curr_trans_id; pCleanupLogRec->log_type = LOG_FREE_BITMAP; @@ -673,9 +689,9 @@ int ocfs_free_directory_block (ocfs_supe /* LOG_FREE_BITMAP */ if (PDirNode->next_node_ptr != INVALID_NODE_POINTER) { - status = - ocfs_read_disk (osb, PDirNode, OCFS_SECTOR_SIZE, - PDirNode->next_node_ptr); + status = ocfs_read_disk (osb, PDirNode, + OCFS_SECTOR_SIZE, + PDirNode->next_node_ptr); if (status < 0) { LOG_ERROR_STATUS (status); goto leave; @@ -697,7 +713,8 @@ int ocfs_free_directory_block (ocfs_supe } } leave: - ocfs_safefree (PDirNode); + if (dir_hdr_fe!=NULL) + ocfs_release_file_entry(dir_hdr_fe); ocfs_safefree (pCleanupLogRec); LOG_EXIT_STATUS (status); @@ -708,7 +725,8 @@ int ocfs_free_directory_block (ocfs_supe * ocfs_recover_dir_node() * */ -int ocfs_recover_dir_node (ocfs_super * osb, ub8 OrigDirNodeOffset, ub8 SavedDirNodeOffset) +int ocfs_recover_dir_node (ocfs_super * osb, ub8 OrigDirNodeOffset, + ub8 SavedDirNodeOffset) { LOG_ENTRY (); @@ -720,16 +738,16 @@ int ocfs_recover_dir_node (ocfs_super * * ocfs_read_dir_node() * */ -int ocfs_read_dir_node (ocfs_super * osb, ocfs_dir_node * DirNode, ub8 NodeDiskOffset) +int ocfs_read_dir_node (ocfs_super * osb, ocfs_dir_node * DirNode, + ub8 NodeDiskOffset) { int status = 0; LOG_ENTRY (); /* Read in the Dir Node from the disk into the buffer supplied */ - status = - ocfs_read_disk (osb, DirNode, osb->vol_layout.dir_node_size, - NodeDiskOffset); + status = ocfs_read_disk (osb, DirNode, osb->vol_layout.dir_node_size, + NodeDiskOffset); if (status < 0) { LOG_ERROR_STATUS (status); } @@ -1525,6 +1543,7 @@ int ocfs_insert_file (ocfs_super * osb, LOG_ERROR_STATUS (status); goto leave; } + memset 
(pNewDirNode, 0, osb->vol_layout.dir_node_size); ocfs_initialize_dir_node (osb, pNewDirNode, bitmapOffset, fileOffset, osb->node_num); } diff -urNp x-ref/fs/ocfs/Common/ocfsgendlm.c x/fs/ocfs/Common/ocfsgendlm.c --- x-ref/fs/ocfs/Common/ocfsgendlm.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/ocfsgendlm.c Mon Oct 21 04:41:19 2002 @@ -33,15 +33,15 @@ * ocfs_insert_cache_link() * */ -int ocfs_insert_cache_link (ocfs_super * osb, ocfs_lock_res * LockResource) +int ocfs_insert_cache_link (ocfs_super * osb, ocfs_lock_res * lockres) { int status = 0; LOG_ENTRY (); - LockResource->in_cache_list = true; + lockres->in_cache_list = true; - list_add_tail (&(LockResource->cache_list), &(osb->cache_lock_list)); + list_add_tail (&(lockres->cache_list), &(osb->cache_lock_list)); LOG_EXIT_STATUS (status); return status; @@ -51,70 +51,54 @@ int ocfs_insert_cache_link (ocfs_super * * ocfs_update_lock_state() * */ -int ocfs_update_lock_state (ocfs_super * osb, ocfs_lock_res * LockResource, ub4 Flags) +int ocfs_update_lock_state (ocfs_super * osb, ocfs_lock_res * lockres, + ub4 flags) { ub4 votemap; int status = 0; int tmpstat; ub8 lockseqno = 0; - LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u)\n", osb, LockResource, Flags); + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u)\n", osb, lockres, flags); - votemap = (1 << LockResource->master_node_num); + votemap = (1 << lockres->master_node_num); - status = - ocfs_prime_voting (osb, LockResource->sector_num, LockResource->lock_type, - votemap); + status = ocfs_prime_voting (osb, lockres->sector_num, + lockres->lock_type, votemap); if (status < 0) { - /* Lock up Volume... 
*/ LOG_ERROR_STATUS (status); goto finito; } /* Call Comm layer to broadcast to all nodes alive, that this node */ /* wants exclusive access to the lock */ - status = ocfs_request_vote (osb, LockResource->sector_num, - LockResource->lock_type, Flags, votemap, - &lockseqno); + status = ocfs_request_vote (osb, lockres->sector_num, + lockres->lock_type, flags, votemap, &lockseqno); if (status < 0) { - LOG_ERROR_ARGS - ("ocfs_update_master_on_open()... ocfs_request_vote failed map " - "0x%08x.0x%08x, lockid %u.%u, status 0x%08x\n", - HI (LockResource->oin_openmap), - LO (LockResource->oin_openmap), - HI (LockResource->sector_num), - LO (LockResource->sector_num), status); + LOG_ERROR_STATUS (status); goto finito; } - status = - ocfs_wait_for_vote (osb, LockResource->sector_num, LockResource->lock_type, - Flags, votemap, 5000, /*5sec */ lockseqno, - LockResource); + status = ocfs_wait_for_vote (osb, lockres->sector_num, + lockres->lock_type, flags, votemap, 5000, + lockseqno, lockres); if (status < 0) { - LOG_ERROR_ARGS - ("ocfs_update_master_on_open()... 
ocfs_wait_for_vote failed map " - "0x%08x.0x%08x, lockid %u.%u, status 0x%08x\n", - HI (LockResource->oin_openmap), - LO (LockResource->oin_openmap), - HI (LockResource->sector_num), - LO (LockResource->sector_num), status); + LOG_ERROR_STATUS (status); goto finito; } /* Change the lock id on the disk and on the resource */ - if (Flags & FLAG_CHANGE_MASTER) { - LockResource->master_node_num = osb->node_num; + if (flags & FLAG_CHANGE_MASTER) { + lockres->master_node_num = osb->node_num; } /* Lock state should transition from no lock */ finito: - tmpstat = ocfs_reset_voting (osb, LockResource->sector_num, - LockResource->lock_type, votemap); + tmpstat = ocfs_reset_voting (osb, lockres->sector_num, + lockres->lock_type, votemap); if (tmpstat < 0) { - status = tmpstat; - LOG_ERROR_STATUS (status); + LOG_ERROR_STATUS (status = tmpstat); } LOG_EXIT_STATUS (status); @@ -125,9 +109,8 @@ int ocfs_update_lock_state (ocfs_super * * ocfs_disk_request_vote() * */ -int ocfs_disk_request_vote (ocfs_super * osb, - ub8 LockId, - ub4 LockType, ub4 Flags, ub8 VoteMap, ub8 * LockSeqNo) +int ocfs_disk_request_vote (ocfs_super * osb, ub8 lock_id, ub4 lock_type, + ub4 flags, ub8 vote_map, ub8 * lock_seq_num) { int status = 0; ub8 offset = 0; @@ -139,78 +122,55 @@ int ocfs_disk_request_vote (ocfs_super * ocfs_publish *pubsect = NULL; ub8 largestseqno = 0; ub8 pubmap = 0; + ub1 *p; LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x.0x%08x, 0x%08x)\n", osb, - HI (LockId), LO (LockId), LockType, Flags, - HI (VoteMap), LO (VoteMap), LockSeqNo); + HI (lock_id), LO (lock_id), lock_type, flags, + HI (vote_map), LO (vote_map), lock_seq_num); pubmap = osb->publ_map; - offset = osb->vol_layout.publ_sect_off; - - numnodes = osb->num_cfg_nodes; - - OCFS_ASSERT (numnodes); - - /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - * !! kch - This seems like a bug to me. !! - * !! What if node #2 of 3 drops out? !! - * !! numnodes will be 2 but node #3 !! - * !! will still be writing to the third !! - * !! 
slot, right?!?! !! - * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */ + numnodes = OCFS_MAXIMUM_NODES; size = (numnodes * osb->sect_size); - buffer = ocfs_malloc (size); - if (buffer == NULL) { - LOG_ERROR_STATUS (status = -ENOMEM); - goto finally; - } - /* Read the Publish Sector of all nodes */ - status = ocfs_read_disk (osb, buffer, size, offset); + status = ocfs_read_disk_ex (osb, (void **)&buffer, size, size, offset); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } - for (i = 0; i < numnodes; i++) { - /* for the time being the delay is for 1 sec */ - pubsect = - (ocfs_publish *) ((ub1 *) buffer + (i * OCFS_SECTOR_SIZE)); - if (pubsect->seq_num > largestseqno) { - largestseqno = pubsect->seq_num; - - if (pubsect->dirty) { - if (!IS_NODE_ALIVE (pubmap, i, numnodes)) { - LOG_TRACE_ARGS - ("Calling ocfs_recover_vol for NodeNum (%u)\n", - i); - /* Add recovery code here */ - ocfs_recover_vol (osb, i); - } else { - ocfs_sleep (500); /* 500 ms */ - } - status = -EAGAIN; - goto finally; - } + for (i = 0, p = buffer; i < numnodes; i++, p += osb->sect_size) { + pubsect = (ocfs_publish *) p; + if (pubsect->time == (ub8) 0) + continue; + if (pubsect->publ_seq_num <= largestseqno) + continue; + largestseqno = pubsect->publ_seq_num; + if (pubsect->dirty) { + if (!IS_NODE_ALIVE (pubmap, i, numnodes)) { + LOG_TRACE_ARGS ("ocfs_recover_vol(%u)\n", i); + ocfs_recover_vol (osb, i); + } else + ocfs_sleep (500); + status = -EAGAIN; + goto finally; } } - /* Increment the largest sequence number by one & */ - /* write it in its own Publish Sector and set the Dirty Bit */ - + /* Increment the largest sequence number by one & */ + /* write it in its own Publish Sector and set the Dirty Bit */ pubsect = (ocfs_publish *) (buffer + (osb->node_num * osb->sect_size)); largestseqno++; - pubsect->seq_num = largestseqno; + pubsect->publ_seq_num = largestseqno; pubsect->dirty = 1; pubsect->vote = FLAG_VOTE_NODE; - pubsect->vote_map = VoteMap; - pubsect->vote_type = Flags; - 
pubsect->dir_ent = LockId; + pubsect->vote_map = vote_map; + pubsect->vote_type = flags; + pubsect->dir_ent = lock_id; pub_off = osb->vol_layout.publ_sect_off + - (osb->node_num * osb->sect_size); + (osb->node_num * osb->sect_size); LOG_TRACE_ARGS ("Writing Publish Sector (%d)\n", pubsect->vote); @@ -220,7 +180,7 @@ int ocfs_disk_request_vote (ocfs_super * goto finally; } - *LockSeqNo = largestseqno; + *lock_seq_num = largestseqno; finally: ocfs_safefree (buffer); @@ -233,36 +193,34 @@ int ocfs_disk_request_vote (ocfs_super * * ocfs_wait_for_disk_lock_release() * * @osb: ocfs super block for the volume - * @Offset: - * @TimeToWait: - * @LockType: lowest level to which a lock must deprecate for us to break out. + * @offset: + * @time_to_wait: + * @lock_type: lowest level to which a lock must deprecate for us to break out. * * Returns 0 of success, < 0 if error. */ -int ocfs_wait_for_disk_lock_release (ocfs_super * osb, - ub8 Offset, ub4 TimeToWait, ub4 LockType) +int ocfs_wait_for_disk_lock_release (ocfs_super * osb, ub8 offset, + ub4 time_to_wait, ub4 lock_type) { int status = -ETIMEDOUT; int tmpstat = -ETIMEDOUT; ub4 timewaited = 0; - ocfs_file_entry *fileentry = NULL; + ocfs_file_entry *fe = NULL; LOG_ENTRY (); /* Create a sepearate thread which should set the event of the */ /* resource after N retries. 
*/ - fileentry = ocfs_allocate_file_entry (); - if (fileentry == NULL) { + fe = ocfs_allocate_file_entry (); + if (fe == NULL) { LOG_ERROR_STATUS (status = -ENOMEM); goto finally; } - while (TimeToWait > timewaited) { - memset (fileentry, 0, sizeof (ocfs_file_entry)); - - tmpstat = - ocfs_read_force_disk (osb, fileentry, osb->sect_size, Offset); + while (time_to_wait > timewaited) { + memset (fe, 0, sizeof (ocfs_file_entry)); + tmpstat = ocfs_read_force_disk (osb, fe, osb->sect_size, offset); if (tmpstat < 0) { LOG_ERROR_STATUS (status = tmpstat); goto finally; @@ -270,54 +228,39 @@ int ocfs_wait_for_disk_lock_release (ocf /* This will always be zero when the first Node comes up after reboot */ /* (for volume lock) */ - if ((DISK_LOCK_CURRENT_MASTER (fileentry) == - OCFS_INVALID_NODE_NUM) - || (DISK_LOCK_CURRENT_MASTER (fileentry) == osb->node_num)) { - /* Lock was released */ + if ((DISK_LOCK_CURRENT_MASTER (fe) == OCFS_INVALID_NODE_NUM) || + (DISK_LOCK_CURRENT_MASTER (fe) == osb->node_num)) { status = 0; goto finally; } - if (!IS_NODE_ALIVE - (osb->publ_map, DISK_LOCK_CURRENT_MASTER (fileentry), - OCFS_MAXIMUM_NODES)) { - /* The node holding the lock is dead do recovery... */ - LOG_TRACE_ARGS - ("Calling ocfs_recover_vol for NodeNum (%d)", - DISK_LOCK_CURRENT_MASTER (fileentry)); - -// ocfs_recover_vol(osb, DISK_LOCK_CURRENT_MASTER(pFileEntry)); + if (!IS_NODE_ALIVE (osb->publ_map, + DISK_LOCK_CURRENT_MASTER (fe), + OCFS_MAXIMUM_NODES)) { +// LOG_TRACE_ARGS ("ocfs_recover_vol(%u)\n", +// DISK_LOCK_CURRENT_MASTER (fe)); +// ocfs_recover_vol(osb, DISK_LOCK_CURRENT_MASTER(fe)); /* Reset the lock as not owned and return success?? 
*/ /* This needs to be under some sort of cluster wide lock */ - - DISK_LOCK_CURRENT_MASTER (fileentry) = - OCFS_INVALID_NODE_NUM; - DISK_LOCK_FILE_LOCK (fileentry) = OCFS_DLM_NO_LOCK; + DISK_LOCK_CURRENT_MASTER (fe) = OCFS_INVALID_NODE_NUM; + DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK; status = 0; goto finally; } - LOG_TRACE_ARGS - ("Master node is (%d) for lockid %u.%u, lock state " - "is (%d)\n", DISK_LOCK_CURRENT_MASTER (fileentry), - HI (Offset), LO (Offset), DISK_LOCK_FILE_LOCK (fileentry)); - /* If we are here in the code it means the local node is not the master */ - - if (DISK_LOCK_FILE_LOCK (fileentry) <= LockType) { - /* Lock was released */ + if (DISK_LOCK_FILE_LOCK (fe) <= lock_type) { status = 0; goto finally; } else - ocfs_sleep (200); /* in ms */ - + ocfs_sleep (200); timewaited += 200; } finally: - if (fileentry != NULL) - ocfs_release_file_entry (fileentry); + if (fe != NULL) + ocfs_release_file_entry (fe); LOG_EXIT_STATUS (status); return status; @@ -327,130 +270,88 @@ int ocfs_wait_for_disk_lock_release (ocf * ocfs_wait_for_lock_release() * */ -int ocfs_wait_for_lock_release (ocfs_super * osb, - ub8 Offset, - ub4 TimeToWait, ocfs_lock_res * LockResource, ub4 LockType) -{ /* This is the lowest level to which a */ - /* lock must deprecate for us to break out. 
*/ +int ocfs_wait_for_lock_release (ocfs_super * osb, ub8 offset, ub4 time_to_wait, + ocfs_lock_res * lockres, ub4 lock_type) +{ int status = -ETIMEDOUT; int tmpstat = -ETIMEDOUT; ub4 timewaited = 0; - ocfs_file_entry *fileentry = NULL; + ocfs_file_entry *fe = NULL; ub4 length = 0; LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, 0x%08x, %u)\n", osb, - HI (Offset), LO (Offset), TimeToWait, - LockResource, LockType); + HI (offset), LO (offset), time_to_wait, + lockres, lock_type); - /* Create a sepearate thread which should set the event of the */ - /* resource after N retries */ - fileentry = ocfs_allocate_file_entry (); - if (fileentry == NULL) { + fe = ocfs_allocate_file_entry (); + if (fe == NULL) { LOG_ERROR_STATUS (status = -ENOMEM); goto finally; } - /* For the time being the delay is for 1 second */ - while (TimeToWait > timewaited) { - memset (fileentry, 0, sizeof (ocfs_file_entry)); + while (time_to_wait > timewaited) { + memset (fe, 0, sizeof (ocfs_file_entry)); length = osb->sect_size; - - tmpstat = ocfs_read_disk (osb, fileentry, length, Offset); + tmpstat = ocfs_read_disk (osb, fe, length, offset); if (tmpstat < 0) { LOG_ERROR_STATUS (status = tmpstat); goto finally; } - /* ?? this will always be zero when the first Node comes up */ - /* after reboot ( for volume lock) */ - if ((DISK_LOCK_CURRENT_MASTER (fileentry) == - OCFS_INVALID_NODE_NUM) - || (DISK_LOCK_CURRENT_MASTER (fileentry) == osb->node_num)) { - /* Lock was released */ + if ((DISK_LOCK_CURRENT_MASTER (fe) == OCFS_INVALID_NODE_NUM) || + (DISK_LOCK_CURRENT_MASTER (fe) == osb->node_num)) { status = 0; goto finally; } - if (!IS_NODE_ALIVE - (osb->publ_map, DISK_LOCK_CURRENT_MASTER (fileentry), - OCFS_MAXIMUM_NODES)) { - /* The node holding the lock is dead do recovery... 
** TODO: */ - - LOG_ERROR_ARGS - ("ocfs_wait_for_lock_release() calling ocfs_recover_vol for " - "NodeNum (%d)\n", - DISK_LOCK_CURRENT_MASTER (fileentry)); - -// ocfs_recover_vol(osb, DISK_LOCK_CURRENT_MASTER(pFileEntry)); + if (!IS_NODE_ALIVE (osb->publ_map, DISK_LOCK_CURRENT_MASTER(fe), + OCFS_MAXIMUM_NODES)) { +// LOG_ERROR_ARGS ("ocfs_recover_vol(%u)\n", +// DISK_LOCK_CURRENT_MASTER (fe)); +// ocfs_recover_vol(osb, DISK_LOCK_CURRENT_MASTER(fe)); /* Reset the lock as not owned and return success?? */ /* This needs to be under some sort of cluster wide lock, */ - - DISK_LOCK_CURRENT_MASTER (fileentry) = - OCFS_INVALID_NODE_NUM; - DISK_LOCK_FILE_LOCK (fileentry) = OCFS_DLM_NO_LOCK; - + DISK_LOCK_CURRENT_MASTER (fe) = OCFS_INVALID_NODE_NUM; + DISK_LOCK_FILE_LOCK (fe) = OCFS_DLM_NO_LOCK; status = 0; goto finally; } - LOG_TRACE_ARGS ("Master Node is (%d) for Lock 0x%08x.0x%08x, " - "Lock State is (%u)\n", - DISK_LOCK_CURRENT_MASTER (fileentry), - HI (Offset), LO (Offset), - DISK_LOCK_FILE_LOCK (fileentry)); - - /* The Local node is not the master */ - - if (DISK_LOCK_FILE_LOCK (fileentry) >= - OCFS_DLM_ENABLE_CACHE_LOCK) { - LockResource->lock_type = - DISK_LOCK_FILE_LOCK (fileentry); - LockResource->master_node_num = - DISK_LOCK_CURRENT_MASTER (fileentry); - - LOG_TRACE_STR ("Calling ocfs_break_cache_lock"); - - status = ocfs_break_cache_lock (osb, LockResource); + /* The local node is not the master */ + if (DISK_LOCK_FILE_LOCK (fe) >= OCFS_DLM_ENABLE_CACHE_LOCK) { + lockres->lock_type = DISK_LOCK_FILE_LOCK (fe); + lockres->master_node_num = DISK_LOCK_CURRENT_MASTER (fe); + status = ocfs_break_cache_lock (osb, lockres); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } - - /* if we succeeded, make sure the file entry shows the change */ - DISK_LOCK_FILE_LOCK (fileentry) = - LockResource->lock_type; - DISK_LOCK_CURRENT_MASTER (fileentry) = - LockResource->master_node_num; + DISK_LOCK_FILE_LOCK (fe) = lockres->lock_type; + DISK_LOCK_CURRENT_MASTER (fe) = 
lockres->master_node_num; } - if (DISK_LOCK_FILE_LOCK (fileentry) <= LockType) { - /* Lock was released */ + if (DISK_LOCK_FILE_LOCK (fe) <= lock_type) { status = 0; goto finally; } else { - ocfs_sleep (200); /* 200 ms */ + ocfs_sleep (200); } timewaited += 200; } finally: - if (LockResource && status >= 0) { - ocfs_acquire_lockres (LockResource); - - LockResource->lock_type = DISK_LOCK_FILE_LOCK (fileentry); - LockResource->master_node_num = - DISK_LOCK_CURRENT_MASTER (fileentry); - LockResource->oin_openmap = DISK_LOCK_OIN_MAP (fileentry); - - /* ?? shld we keep two times in lockresource also,or just the greater */ - /* of two */ - LockResource->last_lock_upd = DISK_LOCK_LAST_WRITE (fileentry); - ocfs_release_lockres (LockResource); + if (lockres && status >= 0) { + ocfs_acquire_lockres (lockres); + lockres->lock_type = DISK_LOCK_FILE_LOCK (fe); + lockres->master_node_num = DISK_LOCK_CURRENT_MASTER (fe); + lockres->oin_openmap = DISK_LOCK_OIN_MAP (fe); + lockres->last_lock_upd = DISK_LOCK_LAST_WRITE (fe); + ocfs_release_lockres (lockres); } - if (fileentry != NULL) - ocfs_release_file_entry (fileentry); + if (fe != NULL) + ocfs_release_file_entry (fe); LOG_EXIT_STATUS (status); return status; @@ -460,105 +361,73 @@ int ocfs_wait_for_lock_release (ocfs_sup * ocfs_get_vote_on_disk() * */ -int ocfs_get_vote_on_disk (ocfs_super * osb, - ub8 LockId, - ub4 LockType, - ub4 Flags, - ub8 * GotVoteMap, ub8 VoteMap, ub8 LockSeqNum, - ub8 * oin_open_map) +int ocfs_get_vote_on_disk (ocfs_super * osb, ub8 lock_id, ub4 lock_type, + ub4 flags, ub8 * got_vote_map, ub8 vote_map, + ub8 lock_seq_num, ub8 * oin_open_map) { int status = 0; ub4 length = 0; ub4 i; ub4 numnodes; ub1 *buffer = NULL; + ocfs_vote *vote; + ub1 *p; - LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x, 0x%08x.0x%08x, " - "%u.%u, 0x%08x)\n", osb, HI (LockId), LO (LockId), - LockType, Flags, GotVoteMap, HI (VoteMap), LO (VoteMap), - HI (LockSeqNum), LO (LockSeqNum), oin_open_map); + LOG_ENTRY_ARGS ("lockid=%u.%u, 
locktype=%u, votemap=0x%08x)\n", + HI (lock_id), LO (lock_id), lock_type, LO (vote_map)); - /* Can be number of configured nodes, active nodesif node numbers */ - /* are contiguous */ numnodes = OCFS_MAXIMUM_NODES; - /* TODO Is this still a valid assumption the other node could be */ - /* arbitrating a different entry we shd retry in that case */ - /* Read the vote sectors of all the nodes */ length = numnodes * osb->sect_size; - buffer = ocfs_malloc (length); - if (buffer == NULL) { - LOG_ERROR_STATUS (status = -ENOMEM); - goto finally; - } - - status = ocfs_read_disk (osb, buffer, length, - osb->vol_layout.vote_sect_off); + status = ocfs_read_disk_ex (osb, (void **)&buffer, length, length, + osb->vol_layout.vote_sect_off); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } - /* Change here for the bitmap */ - for (i = 0; i < numnodes; i++) { - ocfs_vote *VoteSect; - - VoteSect = (ocfs_vote *) (buffer + (i * osb->sect_size)); - - /* - ** Don't check the snapshot of publ_map from ocfs_nm_thread. - ** Instead of this we should kick in NM thread and get the - ** latest publ_map. - */ - if ((IS_NODE_ALIVE (VoteMap, i, numnodes)) && + for (i = 0, p = buffer; i < numnodes; i++, p += osb->sect_size) { + vote = (ocfs_vote *) p; + + /* A node we were asking to vote is dead */ + if ((IS_NODE_ALIVE (vote_map, i, numnodes)) && !(IS_NODE_ALIVE (osb->publ_map, i, numnodes))) { - /* A node we were asking to vote is dead */ - if (Flags & FLAG_FILE_UPDATE_OIN) { - (*GotVoteMap) |= 1 << i; + if (flags & FLAG_FILE_UPDATE_OIN) { + (*got_vote_map) |= 1 << i; } else { - /* - ** These are essentially the cases where we were either making - ** a master node or changing/updating the master node and - ** it died midway... 
- */ status = -EAGAIN; goto finally; } } - if ((IS_NODE_ALIVE (VoteMap, i, numnodes)) && - (IS_NODE_ALIVE (osb->publ_map, i, numnodes)) && - (VoteSect->seq_num == LockSeqNum) - && (VoteSect->dir_ent == LockId)) { - if (VoteSect->vote[osb->node_num] == FLAG_VOTE_NODE) { - (*GotVoteMap) |= 1 << i; - if ((Flags & FLAG_FILE_EXTEND) - || (Flags & FLAG_FILE_UPDATE)) { - (*oin_open_map) |= - (VoteSect->open_handle << i); - } - } else if (VoteSect->vote[osb->node_num] == - FLAG_VOTE_OIN_ALREADY_INUSE) { - (*GotVoteMap) |= 1 << i; - status = -EFAIL; + if (!IS_NODE_ALIVE (vote_map, i, numnodes) || + !IS_NODE_ALIVE (osb->publ_map, i, numnodes) || + vote->vote_seq_num != lock_seq_num || + vote->dir_ent != lock_id) + continue; - if (Flags & FLAG_FILE_DELETE) { - status = -EBUSY; - } - goto finally; - } else if (VoteSect->vote[osb->node_num] == - FLAG_VOTE_OIN_UPDATED) { - (*GotVoteMap) |= 1 << i; - } else if (VoteSect->vote[osb->node_num] == - FLAG_VOTE_UPDATE_RETRY) { - status = -EAGAIN; - goto finally; - } else if (VoteSect->vote[osb->node_num] == - FLAG_VOTE_FILE_DEL) { - status = -ENOENT; - goto finally; + /* A node we were asking to vote is alive */ + if (vote->vote[osb->node_num] == FLAG_VOTE_NODE) { + (*got_vote_map) |= 1 << i; + if (flags & FLAG_FILE_EXTEND || flags & FLAG_FILE_UPDATE) { + (*oin_open_map) |= (vote->open_handle << i); + } + } else if (vote->vote[osb->node_num] == FLAG_VOTE_OIN_ALREADY_INUSE) { + (*got_vote_map) |= 1 << i; + status = -EFAIL; + if (flags & FLAG_FILE_DELETE) { + status = -EBUSY; } + goto finally; + } else if (vote->vote[osb->node_num] == FLAG_VOTE_OIN_UPDATED) { + (*got_vote_map) |= 1 << i; + } else if (vote->vote[osb->node_num] == FLAG_VOTE_UPDATE_RETRY) { + status = -EAGAIN; + goto finally; + } else if (vote->vote[osb->node_num] == FLAG_VOTE_FILE_DEL) { + status = -ENOENT; + goto finally; } } @@ -573,26 +442,20 @@ int ocfs_get_vote_on_disk (ocfs_super * * ocfs_disk_reset_voting() * */ -int ocfs_disk_reset_voting (ocfs_super * osb, ub8 
LockId, ub4 LockType) +int ocfs_disk_reset_voting (ocfs_super * osb, ub8 lock_id, ub4 lock_type) { int status = 0; ocfs_publish *pubsect = NULL; ub8 offset = 0; - LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u)\n", osb, HI (LockId), LO (LockId), - LockType); - - pubsect = ocfs_malloc (osb->sect_size); - if (pubsect == NULL) { - LOG_ERROR_STATUS (status = -ENOMEM); - goto finally; - } + LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u)\n", osb, HI (lock_id), + LO (lock_id), lock_type); + /* Read node's publish sector */ offset = osb->vol_layout.publ_sect_off + - (osb->node_num * osb->sect_size); - - /* Read Disk */ - status = ocfs_read_disk (osb, pubsect, osb->sect_size, offset); + (osb->node_num * osb->sect_size); + status = ocfs_read_disk_ex (osb, (void *)&pubsect, osb->sect_size, + osb->sect_size, offset); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; @@ -602,7 +465,7 @@ int ocfs_disk_reset_voting (ocfs_super * pubsect->vote = 0; pubsect->vote_type = 0; - /* Write Disk */ + /* Write it back */ status = ocfs_write_disk (osb, pubsect, osb->sect_size, offset); if (status < 0) { LOG_ERROR_STATUS (status); @@ -620,68 +483,41 @@ int ocfs_disk_reset_voting (ocfs_super * * ocfs_wait_for_vote() * */ -int ocfs_wait_for_vote (ocfs_super * osb, - ub8 LockId, - ub4 LockType, - ub4 Flags, - ub8 VoteMap, - ub4 TimeToWait, ub8 LockSeqNum, ocfs_lock_res * LockResource) +int ocfs_wait_for_vote (ocfs_super * osb, ub8 lock_id, ub4 lock_type, ub4 flags, + ub8 vote_map, ub4 time_to_wait, ub8 lock_seq_num, + ocfs_lock_res * lockres) { int status = -EAGAIN; ub4 timewaited = 0; ub8 gotvotemap = 0; ub8 fileopenmap = 0; - LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x.0x%08x, %u, " - "%u.%u, 0x%08x)\n", osb, HI (LockId), LO (LockId), - LockType, Flags, HI (VoteMap), LO (VoteMap), - TimeToWait, HI (LockSeqNum), LO (LockSeqNum), - LockResource); + LOG_ENTRY_ARGS ("lockid=%u.%u, locktype=%u, votemap=0x%08x)\n", + HI (lock_id), LO (lock_id), lock_type, LO (vote_map)); #define WAIT_FOR_VOTE_INCREMENT 200 
- /* Create a sepearate thread which should set the event of the */ - /* resource after N retries. */ - while (TimeToWait > timewaited) { + while (time_to_wait > timewaited) { ocfs_sleep (WAIT_FOR_VOTE_INCREMENT); gotvotemap |= (1 << osb->node_num); - /* GetVoteOnComm */ - status = - ocfs_get_vote_on_disk (osb, LockId, LockType, Flags, &gotvotemap, - VoteMap, LockSeqNum, &fileopenmap); + status = ocfs_get_vote_on_disk (osb, lock_id, lock_type, flags, + &gotvotemap, vote_map, lock_seq_num, &fileopenmap); if (status < 0) { - LOG_ERROR_ARGS - ("ocfs_get_vote_on_disk failed with gotmap 0x%08x.0x%08x, " - "lockid %u.%u votemap 0x%08x.0x%08x\n", - HI (gotvotemap), LO (gotvotemap), HI (LockId), - LO (LockId), HI (VoteMap), LO (VoteMap)); + LOG_ERROR_STATUS (status); goto bail; } if (!(gotvotemap & (1 << osb->node_num))) { /* We need to bail out... */ - LOG_ERROR_ARGS - ("ocfs_get_vote_on_disk failed with gotmap did not have " - "this node num RETRY 0x%08x.0x%08x, lockid %u.%u, " - "votemap 0x%08x.0x%08x\n", HI (gotvotemap), - LO (gotvotemap), HI (LockId), LO (LockId), - HI (VoteMap), LO (VoteMap)); - status = -EAGAIN; + LOG_ERROR_STATUS (status = -EAGAIN); goto bail; } - VoteMap |= (1 << osb->node_num); - if (VoteMap == gotvotemap) { - if ((Flags & FLAG_FILE_EXTEND) - || (Flags & FLAG_FILE_UPDATE)) - LockResource->oin_openmap = fileopenmap; - - LOG_TRACE_ARGS ("ocfs_get_vote_on_disk SUCCESS 0x%08x.0x%08x, " - "lockid %u.%u, fileOpenMap 0x%08x.0x%08x\n", - HI (gotvotemap), LO (gotvotemap), - HI (LockId), LO (LockId), - HI (fileopenmap), LO (fileopenmap)); + vote_map |= (1 << osb->node_num); + if (vote_map == gotvotemap) { + if ((flags & FLAG_FILE_EXTEND) || (flags & FLAG_FILE_UPDATE)) + lockres->oin_openmap = fileopenmap; status = 0; goto bail; } @@ -697,31 +533,23 @@ int ocfs_wait_for_vote (ocfs_super * osb * ocfs_prime_voting() * */ -int ocfs_prime_voting (ocfs_super * osb, ub8 LockId, ub4 LockType, ub8 VoteMap) +int ocfs_prime_voting (ocfs_super * osb, ub8 lock_id, ub4 
lock_type, ub8 vote_map) { - int status = 0; - - LOG_ENTRY (); - - LOG_EXIT_STATUS (status); - return status; + return 0; } /* ocfs_prime_voting */ /* * ocfs_reset_voting() * */ -int ocfs_reset_voting (ocfs_super * osb, ub8 LockId, ub4 LockType, ub8 VoteMap) +int ocfs_reset_voting (ocfs_super * osb, ub8 lock_id, ub4 lock_type, + ub8 vote_map) { int status; - LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, 0x%08x.0x%08x)\n", osb, - HI (LockId), LO (LockId), LockType, HI (VoteMap), - LO (VoteMap)); - - /* COMM reset VOTING */ + LOG_ENTRY (); - status = ocfs_disk_reset_voting (osb, LockId, LockType); + status = ocfs_disk_reset_voting (osb, lock_id, lock_type); LOG_EXIT_STATUS (status); return status; @@ -731,17 +559,15 @@ int ocfs_reset_voting (ocfs_super * osb, * ocfs_request_vote() * */ -int ocfs_request_vote (ocfs_super * osb, - ub8 LockId, ub4 LockType, ub4 Flags, ub8 VoteMap, ub8 * LockSeqNo) +int ocfs_request_vote (ocfs_super * osb, ub8 lock_id, ub4 lock_type, ub4 flags, + ub8 vote_map, ub8 * lock_seq_num) { int status; - LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x.0x%08x, 0x%08x)\n", osb, - HI (LockId), LO (LockId), LockType, Flags, - HI (VoteMap), LO (VoteMap), LockSeqNo); + LOG_ENTRY (); - status = - ocfs_disk_request_vote (osb, LockId, LockType, Flags, VoteMap, LockSeqNo); + status = ocfs_disk_request_vote (osb, lock_id, lock_type, flags, + vote_map, lock_seq_num); LOG_EXIT_STATUS (status); return status; @@ -751,9 +577,8 @@ int ocfs_request_vote (ocfs_super * osb, * ocfs_comm_request_vote() * */ -int ocfs_comm_request_vote (ocfs_super * osb, - ub8 LockId, - ub4 LockType, ub4 Flags, ocfs_file_entry * FileEntry) +int ocfs_comm_request_vote (ocfs_super * osb, ub8 lock_id, ub4 lock_type, + ub4 flags, ocfs_file_entry * fe) { ub8 votemap; ocfs_lock_res *lockres; @@ -761,16 +586,15 @@ int ocfs_comm_request_vote (ocfs_super * LOG_ENTRY (); - status = ocfs_lookup_sector_node (osb, LockId, &lockres); + status = ocfs_lookup_sector_node (osb, lock_id, &lockres); if (status < 
0) { LOG_ERROR_STATUS (status); goto bail; } - if (((Flags & FLAG_FILE_DELETE) || (Flags & FLAG_FILE_RENAME)) && - (!(Flags & FLAG_DIR)) && - (DISK_LOCK_CURRENT_MASTER (FileEntry) == osb->node_num)) - votemap = DISK_LOCK_OIN_MAP (FileEntry); + if (((flags & FLAG_FILE_DELETE) || (flags & FLAG_FILE_RENAME)) && + (!(flags & FLAG_DIR)) && (DISK_LOCK_CURRENT_MASTER (fe) == osb->node_num)) + votemap = DISK_LOCK_OIN_MAP (fe); else votemap = osb->publ_map; @@ -778,23 +602,19 @@ int ocfs_comm_request_vote (ocfs_super * status = 0; if (votemap != 0) - status = - ocfs_send_dlm_request_msg (osb, LockId, LockType, Flags, - lockres, votemap, - OCFS_DISK_VOTE_REQUEST); + status = ocfs_send_dlm_request_msg (osb, lock_id, lock_type, + flags, lockres, votemap, OCFS_DISK_VOTE_REQUEST); if (status == -ETIMEDOUT) { LOG_TRACE_STR ("IPC voting timed out"); status = -EFAIL; goto bail; } else { - LOG_TRACE_STR ("Got vote on comm in 1sec, what to do now?!?!"); if (lockres->vote_status >= 0) { - lockres->lock_type = (ub1) LockType; + lockres->lock_type = (ub1) lock_type; lockres->master_node_num = osb->node_num; status = ocfs_update_disk_lock (osb, lockres, - DLOCK_FLAG_MASTER | - DLOCK_FLAG_LOCK, FileEntry); + DLOCK_FLAG_MASTER | DLOCK_FLAG_LOCK, fe); if (status < 0) { LOG_ERROR_STATUS (status); goto bail; @@ -813,16 +633,16 @@ int ocfs_comm_request_vote (ocfs_super * * ocfs_init_dlm_msg() * */ -void ocfs_init_dlm_msg (ocfs_super * osb, ocfs_dlm_msg * DlmMesg, ub4 MsgSize) +void ocfs_init_dlm_msg (ocfs_super * osb, ocfs_dlm_msg * dlm_msg, ub4 msg_len) { LOG_ENTRY (); - DlmMesg->magic = OCFS_DLM_MSG_MAGIC; - DlmMesg->msg_len = MsgSize; + dlm_msg->magic = OCFS_DLM_MSG_MAGIC; + dlm_msg->msg_len = msg_len; - memcpy (DlmMesg->vol_id, osb->vol_layout.id, MAX_VOL_ID_LENGTH); + memcpy (dlm_msg->vol_id, osb->vol_layout.vol_id, MAX_VOL_ID_LENGTH); - DlmMesg->src_node = osb->node_num; + dlm_msg->src_node = osb->node_num; LOG_EXIT (); return; @@ -832,75 +652,60 @@ void ocfs_init_dlm_msg (ocfs_super * 
osb * ocfs_send_dlm_request_msg() * */ -int ocfs_send_dlm_request_msg (ocfs_super * osb, - ub8 LockId, - ub4 LockType, - ub4 Flags, - ocfs_lock_res * LockResource, ub8 VoteMap, ub4 MesgType) +int ocfs_send_dlm_request_msg (ocfs_super * osb, ub8 lock_id, ub4 lock_type, + ub4 flags, ocfs_lock_res * lockres, + ub8 vote_map, ub4 msg_type) { int status = 0; - ocfs_dlm_msg *DlmMesg = NULL; - ub4 msgSize; - bool UpdateDisk = true; + ocfs_dlm_msg *dlm_msg = NULL; + ub4 msg_len; + bool update_disk = true; //SM ???? LOG_ENTRY (); - msgSize = sizeof (ocfs_dlm_msg) - 1 + sizeof (ocfs_dlm_req_master); + msg_len = sizeof (ocfs_dlm_msg) - 1 + sizeof (ocfs_dlm_req_master); - DlmMesg = ocfs_malloc (msgSize); - if (DlmMesg == NULL) { + dlm_msg = ocfs_malloc (msg_len); + if (dlm_msg == NULL) { LOG_ERROR_STATUS (status = -ENOMEM); goto finally; } - /* If the lock resource has a master this is a simple request */ - /* to the master */ - ocfs_acquire_lockres (LockResource); - - if (LockResource->voted_event == NULL) { - LockResource->voted_event = - ocfs_malloc (sizeof (wait_queue_head_t)); - if (LockResource->voted_event == NULL) { - LOG_ERROR_STATUS (status = -ENOMEM); - goto finally; - } - } - - init_waitqueue_head (LockResource->voted_event); - if (LockResource->master_node_num == -1) { - UpdateDisk = true; - LockResource->lock_state |= LOCK_STATE_IN_VOTING; - LockResource->req_vote_map = VoteMap; - LockResource->got_vote_map = 0; + ocfs_acquire_lockres (lockres); + if (lockres->master_node_num == OCFS_INVALID_NODE_NUM) { + update_disk = true; + lockres->lock_state |= LOCK_STATE_IN_VOTING; + lockres->req_vote_map = vote_map; + lockres->got_vote_map = 0; } + lockres->lock_state |= LOCK_STATE_IN_VOTING; + ocfs_release_lockres (lockres); - LockResource->lock_state |= LOCK_STATE_IN_VOTING; - ocfs_release_lockres (LockResource); - - ocfs_init_dlm_msg (osb, DlmMesg, msgSize); + ocfs_init_dlm_msg (osb, dlm_msg, msg_len); - DlmMesg->msg_type = MesgType; + dlm_msg->msg_type = msg_type; - if 
(MesgType == OCFS_REQUEST_MAKE_MASTER) { + if (msg_type == OCFS_REQUEST_MAKE_MASTER) { ocfs_dlm_req_master *req = - (ocfs_dlm_req_master *) DlmMesg->msg_buf; - req->lock_id = LockId; - req->flags = Flags; - req->lock_seq_num = LockResource->last_upd_seq_num; - } else if (MesgType == OCFS_DISK_VOTE_REQUEST) { + (ocfs_dlm_req_master *) dlm_msg->msg_buf; + req->lock_id = lock_id; + req->flags = flags; + req->lock_seq_num = lockres->last_upd_seq_num; + } else if (msg_type == OCFS_DISK_VOTE_REQUEST) { ocfs_dlm_disk_vote_req *req = - (ocfs_dlm_disk_vote_req *) DlmMesg->msg_buf; - req->lock_id = LockId; - req->flags = Flags; - req->lock_seq_num = LockResource->last_upd_seq_num; + (ocfs_dlm_disk_vote_req *) dlm_msg->msg_buf; + req->lock_id = lock_id; + req->flags = flags; + req->lock_seq_num = lockres->last_upd_seq_num; } - ocfs_send_bcast (osb, VoteMap, DlmMesg); - status = ocfs_wait (LockResource->voted_event, false, 1000); + ocfs_send_bcast (osb, vote_map, dlm_msg); + status = ocfs_wait (lockres->voted_event, + atomic_read (&lockres->voted_event_woken), 1000); + atomic_set (&lockres->voted_event_woken, 0); finally: - ocfs_safefree (DlmMesg); - + ocfs_safefree (dlm_msg); LOG_EXIT_STATUS (status); return status; } /* ocfs_send_dlm_request_msg */ @@ -909,48 +714,38 @@ int ocfs_send_dlm_request_msg (ocfs_supe * ocfs_comm_make_lock_master() * */ -int ocfs_comm_make_lock_master (ocfs_super * osb, - ub8 LockId, - ub4 LockType, - ub4 Flags, - ocfs_lock_res * LockResource, - ocfs_file_entry * FileEntry, ub8 VoteMap) +int ocfs_comm_make_lock_master (ocfs_super * osb, ub8 lock_id, ub4 lock_type, + ub4 flags, ocfs_lock_res * lockres, + ocfs_file_entry * fe, ub8 vote_map) { int status = 0; - ocfs_dlm_msg *DlmMesg = NULL; - bool UpdateDisk = true; LOG_ENTRY (); - status = - ocfs_send_dlm_request_msg (osb, LockId, LockType, Flags, LockResource, - VoteMap, OCFS_REQUEST_MAKE_MASTER); + status = ocfs_send_dlm_request_msg (osb, lock_id, lock_type, flags, + lockres, vote_map, + 
OCFS_REQUEST_MAKE_MASTER); if (status == -ETIMEDOUT) { LOG_TRACE_STR ("IPC voting timed out"); status = -EFAIL; } else { - LOG_TRACE_STR ("Got vote on comm in 1sec, updating disk"); + LOG_TRACE_STR ("Got vote on comm, updating disk"); + if (lockres->vote_status >= 0) { + lockres->lock_type = (ub1) lock_type; + lockres->master_node_num = osb->node_num; - if (UpdateDisk && LockResource->vote_status >= 0) { - LockResource->lock_type = (ub1) LockType; - LockResource->master_node_num = osb->node_num; - - status = ocfs_update_disk_lock (osb, LockResource, - DLOCK_FLAG_MASTER | - DLOCK_FLAG_LOCK, FileEntry); + status = ocfs_update_disk_lock (osb, lockres, + DLOCK_FLAG_MASTER | DLOCK_FLAG_LOCK, + fe); if (status < 0) { LOG_ERROR_STATUS (status); - /* This is really bad and we need to DISABLE our ocfs_nm_thread and */ - /* let the other nodes know we are away from this volume... */ goto finally; } } - status = LockResource->vote_status; + status = lockres->vote_status; } finally: - ocfs_safefree (DlmMesg); - LOG_EXIT_STATUS (status); return status; } @@ -959,119 +754,93 @@ int ocfs_comm_make_lock_master (ocfs_sup * ocfs_make_lock_master() * */ -int ocfs_make_lock_master (ocfs_super * osb, - ub8 LockId, - ub4 LockType, - ub4 Flags, - ocfs_lock_res * LockResource, ocfs_file_entry * FileEntry) +int ocfs_make_lock_master (ocfs_super * osb, ub8 lock_id, ub4 lock_type, + ub4 flags, ocfs_lock_res * lockres, + ocfs_file_entry * fe) { - ub8 voteMap = 0; - ub8 tempMap = 0; + ub8 vote_map = 0; int status = 0; int tmpstat; ub8 lockSeqNo = 0; - LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x, 0x%08x)\n", osb, - HI (LockId), LO (LockId), LockType, Flags, LockResource, - FileEntry); + LOG_ENTRY (); - voteMap = osb->publ_map; + vote_map = osb->publ_map; - if (((Flags & FLAG_FILE_DELETE) || (Flags & FLAG_FILE_RENAME)) && - (!(Flags & FLAG_DIR)) && - (DISK_LOCK_CURRENT_MASTER (FileEntry) == osb->node_num)) { - voteMap = DISK_LOCK_OIN_MAP (FileEntry); + if (((flags & FLAG_FILE_DELETE) || 
(flags & FLAG_FILE_RENAME)) && + (!(flags & FLAG_DIR)) && + (DISK_LOCK_CURRENT_MASTER (fe) == osb->node_num)) { + vote_map = DISK_LOCK_OIN_MAP (fe); } - /* Vote over comm */ - tempMap = (1 << osb->node_num); - voteMap |= (tempMap); + vote_map &= ~(1 << osb->node_num); - status = ocfs_comm_make_lock_master (osb, LockId, LockType, Flags, LockResource, - FileEntry, voteMap); - if (status >= 0) /* if succ, return. Else continue. */ +#ifdef USE_COMM_MAKE_LOCK_MASTER + status = ocfs_comm_make_lock_master (osb, lock_id, lock_type, flags, + lockres, fe, vote_map); + if (status >= 0) goto finito; +#endif - tempMap = (1 << osb->node_num); - voteMap &= (~tempMap); + if (vote_map == 0) { + /* As this is the only node alive, make it master of the lock */ + if (lockres->lock_type <= lock_type) + lockres->lock_type = (ub1) lock_type; + lockres->master_node_num = osb->node_num; - if (voteMap == 0) { - /* This is the only node alive */ - /* Make this node the master of this lock */ - if (LockResource->lock_type <= LockType) - LockResource->lock_type = (ub1) LockType; - - LockResource->master_node_num = osb->node_num; - - /* Write that we now are the master to the disk */ - status = ocfs_update_disk_lock (osb, LockResource, - DLOCK_FLAG_MASTER | DLOCK_FLAG_LOCK, - FileEntry); + status = ocfs_update_disk_lock (osb, lockres, + DLOCK_FLAG_MASTER | DLOCK_FLAG_LOCK, fe); if (status < 0) { LOG_ERROR_STATUS (status); - /* This is really bad and we need to DISABLE our ocfs_nm_thread and */ - /* let the other nodes know we are away from this volume... 
TODO */ goto finito; } - - LOG_TRACE_ARGS ("Votemap did NOT go to disk 0x%08x.0x%08x, " - "lockid %u.%u masternode (%d)\n", - HI (voteMap), LO (voteMap), HI (LockId), - LO (LockId), - DISK_LOCK_CURRENT_MASTER (FileEntry)); goto finito; } - LOG_TRACE_ARGS - ("Votemap was 0x%08x.0x%08x, lockid %u.%u masternode (%d)\n", - HI (voteMap), LO (voteMap), HI (LockId), LO (LockId), - DISK_LOCK_CURRENT_MASTER (FileEntry)); + LOG_TRACE_ARGS ("votemap 0x%08x, lockid %u.%u, masternode (%d)\n", + LO (vote_map), HI (lock_id), LO (lock_id), + DISK_LOCK_CURRENT_MASTER (fe)); - status = ocfs_prime_voting (osb, LockId, LockType, voteMap); + status = ocfs_prime_voting (osb, lock_id, lock_type, vote_map); if (status < 0) { - /* Lock up Volume... */ LOG_ERROR_STATUS (status); goto finito; } /* Call Comm layer to broadcast to all nodes alive, that this node */ /* wants exclusive access to the lock. */ - status = - ocfs_request_vote (osb, LockId, LockType, Flags, voteMap, &lockSeqNo); + status = ocfs_request_vote (osb, lock_id, lock_type, flags, vote_map, + &lockSeqNo); if (status < 0) { LOG_ERROR_STATUS (status); goto finito; } - status = ocfs_wait_for_vote (osb, LockId, LockType, Flags, voteMap, - 5000 /* 5 secs */ , lockSeqNo, LockResource); + status = ocfs_wait_for_vote (osb, lock_id, lock_type, flags, vote_map, + 5000, lockSeqNo, lockres); if (status < 0) { LOG_ERROR_STATUS (status); goto finito; } /* Make this node the master of this lock */ - if (LockResource->lock_type <= LockType) - LockResource->lock_type = (ub1) LockType; + if (lockres->lock_type <= lock_type) + lockres->lock_type = (ub1) lock_type; - LockResource->master_node_num = osb->node_num; + lockres->master_node_num = osb->node_num; /* Write that we now are the master to the disk */ - status = ocfs_update_disk_lock (osb, LockResource, - DLOCK_FLAG_MASTER | DLOCK_FLAG_LOCK | - DLOCK_FLAG_OPEN_MAP, FileEntry); + status = ocfs_update_disk_lock (osb, lockres, + DLOCK_FLAG_MASTER | DLOCK_FLAG_LOCK | DLOCK_FLAG_OPEN_MAP, fe); 
if (status < 0) { LOG_ERROR_STATUS (status); - /* This is really bad and we need to DISABLE our ocfs_nm_thread and */ - /* let the other nodes know we are away from this volume... TODO */ goto finito; } finito: - tmpstat = ocfs_reset_voting (osb, LockId, LockType, voteMap); + tmpstat = ocfs_reset_voting (osb, lock_id, lock_type, vote_map); if (tmpstat < 0) { - status = tmpstat; - LOG_ERROR_STATUS (status); + LOG_ERROR_STATUS (status = tmpstat); } LOG_EXIT_STATUS (status); @@ -1082,34 +851,31 @@ int ocfs_make_lock_master (ocfs_super * * ocfs_acquire_lockres() * */ -void ocfs_acquire_lockres (ocfs_lock_res * LockResource) +void ocfs_acquire_lockres (ocfs_lock_res * lockres) { int mypid; - LOG_ENTRY_ARGS ("(0x%08x)\n", LockResource); + LOG_ENTRY_ARGS ("(0x%08x)\n", lockres); mypid = ocfs_getpid (); while (1) { - spin_lock (&LockResource->lock_mutex); + spin_lock (&lockres->lock_mutex); - if (LockResource->in_use) { - /* someone else has it */ - if (LockResource->thread_id != mypid) { - spin_unlock (&LockResource->lock_mutex); + if (lockres->in_use) { + if (lockres->thread_id != mypid) { + spin_unlock (&lockres->lock_mutex); ocfs_sleep (1); /* 1 ms, dangerously short! 
*/ } - /* I acquired it in this callstack already */ else { - LockResource->in_use++; - spin_unlock (&LockResource->lock_mutex); + lockres->in_use++; + spin_unlock (&lockres->lock_mutex); break; } - } else { /* no owner, take it */ - - LockResource->in_use = 1; - LockResource->thread_id = mypid; - spin_unlock (&LockResource->lock_mutex); + } else { + lockres->in_use = 1; + lockres->thread_id = mypid; + spin_unlock (&lockres->lock_mutex); break; } } @@ -1122,19 +888,19 @@ void ocfs_acquire_lockres (ocfs_lock_res * ocfs_release_lockres() * */ -void ocfs_release_lockres (ocfs_lock_res * LockResource) +void ocfs_release_lockres (ocfs_lock_res * lockres) { - LOG_ENTRY_ARGS ("(0x%08x)\n", LockResource); + LOG_ENTRY_ARGS ("(0x%08x)\n", lockres); - if (LockResource->in_use == 0) { - LOG_TRACE_ARGS("Releasing lock resource: %p inuse was zero, thread=%d\n", - LockResource, LockResource->thread_id); - LockResource->thread_id = 0; - LockResource->in_use = 0; + if (lockres->in_use == 0) { + LOG_TRACE_ARGS("Releasing lock resource: %p inuse was zero, " + "thread=%d\n", lockres, lockres->thread_id); + lockres->thread_id = 0; + lockres->in_use = 0; } else { - LockResource->in_use--; - if (LockResource->in_use == 0) { - LockResource->thread_id = 0; + lockres->in_use--; + if (lockres->in_use == 0) { + lockres->thread_id = 0; } } @@ -1146,53 +912,49 @@ void ocfs_release_lockres (ocfs_lock_res * ocfs_update_disk_lock() * */ -int ocfs_update_disk_lock (ocfs_super * osb, - ocfs_lock_res * LockResource, - ub4 Flags, ocfs_file_entry * FileEntry) +int ocfs_update_disk_lock (ocfs_super * osb, ocfs_lock_res * lockres, + ub4 flags, ocfs_file_entry * fe) { int status = 0; ub8 offset = 0; - ocfs_file_entry *fe = NULL; + ocfs_file_entry *tmp_fe = NULL; - LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u, 0x%08x)\n", osb, LockResource, - Flags, FileEntry); + LOG_ENTRY (); - if (FileEntry == NULL) - status = ocfs_get_file_entry (osb, &fe, LockResource->sector_num); + offset = lockres->sector_num; + if (fe == 
NULL) + status = ocfs_get_file_entry (osb, &tmp_fe, lockres->sector_num); else { - fe = FileEntry; - offset = LockResource->sector_num; - status = - ocfs_read_disk (osb, (void *) fe, (ub4) osb->sect_size, - offset); + tmp_fe = fe; + status = ocfs_read_disk (osb, (void *) tmp_fe, + (ub4) osb->sect_size, offset); } - if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } - if (Flags & DLOCK_FLAG_MASTER) - DISK_LOCK_CURRENT_MASTER (fe) = LockResource->master_node_num; + if (flags & DLOCK_FLAG_MASTER) + DISK_LOCK_CURRENT_MASTER (tmp_fe) = lockres->master_node_num; - if (Flags & DLOCK_FLAG_LOCK) - DISK_LOCK_FILE_LOCK (fe) = LockResource->lock_type; + if (flags & DLOCK_FLAG_LOCK) + DISK_LOCK_FILE_LOCK (tmp_fe) = lockres->lock_type; - if (Flags & DLOCK_FLAG_OPEN_MAP) - DISK_LOCK_OIN_MAP (fe) = LockResource->oin_openmap; + if (flags & DLOCK_FLAG_OPEN_MAP) + DISK_LOCK_OIN_MAP (tmp_fe) = lockres->oin_openmap; - if (Flags & DLOCK_FLAG_SEQ_NUM) - DISK_LOCK_SEQNUM (fe) = LockResource->last_upd_seq_num; + if (flags & DLOCK_FLAG_SEQ_NUM) + DISK_LOCK_SEQNUM (tmp_fe) = lockres->last_upd_seq_num; - status = ocfs_write_disk (osb, fe, osb->sect_size, offset); + status = ocfs_write_disk (osb, tmp_fe, osb->sect_size, offset); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } finally: - if ((fe != FileEntry) && (fe)) - ocfs_release_file_entry (fe); + if ((tmp_fe != fe) && (tmp_fe)) + ocfs_release_file_entry (tmp_fe); LOG_EXIT_STATUS (status); return status; @@ -1202,84 +964,49 @@ int ocfs_update_disk_lock (ocfs_super * * ocfs_update_master_on_open() * */ -int ocfs_update_master_on_open (ocfs_super * osb, ocfs_lock_res * LockResource) +int ocfs_update_master_on_open (ocfs_super * osb, ocfs_lock_res * lockres) { int status = -EAGAIN; - LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", osb, LockResource); - - //Just for the time being , we are going to fix it . 
- LOG_EXIT (); - return 0; + LOG_ENTRY (); while (status == -EAGAIN) { - if (!IS_NODE_ALIVE - (osb->publ_map, LockResource->master_node_num, - OCFS_MAXIMUM_NODES)) { - LOG_TRACE_ARGS - ("Master is dead, lockid %u.%u, master(%d)\n", - HI (LockResource->sector_num), - LO (LockResource->sector_num), - LockResource->master_node_num); + if (!IS_NODE_ALIVE (osb->publ_map, lockres->master_node_num, + OCFS_MAXIMUM_NODES)) { + LOG_TRACE_ARGS ("Master (%u) dead, lockid %u.%u\n", + lockres->master_node_num, + HI (lockres->sector_num), LO (lockres->sector_num)); status = 0; goto bail; } - ocfs_acquire_lockres (LockResource); + ocfs_acquire_lockres (lockres); + + if (lockres->master_node_num == osb->node_num) { + LOG_TRACE_ARGS ("Added node to map 0x%08x, lockid %u.%u\n", + LO (lockres->oin_openmap), HI (lockres->sector_num), + LO (lockres->sector_num)); - if (LockResource->master_node_num == osb->node_num) { - LOG_TRACE_ARGS - ("Added this node to map 0x%08x.0x%08x, lockid %u.%u\n", - HI (LockResource->oin_openmap), - LO (LockResource->oin_openmap), - HI (LockResource->sector_num), - LO (LockResource->sector_num)); - - /* Write the node map to the disk... 
*/ - LockResource->oin_openmap |= (1 << osb->node_num); - - status = - ocfs_update_disk_lock (osb, LockResource, - DLOCK_FLAG_OPEN_MAP, NULL); + lockres->oin_openmap |= (1 << osb->node_num); + status = ocfs_update_disk_lock (osb, lockres, + DLOCK_FLAG_OPEN_MAP, NULL); if (status < 0) { LOG_ERROR_STATUS (status); goto bail; } - ocfs_release_lockres (LockResource); + ocfs_release_lockres (lockres); } else { - LOG_TRACE_ARGS - ("ocfs_update_lock_state added this node to map " - "0x%08x.0x%08x, lockid %u.%u\n", - HI (LockResource->oin_openmap), - LO (LockResource->oin_openmap), - HI (LockResource->sector_num), - LO (LockResource->sector_num)); - /* TODO we need to handle the case when thee master node is not alive */ - status = - ocfs_update_lock_state (osb, LockResource, - FLAG_ADD_OIN_MAP); + status = ocfs_update_lock_state (osb, lockres, + FLAG_ADD_OIN_MAP); if (status < 0) { - LOG_ERROR_ARGS - ("ocfs_update_lock_state failed map 0x%08x.0x%08x, " - "lockid %u.%u, status 0x%08x\n", - HI (LockResource->oin_openmap), - LO (LockResource->oin_openmap), - HI (LockResource->sector_num), - LO (LockResource->sector_num), status); - ocfs_release_lockres (LockResource); + LOG_ERROR_STATUS (status); + ocfs_release_lockres (lockres); if (status == -EAGAIN) continue; goto bail; } - LOG_TRACE_ARGS - ("ocfs_update_lock_state SUCCESS map 0x%08x.0x%08x, " - "lockid %u.%u, status 0x%08x\n", - HI (LockResource->oin_openmap), - LO (LockResource->oin_openmap), - HI (LockResource->sector_num), - LO (LockResource->sector_num), status); - ocfs_release_lockres (LockResource); + ocfs_release_lockres (lockres); } } @@ -1292,30 +1019,32 @@ int ocfs_update_master_on_open (ocfs_sup * ocfs_init_lockres() * */ -void ocfs_init_lockres (ocfs_super * osb, ocfs_lock_res * LockResource, ub8 LockId) +void ocfs_init_lockres (ocfs_super * osb, ocfs_lock_res * lockres, ub8 lock_id) { - LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u.%u)\n", osb, LockResource, - HI (LockId), LO (LockId)); + LOG_ENTRY_ARGS ("(0x%08x, 
0x%08x, %u.%u)\n", osb, lockres, + HI (lock_id), LO (lock_id)); - LockResource->signature = 0x55AA; - LockResource->lock_type = OCFS_DLM_NO_LOCK; - LockResource->ref_cnt = 0; - LockResource->master_node_num = OCFS_INVALID_NODE_NUM; - LockResource->last_upd_seq_num = 0; - LockResource->oin_openmap = 0; - LockResource->sector_num = LockId; - LockResource->in_use = 0; - LockResource->oin = NULL; - LockResource->lock_state = 0; - LockResource->voted_event = NULL; - LockResource->in_cache_list = false; - spin_lock_init(&LockResource->lock_mutex); + lockres->signature = 0x55AA; + lockres->lock_type = OCFS_DLM_NO_LOCK; + lockres->ref_cnt = 0; + lockres->master_node_num = OCFS_INVALID_NODE_NUM; + lockres->last_upd_seq_num = 0; + lockres->oin_openmap = 0; + lockres->sector_num = lock_id; + lockres->in_use = 0; + lockres->oin = NULL; + lockres->lock_state = 0; + lockres->in_cache_list = false; + + spin_lock_init (&lockres->lock_mutex); + init_waitqueue_head (&lockres->voted_event); + atomic_set (&lockres->voted_event_woken, 0); /* For read/write caching */ - LockResource->last_read_time = 0; - LockResource->last_write_time = 0; - LockResource->writer_node_num = OCFS_INVALID_NODE_NUM; - LockResource->reader_node_num = OCFS_INVALID_NODE_NUM; + lockres->last_read_time = 0; + lockres->last_write_time = 0; + lockres->writer_node_num = OCFS_INVALID_NODE_NUM; + lockres->reader_node_num = OCFS_INVALID_NODE_NUM; LOG_EXIT (); return; @@ -1325,122 +1054,105 @@ void ocfs_init_lockres (ocfs_super * osb * ocfs_create_update_lock() * */ -int ocfs_create_update_lock (ocfs_super * osb, ocfs_inode * oin, ub8 LockId, ub4 Flags) +int ocfs_create_update_lock (ocfs_super * osb, ocfs_inode * oin, ub8 lock_id, + ub4 flags) { int status = 0; ocfs_lock_res *lockres = NULL; - bool IsDir = false; + ocfs_lock_res *tmp_lockres = NULL; + bool is_dir = false; LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u.%u, %u)\n", osb, oin, - HI (LockId), LO (LockId), Flags); + HI (lock_id), LO (lock_id), flags); - IsDir = (Flags 
& OCFS_OIN_DIRECTORY) ? true : false; + is_dir = (flags & OCFS_OIN_DIRECTORY) ? true : false; /* Check the lock state on the disk / in our resource map */ - status = ocfs_lookup_sector_node (osb, LockId, &lockres); + status = ocfs_lookup_sector_node (osb, lock_id, &lockres); if (status >= 0) { if (lockres->signature != 0x55AA) { - LOG_ERROR_STATUS(status = -EFAIL); + LOG_ERROR_STATUS (status = -EFAIL); goto bail; } ocfs_acquire_lockres (lockres); if (lockres->oin) { if (lockres->oin->obj_id.type != OCFS_TYPE_OIN) { - LOG_ERROR_STATUS(status = -EFAIL); + LOG_ERROR_STATUS (status = -EFAIL); goto bail; } - else { + else lockres->oin->lock_res = NULL; - } } - lockres->oin = oin; - oin->oin_flags |= Flags; + oin->oin_flags |= flags; oin->lock_res = lockres; - ocfs_release_lockres (lockres); - } - if (status < 0) { + status = ocfs_wait_for_lock_release (osb, lock_id, 30000, lockres, + (is_dir ? OCFS_DLM_EXCLUSIVE_LOCK : OCFS_DLM_NO_LOCK)); + if (status < 0) { + LOG_ERROR_STR ("Lock owner is alive and taking too much time"); + goto bail; + } + } else { /* Create a resource and insert in the hash */ - lockres = kmem_cache_alloc (OcfsGlobalCtxt.lockres_cache, GFP_KERNEL); - (sizeof (ocfs_lock_res)); + lockres = ocfs_allocate_lockres(); if (lockres == NULL) { LOG_ERROR_STATUS (status = -ENOMEM); goto bail; } - ocfs_init_lockres (osb, lockres, LockId); - - /* Update lock resource */ - if (IsDir) { - status = - ocfs_wait_for_lock_release (osb, LockId, - 30000 /* 30 sec */ , - lockres, - OCFS_DLM_EXCLUSIVE_LOCK); - } else { - status = - ocfs_wait_for_lock_release (osb, LockId, - 30000 /* 30 sec */ , - lockres, OCFS_DLM_NO_LOCK); - } + ocfs_init_lockres (osb, lockres, lock_id); + status = ocfs_wait_for_lock_release (osb, lock_id, 30000, lockres, + (is_dir ? OCFS_DLM_EXCLUSIVE_LOCK : OCFS_DLM_NO_LOCK)); if (status < 0) { - /* - ** The lock owner is alive and is taking too long to do the - ** release for the lock - ** Notify him of our existance/ kick him out??? - ** Retry lock... 
- */ - LOG_ERROR_STR - ("Lock owner is alive and taking too much time"); + LOG_ERROR_STR ("Lock owner is alive and taking too much time"); + goto bail; } - ocfs_insert_sector_node (osb, lockres); + status = ocfs_insert_sector_node (osb, lockres, &tmp_lockres); if (status < 0) { - /* Failure LOCK up volume operation ** TODO */ - LOG_ERROR_STR ("Lock up volume"); + LOG_ERROR_STATUS (status); + goto bail; } - if (Flags & OCFS_OIN_CACHE_UPDATE) { - ocfs_insert_cache_link (osb, lockres); + /* Check if another process added a lockres after our lookup */ + if (tmp_lockres) { + /* If so, delete the recently allocated lockres */ + ocfs_free_lockres(lockres); + lockres = tmp_lockres; + status = ocfs_wait_for_lock_release (osb, lock_id, 30000, lockres, + (is_dir ? OCFS_DLM_EXCLUSIVE_LOCK : OCFS_DLM_NO_LOCK)); if (status < 0) { - /* Failure LOCK up volume operation TODO */ - LOG_ERROR_STR ("Lock up volume"); + LOG_ERROR_STR ("Lock owner is alive and taking too much time"); + goto bail; } - } - } else { - if (IsDir) { - status = - ocfs_wait_for_lock_release (osb, LockId, - 30000 /* 30 sec */ , - lockres, - OCFS_DLM_EXCLUSIVE_LOCK); } else { - status = - ocfs_wait_for_lock_release (osb, LockId, - 30000 /* 30 sec */ , - lockres, OCFS_DLM_NO_LOCK); + if (flags & OCFS_OIN_CACHE_UPDATE) { + status = ocfs_insert_cache_link (osb, lockres); + if (status < 0) { + LOG_ERROR_STR ("Lock up volume"); + goto bail; + } + } } } ocfs_acquire_lockres (lockres); lockres->oin = oin; - oin->oin_flags |= Flags; + oin->oin_flags |= flags; oin->lock_res = lockres; LOG_TRACE_ARGS ("MasterNode=%d, ThisNode=%d\n", lockres->master_node_num, osb->node_num); - if ((!IsDir) && (lockres->master_node_num != OCFS_INVALID_NODE_NUM) && - ((!IS_NODE_ALIVE (lockres->oin_openmap, osb->node_num, - OCFS_MAXIMUM_NODES)) || + if ((!is_dir) && (lockres->master_node_num != OCFS_INVALID_NODE_NUM) && + ((!IS_NODE_ALIVE (lockres->oin_openmap, osb->node_num, OCFS_MAXIMUM_NODES)) || (lockres->lock_state & 
FLAG_ALWAYS_UPDATE_OPEN))) { ocfs_release_lockres (lockres); - /* Send a message to master so that he can send the oin update to */ /* this node also. If u are the master then update File_entry */ /* and set the bit that this node has a open */ @@ -1463,98 +1175,316 @@ int ocfs_create_update_lock (ocfs_super * ocfs_get_x_for_del() * */ -int ocfs_get_x_for_del (ocfs_super * osb, - ub8 LockId, - ub4 LockType, - ub4 Flags, ocfs_lock_res * LockResource, - ocfs_file_entry * FileEntry) +int ocfs_get_x_for_del (ocfs_super * osb, ub8 lock_id, ub4 lock_type, ub4 flags, + ocfs_lock_res * lockres, ocfs_file_entry * fe) { - bool LockResourceAcquired = false; // true; + bool lockres_acq = false; int status = -EFAIL; - LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x, 0x%08x)\n", osb, - HI (LockId), LO (LockId), LockType, Flags, LockResource, - FileEntry); + LOG_ENTRY_ARGS ("(lockid=%u.%u, locktype=%u)\n", HI (lock_id), + LO (lock_id), lock_type); while (1) { - if (LockResourceAcquired == false) { - ocfs_acquire_lockres (LockResource); - LockResourceAcquired = true; + if (lockres_acq == false) { + ocfs_acquire_lockres (lockres); + lockres_acq = true; } /* If I am master and I am the only one in the oin node map */ /* update the disk */ - status = - ocfs_make_lock_master (osb, LockId, LockType, Flags, LockResource, - FileEntry); + status = ocfs_make_lock_master (osb, lock_id, lock_type, flags, + lockres, fe); if (status >= 0) { - /* RELEASE_LOCK */ - if (LockResourceAcquired) { - ocfs_release_lockres (LockResource); - LockResourceAcquired = false; + if (lockres_acq) { + ocfs_release_lockres (lockres); + lockres_acq = false; } - /* We got the lock */ status = 0; goto finally; } else if (status == -EAGAIN) { - if (LockResourceAcquired) { - ocfs_release_lockres (LockResource); - LockResourceAcquired = false; + if (lockres_acq) { + ocfs_release_lockres (lockres); + lockres_acq = false; } - status = - ocfs_disk_update_resource (osb, LockResource, FileEntry); + status = 
ocfs_disk_update_resource (osb, lockres, fe); if (status < 0) { - /* This should never fail so lock up the volume */ + LOG_ERROR_STATUS (status); osb->vol_state = VOLUME_DISABLED; goto finally; } continue; } else { - if (LockResourceAcquired) { - ocfs_release_lockres (LockResource); - LockResourceAcquired = false; + if (lockres_acq) { + ocfs_release_lockres (lockres); + lockres_acq = false; } goto finally; } } finally: - if (LockResourceAcquired) { - ocfs_release_lockres (LockResource); - LockResourceAcquired = false; + if (lockres_acq) { + ocfs_release_lockres (lockres); + lockres_acq = false; } LOG_EXIT_STATUS (status); return status; } /* ocfs_get_x_for_del */ +#define ACQUIRE_WITH_FLAG(lock, flag) \ + do { \ + if (!(flag)) { \ + ocfs_acquire_lockres(lock); \ + (flag) = true; \ + } \ + } while (0) + +#define RELEASE_WITH_FLAG(lock, flag) \ + do { \ + if (flag) { \ + ocfs_release_lockres(lock); \ + (flag) = false; \ + } \ + } while (0) + +/* + * ocfs_try_exclusive_lock() + * + */ +int ocfs_try_exclusive_lock(ocfs_super *osb, ocfs_lock_res *lockres, ub4 flags, + ub4 updated, ocfs_file_entry *fe, ub8 lock_id, + ub4 lock_type) +{ + int status; + bool lockres_acq = false; + bool make_lock_master; + + while (1) { + ACQUIRE_WITH_FLAG(lockres, lockres_acq); + + if (lockres->master_node_num != osb->node_num || !updated) { + status = ocfs_read_file_entry (osb, fe, lock_id); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + } + + if (lockres->master_node_num == osb->node_num) { + if ((flags & FLAG_FILE_DELETE) || (flags & FLAG_FILE_RENAME) || + (flags & FLAG_FILE_TRUNCATE)) { + status = ocfs_get_x_for_del (osb, lock_id, lock_type, flags, + lockres, fe); + goto finally; + } + + if (!updated) { + if (lockres->master_node_num != DISK_LOCK_CURRENT_MASTER (fe)) { + lockres->master_node_num = DISK_LOCK_CURRENT_MASTER (fe); + lockres->lock_type = DISK_LOCK_FILE_LOCK (fe); + lockres->oin_openmap = DISK_LOCK_OIN_MAP (fe); + RELEASE_WITH_FLAG(lockres, 
lockres_acq); + continue; + } + } + + DISK_LOCK_CURRENT_MASTER (fe) = osb->node_num; + + if (DISK_LOCK_FILE_LOCK (fe) < OCFS_DLM_EXCLUSIVE_LOCK) { + DISK_LOCK_FILE_LOCK (fe) = lock_type; + + if (lock_type == OCFS_DLM_ENABLE_CACHE_LOCK) { + status = ocfs_write_force_disk (osb, fe, osb->sect_size, lock_id); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + } + } + + status = ocfs_write_file_entry (osb, fe, lock_id); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + /* We got the lock */ + lockres->lock_type = lock_type; + status = 0; + goto finally; + } else { + make_lock_master = false; + + lockres->master_node_num = DISK_LOCK_CURRENT_MASTER (fe); + lockres->lock_type = DISK_LOCK_FILE_LOCK (fe); + lockres->oin_openmap = DISK_LOCK_OIN_MAP (fe); + + if (lockres->master_node_num != OCFS_INVALID_NODE_NUM) { + if (!IS_VALID_NODE_NUM (lockres->master_node_num)) { + LOG_ERROR_STATUS(status = -EINVAL); + goto finally; + } + } + + if (lockres->master_node_num == OCFS_INVALID_NODE_NUM) { + make_lock_master = true; + } else if (!IS_NODE_ALIVE (osb->publ_map, lockres->master_node_num, + OCFS_MAXIMUM_NODES)) { + make_lock_master = true; + RELEASE_WITH_FLAG(lockres, lockres_acq); + + LOG_TRACE_ARGS ("ocfs_recover_vol(%d)\n", + lockres->master_node_num); + status = ocfs_recover_vol (osb, lockres->master_node_num); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + ACQUIRE_WITH_FLAG(lockres, lockres_acq); + } + + if (make_lock_master) { + /* + * I am not master, master is dead or not there. 
+ * If lock was owned we need to do recovery + * otherwise we need to arbitrate for the lock + */ + + RELEASE_WITH_FLAG(lockres, lockres_acq); + + status = ocfs_make_lock_master (osb, lock_id, lock_type, flags, + lockres, fe); + if (status >= 0) { + RELEASE_WITH_FLAG(lockres, lockres_acq); + + if (lock_type == OCFS_DLM_ENABLE_CACHE_LOCK) { + DISK_LOCK_FILE_LOCK (fe) = lock_type; + status = ocfs_write_force_disk (osb, fe, osb->sect_size, + lock_id); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + } + + DISK_LOCK_CURRENT_MASTER (fe) = osb->node_num; + DISK_LOCK_FILE_LOCK (fe) = lock_type; + + status = ocfs_write_file_entry (osb, fe, lock_id); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + /* We got the lock */ + status = 0; + goto finally; + } else if (status == -EAGAIN) { + RELEASE_WITH_FLAG(lockres, lockres_acq); + status = ocfs_disk_update_resource (osb, lockres, fe); + if (status < 0) { + LOG_ERROR_STATUS (status); + osb->vol_state = VOLUME_DISABLED; + goto finally; + } + continue; + } else { + RELEASE_WITH_FLAG(lockres, lockres_acq); + goto finally; + } + } else /* !make_lock_master */ { + /* + * MasterNode is alive and it is not this node + * If the lock is acquired already by the master + * wait for release else change master. 
+ */ + + //if (lockres->lock_type >= 0) + if (true) + { + if ((flags & FLAG_FILE_DELETE) || + (flags & FLAG_FILE_RENAME) || + (flags & FLAG_FILE_TRUNCATE)) { + status = ocfs_get_x_for_del (osb, lock_id, lock_type, + flags, lockres, fe); + RELEASE_WITH_FLAG(lockres, lockres_acq); + goto finally; + } + + /* Change Lock Master */ + status = ocfs_update_lock_state (osb, lockres, FLAG_CHANGE_MASTER); + RELEASE_WITH_FLAG(lockres, lockres_acq); + + if (status < 0) { + if (status == -EAGAIN) + continue; + goto finally; + } + + status = ocfs_read_file_entry (osb, fe, lock_id); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + DISK_LOCK_CURRENT_MASTER (fe) = osb->node_num; + DISK_LOCK_FILE_LOCK (fe) = lock_type; + status = ocfs_write_file_entry (osb, fe, lock_id); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + /* Update our state... */ + lockres-> master_node_num = DISK_LOCK_CURRENT_MASTER (fe); + lockres->lock_type = DISK_LOCK_FILE_LOCK (fe); + lockres-> oin_openmap = DISK_LOCK_OIN_MAP (fe); + goto finally; + } else { + /* Wait for lock release */ + RELEASE_WITH_FLAG(lockres, lockres_acq); + + status = ocfs_wait_for_lock_release (osb, lock_id, 30000, lockres, + ((flags & FLAG_DIR) ? 
OCFS_DLM_SHARED_LOCK : OCFS_DLM_NO_LOCK)); + if (status < 0) { + if (status == -ETIMEDOUT) + continue; + else + goto finally; + } + /* Try and acquire the lock again */ + continue; + } + } /* make_lock_master */ + } /* master_node_num */ + } /* while */ + +finally: + RELEASE_WITH_FLAG(lockres, lockres_acq); + return status; +} /* ocfs_try_exclusive_lock */ + /* * ocfs_acquire_lock() * */ -int ocfs_acquire_lock (ocfs_super * osb, - ub8 LockId, - ub4 LockType, - ub4 Flags, - ocfs_lock_res ** LockResource, ocfs_file_entry * LockSector) +int ocfs_acquire_lock (ocfs_super * osb, ub8 lock_id, ub4 lock_type, ub4 flags, + ocfs_lock_res ** lockres, ocfs_file_entry * lock_fe) { int status = -EFAIL; - ocfs_lock_res *lockres = NULL; bool lockres_acq = false; ocfs_file_entry *disklock = NULL; ub4 updated = 0; LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x, 0x%08x)\n", osb, - HI (LockId), LO (LockId), LockType, Flags, LockResource, - LockSector); + HI (lock_id), LO (lock_id), lock_type, flags, lockres, + lock_fe); - /* If the caller passed in memory read it and update resource */ - /* from the entry */ - if (LockSector) - disklock = LockSector; + if (lock_fe) + disklock = lock_fe; else { - /* Allocate the entry */ disklock = ocfs_allocate_file_entry (); if (disklock == NULL) { LOG_ERROR_STATUS (status = -ENOMEM); @@ -1562,40 +1492,35 @@ int ocfs_acquire_lock (ocfs_super * osb, } } - status = ocfs_find_update_res (osb, LockId, &lockres, disklock, &updated); + status = ocfs_find_update_res (osb, lock_id, lockres, disklock, &updated); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } - *LockResource = lockres; - switch (LockType) { + switch (lock_type) { case OCFS_DLM_SHARED_LOCK: - if (!(Flags & FLAG_DIR)) { + if (!(flags & FLAG_DIR)) { status = 0; goto finally; } - ocfs_acquire_lockres (lockres); + ocfs_acquire_lockres (*lockres); lockres_acq = true; - if (lockres->lock_type == OCFS_DLM_NO_LOCK) { - lockres->lock_type = OCFS_DLM_SHARED_LOCK; + if 
((*lockres)->lock_type == OCFS_DLM_NO_LOCK) { + (*lockres)->lock_type = OCFS_DLM_SHARED_LOCK; } - if ((lockres->lock_type == OCFS_DLM_ENABLE_CACHE_LOCK) && - (lockres->master_node_num != osb->node_num)) { - LOG_TRACE_ARGS - ("Called for type OCFS_DLM_SHARED_LOCK, " - "calling ocfs_break_cache_lock (master=%u, this=%u)\n", - lockres->master_node_num, osb->node_num); - ocfs_break_cache_lock (osb, lockres); + if (((*lockres)->lock_type == OCFS_DLM_ENABLE_CACHE_LOCK) && + ((*lockres)->master_node_num != osb->node_num)) { + ocfs_break_cache_lock (osb, *lockres); } - lockres->ref_cnt++; + (*lockres)->ref_cnt++; if (lockres_acq) { - ocfs_release_lockres (lockres); + ocfs_release_lockres (*lockres); lockres_acq = false; } @@ -1605,471 +1530,11 @@ int ocfs_acquire_lock (ocfs_super * osb, case OCFS_DLM_EXCLUSIVE_LOCK: case OCFS_DLM_ENABLE_CACHE_LOCK: - /* This will be called for vol, allocation, file and directory */ - /* from create modify */ - while (1) { - ocfs_acquire_lockres (lockres); - lockres_acq = true; -#ifdef NOT_USED - /* If I am master update disk */ - if(LockId == OCFS_BITMAP_LOCK_OFFSET) - { - LOG_TRACE_ARGS("Called for BITMAP Exclusive lock, " \ - "Owner is node (%u), my Node num is (%u)\n", - lockres->master_node_num,osb->node_num); - } -#endif - - if (lockres->master_node_num == osb->node_num) { -#ifdef NOT_USED - if((lockres->lock_type == OCFS_DLM_ENABLE_CACHE_LOCK) && - (LockId == OCFS_BITMAP_LOCK_OFFSET)) - { - if(lockres_acq) - { - ocfs_release_lockres(lockres); - lockres_acq = false; - } - /* We got the lock */ - try_return( status = 0); - } -#endif - - /* Make lock Master */ - if ((Flags & FLAG_FILE_DELETE) - || (Flags & FLAG_FILE_RENAME) - || (Flags & FLAG_FILE_TRUNCATE)) { - if (!updated) { - /* Read the fileEntry from the disk... */ - status = - ocfs_read_file_entry (osb, - disklock, - LockId); - if (status < 0) { - LOG_ERROR_STATUS - (status); - goto finally; - } - } - - /* If this is a directory being deleted ask */ - /* everybody... 
*/ - /* If this is a file and we are master and the */ - /* nodemap is just this node acquire a local lock */ - /* and set a flag in our resource to indicate that */ - /* the file has been deleted, a change master */ - /* / update master on open will now return */ - /* failure to the caller preventing them from */ - /* opening this file. */ - - status = - ocfs_get_x_for_del (osb, LockId, - LockType, Flags, - lockres, disklock); - - if (lockres_acq) { - ocfs_release_lockres - (lockres); - lockres_acq = false; - } - goto finally; - } -// if( lockres->lock_type >= OCFS_DLM_EXCLUSIVE_LOCK) -// { -// } - - if (!updated) { - /* Read the fileEntry from the disk... */ - status = - ocfs_read_file_entry (osb, - disklock, - LockId); - if (status < 0) { - LOG_ERROR_STATUS (status); - goto finally; - } - - if (lockres->master_node_num != - DISK_LOCK_CURRENT_MASTER - (disklock)) { - /* Update our state... */ - lockres->master_node_num = - DISK_LOCK_CURRENT_MASTER - (disklock); - lockres->lock_type = - DISK_LOCK_FILE_LOCK - (disklock); - lockres->oin_openmap = - DISK_LOCK_OIN_MAP - (disklock); - - if (lockres_acq) { - ocfs_release_lockres - (lockres); - lockres_acq = false; - } - continue; - } - } else { - /* The file entry was already read in */ - /* ocfs_find_update_res */ - } - - DISK_LOCK_CURRENT_MASTER (disklock) = - osb->node_num; - - if (DISK_LOCK_FILE_LOCK (disklock) < - OCFS_DLM_EXCLUSIVE_LOCK) { - DISK_LOCK_FILE_LOCK (disklock) = - LockType; - - if (LockType == - OCFS_DLM_ENABLE_CACHE_LOCK) { -// ub8 dwOffset; -// ub4 SectorSize = osb->sect_size; - -// dwOffset.QuadPart = LockId; - status = - ocfs_write_force_disk (osb, - disklock, - osb-> - sect_size, - LockId); - if (status < 0) { - LOG_ERROR_STATUS - (status); - goto finally; - } - } - } - - status = - ocfs_write_file_entry (osb, disklock, - LockId); - if (status < 0) { - LOG_ERROR_STATUS (status); - goto finally; - } - - lockres->lock_type = LockType; - - /* RELEASE_LOCK */ - if (lockres_acq) { - 
ocfs_release_lockres (lockres); - lockres_acq = false; - } - - /* We got the lock */ - status = 0; - goto finally; - } else { - bool bMakeLockMaster = false; - -// if(updated == 0) - { - /* Read the fileEntry from the disk... */ - status = - ocfs_read_file_entry (osb, - disklock, - LockId); - if (status < 0) { - LOG_ERROR_STATUS (status); - goto finally; - } - - /* Update our state... */ - lockres->master_node_num = - DISK_LOCK_CURRENT_MASTER - (disklock); - lockres->lock_type = - DISK_LOCK_FILE_LOCK (disklock); - lockres->oin_openmap = - DISK_LOCK_OIN_MAP (disklock); - } - - if (lockres->master_node_num != - OCFS_INVALID_NODE_NUM) { - if (!IS_VALID_NODE_NUM - (lockres->master_node_num)) { - LOG_ERROR_STATUS(status = - -EINVAL); - goto finally; - } - } - - if (lockres->master_node_num == - OCFS_INVALID_NODE_NUM) { - bMakeLockMaster = true; - } else - if (!IS_NODE_ALIVE - (osb->publ_map, - lockres->master_node_num, - OCFS_MAXIMUM_NODES)) { - bMakeLockMaster = true; - - /* RELEASE_LOCK */ - if (lockres_acq) { - ocfs_release_lockres - (lockres); - lockres_acq = false; - } - - /* Recovery */ - LOG_TRACE_ARGS - ("Calling ocfs_recover_vol for NodeNum (%d)\n", - lockres->master_node_num); - status = - ocfs_recover_vol (osb, - lockres-> - master_node_num); - if (status < 0) { - LOG_ERROR_STATUS (status); - goto finally; - } - - ocfs_acquire_lockres (lockres); - lockres_acq = true; - } - - if (bMakeLockMaster) { - /* I am not master, master is dead or not there. 
*/ - /* If lock was owned we need to do recovery */ - /* otherwise we need to arbitrate for the lock */ - - /* ArbitrateLock */ - if (lockres_acq) { - ocfs_release_lockres - (lockres); - lockres_acq = false; - } - - status = - ocfs_make_lock_master (osb, LockId, - LockType, Flags, - lockres, - disklock); - if (status >= 0) { - /* release lock */ - if (lockres_acq) { - ocfs_release_lockres - (lockres); - lockres_acq = false; - } - - if (LockType == - OCFS_DLM_ENABLE_CACHE_LOCK) - { - DISK_LOCK_FILE_LOCK - (disklock) = - LockType; - status = - ocfs_write_force_disk - (osb, disklock, - osb->sect_size, - LockId); - if (status < 0) { - LOG_ERROR_STATUS - (status); - goto finally; - } - } - - DISK_LOCK_CURRENT_MASTER - (disklock) = - osb->node_num; - DISK_LOCK_FILE_LOCK - (disklock) = LockType; - - status = - ocfs_write_file_entry (osb, - disklock, - LockId); - if (status < 0) { - LOG_ERROR_STATUS - (status); - goto finally; - } - - /* We got the lock */ - status = 0; - goto finally; - } else if (status == -EAGAIN) { - if (lockres_acq) { - ocfs_release_lockres - (lockres); - lockres_acq = false; - } - - status = - ocfs_disk_update_resource (osb, - lockres, - disklock); - if (status < 0) { - /* should never fail... lock up vol */ - LOG_ERROR_STR - ("ocfs_disk_update_resource failed - Disabling Volume"); - osb->vol_state = - VOLUME_DISABLED; - goto finally; - } - continue; - } else { - if (lockres_acq) { - ocfs_release_lockres - (lockres); - lockres_acq = false; - } - goto finally; - } - } else { - /* MasterNode is alive and it is not this node */ - /* If the lock is acquired already by the master */ - /* wait for release else change master. */ - -/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */ -/* !!! this next line is very questionable. !!! */ -/* !!! since lock_type is unsigned and cannot !!! */ -/* !!! be < 0, the else case will *never* be !!! */ -/* !!! executed. is that the intent?! !!! */ -/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
*/ - -#if 0 - // commenting to avoid compile warning - if (lockres->lock_type >= 0) -#else - if (true) -#endif - { - if ((Flags & - FLAG_FILE_DELETE) - || (Flags & - FLAG_FILE_RENAME) - || (Flags & - FLAG_FILE_TRUNCATE)) - { - /* If this is a directory being */ - /* deleted ask everybody... */ - /* If this is a file and we are */ - /* master and the nodemap is just */ - /* this node acquire a local lock */ - /* and set a flag in our resource */ - /* to indicate that the file has */ - /* been deleted, a change master */ - /* / update master on open will */ - /* now return failure to the */ - /* caller preventing them from */ - /* opening this file. */ - - status = - ocfs_get_x_for_del (osb, - LockId, - LockType, - Flags, - lockres, - disklock); - if (lockres_acq) { - ocfs_release_lockres - (lockres); - lockres_acq - = false; - } - goto finally; - } - - /* Change Lock Master */ - status = - ocfs_update_lock_state (osb, - lockres, - FLAG_CHANGE_MASTER); - if (lockres_acq) { - ocfs_release_lockres - (lockres); - lockres_acq = false; - } - - if (status < 0) { - if (status == - -EAGAIN) - continue; - goto finally; - } - - { - status = - ocfs_read_file_entry - (osb, disklock, - LockId); - if (status < 0) { - LOG_ERROR_STATUS - (status); - goto finally; - } - - DISK_LOCK_CURRENT_MASTER - (disklock) = - osb->node_num; - DISK_LOCK_FILE_LOCK - (disklock) = - LockType; - status = - ocfs_write_file_entry - (osb, disklock, - LockId); - if (status < 0) { - LOG_ERROR_STATUS - (status); - goto finally; - } - - /* Update our state... 
*/ - lockres-> - master_node_num - = - DISK_LOCK_CURRENT_MASTER - (disklock); - lockres->lock_type = - DISK_LOCK_FILE_LOCK - (disklock); - lockres-> - oin_openmap = - DISK_LOCK_OIN_MAP - (disklock); - } - goto finally; - } else { - /* Wait for lock release */ - if (lockres_acq) { - ocfs_release_lockres - (lockres); - lockres_acq = false; - } - - if (Flags & FLAG_DIR) { - status = - ocfs_wait_for_lock_release (osb, - LockId, 30000, - lockres, - OCFS_DLM_SHARED_LOCK); - } else { - status = - ocfs_wait_for_lock_release (osb, - LockId, 30000, - lockres, - OCFS_DLM_NO_LOCK); - } - - if (status < 0) { - if (status == -ETIMEDOUT) - continue; - else - goto finally; - } - /* Try and acquire the lock again */ - continue; - } - } - } - } + /* This will be called for vol, allocation, file and */ + /* directory from create modify */ + status = ocfs_try_exclusive_lock(osb, *lockres, flags, + updated, disklock, lock_id, + lock_type); break; default: @@ -2077,11 +1542,11 @@ int ocfs_acquire_lock (ocfs_super * osb, } finally: - if ((LockSector == NULL) && (disklock)) + if ((lock_fe == NULL) && (disklock)) ocfs_release_file_entry (disklock); if (lockres_acq) { - ocfs_release_lockres (lockres); + ocfs_release_lockres (*lockres); lockres_acq = false; } @@ -2093,36 +1558,36 @@ int ocfs_acquire_lock (ocfs_super * osb, * ocfs_disk_release_lock() * */ -int ocfs_disk_release_lock (ocfs_super * osb, - ub8 LockId, - ub4 LockType, ub4 Flags, ocfs_lock_res * LockResource) +int ocfs_disk_release_lock (ocfs_super * osb, ub8 lock_id, ub4 lock_type, + ub4 flags, ocfs_lock_res * lockres, ocfs_file_entry *fe) { ub4 votemap = 0; ub4 tempmap = 0; ub4 i; int status = 0; int tmpstat; - ocfs_file_entry *fe = NULL; ub8 lockseqno; - bool cachelock = false; + bool cachelock = false, fe_alloc = false; - LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x)\n", osb, HI (LockId), - LO (LockId), LockType, Flags, LockResource); + LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x)\n", osb, HI (lock_id), + LO (lock_id), 
lock_type, flags, lockres); - status = ocfs_get_file_entry (osb, &fe, LockId); - if (status < 0) { - LOG_ERROR_STATUS (status); - goto finito; + if (fe==NULL) { + status = ocfs_get_file_entry (osb, &fe, lock_id); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finito; + } + fe_alloc = true; } + if (!IS_VALID_NODE_NUM (DISK_LOCK_CURRENT_MASTER (fe))) { LOG_ERROR_STATUS(status = -EINVAL); - status = -EINVAL; goto finito; } if (DISK_LOCK_CURRENT_MASTER (fe) != osb->node_num) { - /* How is this happening??? */ LOG_ERROR_ARGS ("Current master is NOT this NODE (%d)\n", DISK_LOCK_CURRENT_MASTER (fe)); status = 0; @@ -2136,121 +1601,81 @@ int ocfs_disk_release_lock (ocfs_super * } } - LOG_TRACE_ARGS ("ocfs_update_lock_state SUCCESS oin map 0x%08x.0x%08x, " - "lockid %u.%u flags 0x%08x\n", - HI (DISK_LOCK_OIN_MAP (fe)), - LO (DISK_LOCK_OIN_MAP (fe)), - HI (LockId), LO (LockId), Flags); - /* Send an update to all nodes alive, can be optimized later TODO */ - if (Flags & FLAG_FILE_RENAME) + if ((flags & FLAG_FILE_RENAME) || (flags & FLAG_FILE_DELETE)) votemap = (ub4) (osb->publ_map); -#ifdef ALLOW_INODE_UPDATES_WITH_NO_OIN /* TODO: figure out how to properly handle inode updates w/no oin */ votemap = (ub4) (osb->publ_map); // temporary hack, forces broadcast -#endif + /* remove current node from the votemap */ tempmap = (1 << osb->node_num); - votemap &= (~tempmap); - if (votemap != 0) { - /* Call Comm layer to broadcast to all nodes alive, that this node */ - /* wants exclusive access to the lock. */ - if (Flags & FLAG_FILE_UPDATE_OIN) { - LOG_TRACE_ARGS - ("Votemap was not 0, update oin votemap 0x%08x, " - "lockid %u.%u, flags 0x%08x\n", votemap, - HI (LockId), LO (LockId), Flags); + if (votemap == 0) + goto finally; - status = -EAGAIN; - while (status == -EAGAIN) { - /* - ** Do the real retry for getting the vote - ** kick in the vote thread so that if the other guy is waiting - ** on us check??? 
- ** in the ocfs_request_vote when reading all publish sectors - */ - status = - ocfs_comm_request_vote (osb, LockId, LockType, - Flags, fe); - if (status >= 0) { - LOG_TRACE_STR - ("ocfs_comm_request_vote succeeded!"); - break; - } -#ifdef ALLOW_INODE_UPDATES_WITH_NO_OIN - status = - ocfs_request_vote (osb, LockId, LockType, Flags, - votemap, &lockseqno); -#else - status = - ocfs_request_vote (osb, LockId, LockType, Flags, - DISK_LOCK_OIN_MAP (fe), - &lockseqno); -#endif - if (status == -EAGAIN) { - /* Check if there was somebody else who came in and changed our */ - LOG_TRACE_ARGS - ("ocfs_request_vote retrying votemap 0x%08x " - "lockid %u.%u, status 0x%08x\n", - votemap, HI (LockId), LO (LockId), status); - - if ((Flags & FLAG_FILE_UPDATE_OIN)) { - // ????? - } - continue; - } + if (!(flags & FLAG_FILE_UPDATE_OIN) && !(flags & FLAG_FILE_DELETE)) + goto finally; - if (status < 0) { - /* Disable the vol */ - LOG_TRACE_ARGS - ("ocfs_request_vote failed votemap 0x%08x, " - "lockid %u.%u, status 0x%08x\n", - votemap, HI (LockId), LO (LockId), status); - goto finito; - } -#ifdef ALLOW_INODE_UPDATES_WITH_NO_OIN - status = - ocfs_wait_for_vote (osb, LockId, LockType, - FLAG_FILE_UPDATE_OIN, votemap, - 5000, lockseqno, LockResource); -#else - status = - ocfs_wait_for_vote (osb, LockId, LockType, - FLAG_FILE_UPDATE_OIN, - DISK_LOCK_OIN_MAP (fe), 5000, - lockseqno, LockResource); + /* Call Comm layer to broadcast to all nodes alive, that this node */ + /* wants exclusive access to the lock. 
*/ + + LOG_TRACE_ARGS ("Update oin votemap 0x%08x, lockid %u.%u, flags " + "0x%08x\n", votemap, HI (lock_id), LO (lock_id), flags); + status = -EAGAIN; + while (status == -EAGAIN) { +#if 0 + /* SM - commenting out comm voting for the time being */ + status = ocfs_comm_request_vote (osb, lock_id, lock_type, + flags, fe); + if (status >= 0) { + LOG_TRACE_STR ("ocfs_comm_request_vote succeeded!"); + break; + } #endif - if (status < 0) { - if (status == -EAGAIN) { - LOG_TRACE_ARGS - ("ocfs_wait_for_vote retrying votemap 0x%08x, " - "lockid %u.%u, flags 0x%08x\n", - votemap, HI (LockId), LO (LockId), Flags); - continue; - } - goto finito; - } + status = ocfs_request_vote (osb, lock_id, lock_type, flags, + votemap, &lockseqno); + if (status == -EAGAIN) { + LOG_TRACE_ARGS ("ocfs_request_vote retrying votemap " + "0x%08x lockid %u.%u, status 0x%08x\n", votemap, + HI (lock_id), LO (lock_id), status); + if ((flags & FLAG_FILE_UPDATE_OIN)) { + // ????? } + continue; + } - status = - ocfs_reset_voting (osb, LockId, LockType, - DISK_LOCK_OIN_MAP (fe)); - if (status < 0) { - LOG_ERROR_STATUS (status); + if (status < 0) { + LOG_TRACE_ARGS ("ocfs_request_vote failed votemap " + "0x%08x, lockid %u.%u, status 0x%08x\n", + votemap, HI (lock_id), LO (lock_id), status); + goto finally; + } + + status = ocfs_wait_for_vote (osb, lock_id, lock_type, + FLAG_FILE_UPDATE_OIN, votemap, + 5000, lockseqno, lockres); + if (status < 0) { + if (status == -EAGAIN) { + LOG_TRACE_ARGS ("ocfs_wait_for_vote retrying " + "votemap 0x%08x, lockid %u.%u, flags " + "0x%08x\n", votemap, HI (lock_id), + LO (lock_id), flags); + continue; } + goto finally; } } - LOG_TRACE_ARGS ("Votemap was 0, votemap 0x%08x, " - "lockid %u.%u, flags 0x%08x\n", - votemap, HI (LockId), LO (LockId), Flags); + finally: + status = ocfs_reset_voting (osb, lock_id, lock_type, + DISK_LOCK_OIN_MAP (fe)); + if (status < 0) + LOG_ERROR_STATUS (status); finito: - - if (Flags & FLAG_FILE_RELEASE_MASTER) + if (flags & 
FLAG_FILE_RELEASE_MASTER) DISK_LOCK_CURRENT_MASTER (fe) = OCFS_INVALID_NODE_NUM; if ((DISK_LOCK_FILE_LOCK (fe) == OCFS_DLM_ENABLE_CACHE_LOCK) && @@ -2261,15 +1686,15 @@ int ocfs_disk_release_lock (ocfs_super * /* Reset the lock on the disk */ if (!cachelock) { - tmpstat = ocfs_write_file_entry (osb, fe, LockId); + tmpstat = ocfs_write_file_entry (osb, fe, lock_id); if (tmpstat < 0) { status = tmpstat; LOG_ERROR_STATUS (status); } } - if (fe) - ocfs_release_file_entry (fe); + if (fe_alloc && fe) + ocfs_release_file_entry(fe); LOG_EXIT_STATUS (status); return status; @@ -2279,25 +1704,24 @@ int ocfs_disk_release_lock (ocfs_super * * ocfs_release_lock() * */ -int ocfs_release_lock (ocfs_super * osb, - ub8 LockId, - ub4 LockType, ub4 Flags, ocfs_lock_res * LockResource) +int ocfs_release_lock (ocfs_super * osb, ub8 lock_id, ub4 lock_type, ub4 flags, + ocfs_lock_res * lockres, ocfs_file_entry *fe) { int status = 0; bool lock_acq = false; - LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x)\n", osb, HI (LockId), - LO (LockId), LockType, Flags, LockResource); + LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, %u, 0x%08x)\n", osb, HI (lock_id), + LO (lock_id), lock_type, flags, lockres); - ocfs_acquire_lockres (LockResource); + ocfs_acquire_lockres (lockres); lock_acq = true; - switch (LockType) { + switch (lock_type) { case OCFS_DLM_SHARED_LOCK: - LockResource->ref_cnt--; - if (LockResource->lock_type == OCFS_DLM_SHARED_LOCK) { - if (LockResource->ref_cnt == 0) { - LockResource->lock_type = OCFS_DLM_NO_LOCK; + lockres->ref_cnt--; + if (lockres->lock_type == OCFS_DLM_SHARED_LOCK) { + if (lockres->ref_cnt == 0) { + lockres->lock_type = OCFS_DLM_NO_LOCK; } } status = 0; @@ -2307,55 +1731,56 @@ int ocfs_release_lock (ocfs_super * osb, break; } /* - ** Change Flags based on which kind of lock we are releasing - ** For directory we need special handling of oin updates when the release - ** is for XBcast - ** For file we need to update oin's - ** For Shared we need to update the lock state 
locally only + * Change flags based on which kind of lock we are releasing + * For directory we need special handling of oin updates when the release + * is for XBcast + * For file we need to update oin's + * For Shared we need to update the lock state locally only */ /* OcfsRelease */ /* CommReleaseLock */ - if (Flags & FLAG_FILE_DELETE) { - LockResource->lock_type = OCFS_DLM_NO_LOCK; - LockResource->master_node_num = OCFS_INVALID_NODE_NUM; + if (flags & FLAG_FILE_DELETE) { + lockres->lock_type = OCFS_DLM_NO_LOCK; + lockres->master_node_num = OCFS_INVALID_NODE_NUM; status = 0; - goto finally; + goto do_release_lock; } -// if(LockId != OCFS_BITMAP_LOCK_OFFSET) +// if(lock_id != OCFS_BITMAP_LOCK_OFFSET) { - if ((LockResource->lock_type == OCFS_DLM_ENABLE_CACHE_LOCK) && - (LockResource->master_node_num == osb->node_num)) { + if ((lockres->lock_type == OCFS_DLM_ENABLE_CACHE_LOCK) && + (lockres->master_node_num == osb->node_num)) { status = 0; goto finally; } } - if (LockId == OCFS_BITMAP_LOCK_OFFSET) { + if (lock_id == OCFS_BITMAP_LOCK_OFFSET) { LOG_TRACE_ARGS ("Bitmap lock state is (%d)\n", - LockResource->lock_type); + lockres->lock_type); } - LockResource->lock_type = OCFS_DLM_NO_LOCK; - if (Flags & FLAG_FILE_RELEASE_MASTER) - LockResource->master_node_num = OCFS_INVALID_NODE_NUM; + lockres->lock_type = OCFS_DLM_NO_LOCK; + if (flags & FLAG_FILE_RELEASE_MASTER) + lockres->master_node_num = OCFS_INVALID_NODE_NUM; +do_release_lock: if (lock_acq) { - ocfs_release_lockres (LockResource); + ocfs_release_lockres (lockres); lock_acq = false; } - status = ocfs_disk_release_lock (osb, LockId, LockType, Flags, LockResource); + status = ocfs_disk_release_lock (osb, lock_id, lock_type, flags, lockres, fe); if (status < 0) { /* Disable the vol */ LOG_ERROR_STATUS (status); goto finally; } - finally: +finally: if (lock_acq) { - ocfs_release_lockres (LockResource); + ocfs_release_lockres (lockres); lock_acq = false; } @@ -2384,12 +1809,10 @@ int ocfs_init_dlm (void) */ int 
ocfs_add_lock_to_recovery (void) { - LOG_ENTRY (); - - LOG_EXIT (); return 0; } /* ocfs_add_lock_to_recovery */ +#ifdef UNUSED /* * ocfs_create_log_extent_map() * @@ -2411,7 +1834,7 @@ int ocfs_create_log_extent_map (ocfs_sup ocfs_io_runs *IoMetaDataRuns = NULL; ocfs_io_runs *IoTransRuns = NULL; ocfs_io_runs *TransRuns = NULL; - ub8 fileSize; + ub8 file_size; ub8 remainingLength; bool bRet; ub4 RunsInExtentMap = 0; @@ -2554,16 +1977,16 @@ int ocfs_create_log_extent_map (ocfs_sup if (osb->log_disk_off == 0) ocfs_create_meta_log_files (osb); - fileSize = osb->log_file_size; + file_size = osb->log_file_size; - if (fileSize > (10 * ONE_MEGA_BYTE)) + if (file_size > (10 * ONE_MEGA_BYTE)) OCFS_BREAKPOINT (); tempVbo = IoMetaDataRuns[i].disk_off; /* Actual Disk Offset */ - tempLbo = fileSize + osb->log_disk_off; /* Log file disk Offset */ + tempLbo = file_size + osb->log_disk_off; /* Log file disk Offset */ tempSize = IoMetaDataRuns[i].byte_cnt; /* Lenght of run */ - osb->log_file_size = (fileSize + tempSize); + osb->log_file_size = (file_size + tempSize); /* Add the Extent to extent map list */ ocfs_down_sem (&(osb->map_lock), true); @@ -2594,12 +2017,12 @@ int ocfs_create_log_extent_map (ocfs_sup numTransRuns++; } - fileSize = osb->log_file_size; + file_size = osb->log_file_size; - if (fileSize > (10 * ONE_MEGA_BYTE)) + if (file_size > (10 * ONE_MEGA_BYTE)) OCFS_BREAKPOINT (); - if (fileSize >= (2 * ONE_MEGA_BYTE)) + if (file_size >= (2 * ONE_MEGA_BYTE)) osb->needs_flush = true; *PNumTransRuns = numTransRuns; @@ -2613,26 +2036,29 @@ int ocfs_create_log_extent_map (ocfs_sup LOG_EXIT_STATUS (status); return status; } /* ocfs_create_log_extent_map */ +#endif /* UNUSED */ /* * ocfs_lookup_cache_link() * */ -int ocfs_lookup_cache_link (ocfs_super * osb, - ub1 * Buffer, ub8 actual_disk_off, ub8 Length) +int ocfs_lookup_cache_link (ocfs_super * osb, ub1 * buf, ub8 actual_disk_off, + ub8 length) { int status = 0; ocfs_file_entry *fe = NULL; ocfs_lock_res *lockres = NULL; ub8 
offset = 0; - struct list_head *iterentry; + struct list_head *entry; + struct list_head *tmp_entry; + ocfs_inode *oin; LOG_ENTRY (); offset = actual_disk_off; - list_for_each (iterentry, &(osb->cache_lock_list)) { - lockres = list_entry (iterentry, ocfs_lock_res, cache_list); + list_for_each_safe (entry, tmp_entry, &(osb->cache_lock_list)) { + lockres = list_entry (entry, ocfs_lock_res, cache_list); if (lockres == NULL) { LOG_ERROR_STATUS(status = -EFAIL); goto bail; @@ -2641,22 +2067,17 @@ int ocfs_lookup_cache_link (ocfs_super * ocfs_acquire_lockres (lockres); if ((lockres->sector_num >= actual_disk_off) && - (lockres->sector_num < (actual_disk_off + Length))) { - LOG_TRACE_ARGS - ("ocfs_lookup_cache_link has a valid entry in cache " - "link for DiskOffset %u.%u\n", - HI (lockres->sector_num), - LO (lockres->sector_num)); + (lockres->sector_num < (actual_disk_off + length))) { + LOG_TRACE_ARGS ("ocfs_lookup_cache_link has a valid " + "entry in cache link for disk offset %u.%u\n", + HI (lockres->sector_num), LO (lockres->sector_num)); /* Change Lock type */ - fe = (ocfs_file_entry *) ((ub1 *) Buffer + - (lockres->sector_num - - actual_disk_off)); + fe = (ocfs_file_entry *) + ((ub1 *) buf + (lockres->sector_num - actual_disk_off)); /* Flush */ if (lockres->oin != NULL) { - ocfs_inode *oin; - oin = lockres->oin; oin->cache_enabled = false; @@ -2668,19 +2089,19 @@ int ocfs_lookup_cache_link (ocfs_super * if (oin->open_hndl_cnt == 0) lockres->lock_type = DISK_LOCK_FILE_LOCK (fe) = - OCFS_DLM_NO_LOCK; + OCFS_DLM_NO_LOCK; else ocfs_flush_cache (osb); } } else { - /* Release the lock, as there will be no open handle if there is */ - /* no oin, and so we don't need to keep the lock state to caching */ + /* Release the lock, as there will be no open */ + /* handle if there is no oin, and so we don't */ + /* need to keep the lock state to caching */ lockres->lock_type = DISK_LOCK_FILE_LOCK (fe) = - OCFS_DLM_NO_LOCK; + OCFS_DLM_NO_LOCK; } - lockres->in_cache_list = false; - 
list_del (iterentry); + list_del (entry); } ocfs_release_lockres (lockres); } @@ -2696,111 +2117,103 @@ int ocfs_lookup_cache_link (ocfs_super * * This is recovery. It will read the log file based on trans extent map and * do the actual disk writes of meta data at right disk offset. */ -int ocfs_process_log_file (ocfs_super * osb, bool Flag) +int ocfs_process_log_file (ocfs_super * osb, bool flag) { int status = 0; - ub1 *pMetaDataBuffer = NULL; - ub1 *pLTempBuffer = NULL; + ub1 *meta_data_buf = NULL; + ub1 *tmp_buf = NULL; ub4 size; ub4 i = 0; - ocfs_offset_map *pMapBuffer; - ub8 fileSize; - ub8 metaFileSize; - ub8 metaAllocSize; + ocfs_offset_map *map_buf; + ub8 file_size; + ub8 meta_file_size; + ub8 meta_alloc_size; + ub8 offset; LOG_ENTRY (); - metaAllocSize = 0; + meta_alloc_size = 0; status = ocfs_get_system_file_size (osb, - (OCFS_FILE_VOL_META_DATA + osb->node_num), - &metaFileSize, &metaAllocSize); + (OCFS_FILE_VOL_META_DATA + osb->node_num), + &meta_file_size, &meta_alloc_size); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } - size = OCFS_ALIGN (metaFileSize, osb->vol_layout.cluster_size); - pMetaDataBuffer = ocfs_malloc (size); - if (pMetaDataBuffer == NULL) { + size = OCFS_ALIGN (meta_file_size, osb->vol_layout.cluster_size); + meta_data_buf = ocfs_malloc (size); + if (meta_data_buf == NULL) { LOG_ERROR_STATUS (status = -ENOMEM); goto finally; } - status = ocfs_read_system_file (osb, (OCFS_FILE_VOL_META_DATA + osb->node_num), - pMetaDataBuffer, size, 0); + status = ocfs_read_system_file (osb, + (OCFS_FILE_VOL_META_DATA + osb->node_num), + meta_data_buf, size, 0); if (status < 0) { LOG_ERROR_STATUS (status = -ENOMEM); goto finally; } - fileSize = osb->log_file_size; + file_size = osb->log_file_size; size = 0; i = 0; - while (metaFileSize != 0) { - ub8 tmpOffset; - - pMapBuffer = (ocfs_offset_map *) (pMetaDataBuffer + - (i * - sizeof (ocfs_offset_map))); - - if ((pMapBuffer->length % OCFS_SECTOR_SIZE) || - (pMapBuffer->actual_disk_off % 
OCFS_SECTOR_SIZE)) { - LOG_ERROR_STR - ("length or actual_disk_off is Unaligned"); + while (meta_file_size != 0) { + map_buf = (ocfs_offset_map *) + (meta_data_buf + (i * sizeof (ocfs_offset_map))); + + if ((map_buf->length % OCFS_SECTOR_SIZE) || + (map_buf->actual_disk_off % OCFS_SECTOR_SIZE)) { + LOG_ERROR_STR ("length or actual_disk_off is not aligned"); } - if (size < pMapBuffer->length) { - ocfs_safefree (pLTempBuffer); - - size = OCFS_ALIGN (pMapBuffer->length, PAGE_SIZE); - - pLTempBuffer = ocfs_malloc (size); - if (pLTempBuffer == NULL) { + if (size < map_buf->length) { + ocfs_safefree (tmp_buf); + size = OCFS_ALIGN (map_buf->length, osb->sect_size); + tmp_buf = ocfs_malloc (size); + if (tmp_buf == NULL) { LOG_ERROR_STATUS (status = -ENOMEM); goto finally; } } - tmpOffset = pMapBuffer->log_disk_off; + offset = map_buf->log_disk_off; - status = - ocfs_read_force_disk (osb, pLTempBuffer, pMapBuffer->length, - tmpOffset); + status = ocfs_read_force_disk (osb, tmp_buf, map_buf->length, + offset); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } - tmpOffset = pMapBuffer->actual_disk_off; + offset = map_buf->actual_disk_off; - if (Flag) { - status = ocfs_lookup_cache_link (osb, pLTempBuffer, - pMapBuffer-> - actual_disk_off, - pMapBuffer->length); + if (flag) { + status = ocfs_lookup_cache_link (osb, tmp_buf, + map_buf->actual_disk_off, map_buf->length); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } } - status = - ocfs_write_force_disk (osb, pLTempBuffer, pMapBuffer->length, - tmpOffset); + status = ocfs_write_force_disk (osb, tmp_buf, map_buf->length, + offset); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } - metaFileSize -= sizeof (ocfs_offset_map); + meta_file_size -= sizeof (ocfs_offset_map); i++; } finally: - ocfs_safefree (pMetaDataBuffer); - ocfs_safefree (pLTempBuffer); + ocfs_safefree (meta_data_buf); + ocfs_safefree (tmp_buf); LOG_EXIT_STATUS (status); return status; @@ -2810,31 +2223,32 @@ int 
ocfs_process_log_file (ocfs_super * * ocfs_break_cache_lock() * */ -int ocfs_break_cache_lock (ocfs_super * osb, ocfs_lock_res * LockRes) +int ocfs_break_cache_lock (ocfs_super * osb, ocfs_lock_res * lockres) { int status = -EAGAIN; + int tmpstat; ub4 votemap; ub8 lockseqno = 0; LOG_ENTRY (); /* Ask the node with cache to flush and revert to write thru on this file */ - votemap = (1 << LockRes->master_node_num); + votemap = (1 << lockres->master_node_num); while (status == -EAGAIN) { - if (!IS_NODE_ALIVE (osb->publ_map, LockRes->master_node_num, + if (!IS_NODE_ALIVE (osb->publ_map, lockres->master_node_num, OCFS_MAXIMUM_NODES)) { - LOG_ERROR_ARGS - ("master is dead lock id %u.%u, master node %u\n", - LockRes->sector_num, LockRes->master_node_num); + LOG_TRACE_ARGS ("Master (%u) is dead, lockid %u.%u\n", + lockres->master_node_num, lockres->sector_num); /* TODO recovery needs to be done here .....and then become master */ status = 0; goto finito; } - status = - ocfs_request_vote (osb, LockRes->sector_num, LockRes->lock_type, - FLAG_FILE_RELEASE_CACHE, votemap, &lockseqno); + status = ocfs_request_vote (osb, lockres->sector_num, + lockres->lock_type, + FLAG_FILE_RELEASE_CACHE, votemap, + &lockseqno); if (status == -EAGAIN) continue; else if (status < 0) { @@ -2842,10 +2256,10 @@ int ocfs_break_cache_lock (ocfs_super * goto finito; } - status = - ocfs_wait_for_vote (osb, LockRes->sector_num, LockRes->lock_type, - FLAG_FILE_RELEASE_CACHE, votemap, - 15000 /* 15sec */ , lockseqno, LockRes); + status = ocfs_wait_for_vote (osb, lockres->sector_num, + lockres->lock_type, + FLAG_FILE_RELEASE_CACHE, votemap, + 15000, lockseqno, lockres); if (status == -EAGAIN) continue; else if (status < 0) { @@ -2855,15 +2269,14 @@ int ocfs_break_cache_lock (ocfs_super * break; } - LockRes->lock_type = (ub1) OCFS_DLM_NO_LOCK; + lockres->lock_type = (ub1) OCFS_DLM_NO_LOCK; LOG_TRACE_STR ("okie dokie... 
ocfs_break_cache_lock done\n"); finito: - status = - ocfs_reset_voting (osb, LockRes->sector_num, LockRes->lock_type, votemap); - if (status < 0) { - LOG_ERROR_STATUS (status); - return (status); + tmpstat = ocfs_reset_voting (osb, lockres->sector_num, + lockres->lock_type, votemap); + if (tmpstat < 0) { + LOG_ERROR_STATUS (status = tmpstat); } LOG_EXIT_STATUS (status); diff -urNp x-ref/fs/ocfs/Common/ocfsgenmisc.c x/fs/ocfs/Common/ocfsgenmisc.c --- x-ref/fs/ocfs/Common/ocfsgenmisc.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/ocfsgenmisc.c Mon Oct 21 04:41:19 2002 @@ -34,20 +34,6 @@ ub4 OcfsErrorLogSequence = 0; /* - * ocfs_delete_name() - * - */ -void ocfs_delete_name (ocfs_inode * oin) -{ - LOG_ENTRY (); - - /* DO NOTHING -Manish */ - - LOG_EXIT (); - return; -} /* ocfs_delete_name */ - -/* * ocfs_create_meta_log_files() * */ @@ -90,13 +76,13 @@ int ocfs_create_meta_log_files (ocfs_sup } status = ocfs_extend_system_file (osb, (OCFS_FILE_VOL_LOG_FILE + - osb->node_num), (ONE_MEGA_BYTE * 10)); + osb->node_num), (ONE_MEGA_BYTE * 10), NULL); if (status < 0) { LOG_ERROR_STATUS (status); goto bail; } - ocfs_extend_system_file (osb, (OCFS_FILE_VOL_LOG_FILE + osb->node_num), 0); + ocfs_extend_system_file (osb, (OCFS_FILE_VOL_LOG_FILE + osb->node_num), 0, NULL); log_disk_off = ocfs_file_to_disk_off (osb, (OCFS_FILE_VOL_LOG_FILE + osb->node_num), 0); @@ -108,13 +94,13 @@ int ocfs_create_meta_log_files (ocfs_sup osb->log_disk_off = log_disk_off; status = ocfs_extend_system_file (osb, (OCFS_FILE_VOL_META_DATA + - osb->node_num), ONE_MEGA_BYTE); + osb->node_num), ONE_MEGA_BYTE, NULL); if (status < 0) { LOG_ERROR_STATUS (status); return (status); } - ocfs_extend_system_file (osb, (OCFS_FILE_VOL_META_DATA + osb->node_num), 0); + ocfs_extend_system_file (osb, (OCFS_FILE_VOL_META_DATA + osb->node_num), 0, NULL); log_disk_off = ocfs_file_to_disk_off (osb, (OCFS_FILE_VOL_META_DATA + osb->node_num), 0); @@ -144,14 +130,10 @@ int ocfs_create_new_oin (ocfs_inode ** R LOG_ENTRY (); 
- /* - ** Don't do OCFS_ASSERT for FileObject, as it is possible and - ** OK if FileObject is NULL - */ + /* Don't do OCFS_ASSERT for FileObject, as it is OK if FileObject is NULL */ OCFS_ASSERT (osb); - /* Allocate memory for a new oin */ oin = ocfs_allocate_oin (); *Returnedoin = oin; @@ -160,15 +142,13 @@ int ocfs_create_new_oin (ocfs_inode ** R goto finally; } - /* Initialize the main_res and PagingIoResource structures now. */ ocfs_init_sem (&(oin->main_res)); OCFS_SET_FLAG (oin->oin_flags, OCFS_INITIALIZED_MAIN_RESOURCE); /* Initialize the alloc size value here, file size will come later in i_size */ oin->alloc_size = *(alloc_size); - /* Insert the pointer to osb in the oin and also Initialize */ - /* the OFile list */ + /* Insert the pointer to osb in the oin and also Initialize the OFile list */ oin->osb = osb; INIT_LIST_HEAD (&(oin->next_ofile)); @@ -190,14 +170,20 @@ int ocfs_create_root_dir_node (ocfs_supe ocfs_vol_disk_hdr *volDiskHdr = NULL; ocfs_lock_res *LockResource; bool LockAcquired = false; + ocfs_file_entry *fe = NULL; LOG_ENTRY (); + fe = ocfs_allocate_file_entry(); + if (fe==NULL) { + status = -ENOMEM; + goto bail; + } /* Acquire volume Lock ... 
*/ status = ocfs_acquire_lock (osb, OCFS_VOLUME_LOCK_OFFSET, OCFS_DLM_EXCLUSIVE_LOCK, FLAG_FILE_CREATE, - &LockResource, NULL); + &LockResource, fe); if (status < 0) { LOG_ERROR_STATUS (status); goto bail; @@ -265,15 +251,9 @@ int ocfs_create_root_dir_node (ocfs_supe goto bail; } - size = OCFS_SECTOR_SIZE; - /* Read the first sector bytes from the target device */ - if ((volDiskHdr = ocfs_malloc (PAGE_SIZE)) == NULL) { - LOG_ERROR_STATUS (status = -ENOMEM); - goto bail; - } - - status = ocfs_read_disk (osb, (sb1 *) volDiskHdr, size, 0); + size = osb->sect_size; + status = ocfs_read_disk_ex (osb, (void **) &volDiskHdr, size, size, 0); if (status < 0) { LOG_ERROR_STATUS (status); goto bail; @@ -288,13 +268,6 @@ int ocfs_create_root_dir_node (ocfs_supe goto bail; } -// status = ocfs_create_meta_log_files(osb); -// if(status < 0) -// { -// LOG_ERROR_STATUS(status); -// goto bail; -// } - bail: if (status < 0) { LOG_ERROR_STR ("Disabling Volume"); @@ -305,7 +278,7 @@ int ocfs_create_root_dir_node (ocfs_supe if (LockAcquired) { tempstat = ocfs_release_lock (osb, OCFS_VOLUME_LOCK_OFFSET, - OCFS_DLM_EXCLUSIVE_LOCK, 0, LockResource); + OCFS_DLM_EXCLUSIVE_LOCK, 0, LockResource, fe); if (tempstat < 0) { osb->vol_state = VOLUME_DISABLED; status = tempstat; @@ -315,6 +288,8 @@ int ocfs_create_root_dir_node (ocfs_supe ocfs_safefree (volDiskHdr); NewDirNode = NULL; volDiskHdr = NULL; + if (fe) + ocfs_release_file_entry(fe); LOG_EXIT_STATUS (status); return status; @@ -330,43 +305,43 @@ int ocfs_create_root_oin (ocfs_super * o ocfs_inode *oin; ocfs_vol_disk_hdr *volDiskHdr = NULL; ocfs_lock_res *LockResource; + ocfs_file_entry *fe = NULL; LOG_ENTRY (); if (osb->vol_layout.root_start_off == 0) { bool LockAcquired = false; - volDiskHdr = ocfs_malloc (PAGE_SIZE); - if (volDiskHdr == NULL) { - LOG_ERROR_STR ("ocfs_malloc failed"); + fe = ocfs_allocate_file_entry(); + if (fe == NULL) { status = -ENOMEM; goto finally; } ocfs_wait_for_disk_lock_release (osb, OCFS_VOLUME_LOCK_OFFSET, 
10000 /* 10 sec */ , OCFS_DLM_NO_LOCK); - status = - ocfs_acquire_lock (osb, OCFS_VOLUME_LOCK_OFFSET, + + status = ocfs_acquire_lock (osb, OCFS_VOLUME_LOCK_OFFSET, OCFS_DLM_EXCLUSIVE_LOCK, FLAG_FILE_CREATE, - &LockResource, NULL); + &LockResource, fe); if (status < 0) { LOG_ERROR_STR ("ocfs_acquire_lock failed"); goto finally; } LockAcquired = true; - status = - ocfs_read_disk (osb, (sb1 *) volDiskHdr, OCFS_SECTOR_SIZE, 0); + + status = ocfs_read_disk_ex (osb, (void **) &volDiskHdr, + OCFS_SECTOR_SIZE, OCFS_SECTOR_SIZE, 0); if (status < 0) { LOG_ERROR_STR ("ocfs_read_disk failed"); goto finally; } if (LockAcquired) { - status = - ocfs_release_lock (osb, OCFS_VOLUME_LOCK_OFFSET, - OCFS_DLM_EXCLUSIVE_LOCK, 0, - LockResource); + status = ocfs_release_lock (osb, OCFS_VOLUME_LOCK_OFFSET, + OCFS_DLM_EXCLUSIVE_LOCK, 0, + LockResource, fe); if (status < 0) { LOG_ERROR_STR ("ocfs_release_lock failed! disabling volume!!!!"); @@ -421,132 +396,14 @@ int ocfs_create_root_oin (ocfs_super * o finally: ocfs_safefree (volDiskHdr); + if (fe) + ocfs_release_file_entry(fe); LOG_EXIT_STATUS (status); return status; } /* ocfs_create_root_oin */ -/* - * ocfs_allocate_ofile() - * - */ -ocfs_file *ocfs_allocate_ofile () -{ - ocfs_file *OFile = NULL; - int status; - - LOG_ENTRY (); - - /* Removes the first entry from the specified lookaside list in */ - /* nonpaged memory. 
If the List is empty, it returns NULL */ - OFile = kmem_cache_alloc (OcfsGlobalCtxt.ofile_cache, GFP_KERNEL); - - if (OFile == NULL) { - LOG_ERROR_STATUS (status = -ENOMEM); - goto finally; - } - memset (OFile, 0, sizeof (ocfs_file)); - OFile->obj_id.type = OCFS_TYPE_OFILE; - OFile->obj_id.size = sizeof (ocfs_file); - - finally: - LOG_EXIT_PTR (OFile); - return (OFile); -} /* ocfs_allocate_ofile */ - -/* - * ocfs_allocate_oin() - * - */ -ocfs_inode *ocfs_allocate_oin (void) -{ - ocfs_inode *oin = NULL; - int status; - - LOG_ENTRY (); - - oin = kmem_cache_alloc (OcfsGlobalCtxt.oin_cache, GFP_KERNEL); - if (oin == NULL) { - LOG_ERROR_STATUS (status = -ENOMEM); - goto finally; - } - memset (oin, 0, sizeof (ocfs_inode)); - oin->obj_id.type = OCFS_TYPE_OIN; - oin->obj_id.size = sizeof (ocfs_inode); - - finally: - - LOG_EXIT_PTR (oin); - return oin; -} /* ocfs_allocate_oin */ - - -/* - * ocfs_allocate_file_entry() - * - */ -ocfs_file_entry *ocfs_allocate_file_entry (void) -{ - ocfs_file_entry *FileEntry = NULL; - int status = 0; - - LOG_ENTRY (); - - FileEntry = kmem_cache_alloc (OcfsGlobalCtxt.fe_cache, GFP_KERNEL); - if (FileEntry == NULL) { - LOG_ERROR_STATUS (status = -ENOMEM); - goto bail; - } - - /* kmem_cache inited to give SECTOR_SIZE */ - memset (FileEntry, 0, OCFS_SECTOR_SIZE); - - bail: - LOG_EXIT_PTR (FileEntry); - return FileEntry; -} /* ocfs_allocate_file_entry */ - - -/* - * ocfs_release_file_entry() - * - */ -void ocfs_release_file_entry (ocfs_file_entry * FileEntry) -{ - LOG_ENTRY (); - - OCFS_ASSERT (FileEntry); - kmem_cache_free (OcfsGlobalCtxt.fe_cache, FileEntry); - FileEntry = NULL; - - LOG_EXIT (); - return; -} /* ocfs_release_file_entry */ - - -/* - * ocfs_release_ofile() - * - */ -void ocfs_release_ofile (ocfs_file * OFile) -{ - OCFS_ASSERT (OFile); - - LOG_ENTRY (); - - /* Free the directory name allocated in dir */ - if (OFile->curr_dir_buf != NULL) { - ocfs_safefree (OFile->curr_dir_buf); - OFile->curr_dir_buf = NULL; - } - - kmem_cache_free 
(OcfsGlobalCtxt.ofile_cache, OFile); - OFile = NULL; - - LOG_EXIT (); - return; -} /* ocfs_release_ofile */ /* * ocfs_delete_all_extent_maps() @@ -588,7 +445,7 @@ void ocfs_release_oin (ocfs_inode * oin, ocfs_super *osb; struct inode *inode; - LOG_ENTRY_ARGS ("(0x%08x, %u)\n", oin, FreeMemory); + LOG_ENTRY_ARGS ("oin = %p, free = %s\n", oin, FreeMemory? "yes" : "no"); if (!oin || !oin->osb) goto bail; @@ -634,7 +491,11 @@ void ocfs_release_oin (ocfs_inode * oin, } if (FreeMemory) { +#ifdef OCFS_MEM_DBG + ocfs_dbg_slab_free (OcfsGlobalCtxt.oin_cache, oin); +#else kmem_cache_free (OcfsGlobalCtxt.oin_cache, oin); +#endif oin = NULL; } bail: @@ -646,26 +507,28 @@ bail: * ocfs_initialize_osb() * */ -int ocfs_initialize_osb (ocfs_super * osb, - ocfs_vol_disk_hdr * VolDiskHdr, - ocfs_vol_label * VolLabel, ub4 sect_size) +int ocfs_initialize_osb (ocfs_super * osb, ocfs_vol_disk_hdr * vdh, + ocfs_vol_label * vol_label, ub4 sect_size) { int status = 0; - ocfs_publish *PublishSect = NULL; - ub4 BitMapSize, length; - void *BitMapBuffer, *buffer = NULL; - ub8 startOffset; + ocfs_publish *publish = NULL; + ub4 bitmap_len, length; + void *bitmap_buf, *buffer = NULL; + ub8 offset; + ocfs_vol_layout *vol_layout; LOG_ENTRY (); if (osb == NULL) { - status = -EFAIL; + LOG_ERROR_STATUS(status = -EFAIL); goto finally; } OCFS_CLEAR_FLAG (osb->osb_flags, OCFS_OSB_FLAGS_SHUTDOWN); - osb->vol_layout.cluster_size = (ub4) (VolDiskHdr->cluster_size); + vol_layout = &(osb->vol_layout); + + vol_layout->cluster_size = (ub4) (vdh->cluster_size); osb->obj_id.type = OCFS_TYPE_OSB; osb->obj_id.size = sizeof (ocfs_super); @@ -700,99 +563,116 @@ int ocfs_initialize_osb (ocfs_super * os osb->oin_root_dir = NULL; osb->node_num = OCFS_INVALID_NODE_NUM; - memcpy (osb->vol_layout.mount_point, VolDiskHdr->mount_point, strlen (VolDiskHdr->mount_point)); - osb->vol_layout.serial_num = VolDiskHdr->serial_num; - osb->vol_layout.size = (ub8) (VolDiskHdr->device_size); - osb->vol_layout.start_off = 
VolDiskHdr->start_off; - osb->vol_layout.bitmap_off = (ub8) VolDiskHdr->bitmap_off; - osb->vol_layout.publ_sect_off = VolDiskHdr->publ_off; - osb->vol_layout.vote_sect_off = VolDiskHdr->vote_off; - osb->vol_layout.root_bitmap_off = VolDiskHdr->root_bitmap_off; - osb->vol_layout.root_start_off = VolDiskHdr->root_off; - osb->vol_layout.root_int_off = VolDiskHdr->internal_off; - osb->vol_layout.root_size = VolDiskHdr->root_size; - osb->vol_layout.cluster_size = (ub4) VolDiskHdr->cluster_size; - osb->vol_layout.num_nodes = (ub4) VolDiskHdr->num_nodes; - osb->vol_layout.data_start_off = VolDiskHdr->data_start_off; - osb->vol_layout.root_bitmap_size = VolDiskHdr->root_bitmap_size; - osb->vol_layout.num_clusters = VolDiskHdr->num_clusters; - osb->vol_layout.dir_node_size = VolDiskHdr->dir_node_size; - osb->vol_layout.file_node_size = VolDiskHdr->file_node_size; - osb->vol_layout.node_cfg_off = VolDiskHdr->node_cfg_off; - osb->vol_layout.node_cfg_size = VolDiskHdr->node_cfg_size; - osb->vol_layout.new_cfg_off = VolDiskHdr->new_cfg_off; - osb->vol_layout.prot_bits = VolDiskHdr->prot_bits; - osb->vol_layout.uid = VolDiskHdr->uid; - osb->vol_layout.gid = VolDiskHdr->gid; - - memcpy (osb->vol_layout.id, VolLabel->id, MAX_VOL_ID_LENGTH); - - if (osb->vol_layout.dir_node_size == 0) - osb->vol_layout.dir_node_size = OCFS_DEFAULT_DIR_NODE_SIZE; - - if (osb->vol_layout.file_node_size == 0) - osb->vol_layout.file_node_size = OCFS_DEFAULT_FILE_NODE_SIZE; - - osb->max_dir_node_ent = (ub4) (osb->vol_layout.dir_node_size / OCFS_SECTOR_SIZE) - 2; - BitMapSize = (ub4) osb->vol_layout.num_clusters; + memcpy (vol_layout->mount_point, vdh->mount_point, strlen (vdh->mount_point)); + vol_layout->serial_num = vdh->serial_num; + vol_layout->size = (ub8) (vdh->device_size); + vol_layout->start_off = vdh->start_off; + vol_layout->bitmap_off = (ub8) vdh->bitmap_off; + vol_layout->publ_sect_off = vdh->publ_off; + vol_layout->vote_sect_off = vdh->vote_off; + vol_layout->root_bitmap_off = 
vdh->root_bitmap_off; + vol_layout->root_start_off = vdh->root_off; + vol_layout->root_int_off = vdh->internal_off; + vol_layout->root_size = vdh->root_size; + vol_layout->cluster_size = (ub4) vdh->cluster_size; + vol_layout->num_nodes = (ub4) vdh->num_nodes; + vol_layout->data_start_off = vdh->data_start_off; + vol_layout->root_bitmap_size = vdh->root_bitmap_size; + vol_layout->num_clusters = vdh->num_clusters; + vol_layout->dir_node_size = vdh->dir_node_size; + vol_layout->file_node_size = vdh->file_node_size; + vol_layout->node_cfg_off = vdh->node_cfg_off; + vol_layout->node_cfg_size = vdh->node_cfg_size; + vol_layout->new_cfg_off = vdh->new_cfg_off; + vol_layout->prot_bits = vdh->prot_bits; + vol_layout->uid = vdh->uid; + vol_layout->gid = vdh->gid; + + memcpy (vol_layout->vol_id, vol_label->vol_id, MAX_VOL_ID_LENGTH); + + if (vol_layout->dir_node_size == 0) + vol_layout->dir_node_size = OCFS_DEFAULT_DIR_NODE_SIZE; + + if (vol_layout->file_node_size == 0) + vol_layout->file_node_size = OCFS_DEFAULT_FILE_NODE_SIZE; + + osb->max_dir_node_ent = + (ub4) (vol_layout->dir_node_size / sect_size) - 2; + bitmap_len = (ub4) vol_layout->num_clusters; /* In the start one sector is for Volume header and second sector */ /* is for Global sequence Number and Directoy Entry. */ { - ub4 sz = OCFS_ALIGN ((BitMapSize + 7) / 8, PAGE_SIZE); + ub4 sz = OCFS_ALIGN ((bitmap_len + 7) / 8, PAGE_SIZE); - if ((BitMapBuffer = vmalloc_32 (sz)) == NULL) { - LOG_ERROR_STR ("vmalloc failed\n"); - LOG_ERROR_STATUS (status -ENOMEM); + if ((bitmap_buf = vmalloc_32 (sz)) == NULL) { + LOG_ERROR_STR ("vmalloc failed"); + status = -ENOMEM; goto finally; } } - ocfs_initialize_bitmap (&osb->cluster_bitmap, (ub4 *) BitMapBuffer, - BitMapSize); - status = ocfs_get_config (osb); - if (status < 0) { + ocfs_initialize_bitmap (&osb->cluster_bitmap, (ub4 *) bitmap_buf, + bitmap_len); + +//SM We need to free all these mem blocks if this function fails... 
+ + osb->prealloc_lock = 0; + osb->data_prealloc = ocfs_malloc (IORUN_ALLOC_SIZE); + if (!osb->data_prealloc) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + + osb->md_prealloc = ocfs_malloc (IORUN_ALLOC_SIZE); + if (!osb->md_prealloc) { LOG_ERROR_STATUS (status = -ENOMEM); goto finally; } - if ((PublishSect = ocfs_malloc (osb->sect_size)) == NULL) { + osb->cfg_len = (OCFS_MAXIMUM_NODES + + OCFS_VOLCFG_NEWCFG_SECTORS) * sect_size; + osb->cfg_prealloc = ocfs_malloc (osb->cfg_len); + if (!osb->cfg_prealloc) { LOG_ERROR_STATUS (status = -ENOMEM); goto finally; } - startOffset = osb->vol_layout.publ_sect_off + - (osb->node_num * osb->sect_size); + osb->log_prealloc = ocfs_malloc (OCFS_ALIGN(sizeof (ocfs_cleanup_record), PAGE_SIZE)); + if (!osb->log_prealloc) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } - /* Read the Publish Sector of local Node */ - status = ocfs_read_force_disk (osb, PublishSect, - osb->sect_size, startOffset); + status = ocfs_get_config (osb); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } - /* Zero out the time stamp to write a new value */ - PublishSect->time = 0; - OcfsQuerySystemTime (&PublishSect->time); - - status = - ocfs_write_disk (osb, PublishSect, osb->sect_size, startOffset); + /* Read the Publish Sector of local Node */ + offset = vol_layout->publ_sect_off + (osb->node_num * osb->sect_size); + status = ocfs_read_force_disk_ex (osb, (void **)&publish, osb->sect_size, + osb->sect_size, offset); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } - length = (osb->num_cfg_nodes * osb->sect_size); + /* Zero out the time stamp to write a new value */ + publish->time = 0; + OcfsQuerySystemTime (&publish->time); - if ((buffer = ocfs_malloc (length)) == NULL) { - LOG_ERROR_STATUS (status = -ENOMEM); + status = ocfs_write_disk (osb, publish, osb->sect_size, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); goto finally; } /* Read disk for all Publish Sectors */ - status = 
ocfs_read_force_disk (osb, buffer, length, - osb->vol_layout.publ_sect_off); + length = OCFS_MAXIMUM_NODES * osb->sect_size; + status = ocfs_read_force_disk_ex (osb, (void **)&buffer, length, length, + vol_layout->publ_sect_off); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; @@ -813,7 +693,7 @@ int ocfs_initialize_osb (ocfs_super * os OCFS_SET_FLAG (osb->osb_flags, OCFS_OSB_FLAGS_OSB_INITIALIZED); finally: - ocfs_safefree (PublishSect); + ocfs_safefree (publish); ocfs_safefree (buffer); LOG_EXIT_STATUS (status); @@ -824,39 +704,41 @@ int ocfs_initialize_osb (ocfs_super * os * ocfs_verify_volume() * */ -int ocfs_verify_volume (ocfs_vol_disk_hdr * VolDiskHdr) +int ocfs_verify_volume (ocfs_vol_disk_hdr * vdh) { int status = 0; LOG_ENTRY (); - if (VolDiskHdr == NULL) { - status = -EFAIL; + if (vdh == NULL) { + LOG_ERROR_STATUS (status = -EFAIL); goto bail; } /* Compare the Signature with the one we read from disk */ - if (memcmp (VolDiskHdr->signature, OCFS_VOLUME_SIGNATURE, + if (memcmp (vdh->signature, OCFS_VOLUME_SIGNATURE, strlen (OCFS_VOLUME_SIGNATURE)) != 0) { + LOG_ERROR_STR ("Invalid volume signature"); status = -EINVAL; goto bail; } /* Check the Volume Length and the ClusterSize. 
*/ - if (VolDiskHdr->device_size == 0) { + if (vdh->device_size == 0) { + LOG_ERROR_STR ("Device size cannot be zero"); status = -EINVAL; goto bail; } - if (VolDiskHdr->cluster_size == 0) { + if (vdh->cluster_size == 0) { + LOG_ERROR_STR ("Cluster size cannot be zero"); status = -EINVAL; goto bail; } - if (VolDiskHdr->major_version != OCFS_MAJOR_VERSION) { + if (vdh->major_version != OCFS_MAJOR_VERSION) { LOG_ERROR_ARGS ("Version number not compatible: %u.%u\n", - VolDiskHdr->major_version, - VolDiskHdr->minor_version); + vdh->major_version, vdh->minor_version); status = -EINVAL; goto bail; } @@ -886,7 +768,7 @@ int ocfs_vol_member_reconfig (ocfs_super /* Trigger the NM on this node to init the VolMap based on the info */ /* on the disk currently and advertise to other nodes our existance. */ - ocfs_nm_heart_beat (osb, HEARTBEAT_METHOD_DISK, 0); + ocfs_nm_heart_beat (osb, HEARTBEAT_METHOD_DISK, true); /* Send a mesg to force the nm on all other nodes to process this */ /* volume, this should allow for them to detect our existance. */ @@ -925,46 +807,31 @@ int ocfs_vol_member_reconfig (ocfs_super int ocfs_check_volume (ocfs_super * osb) { int status = 0; - ub8 startOffset = 0; + ub8 offset = 0; ub1 *buffer = NULL; - ub4 length; - ocfs_publish *PublishSect; + ocfs_publish *publish; bool osb_resAcquired = false; LOG_ENTRY (); - length = sizeof (ocfs_file_entry); - length = (length > PAGE_SIZE) ? 
length : PAGE_SIZE; - - if ((buffer = ocfs_malloc (length)) == NULL) { - LOG_ERROR_STATUS (status = -ENOMEM); - goto finally; - } - - /* Read the publish map */ - startOffset = osb->vol_layout.publ_sect_off + - (osb->node_num * osb->sect_size); - - length = osb->sect_size; - - /* Read Publish Sector of the node which died */ - status = ocfs_read_force_disk (osb, buffer, length, startOffset); + /* Read the node's publish sector */ + offset = osb->vol_layout.publ_sect_off + + (osb->node_num * osb->sect_size); + status = ocfs_read_force_disk_ex (osb, (void **)&buffer, osb->sect_size, + osb->sect_size, offset); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } - PublishSect = (ocfs_publish *) buffer; + publish = (ocfs_publish *) buffer; LOG_TRACE_STR ("putting publishsector dirty check back in..."); - if (PublishSect->dirty) { + if (publish->dirty) { LOG_TRACE_STR ("publishsector IS dirty!!!"); - /* Acquire the osb lock */ LOG_TRACE_STR ("Acquiring osb lock"); - ocfs_down_sem (&(osb->osb_res), true); osb_resAcquired = true; - LOG_TRACE_STR ("Acquired osb lock"); status = ocfs_recover_vol (osb, osb->node_num); @@ -1013,6 +880,10 @@ void ocfs_delete_osb (ocfs_super * osb) ocfs_del_sem (&(osb->map_lock)); ocfs_extent_map_destroy (&osb->metadata_map); ocfs_extent_map_destroy (&osb->trans_map); + ocfs_safefree(osb->data_prealloc); + ocfs_safefree(osb->md_prealloc); + ocfs_safefree(osb->cfg_prealloc); + ocfs_safefree(osb->log_prealloc); memset (osb, 0, sizeof (ocfs_super)); LOG_EXIT (); @@ -1040,13 +911,13 @@ int ocfs_commit_cache (ocfs_super * osb, status = ocfs_extend_system_file (osb, (OCFS_FILE_VOL_LOG_FILE + osb->node_num), - 0); + 0, NULL); osb->log_file_size = 0; status = ocfs_extend_system_file (osb, (OCFS_FILE_VOL_META_DATA + osb->node_num), - 0); + 0, NULL); ocfs_extent_map_destroy (&osb->metadata_map); ocfs_extent_map_destroy (&osb->trans_map); diff -urNp x-ref/fs/ocfs/Common/ocfsgennm.c x/fs/ocfs/Common/ocfsgennm.c --- x-ref/fs/ocfs/Common/ocfsgennm.c 
Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/ocfsgennm.c Mon Oct 21 04:41:19 2002 @@ -1,7 +1,7 @@ /* * ocfsgennm.c * - * Keeps track of alive nodes in the cluster. + * process vote, nm thread, etc. * * Copyright (C) 2002 Oracle Corporation. All rights reserved. * @@ -69,291 +69,11 @@ int ocfs_flush_data (ocfs_inode * oin) } /* ocfs_flush_data */ /* - * ocfs_update_publish_map() - * - * @osb: ocfs super block for the volume - * @buffer: publish sectors read in the last round - * @first_time: if true, the buffer needs to be initialized - * - * Reads the publish sectors and compares the timestamp of each node - * to the one it read in the last round. As long as the timestamp keeps - * changing, the node is marked alive. Conversely, if the timestamp does - * not change over time, the node is marked dead. The function marks all - * the live nodes in the publishmap. - * - */ -void ocfs_update_publish_map (ocfs_super * osb, void *buffer, bool first_time) -{ - ocfs_publish *publish; - ocfs_vol_node_map *node_map; - ub8 curr_time = 0; - ub4 i; - ub4 num_nodes; - ub4 miss_cnt_val = MISS_COUNT_VALUE; - ub1 *p; - - LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u)\n", osb, buffer, first_time); - - /* Total number of nodes */ - num_nodes = osb->num_cfg_nodes; - - node_map = &(osb->vol_node_map); - - /* Read the time stamp of all nodes and compare them with the time */ - /* stamp in memory for that node */ - OcfsQuerySystemTime (&curr_time); - - /* If this function is called for the first time and the local node */ - /* doesn't have any Timestamp of any node in local memory, then we */ - /* just update everybody's last time stamp in local memory and return. */ - /* Also we don't update publ_map at this time. 
*/ - if (first_time) { - for (i = 0, p = (ub1 *) buffer; i < num_nodes; - i++, p += osb->sect_size) { - publish = (ocfs_publish *) p; - node_map->time[i] = publish->time; - node_map->scan_rate[i] = publish->hbm[i]; - node_map->scan_time[i] = curr_time; - } - goto bail; /* exit */ - } - - for (i = 0, p = (ub1 *) buffer; i < num_nodes; i++, p += osb->sect_size) { - publish = (ocfs_publish *) p; - - /* Check if the node is hung or not by comparing the disk */ - /* and memory timestamp values */ - if (node_map->time[i] == publish->time) { - /* If we are connected to this node and it is not */ - /* hearbeating on this volume it is time to send a */ - /* message to ask it to start heartbeating to the */ - /* volume. Check to see we expected a rate change */ - if (node_map->exp_scan_rate[i] != DISK_HBEAT_INVALID) { - if (node_map->scan_time[i] > - node_map->exp_rate_chng_time[i]) { - ub8 time1; - ub8 time2; - - time1 = (ub8) (curr_time - - node_map->scan_time[i]); - time2 = (ub8) (ONE_MILLI_SEC * 200 * - node_map->exp_scan_rate[i]); - - if (time1 > time2) - (node_map->miss_cnt[i])++; - } - - switch (node_map->exp_scan_rate[i]) { - case DISK_HBEAT_COMM_ON: - /* - * We are probably switching the node - * to larger from smaller rate. The - * comm establishment should have - * enabled the longer hbt on this. If - * some other node was asking for a - * smaller hbeat, and the actual - * hbeat is smaller we should not - * have hit this case, the stamp - * should have changed. Increment the - * misscount, the node is probably - * dead/hung. 
- */ - miss_cnt_val = MISS_COUNT_VALUE; - break; - - case DISK_HBEAT_NO_COMM: - miss_cnt_val = MISS_COUNT_VALUE; - break; - } - } else { - (node_map->miss_cnt[i])++; - } - - /* This will clear the bit for the osb publish map */ - /* for the node which is not updating its timestamp */ - if (node_map->miss_cnt[i] > MISS_COUNT_VALUE) { - UPDATE_PUBLISH_MAP (osb->publ_map, i, 0, - num_nodes); - /* - * If we are in fact connected to this node and - * it is not hearbeating on this volume it is - * time to send a message to disable volume - * access on this node and ask it to remount the - * volume now... - * Should we check to do some recovery here for - * the hung node, esp. releasing the locks? - */ - } - } else { - /* This will set the bit for the osb publish map for */ - /* the node which is up or new addition */ - node_map->miss_cnt[i] = 0; - node_map->time[i] = publish->time; - UPDATE_PUBLISH_MAP (osb->publ_map, i, 1, num_nodes); - - /* Update the multiple the other node wants us to hbeat */ - if ((publish->hbm[osb->node_num] != DISK_HBEAT_INVALID) - && (osb->hbm > publish->hbm[osb->node_num])) { - /* Go to the lowest multiplier any of the nodes */ - /* alive want us to heartbeat with. */ - osb->hbm = publish->hbm[osb->node_num]; - - if (osb->hbm == 0) { - LOG_ERROR_STR ("Heartbeat was 0"); - osb->hbm = DISK_HBEAT_NO_COMM; - } - - if (OcfsGlobalCtxt.hbm > osb->hbm) { - if (osb->hbm) - OcfsGlobalCtxt.hbm = osb->hbm; - } - - if (OcfsGlobalCtxt.hbm == 0) { - LOG_ERROR_STR ("Global heartbeat was 0"); - OcfsGlobalCtxt.hbm = DISK_HBEAT_NO_COMM; - } - } - - /* CALL IPC for a reconfig if this node was a joining node. */ - /* Also, delay the join somewhat... */ - } - node_map->scan_time[i] = curr_time; - } - - bail: - LOG_EXIT (); - return; -} /* ocfs_update_publish_map */ - -/* - * ocfs_nm_heart_beat() - * - * @osb: ocfs super block for the volume - * @flag: type of heart beat - * @op: - * - * Updates the timestamp in the nodes publish sector. 
- * - * Returns 0 if success, < 0 if error. - */ -int ocfs_nm_heart_beat (ocfs_super * osb, ub4 flag, ub1 op) -{ - ocfs_publish *publish = NULL; - int status = 0; - ub8 node_publ_off = 0; - - LOG_ENTRY_ARGS ("(0x%08x, %u, %u)\n", osb, flag, op); - - if (flag & HEARTBEAT_METHOD_DISK) { - publish = ocfs_malloc (osb->sect_size); - if (publish == NULL) { - status = -ENOMEM; - LOG_ERROR_STATUS (status); - goto finally; - } - - node_publ_off = osb->vol_layout.publ_sect_off + - (osb->node_num * osb->sect_size); - -//SM??? This function can be optimized by removing this read as this function -// is called only from ocfs_nm_thread() after that function has read the -// Publish Sectors for all the nodes - /* Read the publish sector */ - status = ocfs_read_force_disk (osb, publish, osb->sect_size, - node_publ_off); - if (status < 0) { - LOG_ERROR_STATUS (status); - goto finally; - } - - OcfsQuerySystemTime (&publish->time); - - publish->hbm[osb->node_num] = osb->hbm; - - node_publ_off = osb->vol_layout.publ_sect_off + - (osb->node_num * osb->sect_size); - - /* Write the current time in local node's Publish sector */ - status = ocfs_write_force_disk (osb, publish, osb->sect_size, - node_publ_off); - if (status < 0) { - LOG_ERROR_STATUS (status); - goto finally; - } - LOG_TRACE_ARGS ("Updating timestamp for node (%u)\n", - osb->node_num); - } - - if (flag & HEARTBEAT_METHOD_IPC) { - /* Plug this in later... 
*/ - } - - finally: - ocfs_safefree (publish); - - LOG_EXIT_STATUS (status); - return status; -} /* ocfs_nm_heart_beat */ - -#if !defined(DLM_THREAD_PER_VOLUME) -/* - * ocfs_polling_thread() - * - */ -int ocfs_polling_thread (void *unused) -{ - struct list_head *iterEntry; - bool bGlobalResourceAcquired = false; - IPC_DLM_CONFIG IpcDlmConfig; - - LOG_ENTRY (); - - IpcDlmConfig.Version = OCFS_IPC_DLM_VERSION; - IpcDlmConfig.MsgSize = OCFS_DLM_MAX_MSG_SIZE; - - init_waitqueue_head (&OcfsGlobalCtxt.FlushEvent); - - ocfs_daemonize ("ocfsdlm"); - /* Acquire DLMThreadMonitor */ - ocfs_down_sem (&(OcfsGlobalCtxt.DLMThreadMonitor), true); - - /* The delay changes based on multiplier */ - while (!(OcfsGlobalCtxt.OcfsFlags & OCFS_FLAG_SHUTDOWN_VOL_THREAD)) { - if (OcfsGlobalCtxt.hbm == 0) { - LOG_ERROR_STR ("OcfsGlobalCtxt Heartbeat was 0"); - OcfsGlobalCtxt.hbm = DISK_HBEAT_NO_COMM; - } - - ocfs_wait (&OcfsGlobalCtxt.FlushEvent, false, 2000); /* ms */ - - init_waitqueue_head (&OcfsGlobalCtxt.FlushEvent); - - ocfs_nm_thread (NULL); - } - - /* Relinquish semaphore to signal main thread */ - ocfs_up_sem (&(OcfsGlobalCtxt.DLMThreadMonitor)); - - LOG_EXIT_LONG (0); - return 0; -} /* ocfs_polling_thread */ -#endif /* !defined(DLM_THREAD_PER_VOLUME) */ - -/* - * ocfs_nm_join_cluster() - * - */ -int ocfs_nm_join_cluster (ocfs_super * osb) -{ - return -EFAIL; -} /* ocfs_nm_join_cluster */ - -/* * ocfs_disk_update_resource() * * @osb: ocfs super block for the volume - * @LockResource: lockres to be updated - * @FileEntry: corresponding file entry + * @lock_res: lockres to be updated + * @file_ent: corresponding file entry * * Updates the in memory lock resource from the disklock info * stored in the file entry on disk. 
@@ -375,10 +95,8 @@ int ocfs_disk_update_resource (ocfs_supe if (file_ent) { fe = file_ent; status = ocfs_read_file_entry (osb, fe, lock_res->sector_num); - } else { + } else status = ocfs_get_file_entry (osb, &fe, lock_res->sector_num); - } - if (status < 0) { LOG_ERROR_STATUS (status); goto finally; @@ -406,10 +124,10 @@ int ocfs_disk_update_resource (ocfs_supe * ocfs_find_update_res() * * @osb: ocfs super block for the volume - * @LockId: sector number of the resource to be locked - * @LockResource: lockres of the resource - * @FileEntry: corresponding file entry - * @Updated: set to 1 if lockres is refreshed from disk + * @lock_id: sector number of the resource to be locked + * @lockres: lockres of the resource + * @fe: corresponding file entry + * @updated: set to 1 if lockres is refreshed from disk * * Searches for the lockres for the given lockid in the hashtable. * If not found, it allocates a lockres for the lockid, and adds @@ -418,86 +136,81 @@ int ocfs_disk_update_resource (ocfs_supe * * Returns 0 if success, < 0 if error. 
*/ -int ocfs_find_update_res (ocfs_super * osb, - ub8 LockId, - ocfs_lock_res ** LockResource, - ocfs_file_entry * FileEntry, ub4 * Updated) +int ocfs_find_update_res (ocfs_super * osb, ub8 lock_id, + ocfs_lock_res ** lockres, ocfs_file_entry * fe, + ub4 * updated) { int status = 0; - ocfs_lock_res *lockres = NULL; + ocfs_lock_res *tmp_lockres = NULL; LOG_ENTRY_ARGS ("(0x%08x, %u.%u, 0x%08x, 0x%08x, 0x%08x)\n", osb, - HI (LockId), LO (LockId), LockResource, FileEntry, - Updated); + HI (lock_id), LO (lock_id), lockres, fe, + updated); - status = ocfs_lookup_sector_node (osb, LockId, &lockres); + status = ocfs_lookup_sector_node (osb, lock_id, lockres); if (status < 0) { - /* Create a resource and insert in the hash */ - lockres = kmem_cache_alloc (OcfsGlobalCtxt.lockres_cache, GFP_KERNEL); - - if (lockres == NULL) { + *lockres = ocfs_allocate_lockres(); + if (*lockres == NULL) { LOG_ERROR_STATUS (status = -ENOMEM); goto finally; } - /* Initialize Resource */ - ocfs_init_lockres (osb, lockres, LockId); + ocfs_init_lockres (osb, *lockres, lock_id); - /* Update from the disk */ - status = ocfs_disk_update_resource (osb, lockres, FileEntry); + status = ocfs_disk_update_resource (osb, *lockres, fe); if (status < 0) { - /* This should never fail so lock up the volume */ LOG_ERROR_STR ("Disabling Volume"); osb->vol_state = VOLUME_DISABLED; goto finally; } - if (LockId != lockres->sector_num) { + if (lock_id != (*lockres)->sector_num) { LOG_ERROR_STATUS(status = -EFAIL); goto finally; } - if (Updated) - *Updated = 1; + if (updated) + *updated = 1; - ocfs_insert_sector_node (osb, lockres); + status = ocfs_insert_sector_node (osb, *lockres, &tmp_lockres); if (status < 0) { - /* Failure LOCK up volume operation */ - LOG_ERROR_STR ("Disabling Volume"); - osb->vol_state = VOLUME_DISABLED; + LOG_ERROR_STATUS (status); goto finally; } - } else { - if (lockres->signature != 0x55AA) { - LOG_ERROR_STATUS(status = -EFAIL); + if (!tmp_lockres) goto finally; + else { + 
ocfs_release_lockres (*lockres); + *lockres = tmp_lockres; } + } - if (LockId != lockres->sector_num) { - LOG_ERROR_STATUS(status = -EFAIL); - goto finally; - } + /* sanity checks */ + if ((*lockres)->signature != 0x55AA) { + LOG_ERROR_STATUS(status = -EFAIL); + goto finally; + } - if (lockres->master_node_num != (sb4) osb->node_num) { - /* Update from the disk */ - status = ocfs_disk_update_resource (osb, lockres, FileEntry); - if (status < 0) { - /* This should never fail so lock up the volume */ - LOG_ERROR_STR ("Disabling Volume"); - osb->vol_state = VOLUME_DISABLED; - goto finally; - } + if (lock_id != (*lockres)->sector_num) { + LOG_ERROR_STATUS(status = -EFAIL); + goto finally; + } - if (Updated) - *Updated = 1; + if ((*lockres)->master_node_num != osb->node_num) { + status = ocfs_disk_update_resource (osb, *lockres, fe); + if (status < 0) { + LOG_ERROR_STR ("Disabling Volume"); + osb->vol_state = VOLUME_DISABLED; + goto finally; } + if (updated) + *updated = 1; } - *LockResource = lockres; - finally: - + if (status < 0) + *lockres = NULL; LOG_EXIT_STATUS (status); return status; } /* ocfs_find_update_res */ @@ -505,60 +218,60 @@ int ocfs_find_update_res (ocfs_super * o /* * ocfs_vote_for_del_ren() * + * @osb: + * @publish: + * @node_num: node asking for the vote + * @vote: + * @lockres: + * */ -int ocfs_vote_for_del_ren (ocfs_super * osb, ocfs_publish * PublishToVote, - ub4 NodeAskingVote, ocfs_vote * VoteSector, - ocfs_lock_res ** LockResource) +int ocfs_vote_for_del_ren (ocfs_super * osb, ocfs_publish * publish, + ub4 node_num, ocfs_vote * vote, + ocfs_lock_res ** lockres) { int status = 0; - ub4 Flags = 0; - ub4 retryCount = 0; + ub4 flags = 0; + ub4 retry_cnt = 0; bool acq_oin = false; - ocfs_file_entry *pFileEntry = NULL; - ocfs_lock_res *lockres; + ocfs_file_entry *fe = NULL; bool rls_oin = true; ocfs_sem *oin_sem = NULL; - LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u, 0x%08x, 0x%08x)\n", osb, - PublishToVote, NodeAskingVote, VoteSector, - LockResource); - - 
lockres = *LockResource; + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u, 0x%08x, 0x%08x)\n", osb, publish, + node_num, vote, lockres); - Flags = PublishToVote->vote_type; + flags = publish->vote_type; - ocfs_acquire_lockres (lockres); + ocfs_acquire_lockres (*lockres); - if (lockres->oin) { - UPDATE_OIN (lockres->oin); + if ((*lockres)->oin) { + UPDATE_OIN ((*lockres)->oin); } LOG_TRACE_ARGS ("For node (%u) for lockid %u.%u and seq %u.%u\n", - NodeAskingVote, - HI (PublishToVote->dir_ent), - LO (PublishToVote->dir_ent), - HI (PublishToVote->seq_num), - LO (PublishToVote->seq_num)); + node_num, HI (publish->dir_ent), + LO (publish->dir_ent), HI (publish->publ_seq_num), + LO (publish->publ_seq_num)); /* Check for oin */ - if (lockres->oin != NULL) { + if ((*lockres)->oin != NULL) { ocfs_inode *oin; - oin = lockres->oin; + oin = (*lockres)->oin; oin_sem = &(oin->main_res); ocfs_down_sem (oin_sem, true); acq_oin = true; /* If OIN_IN_USE is set we should go back and retry */ - while ((oin->oin_flags & OCFS_OIN_IN_USE) && (retryCount < 5)) { + while ((oin->oin_flags & OCFS_OIN_IN_USE) && (retry_cnt < 5)) { if (acq_oin) { ocfs_up_sem (oin_sem); acq_oin = false; } - ocfs_sleep (20); /* 20 ms */ - retryCount++; + ocfs_sleep (20); + retry_cnt++; if (!acq_oin) { ocfs_down_sem (oin_sem, true); @@ -566,41 +279,33 @@ int ocfs_vote_for_del_ren (ocfs_super * } } -// if((lockResource->oin->ReferenceCount == 0) && - if ((lockres->oin->open_hndl_cnt == 0) && + if (((*lockres)->oin->open_hndl_cnt == 0) && (!(oin->oin_flags & OCFS_OIN_IN_USE))) { if (!(oin->oin_flags & OCFS_OIN_IN_TEARDOWN)) { -//KASEY... Could we please make the code below more confusing? 
- { - if (acq_oin) { - ocfs_up_sem (oin_sem); - acq_oin = false; - } - - ocfs_delete_name (oin); + if (acq_oin) { + ocfs_up_sem (oin_sem); + acq_oin = false; + } - rls_oin = false; + rls_oin = false; - if (!acq_oin) { - ocfs_down_sem (oin_sem, true); - acq_oin = true; - } - - OCFS_SET_FLAG (oin->oin_flags, - OCFS_OIN_NEEDS_DELETION); + if (!acq_oin) { + ocfs_down_sem (oin_sem, true); + acq_oin = true; } + OCFS_SET_FLAG (oin->oin_flags, + OCFS_OIN_NEEDS_DELETION); + if (acq_oin) { ocfs_up_sem (oin_sem); acq_oin = false; } - ocfs_release_lockres (lockres); + ocfs_release_lockres (*lockres); if (oin && oin->inode) { struct inode *inode = oin->inode; - - //inode->i_nlink--; inode->i_nlink = 0; d_prune_aliases (inode); } @@ -614,251 +319,234 @@ int ocfs_vote_for_del_ren (ocfs_super * ocfs_purge_cache_section (oin, NULL, 0); ocfs_up_sem (&(oin->paging_io_res)); } - lockres = *LockResource = NULL; + *lockres = NULL; } - - VoteSector->vote[NodeAskingVote] = FLAG_VOTE_NODE; + vote->vote[node_num] = FLAG_VOTE_NODE; goto finito; } else { - LOG_TRACE_ARGS - ("Returned in use (%u) for lockid %u.%u and seq %u.%u\n", - NodeAskingVote, HI (PublishToVote->dir_ent), - LO (PublishToVote->dir_ent), - HI (PublishToVote->seq_num), - HI (PublishToVote->seq_num)); - VoteSector->vote[NodeAskingVote] = - FLAG_VOTE_OIN_ALREADY_INUSE; + LOG_TRACE_ARGS ("Returned in use (%u) for lockid %u.%u " + "and seq %u.%u\n", node_num, HI (publish->dir_ent), + LO (publish->dir_ent), HI (publish->publ_seq_num), + HI (publish->publ_seq_num)); + vote->vote[node_num] = FLAG_VOTE_OIN_ALREADY_INUSE; goto finito; } } else { - LOG_TRACE_ARGS - ("Voted to del/ren (%u) for lockid %u.%u and seq %u.%u\n", - NodeAskingVote, HI (PublishToVote->dir_ent), - LO (PublishToVote->dir_ent), HI (PublishToVote->seq_num), - HI (PublishToVote->seq_num)); - VoteSector->vote[NodeAskingVote] = FLAG_VOTE_NODE; + struct inode *inode = NULL; + if (flags & FLAG_FILE_DELETE) { + inode = ocfs_get_inode_from_offset(osb, publish->dir_ent); 
+ if (inode) { + inode->i_nlink = 0; + d_prune_aliases (inode); + iput (inode); + inode = NULL; + } + } + LOG_TRACE_ARGS ("Voted to del/ren (%u) for lockid %u.%u and " + "seq %u.%u\n", node_num, HI (publish->dir_ent), + LO (publish->dir_ent), HI (publish->publ_seq_num), + HI (publish->publ_seq_num)); + vote->vote[node_num] = FLAG_VOTE_NODE; goto finito; } finito: /* Set the always update master on open flag */ - if (lockres) { - lockres->lock_state |= FLAG_ALWAYS_UPDATE_OPEN; - lockres->last_upd_seq_num = PublishToVote->seq_num; - - if (lockres->master_node_num != OCFS_INVALID_NODE_NUM) { - if (!IS_NODE_ALIVE - (osb->publ_map, lockres->master_node_num, - OCFS_MAXIMUM_NODES)) { - lockres->master_node_num = NodeAskingVote; + if (*lockres) { + (*lockres)->lock_state |= FLAG_ALWAYS_UPDATE_OPEN; + (*lockres)->last_upd_seq_num = publish->publ_seq_num; + + if ((*lockres)->master_node_num != OCFS_INVALID_NODE_NUM) { + if (!IS_NODE_ALIVE (osb->publ_map, (*lockres)->master_node_num, + OCFS_MAXIMUM_NODES)) { + (*lockres)->master_node_num = node_num; } } else { - lockres->master_node_num = NodeAskingVote; + (*lockres)->master_node_num = node_num; } /* Change the master if there is no lock */ - if ((lockres->master_node_num == osb->node_num) && - (lockres->lock_state <= OCFS_DLM_SHARED_LOCK)) { - ub8 tmp = PublishToVote->dir_ent; + if (((*lockres)->master_node_num == osb->node_num) && + ((*lockres)->lock_state <= OCFS_DLM_SHARED_LOCK)) { + ub8 tmp = publish->dir_ent; /* Change the lock ownership to the node asking for vote */ /* and write new master on the disk */ - status = - ocfs_get_file_entry (osb, &pFileEntry, - PublishToVote->dir_ent); + status = ocfs_get_file_entry (osb, &fe, publish->dir_ent); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } - DISK_LOCK_CURRENT_MASTER (pFileEntry) = NodeAskingVote; - status = - ocfs_write_disk (osb, pFileEntry, osb->sect_size, - tmp); + DISK_LOCK_CURRENT_MASTER (fe) = node_num; + status = ocfs_write_disk (osb, fe, 
osb->sect_size, tmp); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } - - lockres->master_node_num = NodeAskingVote; + (*lockres)->master_node_num = node_num; } } finally: - if (pFileEntry) - ocfs_release_file_entry (pFileEntry); + if (fe) + ocfs_release_file_entry (fe); if (acq_oin && oin_sem) { ocfs_up_sem (oin_sem); acq_oin = false; } - if (lockres) - ocfs_release_lockres (lockres); + if (*lockres) + ocfs_release_lockres (*lockres); LOG_EXIT_STATUS (status); return (status); } /* ocfs_vote_for_del_ren */ /* + * ocfs_get_inode_from_offset() + * + */ +struct inode * ocfs_get_inode_from_offset(ocfs_super * osb, ub8 fileoff) +{ + int status; + struct inode *inode = NULL; + ocfs_file_entry *fe; + ocfs_find_inode_args args; + + LOG_ENTRY (); + + status = ocfs_get_file_entry (osb, &fe, fileoff); + if (status >= 0) { + args.offset = fe->this_sector; + args.entry = fe; + inode = iget4 (osb->sb, (ub4) LO (fileoff), + (find_inode_t) ocfs_find_inode, + (void *) (&args)); + if (inode != NULL && is_bad_inode (inode)) { + iput (inode); + inode = NULL; + } + ocfs_release_file_entry (fe); + fe = NULL; + } + + LOG_EXIT_PTR (inode); + return inode; +} /* ocfs_get_inode_from_offset */ + +/* * ocfs_process_update_inode_request() * + * @osb: + * @vote: + * @publish: + * @lockres: + * @node_num: node asking for the vote + * * get an inode just long enough to dump its pages */ -int ocfs_process_update_inode_request (ocfs_super * osb, - ocfs_vote * VoteSector, - ocfs_publish * PublishToVote, - ocfs_lock_res * LockResource, - ub4 NodeAskingVote) +int ocfs_process_update_inode_request (ocfs_super * osb, ocfs_vote * vote, + ocfs_publish * publish, + ocfs_lock_res * lockres, ub4 node_num) { - ub8 fileoff; struct inode *inode = NULL; - ocfs_file_entry *pFileEntry; - int status; + int status = 0; LOG_ENTRY (); - if (LockResource && LockResource->oin) { + if (lockres && lockres->oin) { LOG_ERROR_STR ("should not be called if there exists an " \ "oin for this inode!\n"); - return 
-EFAIL; + status = -EFAIL; + goto bail; } - fileoff = PublishToVote->dir_ent; - - status = ocfs_get_file_entry (osb, &pFileEntry, fileoff); - if (status >= 0) { - ocfs_find_inode_args args; - - args.offset = pFileEntry->this_sector; - args.entry = pFileEntry; - inode = iget4 (osb->sb, (ub4) LO (fileoff), - (find_inode_t) ocfs_find_inode, - (void *) (&args)); - if (inode == NULL || is_bad_inode (inode)) { - if (inode) - iput (inode); - inode = NULL; - } else { - truncate_inode_pages (inode->i_mapping, 0); - iput (inode); - inode = NULL; - } - ocfs_release_file_entry (pFileEntry); - pFileEntry = NULL; + inode = ocfs_get_inode_from_offset(osb, publish->dir_ent); + if (inode) { + truncate_inode_pages (inode->i_mapping, 0); + iput (inode); + inode = NULL; } - if (LockResource) + if (lockres) ocfs_hash_del (&(osb->root_sect_node), - &(LockResource->sector_num), sizeof (ub8)); + &(lockres->sector_num), sizeof (ub8)); - VoteSector->dir_ent = PublishToVote->dir_ent; - VoteSector->seq_num = PublishToVote->seq_num; - VoteSector->vote[NodeAskingVote] = FLAG_VOTE_OIN_UPDATED; - LOG_TRACE_STR ("now doing a FLAG_VOTE_OIN_UPDATED vote!\n"); + vote->dir_ent = publish->dir_ent; + vote->vote_seq_num = publish->publ_seq_num; + vote->vote[node_num] = FLAG_VOTE_OIN_UPDATED; + LOG_TRACE_STR ("Now doing a FLAG_VOTE_OIN_UPDATED vote!"); + bail: LOG_EXIT (); - return 0; + return status; } /* ocfs_process_update_inode_request */ /* * ocfs_process_vote() * + * @osb: + * @publish: + * @node_num: node asking for the vote + * */ -int ocfs_process_vote (ocfs_super * osb, ocfs_publish * PublishToVote, - ub4 NodeAskingVote) +int ocfs_process_vote (ocfs_super * osb, ocfs_publish * publish, ub4 node_num) { int status = 0; ocfs_lock_res *lockres = NULL; - ub4 Flags; + ub4 flags; ub4 num_nodes; ub4 i; - ub8 nodeVoteOffset; + ub8 offset; bool acq_oin = false; - ocfs_file_entry *pFileEntry = NULL; - ocfs_vote *PVoteSector = NULL; + ocfs_file_entry *fe = NULL; + ocfs_vote *vote = NULL; - LOG_ENTRY_ARGS 
("(0x%08x, 0x%08x, %u)\n", osb, PublishToVote, - NodeAskingVote); + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u)\n", osb, publish, node_num); - LOG_TRACE_ARGS ("Called from node (%u) for lockid %u.%u and " \ - "seq %u.%u\n", NodeAskingVote, - HI (PublishToVote->dir_ent), - LO (PublishToVote->dir_ent), - HI (PublishToVote->seq_num), - LO (PublishToVote->seq_num)); - - num_nodes = osb->num_cfg_nodes; - Flags = PublishToVote->vote_type; - - PVoteSector = ocfs_malloc (osb->sect_size); - if (PVoteSector == NULL) { - LOG_ERROR_STATUS (status = -ENOMEM); - goto finito; - } - - nodeVoteOffset = osb->vol_layout.vote_sect_off + - (osb->node_num * osb->sect_size); + num_nodes = OCFS_MAXIMUM_NODES; + flags = publish->vote_type; - status = ocfs_read_force_disk (osb, PVoteSector, osb->sect_size, - nodeVoteOffset); + offset = osb->vol_layout.vote_sect_off + (osb->node_num * osb->sect_size); + status = ocfs_read_force_disk_ex (osb, (void **)&vote, osb->sect_size, + osb->sect_size, offset); if (status < 0) { LOG_ERROR_STATUS (status); goto finito; } - LOG_TRACE_ARGS - ("Called from node (%u) for lockid %u.%u and seq %u.%u\n", - NodeAskingVote, HI (PublishToVote->dir_ent), - LO (PublishToVote->dir_ent), HI (PublishToVote->seq_num), - LO (PublishToVote->seq_num)); - /* Exclusive vote for */ - status = ocfs_find_update_res (osb, PublishToVote->dir_ent, &lockres, - NULL, NULL); + status = ocfs_find_update_res (osb, publish->dir_ent, &lockres, NULL, + NULL); if (status < 0) { -#ifdef ALLOW_INODE_UPDATES_WITH_NO_OIN - /* if this is an update oin request, check if there is a - * matching inode which has no lock resource yet */ - if (Flags & FLAG_FILE_UPDATE_OIN) { - LOG_TRACE_ARGS ("Update request received for %u.%u. " - "Checking for inode.\n", - PublishToVote->dir_ent); - status = ocfs_process_update_inode_request (osb, - PVoteSector, PublishToVote, - lockres, NodeAskingVote); - } else /* ! 
FLAG_FILE_UPDATE_OIN */ -#endif - { + if (flags & FLAG_FILE_UPDATE_OIN) { + status = ocfs_process_update_inode_request (osb, vote, + publish, lockres, node_num); + } else LOG_ERROR_STATUS (status); - } goto finito; } /* Zero out the vote for everybody, if any already set and hung */ for (i = 0; i < num_nodes; i++) - PVoteSector->vote[i] = 0; + vote->vote[i] = 0; - /* - ** Check if we have the lock on resource here. - ** ocfs_acquire_lockres(lockResource); - */ - if ((Flags & FLAG_FILE_DELETE) || (Flags & FLAG_FILE_RENAME)) { - status = ocfs_vote_for_del_ren (osb, PublishToVote, - NodeAskingVote, PVoteSector, &lockres); + if ((flags & FLAG_FILE_DELETE) || (flags & FLAG_FILE_RENAME)) { + status = ocfs_vote_for_del_ren (osb, publish, node_num, vote, + &lockres); goto finito; } - if (Flags & FLAG_FILE_RELEASE_CACHE) { - ocfs_file_entry *pTempEntry = NULL; + if (flags & FLAG_FILE_RELEASE_CACHE) { + ocfs_file_entry *tmp_fe = NULL; i = 0; - LOG_TRACE_STR ("Called for FLAG_FILE_RELEASE_CACHE"); - if (!osb->commit_cache_exec) { osb->needs_flush = true; while ((osb->trans_in_progress) && (i < 10)) { - ocfs_sleep (100); /* in ms */ + ocfs_sleep (100); i++; } @@ -875,8 +563,8 @@ int ocfs_process_vote (ocfs_super * osb, lockres->lock_type = OCFS_DLM_NO_LOCK; } - status = ocfs_get_file_entry (osb, &pTempEntry, - PublishToVote->dir_ent); + status = ocfs_get_file_entry (osb, &tmp_fe, + publish->dir_ent); if (status < 0) { LOG_ERROR_STATUS (status); goto finito; @@ -885,315 +573,185 @@ int ocfs_process_vote (ocfs_super * osb, /* At this stage there is nothing in disk, so */ /* no need to update cache, as there is */ /* nothing there */ - if (DISK_LOCK_FILE_LOCK (pTempEntry) > - OCFS_DLM_NO_LOCK) { - ub8 tmp = PublishToVote->dir_ent; - - DISK_LOCK_FILE_LOCK (pTempEntry) = - OCFS_DLM_NO_LOCK; - - status = ocfs_write_force_disk (osb, - pTempEntry, - osb->sect_size, - tmp); + if (DISK_LOCK_FILE_LOCK (tmp_fe) > OCFS_DLM_NO_LOCK) { + ub8 tmp = publish->dir_ent; + + DISK_LOCK_FILE_LOCK 
(tmp_fe) = OCFS_DLM_NO_LOCK; + + status = ocfs_write_force_disk (osb, tmp_fe, + osb->sect_size, tmp); if (status < 0) { LOG_ERROR_STATUS (status); goto finito; } lockres->lock_type = OCFS_DLM_NO_LOCK; } - - ocfs_safefree (pTempEntry); - - PVoteSector->vote[NodeAskingVote] = - FLAG_VOTE_NODE; + ocfs_release_file_entry (tmp_fe); + vote->vote[node_num] = FLAG_VOTE_NODE; status = 0; } - - LOG_TRACE_STR ("Exiting for FLAG_FILE_RELEASE_CACHE"); goto finito; } - - LOG_TRACE_STR ("Exiting for FLAG_FILE_RELEASE_CACHE"); } - if (PublishToVote->vote_type & FLAG_FILE_UPDATE_OIN) { + if (publish->vote_type & FLAG_FILE_UPDATE_OIN) { ocfs_inode *oin; - LOG_TRACE_ARGS - ("Got update oin from node (%u) for lockid %u.%u " - "and seq %u.%u\n", NodeAskingVote, - HI (PublishToVote->dir_ent), LO (PublishToVote->dir_ent), - HI (PublishToVote->seq_num), LO (PublishToVote->seq_num)); - /* - ** Set the verify oin flag on the oin....??? Assumption is that - ** we have Lock resource or oin lock + * Set the verify oin flag on the oin....??? Assumption is that + * we have Lock resource or oin lock */ if (lockres->oin != NULL) { oin = lockres->oin; ocfs_down_sem (&(oin->main_res), true); acq_oin = true; - /* - ** Get the main resource too - */ - LOG_TRACE_STR ("Now calling UPDATE_OIN()!"); UPDATE_OIN (lockres->oin); - if (acq_oin) { ocfs_up_sem (&(oin->main_res)); acq_oin = false; } - PVoteSector->dir_ent = PublishToVote->dir_ent; - PVoteSector->seq_num = PublishToVote->seq_num; - PVoteSector->vote[NodeAskingVote] = - FLAG_VOTE_OIN_UPDATED; - } -#ifdef ALLOW_INODE_UPDATES_WITH_NO_OIN - else { /* lockResource->oin == NULL */ - LOG_TRACE_STR ("a lock resource exists, but no oin. 
" - "calling ocfs_process_update_inode_request.\n"); - status = - ocfs_process_update_inode_request (osb, PVoteSector, - PublishToVote, lockres, - NodeAskingVote); + vote->dir_ent = publish->dir_ent; + vote->vote_seq_num = publish->publ_seq_num; + vote->vote[node_num] = FLAG_VOTE_OIN_UPDATED; + } else { + status = ocfs_process_update_inode_request (osb, vote, + publish, lockres, node_num); } -#endif goto finito; } - /* - ** If there is a masternode and it is alive ask the node asking for - ** vote to update its state - */ if (lockres->master_node_num != OCFS_INVALID_NODE_NUM) { if (lockres->master_node_num == osb->node_num) { - if (Flags & FLAG_CHANGE_MASTER) { + /* I am currently the master of the lock */ + if (flags & FLAG_CHANGE_MASTER) { osb->needs_flush = true; - LOG_TRACE_ARGS - ("Got Change Master from Node (%u) for Lock " - "0x%08x.0x%08x, Seq %u.%u\n", - NodeAskingVote, - HI (PublishToVote->dir_ent), - LO (PublishToVote->dir_ent), - HI (PublishToVote->seq_num), - LO (PublishToVote->seq_num)); while ((osb->trans_in_progress) && (i < 10)) { - ocfs_sleep (100); /* in ms */ + ocfs_sleep (100); i++; } status = -EFAIL; if (osb->trans_in_progress == false) { - ub8 tmp = PublishToVote->dir_ent; + ub8 tmp = publish->dir_ent; ocfs_commit_cache (osb, true); - lockres->master_node_num = - NodeAskingVote; + lockres->master_node_num = node_num; osb->needs_flush = false; if (lockres->oin != NULL) { ocfs_flush_data (lockres->oin); - lockres->lock_type = - OCFS_DLM_NO_LOCK; + lockres->lock_type = OCFS_DLM_NO_LOCK; } - status = - ocfs_get_file_entry (osb, &pFileEntry, - PublishToVote->dir_ent); + status = ocfs_get_file_entry (osb, &fe, + publish->dir_ent); if (status < 0) { LOG_ERROR_STATUS (status); goto finito; } - LOG_TRACE_ARGS - ("Got change master from node (%u) for " - "lockid %u.%u and seq %u.%u\n", - NodeAskingVote, - HI (PublishToVote->dir_ent), - LO (PublishToVote->dir_ent), - HI (PublishToVote->seq_num), - LO (PublishToVote->seq_num)); - if (lockres->oin) - 
DISK_LOCK_OIN_MAP (pFileEntry) - |= (1 << osb->node_num); + DISK_LOCK_OIN_MAP (fe) |= (1 << osb->node_num); - DISK_LOCK_CURRENT_MASTER (pFileEntry) = - NodeAskingVote; + DISK_LOCK_CURRENT_MASTER (fe) = node_num; /* Write new master on the disk */ - status = - ocfs_write_disk (osb, pFileEntry, - osb->sect_size, tmp); + status = ocfs_write_disk (osb, fe, osb->sect_size, tmp); if (status < 0) { LOG_ERROR_STATUS (status); goto finito; } - - lockres->master_node_num = - NodeAskingVote; - PVoteSector->vote[NodeAskingVote] = - FLAG_VOTE_NODE; + lockres->master_node_num = node_num; + vote->vote[node_num] = FLAG_VOTE_NODE; status = 0; } - } else if (Flags & FLAG_ADD_OIN_MAP) { - status = - ocfs_get_file_entry (osb, &pFileEntry, - PublishToVote->dir_ent); + } else if (flags & FLAG_ADD_OIN_MAP) { + status = ocfs_get_file_entry (osb, &fe, publish->dir_ent); if (status < 0) { LOG_ERROR_STATUS (status); goto finito; } - LOG_TRACE_ARGS - ("Got add oin map from node (%u) for " - "lockid %u.%u and seq %u.%u\n", - NodeAskingVote, - HI (PublishToVote->dir_ent), - LO (PublishToVote->dir_ent), - HI (PublishToVote->seq_num), - LO (PublishToVote->seq_num)); - - if ((pFileEntry-> - sync_flags & OCFS_SYNC_FLAG_NAME_DELETED) - || - (!(pFileEntry-> - sync_flags & OCFS_SYNC_FLAG_VALID))) { - PVoteSector->vote[NodeAskingVote] = - FLAG_VOTE_FILE_DEL; - PVoteSector->open_handle = false; + if ((fe->sync_flags & OCFS_SYNC_FLAG_NAME_DELETED) || + (!(fe->sync_flags & OCFS_SYNC_FLAG_VALID))) { + vote->vote[node_num] = FLAG_VOTE_FILE_DEL; + vote->open_handle = false; } else { - ub8 tmp = PublishToVote->dir_ent; + ub8 tmp = publish->dir_ent; - DISK_LOCK_OIN_MAP (pFileEntry) |= - (1 << NodeAskingVote); + DISK_LOCK_OIN_MAP (fe) |= (1 << node_num); /* Write new map on the disk */ - status = - ocfs_write_disk (osb, pFileEntry, - osb->sect_size, tmp); + status = ocfs_write_disk (osb, fe, + osb->sect_size, tmp); if (status < 0) { LOG_ERROR_STATUS (status); goto finito; } /* Add this node to the oin map on 
the file entry */ - lockres->oin_openmap = - DISK_LOCK_OIN_MAP (pFileEntry); - PVoteSector->vote[NodeAskingVote] = - FLAG_VOTE_NODE; + lockres->oin_openmap = DISK_LOCK_OIN_MAP (fe); + vote->vote[node_num] = FLAG_VOTE_NODE; } } } else { - if (IS_NODE_ALIVE - (osb->publ_map, lockres->master_node_num, - OCFS_MAXIMUM_NODES)) { - LOG_TRACE_ARGS - ("Got master req from node (%u) for " - "lockid %u.%u and seq %u.%u currmaster is (%u)\n", - NodeAskingVote, - HI (PublishToVote->dir_ent), - LO (PublishToVote->dir_ent), - HI (PublishToVote->seq_num), - LO (PublishToVote->seq_num), - lockres->master_node_num); - - /* - ** We have no business voting on this lock - */ - PVoteSector->vote[NodeAskingVote] = - FLAG_VOTE_UPDATE_RETRY; - PVoteSector->open_handle = false; + /* I am not currently the master of the lock */ + if (IS_NODE_ALIVE (osb->publ_map, lockres->master_node_num, + OCFS_MAXIMUM_NODES)) { + /* We have no business voting on this lock */ + vote->vote[node_num] = FLAG_VOTE_UPDATE_RETRY; + vote->open_handle = false; } else { - /* - ** Master Node is dead and a vote is needed to create a new master - */ - PVoteSector->open_handle = false; - - LOG_TRACE_ARGS - ("Got master req from node (%u) for lockid %u.%u " - "and seq %u.%u current master is dead\n", - NodeAskingVote, - HI (PublishToVote->dir_ent), - LO (PublishToVote->dir_ent), - HI (PublishToVote->seq_num), - LO (PublishToVote->seq_num)); - - PVoteSector->vote[NodeAskingVote] = - FLAG_VOTE_NODE; - - if ((!(Flags & FLAG_DIR)) && - ((Flags & FLAG_FILE_EXTEND) - || (Flags & FLAG_FILE_UPDATE))) { - LOG_TRACE_ARGS - ("EXTEND/UPDATE: master_node_num = %u\n", - lockres->master_node_num); - if (lockres->oin) { - PVoteSector->open_handle = true; - } + /* Master Node is dead and a vote is needed */ + /* to create a new master */ + vote->open_handle = false; + vote->vote[node_num] = FLAG_VOTE_NODE; + + if ((!(flags & FLAG_DIR)) && + ((flags & FLAG_FILE_EXTEND) || (flags & FLAG_FILE_UPDATE))) { + if (lockres->oin) + 
vote->open_handle = true; } } } } else { /* Vote for the node */ + vote->vote[node_num] = FLAG_VOTE_NODE; + vote->open_handle = false; - LOG_TRACE_ARGS - ("Got master req from node (%u) for lockid %u.%u " - "and seq %u.%u no current master\n", NodeAskingVote, - HI (PublishToVote->dir_ent), LO (PublishToVote->dir_ent), - HI (PublishToVote->seq_num), LO (PublishToVote->seq_num)); - - PVoteSector->vote[NodeAskingVote] = FLAG_VOTE_NODE; - PVoteSector->open_handle = false; - - if ((!(Flags & FLAG_DIR)) && - ((Flags & FLAG_FILE_EXTEND) || (Flags & FLAG_FILE_UPDATE))) - { - LOG_TRACE_STR ("EXTEND/UPDATE: no master"); - if (lockres->oin) { - PVoteSector->open_handle = true; - } + if ((!(flags & FLAG_DIR)) && + ((flags & FLAG_FILE_EXTEND) || (flags & FLAG_FILE_UPDATE))) { + if (lockres->oin) + vote->open_handle = true; } } finito: - PVoteSector->dir_ent = PublishToVote->dir_ent; - PVoteSector->seq_num = PublishToVote->seq_num; - - LOG_TRACE_ARGS ("Voting for node (%u) for lockid %u.%u and seq %u.%u " - "vote %u openhandle %u\n", NodeAskingVote, - HI (PVoteSector->dir_ent), LO (PVoteSector->dir_ent), - HI (PVoteSector->seq_num), LO (PVoteSector->seq_num), - PVoteSector->vote[NodeAskingVote], - PVoteSector->open_handle); + vote->dir_ent = publish->dir_ent; + vote->vote_seq_num = publish->publ_seq_num; if (status >= 0) { - /* Write the vote sector */ - nodeVoteOffset = osb->vol_layout.vote_sect_off + - (osb->node_num * osb->sect_size); - - status = - ocfs_write_disk (osb, PVoteSector, osb->sect_size, nodeVoteOffset); - if (status < 0) { + offset = osb->vol_layout.vote_sect_off + + (osb->node_num * osb->sect_size); + status = ocfs_write_disk (osb, vote, osb->sect_size, offset); + if (status < 0) LOG_ERROR_STATUS (status); - } } - if (pFileEntry) - ocfs_release_file_entry (pFileEntry); + if (fe) + ocfs_release_file_entry (fe); - ocfs_safefree (PVoteSector); + ocfs_safefree (vote); - LOG_TRACE_ARGS - ("Exited from node (%u) for lockid %u.%u and seq %u.%u\n", - NodeAskingVote, HI 
(PublishToVote->dir_ent), - LO (PublishToVote->dir_ent), HI (PublishToVote->seq_num), - LO (PublishToVote->seq_num)); + if (lockres && lockres->oin==NULL) { + ocfs_hash_del (&(osb->root_sect_node), &(lockres->sector_num), + sizeof (ub8)); + ocfs_free_lockres(lockres); + } LOG_EXIT_STATUS (status); return status; @@ -1206,244 +764,130 @@ int ocfs_process_vote (ocfs_super * osb, int ocfs_nm_thread (ocfs_super * mount_osb) { int status; -#if !defined(DLM_THREAD_PER_VOLUME) - struct list_head *iterEntry; -#endif - ub1 *allocBuffer = NULL; ub1 *buffer = NULL; - ub1 *cfgBuffer = NULL; ocfs_super *osb = NULL; ocfs_publish *publish; ocfs_publish *publish_to_vote = NULL; - ub4 length = 0; ub4 i; - ub4 j; - ub4 highestVoteNode = 0; - ub4 maxAllocLen = 0; - ub8 startOffset = 0; + ub4 highest_vote_node = 0; + ub8 offset = 0; ub4 num_nodes = 0; - sb4 voteNode = -1; - bool bGlobalResourceAcquired = false; + ub4 vote_node = OCFS_INVALID_NODE_NUM; int ret = 0; - ocfs_node_config_hdr *pNodeCfgHdr = NULL; + ocfs_node_config_hdr *node_cfg_hdr = NULL; ub1 *p; LOG_ENTRY (); /* For each mounted volume reiterate the time stamp on the publish sector */ -#if defined(DLM_THREAD_PER_VOLUME) if (!mount_osb) { LOG_ERROR_STATUS (status = -EFAIL); goto finally; - } -#endif - -#if !defined(DLM_THREAD_PER_VOLUME) - ocfs_down_sem (&(OcfsGlobalCtxt.res), true); - bGlobalResourceAcquired = true; -#endif - - /* Step thru each osb */ -#if defined(DLM_THREAD_PER_VOLUME) - for (osb = mount_osb, j = 0; j < 1; ++j) -#else - list_for_each (iterEntry, &(OcfsGlobalCtxt.osb_next)) -#endif - { -#if !defined(DLM_THREAD_PER_VOLUME) - if (bGlobalResourceAcquired) { - ocfs_up_sem (&(OcfsGlobalCtxt.res)); - bGlobalResourceAcquired = false; - } - - /* Retrieve the next osb */ - osb = list_entry (iterEntry, ocfs_super, osb_next); - OCFS_ASSERT (osb); -#endif + } else + osb = mount_osb; - /* Ensure that the volume is valid ... */ - if (osb->obj_id.type != OCFS_TYPE_OSB) - continue; - - /* ... 
and that it is mounted */ - if (osb->osb_flags & OCFS_OSB_FLAGS_BEING_DISMOUNTED) { - /* TODO LINUX We need to remove this volume from this list */ - continue; - } + /* Ensure that the volume is valid ... */ + if (osb->obj_id.type != OCFS_TYPE_OSB) + goto finally; - if (!time_after (jiffies, osb->hbt)) - continue; + /* ... and that it is mounted */ + if (osb->osb_flags & OCFS_OSB_FLAGS_BEING_DISMOUNTED) + goto finally; - if (osb->vol_state == VOLUME_MOUNTED) { - if (osb->needs_flush) { - i = 0; - while ((osb->trans_in_progress) && (i < 10)) { - ocfs_sleep (100); /* in ms */ - i++; - } + if (!time_after (jiffies, osb->hbt)) + goto finally; - if (osb->trans_in_progress == false) { - ocfs_commit_cache (osb, false); - osb->needs_flush = false; - } + if (osb->vol_state == VOLUME_MOUNTED) { + if (osb->needs_flush) { + i = 0; + while ((osb->trans_in_progress) && (i < 10)) { + ocfs_sleep (100); + i++; } - } - - length = (osb->num_cfg_nodes + 4) * osb->sect_size; - length = OCFS_ALIGN (length, PAGE_SIZE); - if ((allocBuffer == NULL) || (length > maxAllocLen)) { - ocfs_safefree (allocBuffer); - - allocBuffer = ocfs_malloc (length); - if (allocBuffer == NULL) { - LOG_ERROR_STATUS (status = -ENOMEM); - goto finally; + if (osb->trans_in_progress == false) { + ocfs_commit_cache (osb, false); + osb->needs_flush = false; } - maxAllocLen = length; } + } + + /* Get the Publish Sector start Offset */ + offset = osb->vol_layout.new_cfg_off; - cfgBuffer = allocBuffer; + /* Read disk for Publish Sectors of all nodes */ + status = ocfs_read_force_disk (osb, osb->cfg_prealloc, osb->cfg_len, + offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } - /* Get the Publish Sector start Offset */ - startOffset = osb->vol_layout.new_cfg_off; + /* If another node was added to the config read and update the cfg */ + node_cfg_hdr = (ocfs_node_config_hdr *) (osb->cfg_prealloc + osb->sect_size); - /* Read disk for Publish Sectors of all nodes */ - status = - ocfs_read_force_disk 
(osb, cfgBuffer, length, startOffset); + if ((osb->cfg_seq_num != node_cfg_hdr->cfg_seq_num) || + (osb->num_cfg_nodes != node_cfg_hdr->num_nodes)) { + status = ocfs_chk_update_config (osb); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } -// /* If some node is asking us to add it to the config... */ -// /* ocfs_add_to_disk_config */ -// if(osb->bConfigUpdateNeeded) -// { -// ocfs_update_node_config(osb); -// osb->bConfigUpdateNeeded = false; -// } - - /* If another node was added to the config read and update the cfg */ - pNodeCfgHdr = (ocfs_node_config_hdr *) (cfgBuffer + osb->sect_size); - - if ((osb->cfg_seq_num != pNodeCfgHdr->seq_num) || - (osb->num_cfg_nodes != pNodeCfgHdr->num_nodes)) { - status = ocfs_chk_update_config (osb); - if (status < 0) { - LOG_ERROR_STATUS (status); - goto finally; - } - } + } - /* re read the publish based on the new config info... */ - if (length < ((osb->num_cfg_nodes + 4) * osb->sect_size)) { - length = (osb->num_cfg_nodes + 4) * osb->sect_size; - length = OCFS_ALIGN (length, PAGE_SIZE); - - if ((allocBuffer == NULL) || (length > maxAllocLen)) { - ocfs_safefree (allocBuffer); - - allocBuffer = ocfs_malloc (length); - if (allocBuffer == NULL) { - LOG_ERROR_STATUS (status = -ENOMEM); - goto finally; - } - maxAllocLen = length; - cfgBuffer = allocBuffer; - } + buffer = osb->cfg_prealloc + + (OCFS_VOLCFG_NEWCFG_SECTORS * osb->sect_size); - startOffset = osb->vol_layout.new_cfg_off; + num_nodes = OCFS_MAXIMUM_NODES; - /* Read disk for Publish Sectors of all nodes */ - status = - ocfs_read_force_disk (osb, cfgBuffer, length, - startOffset); - if (status < 0) { - LOG_ERROR_STATUS (status); - goto finally; - } - } + /* Refresh the publish map */ + ocfs_update_publish_map (osb, buffer, false); - buffer = allocBuffer + (4 * osb->sect_size); + LOG_TRACE_ARGS ("Publish map: 0x%08x\n", LO (osb->publ_map)); - /* Total number of nodes in this volume */ - num_nodes = osb->num_cfg_nodes; + /* Update the timestamp on disk to indicate 
that it is alive */ + ocfs_nm_heart_beat (osb, HEARTBEAT_METHOD_DISK, false); - LOG_TRACE_ARGS ("Number of nodes (%d)\n", num_nodes); + /* Check for the highest node looking for a vote, if anybody is looking */ + for (i = 0, p = buffer; i < num_nodes; i++, p += osb->sect_size) { + publish = (ocfs_publish *) p; - /* Update the Publish Map */ - ocfs_update_publish_map (osb, buffer, false); - - LOG_TRACE_ARGS ("Updated PublishMap: 0x%08x.0x%08x\n", - HI (osb->publ_map), LO (osb->publ_map)); - - /* Update the timestamp on disk to indicate that it is alive */ - ocfs_nm_heart_beat (osb, HEARTBEAT_METHOD_DISK, 0); - - /* Check for the highest node looking for a vote, if anybody is looking */ - for (i = 0, p = buffer; i < num_nodes; i++, p += osb->sect_size) { - publish = (ocfs_publish *) p; - if ((publish->vote == 0x1) && - (publish->vote_map & (1 << osb->node_num))) { - LOG_TRACE_ARGS - ("Node(%u): Vote=%s Dirty=%s Type=%u\n", i, - publish->vote ? "yes" : "no", - publish->dirty ? "yes" : "no", - publish->vote_type); - - highestVoteNode = i; - - /* Check if the node is alive or not */ - if (IS_NODE_ALIVE - (osb->publ_map, highestVoteNode, - num_nodes)) { - voteNode = highestVoteNode; - publish_to_vote = publish; - } else { - LOG_TRACE_ARGS - ("Calling ocfs_recover_vol for NodeNum (%u)\n", - highestVoteNode); - /* Recovery... 
*/ - status = - ocfs_recover_vol (osb, - highestVoteNode); - if (status < 0) { - LOG_ERROR_STATUS (status); - goto finally; - } - } - } - } + if (publish->time == (ub8) 0) + continue; + + if (publish->vote != FLAG_VOTE_NODE || + !(publish->vote_map & (1 << osb->node_num))) + continue; + + LOG_TRACE_ARGS ("Node(%u): Vote=%d Dirty=%d Type=%u\n", i, + publish->vote, publish->dirty, publish->vote_type); - if ((voteNode >= 0) && (voteNode != osb->node_num)) { - LOG_TRACE_ARGS ("Highest Node asking for Vote (%d) " - "local nodenum is (%d) node map is 0x%08x.0x%08x\n", - voteNode, osb->node_num, - HI (osb->publ_map), LO (osb->publ_map)); - - publish = (ocfs_publish *) (buffer + - (osb->node_num * - osb->sect_size)); - - if (publish->vote) { - LOG_TRACE_ARGS - ("This node %d wanted to create but was not the " - "highest number node %d\n", osb->node_num, - voteNode); - publish->vote = 0; + highest_vote_node = i; + + /* Check if the node is alive or not */ + if (IS_NODE_ALIVE (osb->publ_map, highest_vote_node, num_nodes)) { + vote_node = highest_vote_node; + publish_to_vote = publish; + } else { + status = ocfs_recover_vol (osb, highest_vote_node); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; } - ocfs_process_vote (osb, publish_to_vote, voteNode); } - osb->hbt = 50 + jiffies; } - finally: - if (bGlobalResourceAcquired) { - ocfs_up_sem (&(OcfsGlobalCtxt.res)); - bGlobalResourceAcquired = false; + if ((vote_node != OCFS_INVALID_NODE_NUM) && (vote_node != osb->node_num)) { + publish = (ocfs_publish *) + (buffer + (osb->node_num * osb->sect_size)); + if (publish->vote) + publish->vote = 0; + + ocfs_process_vote (osb, publish_to_vote, vote_node); } - ocfs_safefree (allocBuffer); + osb->hbt = 50 + jiffies; + finally: LOG_EXIT_STATUS (ret); return ret; } /* ocfs_nm_thread() */ diff -urNp x-ref/fs/ocfs/Common/ocfsgensysfile.c x/fs/ocfs/Common/ocfsgensysfile.c --- x-ref/fs/ocfs/Common/ocfsgensysfile.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/ocfsgensysfile.c 
Mon Oct 21 04:41:19 2002 @@ -186,17 +186,15 @@ int ocfs_read_system_file (ocfs_super * templength = IoRuns[i].byte_cnt; if (bWriteThru) { - status = - ocfs_read_disk (osb, (void *) tempBuffer, - (ub4) templength, tempOffset); + status = ocfs_read_disk (osb, (void *) tempBuffer, + (ub4) templength, tempOffset); if (status < 0) { LOG_ERROR_STATUS (status); goto leave; } } else { - status = - ocfs_read_metadata (osb, (void *) tempBuffer, - (ub4) templength, tempOffset); + status = ocfs_read_metadata (osb, (void *) tempBuffer, + (ub4) templength, tempOffset); if (status < 0) { LOG_ERROR_STATUS (status); goto leave; @@ -412,12 +410,12 @@ int ocfs_get_system_file_size (ocfs_supe * ocfs_extend_system_file() * */ -int ocfs_extend_system_file (ocfs_super * osb, ub4 FileId, ub8 FileSize) +int ocfs_extend_system_file (ocfs_super * osb, ub4 FileId, ub8 FileSize, ocfs_file_entry *fe) { int status = 0; ub8 actualDiskOffset = 0, actualLength = 0; - ocfs_file_entry *fe = NULL; bool bWriteThru = false; + bool local_fe = false; LOG_ENTRY_ARGS ("(FileId = %u, Size = %u.%u)\n", FileId, HI (FileSize), LO (FileSize)); @@ -428,13 +426,16 @@ int ocfs_extend_system_file (ocfs_super } OCFS_ASSERT (osb); - status = ocfs_force_get_file_entry (osb, &fe, + if (!fe) { + local_fe = true; + status = ocfs_force_get_file_entry (osb, &fe, (FileId * osb->sect_size) + osb->vol_layout.root_int_off, bWriteThru); - if (status < 0) { - LOG_ERROR_STATUS (status); - goto leave; + if (status < 0) { + LOG_ERROR_STATUS (status); + goto leave; + } } if (!IS_VALID_FILE_ENTRY (fe)) { @@ -482,7 +483,7 @@ int ocfs_extend_system_file (ocfs_super status = ocfs_force_put_file_entry (osb, fe, bWriteThru); leave: - if (fe) { + if (local_fe) { ocfs_release_file_entry (fe); } @@ -544,13 +545,13 @@ int ocfs_find_extents_of_system_file (oc if (fe->local_ext) { for (j = 0; j < OCFS_MAX_FILE_ENTRY_EXTENTS; j++) { if ((fe->extents[j].file_off + - fe->extents[j].num_clusters) > newOffset) { + fe->extents[j].num_bytes) > 
newOffset) { IoRuns[Runoffset].disk_off = fe->extents[j].disk_off + (newOffset - fe->extents[j].file_off); IoRuns[Runoffset].byte_cnt = (ub4) ((fe->extents[j].file_off + - fe->extents[j].num_clusters) - + fe->extents[j].num_bytes) - newOffset); if (IoRuns[Runoffset].byte_cnt >= remainingLength) { @@ -592,7 +593,7 @@ int ocfs_find_extents_of_system_file (oc /* the last Data entry */ for (k = 0; k < OCFS_MAX_FILE_ENTRY_EXTENTS; k++) { if ((sb8) (fe->extents[k].file_off + - fe->extents[k].num_clusters) > + fe->extents[k].num_bytes) > newOffset) { break; } @@ -611,15 +612,16 @@ int ocfs_find_extents_of_system_file (oc status = ocfs_read_metadata (osb, (void *) buffer, allocSize, fe->extents[k].disk_off); + if (status < 0) { LOG_ERROR_STATUS (status); goto leave; } pOcfsExtent = (ocfs_extent_group *) buffer; - while (pOcfsExtent->type != OCFS_EXTENT_DATA) { ub8 diskoffset; + if (!IS_VALID_EXTENT_HEADER (pOcfsExtent)) { LOG_ERROR_STATUS(status = -EFAIL); goto leave; @@ -634,6 +636,7 @@ int ocfs_find_extents_of_system_file (oc if (pOcfsExtent->extents[k].disk_off == 0) { OCFS_BREAKPOINT (); } + diskoffset = pOcfsExtent->extents[k].disk_off; memset (buffer, 0, length); @@ -641,14 +644,13 @@ int ocfs_find_extents_of_system_file (oc status = ocfs_read_metadata (osb, (void *) buffer, allocSize, - diskoffset); + diskoffset); if (status < 0) { LOG_ERROR_STATUS (status); goto leave; } pOcfsExtent = (ocfs_extent_group *) buffer; } - pOcfsExtentHeader = (ocfs_extent_group *) buffer; searchVbo = newOffset; @@ -665,7 +667,7 @@ int ocfs_find_extents_of_system_file (oc for (j = 0; j < OCFS_MAX_DATA_EXTENTS; j++) { if ((pOcfsExtent->extents[j].file_off + pOcfsExtent->extents[j]. - num_clusters) > newOffset) { + num_bytes) > newOffset) { IoRuns[Runoffset].disk_off = pOcfsExtent->extents[j]. disk_off + (newOffset - @@ -678,7 +680,7 @@ int ocfs_find_extents_of_system_file (oc file_off + pOcfsExtent-> extents[j]. - num_clusters) - + num_bytes) - newOffset); if (IoRuns[Runoffset]. 
@@ -756,7 +758,7 @@ int ocfs_free_file_extents (ocfs_super * if (fe->local_ext) { for (i = 0; i < fe->next_free_ext; i++) { - numBitsAllocated = (ub4) (fe->extents[i].num_clusters / + numBitsAllocated = (ub4) (fe->extents[i].num_bytes / (osb->vol_layout. cluster_size)); @@ -820,7 +822,7 @@ int ocfs_free_file_extents (ocfs_super * i++) { numBitsAllocated = (ub4) (PAllocExtent->extents[i]. - num_clusters / + num_bytes / (osb->vol_layout. cluster_size)); bitmapOffset = @@ -1010,7 +1012,7 @@ int ocfs_write_map_file (ocfs_super * os LOG_TRACE_STR ("allocSize < neededSize"); ocfs_extend_system_file (osb, (OCFS_FILE_VOL_META_DATA + osb->node_num), - neededSize); + neededSize, NULL); } pMapBuffer = ocfs_malloc (neededSize); diff -urNp x-ref/fs/ocfs/Common/ocfsgentrans.c x/fs/ocfs/Common/ocfsgentrans.c --- x-ref/fs/ocfs/Common/ocfsgentrans.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/ocfsgentrans.c Mon Oct 21 04:41:19 2002 @@ -42,21 +42,40 @@ int ocfs_free_disk_bitmap (ocfs_super * ub4 num_upd; ub4 i; ub4 node_num; - ocfs_free_log *free_dir_node[OCFS_MAXIMUM_NODES]; - ocfs_free_log *free_ext_node[OCFS_MAXIMUM_NODES]; - ocfs_free_log *free_vol_bits; - ocfs_lock_res *dirnode_lockres[OCFS_MAXIMUM_NODES]; - ocfs_lock_res *extnode_lockres[OCFS_MAXIMUM_NODES]; + ocfs_free_log **free_dir_node = NULL; + ocfs_free_log **free_ext_node = NULL; + ocfs_free_log *free_vol_bits = NULL; + ocfs_lock_res **dirnode_lockres = NULL; + ocfs_lock_res **extnode_lockres = NULL; ocfs_lock_res *vol_lockres; ocfs_free_log *tmp_log; ocfs_free_log *free_log; ub4 tmp_indx; ub8 lock_id; + ocfs_file_entry *fe = NULL; + ocfs_bitmap_lock *bm_lock = NULL; LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", osb, log_rec); +#define ALLOC_BLOCK(ptr, len, err) \ + do { \ + (ptr) = ocfs_malloc (len); \ + if (!(ptr)) { \ + LOG_ERROR_STATUS ((err) = -ENOMEM); \ + goto finally; \ + } \ + } while (0) + + ALLOC_BLOCK(free_dir_node, + OCFS_MAXIMUM_NODES * sizeof (ocfs_free_log *), status); + ALLOC_BLOCK(free_ext_node, + 
OCFS_MAXIMUM_NODES * sizeof (ocfs_free_log *), status); + ALLOC_BLOCK(dirnode_lockres, + OCFS_MAXIMUM_NODES * sizeof (ocfs_lock_res *), status); + ALLOC_BLOCK(extnode_lockres, + OCFS_MAXIMUM_NODES * sizeof (ocfs_lock_res *), status); + /* init */ - free_vol_bits = NULL; for (i = 0; i < OCFS_MAXIMUM_NODES; i++) { free_dir_node[i] = NULL; free_ext_node[i] = NULL; @@ -133,10 +152,15 @@ int ocfs_free_disk_bitmap (ocfs_super * /* Get all locks */ if (free_vol_bits != NULL) { - lock_id = OCFS_BITMAP_LOCK_OFFSET; - status = ocfs_acquire_lock (osb, lock_id, + fe = ocfs_allocate_file_entry(); + if (!fe) { + LOG_ERROR_STR ("could not allocate file entry"); + goto finally; + } + bm_lock = (ocfs_bitmap_lock *)fe; + status = ocfs_acquire_lock (osb, OCFS_BITMAP_LOCK_OFFSET, OCFS_DLM_EXCLUSIVE_LOCK, - FLAG_FILE_CREATE, &vol_lockres, NULL); + FLAG_FILE_CREATE, &vol_lockres, fe); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; @@ -191,10 +215,16 @@ int ocfs_free_disk_bitmap (ocfs_super * /* release all locks */ if (free_vol_bits != NULL) { - lock_id = OCFS_BITMAP_LOCK_OFFSET; - status = ocfs_release_lock (osb, lock_id, + bm_lock->used_bits = ocfs_count_bits(&osb->cluster_bitmap); + status = ocfs_write_force_disk(osb, bm_lock, OCFS_SECTOR_SIZE, + OCFS_BITMAP_LOCK_OFFSET); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + status = ocfs_release_lock (osb, OCFS_BITMAP_LOCK_OFFSET, OCFS_DLM_EXCLUSIVE_LOCK, - FLAG_FILE_CREATE, vol_lockres); + FLAG_FILE_CREATE, vol_lockres, fe); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; @@ -208,7 +238,7 @@ int ocfs_free_disk_bitmap (ocfs_super * status = ocfs_release_lock (osb, lock_id, OCFS_DLM_EXCLUSIVE_LOCK, FLAG_FILE_CREATE, - dirnode_lockres[i]); + dirnode_lockres[i], NULL); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; @@ -223,7 +253,7 @@ int ocfs_free_disk_bitmap (ocfs_super * status = ocfs_release_lock (osb, lock_id, OCFS_DLM_EXCLUSIVE_LOCK, FLAG_FILE_CREATE, - extnode_lockres[i]); + 
extnode_lockres[i], NULL); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; @@ -238,7 +268,14 @@ int ocfs_free_disk_bitmap (ocfs_super * ocfs_safefree (free_ext_node[i]); } + ocfs_safefree (free_dir_node); + ocfs_safefree (free_ext_node); + ocfs_safefree (dirnode_lockres); + ocfs_safefree (extnode_lockres); + ocfs_safefree (free_vol_bits); + if (fe) + ocfs_release_file_entry(fe); LOG_EXIT_STATUS (status); return status; @@ -259,6 +296,15 @@ int ocfs_process_record (ocfs_super * os ocfs_dir_node *lock_node = NULL; ub1 *read_buf = NULL; ub4 node_num; + ub4 index; + ocfs_extent_group *alloc_ext; + ub8 disk_off = 0; + ub4 num_upd; + ub4 i; + ub8 lock_id; + ocfs_lock_res *lock_res; + ocfs_lock_res **lock_res_array = NULL; + ocfs_lock_res *tmp_lockres; LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", osb, buffer); @@ -396,24 +442,10 @@ int ocfs_process_record (ocfs_super * os break; case LOG_UPDATE_EXTENT: - { - ub4 index; - ocfs_extent_group *alloc_ext; - ub8 disk_off = 0; - - /* - ** Make sure we have the file lock here - */ - read_buf = ocfs_malloc (osb->sect_size); - if (read_buf == NULL) { - LOG_ERROR_STATUS (status = -ENOMEM); - goto finally; - } - + /* Make sure we have the file lock here */ disk_off = log_rec->rec.extent.disk_off; - - status = ocfs_read_disk (osb, read_buf, - (ub4) osb->sect_size, disk_off); + status = ocfs_read_disk_ex (osb, (void **)&read_buf, + osb->sect_size, osb->sect_size, disk_off); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; @@ -424,7 +456,7 @@ int ocfs_process_record (ocfs_super * os index = log_rec->rec.extent.index; alloc_ext->extents[index].file_off = 0; - alloc_ext->extents[index].num_clusters = 0; + alloc_ext->extents[index].num_bytes = 0; alloc_ext->extents[index].disk_off = 0; disk_off = log_rec->rec.extent.disk_off; @@ -435,7 +467,6 @@ int ocfs_process_record (ocfs_super * os LOG_ERROR_STATUS (status); goto finally; } - } break; case LOG_TYPE_DIR_NODE: @@ -443,6 +474,7 @@ int ocfs_process_record (ocfs_super * os 
log_rec->rec.dir.orig_off, log_rec->rec.dir.saved_off); if (status < 0) { + LOG_ERROR_STATUS (status); /* ** Bad one. We should disable this volume and try ** and let somebody else do the recovery... @@ -460,6 +492,7 @@ int ocfs_process_record (ocfs_super * os status = ocfs_recover_vol (osb, log_rec->rec.recovery.node_num); if (status < 0) { + LOG_ERROR_STATUS (status); /* ** Bad one. We should disable this volume and try ** and let somebody else do the recovery... @@ -476,12 +509,12 @@ int ocfs_process_record (ocfs_super * os break; case LOG_CLEANUP_LOCK: - { - ub4 num_upd; - ub4 i; - ub8 lock_id; - ocfs_lock_res *lock_res; - ocfs_lock_res *lock_res_array[LOCK_UPDATE_LOG_SIZE]; + lock_res_array = ocfs_malloc (LOCK_UPDATE_LOG_SIZE * + sizeof (ocfs_lock_res *)); + if (!lock_res_array) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } num_upd = clean_rec->rec.lock.num_lock_upds; @@ -518,13 +551,20 @@ int ocfs_process_record (ocfs_super * os } for (i = 0; i < num_upd; i++) { + tmp_lockres = NULL; lock_res = lock_res_array[i]; if (lock_res) { /* Reinsert with new ID */ - ocfs_insert_sector_node (osb, lock_res); + status = ocfs_insert_sector_node (osb, lock_res, + &tmp_lockres); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + if (tmp_lockres) + LOG_ERROR_STR ("This too can happen"); } } - } break; default: @@ -534,12 +574,10 @@ int ocfs_process_record (ocfs_super * os finally: if (fe) ocfs_release_file_entry (fe); - if (lock_node) ocfs_release_file_entry ((ocfs_file_entry *) lock_node); - ocfs_safefree (read_buf); - + ocfs_safefree (lock_res_array); LOG_EXIT_STATUS (status); return status; } /* ocfs_process_record */ @@ -561,6 +599,7 @@ int ocfs_process_log (ocfs_super * osb, ub4 size; ub4 log_file_id; ocfs_log_record *log_rec = NULL; + bool use_prealloc = false; LOG_ENTRY_ARGS ("(0x%08x, %u.%u, %u, 0x%08x)\n", osb, HI (trans_id), LO (trans_id), node_num, type); @@ -582,10 +621,23 @@ int ocfs_process_log (ocfs_super * osb, size = 
log_rec_size; size = (ub4) OCFS_ALIGN (size, PAGE_SIZE); - if ((log_rec = ocfs_malloc (size)) == NULL) { - LOG_ERROR_STATUS (status = -ENOMEM); - goto finally; - } + /* try to use prealloc log record if available */ + ocfs_down_sem (&osb->osb_res, true); + if (! OSB_PREALLOC_LOCK_TEST(osb, OSB_LOG_LOCK)) + { + OSB_PREALLOC_LOCK_SET(osb, OSB_LOG_LOCK); + log_rec = (ocfs_log_record *)osb->log_prealloc; + use_prealloc = true; + } + ocfs_up_sem(&osb->osb_res); + + if (log_rec == NULL) + { + if ((log_rec = ocfs_malloc (size)) == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + } status = ocfs_get_system_file_size (osb, log_file_id, &file_size, &alloc_size); if (status < 0) { @@ -604,7 +656,7 @@ int ocfs_process_log (ocfs_super * osb, ** the event and not process cleanup log */ status = ocfs_extend_system_file (osb, - (CLEANUP_FILE_BASE_ID + node_num), 0); + (CLEANUP_FILE_BASE_ID + node_num), 0, NULL); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; @@ -629,7 +681,7 @@ int ocfs_process_log (ocfs_super * osb, goto finally; } - status = ocfs_extend_system_file (osb, log_file_id, offset); + status = ocfs_extend_system_file (osb, log_file_id, offset, NULL); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; @@ -644,7 +696,12 @@ int ocfs_process_log (ocfs_super * osb, } finally: - ocfs_safefree (log_rec); + ocfs_down_sem (&osb->osb_res, true); + if (use_prealloc && OSB_PREALLOC_LOCK_TEST(osb, OSB_LOG_LOCK)) + OSB_PREALLOC_LOCK_CLEAR(osb, OSB_LOG_LOCK); + else + ocfs_safefree (log_rec); + ocfs_up_sem(&osb->osb_res); LOG_EXIT_STATUS (status); return status; @@ -689,7 +746,7 @@ int ocfs_commit_trans (ocfs_super * osb, /* Log to the file for multiple transactions... 
*/ status = ocfs_extend_system_file (osb, - (LOG_FILE_BASE_ID + osb->node_num), offset); + (LOG_FILE_BASE_ID + osb->node_num), offset, NULL); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; @@ -704,7 +761,7 @@ int ocfs_commit_trans (ocfs_super * osb, } status = ocfs_extend_system_file (osb, - (CLEANUP_FILE_BASE_ID + osb->node_num), offset); + (CLEANUP_FILE_BASE_ID + osb->node_num), offset, NULL); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; @@ -743,14 +800,14 @@ int ocfs_abort_trans (ocfs_super * osb, } status = ocfs_extend_system_file (osb, - (LOG_FILE_BASE_ID + osb->node_num), offset); + (LOG_FILE_BASE_ID + osb->node_num), offset, NULL); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } status = ocfs_extend_system_file (osb, - (CLEANUP_FILE_BASE_ID + osb->node_num), offset); + (CLEANUP_FILE_BASE_ID + osb->node_num), offset, NULL); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; @@ -778,20 +835,15 @@ int ocfs_reset_publish (ocfs_super * osb { int status = 0; ocfs_publish *publish = NULL; - ub8 node_publ_off = 0; + ub8 node_publ_off; LOG_ENTRY_ARGS ("(0x%08x, %u.%u)\n", osb, HI (node_num), LO (node_num)); - if ((publish = ocfs_malloc (osb->sect_size)) == NULL) { - LOG_ERROR_STATUS (status = -ENOMEM); - goto finally; - } - + /* Read the publish sector */ node_publ_off = osb->vol_layout.publ_sect_off + (node_num * osb->sect_size); - - /* Read the publish sector */ - status = ocfs_read_disk (osb, publish, osb->sect_size, node_publ_off); + status = ocfs_read_disk_ex (osb, (void **)&publish, osb->sect_size, + osb->sect_size, node_publ_off); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; @@ -837,9 +889,16 @@ int ocfs_recover_vol (ocfs_super * osb, ub8 trans_id = 0; ub8 cleanup_file_size = 0; ub4 file_id; + ocfs_file_entry *fe = NULL; LOG_ENTRY_ARGS ("(0x%08x, %u.%u)\n", osb, HI (node_num), LO (node_num)); + fe = ocfs_allocate_file_entry(); + if (fe == NULL) { + status = -ENOMEM; + goto finally; + } + if 
((node_num < 0) || (node_num > OCFS_MAXIMUM_NODES)) { LOG_ERROR_STATUS (status = -EINVAL); goto finally; @@ -905,7 +964,7 @@ int ocfs_recover_vol (ocfs_super * osb, osb->vol_layout.root_int_off; status = ocfs_acquire_lock (osb, lock_id, OCFS_DLM_EXCLUSIVE_LOCK, - FLAG_FILE_CREATE, &lock_res, NULL); + FLAG_FILE_CREATE, &lock_res, fe); if (status < 0) { goto finally; } @@ -986,10 +1045,12 @@ int ocfs_recover_vol (ocfs_super * osb, if (lock_acq) { tmpstat = ocfs_release_lock (osb, lock_id, OCFS_DLM_EXCLUSIVE_LOCK, - FLAG_FILE_CREATE, lock_res); + FLAG_FILE_CREATE, lock_res, fe); if (tmpstat < 0) status = tmpstat; } + if (fe) + ocfs_release_file_entry(fe); LOG_EXIT_STATUS (status); return (status); @@ -1013,9 +1074,16 @@ int ocfs_write_log (ocfs_super * osb, oc ocfs_lock_res *lock_res; bool log_lock = false; bool lock_acq = false; + ocfs_file_entry *fe = NULL; LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u)\n", osb, log_rec, type); + fe = ocfs_allocate_file_entry(); + if (fe == NULL) { + status = -ENOMEM; + goto finally; + } + /* Get the log lock */ ocfs_down_sem (&(osb->log_lock), true); log_lock = true; @@ -1039,7 +1107,7 @@ int ocfs_write_log (ocfs_super * osb, oc lock_id = (log_file_id * osb->sect_size) + osb->vol_layout.root_int_off; status = ocfs_acquire_lock (osb, lock_id, OCFS_DLM_EXCLUSIVE_LOCK, - FLAG_FILE_CREATE, &lock_res, NULL); + FLAG_FILE_CREATE, &lock_res, fe); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; @@ -1047,18 +1115,13 @@ int ocfs_write_log (ocfs_super * osb, oc lock_acq = true; - status = ocfs_get_system_file_size (osb, log_file_id, &file_size, - &alloc_size); - if (status < 0) { - LOG_ERROR_STATUS (status); - goto finally; - } - + file_size = fe->file_size; + alloc_size = fe->alloc_size; offset = file_size; if (alloc_size < (file_size + log_rec_size)) { file_size += ONE_MEGA_BYTE; - status = ocfs_extend_system_file (osb, log_file_id, file_size); + status = ocfs_extend_system_file (osb, log_file_id, file_size, fe); if (status < 0) { 
LOG_ERROR_STATUS (status); goto finally; @@ -1073,7 +1136,7 @@ int ocfs_write_log (ocfs_super * osb, oc } status = ocfs_extend_system_file (osb, log_file_id, - (offset + log_rec_size)); + (offset + log_rec_size), fe); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; @@ -1088,10 +1151,12 @@ int ocfs_write_log (ocfs_super * osb, oc if (lock_acq) { tmpstat = ocfs_release_lock (osb, lock_id, OCFS_DLM_EXCLUSIVE_LOCK, - FLAG_FILE_CREATE, lock_res); + FLAG_FILE_CREATE, lock_res, fe); if (tmpstat < 0) status = tmpstat; } + if (fe) + ocfs_release_file_entry(fe); LOG_EXIT_STATUS (status); return status; @@ -1118,10 +1183,16 @@ int ocfs_write_node_log (ocfs_super * os ocfs_lock_res *lock_res; bool log_lock = false; bool lock_acq = false; + ocfs_file_entry *fe = NULL; LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u, %u)\n", osb, log_rec, node_num, type); + fe = ocfs_allocate_file_entry(); + if (fe == NULL) { + status = -ENOMEM; + goto finally; + } /* Get the log lock */ ocfs_down_sem (&(osb->log_lock), true); log_lock = true; @@ -1142,7 +1213,7 @@ int ocfs_write_node_log (ocfs_super * os lock_id = (log_file_id * osb->sect_size) + osb->vol_layout.root_int_off; status = ocfs_acquire_lock (osb, lock_id, OCFS_DLM_EXCLUSIVE_LOCK, - FLAG_FILE_CREATE, &lock_res, NULL); + FLAG_FILE_CREATE, &lock_res, fe); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; @@ -1150,18 +1221,13 @@ int ocfs_write_node_log (ocfs_super * os lock_acq = true; - status = ocfs_get_system_file_size (osb, log_file_id, &file_size, - &alloc_size); - if (status < 0) { - LOG_ERROR_STATUS (status); - goto finally; - } - + file_size = fe->file_size; + alloc_size = fe->alloc_size; offset = file_size; if (alloc_size < (file_size + log_rec_size)) { file_size += ONE_MEGA_BYTE; - status = ocfs_extend_system_file (osb, log_file_id, file_size); + status = ocfs_extend_system_file (osb, log_file_id, file_size, fe); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; @@ -1176,7 +1242,7 @@ int ocfs_write_node_log 
(ocfs_super * os } status = ocfs_extend_system_file (osb, log_file_id, - (offset + log_rec_size)); + (offset + log_rec_size), fe); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; @@ -1191,10 +1257,12 @@ int ocfs_write_node_log (ocfs_super * os if (lock_acq) { tmpstat = ocfs_release_lock (osb, lock_id, OCFS_DLM_EXCLUSIVE_LOCK, - FLAG_FILE_CREATE, lock_res); + FLAG_FILE_CREATE, lock_res, fe); if (tmpstat < 0) status = tmpstat; } + if (fe) + ocfs_release_file_entry(fe); LOG_EXIT_STATUS (status); return status; diff -urNp x-ref/fs/ocfs/Common/ocfsgenutil.c x/fs/ocfs/Common/ocfsgenutil.c --- x-ref/fs/ocfs/Common/ocfsgenutil.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/ocfsgenutil.c Mon Oct 21 04:41:19 2002 @@ -29,25 +29,6 @@ /* Tracing */ #define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_UTIL -/* - * ocfs_debug_print() - * - * UNUSED... to be deleted. - */ -void ocfs_debug_print (ub4 Context, ub4 Level, char *FormatStr, ...) -{ - char buf[256]; - va_list va; - - if ((OcfsDebugCtxt & Context) && (OcfsDebugLevel & Level)) { - va_start (va, FormatStr); - vsprintf (buf, FormatStr, va); - va_end (va); - printk ("(%d) %s\n", current->pid, buf); - } - return; -} /* ocfs_debug_print */ - int ocfs_compare_qstr (struct qstr * s1, struct qstr * s2) { int s = strncmp ((const char *) s1->name, (const char *) s2->name, @@ -63,3 +44,16 @@ int ocfs_compare_qstr (struct qstr * s1, return s; } /* ocfs_compare_qstr */ +char *ocfs_strerror(int errno) +{ + int i; + if (errno==0) + return "OK"; + + for (i=0; ilock_stop) { + if (atomic_read (&osb->lock_stop)) { LOG_TRACE_ARGS ("Last Lock written : %d\n", jiffies); + atomic_set (&osb->lock_event_woken, 1); wake_up (&osb->lock_event); } else { LOG_TRACE_ARGS ("Lock written : %d\n", jiffies); @@ -87,10 +88,10 @@ void ocfs_assert_lock_owned (ub4 Arg) cfg_task = (ocfs_cfg_task *) Arg; /* initialize the task */ - INIT_TQUEUE (&(cfg_task->task), ocfs_worker, cfg_task); + INIT_TQUEUE (&(cfg_task->cfg_tq), ocfs_worker, cfg_task); /* submit 
it */ - schedule_task (&cfg_task->task); + schedule_task (&cfg_task->cfg_tq); LOG_EXIT (); return ; @@ -100,87 +101,76 @@ void ocfs_assert_lock_owned (ub4 Arg) * ocfs_add_to_disk_config() * */ -int ocfs_add_to_disk_config (ocfs_super * osb, ocfs_disk_node_config_info * NodeCfgInfo) +int ocfs_add_to_disk_config (ocfs_super * osb, ub4 pref_node_num, + ocfs_disk_node_config_info * new_disk_node) { int status = 0; - ub8 Offset; - ocfs_node_config_hdr *Hdr = NULL; - ocfs_disk_node_config_info *Node = NULL; + ub8 offset; + ocfs_disk_node_config_info *disk_node = NULL; ub1 *buffer = NULL; ub1 *p; - ub4 node; - ub4 sect_size = osb->sect_size; + ub4 node_num; + ub4 sect_size; + ub4 size; LOG_ENTRY (); - /* Read the nodecfg from disk */ - Offset = osb->vol_layout.node_cfg_off; - status = - ocfs_read_disk_ex (osb, (void **) &buffer, - osb->vol_layout.node_cfg_size, - osb->vol_layout.node_cfg_size, Offset); - if (status < 0) { - LOG_ERROR_STATUS (status = -ENOMEM); - goto finally; - } - - Hdr = (ocfs_node_config_hdr *) buffer; + sect_size = osb->sect_size; - /* Sanity check */ - if ((strncmp - (Hdr->signature, NODE_CONFIG_HDR_SIGN, NODE_CONFIG_SIGN_LEN)) - || ((Hdr->version < NODE_MIN_SUPPORTED_VER) - || (Hdr->version > NODE_CONFIG_VER))) { - LOG_ERROR_STATUS (status = -EINVAL); + /* Read the nodecfg info for all nodes from disk */ + size = OCFS_VOLCFG_HDR_SECTORS * sect_size; + offset = osb->vol_layout.node_cfg_off + size; + size = osb->vol_layout.node_cfg_size - size; + status = ocfs_read_disk_ex (osb, (void **) &buffer, size, size, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); goto finally; } - /* Find an empty slot in nodecfg */ - p = buffer + (2 * sect_size); - for (node = 0; node < OCFS_MAXIMUM_NODES; ++node, p += sect_size) { - Node = (ocfs_disk_node_config_info *) p; - if (Node->node_name[0] == '\0') - break; + /* Check if preferred node num is available */ + node_num = OCFS_INVALID_NODE_NUM; + if (pref_node_num >= 0 && pref_node_num < OCFS_MAXIMUM_NODES) { 
+ p = buffer + (pref_node_num * sect_size); + disk_node = (ocfs_disk_node_config_info *)p; + if (disk_node->node_name[0] == '\0') + node_num = pref_node_num; } - /* If no free slots */ - if (node >= OCFS_MAXIMUM_NODES) { - LOG_ERROR_STR - ("Unable to allocate node number as no slots available"); - status = -ENOMEM; - goto finally; + /* if not, find the first available empty slot */ + if (node_num == OCFS_INVALID_NODE_NUM) { + p = buffer; + for (node_num = 0; node_num < OCFS_MAXIMUM_NODES; ++node_num, + p += sect_size) { + disk_node = (ocfs_disk_node_config_info *) p; + if (disk_node->node_name[0] == '\0') + break; + } } - /* Increment the seq_num to signal all nodes to refresh */ - Hdr->seq_num++; - Hdr->num_nodes++; - - /* Write the updated nodecfg hdr */ - Offset = osb->vol_layout.node_cfg_off; - status = ocfs_write_disk (osb, (void *) Hdr, (2 * sect_size), Offset); - if (status < 0) { - LOG_ERROR_STATUS (status); + /* If no free slots, error out */ + if (node_num >= OCFS_MAXIMUM_NODES) { + LOG_ERROR_STR ("Unable to allocate node number as no slots " \ + "are available"); + status = -ENOSPC; goto finally; } /* Copy the new nodecfg into the memory buffer */ - p = buffer + ((node + 2) * sect_size); - memcpy (p, NodeCfgInfo, sect_size); + p = buffer + (node_num * sect_size); + memcpy (p, new_disk_node, sect_size); /* Write the new node details on disk */ - Offset = osb->vol_layout.node_cfg_off + ((node + 2) * sect_size); - Node = (ocfs_disk_node_config_info *) p; - status = ocfs_write_disk (osb, (void *) Node, sect_size, Offset); + size = (node_num + OCFS_VOLCFG_HDR_SECTORS) * sect_size; + offset = osb->vol_layout.node_cfg_off + size; + disk_node = (ocfs_disk_node_config_info *) p; + status = ocfs_write_disk (osb, (void *) disk_node, sect_size, offset); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } - /* Write the NodeCfgHdr into the second sector of NewCfg. 
*/ - /* We do so so that we can read the NodeCfgHdr easily when we */ - /* read the PublishSector, for e.g. in ocfs_nm_thread() */ - Offset = osb->vol_layout.new_cfg_off + sect_size; - status = ocfs_write_disk (osb, (void *) Hdr, sect_size, Offset); + /* Update the nodecfg hdr on disk */ + status = ocfs_write_volcfg_header (osb, OCFS_VOLCFG_ADD); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; @@ -194,23 +184,80 @@ int ocfs_add_to_disk_config (ocfs_super } /* ocfs_add_to_disk_config */ /* + * ocfs_write_volcfg_header() + * Reads the nodecfg header, bumps cfg_seq_num (and num_nodes when op is OCFS_VOLCFG_ADD), writes it back and mirrors it into the second sector of newcfg. + */ +int ocfs_write_volcfg_header (ocfs_super * osb, ocfs_volcfg_op op) +{ + int status = 0; + ocfs_node_config_hdr *hdr; + ub1 *buffer = NULL; + ub8 offset; + + LOG_ENTRY (); + + /* Read the nodecfg header */ + offset = osb->vol_layout.node_cfg_off; + status = ocfs_read_disk_ex (osb, (void **) &buffer, osb->sect_size, + osb->sect_size, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto bail; + } + + hdr = (ocfs_node_config_hdr *) buffer; + + if (op == OCFS_VOLCFG_ADD) + hdr->num_nodes++; + + /* Increment the seq# to trigger other nodes to re-read node cfg */ + hdr->cfg_seq_num++; + + /* Write the nodecfg header */ + status = ocfs_write_disk (osb, (void *) hdr, osb->sect_size, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto bail; + } + + /* Write the nodecfg hdr into the second sector of newcfg. */ + /* We do so so that we can read the nodecfg hdr easily when we */ + /* read the publish sector, e.g. in ocfs_nm_thread() */ + offset = osb->vol_layout.new_cfg_off + osb->sect_size; + status = ocfs_write_disk (osb, (void *) hdr, osb->sect_size, offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto bail; + } + + bail: + ocfs_safefree (buffer); + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_write_volcfg_header */ + +/* * ocfs_config_with_disk_lock() * * This function tries to obtain the lock on the disk for the volume * specified.
The logic for obtaining a disk lock is as follows : - * Read the disk and check to see if somebody else owns the disk if so - * wait for OCFS_VOLCFG_LOCK_TIME which is 2 sec currently, after which try - * and break if not write the nodename as a lock and set the lock owned - * bit. Now wait for OCFS_VOLCFG_LOCK_TIME and read the sector back if the - * nodename is still ours we own the lock. A timer with DPC is now kicked - * in every OCFS_VOLCFG_ITERATE_TIME currently 100 ms to reiterate we own the - * lock. If a node had the lock and crashed we will wait for 2+2 seconds - * and try and break the lock. If somebody else owned the lock we will - * do the same. + * + * Read the volcfg lock sector. If it is not locked, lock it by stamping + * one's node number. Read the same sector after OCFS_VOLCFG_LOCK_TIME. + * If the contents have not been modified, the lock is ours. Retain the + * lock by reiterating the lock write operation every OCFS_VOLCFG_ITERATE_TIME. + * + * If the volcfg lock sector is owned by someone else, wait for + * OCFS_VOLCFG_LOCK_TIME and read the lock sector again. If the lock sector + * is owned by the same node as before, attempt to break the lock as the + * node may have died. If, however, the lock sector is now owned by someone + * else, wait for OCFS_VOLCFG_LOCK_TIME before repeating the entire exercise + * again. * * Returns 0 if success, < 0 if error.
*/ -int ocfs_config_with_disk_lock (ocfs_super * osb, ub8 LockOffset, ub1 * Buffer) +int ocfs_config_with_disk_lock (ocfs_super * osb, ub8 LockOffset, ub1 * Buffer, + ub4 node_num, ocfs_volcfg_op op) { int status = 0; char *rd_buf = NULL; @@ -219,12 +266,15 @@ int ocfs_config_with_disk_lock (ocfs_sup bool BreakLock = false; ocfs_disk_lock *DiskLock; ocfs_cfg_task *cfg_task = NULL; - ub4 sect_size = osb->sect_size; + ub4 sect_size; + ub8 lock_node_num = OCFS_INVALID_NODE_NUM; LOG_ENTRY (); + sect_size = osb->sect_size; + /* Allocate buffer for reading the disk */ - rd_buf = ocfs_malloc (sect_size); + rd_buf = ocfs_malloc (osb->sect_size); if (rd_buf == NULL) { LOG_ERROR_STATUS (status = -ENOMEM); goto finito; @@ -246,55 +296,53 @@ int ocfs_config_with_disk_lock (ocfs_sup /* initialize cfg_task with info reqd to reiterate the volcfg lock */ cfg_task->osb = osb; cfg_task->buffer = lock_buf; - cfg_task->lock_off = osb->vol_layout.new_cfg_off + sect_size; + cfg_task->lock_off = LockOffset; /* Initialize the kernel timer */ init_timer(&osb->lock_timer); osb->lock_timer.function = ocfs_assert_lock_owned; osb->lock_timer.expires = 0; osb->lock_timer.data = (ub4) cfg_task; + init_waitqueue_head (&osb->lock_event); - osb->lock_stop = 0; + atomic_set (&osb->lock_event_woken, 0); + atomic_set (&osb->lock_stop, 0); while (1) { - /* Read the starting sector for the volume */ + /* Read the volcfg lock sector */ status = ocfs_read_disk (osb, rd_buf, sect_size, LockOffset); if (status < 0) { LOG_ERROR_STATUS (status); goto finito; } - /* Check to see if the lock is currently owned or if we want to break */ - /* the lock. The first time around we will not break the lock after */ - /* which we always break it. 
*/ DiskLock = (ocfs_disk_lock *) rd_buf; - if ((DiskLock->file_lock == 0) || (BreakLock)) { - if (DiskLock->file_lock != 0) - LOG_TRACE_STR ("Breaking the Node Config Lock"); + lock_node_num = DISK_LOCK_CURRENT_MASTER(DiskLock); + + if (DISK_LOCK_FILE_LOCK (DiskLock) == 0 || BreakLock) { + if (DISK_LOCK_FILE_LOCK (DiskLock) != 0) + LOG_TRACE_STR ("Try to break node config lock"); else - LOG_TRACE_STR ("Locking Node Config"); + LOG_TRACE_STR ("Lock node config"); - /* Wait for Phase 1 is the O.S.latency between read/write. */ - /* Copy our name into the lock, and set the owned byte. */ + /* Attempt to lock volcfg */ DiskLock = (ocfs_disk_lock *) Buffer; - DiskLock->file_lock = 1; + DISK_LOCK_CURRENT_MASTER (DiskLock) = osb->node_num; + DISK_LOCK_FILE_LOCK (DiskLock) = 1; - /* Write to the disk... */ - status = - ocfs_write_disk (osb, Buffer, sect_size, LockOffset); + /* Write into volcfg lock sector... */ + status = ocfs_write_disk (osb, Buffer, sect_size, + LockOffset); if (status < 0) { LOG_ERROR_STATUS (status); goto finito; } - - /* Set the TriedAcquire, this determines if we need to do a */ - /* second read or just wait. */ TriedAcquire = true; } - ocfs_sleep (OCFS_VOLCFG_LOCK_TIME); /* in ms */ + ocfs_sleep (OCFS_VOLCFG_LOCK_TIME); - /* Read the Disk... */ + /* Read the volcfg lock sector again... 
*/ status = ocfs_read_disk (osb, rd_buf, sect_size, LockOffset); if (status < 0) { LOG_ERROR_STATUS (status); @@ -305,24 +353,40 @@ int ocfs_config_with_disk_lock (ocfs_sup if ((TriedAcquire) && (memcmp (rd_buf, Buffer, sect_size) == 0)) { memcpy (lock_buf, Buffer, sect_size); - /* Set timer to reiterate lock every OCFS_VOLCFG_LOCK_ITERATE jiffies */ + /* Set timer to reiterate lock every few jiffies */ LOG_TRACE_ARGS ("Start Timer: %d\n", jiffies); - osb->lock_timer.expires = jiffies + OCFS_VOLCFG_LOCK_ITERATE; + osb->lock_timer.expires = jiffies + + OCFS_VOLCFG_LOCK_ITERATE; add_timer(&osb->lock_timer); - /* Write the Config info into the config table */ + /* Write the config info into the disk */ DiskLock = (ocfs_disk_lock *) Buffer; - DiskLock->file_lock = 0; - ocfs_add_to_disk_config (osb, + DISK_LOCK_CURRENT_MASTER (DiskLock) = + OCFS_INVALID_NODE_NUM; + DISK_LOCK_FILE_LOCK (DiskLock) = 0; + + if (op == OCFS_VOLCFG_ADD) + status = ocfs_add_to_disk_config (osb, node_num, + (ocfs_disk_node_config_info *) Buffer); + else if (op == OCFS_VOLCFG_UPD) + status = ocfs_update_disk_config (osb, node_num, (ocfs_disk_node_config_info *) Buffer); - goto finito; + else + status = -EFAIL; + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finito; + } + break; } else { - /* Either somebody got the lock or somebody already had it wait */ - /* and then try to break it */ - LOG_TRACE_STR - ("Could not get lock for 2 sec, breaking other guys lock"); - ocfs_sleep (OCFS_VOLCFG_LOCK_TIME); /* in ms */ - BreakLock = true; + DiskLock = (ocfs_disk_lock *) rd_buf; + if (DISK_LOCK_CURRENT_MASTER (DiskLock) == lock_node_num) + BreakLock = true; + else { + LOG_TRACE_ARGS ("Node config locked by node: %d\n", + DISK_LOCK_CURRENT_MASTER (DiskLock)); + ocfs_sleep (OCFS_VOLCFG_LOCK_TIME); + } } } @@ -363,14 +427,12 @@ int ocfs_release_disk_lock (ocfs_super * memset (buffer, 0, sect_size); /* Cancel the timer so that we don't reiterate the lock anymore */ - osb->lock_stop = 1; - 
LOG_TRACE_STR ("Waiting for osb->lock_event\n"); - ocfs_wait (&osb->lock_event, false, 0); + LOG_TRACE_STR ("Waiting for osb->lock_event"); + atomic_set (&osb->lock_stop, 1); + ocfs_wait (osb->lock_event, atomic_read (&osb->lock_event_woken), 0); + atomic_set (&osb->lock_event_woken, 0); del_timer_sync(&osb->lock_timer); - /* sleep with the hope that any unfinished lock iteration is over */ - ocfs_sleep (100); - /* Release the lock */ status = ocfs_write_disk (osb, buffer, sect_size, LockOffset); if (status < 0) { @@ -378,7 +440,7 @@ int ocfs_release_disk_lock (ocfs_super * goto finally; } -finally: + finally: ocfs_safefree (buffer); LOG_EXIT_STATUS (status); @@ -386,16 +448,15 @@ finally: } /* ocfs_release_disk_lock */ /* - * ocfs_cfg_worker() + * ocfs_add_node_to_config() * */ -void ocfs_cfg_worker (ocfs_super * osb) +int ocfs_add_node_to_config (ocfs_super * osb) { int status = 0; ocfs_disk_node_config_info *disk; void *buffer = NULL; ub8 offset; - ub4 i; ub4 sect_size = osb->sect_size; LOG_ENTRY (); @@ -409,32 +470,14 @@ void ocfs_cfg_worker (ocfs_super * osb) disk = (ocfs_disk_node_config_info *) buffer; - /* Dump stuff read from the local config file into the allocated mem */ - strncpy (disk->node_name, OcfsGlobalCtxt.node_name, - MAX_NODE_NAME_LENGTH); - disk->node_name[MAX_NODE_NAME_LENGTH] = '\0'; - - disk->num_interfaces = OcfsGlobalCtxt.num_ipc; - - for (i = 0; i < OcfsGlobalCtxt.num_ipc; i++) { - if (OcfsGlobalCtxt.comm_info[i].valid) { - disk->ipc_config[i].addr = - OcfsGlobalCtxt.comm_info[i].addr; - disk->ipc_config[i].port = - OcfsGlobalCtxt.comm_info[i].port; - disk->ipc_config[i].state = OCFS_IPC_STATE_CONFIG; - disk->ipc_config[i].mask = - OcfsGlobalCtxt.comm_info[i].mask; - disk->ipc_config[i].active = - OcfsGlobalCtxt.comm_info[i].active; - disk->ipc_config[i].type = - OcfsGlobalCtxt.comm_info[i].type; - } - } + /* populate the disknodecfg info from global context */ + ocfs_volcfg_gblctxt_to_disknode (disk); /* Write this nodes config onto disk 
*/ offset = osb->vol_layout.new_cfg_off; - status = ocfs_config_with_disk_lock (osb, offset, (ub1 *) disk); + status = ocfs_config_with_disk_lock (osb, offset, (ub1 *) disk, + OcfsGlobalCtxt.pref_node_num, + OCFS_VOLCFG_ADD); if (status < 0) { LOG_ERROR_STATUS (status); goto bail; @@ -449,26 +492,24 @@ void ocfs_cfg_worker (ocfs_super * osb) bail: ocfs_safefree (buffer); - LOG_EXIT (); - return; -} /* ocfs_cfg_worker */ + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_add_node_to_config */ /* - * ocfs_add_upd_ipc_cfg() + * ocfs_disknode_to_node() * */ -int ocfs_add_upd_ipc_cfg (ocfs_node_config_info ** node, - ocfs_disk_node_config_info * disk) +int ocfs_disknode_to_node (ocfs_node_config_info ** node, + ocfs_disk_node_config_info * disk) { int status = 0; - ub4 i; LOG_ENTRY (); if (*node == NULL) { - *node = (ocfs_node_config_info *) - ocfs_malloc (sizeof (ocfs_node_config_info)); - if (*node == NULL) { + if ((*node = (ocfs_node_config_info *) + ocfs_malloc (sizeof (ocfs_node_config_info))) == NULL) { LOG_ERROR_STATUS (status = -ENOMEM); goto bail; } @@ -476,128 +517,114 @@ int ocfs_add_upd_ipc_cfg (ocfs_node_conf } strncpy ((*node)->node_name, disk->node_name, MAX_NODE_NAME_LENGTH); - (*node)->node_name[MAX_NODE_NAME_LENGTH] = '\0'; - for (i = 0; i < disk->num_interfaces; i++) { - (*node)->ipc_config[i].addr = disk->ipc_config[i].addr; - (*node)->ipc_config[i].port = disk->ipc_config[i].port; - (*node)->ipc_config[i].mask = disk->ipc_config[i].mask; - (*node)->ipc_config[i].state = disk->ipc_config[i].state; - (*node)->ipc_config[i].type = disk->ipc_config[i].type; - (*node)->ipc_config[i].active = disk->ipc_config[i].active; + memcpy((*node)->guid.guid, disk->guid.guid, GUID_LEN); - if ((*node)->ipc_config[i].active) - (*node)->primary_comm = i; - } - - (*node)->num_interfaces = disk->num_interfaces; + (*node)->ipc_config.type = disk->ipc_config.type; + (*node)->ipc_config.ip_port = disk->ipc_config.ip_port; + strncpy((*node)->ipc_config.ip_addr, 
disk->ipc_config.ip_addr, + MAX_IP_ADDR_LEN); + strncpy((*node)->ipc_config.ip_mask, disk->ipc_config.ip_mask, + MAX_IP_ADDR_LEN); bail: LOG_EXIT_STATUS (status); return status; -} /* ocfs_add_upd_ipc_cfg */ +} /* ocfs_disknode_to_node */ /* - * ocfs_update_node_config() + * ocfs_update_disk_config() * - * TODO.... We should be locking the nodecfg in the function. - * We will be able to implement that when we make ocfs_config_with_disk_lock() - * more flexible. */ -int ocfs_update_node_config (ocfs_super * osb) +int ocfs_update_disk_config (ocfs_super * osb, ub4 node_num, + ocfs_disk_node_config_info * disk) { int status = 0; - ocfs_disk_node_config_info *node; - ub1 *buffer = NULL; ub8 offset; - ub4 i; - ocfs_node_config_hdr *hdr; - ub4 sect_size = osb->sect_size; LOG_ENTRY (); - buffer = ocfs_malloc (2 * sect_size); - if (buffer == NULL) { - LOG_ERROR_STATUS (status = -ENOMEM); - goto finally; - } - - /* Use first sector to r/w hdr and second for node */ - hdr = (ocfs_node_config_hdr *) buffer; - node = (ocfs_disk_node_config_info *) (buffer + sect_size); - - /* Clear buffer */ - memset ((void *) node, 0, sect_size); - - /* Move the config info into the buffer */ - strncpy (node->node_name, OcfsGlobalCtxt.node_name, - MAX_NODE_NAME_LENGTH); - node->node_name[MAX_NODE_NAME_LENGTH] = '\0'; - - node->num_interfaces = OcfsGlobalCtxt.num_ipc; - - for (i = 0; i < OcfsGlobalCtxt.num_ipc; i++) { - if (OcfsGlobalCtxt.comm_info[i].valid) { - node->ipc_config[i].addr = - OcfsGlobalCtxt.comm_info[i].addr; - node->ipc_config[i].port = - OcfsGlobalCtxt.comm_info[i].port; - node->ipc_config[i].state = OCFS_IPC_STATE_CONFIG; - node->ipc_config[i].mask = - OcfsGlobalCtxt.comm_info[i].mask; - node->ipc_config[i].active = - OcfsGlobalCtxt.comm_info[i].active; - node->ipc_config[i].type = - OcfsGlobalCtxt.comm_info[i].type; - } else - node->ipc_config[i].state = OCFS_IPC_NOT_CONFIG; - } - /* Write the node details */ offset = osb->vol_layout.node_cfg_off + - ((osb->node_num + 2) * 
sect_size); - status = ocfs_write_disk (osb, (void *) node, sect_size, offset); + ((node_num + OCFS_VOLCFG_HDR_SECTORS) * osb->sect_size); + status = ocfs_write_disk (osb, (void *) disk, osb->sect_size, offset); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } - /* Update the header */ - offset = osb->vol_layout.node_cfg_off; - - /* Read the nodecfg header */ - status = ocfs_read_disk (osb, (void *) hdr, sect_size, offset); + status = ocfs_write_volcfg_header (osb, OCFS_VOLCFG_UPD); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } - /* Increment the seq# */ - hdr->seq_num++; + finally: + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_update_disk_config */ - /* Write the nodecfg header */ - status = ocfs_write_disk (osb, (void *) hdr, sect_size, offset); - if (status < 0) { - LOG_ERROR_STATUS (status); - goto finally; - } +/* + * ocfs_volcfg_gblctxt_to_disknode() + * + */ +void ocfs_volcfg_gblctxt_to_disknode(ocfs_disk_node_config_info *disk) +{ + ocfs_ipc_config_info *ipc; + ocfs_comm_info *g_ipc; - /* Write the pNodeCfgHdr into the second sector of NewCfg. */ - /* We do so so that we can read the NodeCfgHdr easily when we */ - /* read the PublishSector, for e.g. 
in ocfs_nm_thread() */ - offset = osb->vol_layout.new_cfg_off + osb->sect_size; - status = ocfs_write_disk (osb, (void *) hdr, sect_size, offset); - if (status < 0) { - LOG_ERROR_STATUS (status); - goto finally; - } + LOG_ENTRY (); - finally: - ocfs_safefree (buffer); + ipc = &(disk->ipc_config); + g_ipc = &(OcfsGlobalCtxt.comm_info); - LOG_EXIT_STATUS (status); - return status; -} /* ocfs_update_node_config */ + if (OcfsGlobalCtxt.node_name) + strncpy (disk->node_name, OcfsGlobalCtxt.node_name, + MAX_NODE_NAME_LENGTH); + + memcpy(disk->guid.guid, OcfsGlobalCtxt.guid.guid, GUID_LEN); + + ipc->type = g_ipc->type; + ipc->ip_port = g_ipc->ip_port; + if (g_ipc->ip_addr) + strncpy (ipc->ip_addr, g_ipc->ip_addr, MAX_IP_ADDR_LEN); + if (g_ipc->ip_mask) + strncpy (ipc->ip_mask, g_ipc->ip_mask, MAX_IP_ADDR_LEN); + + LOG_EXIT (); + return ; +} /* ocfs_volcfg_gblctxt_to_disknode */ + +/* + * ocfs_volcfg_gblctxt_to_node() + * + */ +void ocfs_volcfg_gblctxt_to_node(ocfs_node_config_info *node) +{ + ocfs_ipc_config_info *ipc; + ocfs_comm_info *g_ipc; + + LOG_ENTRY (); + + ipc = &(node->ipc_config); + g_ipc = &(OcfsGlobalCtxt.comm_info); + + if (OcfsGlobalCtxt.node_name) + strncpy (node->node_name, OcfsGlobalCtxt.node_name, + MAX_NODE_NAME_LENGTH); + + memcpy(node->guid.guid, OcfsGlobalCtxt.guid.guid, GUID_LEN); + + ipc->type = g_ipc->type; + ipc->ip_port = g_ipc->ip_port; + if (g_ipc->ip_addr) + strncpy (ipc->ip_addr, g_ipc->ip_addr, MAX_IP_ADDR_LEN); + if (g_ipc->ip_mask) + strncpy (ipc->ip_mask, g_ipc->ip_mask, MAX_IP_ADDR_LEN); + + LOG_EXIT (); + return ; +} /* ocfs_volcfg_gblctxt_to_node */ /* * ocfs_chk_update_config() @@ -618,10 +645,9 @@ int ocfs_chk_update_config (ocfs_super * /* Read in the config on the disk */ offset = osb->vol_layout.node_cfg_off; - status = - ocfs_read_disk_ex (osb, (void **) &buffer, - osb->vol_layout.node_cfg_size, - osb->vol_layout.node_cfg_size, offset); + status = ocfs_read_disk_ex (osb, (void **) &buffer, + osb->vol_layout.node_cfg_size, + 
osb->vol_layout.node_cfg_size, offset); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; @@ -630,57 +656,92 @@ int ocfs_chk_update_config (ocfs_super * /* 1st block in buffer is the NodeCfgHdr */ hdr = (ocfs_node_config_hdr *) buffer; - if ((strncmp - (hdr->signature, NODE_CONFIG_HDR_SIGN, NODE_CONFIG_SIGN_LEN)) - || (hdr->version < NODE_MIN_SUPPORTED_VER) - || (hdr->version > NODE_CONFIG_VER)) { - LOG_ERROR_STATUS (status = -EINVAL); + if (strncmp (hdr->signature, NODE_CONFIG_HDR_SIGN, + NODE_CONFIG_SIGN_LEN)) { + LOG_ERROR_STR ("Invalid node config signature"); + status = -EINVAL; goto finally; } - if ((!osb->cfg_initialized) || (osb->cfg_seq_num != hdr->seq_num) || - (osb->num_cfg_nodes != hdr->num_nodes)) { - /* Add each node which is not in the global ctxt to it, also */ - /* add the node num and config ptr in the osb */ - osb->num_cfg_nodes = hdr->num_nodes; - - /* Find our Volume Node number if present else configure this node */ - /* and obtain the same also find the Ip address, port if present */ - - /* NodeCfgInfo starts from the 3rd sector of NodeCfg */ - p = buffer + (2 * sect_size); - - /* Read the nodecfg for all possible nodes */ - for (i = 0; i < OCFS_MAXIMUM_NODES; i++, p += sect_size) { - disk = (ocfs_disk_node_config_info *) p; - if (disk->node_name[0] != '\0') { - status = - ocfs_add_upd_ipc_cfg (&osb->node_cfg_info[i], - disk); - if (status < 0) { - LOG_ERROR_STATUS (status); - goto finally; - } - - /* If node_num is not set, set it if the node_name matches */ - if (osb->node_num == OCFS_INVALID_NODE_NUM) { - if (!strncmp - (OcfsGlobalCtxt.node_name, - disk->node_name, - MAX_NODE_NAME_LENGTH)) - osb->node_num = i; - } - } + if (hdr->version < NODE_MIN_SUPPORTED_VER || + hdr->version > NODE_CONFIG_VER) { + LOG_ERROR_ARGS ("Node config version mismatch, (%d) < minimum" \ + " (%d) or > current (%d)\n", hdr->version, + NODE_MIN_SUPPORTED_VER, NODE_CONFIG_VER); + status = -EINVAL; + goto finally; + } + + /* Exit if nodecfg on disk has 
remained unchanged... */ + if ((osb->cfg_initialized) && (osb->cfg_seq_num == hdr->cfg_seq_num) && + (osb->num_cfg_nodes == hdr->num_nodes)) + goto finally; + + /* ... else refresh nodecfg in memory */ + p = buffer + (OCFS_VOLCFG_HDR_SECTORS * sect_size); + + /* Read the nodecfg for all possible nodes as there may be holes */ + /* i.e., node numbers need not be doled out in sequence */ + for (i = 0; i < OCFS_MAXIMUM_NODES; i++, p += sect_size) { + disk = (ocfs_disk_node_config_info *) p; + + if (disk->node_name[0] == '\0') + continue; + + status = ocfs_disknode_to_node (&osb->node_cfg_info[i], disk); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; } - osb->cfg_initialized = true; - osb->cfg_seq_num = hdr->seq_num; + /* If nodenum is set, goto next node */ + if (osb->node_num != OCFS_INVALID_NODE_NUM) + continue; + + /* + * If node num is not set, set it if guid matches. + * If guid does not match and the hostid also does not + * match, goto next slot. + * However, if the guid does not match but the hostid + * matches, it means that the user re-ran ocfs_uid_gen + * with the -r option to reclaim its node number. In + * this case, allow the reclaim only if the user mounts + * the volume with the reclaimid option. Else, error. + */ + if (!memcmp(&OcfsGlobalCtxt.guid.guid, disk->guid.guid, + GUID_LEN)) { + osb->node_num = i; + continue; + } + + /* If the hostid does not match, goto next...
*/ + if (memcmp(&OcfsGlobalCtxt.guid.id.host_id, + disk->guid.id.host_id, HOSTID_LEN)) + continue; + + /* ...else allow node to reclaim the number if reclaimid set */ + if (osb->reclaim_id) { + osb->node_num = i; + /* Write this node's cfg with the new guid on disk */ + status = ocfs_refresh_node_config (osb); + if (status < 0) { + LOG_ERROR_STATUS(status); + goto finally; + } + } + else { + LOG_ERROR_STR("Re-mount volume with the reclaimid " \ + "option to reclaim the node number"); + status = -EFAIL; + goto finally; + } } + osb->cfg_initialized = true; + osb->cfg_seq_num = hdr->cfg_seq_num; + osb->num_cfg_nodes = hdr->num_nodes; LOG_TRACE_ARGS ("Num of configured nodes (%u)\n", osb->num_cfg_nodes); - - /* Dump the node cfg infos for all nodes */ - ocfs_show_all_node_cfgs (osb); + IF_TRACE(ocfs_show_all_node_cfgs (osb)); finally: ocfs_safefree (buffer); @@ -690,22 +751,6 @@ int ocfs_chk_update_config (ocfs_super * } /* ocfs_chk_update_config */ /* - * ocfs_add_node_to_config() - * - */ -int ocfs_add_node_to_config (ocfs_super * osb) -{ - int status = 0; - - LOG_ENTRY (); - - ocfs_cfg_worker (osb); - - LOG_EXIT_STATUS (status); - return status; -} /* ocfs_add_node_to_config */ - -/* * ocfs_get_config() * */ @@ -716,19 +761,25 @@ int ocfs_get_config (ocfs_super * osb) LOG_ENTRY (); /* Update our config info for this volume from the disk */ - ocfs_chk_update_config (osb); + status = ocfs_chk_update_config (osb); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto bail; + } if (osb->node_num == OCFS_INVALID_NODE_NUM) { + if (osb->reclaim_id) { + LOG_ERROR_STR ("unable to reclaim id"); + status = -EINVAL; + goto bail; + } status = ocfs_add_node_to_config (osb); if (status < 0) { LOG_ERROR_STATUS (status); goto bail; } } else { - /* Checks if the info in osb->node_cfg_info[osb->node_num] is */ - /* the same as in OcfsGlobalCtxt.comm_info. 
If not, it updates it */ - /* in osb */ - if (!ocfs_is_node_config_ok (osb)) { + if (ocfs_has_node_config_changed (osb)) { status = ocfs_refresh_node_config (osb); if (status < 0) { LOG_ERROR_STATUS (status); @@ -745,41 +796,44 @@ int ocfs_get_config (ocfs_super * osb) } /* ocfs_get_config */ /* - * ocfs_is_node_config_ok() + * ocfs_has_node_config_changed() * */ -bool ocfs_is_node_config_ok (ocfs_super * osb) +bool ocfs_has_node_config_changed (ocfs_super * osb) { ocfs_node_config_info *node; ocfs_ipc_config_info *ipc; ocfs_comm_info *g_ipc; - bool OK = true; - ub4 i; + bool chg = false; LOG_ENTRY (); node = osb->node_cfg_info[osb->node_num]; - ipc = node->ipc_config; - g_ipc = OcfsGlobalCtxt.comm_info; + ipc = &(node->ipc_config); + g_ipc = &(OcfsGlobalCtxt.comm_info); - if (OcfsGlobalCtxt.num_ipc != node->num_interfaces) - OK = false; - - if (OK) { - for (i = 0; i < OcfsGlobalCtxt.num_ipc; ++i) { - if ((ipc[i].addr != g_ipc[i].addr) || - (ipc[i].port != g_ipc[i].port) || - (ipc[i].mask != g_ipc[i].mask) || - (ipc[i].type != g_ipc[i].type)) { - OK = false; - break; - } - } - } - - LOG_EXIT_LONG (OK); - return OK; -} /* ocfs_is_node_config_ok */ + if (OcfsGlobalCtxt.node_name && + strncmp (node->node_name, OcfsGlobalCtxt.node_name, + MAX_NODE_NAME_LENGTH)) + chg = true; + + if (!chg && ipc->type != g_ipc->type) + chg = true; + + if (!chg && ipc->ip_port != g_ipc->ip_port) + chg = true; + + if (!chg && g_ipc->ip_addr && + strncmp (ipc->ip_addr, g_ipc->ip_addr, MAX_IP_ADDR_LEN)) + chg = true; + + if (!chg && g_ipc->ip_mask && + strncmp (ipc->ip_mask, g_ipc->ip_mask, MAX_IP_ADDR_LEN)) + chg = true; + + LOG_EXIT_LONG (chg); + return chg; +} /* ocfs_has_node_config_changed */ /* * ocfs_refresh_node_config() @@ -788,40 +842,40 @@ bool ocfs_is_node_config_ok (ocfs_super int ocfs_refresh_node_config (ocfs_super * osb) { ocfs_node_config_info *node; - ocfs_ipc_config_info *ipc; - ocfs_comm_info *g_ipc; - ub4 i; + ocfs_disk_node_config_info *disk; + ub8 offset; + ub1 
*buffer = NULL; int status = 0; LOG_ENTRY (); - node = osb->node_cfg_info[osb->node_num]; - ipc = node->ipc_config; - g_ipc = OcfsGlobalCtxt.comm_info; - - for (i = 0; i < OcfsGlobalCtxt.num_ipc; ++i) { - if (g_ipc[i].valid) { - ipc[i].addr = g_ipc[i].addr; - ipc[i].port = g_ipc[i].port; - ipc[i].mask = g_ipc[i].mask; - ipc[i].active = g_ipc[i].active; - ipc[i].type = g_ipc[i].type; - ipc[i].state = OCFS_IPC_STATE_CONFIG; - if (ipc[i].active) - node->primary_comm = i; - } + buffer = ocfs_malloc (osb->sect_size); + if (buffer == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto bail; } - node->num_interfaces = OcfsGlobalCtxt.num_ipc; + memset ((void *) buffer, 0, osb->sect_size); + disk = (ocfs_disk_node_config_info *) buffer; + + /* populate the nodecfg info in disk from global context */ + ocfs_volcfg_gblctxt_to_disknode (disk); - /* Update the NodeCfg on disk with the new info */ - status = ocfs_update_node_config (osb); + /* populate the nodecfg info in mem from global context */ + node = osb->node_cfg_info[osb->node_num]; + ocfs_volcfg_gblctxt_to_node (node); + + /* Update the nodecfg on disk with the new info */ + offset = osb->vol_layout.new_cfg_off; + status = ocfs_config_with_disk_lock (osb, offset, (ub1 *) disk, + osb->node_num, OCFS_VOLCFG_UPD); if (status < 0) { LOG_ERROR_STATUS (status); goto bail; } bail: + ocfs_safefree(buffer); LOG_EXIT_STATUS (status); return status; } /* ocfs_refresh_node_config */ @@ -834,34 +888,16 @@ void ocfs_show_all_node_cfgs (ocfs_super { ocfs_node_config_info *node; ub4 i; - ub4 j; - - LOG_ENTRY (); for (i = 0; i < OCFS_MAXIMUM_NODES; i++) { node = osb->node_cfg_info[i]; - if (!node) - continue; - - if (node->node_name[0] == '\0') + if (!node || node->node_name[0] == '\0') continue; - LOG_TRACE_ARGS - ("Node (%u) has name (%s) and has (%u) interconnects\n", i, - node->node_name, node->num_interfaces); - - for (j = 0; j < node->num_interfaces; j++) { - if (node->ipc_config[j].active) { - LOG_TRACE_ARGS - ("%d. 
ip=%d.%d.%d.%d, mask=%d.%d.%d.%d, " - "port=%d, state=%u\n", j, - NIPQUAD (node->ipc_config[j].addr), - NIPQUAD (node->ipc_config[j].mask), - node->ipc_config[j].port, - node->ipc_config[j].state); - } - } + LOG_TRACE_ARGS ("Node (%u) is (%s)\n", i, node->node_name); + LOG_TRACE_ARGS ("ip=%s, port=%d\n", node->ipc_config.ip_addr, + node->ipc_config.ip_port); } return; diff -urNp x-ref/fs/ocfs/Common/ocfsgenvote.c x/fs/ocfs/Common/ocfsgenvote.c --- x-ref/fs/ocfs/Common/ocfsgenvote.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/ocfsgenvote.c Mon Oct 21 04:41:19 2002 @@ -33,43 +33,35 @@ * ocfs_send_vote_reply() * */ -int ocfs_send_vote_reply (ocfs_super * osb, - ocfs_dlm_msg * DlmMesg, ub4 VoteStatus, bool bHandleOpen) +int ocfs_send_vote_reply (ocfs_super * osb, ocfs_dlm_msg * dlm_msg, + ub4 vote_status, bool inode_open) { - ub4 msgSize; - ocfs_dlm_req_master *DlmReqMaster; - ocfs_dlm_reply_master *ReplyDlmMesg; - ocfs_dlm_msg *SendDlmMesg; - ub8 VoteMap; +#define MSGLEN (sizeof (ocfs_dlm_msg) - 1 + sizeof (ocfs_dlm_reply_master)) + ub4 msg_len = MSGLEN; + ocfs_dlm_req_master *req_master; + ocfs_dlm_reply_master *reply_master; + ocfs_dlm_msg *send_dlm_msg; + ub8 vote_map; int status = 0; + ub1 buf[MSGLEN]; +#undef MSGLEN LOG_ENTRY (); - DlmReqMaster = (ocfs_dlm_req_master *) DlmMesg->msg_buf; + req_master = (ocfs_dlm_req_master *) dlm_msg->msg_buf; -//SM??? why -1? 
- msgSize = sizeof (ocfs_dlm_msg) - 1 + sizeof (ocfs_dlm_reply_master); + send_dlm_msg = (ocfs_dlm_msg *)buf; + ocfs_init_dlm_msg (osb, send_dlm_msg, msg_len); + send_dlm_msg->msg_type = OCFS_REPLY_MAKE_MASTER; + + reply_master = (ocfs_dlm_reply_master *) send_dlm_msg->msg_buf; + reply_master->h.lock_id = req_master->lock_id; + reply_master->status = vote_status; + reply_master->h.lock_seq_num = req_master->lock_seq_num; - SendDlmMesg = ocfs_malloc (msgSize); - if (SendDlmMesg == NULL) { - LOG_ERROR_STATUS (status = -ENOMEM); - goto bail; - } - - ocfs_init_dlm_msg (osb, SendDlmMesg, msgSize); - - SendDlmMesg->msg_type = OCFS_REPLY_MAKE_MASTER; - - ReplyDlmMesg = (ocfs_dlm_reply_master *) SendDlmMesg->msg_buf; + vote_map = (1 << dlm_msg->src_node); + ocfs_send_bcast (osb, vote_map, send_dlm_msg); - ReplyDlmMesg->lock_id = DlmReqMaster->lock_id; - ReplyDlmMesg->status = VoteStatus; - ReplyDlmMesg->lock_seq_num = DlmReqMaster->lock_seq_num; - - VoteMap = (1 << DlmMesg->src_node); - ocfs_send_bcast (osb, VoteMap, SendDlmMesg); - - bail: LOG_EXIT_STATUS (status); return status; } /* ocfs_send_vote_reply */ @@ -78,56 +70,58 @@ int ocfs_send_vote_reply (ocfs_super * o * ocfs_comm_vote_for_del_ren() * */ -int ocfs_comm_vote_for_del_ren (ocfs_super * osb, - ocfs_lock_res ** LockResource, ocfs_dlm_msg * DlmMesg) +int ocfs_comm_vote_for_del_ren (ocfs_super * osb, ocfs_lock_res ** lockres, + ocfs_dlm_msg * dlm_msg) { int status = 0; - ocfs_dlm_req_master *DlmReqMaster; - ub4 NodeAskingVote; + ocfs_dlm_req_master *req_master; + ub4 node_num; ub4 flags; - ub4 retryCount = 0; + ub4 retry_cnt = 0; bool acq_oin = false; - ocfs_file_entry *FileEntry = NULL; - ocfs_lock_res *pLockResource; + ocfs_file_entry *fe = NULL; ocfs_sem *oin_sem = NULL; + struct dentry *dentry; + struct list_head *iter; + struct inode *inode; + int refcount; + LOG_ENTRY (); - pLockResource = *LockResource; - DlmReqMaster = (ocfs_dlm_req_master *) DlmMesg->msg_buf; + req_master = (ocfs_dlm_req_master *) 
dlm_msg->msg_buf; - flags = DlmReqMaster->flags; - NodeAskingVote = DlmMesg->src_node; + flags = req_master->flags; + node_num = dlm_msg->src_node; - if (pLockResource->oin) { - UPDATE_OIN (pLockResource->oin); + if ((*lockres)->oin) { + UPDATE_OIN ((*lockres)->oin); } LOG_TRACE_ARGS ("Vote for del ren for node (%u) for lock 0x%08x.0x%08x " - " and seq %u.%u\n", NodeAskingVote, - HI (DlmReqMaster->lock_id), LO (DlmReqMaster->lock_id), - HI (DlmReqMaster->lock_seq_num), - LO (DlmReqMaster->lock_seq_num)); + " and seq %u.%u\n", node_num, HI (req_master->lock_id), + LO (req_master->lock_id), HI (req_master->lock_seq_num), + LO (req_master->lock_seq_num)); /* Check for oin */ - if (pLockResource->oin != NULL) { + if ((*lockres)->oin != NULL) { ocfs_inode *oin; - oin = pLockResource->oin; + oin = (*lockres)->oin; oin_sem = &(oin->main_res); ocfs_down_sem (oin_sem, true); acq_oin = true; /* If OIN_IN_USE is set we should go back and retry */ - while ((oin->oin_flags & OCFS_OIN_IN_USE) && (retryCount < 5)) { + while ((oin->oin_flags & OCFS_OIN_IN_USE) && (retry_cnt < 5)) { if ((acq_oin)) { ocfs_up_sem (oin_sem); acq_oin = false; } - ocfs_sleep (20); /* in ms */ - retryCount++; + ocfs_sleep (20); + retry_cnt++; if (!acq_oin) { ocfs_down_sem (oin_sem, true); @@ -135,101 +129,98 @@ int ocfs_comm_vote_for_del_ren (ocfs_sup } } - if ((pLockResource->oin->ref_cnt == 0) && - (!(oin->oin_flags & OCFS_OIN_IN_USE))) { + refcount = 0; + inode = (*lockres)->oin->inode; + list_for_each (iter, &(inode->i_dentry)) { + dentry = list_entry (iter, struct dentry, d_alias); + refcount += atomic_read(&dentry->d_count); + } + + if (refcount == 0 && (!(oin->oin_flags & OCFS_OIN_IN_USE))) { if (!(oin->oin_flags & OCFS_OIN_IN_TEARDOWN)) { if (acq_oin) { ocfs_up_sem (oin_sem); acq_oin = false; } - ocfs_release_lockres (pLockResource); + ocfs_release_lockres (*lockres); ocfs_release_cached_oin (osb, oin); ocfs_release_oin (oin, true); - pLockResource = NULL; + (*lockres) = NULL; } - 
ocfs_send_vote_reply (osb, DlmMesg, OCFS_DLM_VOTE_OK, false); + ocfs_send_vote_reply (osb, dlm_msg, OCFS_DLM_VOTE_OK, false); goto finito; } else { - LOG_TRACE_ARGS - ("Vote for del ren returned in use (%u) for " - "Lock 0x%08x.0x%08x and Seq %u.%u\n", - NodeAskingVote, HI (DlmReqMaster->lock_id), - LO (DlmReqMaster->lock_id), - HI (DlmReqMaster->lock_seq_num), - LO (DlmReqMaster->lock_seq_num)); - ocfs_send_vote_reply (osb, DlmMesg, - OCFS_DLM_VOTE_OIN_ALREADY_INUSE, false); - ocfs_release_lockres (pLockResource); + LOG_TRACE_ARGS ("Vote for del ren returned in use (%u) " + "for Lock 0x%08x.0x%08x and Seq %u.%u\n", node_num, + HI (req_master->lock_id), LO (req_master->lock_id), + HI (req_master->lock_seq_num), LO (req_master->lock_seq_num)); + ocfs_send_vote_reply (osb, dlm_msg, + OCFS_DLM_VOTE_OIN_ALREADY_INUSE, false); + ocfs_release_lockres (*lockres); goto finito; } } else { - LOG_TRACE_ARGS - ("Vote for del ren voted to del/ren (%u) for lock " - "0x%08x.0x%08x and Seq %u.%u\n", NodeAskingVote, - HI (DlmReqMaster->lock_id), LO (DlmReqMaster->lock_id), - HI (DlmReqMaster->lock_seq_num), - LO (DlmReqMaster->lock_seq_num)); - ocfs_send_vote_reply (osb, DlmMesg, OCFS_DLM_VOTE_OK, false); - ocfs_release_lockres (pLockResource); + LOG_TRACE_ARGS ("Vote for del ren voted to del/ren (%u) for lock " + "0x%08x.0x%08x and Seq %u.%u\n", node_num, HI (req_master->lock_id), + LO (req_master->lock_id), HI (req_master->lock_seq_num), + LO (req_master->lock_seq_num)); + ocfs_send_vote_reply (osb, dlm_msg, OCFS_DLM_VOTE_OK, false); + ocfs_release_lockres (*lockres); goto finito; } finito: /* Set the always update master on open flag */ - if (pLockResource) { - pLockResource->lock_state |= FLAG_ALWAYS_UPDATE_OPEN; - pLockResource->last_upd_seq_num = DlmReqMaster->lock_seq_num; - - if (pLockResource->master_node_num != OCFS_INVALID_NODE_NUM) { - if (!IS_NODE_ALIVE - (osb->publ_map, pLockResource->master_node_num, - OCFS_MAXIMUM_NODES)) { - pLockResource->master_node_num = 
NodeAskingVote; + if (*lockres) { + (*lockres)->lock_state |= FLAG_ALWAYS_UPDATE_OPEN; + (*lockres)->last_upd_seq_num = req_master->lock_seq_num; + + if ((*lockres)->master_node_num != OCFS_INVALID_NODE_NUM) { + if (!IS_NODE_ALIVE (osb->publ_map, (*lockres)->master_node_num, + OCFS_MAXIMUM_NODES)) { + (*lockres)->master_node_num = node_num; } } else { - pLockResource->master_node_num = NodeAskingVote; + (*lockres)->master_node_num = node_num; } /* Change the master if there is no lock */ - if ((pLockResource->master_node_num == osb->node_num) && - (pLockResource->lock_state <= OCFS_DLM_SHARED_LOCK)) { - ub8 tmp = DlmReqMaster->lock_id; + if (((*lockres)->master_node_num == osb->node_num) && + ((*lockres)->lock_state <= OCFS_DLM_SHARED_LOCK)) { + ub8 tmp = req_master->lock_id; /* Change the lock ownership to the node asking for vote */ - status = - ocfs_get_file_entry (osb, &FileEntry, - DlmReqMaster->lock_id); + status = ocfs_get_file_entry (osb, &fe, req_master->lock_id); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } /* Write new master on the disk */ - DISK_LOCK_CURRENT_MASTER (FileEntry) = NodeAskingVote; + DISK_LOCK_CURRENT_MASTER (fe) = node_num; - status = - ocfs_write_disk (osb, FileEntry, osb->sect_size, tmp); + status = ocfs_write_disk (osb, fe, osb->sect_size, tmp); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } - pLockResource->master_node_num = NodeAskingVote; + (*lockres)->master_node_num = node_num; } } finally: - if (FileEntry) - ocfs_release_file_entry (FileEntry); + if (fe) + ocfs_release_file_entry (fe); if (acq_oin && oin_sem) { ocfs_up_sem (oin_sem); acq_oin = false; } - if (pLockResource) - ocfs_release_lockres (pLockResource); + if ((*lockres)) + ocfs_release_lockres (*lockres); LOG_EXIT_STATUS (status); return status; @@ -239,19 +230,15 @@ int ocfs_comm_vote_for_del_ren (ocfs_sup * ocfs_find_lockres() * */ -int ocfs_find_lockres (ocfs_super * osb, ub8 LockId, ocfs_lock_res ** LockResource) +int ocfs_find_lockres 
(ocfs_super * osb, ub8 lock_id, ocfs_lock_res ** lockres) { - int status = -ENOENT; - ocfs_lock_res *lockResource = NULL; + int status; LOG_ENTRY (); - *LockResource = NULL; - - status = ocfs_lookup_sector_node (osb, LockId, &lockResource); - if (status >= 0) { - *LockResource = lockResource; - } + status = ocfs_lookup_sector_node (osb, lock_id, lockres); + if (status < 0) + *lockres = NULL; LOG_EXIT_STATUS (status); return status; @@ -261,28 +248,36 @@ int ocfs_find_lockres (ocfs_super * osb, * ocfs_check_ipc_msg() * */ -bool ocfs_check_ipc_msg (ub1 * Mesg, ub4 Length) +bool ocfs_check_ipc_msg (ub1 * msg, ub4 msg_len) { bool bret = false; - ocfs_dlm_msg *DlmMesg; + ocfs_dlm_msg *dlm_msg; LOG_ENTRY (); - DlmMesg = (ocfs_dlm_msg *) Mesg; + dlm_msg = (ocfs_dlm_msg *) msg; - if (DlmMesg == NULL) + if (dlm_msg == NULL) { + LOG_ERROR_STR("Invalid IPC message"); goto bail; + } + + if (msg_len < sizeof(ocfs_dlm_msg)) { + LOG_ERROR_STR("IPC message too short"); + goto bail; + } /* Compute and Compare the checksum */ - if (DlmMesg->magic != OCFS_DLM_MSG_MAGIC) { - LOG_ERROR_ARGS ("magic number did not match: %d != %d\n", - DlmMesg->magic, OCFS_DLM_MSG_MAGIC); + if (dlm_msg->magic != OCFS_DLM_MSG_MAGIC) { + LOG_ERROR_ARGS ("Magic number mismatch: 0x%08x != 0x%08x\n", + dlm_msg->magic, OCFS_DLM_MSG_MAGIC); goto bail; } - if ((DlmMesg->src_node < 0) || (DlmMesg->src_node > 64)) { - LOG_ERROR_ARGS ("source node was invalid: %d\n", - DlmMesg->src_node); + if ((dlm_msg->src_node < 0) || + (dlm_msg->src_node > OCFS_MAXIMUM_NODES)) { + LOG_ERROR_ARGS ("Source node was invalid: %d\n", + dlm_msg->src_node); goto bail; } @@ -297,40 +292,25 @@ bool ocfs_check_ipc_msg (ub1 * Mesg, ub4 * ocfs_find_osb() * */ -void ocfs_find_osb (sb1 * VolumeID, ocfs_super ** osb) +void ocfs_find_osb (sb1 * volume_id, ocfs_super ** osb) { - bool GlobalResourceAcquired = false; - struct list_head *iterEntry; - ocfs_super *tmp_osb = NULL; + struct list_head *iter_osb; LOG_ENTRY (); ocfs_down_sem 
(&(OcfsGlobalCtxt.res), true); - GlobalResourceAcquired = true; - - list_for_each (iterEntry, &(OcfsGlobalCtxt.osb_next)) { - tmp_osb = list_entry (iterEntry, ocfs_super, osb_next); - if (memcmp (tmp_osb->vol_layout.id, VolumeID, MAX_VOL_ID_LENGTH) - == 0) { - *osb = tmp_osb; - - if (GlobalResourceAcquired) { - ocfs_up_sem (&(OcfsGlobalCtxt.res)); - GlobalResourceAcquired = false; - } + list_for_each (iter_osb, &(OcfsGlobalCtxt.osb_next)) { + *osb = list_entry (iter_osb, ocfs_super, osb_next); + if (!memcmp ((*osb)->vol_layout.vol_id, volume_id, + MAX_VOL_ID_LENGTH)) goto bail; - } - } - - if (GlobalResourceAcquired) { - ocfs_up_sem (&(OcfsGlobalCtxt.res)); - GlobalResourceAcquired = false; } *osb = NULL; bail: + ocfs_up_sem (&(OcfsGlobalCtxt.res)); LOG_EXIT (); return; } /* ocfs_find_osb */ @@ -339,32 +319,36 @@ void ocfs_find_osb (sb1 * VolumeID, ocfs * ocfs_find_create_lockres() * */ -int ocfs_find_create_lockres (ocfs_super * osb, ub8 LockId, ocfs_lock_res ** LockResource) +int ocfs_find_create_lockres (ocfs_super * osb, ub8 lock_id, + ocfs_lock_res ** lockres) { int status = 0; - ocfs_lock_res *pLockResource = NULL; + ocfs_lock_res *tmp_lockres = NULL; LOG_ENTRY (); - *LockResource = NULL; + *lockres = NULL; - status = ocfs_lookup_sector_node (osb, LockId, &pLockResource); - if (status >= 0) { - *LockResource = pLockResource; - } else { - status = 0; - pLockResource = pLockResource = kmem_cache_alloc (OcfsGlobalCtxt.lockres_cache, GFP_KERNEL); - if (pLockResource == NULL) { + status = ocfs_lookup_sector_node (osb, lock_id, lockres); + if (status < 0) { + *lockres = ocfs_allocate_lockres(); + if (!*lockres) { LOG_ERROR_STATUS (status = -ENOMEM); goto bail; } - /* Init Resource */ - ocfs_init_lockres (osb, pLockResource, LockId); - OCFS_SET_FLAG (pLockResource->lock_state, LOCK_STATE_INIT); + ocfs_init_lockres (osb, *lockres, lock_id); + OCFS_SET_FLAG ((*lockres)->lock_state, LOCK_STATE_INIT); - ocfs_insert_sector_node (osb, pLockResource); - *LockResource = 
pLockResource; + status = ocfs_insert_sector_node (osb, *lockres, &tmp_lockres); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto bail; + } + if (tmp_lockres) { + ocfs_free_lockres (*lockres); + *lockres = tmp_lockres; + } } bail: @@ -376,77 +360,67 @@ int ocfs_find_create_lockres (ocfs_super * ocfs_comm_process_vote() * */ -int ocfs_comm_process_vote (ocfs_super * osb, ocfs_dlm_msg * DlmMesg) +int ocfs_comm_process_vote (ocfs_super * osb, ocfs_dlm_msg * dlm_msg) { int status = 0; - ocfs_lock_res *LockResource = NULL; - ocfs_dlm_req_master *DlmReqMaster; - ub4 NodeAskingVote = OCFS_INVALID_NODE_NUM; + ocfs_lock_res *lockres = NULL; + ocfs_dlm_req_master *req_master; + ub4 node_num = OCFS_INVALID_NODE_NUM; ub4 flags; bool acq_oin = false; ub8 offset; ub4 length; - ocfs_file_entry *FileEntry = NULL; + ocfs_file_entry *fe = NULL; + ocfs_file_entry *temp_fe = NULL; + ub4 i; + ocfs_inode *oin; + bool oin_exists; LOG_ENTRY (); - DlmReqMaster = (ocfs_dlm_req_master *) DlmMesg->msg_buf; + req_master = (ocfs_dlm_req_master *) dlm_msg->msg_buf; LOG_TRACE_ARGS ("Called from node (%u) for Lock 0x%08x.0x%08x, " - "Seq %u.%u\n", DlmMesg->src_node, - HI (DlmReqMaster->lock_id), LO (DlmReqMaster->lock_id), - HI (DlmReqMaster->lock_seq_num), - LO (DlmReqMaster->lock_seq_num)); - - /* Find the resource */ - /* If resource is not found create on and set the init state on it */ - status = ocfs_find_create_lockres (osb, DlmReqMaster->lock_id, &LockResource); + "Seq %u.%u\n", dlm_msg->src_node, HI (req_master->lock_id), + LO (req_master->lock_id), HI (req_master->lock_seq_num), + LO (req_master->lock_seq_num)); + + status = ocfs_find_create_lockres (osb, req_master->lock_id, &lockres); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } - /* If there is no resource we don't care abt the resource so vote */ - ocfs_acquire_lockres (LockResource); + ocfs_acquire_lockres (lockres); - if (LockResource->lock_state & LOCK_STATE_INIT) { - /* We are done, build a packet to return 
success to the caller */ - status = ocfs_send_vote_reply (osb, DlmMesg, OCFS_DLM_VOTE_OK, false); - ocfs_release_lockres (LockResource); + if (lockres->lock_state & LOCK_STATE_INIT) { + status = ocfs_send_vote_reply (osb, dlm_msg, OCFS_DLM_VOTE_OK, false); + ocfs_release_lockres (lockres); goto finally; } - if ((LockResource->master_node_num == osb->node_num) && - (DlmMesg->src_node == osb->node_num)) { - status = ocfs_send_vote_reply (osb, DlmMesg, OCFS_DLM_VOTE_OK, false); - ocfs_release_lockres (LockResource); + if ((lockres->master_node_num == osb->node_num) && + (dlm_msg->src_node == osb->node_num)) { + status = ocfs_send_vote_reply (osb, dlm_msg, OCFS_DLM_VOTE_OK, false); + ocfs_release_lockres (lockres); goto finally; } - flags = DlmReqMaster->flags; - NodeAskingVote = DlmMesg->src_node; - - /* If there is a resource check the state */ + flags = req_master->flags; + node_num = dlm_msg->src_node; if ((flags & FLAG_FILE_DELETE) || (flags & FLAG_FILE_RENAME)) { - status = ocfs_comm_vote_for_del_ren (osb, &LockResource, DlmMesg); + status = ocfs_comm_vote_for_del_ren (osb, &lockres, dlm_msg); goto finally; } - /* Cache stf */ if (flags & FLAG_FILE_RELEASE_CACHE) { - ocfs_file_entry *TempEntry = NULL; - ub4 i; - - i = 0; - - LOG_TRACE_STR ("Called for FLAG_FILE_RELEASE_CACHE"); - if (!osb->commit_cache_exec) { osb->needs_flush = true; + i = 0; while ((osb->trans_in_progress) && (i < 10)) { - ocfs_sleep (100); /* in ms */ + ocfs_sleep (100); i++; } @@ -457,286 +431,186 @@ int ocfs_comm_process_vote (ocfs_super * osb->commit_cache_exec = false; } - { - length = osb->sect_size; - offset = DlmReqMaster->lock_id; - - status = - ocfs_get_file_entry (osb, &TempEntry, - DlmReqMaster->lock_id); + length = osb->sect_size; + offset = req_master->lock_id; + + status = ocfs_get_file_entry (osb, &temp_fe, + req_master->lock_id); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + + if (DISK_LOCK_FILE_LOCK (temp_fe) > OCFS_DLM_NO_LOCK) { + DISK_LOCK_FILE_LOCK 
(temp_fe) = OCFS_DLM_NO_LOCK; + + status = ocfs_write_force_disk (osb, temp_fe, + length, offset); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } - if (DISK_LOCK_FILE_LOCK (TempEntry) > - OCFS_DLM_NO_LOCK) { - DISK_LOCK_FILE_LOCK (TempEntry) = - OCFS_DLM_NO_LOCK; - - status = - ocfs_write_force_disk (osb, TempEntry, - length, offset); - if (status < 0) { - LOG_ERROR_STATUS (status); - goto finally; - } - - LockResource->lock_type = - OCFS_DLM_NO_LOCK; - } + lockres->lock_type = OCFS_DLM_NO_LOCK; + } - if (TempEntry) { - ocfs_safefree (TempEntry); - TempEntry = NULL; - } + if (temp_fe) { + ocfs_release_file_entry (temp_fe); + temp_fe = NULL; } - ocfs_send_vote_reply (osb, DlmMesg, OCFS_DLM_VOTE_OK, false); - ocfs_release_lockres (LockResource); - LOG_TRACE_STR ("Exiting for FLAG_FILE_RELEASE_CACHE"); + ocfs_send_vote_reply (osb, dlm_msg, OCFS_DLM_VOTE_OK, false); + ocfs_release_lockres (lockres); status = 0; goto finally; } - LOG_TRACE_ARGS ("Exiting for FLAG_FILE_RELEASE_CACHE"); } - /* end cache stf */ if (flags & FLAG_FILE_UPDATE_OIN) { - ocfs_inode *oin; - - LOG_TRACE_ARGS - ("Got update oin from node (%u) for Lock 0x%08x.0x%08x, " - "Seq %u.%u\n", DlmMesg->src_node, - HI (DlmReqMaster->lock_id), LO (DlmReqMaster->lock_id), - HI (DlmReqMaster->lock_seq_num), - LO (DlmReqMaster->lock_seq_num)); - - /* Set the verify oin flag on the oin....??? 
*/ - /* Assumption is that we have Lock resource or oin lock */ - - if (LockResource->oin != NULL) { - oin = LockResource->oin; + if (lockres->oin != NULL) { + oin = lockres->oin; ocfs_down_sem (&(oin->main_res), true); acq_oin = true; - - /* Get the main resource too */ - UPDATE_OIN (LockResource->oin); - + UPDATE_OIN (lockres->oin); if (acq_oin) { ocfs_up_sem (&(oin->main_res)); acq_oin = false; } } - ocfs_send_vote_reply (osb, DlmMesg, OCFS_DLM_VOTE_OK, false); - ocfs_release_lockres (LockResource); + ocfs_send_vote_reply (osb, dlm_msg, OCFS_DLM_VOTE_OK, false); + ocfs_release_lockres (lockres); goto finally; } /* If there is a masternode and it is alive ask the node */ /* asking for vote to update its state */ - if (LockResource->master_node_num != OCFS_INVALID_NODE_NUM) { - if (LockResource->master_node_num == osb->node_num) { + if (lockres->master_node_num != OCFS_INVALID_NODE_NUM) { + if (lockres->master_node_num == osb->node_num) { if (flags & FLAG_CHANGE_MASTER) { - ub8 tmp = DlmReqMaster->lock_id; + ub8 tmp = req_master->lock_id; ocfs_commit_cache (osb, true); - status = - ocfs_get_file_entry (osb, &FileEntry, - DlmReqMaster->lock_id); + status = ocfs_get_file_entry (osb, &fe, + req_master->lock_id); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } - LOG_TRACE_ARGS - ("Got change Master from Node (%u) for " - "Lock 0x%08x.0x%08x, Seq %u.%u\n", - NodeAskingVote, HI (DlmReqMaster->lock_id), - LO (DlmReqMaster->lock_id), - HI (DlmReqMaster->lock_seq_num), - LO (DlmReqMaster->lock_seq_num)); - - if (LockResource->oin) { - DISK_LOCK_OIN_MAP (FileEntry) |= - (1 << osb->node_num); + if (lockres->oin) { + DISK_LOCK_OIN_MAP (fe) |= (1 << osb->node_num); } - DISK_LOCK_CURRENT_MASTER (FileEntry) = - NodeAskingVote; + DISK_LOCK_CURRENT_MASTER (fe) = node_num; /* Write new master on the disk */ - status = - ocfs_write_disk (osb, FileEntry, - osb->sect_size, tmp); + status = ocfs_write_disk (osb, fe, osb->sect_size, tmp); if (status < 0) { LOG_ERROR_STATUS 
(status); goto finally; } - LockResource->master_node_num = NodeAskingVote; - ocfs_release_lockres (LockResource); - ocfs_send_vote_reply (osb, DlmMesg, OCFS_DLM_VOTE_OK, - false); + lockres->master_node_num = node_num; + ocfs_release_lockres (lockres); + ocfs_send_vote_reply (osb, dlm_msg, OCFS_DLM_VOTE_OK, false); goto finally; } else if (flags & FLAG_ADD_OIN_MAP) { - ub8 tmp = DlmReqMaster->lock_id; + ub8 tmp = req_master->lock_id; - status = - ocfs_get_file_entry (osb, &FileEntry, - DlmReqMaster->lock_id); + status = ocfs_get_file_entry (osb, &fe, req_master->lock_id); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } - LOG_TRACE_ARGS - ("Got add oin map from Node (%x) for " - "Lock 0x%08x.0x%08x, Seq %u.%u\n", - NodeAskingVote, HI (DlmReqMaster->lock_id), - LO (DlmReqMaster->lock_id), - HI (DlmReqMaster->lock_seq_num), - LO (DlmReqMaster->lock_seq_num)); - - if ((FileEntry-> - sync_flags & OCFS_SYNC_FLAG_NAME_DELETED) - || - (!(FileEntry-> - sync_flags & OCFS_SYNC_FLAG_VALID))) { - ocfs_release_lockres (LockResource); - ocfs_send_vote_reply (osb, DlmMesg, - OCFS_DLM_VOTE_FILE_DEL, - false); + if ((fe->sync_flags & OCFS_SYNC_FLAG_NAME_DELETED) || + (!(fe-> sync_flags & OCFS_SYNC_FLAG_VALID))) { + ocfs_release_lockres (lockres); + ocfs_send_vote_reply (osb, dlm_msg, + OCFS_DLM_VOTE_FILE_DEL, false); goto finally; } else { - DISK_LOCK_OIN_MAP (FileEntry) |= - (1 << NodeAskingVote); + DISK_LOCK_OIN_MAP (fe) |= (1 << node_num); /* Write new map on the disk */ - status = - ocfs_write_disk (osb, FileEntry, - osb->sect_size, tmp); + status = ocfs_write_disk (osb, fe, osb->sect_size, tmp); if (status < 0) { LOG_ERROR_STATUS (status); goto finally; } /* Add this node to the oin map on the file entry */ - LockResource->oin_openmap = - DISK_LOCK_OIN_MAP (FileEntry); - ocfs_release_lockres (LockResource); - ocfs_send_vote_reply (osb, DlmMesg, - OCFS_DLM_VOTE_OK, false); + lockres->oin_openmap = DISK_LOCK_OIN_MAP (fe); + ocfs_release_lockres (lockres); + 
ocfs_send_vote_reply (osb, dlm_msg, + OCFS_DLM_VOTE_OK, false); goto finally; } } } else { - if (IS_NODE_ALIVE - (osb->publ_map, LockResource->master_node_num, - OCFS_MAXIMUM_NODES)) { - LOG_TRACE_ARGS - ("Got Master Req from Node (%u) for " - "Lock 0x%08x.0x%08x, Seq %u.%u, Currmaster is (%u)\n", - NodeAskingVote, HI (DlmReqMaster->lock_id), - LO (DlmReqMaster->lock_id), - HI (DlmReqMaster->lock_seq_num), - LO (DlmReqMaster->lock_seq_num), - LockResource->master_node_num); - + if (IS_NODE_ALIVE (osb->publ_map, lockres->master_node_num, + OCFS_MAXIMUM_NODES)) { /* We have no business voting on this lock */ - ocfs_send_vote_reply (osb, DlmMesg, - OCFS_DLM_VOTE_UPDATE_RETRY, - false); + ocfs_send_vote_reply (osb, dlm_msg, + OCFS_DLM_VOTE_UPDATE_RETRY, false); } else { - bool OpenHandle = false; + oin_exists = false; + + /* Master Node is dead and a vote is needed */ + /* to create a new master */ - /* Master Node is dead and a vote is needed to create a new master */ - LOG_TRACE_ARGS - ("Got Master Req from Node (%u) for Lock " - "0x%08x.0x%08x, Seq %u.%u, Current master is dead\n", - NodeAskingVote, HI (DlmReqMaster->lock_id), - LO (DlmReqMaster->lock_id), - HI (DlmReqMaster->lock_seq_num), - LO (DlmReqMaster->lock_seq_num)); - - if ((LockResource-> - lock_state & LOCK_STATE_IN_VOTING) - && (NodeAskingVote < osb->node_num)) { - /* If our node number is > his we win so send a mesg to */ - /* him to retry */ - ocfs_send_vote_reply (osb, DlmMesg, - OCFS_DLM_VOTE_UPDATE_RETRY, - OpenHandle); + if ((lockres->lock_state & LOCK_STATE_IN_VOTING) && + (node_num < osb->node_num)) { + /* If our node number is > his we win */ + /* so send a mesg to him to retry */ + ocfs_send_vote_reply (osb, dlm_msg, + OCFS_DLM_VOTE_UPDATE_RETRY, oin_exists); goto finally; } else { - if ((!(flags & FLAG_DIR)) - && ((flags & FLAG_FILE_EXTEND) - || (flags & FLAG_FILE_UPDATE))) { - if (LockResource->oin) { - OpenHandle = true; + if ((!(flags & FLAG_DIR)) && + ((flags & FLAG_FILE_EXTEND) || + 
(flags & FLAG_FILE_UPDATE))) { + if (lockres->oin) { + oin_exists = true; } } - ocfs_send_vote_reply (osb, DlmMesg, - OCFS_DLM_VOTE_UPDATE_RETRY, - OpenHandle); + ocfs_send_vote_reply (osb, dlm_msg, + OCFS_DLM_VOTE_UPDATE_RETRY, oin_exists); } } } } else { - bool OpenHandle = false; + oin_exists = false; - if ((LockResource->lock_state & LOCK_STATE_IN_VOTING) && - (NodeAskingVote < osb->node_num)) { - /* If our node number is > his we win so send a mesg to him to retry */ - ocfs_send_vote_reply (osb, DlmMesg, OCFS_DLM_VOTE_UPDATE_RETRY, - OpenHandle); + if ((lockres->lock_state & LOCK_STATE_IN_VOTING) && + (node_num < osb->node_num)) { + /* If our node number is > his we win so send a mesg */ + /* to him to retry */ + ocfs_send_vote_reply (osb, dlm_msg, + OCFS_DLM_VOTE_UPDATE_RETRY, oin_exists); goto finally; } else { /* Vote for the node */ - LOG_TRACE_ARGS - ("Got Master Req from Node (%u) for Lock " - "0x%08x.0x%08x, Seq %u.%u, No current master\n", - NodeAskingVote, HI (DlmReqMaster->lock_id), - LO (DlmReqMaster->lock_id), - HI (DlmReqMaster->lock_seq_num), - LO (DlmReqMaster->lock_seq_num)); - - if ((!(flags & FLAG_DIR)) - && ((flags & FLAG_FILE_EXTEND) - || (flags & FLAG_FILE_UPDATE))) { - if (LockResource->oin) { - OpenHandle = true; + if ((!(flags & FLAG_DIR)) && + ((flags & FLAG_FILE_EXTEND) || (flags & FLAG_FILE_UPDATE))) { + if (lockres->oin) { + oin_exists = true; } } - LOG_TRACE_ARGS - ("Voting for Node (%u) for Lock 0x%08x.0x%08x, " - "Seq %u.%u, Vote %d, Openhandle %d\n", - NodeAskingVote, HI (DlmReqMaster->lock_id), - LO (DlmReqMaster->lock_id), - HI (DlmReqMaster->lock_seq_num), - LO (DlmReqMaster->lock_seq_num), OCFS_DLM_VOTE_OK, - OpenHandle); - - ocfs_send_vote_reply (osb, DlmMesg, OCFS_DLM_VOTE_OK, - OpenHandle); + ocfs_send_vote_reply (osb, dlm_msg, OCFS_DLM_VOTE_OK, + oin_exists); goto finally; } } finally: - if (FileEntry) - ocfs_release_file_entry (FileEntry); + if (fe) + ocfs_release_file_entry (fe); - if (LockResource) - 
ocfs_release_lockres (LockResource); - - LOG_TRACE_ARGS - ("Exited from Node (%u) for Lock 0x%08x.0x%08x, Seq %u.%u\n", - NodeAskingVote, HI (DlmReqMaster->lock_id), - LO (DlmReqMaster->lock_id), HI (DlmReqMaster->lock_seq_num), - LO (DlmReqMaster->lock_seq_num)); + if (lockres) + ocfs_release_lockres (lockres); LOG_EXIT_STATUS (status); return status; @@ -758,38 +632,40 @@ int ocfs_comm_process_vote_reply (ocfs_s /* Search for the resource on the recv list and */ /* based on the vote do appropriate work */ - status = ocfs_find_lockres (osb, reply->lock_id, &lockres); - if (status < 0) - return (status); + status = ocfs_find_lockres (osb, reply->h.lock_id, &lockres); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto bail; + } if ((lockres->lock_state & LOCK_STATE_IN_VOTING) && - (lockres->last_upd_seq_num == reply->lock_seq_num)) { + (lockres->last_upd_seq_num == reply->h.lock_seq_num)) { if (reply->status == OCFS_DLM_VOTE_OK) { /* Accumalate all the votes... */ lockres->vote_status = OCFS_DLM_VOTE_OK; lockres->got_vote_map |= (1 << dlm_msg->src_node); - LOG_TRACE_ARGS ("OCFS_DLM_VOTE_OK Src Node %u\n", - dlm_msg->src_node); + LOG_TRACE_ARGS ("OCFS_DLM_VOTE_OK Src Node %u\n", dlm_msg->src_node); if (lockres->got_vote_map == lockres->req_vote_map) { LOG_TRACE_ARGS ("OCFS_DLM_VOTE_OK Vote from " \ - "all Node (%u.%u)\n", - lockres->got_vote_map); + "all Node (%u.%u)\n", HI(lockres->got_vote_map), + LO(lockres->got_vote_map)); lockres->vote_status = 0; - wake_up ((wait_queue_head_t *) lockres->voted_event); + atomic_set (&lockres->voted_event_woken, 1); + wake_up (&lockres->voted_event); } } else { - LOG_TRACE_ARGS ("OCFS_DLM_VOTE_NOT OK status (0x%08x\n", - reply->status); - /* switch on status and determine the action to take */ + LOG_TRACE_ARGS ("OCFS_DLM_VOTE_NOT OK status %d\n", reply->status); lockres->vote_status = reply->status; - wake_up ((wait_queue_head_t *) lockres->voted_event); + atomic_set (&lockres->voted_event_woken, 1); + wake_up 
(&lockres->voted_event); } } else { - OCFS_BREAKPOINT (); + LOG_ERROR_STR ("lock seq num did not match. Ignoring."); } + bail: LOG_EXIT_STATUS (status); return status; } /* ocfs_comm_process_vote_reply */ @@ -798,42 +674,24 @@ int ocfs_comm_process_vote_reply (ocfs_s * ocfs_dlm_recv_msg() * */ -void ocfs_dlm_recv_msg (void *Arg) +void ocfs_dlm_recv_msg (void *val) { - ocfs_recv_context *recv_ctxt; - ub1 *DlmPacket; - ub4 freeIndex; - sb4 numPosted; - ocfs_recv_comp_context *recv_comp_ctxt = (ocfs_recv_comp_context *) Arg; + ocfs_recv_ctxt *recv_ctxt; + ub1 *dlm_packet; LOG_ENTRY (); - atomic_dec (&(recv_comp_ctxt->recv_ctxt->num_posted)); - numPosted = atomic_read (&(recv_comp_ctxt->recv_ctxt->num_posted)); - - if (numPosted < OCFS_LOW_MARK_UDP) - wake_up (recv_comp_ctxt->recv_ctxt->event); - - DlmPacket = (ub1 *) - recv_comp_ctxt->recv_ctxt->recv_packet[recv_comp_ctxt->index]; + recv_ctxt = (ocfs_recv_ctxt *) val; + dlm_packet = (ub1 *) recv_ctxt->msg; - if (recv_comp_ctxt->status >= 0) { - if (ocfs_check_ipc_msg (DlmPacket, recv_comp_ctxt->recvd_len)) - ocfs_comm_process_msg (DlmPacket); + if (recv_ctxt->status >= 0) { + if (ocfs_check_ipc_msg (dlm_packet, recv_ctxt->msg_len)) + ocfs_comm_process_msg (dlm_packet); else - LOG_ERROR_STR ("Received an Invalid Packet"); + LOG_ERROR_STR ("Received an invalid packet"); } - recv_ctxt = recv_comp_ctxt->recv_ctxt; - freeIndex = recv_comp_ctxt->index; - - ocfs_down_sem (recv_ctxt->free_lock, true); - recv_ctxt->free[freeIndex] = true; - recv_ctxt->num_used--; - ocfs_up_sem (recv_ctxt->free_lock); - - ocfs_safefree (recv_comp_ctxt->work_item); - ocfs_safefree (recv_comp_ctxt); + ocfs_safefree (recv_ctxt); LOG_EXIT (); return; @@ -843,41 +701,42 @@ void ocfs_dlm_recv_msg (void *Arg) * ocfs_comm_process_msg() * */ -int ocfs_comm_process_msg (ub1 * Mesg) +int ocfs_comm_process_msg (ub1 * msg) { int status = 0; ocfs_super *osb; - ocfs_dlm_msg *DlmMesg; + ocfs_dlm_msg *dlm_msg; + ub8 nodemap; LOG_ENTRY (); - DlmMesg = 
(ocfs_dlm_msg *) Mesg; + dlm_msg = (ocfs_dlm_msg *) msg; - ocfs_find_osb (DlmMesg->vol_id, &osb); - - if (osb == NULL) + ocfs_find_osb (dlm_msg->vol_id, &osb); + if (osb == NULL) { + LOG_ERROR_STR("IPC message had invalid volume id"); goto bail; + } - /* Record the fact that we received a message from a particular node */ - /* so that we know which node is sending on which comm channel... */ + nodemap = (1 << dlm_msg->src_node); + if (!(osb->publ_map & nodemap)) { + LOG_ERROR_STR("IPC message from dead node"); + goto bail; + } - switch (DlmMesg->msg_type) { + switch (dlm_msg->msg_type) { case OCFS_REQUEST_MAKE_MASTER: - LOG_TRACE_STR ("Called OCFS_REQUEST_MAKE_MASTER"); - ocfs_comm_process_vote (osb, DlmMesg); + ocfs_comm_process_vote (osb, dlm_msg); break; case OCFS_DISK_VOTE_REQUEST: - LOG_TRACE_STR ("Called OCFS_DISK_VOTE_REQUEST"); - ocfs_comm_process_vote (osb, DlmMesg); + ocfs_comm_process_vote (osb, dlm_msg); break; case OCFS_REPLY_MAKE_MASTER: - LOG_TRACE_STR ("Called OCFS_REPLY_MAKE_MASTER"); - ocfs_comm_process_vote_reply (osb, DlmMesg); + ocfs_comm_process_vote_reply (osb, dlm_msg); break; case OCFS_DISK_VOTE_REPLY: - LOG_TRACE_STR ("Called OCFS_DISK_VOTE_REPLY"); - ocfs_comm_process_vote_reply (osb, DlmMesg); + ocfs_comm_process_vote_reply (osb, dlm_msg); break; default: diff -urNp x-ref/fs/ocfs/Common/ocfsheartbeat.c x/fs/ocfs/Common/ocfsheartbeat.c --- x-ref/fs/ocfs/Common/ocfsheartbeat.c Thu Jan 1 01:00:00 1970 +++ x/fs/ocfs/Common/ocfsheartbeat.c Mon Oct 21 04:41:19 2002 @@ -0,0 +1,200 @@ +/* + * ocfsheartbeat.c + * + * Keeps track of alive nodes in the cluster. + * + * Copyright (C) 2002 Oracle Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have recieved a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, + * Manish Singh, Wim Coekaerts + */ + +#if !defined(USERSPACE_TOOL) +#include +#endif + +#if defined(FORMAT_UTIL) +#include +#endif + +/* Tracing */ +#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_HEARTBEAT + +#if !defined(USERSPACE_TOOL) +/* + * ocfs_nm_heart_beat() + * + * @osb: ocfs super block for the volume + * @flag: type of heart beat + * @read_publish: if the publish sector needs to be re-read + * + * Updates the timestamp in the nodes publish sector. + * + * Returns 0 if success, < 0 if error. + */ +int ocfs_nm_heart_beat (ocfs_super * osb, ub4 flag, bool read_publish) +{ + ocfs_publish *publish = NULL; + int status = 0; + ub8 node_publ_off = 0; + + LOG_ENTRY_ARGS ("(0x%08x, %u, %s)\n", osb, flag, + read_publish ? 
"true" : "false"); + + if (flag & HEARTBEAT_METHOD_DISK) { + node_publ_off = osb->vol_layout.publ_sect_off + + (osb->node_num * osb->sect_size); + + if (read_publish) + { + status = ocfs_read_force_disk_ex (osb, (void **)&publish, + osb->sect_size, osb->sect_size, node_publ_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + } + else + { + publish = (ocfs_publish *) + (osb->cfg_prealloc + + ((4 + osb->node_num) * osb->sect_size)); + } + + OcfsQuerySystemTime (&publish->time); + + publish->hbm[osb->node_num] = osb->hbm; + + /* Write the current time in local node's publish sector */ + status = ocfs_write_force_disk (osb, publish, osb->sect_size, + node_publ_off); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto finally; + } + LOG_TRACE_ARGS ("Updating timestamp for node (%u)\n", + osb->node_num); + } + + if (flag & HEARTBEAT_METHOD_IPC) { + /* Plug this in later... */ + } + + finally: + if (read_publish) + ocfs_safefree (publish); + + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_nm_heart_beat */ +#endif /* !defined(USERPSPACE_TOOL) */ + + +/* + * ocfs_update_publish_map() + * + * @osb: ocfs super block for the volume + * @buffer: publish sectors read in the last round + * @first_time: if true, the buffer needs to be initialized + * + * Reads the publish sectors and compares the timestamp of each node + * to the one it read in the last round. As long as the timestamp keeps + * changing, the node is marked alive. Conversely, if the timestamp does + * not change over time, the node is marked dead. The function marks all + * the live nodes in the publishmap. 
+ * + */ +void ocfs_update_publish_map (ocfs_super * osb, void *buffer, bool first_time) +{ + ocfs_publish *publish; + ocfs_vol_node_map *node_map; + ub8 curr_time = 0; + ub4 i; + ub4 num_nodes; + ub1 *p; + + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u)\n", osb, buffer, first_time); + + num_nodes = OCFS_MAXIMUM_NODES; + node_map = &(osb->vol_node_map); +#if !defined(USERSPACE_TOOL) + OcfsQuerySystemTime (&curr_time); +#endif + + /* First time thru, update buffer with timestamps for all nodes */ + if (first_time) { + for (i = 0, p = (ub1 *) buffer; i < num_nodes; + i++, p += osb->sect_size) { + publish = (ocfs_publish *) p; + node_map->time[i] = publish->time; + node_map->scan_rate[i] = publish->hbm[i]; + node_map->scan_time[i] = curr_time; + } + goto bail; /* exit */ + } + + for (i = 0, p = (ub1 *) buffer; i < num_nodes; + i++, p += osb->sect_size) { + publish = (ocfs_publish *) p; + + /* Loop if slot is unused */ + if (publish->time == (ub8) 0) + continue; + + /* Check if the node is hung or not by comparing the disk */ + /* and memory timestamp values */ + if (node_map->time[i] == publish->time) { + if (IS_NODE_ALIVE(osb->publ_map, i, num_nodes)) { + (node_map->miss_cnt[i])++; + if (node_map->miss_cnt[i] > MISS_COUNT_VALUE) + UPDATE_PUBLISH_MAP (osb->publ_map, i, + OCFS_PUBLISH_CLEAR, + num_nodes); + } + } else { + node_map->miss_cnt[i] = 0; + node_map->time[i] = publish->time; + UPDATE_PUBLISH_MAP (osb->publ_map, i, OCFS_PUBLISH_SET, + num_nodes); + +#if !defined(USERSPACE_TOOL) + /* Update the multiple the other node wants us to beat */ + if ((publish->hbm[osb->node_num] != DISK_HBEAT_INVALID) + && (osb->hbm > publish->hbm[osb->node_num])) { + /* Go to the lowest multiplier any of the nodes */ + /* alive want us to heartbeat with. 
*/ + osb->hbm = publish->hbm[osb->node_num]; + + if (osb->hbm == 0) + osb->hbm = DISK_HBEAT_NO_COMM; + + if (OcfsGlobalCtxt.hbm > osb->hbm) + OcfsGlobalCtxt.hbm = osb->hbm; + + if (OcfsGlobalCtxt.hbm == 0) + OcfsGlobalCtxt.hbm = DISK_HBEAT_NO_COMM; + } +#endif + } + node_map->scan_time[i] = curr_time; + } + + bail: + LOG_EXIT (); + return; +} /* ocfs_update_publish_map */ diff -urNp x-ref/fs/ocfs/Common/ocfsver.c x/fs/ocfs/Common/ocfsver.c --- x-ref/fs/ocfs/Common/ocfsver.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Common/ocfsver.c Thu Jan 1 01:00:00 1970 @@ -1,11 +0,0 @@ -#include - -#include - -#define OCFS_VERSION_STR "2" -#define OCFS_BUILD_STR "14225afeef205e279b9191562d9df555" - -void ocfs_version_print (void) -{ - printk(KERN_INFO "Oracle Cluster FileSystem 2 (build 14225afeef205e279b9191562d9df555)"); -} diff -urNp x-ref/fs/ocfs/Cscope.make x/fs/ocfs/Cscope.make --- x-ref/fs/ocfs/Cscope.make Thu Jan 1 01:00:00 1970 +++ x/fs/ocfs/Cscope.make Mon Oct 21 04:41:19 2002 @@ -0,0 +1,10 @@ +cscope: + rm -f cscope.* + echo "-k" >> cscope.files + echo "-I Common/inc" >> cscope.files + echo "-I Linux/inc" >> cscope.files + find Common -name '*.c' -print >>cscope.files + find Linux -name '*.c' -print >>cscope.files + find Common -name '*.h' -print >>cscope.files + find Linux -name '*.h' -print >>cscope.files + cscope -b diff -urNp x-ref/fs/ocfs/Linux/inc/ocfsconf.h x/fs/ocfs/Linux/inc/ocfsconf.h --- x-ref/fs/ocfs/Linux/inc/ocfsconf.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Linux/inc/ocfsconf.h Thu Jan 1 01:00:00 1970 @@ -1,95 +0,0 @@ -/* - * ocfsconf.h - * - * Function prototypes for related 'C' file. - * - * Copyright (C) 2002 Oracle Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have recieved a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * - * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, - * Manish Singh, Wim Coekaerts - */ - -/* Format of the ocfs.conf file: - * - * # This is a comment - * - * ipcdlm: - * ip_address = - * ip_port = - * subnet_mask = - * type = udp - * hostname = - * active = yes - * - * ipcdlm: - * ip_address = - * ip_port = - * subnet_mask = - * type = udp - * hostname = - * active = no - * - * ipcdlm: - * ip_address = - * ip_port = - * subnet_mask = - * type = udp - * hostname = - * active = no - */ - -#ifndef _OCFSCONF_H_ -#define _OCFSCONF_H_ - -typedef enum _ocfs_conf_toktypes -{ - T_UNKNOWN, T_COMMENT, T_PARAMETER, T_COLON, T_EQUAL -} -ocfs_conf_toktypes; - -typedef enum _ocfs_conf_tokstates -{ - S_BEG, S_SECTION, S_IGNORE -} -ocfs_conf_tokstates; - -typedef struct _ocfs_conf_process -{ - char *section; - void *(*sectionfn) (void *sect, char *lhv, int lhvlen, char *rhv, - int rhvlen); -} -ocfs_conf_process; - -ocfs_conf_toktypes ocfs_get_next_token (char *buf, - int eol, - char **tokstr, - int *toklen, - char **newbuf); - -int ocfs_read_conf (krnl_file * fp); - -void *ocfs_conf_ipc_dlm (void *sect, - char *lhv, - int lhvlen, - char *rhv, - int rhvlen); - -int ocfs_read_node_info (void); - -#endif /* _OCFSCONF_H_ */ diff -urNp x-ref/fs/ocfs/Linux/inc/ocfsdlmp.h x/fs/ocfs/Linux/inc/ocfsdlmp.h --- x-ref/fs/ocfs/Linux/inc/ocfsdlmp.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Linux/inc/ocfsdlmp.h Mon Oct 21 04:41:19 2002 @@ -27,7 +27,8 @@ #ifndef _OCFSDLMP_H_ #define _OCFSDLMP_H_ -int 
ocfs_insert_sector_node (ocfs_super * osb, ocfs_lock_res * lock_res); +int ocfs_insert_sector_node (ocfs_super * osb, ocfs_lock_res * lock_res, + ocfs_lock_res ** found_lockres); int ocfs_lookup_sector_node (ocfs_super * osb, ub8 lock_id, ocfs_lock_res ** lock_res); diff -urNp x-ref/fs/ocfs/Linux/inc/ocfsfilp.h x/fs/ocfs/Linux/inc/ocfsfilp.h --- x-ref/fs/ocfs/Linux/inc/ocfsfilp.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Linux/inc/ocfsfilp.h Thu Jan 1 01:00:00 1970 @@ -1,49 +0,0 @@ -/* - * ocfsfilp.h - * - * Function prototypes for related 'C' file. - * - * Copyright (C) 2002 Oracle Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have recieved a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. 
- * - * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, - * Manish Singh, Wim Coekaerts - */ - -#ifndef _OCFSFILP_H_ -#define _OCFSFILP_H_ - -#define BUFFER_ALIGN 512 - -typedef struct _krnl_file -{ - struct file *filp; /* file ptr */ - char *buf; /* read buffer */ - int bufsz; /* size of buffer */ - int readsz; /* size read */ - int loc; /* processed index */ - int eof; /* 1 when EOF, else 0 */ -} -krnl_file; - -krnl_file *ocfs_open_file (const char *pathname, int flags, mode_t mode); - -void ocfs_close_file (krnl_file * fp); - -int ocfs_read_file (krnl_file * fp, char *s, int size); - -#endif /* _OCFSFILP_H_ */ diff -urNp x-ref/fs/ocfs/Linux/inc/ocfshash.h x/fs/ocfs/Linux/inc/ocfshash.h --- x-ref/fs/ocfs/Linux/inc/ocfshash.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Linux/inc/ocfshash.h Mon Oct 21 04:41:19 2002 @@ -58,7 +58,8 @@ int ocfs_hash_create (HASHTABLE * ht, ub void ocfs_hash_destroy (HASHTABLE * ht, void (*freefn) (const void *p)); -int ocfs_hash_add (HASHTABLE * ht, void *key, ub4 keylen, void *val, ub4 vallen); +int ocfs_hash_add (HASHTABLE * ht, void *key, ub4 keylen, void *val, ub4 vallen, + void **found, ub4 *foundlen); int ocfs_hash_del (HASHTABLE * ht, void *key, ub4 keylen); @@ -130,6 +131,6 @@ void ocfs_hash_stat (HASHTABLE * ht, cha * acceptable. Do NOT use for cryptographic purposes. 
* -------------------------------------------------------------------- * */ -ub4 ocfs_hash (ub1 * k, ub4 length, ub4 initval); +ub4 hash (ub1 * k, ub4 length, ub4 initval); #endif /* _OCFSHASH_H_ */ diff -urNp x-ref/fs/ocfs/Linux/inc/ocfsiosup.h x/fs/ocfs/Linux/inc/ocfsiosup.h --- x-ref/fs/ocfs/Linux/inc/ocfsiosup.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Linux/inc/ocfsiosup.h Mon Oct 21 04:41:19 2002 @@ -31,25 +31,32 @@ int ocfs_write_sector (ocfs_super * osb, int ocfs_read_sector (ocfs_super * osb, void *Buffer, ub8 Offset); -int LinuxWriteForceDisk (ocfs_super * osb, - void *Buffer, ub4 Length, ub8 Offset, bool Cached); +int LinuxWriteForceDisk (ocfs_super * osb, void *Buffer, ub4 Length, + ub8 Offset, bool Cached); -int LinuxReadForceDisk (ocfs_super * osb, - void *Buffer, ub4 Length, ub8 Offset, bool Cached); +int LinuxReadForceDisk (ocfs_super * osb, void *Buffer, ub4 Length, + ub8 Offset, bool Cached); -int ocfs_write_metadata (ocfs_super * osb, void *Buffer, ub4 Length, ub8 Offset); +int ocfs_write_metadata (ocfs_super * osb, void *Buffer, ub4 Length, + ub8 Offset); -int ocfs_read_metadata (ocfs_super * osb, void *Buffer, ub4 Length, ub8 Offset); +int ocfs_read_metadata (ocfs_super * osb, void *Buffer, ub4 Length, + ub8 Offset); -int ocfs_write_force_disk (ocfs_super * osb, void *Buffer, ub4 Length, ub8 Offset); +int ocfs_write_force_disk (ocfs_super * osb, void *Buffer, ub4 Length, + ub8 Offset); int ocfs_write_disk (ocfs_super * osb, void *Buffer, ub4 Length, ub8 Offset); -int ocfs_read_force_disk (ocfs_super * osb, void *Buffer, ub4 Length, ub8 Offset); +int ocfs_read_force_disk (ocfs_super * osb, void *Buffer, ub4 Length, + ub8 Offset); + +int ocfs_read_force_disk_ex (ocfs_super * osb, void **Buffer, ub4 AllocLen, + ub4 ReadLen, ub8 Offset); int ocfs_read_disk (ocfs_super * osb, void *Buffer, ub4 Length, ub8 Offset); -int ocfs_read_disk_ex (ocfs_super * osb, - void **Buffer, ub4 AllocLen, ub4 ReadLen, ub8 Offset); +int ocfs_read_disk_ex (ocfs_super * 
osb, void **Buffer, ub4 AllocLen, + ub4 ReadLen, ub8 Offset); #endif /* _OCFSIOSUP_H_ */ diff -urNp x-ref/fs/ocfs/Linux/inc/ocfsipc.h x/fs/ocfs/Linux/inc/ocfsipc.h --- x-ref/fs/ocfs/Linux/inc/ocfsipc.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Linux/inc/ocfsipc.h Mon Oct 21 04:41:19 2002 @@ -31,36 +31,28 @@ int ocfs_cleanup_ipc (void); /* unuse int ocfs_init_ipc (void); /* empty */ -int ocfs_recv_thread (void *Context); +int ocfs_recv_thread (void *unused); int ocfs_init_udp (void); -int ocfs_init_ipc_dlm (ocfs_ipc_dlm_config * IpcDlmConfig, ocfs_protocol Protocol); +int ocfs_init_ipc_dlm (ocfs_protocol protocol); -int ocfs_send_udp_msg (ocfs_ipc_config_info * SendIpcCfgInfo, - ocfs_ipc_config_info * LocalIpcCfgInfo, - void *Mesg, ub4 Length, wait_queue_head_t * Event); +int ocfs_send_udp_msg (ocfs_ipc_config_info * send, void *msg, ub4 msglen, + wait_queue_head_t * event); -int ocfs_recv_completion (ocfs_recv_comp_context * RecvCompContext); +int ocfs_recv_udp_msg (ocfs_recv_ctxt * recv_ctxt); -int ocfs_recv_udp_msg (ub4 myCommIndex, - void *Mesg, - ub4 * Length, - wait_queue_head_t * Event, - ocfs_recv_comp_context * RecvCompContext); +int ocfs_send_bcast (ocfs_super * osb, ub8 votemap, ocfs_dlm_msg * dlm_msg); -int ocfs_send_bcast (ocfs_super * osb, ub8 VoteMap, ocfs_dlm_msg * DlmMesg); +void ocfs_dlm_send_msg (ocfs_super * osb, ocfs_ipc_config_info * send, + ocfs_dlm_msg * dlm_msg); -void ocfs_dlm_send_msg (ocfs_super * osb, - ocfs_ipc_config_info * IpcCfgInfo, ocfs_dlm_msg * DlmMesg); +int ocfs_init_udp_sock (struct socket **send_sock, struct socket **recv_sock); -int ocfs_init_udp_sock (int commid, - struct socket **send_sock, struct socket **recv_sock); +int ocfs_send_to (struct socket *sock, struct sockaddr *addr, + int addrlen, char *buf, int buflen); -int ocfs_send_to (struct socket *sock, - struct sockaddr *addr, int addrlen, char *buf, int buflen); - -int ocfs_recv_from (struct socket *sock, - struct sockaddr *addr, int *addrlen, char *buf, int *buflen); 
+int ocfs_recv_from (struct socket *sock, struct sockaddr *addr, + int *addrlen, char *buf, int *buflen); #endif /* _OCFSIPC_H_ */ diff -urNp x-ref/fs/ocfs/Linux/inc/ocfsmain.h x/fs/ocfs/Linux/inc/ocfsmain.h --- x-ref/fs/ocfs/Linux/inc/ocfsmain.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Linux/inc/ocfsmain.h Mon Oct 21 04:41:19 2002 @@ -53,12 +53,14 @@ ocfs_find_inode_args; #ifdef OCFSMAIN_PRIVATE_DECLS static int ocfs_parse_options (char *options, uid_t * uid, gid_t * gid, - bool * cache); + bool * cache, bool * reclaim_id); static struct super_block *ocfs_read_super (struct super_block *sb, void *data, int silent); static int __init ocfs_driver_entry (void); static void __exit ocfs_driver_exit (void); +int ocfs_read_params(void); + #ifdef OCFS_LINUX_MEM_DEBUG static void ocfs_memcheck (void); #endif diff -urNp x-ref/fs/ocfs/Linux/inc/ocfsmount.h x/fs/ocfs/Linux/inc/ocfsmount.h --- x-ref/fs/ocfs/Linux/inc/ocfsmount.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Linux/inc/ocfsmount.h Mon Oct 21 04:41:19 2002 @@ -29,7 +29,7 @@ int ocfs_read_disk_header (ub1 ** buffer, struct super_block *sb); -int ocfs_mount_volume (struct super_block *sb); +int ocfs_mount_volume (struct super_block *sb, bool reclaim_id); int ocfs_dismount_volume (struct super_block *sb); diff -urNp x-ref/fs/ocfs/Linux/inc/ocfsport.h x/fs/ocfs/Linux/inc/ocfsport.h --- x-ref/fs/ocfs/Linux/inc/ocfsport.h Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Linux/inc/ocfsport.h Mon Oct 21 04:41:19 2002 @@ -27,6 +27,104 @@ #ifndef _OCFSPORT_H_ #define _OCFSPORT_H_ +/* timeout structure taken from Ben's aio.c */ + +typedef struct _ocfs_timeout { + struct timer_list timer; + int timed_out; + wait_queue_head_t wait; +} +ocfs_timeout; + +static void ocfs_timeout_func(unsigned long data) +{ + ocfs_timeout *to = (ocfs_timeout *)data; + + to->timed_out = 1; + wake_up(&to->wait); +} + +static inline void ocfs_init_timeout(ocfs_timeout *to) +{ + init_timer(&to->timer); + to->timer.data = (unsigned long)to; + to->timer.function = 
ocfs_timeout_func; + to->timed_out = 0; + init_waitqueue_head(&to->wait); +} + +static inline void ocfs_set_timeout(ocfs_timeout *to, ub4 timeout) +{ + ub4 how_long; + + if (!timeout) { + to->timed_out = 1; + return ; + } + + how_long = (timeout * HZ / 1000); + how_long = (how_long < 1) ? 1 : how_long; + + to->timer.expires = jiffies + how_long; + add_timer(&to->timer); +} + +static inline void ocfs_clear_timeout(ocfs_timeout *to) +{ + del_timer_sync(&to->timer); +} + +#define __ocfs_wait(wq, condition, timeo, ret) \ +do { \ + ocfs_timeout __to; \ + \ + DECLARE_WAITQUEUE(__wait, current); \ + DECLARE_WAITQUEUE(__to_wait, current); \ + \ + ocfs_init_timeout(&__to); \ + \ + if (timeo) { \ + ocfs_set_timeout(&__to, timeo); \ + if (__to.timed_out) { \ + ocfs_clear_timeout(&__to); \ + } \ + } \ + \ + add_wait_queue(&wq, &__wait); \ + add_wait_queue(&__to.wait, &__to_wait); \ + do { \ + ret = 0; \ + set_task_state(current, TASK_INTERRUPTIBLE); \ + if (condition) \ + break; \ + ret = -ETIMEDOUT; \ + if (__to.timed_out) \ + break; \ + schedule(); \ + if (signal_pending(current)) { \ + ret = -EINTR; \ + break; \ + } \ + } while (1); \ + \ + set_task_state(current, TASK_RUNNING); \ + remove_wait_queue(&wq, &__wait); \ + remove_wait_queue(&__to.wait, &__to_wait); \ + \ + if (timeo) \ + ocfs_clear_timeout(&__to); \ + \ +} while(0) \ + +#define ocfs_wait(wq, condition, timeout) \ +({ \ + int __ret = 0; \ + if (!(condition)) \ + __ocfs_wait(wq, condition, timeout, __ret); \ + __ret; \ +}) + + void ocfs_init_sem (ocfs_sem * res); bool ocfs_down_sem (ocfs_sem * res, bool wait); @@ -35,9 +133,7 @@ void ocfs_up_sem (ocfs_sem * res); int ocfs_del_sem (ocfs_sem * res); -int ocfs_wait (void *Object, bool Alertable, ub4 Timeout); - -void ocfs_daemonize (char *name); +void ocfs_daemonize (char *name, int len); bool ocfs_get_task (pid_t pid, struct task_struct **task); @@ -51,33 +147,35 @@ void ocfs_extent_map_destroy (ocfs_exten ub4 ocfs_extent_map_get_count (ocfs_extent_map * map); 
-bool ocfs_extent_map_add (ocfs_extent_map * map, - sb8 virtual, sb8 physical, sb8 sectorcount); +bool ocfs_extent_map_add (ocfs_extent_map * map, sb8 virtual, + sb8 physical, sb8 sectorcount); -void ocfs_extent_map_remove (ocfs_extent_map * map, sb8 virtual, sb8 sectorcount); +void ocfs_extent_map_remove (ocfs_extent_map * map, sb8 virtual, + sb8 sectorcount); bool ocfs_extent_map_lookup (ocfs_extent_map * map, sb8 virtual, - sb8 * physical, sb8 * sectorcount, ub4 * index); + sb8 * physical, sb8 * sectorcount, ub4 * index); bool ocfs_extent_map_next_entry (ocfs_extent_map * map, ub4 runindex, - sb8 * virtual, sb8 * physical, sb8 * sectorcount); + sb8 * virtual, sb8 * physical, + sb8 * sectorcount); + +void *ocfs_dbg_slab_alloc (kmem_cache_t *slab, char *file, int line); +void ocfs_dbg_slab_free (kmem_cache_t *slab, void *m); void *ocfs_linux_dbg_alloc (int Size, char *file, int line); void ocfs_linux_dbg_free (const void *Buffer); bool ocfs_linux_get_inode_offset (struct inode *inode, ub8 * off, - ocfs_inode ** oin); + ocfs_inode ** oin); -bool ocfs_linux_get_dir_entry_offset (ocfs_super * osb, - ub8 * off, - ub8 parentOff, - struct qstr * fileName, - ocfs_file_entry ** fileEntry); +bool ocfs_linux_get_dir_entry_offset (ocfs_super * osb, ub8 * off, + ub8 parentOff, struct qstr * fileName, + ocfs_file_entry ** fileEntry); void ocfs_flush_cache (ocfs_super * osb); bool ocfs_purge_cache_section (ocfs_inode * oin, ub8 * file_off, ub4 Length); - #endif /* _OCFSPORT_H_ */ diff -urNp x-ref/fs/ocfs/Linux/ocfsbitmap.c x/fs/ocfs/Linux/ocfsbitmap.c --- x-ref/fs/ocfs/Linux/ocfsbitmap.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Linux/ocfsbitmap.c Mon Oct 21 04:41:19 2002 @@ -26,7 +26,6 @@ #if defined(__KERNEL__) #include -extern struct list_head item_list; #else #include #include @@ -65,7 +64,7 @@ void ocfs_initialize_bitmap (ocfs_alloc_ */ int ocfs_find_clear_bits (ocfs_alloc_bm * bitmap, ub4 numBits, ub4 offset, ub4 sysonly) { - ub4 next_zero, off, count, size, first_zero = -1; 
+ ub4 next_zero, off, count, size, first_zero = -1; void *buffer; LOG_ENTRY (); @@ -76,6 +75,9 @@ int ocfs_find_clear_bits (ocfs_alloc_bm off = offset; while ((size - off + count >= numBits) && (next_zero = find_next_zero_bit (buffer, size, off)) != size) { + if (next_zero >= bitmap->size - sysonly) + break; + if (next_zero != off) { first_zero = next_zero; off = next_zero + 1; @@ -96,6 +98,11 @@ int ocfs_find_clear_bits (ocfs_alloc_bm bail: LOG_EXIT_LONG (first_zero); + if (first_zero != -1 && first_zero > bitmap->size) + { + LOG_ERROR_ARGS("um... first_zero>bitmap->size (%d > %d)\n", first_zero, bitmap->size); + first_zero = -1; + } return first_zero; } /* ocfs_find_clear_bits */ @@ -109,10 +116,10 @@ int ocfs_count_bits (ocfs_alloc_bm * bit unsigned char tmp; ub1 *buffer; - buffer = bitmap->buf; - LOG_ENTRY (); + buffer = bitmap->buf; + size = (bitmap->size >> 3); while (off < size) { diff -urNp x-ref/fs/ocfs/Linux/ocfsconf.c x/fs/ocfs/Linux/ocfsconf.c --- x-ref/fs/ocfs/Linux/ocfsconf.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Linux/ocfsconf.c Thu Jan 1 01:00:00 1970 @@ -1,269 +0,0 @@ -/* - * ocfsconf.c - * - * Reads the config file ocfs.conf. - * - * Copyright (C) 2002 Oracle Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have recieved a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. 
- * - * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, - * Manish Singh, Wim Coekaerts - */ - -#include - -/* Tracing */ -#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_VOLCFG - -ocfs_conf_process confprocs[] = { {"ipcdlm", ocfs_conf_ipc_dlm}, - {NULL, NULL} }; - -/* - * ocfs_get_next_token() - * - * @buf: string to extract the token from - * @eol: if 1, returns string till eol as one token - * @tokstr: returns ptr to the first char of the token - * @toklen: token length - * @newbuf: returns ptr to the string still to be processed - * - * Returns the next token with the token type. For parameter values, set - * eol to 1, as the value is in free-format. - */ -ocfs_conf_toktypes ocfs_get_next_token (char *buf, int eol, char **tokstr, int *toklen, - char **newbuf) -{ - ocfs_conf_toktypes toktyp; - char *p; - - p = buf; - - while (isspace (*p)) - ++p; - - if (eol) { - *tokstr = p; - *toklen = strlen (p); - toktyp = T_UNKNOWN; - } else { - switch (*p) { - case '#': - *tokstr = p; - *toklen = strlen (p); - toktyp = T_COMMENT; - break; - - case ':': - *tokstr = p; - *toklen = 1; - toktyp = T_COLON; - break; - - case '=': - *tokstr = p; - *toklen = 1; - toktyp = T_EQUAL; - break; - - default: - if (isalnum (*p)) { - *tokstr = p; - while ((isalnum (*p) || *p == '_') && - *p != '\0') - ++p; - *toklen = p - *tokstr; - toktyp = T_PARAMETER; - } else { - *tokstr = p; - *toklen = strlen (p); - toktyp = T_UNKNOWN; - } - break; - } - } - - if (newbuf) - *newbuf = *tokstr + *toklen; - - return toktyp; -} /* ocfs_get_next_token */ - -/* - * ocfs_read_conf() - * - * Reads the ocfs.conf file and passes the various parameter-value pairs - * to the section function. The specific section function processes the - * parameter-value pairs. - * - * Returns 1 on success, 0 on error. 
- */ -int ocfs_read_conf (krnl_file * fp) -{ - ocfs_conf_tokstates tokstate; - ocfs_conf_toktypes toktype; - char *tokstr; - int toklen; - char buf[1000]; - char tmpbuf[1000]; - int tmplen; - int len; - char *p; - void *sect; - void *(*sectionfn) (void *sect, char *lhv, int lhvlen, char *rhv, - int rhvlen); - - tokstate = S_BEG; - sectionfn = NULL; - sect = NULL; - - while ((len = ocfs_read_file (fp, buf, sizeof (buf))) > 0) { - buf[len - 1] = '\0'; - toktype = ocfs_get_next_token (buf, 0, &tokstr, &toklen, &p); - - switch (toktype) { - case T_COMMENT: - break; - - case T_PARAMETER: - strncpy (tmpbuf, tokstr, toklen), tmpbuf[toklen] = - '\0'; - tmplen = toklen; - - toktype = - ocfs_get_next_token (p, 0, &tokstr, &toklen, &p); - if (toktype == T_COLON) { - int i; - - for (i = 0; confprocs[i].section; ++i) { - if (!strnicmp - (confprocs[i].section, tmpbuf, - tmplen)) { - sectionfn = - confprocs[i].sectionfn; - sect = - sectionfn (NULL, NULL, 0, NULL, 0); - break; - } - } - - if (sectionfn && sect) - tokstate = S_SECTION; - else - tokstate = S_IGNORE; - } else if (toktype == T_EQUAL) { - if (tokstate == S_SECTION) { - ocfs_get_next_token (p, 1, &tokstr, &toklen, &p); - if (toklen) - sectionfn (sect, tmpbuf, tmplen, tokstr, toklen); - } - } - break; - - default: - break; - } - } - - if (len < 0) { - LOG_ERROR_ARGS ("errno=%d\n", len); - return 0; - } else - return 1; -} /* ocfs_read_conf */ - -/* - * ocfs_conf_ipc_dlm() - * - * @sect: NULL = new section, NOTNULL = address of section - * @lhv: parameter name - * @lhvlen: parameter name len - * @rhv: value - * @rhvlen: value len - * - * The conf function handler for the ipcdlm section. 
- */ -void *ocfs_conf_ipc_dlm (void *sect, char *lhv, int lhvlen, char *rhv, int rhvlen) -{ - ocfs_comm_info *comminfo; - char tmpstr[255]; - - if (sect) { - /* Add the parameter to the section */ - comminfo = (ocfs_comm_info *) sect; - strncpy (tmpstr, rhv, rhvlen); - tmpstr[rhvlen] = '\0'; - - if (!strnicmp (lhv, OCFS_IP_ADDR, lhvlen)) { - comminfo->addr = in_aton (tmpstr); - } else if (!strnicmp (lhv, OCFS_IP_PORT, lhvlen)) { - comminfo->port = simple_strtol (tmpstr, NULL, 10); - } else if (!strnicmp (lhv, OCFS_IP_MASK, lhvlen)) { - comminfo->mask = in_aton (tmpstr); - } else if (!strnicmp (lhv, OCFS_IP_HOST, lhvlen)) { - - } else if (!strnicmp (lhv, OCFS_COMM_TYPE, lhvlen)) { - if (!strnicmp (tmpstr, "udp", rhvlen)) - comminfo->type = 1; /* TODO */ - } else if (!strnicmp (lhv, OCFS_COMM_ACTIVE, lhvlen)) { - if (!strnicmp (tmpstr, "yes", rhvlen)) - comminfo->active = 1; - else - comminfo->active = 0; - comminfo->valid = 1; - } - } else { - /* Create a new section */ - comminfo = &(OcfsGlobalCtxt.comm_info[OcfsGlobalCtxt.num_ipc]); - ++OcfsGlobalCtxt.num_ipc; - memset (comminfo, 0, sizeof (ocfs_comm_info)); - } - - return (void *) comminfo; -} /* ocfs_conf_ipc_dlm */ - -/* - * ocfs_read_node_info() - * - */ -int ocfs_read_node_info (void) -{ - int status = 0; - krnl_file *fp; - - LOG_ENTRY (); - - OcfsGlobalCtxt.num_ipc = 0; - - /* Open the file */ - fp = ocfs_open_file (OCFSCONF_FILE, O_RDONLY, 0600); - if (!fp) { - LOG_ERROR_ARGS ("errno=%d, file=%s\n", errno, OCFSCONF_FILE); - status = -EFAIL; - goto bail; - } - - /* Read the file */ - if (!ocfs_read_conf (fp)) - status = -EFAIL; - - /* Close the file */ - ocfs_close_file (fp); - - bail: - LOG_EXIT_STATUS (status); - return status; -} /* ocfs_read_node_info */ diff -urNp x-ref/fs/ocfs/Linux/ocfsdlm.c x/fs/ocfs/Linux/ocfsdlm.c --- x-ref/fs/ocfs/Linux/ocfsdlm.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Linux/ocfsdlm.c Mon Oct 21 04:41:19 2002 @@ -34,9 +34,11 @@ * ocfs_insert_sector_node() * */ -int 
ocfs_insert_sector_node (ocfs_super * osb, ocfs_lock_res * lock_res) +int ocfs_insert_sector_node (ocfs_super * osb, ocfs_lock_res * lock_res, + ocfs_lock_res ** found_lock_res) { int status = 0; + ub4 tmp; LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", osb, lock_res); @@ -47,7 +49,9 @@ int ocfs_insert_sector_node (ocfs_super } if (!ocfs_hash_add (&(osb->root_sect_node), &(lock_res->sector_num), - sizeof (ub8), lock_res, sizeof (ocfs_lock_res *))) { + sizeof (ub8), lock_res, sizeof (ocfs_lock_res *), + (void **)found_lock_res, &tmp)) + { LOG_ERROR_STATUS(status = -EFAIL); goto bail; } @@ -90,7 +94,6 @@ int ocfs_lookup_sector_node (ocfs_super return status; } /* ocfs_lookup_sector_node */ -#if defined(DLM_THREAD_PER_VOLUME) /* * ocfs_volume_thread() * @@ -100,25 +103,24 @@ int ocfs_lookup_sector_node (ocfs_super int ocfs_volume_thread (void *arg) { ocfs_super *osb; - char proc[100]; + char proc[16]; int status = 0; LOG_ENTRY (); osb = (ocfs_super *) arg; - sprintf (proc, "ocfsnm - %d", osb->osb_id); - ocfs_daemonize (proc); + snprintf (proc, sizeof (proc), "ocfsnm-%d", osb->osb_id); + ocfs_daemonize (proc, strlen(proc)); /* The delay changes based on multiplier */ while (!(OcfsGlobalCtxt.flags & OCFS_FLAG_SHUTDOWN_VOL_THREAD) && !(osb->osb_flags & OCFS_OSB_FLAGS_BEING_DISMOUNTED)) { - if (OcfsGlobalCtxt.hbm == 0) { - LOG_ERROR_STR ("OcfsGlobalCtxt Heartbeat was 0"); + + if (OcfsGlobalCtxt.hbm == 0) OcfsGlobalCtxt.hbm = DISK_HBEAT_NO_COMM; - } - ocfs_sleep (500); + ocfs_sleep (OCFS_NM_HEARTBEAT_TIME); if ((OcfsGlobalCtxt.flags & OCFS_FLAG_SHUTDOWN_VOL_THREAD) || (osb->osb_flags & OCFS_OSB_FLAGS_BEING_DISMOUNTED)) @@ -135,4 +137,3 @@ int ocfs_volume_thread (void *arg) LOG_EXIT_LONG (0); return 0; } /* ocfs_volume_thread */ -#endif /* defined(DLM_THREAD_PER_VOLUME) */ diff -urNp x-ref/fs/ocfs/Linux/ocfsfile.c x/fs/ocfs/Linux/ocfsfile.c --- x-ref/fs/ocfs/Linux/ocfsfile.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Linux/ocfsfile.c Mon Oct 21 04:41:19 2002 @@ -71,12 +71,12 @@ int
ocfs_set_disposition_information (st ** Do some checking to see if the file can even be deleted. */ if (OIN->oin_flags & OCFS_OIN_DELETE_ON_CLOSE) { - LOG_ERROR_STR ("OCFS_OIN_DELETE_ON_CLOSE set"); + LOG_TRACE_STR ("OCFS_OIN_DELETE_ON_CLOSE set"); goto finally; } if (OIN->oin_flags & OCFS_OIN_ROOT_DIRECTORY) { - LOG_ERROR_STR ("OCFS_OIN_DIRECTORY set"); + LOG_TRACE_STR ("you cannot delete the root directory"); status = -EPERM; goto finally; } @@ -106,12 +106,9 @@ int ocfs_set_disposition_information (st &fileOff, FLAG_FILE_DELETE, NULL, NULL); if (status < 0) { - /* - * We probably don't need to do that, as it will be a good die to - * let the OIN get released even if the operation failed, so that - * we can create new OIN next time. ???? - */ - LOG_ERROR_STATUS(status); + if (status != -ENOTEMPTY && status != -EPERM && + status != -EBUSY) + LOG_ERROR_STATUS(status); if (OIN) { ocfs_down_sem (&(OIN->main_res), true); OCFS_CLEAR_FLAG (OIN->oin_flags, diff -urNp x-ref/fs/ocfs/Linux/ocfsfilp.c x/fs/ocfs/Linux/ocfsfilp.c --- x-ref/fs/ocfs/Linux/ocfsfilp.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Linux/ocfsfilp.c Thu Jan 1 01:00:00 1970 @@ -1,152 +0,0 @@ -/* - * ocfsfilp.c - * - * Provides userspace style open/read/write files in the kernel. - * - * Copyright (C) 2002 Oracle Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * You should have recieved a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * - * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran, - * Manish Singh, Wim Coekaerts - */ - -#include - -/* - * ocfs_open_file() - * - * Behaves similar to open(). Check out the manpages for the details - * on the arguments. - */ -krnl_file *ocfs_open_file (const char *pathname, int flags, mode_t mode) -{ - krnl_file *fp; - - if ((fp = kmalloc (sizeof (krnl_file), GFP_NOFS)) == NULL) { - errno = -ENOMEM; - return NULL; - } else - memset (fp, 0, sizeof (krnl_file)); - - fp->filp = filp_open (pathname, flags, mode); - if (IS_ERR (fp->filp)) { - errno = PTR_ERR (fp->filp); - kfree (fp); - return NULL; - } - - return fp; -} /* ocfs_open_file */ - -/* - * ocfs_close_file() - * - * Closes the open file. - */ -void ocfs_close_file (krnl_file * fp) -{ - if (fp) { - if (fp->filp) - filp_close (fp->filp, NULL); - - if (fp->buf) - kfree (fp->buf); - - kfree (fp); - } - - return; -} /* ocfs_close_file */ - -/* - * ocfs_read_file() - * - * Behaves similar to fgets(). Reads till the eol or (size - 1). - * Returned buffer is always null terminated. eol is not overwritten by null. - * Return value if >= 0 denotes the size of the returned buffer excluding - * terminating null. Return value < 0 denotes the errno. - */ -int ocfs_read_file (krnl_file * fp, char *s, int size) -{ - mm_segment_t old_fs; - int ind; - - ssize_t (*read) (struct file *, char *, size_t, loff_t *); - - if (!fp || !fp->filp) - return errno = -EBADF; - - if (!s || !size || !fp->filp->f_op - || ((read = fp->filp->f_op->read) == NULL)) - return errno = -EINVAL; - - if (fp->eof && fp->loc >= fp->readsz) - return 0; - - if (!fp->buf) { - fp->readsz = 0; - fp->loc = 0; - fp->bufsz = size + ((size % BUFFER_ALIGN) ? 
- (BUFFER_ALIGN - (size % BUFFER_ALIGN)) : 0); - if ((fp->buf = kmalloc (fp->bufsz, GFP_NOFS)) == NULL) - return errno = -ENOMEM; - } - - ind = 0; - - while (1) { - if (fp->loc < fp->readsz) { - for (; fp->loc < fp->readsz && ind < size - 1; - ++fp->loc, ++ind) { - if ((s[ind] = fp->buf[fp->loc]) == '\n') { - int fac = (ind < size - 1) ? 1 : 0; - - fp->loc += fac; - ind += fac; - s[ind] = '\0'; - return ind; - } - } - - if (ind == size - 1) { - s[ind] = '\0'; - return ind; - } - } - - if (fp->eof) { - s[ind] = '\0'; - return ind; - } else { - old_fs = get_fs (); - set_fs (get_ds ()); - fp->readsz = - read (fp->filp, fp->buf, fp->bufsz, - &fp->filp->f_pos); - set_fs (old_fs); - - if (fp->readsz == 0) - fp->eof = 1; - else if (fp->readsz < 0) - return errno = fp->readsz; - - fp->loc = 0; - } - } - - return 0; -} /* ocfs_read_file */ diff -urNp x-ref/fs/ocfs/Linux/ocfshash.c x/fs/ocfs/Linux/ocfshash.c --- x-ref/fs/ocfs/Linux/ocfshash.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Linux/ocfshash.c Mon Oct 21 04:41:19 2002 @@ -37,6 +37,7 @@ int ocfs_hash_create (HASHTABLE *ht, ub4 noofbits) { int ret = 0; + ub4 size = 0; LOG_ENTRY (); @@ -56,10 +57,10 @@ int ocfs_hash_create (HASHTABLE *ht, ub4 ocfs_init_sem (&(ht->hashlock)); - ht->buckets = - (HASHBUCKET *) ocfs_malloc ((ht->size * sizeof (HASHBUCKET))); + size = ht->size * sizeof (HASHBUCKET); + ht->buckets = (HASHBUCKET *) ocfs_malloc (size); if (!ht->buckets) { - LOG_ERROR_STR ("Error in mem alloc"); + LOG_ERROR_ARGS ("unable to allocate %u bytes of memory\n", size); goto bail; } @@ -133,7 +134,8 @@ void ocfs_hash_destroy (HASHTABLE *ht, v * @vallen: length of value * */ -int ocfs_hash_add (HASHTABLE * ht, void *key, ub4 keylen, void *val, ub4 vallen) +int ocfs_hash_add (HASHTABLE * ht, void *key, ub4 keylen, void *val, ub4 vallen, + void **found, ub4 *foundlen) { HASHBUCKET *bucket; HASHBUCKET *prvbucket = NULL; @@ -149,7 +151,10 @@ int ocfs_hash_add (HASHTABLE * ht, void goto bail; } - slot = ocfs_hash (key, keylen, 
ht->inithash) & ht->mask; + *found = NULL; + *foundlen = 0; + + slot = hash (key, keylen, ht->inithash) & ht->mask; bucket = &(ht->buckets[slot]); /* Acquire Lock */ @@ -159,9 +164,11 @@ int ocfs_hash_add (HASHTABLE * ht, void while (bucket) { if (bucket->key) { if (!memcmp (bucket->key, key, keylen)) { - /* return error if key already exists */ + /* return warning & val if key already exists */ LOG_ERROR_STR ("Duplicate key"); - ret = 0; + *found = bucket->val; + *foundlen = bucket->vallen; + ret = 2; goto bail; } } else { @@ -256,7 +263,7 @@ int ocfs_hash_del (HASHTABLE * ht, void if (!ht || !ht->buckets) goto bail; - slot = ocfs_hash (key, keylen, ht->inithash) & ht->mask; + slot = hash (key, keylen, ht->inithash) & ht->mask; bucket = &(ht->buckets[slot]); /* Acquire Lock */ @@ -322,7 +329,7 @@ int ocfs_hash_get (HASHTABLE * ht, void if (!ht || !ht->buckets) goto bail; - slot = ocfs_hash (key, keylen, ht->inithash) & ht->mask; + slot = hash (key, keylen, ht->inithash) & ht->mask; bucket = &(ht->buckets[slot]); /* Acquire Lock */ @@ -439,7 +446,7 @@ void ocfs_hash_stat (HASHTABLE * ht, cha * acceptable. Do NOT use for cryptographic purposes. * -------------------------------------------------------------------- */ -ub4 ocfs_hash (k, length, initval) +ub4 hash (k, length, initval) register ub1 *k; /* the key */ register ub4 length; /* the length of the key */ register ub4 initval; /* the previous hash, or an arbitrary value */ diff -urNp x-ref/fs/ocfs/Linux/ocfsiosup.c x/fs/ocfs/Linux/ocfsiosup.c --- x-ref/fs/ocfs/Linux/ocfsiosup.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Linux/ocfsiosup.c Mon Oct 21 04:41:19 2002 @@ -103,16 +103,17 @@ int LinuxWriteForceDisk (ocfs_super * os status = -EFAIL; goto bail; } - + lock_buffer(bh); memcpy (bh->b_data, Buffer, Length < sb->s_blocksize ? 
Length : sb->s_blocksize); + unlock_buffer(bh); mark_buffer_dirty (bh); ll_rw_block (WRITE, 1, &bh); if ((osb->cache_fs || Cached) && /* either type of cache */ Offset > osb->vol_layout.data_start_off) { - //wait_on_buffer (bh); - brelse (bh); + wait_on_buffer (bh); + put_bh (bh); } else { wait_on_buffer (bh); bforget(bh); @@ -187,15 +188,19 @@ int LinuxReadForceDisk (ocfs_super * osb mark_buffer_uptodate(bh, false); ll_rw_block (READ, 1, &bh); wait_on_buffer (bh); + lock_buffer(bh); memcpy (Buffer, bh->b_data, sb->s_blocksize); - bforget(bh); + unlock_buffer(bh); + put_bh(bh); bh = NULL; } else { //mark_buffer_uptodate(bh, false); ll_rw_block (READ, 1, &bh); wait_on_buffer (bh); + lock_buffer(bh); memcpy (Buffer, bh->b_data, sb->s_blocksize); - brelse (bh); + unlock_buffer(bh); + put_bh (bh); } Buffer = (ub1 *) Buffer + sb->s_blocksize; @@ -263,26 +268,45 @@ int ocfs_read_metadata (ocfs_super * osb sb8 diskOffsetToFind = 0, foundFileOffset = 0; sb8 foundDiskOffset = 0; ub4 tempLen = 0, numMetaDataRuns = 0, numDataRuns = 0; - ub4 remainingLength, length, ioRunSize, i = 0; + ub4 remainingLength, length, i = 0; + bool free_data=false, free_md=false; ocfs_io_runs *IoDataRuns = NULL, *IoMetaDataRuns = NULL; LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %u, %u.%u)\n", osb, Buffer, Length, HI (Offset), LO (Offset)); - ioRunSize = (OCFS_MAX_DATA_EXTENTS * sizeof (ocfs_io_runs)); - ioRunSize = OCFS_ALIGN (ioRunSize, PAGE_SIZE); - - IoDataRuns = ocfs_malloc (ioRunSize); - if (IoDataRuns == NULL) { - LOG_ERROR_STATUS (status = -ENOMEM); - goto finally; - } - - IoMetaDataRuns = ocfs_malloc (ioRunSize); - if (IoMetaDataRuns == NULL) { - LOG_ERROR_STATUS (status -ENOMEM); - goto finally; - } + /* try to use prealloc ioruns if available */ + ocfs_down_sem (&osb->osb_res, true); + if (! OSB_PREALLOC_LOCK_TEST(osb, OSB_DATA_LOCK)) + { + OSB_PREALLOC_LOCK_SET(osb, OSB_DATA_LOCK); + IoDataRuns = osb->data_prealloc; + } + if (! 
OSB_PREALLOC_LOCK_TEST(osb, OSB_MD_LOCK)) + { + OSB_PREALLOC_LOCK_SET(osb, OSB_MD_LOCK); + IoMetaDataRuns = osb->md_prealloc; + } + ocfs_up_sem(&osb->osb_res); + + if (IoDataRuns==NULL) + { + free_data=true; + IoDataRuns = ocfs_malloc (IORUN_ALLOC_SIZE); + if (IoDataRuns == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + } + if (IoMetaDataRuns==NULL) + { + free_md=true; + IoMetaDataRuns = ocfs_malloc (IORUN_ALLOC_SIZE); + if (IoMetaDataRuns == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto finally; + } + } remainingLength = Length; length = 0; @@ -301,6 +325,7 @@ int ocfs_read_metadata (ocfs_super * osb ocfs_down_sem (&(osb->map_lock), true); RunsInExtentMap = ocfs_extent_map_get_count (&osb->metadata_map); + #warning smp race wrt map count most likely for (ExtentMapIndex = 0; ExtentMapIndex < RunsInExtentMap; ExtentMapIndex++) { if (!ocfs_get_next_extent_map_entry @@ -445,16 +470,28 @@ int ocfs_read_metadata (ocfs_super * osb } finally: - if (IoDataRuns) { + if (IoDataRuns && free_data) { ocfs_free (IoDataRuns); IoDataRuns = NULL; } - if (IoMetaDataRuns) { + if (IoMetaDataRuns && free_md) { ocfs_free (IoMetaDataRuns); IoMetaDataRuns = NULL; } + ocfs_down_sem (&osb->osb_res, true); + if (!free_data && OSB_PREALLOC_LOCK_TEST(osb, OSB_DATA_LOCK)) + { + OSB_PREALLOC_LOCK_CLEAR(osb, OSB_DATA_LOCK); + } + if (!free_md && OSB_PREALLOC_LOCK_TEST(osb, OSB_MD_LOCK)) + { + OSB_PREALLOC_LOCK_CLEAR(osb, OSB_MD_LOCK); + } + ocfs_up_sem(&osb->osb_res); + + LOG_EXIT_STATUS (status); return (status); } /* ocfs_read_metadata */ @@ -487,6 +524,36 @@ int ocfs_read_force_disk (ocfs_super * o } /* ocfs_read_force_disk */ /* + * ocfs_read_force_disk_ex() + * + */ +int ocfs_read_force_disk_ex (ocfs_super * osb, void **Buffer, ub4 AllocLen, + ub4 ReadLen, ub8 Offset) +{ + int status = 0; + + LOG_ENTRY (); + + if (*Buffer == NULL) { + *Buffer = ocfs_malloc (AllocLen); + if (*Buffer == NULL) { + LOG_ERROR_STATUS (status = -ENOMEM); + goto bail; + } + } + + status =
ocfs_read_force_disk (osb, *Buffer, ReadLen, Offset); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto bail; + } + + bail: + LOG_EXIT_STATUS (status); + return status; +} /* ocfs_read_force_disk_ex */ + +/* * ocfs_read_disk() * */ @@ -501,8 +568,8 @@ int ocfs_read_disk (ocfs_super * osb, vo * ocfs_read_disk_ex() * */ -int ocfs_read_disk_ex (ocfs_super * osb, - void **Buffer, ub4 AllocLen, ub4 ReadLen, ub8 Offset) +int ocfs_read_disk_ex (ocfs_super * osb, void **Buffer, ub4 AllocLen, + ub4 ReadLen, ub8 Offset) { int status = 0; diff -urNp x-ref/fs/ocfs/Linux/ocfsipc.c x/fs/ocfs/Linux/ocfsipc.c --- x-ref/fs/ocfs/Linux/ocfsipc.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Linux/ocfsipc.c Mon Oct 21 04:41:19 2002 @@ -53,127 +53,31 @@ int ocfs_init_ipc (void) * ocfs_recv_thread() * */ -int ocfs_recv_thread (void *Context) +int ocfs_recv_thread (void *unused) { int status; - ub4 length = 0; - ub4 i; - ub4 commId = (ub4) Context; - ocfs_recv_context *RecvCtxt = NULL; - ocfs_recv_comp_context *RecvCompContext; - ocfs_comm_info *IpcConfigInfo; + ocfs_recv_ctxt *recv_ctxt = NULL; LOG_ENTRY (); - ocfs_daemonize ("ocfsrecvthread"); - - RecvCtxt = ocfs_malloc (sizeof (ocfs_recv_context)); - if (RecvCtxt == NULL) { - LOG_ERROR_STATUS (-ENOMEM); - goto bail; - } - - IpcConfigInfo = &(OcfsGlobalCtxt.comm_info[commId]); - - atomic_set (&RecvCtxt->num_posted, 0); - - RecvCtxt->event = ocfs_malloc (sizeof (wait_queue_head_t)); - if (RecvCtxt->event == NULL) { - LOG_ERROR_STATUS (-ENOMEM); - goto bail; - } - - init_waitqueue_head (RecvCtxt->event); - - RecvCtxt->free_lock = ocfs_malloc (sizeof (ocfs_sem)); - if (RecvCtxt->free_lock == NULL) { - LOG_ERROR_STATUS (-ENOMEM); - goto bail; - } - - ocfs_init_sem (RecvCtxt->free_lock); - - RecvCtxt->next_free = 0; - RecvCtxt->num_used = 0; - - /* Allocate low bnd packets for processing recv's */ - - for (i = 0; i < MAX_UDP_PACKETS; i++) { - RecvCtxt->recv_packet[i] = NULL; - RecvCtxt->free[i] = true; - } - - for (i = 0; i < 
OCFS_HIGH_MARK_UDP; i++) { - RecvCtxt->recv_packet[i] = ocfs_malloc (OCFS_MAX_DLM_PKT_SIZE); - if (RecvCtxt->recv_packet[i] == NULL) { - LOG_ERROR_STATUS (-ENOMEM); - } - } +#define LISTENER_PROCESS_NAME "ocfslsnr" + ocfs_daemonize (LISTENER_PROCESS_NAME, strlen(LISTENER_PROCESS_NAME)); while (1) { - length = 0; - - ocfs_down_sem (RecvCtxt->free_lock, true); - - while ((atomic_read (&RecvCtxt->num_posted) > OCFS_LOW_MARK_UDP) - && (RecvCtxt->num_used > OCFS_HIGH_MARK_UDP)) { - ocfs_up_sem (RecvCtxt->free_lock); - ocfs_wait (RecvCtxt->event, false, 1000); /* in ms */ - ocfs_down_sem (RecvCtxt->free_lock, true); - } - - for (i = 0; i < MAX_UDP_PACKETS; i++) { - if (RecvCtxt->free[i]) { - if (RecvCtxt->recv_packet[i] == NULL) { - RecvCtxt->recv_packet[i] = - ocfs_malloc (OCFS_MAX_DLM_PKT_SIZE); - if (RecvCtxt->recv_packet[i] == NULL) { - LOG_ERROR_STATUS (-ENOMEM); - continue; - } - } - RecvCtxt->free[i] = false; - break; - } - } - - if (i == MAX_UDP_PACKETS) { - ocfs_up_sem (RecvCtxt->free_lock); - continue; - } - - RecvCtxt->num_used++; - ocfs_up_sem (RecvCtxt->free_lock); - - /* Allocate a context to pass to the completion routine... 
*/ - RecvCompContext = ocfs_malloc (sizeof (ocfs_recv_comp_context)); - if (RecvCompContext == NULL) { + recv_ctxt = ocfs_malloc (sizeof (ocfs_recv_ctxt)); + if (recv_ctxt == NULL) { LOG_ERROR_STATUS (-ENOMEM); - RecvCtxt->free[i] = true; - RecvCtxt->num_used--; - ocfs_up_sem (RecvCtxt->free_lock); - continue; + goto bail; } - RecvCompContext->recv_ctxt = RecvCtxt; - RecvCompContext->index = i; - - atomic_inc (&RecvCtxt->num_posted); + memset (recv_ctxt, 0, sizeof (ocfs_recv_ctxt)); + recv_ctxt->msg_len = OCFS_MAX_DLM_PKT_SIZE; - length = OCFS_DLM_MAX_MSG_SIZE; - status = ocfs_recv_udp_msg (commId, RecvCtxt->recv_packet[i], &length, - NULL, RecvCompContext); + status = ocfs_recv_udp_msg (recv_ctxt); if (status < 0) { + ocfs_safefree (recv_ctxt); if (status != -EBADF) { - ocfs_down_sem (RecvCtxt->free_lock, true); - RecvCtxt->free[i] = true; - RecvCtxt->num_used--; - ocfs_up_sem (RecvCtxt->free_lock); - ocfs_safefree (RecvCompContext); LOG_ERROR_STATUS (status); - /* Delay for a while to the stack stabilize on startup of machine */ - /* BUGBUG : we might need to free up the recv_packet memory */ - /* in case of an Error. */ } else { /* Thread is being killed. 
*/ goto finally; @@ -210,37 +114,28 @@ int ocfs_recv_thread (void *Context) int ocfs_init_udp (void) { int status = 0; - ub4 i; int child_pid; LOG_ENTRY (); - /* Start the receive threads */ - for (i = 0; i < OCFS_MAX_IPC; i++) { - /* Create receive thread only for active network transports */ - if (!OcfsGlobalCtxt.comm_info[i].active) - continue; - - status = ocfs_init_udp_sock (i, &OcfsIpcCtxt.send_sock, - &OcfsIpcCtxt.recv_sock); - if (status < 0) { - LOG_ERROR_STATUS (status); - goto bail; - } + /* start the listener thread */ + status = ocfs_init_udp_sock (&OcfsIpcCtxt.send_sock, + &OcfsIpcCtxt.recv_sock); + if (status < 0) { + LOG_ERROR_STATUS (status); + goto bail; + } - child_pid = kernel_thread (ocfs_recv_thread, (void *) i, - CLONE_FS | CLONE_FILES | - CLONE_SIGHAND); - if (child_pid < 0) { - status = -EFAIL; - LOG_ERROR_ARGS ("unable to launch ipcdlm thread (%d)\n", - child_pid); - goto bail; - } else { - init_completion (&(OcfsIpcCtxt.complete)); - if (!ocfs_get_task (child_pid, &(OcfsIpcCtxt.task))) - OcfsIpcCtxt.task = NULL; - } + child_pid = kernel_thread (ocfs_recv_thread, NULL, + CLONE_FS | CLONE_FILES | CLONE_SIGHAND); + if (child_pid < 0) { + status = -EFAIL; + LOG_ERROR_ARGS ("unable to launch ipcdlm (%d)\n", child_pid); + goto bail; + } else { + init_completion (&(OcfsIpcCtxt.complete)); + if (!ocfs_get_task (child_pid, &(OcfsIpcCtxt.task))) + OcfsIpcCtxt.task = NULL; } bail: @@ -252,7 +147,7 @@ int ocfs_init_udp (void) * ocfs_init_ipc_dlm() * */ -int ocfs_init_ipc_dlm (ocfs_ipc_dlm_config * IpcDlmConfig, ocfs_protocol Protocol) +int ocfs_init_ipc_dlm (ocfs_protocol protocol) { int status = 0; @@ -263,8 +158,9 @@ int ocfs_init_ipc_dlm (ocfs_ipc_dlm_conf OcfsIpcCtxt.dlm_msg_size = OCFS_DLM_MAX_MSG_SIZE; OcfsIpcCtxt.version = OCFS_IPC_DLM_VERSION; - switch (Protocol) { + switch (protocol) { case OCFS_TCP: + status = -EINVAL; break; case OCFS_UDP: @@ -272,7 +168,7 @@ int ocfs_init_ipc_dlm (ocfs_ipc_dlm_conf break; default: - return -EINVAL; + 
status = -EINVAL; break; } @@ -284,9 +180,8 @@ int ocfs_init_ipc_dlm (ocfs_ipc_dlm_conf * ocfs_send_udp_msg() * */ -int ocfs_send_udp_msg (ocfs_ipc_config_info * SendIpcCfgInfo, - ocfs_ipc_config_info * LocalIpcCfgInfo, - void *Mesg, ub4 Length, wait_queue_head_t * Event) +int ocfs_send_udp_msg (ocfs_ipc_config_info * send, void *msg, ub4 msglen, + wait_queue_head_t * event) { struct sockaddr_in sin; int status; @@ -295,14 +190,14 @@ int ocfs_send_udp_msg (ocfs_ipc_config_i memset (&sin, 0, sizeof (sin)); sin.sin_family = AF_INET; - sin.sin_addr.s_addr = SendIpcCfgInfo->addr; - sin.sin_port = htons (SendIpcCfgInfo->port); + sin.sin_addr.s_addr = in_aton(send->ip_addr); + sin.sin_port = htons(send->ip_port); + + LOG_TRACE_ARGS ("about to send to %s:%u\n", + send->ip_addr, send->ip_port); - LOG_TRACE_ARGS ("about to send to %d.%d.%d.%d:%u\n", - NIPQUAD (sin.sin_addr.s_addr), ntohs (sin.sin_port)); - status = - ocfs_send_to (OcfsIpcCtxt.send_sock, (struct sockaddr *) &sin, - sizeof (sin), Mesg, Length); + status = ocfs_send_to (OcfsIpcCtxt.send_sock, (struct sockaddr *) &sin, + sizeof (sin), msg, msglen); if (status < 0) { LOG_ERROR_STATUS (status); goto bail; @@ -314,73 +209,37 @@ int ocfs_send_udp_msg (ocfs_ipc_config_i } /* ocfs_send_udp_msg */ /* - * ocfs_recv_completion() - * - */ -int ocfs_recv_completion (ocfs_recv_comp_context * RecvCompContext) -{ - LOG_ENTRY (); - - if (RecvCompContext) { - /* Queue the work Item in order to do the real work for the receive */ - schedule_task (RecvCompContext->work_item); - } else { - LOG_ERROR_STR("error in ocfs_recv_completion"); - } - - LOG_EXIT (); - return 0; -} /* ocfs_recv_completion */ - -/* * ocfs_recv_udp_msg() * */ -int ocfs_recv_udp_msg (ub4 myCommIndex, - void *Mesg, - ub4 * Length, - wait_queue_head_t * Event, ocfs_recv_comp_context * RecvCompContext) +int ocfs_recv_udp_msg (ocfs_recv_ctxt * recv_ctxt) { struct sockaddr_in sin; int sinlen; int status = -EFAIL; - struct tq_struct *work_item = NULL; LOG_ENTRY (); 
- work_item = ocfs_malloc (sizeof (struct tq_struct)); - if (work_item == NULL) { - LOG_ERROR_STATUS (status = -ENOMEM); - goto bail; - } else - memset (work_item, 0, sizeof (struct tq_struct)); - /* Initialize the workitem with our worker routine and Q it. */ - INIT_TQUEUE (work_item, ocfs_dlm_recv_msg, RecvCompContext); - - RecvCompContext->work_item = work_item; + INIT_TQUEUE (&recv_ctxt->ipc_tq, ocfs_dlm_recv_msg, recv_ctxt); sinlen = sizeof (struct sockaddr_in); + memset (&sin, 0, sinlen); - memset (&sin, 0, sizeof (sin)); - - status = - ocfs_recv_from (OcfsIpcCtxt.recv_sock, - (struct sockaddr *) &sin, &sinlen, Mesg, (int *) Length); + status = ocfs_recv_from (OcfsIpcCtxt.recv_sock, + (struct sockaddr *) &sin, &sinlen, + recv_ctxt->msg, (int *)&recv_ctxt->msg_len); if (status < 0) { - if (status == -EBADF) - goto bail; - else { + if (status != -EBADF) LOG_ERROR_STATUS (status); - goto bail; - } + goto bail; } LOG_TRACE_ARGS ("Received packet from: %d.%d.%d.%d\n", NIPQUAD (sin.sin_addr.s_addr)); if (status == 0) - ocfs_recv_completion (RecvCompContext); + schedule_task (&recv_ctxt->ipc_tq); bail: LOG_EXIT_STATUS (status); @@ -391,151 +250,28 @@ int ocfs_recv_udp_msg (ub4 myCommIndex, * ocfs_send_bcast() * */ -int ocfs_send_bcast (ocfs_super * osb, ub8 VoteMap, ocfs_dlm_msg * DlmMesg) +int ocfs_send_bcast (ocfs_super * osb, ub8 votemap, ocfs_dlm_msg * dlm_msg) { int status = 0; - ub8 nodeMap; - ub4 nodeNum; - ub4 primeIpc; - ub4 commIndex; - ocfs_node_config_info *Node; - ub8 curr_tick = 0; - bool bSentOnSec = false; - ub4 timeIncr; - ub4 i; - - LOG_ENTRY_ARGS ("(votemap=0x%x)\n", VoteMap); - - timeIncr = 100000; - - /* Look at votemap and send to each node we need to sent to */ - for (nodeMap = VoteMap, nodeNum = 0; nodeMap != 0; - nodeMap >>= 1, nodeNum++) { - bSentOnSec = false; - - if (nodeMap & 0x1) { - /* Figure out which interconnect we want to send on */ - Node = osb->node_cfg_info[nodeNum]; - - LOG_TRACE_ARGS - ("Sending msg to node=%u, name=%s, 
interface=%u, " - "prim=0x%x\n", nodeNum, Node->node_name, - Node->num_interfaces, Node->primary_comm); - - if (Node->num_interfaces == 0) - continue; - -/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */ -/* !!! first comparison is pointless !!! */ -/* !!! since primary_comm is unsigned !!! */ -/* !!! removing it !!! */ -/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */ - - // if((Node->primary_comm >= 0) && (Node->primary_comm < OCFS_MAX_IPC) && - if ((Node->primary_comm < OCFS_MAX_IPC) && - (Node->primary_comm < Node->num_interfaces)) { - primeIpc = Node->primary_comm; - - Node->ipc_config[primeIpc].state |= - OCFS_IPC_STATE_CONFIG | - OCFS_IPC_STATE_PRIMARY; - - if ((Node->ipc_config[primeIpc]. - state & OCFS_IPC_STATE_CONFIG) - && (Node->ipc_config[primeIpc]. - state & OCFS_IPC_STATE_PRIMARY)) { - ocfs_dlm_send_msg (osb, - &Node-> - ipc_config[primeIpc], - DlmMesg); - curr_tick = jiffies; - - if (curr_tick > - Node->exp_recv[primeIpc]) { - /* Set the IPC as INactive */ - OCFS_CLEAR_FLAG (Node-> - ipc_config - [primeIpc].state, - OCFS_IPC_STATE_ACTIVE); - OCFS_SET_FLAG (Node-> - ipc_config - [primeIpc].state, - OCFS_IPC_STATE_INACTIVE); - } - - Node->exp_recv[primeIpc] = - curr_tick + - (10 * 1000 * 1000) / timeIncr; - - OCFS_SET_FLAG (Node->ipc_config[primeIpc]. - state, - OCFS_IPC_STATE_ACTIVE); - - if (Node->ipc_config[primeIpc]. - state & OCFS_IPC_STATE_ACTIVE) - continue; - } - } + ub4 nodemap; + ub4 nodenum; + ocfs_node_config_info *node; + + LOG_ENTRY_ARGS ("(votemap=0x%x)\n", LO(votemap)); + + for (nodemap = LO(votemap), nodenum = 0; nodemap != 0; + nodemap >>= 1, nodenum++) { + if (nodenum == osb->node_num) + continue; - for (i = 0; i < Node->num_interfaces; i++) { - commIndex = - (Node->last_comm_indx % - Node->num_interfaces); - curr_tick = jiffies; - - if (curr_tick > Node->exp_recv[commIndex]) { - /* Set the IPC as INactive */ - OCFS_CLEAR_FLAG (Node-> - ipc_config[commIndex]. 
- state, - OCFS_IPC_STATE_ACTIVE); - OCFS_SET_FLAG (Node-> - ipc_config[commIndex]. - state, - OCFS_IPC_STATE_INACTIVE); - } - - if ((Node->ipc_config[commIndex]. - state & OCFS_IPC_STATE_CONFIG) - && (Node->ipc_config[commIndex]. - state & OCFS_IPC_STATE_ACTIVE) - && - (!(Node->ipc_config[commIndex]. - state & OCFS_IPC_STATE_PRIMARY))) { - ocfs_dlm_send_msg (osb, - &Node-> - ipc_config[commIndex], - DlmMesg); - Node->exp_recv[commIndex] = - curr_tick + - (10 * 1000 * 1000) / timeIncr; - Node->last_comm_indx = commIndex; - bSentOnSec = true; - break; - } - } + if (!(nodemap & 0x1)) + continue; - if (!bSentOnSec) { - commIndex = - ((Node->last_comm_indx + - 1) % Node->num_interfaces); - Node->last_comm_indx = commIndex; - - if ((Node->ipc_config[commIndex]. - state & OCFS_IPC_STATE_CONFIG) - && - (!(Node->ipc_config[commIndex]. - state & OCFS_IPC_STATE_PRIMARY))) { - ocfs_dlm_send_msg (osb, - &Node-> - ipc_config[commIndex], - DlmMesg); - Node->exp_recv[commIndex] = - curr_tick + - (10 * 1000 * 1000) / timeIncr; - Node->last_comm_indx = commIndex; - } - } + node = osb->node_cfg_info[nodenum]; + if (node) { + LOG_TRACE_ARGS ("Sending msg to node=%u, name=%s\n", + nodenum, node->node_name); + ocfs_dlm_send_msg (osb, &node->ipc_config, dlm_msg); } } @@ -547,27 +283,12 @@ int ocfs_send_bcast (ocfs_super * osb, u * ocfs_dlm_send_msg() * */ -void ocfs_dlm_send_msg (ocfs_super * osb, - ocfs_ipc_config_info * IpcCfgInfo, ocfs_dlm_msg * DlmMesg) +void ocfs_dlm_send_msg (ocfs_super * osb, ocfs_ipc_config_info * send, + ocfs_dlm_msg * dlm_msg) { - ocfs_node_config_info *Node; - ub4 i; - LOG_ENTRY (); - Node = osb->node_cfg_info[osb->node_num]; - - /* Match the mask to determine which comm interface to send over */ - for (i = 0; i < Node->num_interfaces; i++) { - if (IpcCfgInfo->mask == Node->ipc_config[i].mask) { - ocfs_send_udp_msg (IpcCfgInfo, &Node->ipc_config[i], DlmMesg, - DlmMesg->msg_len, NULL); - return; - } - } - - ocfs_send_udp_msg (IpcCfgInfo, 
&Node->ipc_config[Node->primary_comm], - DlmMesg, DlmMesg->msg_len, NULL); + ocfs_send_udp_msg (send, dlm_msg, dlm_msg->msg_len, NULL); LOG_EXIT (); return; @@ -577,21 +298,18 @@ void ocfs_dlm_send_msg (ocfs_super * osb * ocfs_init_udp_sock() * */ -int ocfs_init_udp_sock (int commid, - struct socket **send_sock, struct socket **recv_sock) +int ocfs_init_udp_sock (struct socket **send_sock, struct socket **recv_sock) { struct sockaddr_in sin; - int error; int status = -EFAIL; - ocfs_comm_info *Comm; + ocfs_comm_info *comm; LOG_ENTRY (); /* Create Send Socket */ - error = sock_create (PF_INET, SOCK_DGRAM, IPPROTO_UDP, send_sock); - if (error < 0) { - LOG_ERROR_ARGS ("unable to create send_socket, error=%d\n", - error); + status = sock_create (PF_INET, SOCK_DGRAM, IPPROTO_UDP, send_sock); + if (status < 0) { + LOG_ERROR_ARGS ("unable to create socket, error=%d\n", status); goto bail; } @@ -601,40 +319,35 @@ int ocfs_init_udp_sock (int commid, sin.sin_addr.s_addr = htonl (INADDR_ANY); sin.sin_port = htons (0); - error = (*send_sock)->ops->bind (*send_sock, (struct sockaddr *) &sin, + status = (*send_sock)->ops->bind (*send_sock, (struct sockaddr *) &sin, sizeof (sin)); - if (error < 0) { - LOG_ERROR_ARGS ("unable to bind send_socket, error=%d\n", - error); + if (status < 0) { + LOG_ERROR_ARGS ("unable to bind socket, error=%d\n", status); goto bail; } /* Create Receive Socket */ - error = sock_create (PF_INET, SOCK_DGRAM, IPPROTO_UDP, recv_sock); - if (error < 0) { - LOG_ERROR_ARGS ("unable to create recv_socket, error=%d\n", - error); + status = sock_create (PF_INET, SOCK_DGRAM, IPPROTO_UDP, recv_sock); + if (status < 0) { + LOG_ERROR_ARGS ("unable to create socket, error=%d\n", status); goto bail; } - Comm = &(OcfsGlobalCtxt.comm_info[commid]); + comm = &(OcfsGlobalCtxt.comm_info); /* Bind Receive Socket */ memset (&sin, 0, sizeof (sin)); sin.sin_family = AF_INET; sin.sin_addr.s_addr = htonl (INADDR_ANY); - sin.sin_port = htons (Comm->port); + sin.sin_port = htons 
(comm->ip_port); - error = (*recv_sock)->ops->bind (*recv_sock, (struct sockaddr *) &sin, + status = (*recv_sock)->ops->bind (*recv_sock, (struct sockaddr *) &sin, sizeof (sin)); - if (error < 0) { - LOG_ERROR_ARGS ("unable to bind recv_socket, error=%d\n", - error); + if (status < 0) { + LOG_ERROR_ARGS ("unable to bind socket, error=%d\n", status); goto bail; } - status = 0; - bail: LOG_EXIT_STATUS (status); return status; @@ -644,8 +357,8 @@ int ocfs_init_udp_sock (int commid, * ocfs_send_to() * */ -int ocfs_send_to (struct socket *sock, - struct sockaddr *addr, int addrlen, char *buf, int buflen) +int ocfs_send_to (struct socket *sock, struct sockaddr *addr, int addrlen, + char *buf, int buflen) { int error; struct msghdr msg; @@ -683,8 +396,8 @@ int ocfs_send_to (struct socket *sock, * ocfs_recv_from() * */ -int ocfs_recv_from (struct socket *sock, - struct sockaddr *addr, int *addrlen, char *buf, int *buflen) +int ocfs_recv_from (struct socket *sock, struct sockaddr *addr, int *addrlen, + char *buf, int *buflen) { struct iovec iov; struct msghdr msg; @@ -711,7 +424,7 @@ int ocfs_recv_from (struct socket *sock, if (error < 0) { if (error == -ERESTARTSYS) { status = -EBADF; - LOG_TRACE_STR ("Shutting down ipcdlm\n"); + LOG_TRACE_STR ("Shutting down ipcdlm"); goto bail; } else { LOG_ERROR_ARGS ("unable to recvmsg, error=%d\n", error); diff -urNp x-ref/fs/ocfs/Linux/ocfsmain.c x/fs/ocfs/Linux/ocfsmain.c --- x-ref/fs/ocfs/Linux/ocfsmain.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Linux/ocfsmain.c Mon Oct 21 04:41:19 2002 @@ -28,7 +28,6 @@ #include #include -#include /* Tracing */ #define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_INIT @@ -42,23 +41,44 @@ ** Globals */ ocfs_global_ctxt OcfsGlobalCtxt; -ub4 OcfsDebugCtxt = OCFS_DEBUG_CONTEXT_INIT; -ub4 OcfsDebugLevel = OCFS_DEBUG_LEVEL_ERROR; char *OcfsVersion = "0.6"; +ocfs_errent ocfs_error_strings[] = + {{-EACCES, "Permission denied"}, + {-EAGAIN, "Try again"}, + {-EBADF, "Bad file number"}, + {-EBADFD, "File descriptor 
in bad state"}, + {-EBUSY, "Device or resource busy"}, + {-EEXIST, "File exists"}, + {-EINVAL, "Invalid argument"}, + {-EIO, "I/O error"}, + {-EMLINK, "Too many links"}, + {-ENAMETOOLONG, "File name too long"}, + {-ENOENT, "No such file or directory"}, + {-ENOMEM, "Out of memory"}, + {-ENOSPC, "No space left on device"}, + {-ENOTDIR, "Not a directory"}, + {-ENOTEMPTY, "Directory not empty"}, + {-ENOTTY, "Not a typewriter"}, + {-ENXIO, "No such device or address"}, + {-EPERM, "Operation not permitted"}, + {-ESTALE, "Stale NFS file handle"}, + {-ETIMEDOUT, "Connection timed out"}, + {-EFAIL, "Generic OCFS failure"}}; +int ocfs_num_err = sizeof(ocfs_error_strings)/sizeof(ocfs_errent); + + +ub4 OcfsDebugCtxt; +ub4 OcfsDebugLevel; + + spinlock_t ProtectOSBId; ub4 GlobalOSBId; /* Keeps track of next available OSB Id */ spinlock_t ProtectMountCount; ub4 GlobalMountCount; /* Number of volumes currently mounted */ -struct list_head item_list; int ocfs_get_block2 (struct inode *inode, long iblock, long *oblock); -#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,10) -static int ocfs_direct_IO (int rw, - struct inode *inode, - struct kiobuf *iobuf, unsigned long blocknr, int blocksize); -#endif #define ocfs_rmdir ocfs_unlink @@ -133,19 +153,31 @@ static struct inode_operations ocfs_file // revalidate: ocfs_inode_revalidate, }; -char *name = NULL; -ub4 dbgctxt = 0; -ub4 dbglvl = 0; - -MODULE_PARM (name, "s"); -MODULE_PARM (dbgctxt, "l"); -MODULE_PARM (dbglvl, "l"); +char *node_name = NULL; +ub4 node_number = OCFS_INVALID_NODE_NUM; +ub4 debug_context = 0; +ub4 debug_level = 0; +char *ip_address = NULL; +ub4 ip_port = 0; +char *guid = NULL; +ub4 cs = 0; + +MODULE_PARM (node_name, "s"); +MODULE_PARM (node_number, "l"); +MODULE_PARM (debug_context, "l"); +MODULE_PARM (debug_level, "l"); +MODULE_PARM (ip_address, "s"); +MODULE_PARM (ip_port, "l"); +MODULE_PARM (guid, "s"); +MODULE_PARM (cs, "l"); /* * ocfs_parse_options() * + * e.g., gid=9999,uid=9999,[no]cache,reclaimid */ -static 
int ocfs_parse_options (char *options, uid_t * uid, gid_t * gid, bool * cache) +static int ocfs_parse_options (char *options, uid_t * uid, gid_t * gid, + bool * cache, bool * reclaim_id) { char *c; char *value; @@ -154,6 +186,7 @@ static int ocfs_parse_options (char *opt LOG_ENTRY (); *cache = false; + *reclaim_id = false; if (!options) { ret = 0; goto bail; @@ -190,6 +223,8 @@ static int ocfs_parse_options (char *opt *cache = true; } else if (!strcmp (c, "nocache")) { *cache = false; + } else if (!strcmp (c, "reclaimid")) { + *reclaim_id = true; } else { LOG_ERROR_ARGS ("Invalid mount option: %s\n", c); goto bail; @@ -215,15 +250,16 @@ static struct super_block *ocfs_read_sup uid_t uid = current->fsuid; gid_t gid = current->fsgid; bool c; + bool reclaim_id; ocfs_super *osb; LOG_ENTRY (); MOD_INC_USE_COUNT; - if (ocfs_parse_options (data, &uid, &gid, &c) != 0) { + if (ocfs_parse_options (data, &uid, &gid, &c, &reclaim_id) != 0) { LOG_ERROR_STR ("ocfs_read_super: bad mount option"); - return NULL; + goto read_super_error; } /* TODO: fix this */ @@ -237,11 +273,12 @@ static struct super_block *ocfs_read_sup /* this is needed to support O_LARGE_FILE */ sb->s_maxbytes = OCFS_LINUX_MAX_FILE_SIZE; - status = ocfs_mount_volume (sb); + status = ocfs_mount_volume (sb, reclaim_id); osb = ((ocfs_super *)(sb->u.generic_sbp)); if (status < 0 || !osb) goto read_super_error; - osb->cache_fs = c; /* set caching type */ + + osb->cache_fs = c; /* set caching type */ inode = iget4 (sb, OCFS_ROOT_INODE_NUMBER, 0, NULL); if (!inode) { @@ -288,42 +325,23 @@ static int __init ocfs_driver_entry (voi { int status = 0; - ocfs_version_print (); - - /* Read the debug parameters passed during insmod */ - OcfsDebugCtxt = dbgctxt; - OcfsDebugLevel = dbglvl; - LOG_ENTRY (); + ocfs_version_print (); + memset (&OcfsGlobalCtxt, 0, sizeof (ocfs_global_ctxt)); memset (&OcfsIpcCtxt, 0, sizeof (ocfs_ipc_ctxt)); - /* Read /etc/ocfs.conf */ - status = ocfs_read_node_info (); - if (status < 0) { - 
LOG_ERROR_STATUS (status); - goto leave; - } - - if (name) { - if (strlen (name) < MAX_NODE_NAME_LENGTH) { - OcfsGlobalCtxt.node_name = name; - LOG_TRACE_ARGS ("Node name: '%s'\n", - OcfsGlobalCtxt.node_name); - } else { - status = -EINVAL; - LOG_ERROR_STR ("'name' too long"); - goto leave; - } - } else { - status = -EINVAL; - LOG_ERROR_STR ("'name' not set during insmod"); - goto leave; - } - +#ifdef OCFS_LINUX_MEM_DEBUG + INIT_LIST_HEAD (&(OcfsGlobalCtxt.item_list)); +#endif + INIT_LIST_HEAD (&(OcfsGlobalCtxt.osb_next)); INIT_LIST_HEAD (&(OcfsGlobalCtxt.osb_next)); + /* Read remaining insmod params */ + if ((status = ocfs_read_params ()) < 0) + goto leave; + /* Initialize some required fields */ OcfsGlobalCtxt.obj_id.type = OCFS_TYPE_GLOBAL_DATA; OcfsGlobalCtxt.obj_id.size = sizeof (ocfs_global_ctxt); @@ -348,8 +366,6 @@ static int __init ocfs_driver_entry (voi OcfsGlobalCtxt.hbm = DISK_HBEAT_COMM_ON; - INIT_LIST_HEAD (&item_list); - spin_lock_init (&ProtectOSBId); spin_lock (&ProtectOSBId); GlobalOSBId = 0; @@ -360,21 +376,6 @@ static int __init ocfs_driver_entry (voi GlobalMountCount = 0; spin_unlock (&ProtectMountCount); -#if !defined(DLM_THREAD_PER_VOLUME) - /* Initialize DLMThreadMonitor */ - ocfs_init_sem (&(OcfsGlobalCtxt.DLMThreadMonitor)); - - /* Launch one DLMThread for all volumes */ - child_pid = kernel_thread (ocfs_polling_thread, NULL, - CLONE_FS | CLONE_FILES | CLONE_SIGHAND); - if (child_pid < 0) { - status = -EFAIL; - LOG_ERROR_ARGS ("unable to launch dlm thread (%d)\n", - child_pid); - goto leave; - } -#endif - /* Initialize the proc interface */ ocfs_proc_init (); @@ -399,142 +400,78 @@ static int __init ocfs_driver_entry (voi return -1; } /* ocfs_driver_entry */ -#ifdef OCFS_LINUX_MEM_DEBUG -typedef struct _ocfs_mem_object +/* + * ocfs_read_params() + * + * Read insmod params + */ +int ocfs_read_params(void) { - char name[30]; - ocfs_obj_id objid; -} -ocfs_mem_object; - -ocfs_mem_object mobj[] = { - {"ocfs_file", {OCFS_TYPE_OFILE, sizeof 
(ocfs_file)} - } - , - {"ocfs_inode", {OCFS_TYPE_OIN, sizeof (ocfs_inode)} - } - , - {"ocfs_super", {OCFS_TYPE_OSB, sizeof (ocfs_super)} - } - , - {"ocfs_global_ctxt", {OCFS_TYPE_GLOBAL_DATA, - sizeof (ocfs_global_ctxt)} - } - , - {"ocfs_dir_node", {-1, (128 * 1024)} - } - , - {"ocfs_alloc_ext", {-1, sizeof (ocfs_alloc_ext)} - } - , - {"COMP_CONTEXT", {-1, sizeof (COMP_CONTEXT)} - } - , - {"ocfs_index_node", {-1, sizeof (ocfs_index_node)} - } - , - {"ocfs_index_hdr", {-1, sizeof (ocfs_index_hdr)} - } - , - //{"INDOUBT_ENTRY", { -1, sizeof(INDOUBT_ENTRY) } }, - {"IPC_DLM_CONFIG", {-1, sizeof (IPC_DLM_CONFIG)} - } - , - {"ocfs_extent_map", {-1, sizeof (ocfs_extent_map)} - } - , - {"NODE_CONFIG_INFO", {-1, sizeof (NODE_CONFIG_INFO)} - } - , - {"ocfs_extent_group", {-1, sizeof (ocfs_extent_group)} - } - , - {"OCFS_ALLOC_LOG", {-1, sizeof (OCFS_ALLOC_LOG)} - } - , - {"OCFS_BCAST_REL_LOG", {-1, sizeof (OCFS_BCAST_REL_LOG)} - } - , - {"OCFS_CLEANUP_RECORD", {-1, sizeof (OCFS_CLEANUP_RECORD)} - } - , - {"OCFS_DELETE_LOG", {-1, sizeof (OCFS_DELETE_LOG)} - } - , - {"OCFS_DIR_LOG", {-1, sizeof (OCFS_DIR_LOG)} - } - , - {"ocfs_disk_entry", {-1, sizeof (ocfs_disk_entry)} - } - , - {"ocfs_disk_lock", {-1, sizeof (ocfs_disk_lock)} - } - , - {"ocfs_file_entry", {-1, sizeof (ocfs_file_entry)} - } - , - {"OCFS_FREE_BIMAP", {-1, sizeof (OCFS_FREE_BIMAP)} - } - , - {"OCFS_FREE_EXTENT_LOG", {-1, sizeof (OCFS_FREE_EXTENT_LOG)} - } - , - {"OCFS_FREE_LOG", {-1, sizeof (OCFS_FREE_LOG)} - } - , - {"OCFS_IO_RUNS", {-1, sizeof (OCFS_IO_RUNS)} - } - , - {"ocfs_ipc_ctxt", {-1, sizeof (ocfs_ipc_ctxt)} - } - , - {"OCFS_LOCK_LOG", {-1, sizeof (OCFS_LOCK_LOG)} - } - , - {"ocfs_lock_res", {-1, sizeof (ocfs_lock_res)} - } - , - {"OCFS_LOCK_UPDATE", {-1, sizeof (OCFS_LOCK_UPDATE)} - } - , - {"OCFS_LOG_RECORD", {-1, sizeof (OCFS_LOG_RECORD)} - } - , - {"ocfs_obj_id", {-1, sizeof (ocfs_obj_id)} - } - , - {"OCFS_RECOVERY_LOG", {-1, sizeof (OCFS_RECOVERY_LOG)} - } - , - {"ocfs_publish", {-1, sizeof 
(ocfs_publish)} - } - , - {"ocfs_alloc_bm", {-1, sizeof (ocfs_alloc_bm)} - } - , - {"ocfs_vol_layout", {-1, sizeof (ocfs_vol_layout)} - } - , - {"ocfs_vol_node_map", {-1, sizeof (ocfs_vol_node_map)} - } - , - {"ocfs_vol_disk_hdr", {-1, sizeof (ocfs_vol_disk_hdr)} - } - , - {"ocfs_vol_label", {-1, sizeof (ocfs_vol_label)} - } - , - {"ocfs_vote", {-1, sizeof (ocfs_vote)} - } - , - {"alloc_item", {-1, sizeof (alloc_item)} - } - , - {"ocfs_extent", {-1, sizeof (ocfs_extent)} - } - , -}; + int status = 0; + ub4 check_sum = 0; + int i; + + /* Read remaining insmod params */ + if (node_number != OCFS_INVALID_NODE_NUM) { + if (node_number >= 0 && node_number < OCFS_MAXIMUM_NODES) { + OcfsGlobalCtxt.pref_node_num = node_number; + LOG_TRACE_ARGS("Preferred node number: %d\n", + node_number); + } + else { + status = -EINVAL; + LOG_ERROR_STR("'node_number' must be between 0 and 31"); + } + } + + if (ip_port == 0) + OcfsGlobalCtxt.comm_info.ip_port = OCFS_IPC_DEFAULT_PORT; + else + OcfsGlobalCtxt.comm_info.ip_port = ip_port; + LOG_TRACE_ARGS("IP port: %d\n", OcfsGlobalCtxt.comm_info.ip_port); + + if (node_name && strlen (node_name) < MAX_NODE_NAME_LENGTH) { + OcfsGlobalCtxt.node_name = node_name; + LOG_TRACE_ARGS ("Node name: %s\n", OcfsGlobalCtxt.node_name); + } else { + status = -EINVAL; + LOG_ERROR_STR ("'node_name' not set or too long"); + } + + if (ip_address && strlen (ip_address) < MAX_IP_ADDR_LEN) { + OcfsGlobalCtxt.comm_info.ip_addr = ip_address; + LOG_TRACE_ARGS ("IP address: %s\n", ip_address); + } else { + status = -EINVAL; + LOG_ERROR_STR ("'ip_address' not set or too long"); + } + + if (guid && strlen (guid) == GUID_LEN) { + memcpy (&OcfsGlobalCtxt.guid.guid, guid, GUID_LEN); + LOG_TRACE_ARGS ("Node guid: %s\n", guid); + } else { + status = -EINVAL; + LOG_ERROR_STR ("'guid' not set correctly"); + } + if (status == 0) { + for (i = 0; i < GUID_LEN; ++i) + check_sum += (ub4) guid[i]; + if (cs != check_sum) { + status = -EINVAL; + LOG_ERROR_STR ("load module using 
load_ocfs"); + } + } + + /* hardcoding... not used yet */ + OcfsGlobalCtxt.comm_info.type = OCFS_UDP; + OcfsGlobalCtxt.comm_info.ip_mask = NULL; + + return status; +} /* ocfs_read_params */ + + +#ifdef OCFS_LINUX_MEM_DEBUG /* * ocfs_memcheck() * @@ -542,79 +479,37 @@ ocfs_mem_object mobj[] = { static void ocfs_memcheck (void) { struct list_head *iter; - int i; alloc_item *item; - char maybe[1000]; + char *memtype; + char sizeinfo[20]; - if ((OcfsDebugCtxt & OCFS_DEBUG_CONTEXT_MEM) && - (OcfsDebugLevel & OCFS_DEBUG_LEVEL_TRACE)) { - list_for_each (iter, &item_list) { - maybe[0] = '\0'; - item = list_entry (iter, alloc_item, list); - - LOG_ERROR_ARGS - ("unfreed mem %x: size=%d tag='%s'\n", - item->address, item->length, item->tag); - - for (i = 0; - i < (sizeof (mobj) / sizeof (ocfs_mem_object)); - i++) { - /* nice clean match */ - if (item->length >= sizeof (ub4) && - mobj[i].objid.Size == item->length && - mobj[i].objid.Type == - *((ub4 *) item->address)) { - LOG_ERROR_ARGS - ("object is a (%s)\n", - mobj[i].name); - - switch (mobj[i].objid.Type) { - case OCFS_TYPE_OIN: - LOG_ERROR_ARGS ("ObjectName: %s\n", - ocfs_print_string (& - ((ocfs_inode - *) - item-> - address)-> -ObjectName)); - break; - case OCFS_TYPE_OSB: - break; - default: - break; - } - break; - } - - /* the type seems to match OR the sizeof is right */ - if ((item->length >= sizeof (ub4) && - mobj[i].objid.Type == - *((ub4 *) item->address)) - || (item->length == mobj[i].objid.Size - && mobj[i].objid.Type == -1)) { - if (item->length == (1024 * 128)) { - ocfs_dir_node *dir = - (ocfs_dir_node *) item-> - address; - - LOG_TRACE_ARGS - ("ocfs_dir_node: node_disk_off = %u.%u\n", - HI (dir->node_disk_off), - LO (dir->node_disk_off)); - break; - } - strcat (maybe, mobj[i].name); - strcat (maybe, " "); - } - } - if (maybe[0]) { - LOG_ERROR_ARGS - ("object may be one of (%s)\n", - maybe); - } - } + list_for_each (iter, &OcfsGlobalCtxt.item_list) { + item = list_entry (iter, alloc_item, list); + switch 
(item->type) + { + case SLAB_ITEM: + /* TODO: use the actual slab name */ + memtype = "SLAB"; + snprintf(sizeinfo, 20, "slab=%p", item->u.slab); + break; + case KMALLOC_ITEM: + memtype = "KMALLOC"; + snprintf(sizeinfo, 20, "size=%d", item->u.length); + break; + case VMALLOC_ITEM: + memtype = "VMALLOC"; + snprintf(sizeinfo, 20, "size=%d", item->u.length); + break; + default: + memtype = "UNKNOWN"; + snprintf(sizeinfo, 20, "size=%d", item->u.length); + break; + } + LOG_ERROR_ARGS ("unfreed %s mem %x: %s tag='%s'\n", memtype, + item->address, sizeinfo, item->tag); } -} /* ocfs_memcheck */ +} /* ocfs_memcheck */ + #endif /* OCFS_LINUX_MEM_DEBUG */ /* @@ -625,11 +520,9 @@ ObjectName)); static void __exit ocfs_driver_exit (void) { ocfs_super *osb = NULL; - struct list_head *iterEntry; + struct list_head *osb_entry; + struct list_head *osb_tmp; ub4 i = 0; -#if !defined(DLM_THREAD_PER_VOLUME) - ub4 timeout; -#endif LOG_ENTRY (); @@ -637,22 +530,19 @@ static void __exit ocfs_driver_exit (voi ocfs_down_sem (&(OcfsGlobalCtxt.res), true); OCFS_SET_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_SHUTDOWN_VOL_THREAD); - list_for_each (iterEntry, &(OcfsGlobalCtxt.osb_next)) { - osb = list_entry (iterEntry, ocfs_super, osb_next); + list_for_each_safe (osb_entry, osb_tmp, &(OcfsGlobalCtxt.osb_next)) { + osb = list_entry (osb_entry, ocfs_super, osb_next); ocfs_down_sem (&osb->osb_res, true); OCFS_SET_FLAG (osb->osb_flags, OCFS_OSB_FLAGS_SHUTDOWN); ocfs_up_sem (&osb->osb_res); osb->needs_flush = true; - while ((osb->trans_in_progress) && (i < 10)) { ocfs_sleep (100); i++; } - ocfs_commit_cache (osb, true); - osb->needs_flush = false; list_del (&osb->osb_next); @@ -663,20 +553,6 @@ static void __exit ocfs_driver_exit (voi ocfs_up_sem (&(OcfsGlobalCtxt.res)); - -#if !defined(DLM_THREAD_PER_VOLUME) - /* Wait for the DLM thread to exit */ - ocfs_down_sem (&(OcfsGlobalCtxt.DLMThreadMonitor), true); - ocfs_up_sem (&(OcfsGlobalCtxt.DLMThreadMonitor)); - - /* Wait 3 sec for the dlm thread to 
completely exit */ - timeout = 3 * HZ; - while (timeout) { - set_current_state (TASK_INTERRUPTIBLE); - timeout = schedule_timeout (timeout); - } -#endif - /* Deinit the proc interface */ ocfs_proc_deinit (); @@ -963,7 +839,8 @@ static struct dentry *ocfs_lookup (struc struct dentry *ret; ocfs_super *osb = ((ocfs_super *)(sb->u.generic_sbp)); - LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", dir, dentry); + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, '%*s')\n", dir, dentry, + dentry->d_name.len, dentry->d_name.name); atomic_inc (&dir->i_count); @@ -1021,44 +898,40 @@ static struct dentry *ocfs_lookup (struc */ static int ocfs_statfs (struct super_block *sb, struct statfs *buf) { - ocfs_super *osb = NULL; - ub4 size, countbits = 0; - int status = 0; - ocfs_alloc_bm *buffer; + ocfs_super *osb = NULL; + ub4 numbits, freebits = 0; + // ocfs_lock_res *pLockResource; + int status = 0; + ub1 lockbuf[512]; + ocfs_bitmap_lock *bm_lock = (ocfs_bitmap_lock *)lockbuf; - LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", sb, buf); + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", sb, buf); - osb = ((ocfs_super *)(sb->u.generic_sbp)); - size = (ub4) OCFS_SECTOR_ALIGN ((osb->cluster_bitmap.size) / 8); - if ((buffer = vmalloc (sizeof (ocfs_alloc_bm))) == NULL) - return -1; - if ((buffer->buf = - vmalloc (sizeof (char) * size + sizeof (ocfs_alloc_bm))) == NULL) - return -1; - buffer->size = osb->cluster_bitmap.size; - status = - ocfs_read_force_disk (osb, buffer->buf, size, - osb->vol_layout.bitmap_off); - countbits = ocfs_count_bits (buffer); - - buf->f_type = OCFS_MAGIC; - buf->f_bsize = sb->s_blocksize; - buf->f_namelen = OCFS_MAX_FILENAME_LENGTH; - buf->f_blocks = - (unsigned long) ((unsigned long) (osb->cluster_bitmap.size) * - (unsigned long) (osb->vol_layout. 
- cluster_size >> 9)); - buf->f_bfree = - (unsigned long) buf->f_blocks - - (unsigned long) (countbits * (osb->vol_layout.cluster_size >> 9)); - buf->f_bavail = buf->f_bfree; - buf->f_files = (unsigned long) (osb->cluster_bitmap.size); - buf->f_ffree = (unsigned long) (osb->cluster_bitmap.size) - countbits; - vfree (buffer); + osb = ((ocfs_super *)(sb->u.generic_sbp)); + numbits = osb->cluster_bitmap.size; + + status = ocfs_read_force_disk (osb, lockbuf, OCFS_SECTOR_SIZE, (ub8)OCFS_BITMAP_LOCK_OFFSET); + + if (numbits >= bm_lock->used_bits) + freebits = numbits - bm_lock->used_bits; + + buf->f_type = OCFS_MAGIC; + buf->f_bsize = sb->s_blocksize; + buf->f_namelen = OCFS_MAX_FILENAME_LENGTH; + buf->f_blocks = + (unsigned long) ((unsigned long) (numbits) * + (unsigned long) (osb->vol_layout. + cluster_size >> 9)); + buf->f_bfree = + (unsigned long) (freebits * (osb->vol_layout.cluster_size >> 9)); + buf->f_bavail = buf->f_bfree; + buf->f_files = (unsigned long) (numbits); + buf->f_ffree = (unsigned long) (numbits) - freebits; + + LOG_EXIT_LONG (0); + return 0; +} /* ocfs_statfs */ - LOG_EXIT_LONG (0); - return 0; -} /* ocfs_statfs */ /* @@ -1178,17 +1051,19 @@ int ocfs_get_block (struct inode *inode, int status; void *ioRuns = NULL; - if (S_ISLNK (inode->i_mode)) - return ocfs_symlink_get_block (inode, iblock, bh_result, - create); - LOG_ENTRY_ARGS ("(0x%08x, %d, 0x%08x, %d)\n", inode, iblock, bh_result, create); + if (S_ISLNK (inode->i_mode)) { + err = ocfs_symlink_get_block (inode, iblock, bh_result, create); + goto bail; + } + if (!inode || !inode_data_is_oin (inode)) { LOG_ERROR_STR ("bad inode or inode has no oin"); goto bail; } + oin = ((ocfs_inode *)inode->u.generic_ip); osb = (ocfs_super *) oin->osb; @@ -1214,9 +1089,7 @@ int ocfs_get_block (struct inode *inode, err = 0; bail: - if (ioRuns != NULL) { - ocfs_safefree (ioRuns); - } + ocfs_safefree (ioRuns); LOG_EXIT_LONG (err); return err; } /* ocfs_get_block */ @@ -1280,7 +1153,8 @@ static int ocfs_file_write 
(struct file int status; ub8 newsize; - LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %d)\n", filp, buf, count); + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %d, '%*s')\n", filp, buf, count, + filp->f_dentry->d_name.len, filp->f_dentry->d_name.name); /* happy write of zero bytes */ if (count == 0) { @@ -1319,7 +1193,7 @@ static int ocfs_file_write (struct file LOG_TRACE_ARGS ("non O_DIRECT write, fileopencount=%d\n", oin->open_hndl_cnt); if (oin->open_hndl_cnt > 1) { - if (oin->open_for_write) { + if (oin->oin_flags & OCFS_OIN_OPEN_FOR_WRITE) { LOG_TRACE_STR ("uh oh! someone else is doing non O_DIRECT writes!\n"); ret = -EIO; @@ -1328,18 +1202,9 @@ static int ocfs_file_write (struct file LOG_TRACE_STR ("there are other readers, but you're the first writer\n"); saAcquired = true; - oin->open_for_write = true; + OCFS_SET_FLAG(oin->oin_flags, OCFS_OIN_OPEN_FOR_WRITE); } } - if (!osb->cache_fs) { - LOG_TRACE_STR ("Plugging in O_SYNC for you"); - filp->f_flags |= O_SYNC; - } - } - - if (!acquired) { - ocfs_down_sem (&(oin->main_res), true); - acquired = true; } if (OIN_NEEDS_VERIFICATION (oin)) { @@ -1352,17 +1217,15 @@ static int ocfs_file_write (struct file goto bail; } } - if (filp->f_flags & O_APPEND) { - writingAtEOF = true; + if (filp->f_flags & O_APPEND) newsize = count + inode->i_size; - } else + else newsize = count + *ppos; - if (newsize > inode->i_size) + if (newsize > inode->i_size) { writingAtEOF = true; - - if (writingAtEOF) - LOG_TRACE_STR ("Writing at EOF"); + LOG_TRACE_STR ("Writing at EOF"); + } LOG_TRACE_ARGS ("ppos=%u.%u newsize=%u.%u cursize=%u.%u\n", HI (*ppos), LO (*ppos), HI (newsize), LO (newsize), @@ -1431,13 +1294,16 @@ static int ocfs_file_write (struct file } bail: + if (saAcquired) { + OCFS_CLEAR_FLAG(oin->oin_flags, OCFS_OIN_OPEN_FOR_WRITE); + } if (acquired) { - if (saAcquired) { - oin->open_for_write = false; - } ocfs_up_sem (&(oin->main_res)); acquired = false; } + if (inode && oin && !oin->cache_enabled && !(filp->f_flags & O_DIRECT)) { + 
fsync_inode_buffers(inode); + } LOG_EXIT_LONG (ret); return ret; @@ -1456,12 +1322,9 @@ static int ocfs_file_read (struct file * struct inode *inode = filp->f_dentry->d_inode; int status; - LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %d)\n", filp, buf, count); + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %d, '%*s')\n", filp, buf, count, + filp->f_dentry->d_name.len, filp->f_dentry->d_name.name); - if (filp->f_flags & O_DIRECT) { - /* anything special for o_direct? */ - LOG_TRACE_STR ("O_DIRECT"); - } if (!inode || !inode_data_is_oin (inode)) { LOG_ERROR_STR ("Bad inode or inode has no oin"); @@ -1471,8 +1334,13 @@ static int ocfs_file_read (struct file * oin = ((ocfs_inode *)inode->u.generic_ip); osb = (ocfs_super *) oin->osb; - ocfs_down_sem (&(oin->main_res), true); - acquired = true; + if (filp->f_flags & O_DIRECT) { + /* anything special for o_direct? */ + LOG_TRACE_STR ("O_DIRECT"); + } else { + ocfs_down_sem (&(oin->main_res), true); + acquired = true; + } if (OIN_NEEDS_VERIFICATION (oin)) { status = ocfs_verify_update_oin (osb, oin); @@ -1484,8 +1352,10 @@ static int ocfs_file_read (struct file * } } - ocfs_up_sem (&(oin->main_res)); - acquired = false; + if(acquired) { + ocfs_up_sem (&(oin->main_res)); + acquired = false; + } #if LINUX_VERSION_CODE < LinuxVersionCode(2,4,10) if (filp->f_flags & O_DIRECT) { @@ -1601,21 +1471,24 @@ static ssize_t ocfs_rw_direct (int rw, s int sector_size, sector_bits, sector_mask; int ret = 0; - nbhs = (size >> SECTOR_BITS); - if (nbhs > KIO_MAX_SECTORS) - nbhs = KIO_MAX_SECTORS; - err = alloc_kiovec_sz (1, &iobuf, &nbhs); - if (err) - goto out; - + #warning not all devices have 512 byte sectors sector_size = 512; sector_bits = SECTOR_BITS; sector_mask = 511; max_sectors = KIO_MAX_SECTORS >> (sector_bits - SECTOR_BITS); err = -EINVAL; + + if (rw == READ) { + if (inode->i_size < *offp) /* read past end of file */ + return 0; + if (size > (inode->i_size - *offp)) + size = inode->i_size - *offp; + } + if ((*offp & 511) || (size & 511)) - goto out; 
+ return -EINVAL; + err = 0; if (size) @@ -1626,6 +1499,14 @@ static ssize_t ocfs_rw_direct (int rw, s transferred = 0; blocknr = *offp >> SECTOR_BITS; + + nbhs = (size >> SECTOR_BITS); + if (nbhs > KIO_MAX_SECTORS) + nbhs = KIO_MAX_SECTORS; + err = alloc_kiovec_sz (1, &iobuf, &nbhs); + if (err) + goto out; + while (size > 0) { blocks = size >> SECTOR_BITS; if (blocks > max_sectors) @@ -1633,7 +1514,6 @@ static ssize_t ocfs_rw_direct (int rw, s if (!blocks) break; iosize = blocks << SECTOR_BITS; - err = map_user_kiobuf (rw, iobuf, (unsigned long) buf, iosize); if (err) { break; @@ -1709,10 +1589,11 @@ static int ocfs_create_or_open_file (str ocfs_file *OFile = NULL; ocfs_inode *ParentOin = NULL; ocfs_inode *NewOIN = NULL; - ocfs_inode *DirEnt = NULL; + ocfs_inode *oin = NULL; bool bAcquiredOSB = false; bool bAcquiredOIN = false; bool bClearInUse = false; + bool new_oin = false; ocfs_file_entry *fe = NULL; ub8 ParentDirNodeOffset; ub8 parentDirCluster; @@ -1721,7 +1602,8 @@ static int ocfs_create_or_open_file (str ub8 endofFile = 0; ocfs_sem *oin_sem = NULL; - LOG_ENTRY (); + LOG_ENTRY_ARGS ("%s request for '%*s'\n", create?"create":"open", + dentry->d_name.len, dentry->d_name.name); OCFS_ASSERT (dir->i_sb); OCFS_ASSERT (newofile); @@ -1760,27 +1642,31 @@ static int ocfs_create_or_open_file (str else { /* kch - for an open request we are already given the * inode, and therefore we are given the oin too */ - DirEnt = NULL; + down(&inode->i_sem); + oin = NULL; if (inode_data_is_oin (inode)) - DirEnt = ((ocfs_inode *)inode->u.generic_ip); + oin = ((ocfs_inode *)inode->u.generic_ip); status = -EFAIL; - if (DirEnt != NULL) { - if (!(DirEnt->oin_flags & OCFS_OIN_IN_TEARDOWN) && - !(DirEnt->oin_flags & OCFS_OIN_DELETE_ON_CLOSE)) { - OCFS_SET_FLAG (DirEnt->oin_flags, - OCFS_OIN_IN_USE); + if (oin != NULL) { + + if (!(oin->oin_flags & OCFS_OIN_IN_TEARDOWN) && + !(oin->oin_flags & OCFS_OIN_DELETE_ON_CLOSE)) { + OCFS_SET_FLAG (oin->oin_flags, OCFS_OIN_IN_USE); + status = 0; } 
if (status < 0) { - LOG_TRACE_ARGS - ("oin (%p) is not in teardown and not being deleted\n", - DirEnt); + if (oin->oin_flags & OCFS_OIN_IN_TEARDOWN) + LOG_ERROR_ARGS ("oin (%p) in teardown\n", oin); + else + LOG_ERROR_ARGS ("oin (%p) deleted\n", oin); } } else { /* now it IS possible to have an inode but no OIN attached yet * must be loaded now to open file */ status = -ENOENT; } + up(&inode->i_sem); } if (status < 0) { @@ -1793,11 +1679,12 @@ static int ocfs_create_or_open_file (str status = ocfs_find_files_on_disk (osb, ParentDirNodeOffset, &(dentry->d_name), fe, NULL); if (status >= 0) { - DirEnt = NULL; + oin = NULL; ocfs_down_sem (&(osb->osb_res), true); bAcquiredOSB = true; - status = ocfs_create_oin_from_entry (osb, fe, &DirEnt, + status = ocfs_create_oin_from_entry (osb, fe, &oin, parentDirCluster, NULL); + new_oin = true; ocfs_up_sem (&(osb->osb_res)); bAcquiredOSB = false; @@ -1949,12 +1836,46 @@ static int ocfs_create_or_open_file (str *newofile = OFile; goto leave; } else { /* the OPEN case */ + /* check if another process doing an open */ + /* concurrently has just set the oin */ + down(&inode->i_sem); + if (new_oin) { + if (inode_data_is_oin (inode)) { + // delete the oin we just made + oin->inode = NULL; + oin->lock_res = NULL; + ocfs_release_oin(oin, true); + // and use the correct one + oin = (ocfs_inode *)inode->u.generic_ip; + } else { + oin->inode = inode; + SET_INODE_OIN (inode, oin); + } + } + + /* we should now have a single oin regardless */ + /* of how many concurrent openers at this point */ + /* so take the oin->main_res so we won't need the i_sem */ - oin_sem = &(DirEnt->main_res); - ocfs_down_sem (oin_sem, true); - bAcquiredOIN = true; - if (DirEnt->oin_flags & OCFS_OIN_DELETE_ON_CLOSE) { + oin_sem = &(oin->main_res); + + if (!ocfs_down_sem (oin_sem, false)) { + // eek! 
we cannot sit around and wait for main_res while we're holding i_sem + bAcquiredOIN = false; + } else { + bAcquiredOIN = true; + } + up(&inode->i_sem); + + /* is this still a race!?!? */ + if (!bAcquiredOIN) { + oin_sem = &(oin->main_res); + ocfs_down_sem (oin_sem, true); + bAcquiredOIN = true; + } + + if (oin->oin_flags & OCFS_OIN_DELETE_ON_CLOSE) { LOG_TRACE_STR ("oin has DELETE_ON_CLOSE set, returning DELETE_PENDING"); status = -ENOENT; @@ -1962,45 +1883,46 @@ static int ocfs_create_or_open_file (str } /* only call ocfs_verify_update_oin if there's a good inode */ - if (DirEnt->inode == inode) { - status = ocfs_verify_update_oin (osb, DirEnt); + if (oin->inode == inode) { + status = ocfs_verify_update_oin (osb, oin); if (status < 0) { /* disable VOLUME TODO */ goto leave; } } - if (DirEnt->open_hndl_cnt > 0) { + if (oin->open_hndl_cnt > 0) { /* The OIN is currently in use by some thread. */ /* We must check whether the requested access/share access */ /* conflicts with the existing open operations. */ - LOG_TRACE_ARGS ("DirEnt->open_hndl_cnt > 0! : %u\n", - DirEnt->open_hndl_cnt); -#if 0 - if (mode & O_DIRECT) { - LOG_TRACE_STR ("O_DIRECT: open ok"); - status = 0; - } else if (mode & O_RDONLY) { - LOG_TRACE_STR ("O_RDONLY: open ok"); - status = 0; - } else if (mode & O_WRONLY || mode & O_RDWR) { - LOG_TRACE_STR - ("tried opening for write, but file is " - "already open!"); - status = -EFAIL; - } else -#endif - { - LOG_TRACE_STR ("I don't know!?"); - status = 0; - } + LOG_TRACE_ARGS ("oin->open_hndl_cnt > 0! 
: %u\n", + oin->open_hndl_cnt); + if (!(mode & O_DIRECT)) { + if (oin->oin_flags & OCFS_OIN_OPEN_FOR_DIRECTIO) { + status = -EACCES; + LOG_TRACE_STR("file is already open O_DIRECT, " + "cannot open non O_DIRECT"); + goto leave; + } + OCFS_CLEAR_FLAG(oin->oin_flags, OCFS_OIN_OPEN_FOR_DIRECTIO); + } else if (mode & O_DIRECT) { + if (!(oin->oin_flags & OCFS_OIN_OPEN_FOR_DIRECTIO)) { + status = -EACCES; + LOG_TRACE_STR("file is already open non O_DIRECT, " + "cannot open O_DIRECT"); + goto leave; + } + OCFS_SET_FLAG(oin->oin_flags, OCFS_OIN_OPEN_FOR_DIRECTIO); - if (status < 0) { - goto leave; } + status = 0; } else { - ocfs_delete_all_extent_maps (DirEnt); + ocfs_delete_all_extent_maps (oin); + if (mode & O_DIRECT) + OCFS_SET_FLAG(oin->oin_flags, OCFS_OIN_OPEN_FOR_DIRECTIO); + else + OCFS_CLEAR_FLAG(oin->oin_flags, OCFS_OIN_OPEN_FOR_DIRECTIO); } /* Allocate a new OFile */ @@ -2014,8 +1936,7 @@ static int ocfs_create_or_open_file (str /* Setup the OFile and insert it on the oin list */ OFile->k_file = NewFileObject; - OFile->oin = DirEnt; - OFile->oin->ref_cnt++; + OFile->oin = oin; OFile->oin->open_hndl_cnt++; /* We should clear the in use now as we are safe from the case */ @@ -2039,8 +1960,6 @@ static int ocfs_create_or_open_file (str bAcquiredOSB = false; *newofile = OFile; - DirEnt->inode = inode; - SET_INODE_OIN (inode, DirEnt); status = 0; goto leave; } @@ -2053,7 +1972,7 @@ static int ocfs_create_or_open_file (str bAcquiredOIN = true; } - OCFS_CLEAR_FLAG (DirEnt->oin_flags, OCFS_OIN_IN_USE); + OCFS_CLEAR_FLAG (oin->oin_flags, OCFS_OIN_IN_USE); if (bAcquiredOIN && oin_sem) { ocfs_up_sem (oin_sem); @@ -2086,17 +2005,14 @@ static int ocfs_create_or_open_file (str */ static int ocfs_file_open (struct inode *inode, struct file *file) { - struct dentry *dentry; - struct inode *parent = NULL; + struct dentry *dentry = file->f_dentry; + struct inode *parent = dentry->d_parent->d_inode; ocfs_file *ofile = NULL; int status; int ret, err = 0; - LOG_ENTRY_ARGS ("(0x%08x, 
0x%08x)\n", inode, file); - - dentry = file->f_dentry; - if (dentry && dentry->d_parent && dentry->d_parent->d_inode) - parent = dentry->d_parent->d_inode; + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, '%*s')\n", inode, file, + file->f_dentry->d_name.len, file->f_dentry->d_name.name); atomic_inc (&parent->i_count); status = ocfs_create_or_open_file (inode, parent, dentry, file->f_flags, @@ -2149,7 +2065,8 @@ static int ocfs_mknod (struct inode *dir ocfs_file *newofile = NULL; int error = -EACCES; - LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %d, %d)\n", dir, dentry, mode, dev); + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %d, %d, '%*s')\n", dir, dentry, mode, dev, + dentry->d_name.len, dentry->d_name.name); atomic_inc (&dir->i_count); @@ -2209,7 +2126,8 @@ static int ocfs_mkdir (struct inode *dir { int ret; - LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %d)\n", dir, dentry, mode); + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %d, '%*s')\n", dir, dentry, mode, + dentry->d_name.len, dentry->d_name.name); ret = ocfs_mknod (dir, dentry, mode | S_IFDIR, NODEV); @@ -2225,7 +2143,8 @@ static int ocfs_create (struct inode *di { int ret; - LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %d)\n", dir, dentry, mode); + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %d, '%*s')\n", dir, dentry, mode, + dentry->d_name.len, dentry->d_name.name); ret = ocfs_mknod (dir, dentry, mode | S_IFREG, NODEV); @@ -2244,7 +2163,9 @@ static int ocfs_link (struct dentry *old ocfs_file_entry *fe; struct inode *inode; - LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, 0x%08x)\n", old_dentry, dir, dentry); + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, 0x%08x, old='%*s' new='%*s')\n", old_dentry, dir, dentry, + old_dentry->d_name.len, old_dentry->d_name.name, + dentry->d_name.len, dentry->d_name.name); inode = old_dentry->d_inode; fe = ocfs_allocate_file_entry (); @@ -2286,7 +2207,8 @@ static inline int ocfs_positive (struct { int ret; - LOG_ENTRY_ARGS ("(0x%08x)\n", dentry); + LOG_ENTRY_ARGS ("(0x%08x, '%*s')\n", dentry, + dentry->d_name.len, dentry->d_name.name); ret = dentry->d_inode && 
!d_unhashed (dentry); @@ -2303,7 +2225,8 @@ static int ocfs_empty (struct dentry *de struct list_head *list; int ret; - LOG_ENTRY_ARGS ("(0x%08x)\n", dentry); + LOG_ENTRY_ARGS ("(0x%08x, '%*s')\n", dentry, + dentry->d_name.len, dentry->d_name.name); spin_lock (&dcache_lock); list = dentry->d_subdirs.next; @@ -2337,7 +2260,8 @@ static int ocfs_unlink (struct inode *di ocfs_inode *oin = NULL; int retval = -ENOTEMPTY; - LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", dir, dentry); + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, '%*s')\n", dir, dentry, + dentry->d_name.len, dentry->d_name.name); inode = dentry->d_inode; @@ -2351,7 +2275,12 @@ static int ocfs_unlink (struct inode *di if (ocfs_empty (dentry)) { status = ocfs_set_disposition_information (dir, dentry); if (status < 0) { - LOG_ERROR_STR ("ocfs_set_disposition_information failed!"); + + if (status != -ENOTEMPTY && status != -EPERM && + status != -EBUSY) + LOG_ERROR_STR ("ocfs_set_disposition_information failed!"); + else + LOG_TRACE_STR ("ocfs_set_disposition_information failed!"); retval = -EBUSY; goto bail; } @@ -2373,7 +2302,7 @@ static int ocfs_unlink (struct inode *di /* dput(dentry); */ retval = 0; } else - LOG_ERROR_STR ("dentry is not empty!"); + LOG_TRACE_STR ("dentry is not empty, cannot delete"); bail: LOG_EXIT_LONG (retval); @@ -2392,8 +2321,10 @@ static int ocfs_rename (struct inode *ol struct inode *new_inode = new_dentry->d_inode; int error = 0; - LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", - old_dir, old_dentry, new_dir, new_dentry); + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, 0x%08x, 0x%08x, from='%*s' to='%*s')\n", + old_dir, old_dentry, new_dir, new_dentry, + old_dentry->d_name.len, old_dentry->d_name.name, + new_dentry->d_name.len, new_dentry->d_name.name); if ((atomic_read (&old_inode->i_count) > 1) || (atomic_read (&old_dentry->d_count) > 1)) { @@ -2467,7 +2398,8 @@ static int ocfs_symlink (struct inode *d ub8 newsize; int status; - LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, 0x%08x)\n", dir, dentry, symname); + 
LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, symname='%s' actual='%*s')\n", dir, dentry, symname, + dentry->d_name.len, dentry->d_name.name); atomic_inc (&dir->i_count); if (!dentry->d_parent || !dentry->d_parent->d_inode) { @@ -2555,7 +2487,10 @@ static int ocfs_file_release (struct ino struct dentry *dentry; struct inode *parent; - LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", inode, file); + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, '%*s')\n", inode, file, + file->f_dentry->d_name.len, file->f_dentry->d_name.name); + + dentry = file->f_dentry; if (file->private_data) ofile = (ocfs_file *) file->private_data; @@ -2591,7 +2526,6 @@ static int ocfs_file_release (struct ino ocfs_release_ofile (ofile); ocfs_down_sem (&(osb->osb_res), true); osb->file_open_cnt--; - oin->ref_cnt--; oin->open_hndl_cnt--; ocfs_up_sem (&(osb->osb_res)); @@ -2601,9 +2535,16 @@ static int ocfs_file_release (struct ino } LOG_TRACE_ARGS ("openhandles: %d / osbfiles: %d / refcount: %d\n", - oin->open_hndl_cnt, osb->file_open_cnt, oin->ref_cnt); - - if (oin->ref_cnt == 0) { + oin->open_hndl_cnt, osb->file_open_cnt, + atomic_read(&dentry->d_count)); + + /* FIXME: in all the other places I run thru all the dentries */ + /* for the inode, but here I just check this one becuz I'm lz */ + /* no hard links yet so who cares */ + if (!atomic_read(&dentry->d_count)) { + if (oin->oin_flags & OCFS_OIN_OPEN_FOR_DIRECTIO) { + OCFS_CLEAR_FLAG(oin->oin_flags, OCFS_OIN_OPEN_FOR_DIRECTIO); + } if (oin->oin_flags & OCFS_OIN_NEEDS_DELETION || oin->oin_flags & OCFS_OIN_IN_USE) { ocfs_up_sem (&(oin->main_res)); @@ -2617,7 +2558,6 @@ static int ocfs_file_release (struct ino } do_parent_dec: - dentry = file->f_dentry; if (dentry && dentry->d_parent && dentry->d_parent->d_inode) { parent = dentry->d_parent->d_inode; @@ -2636,7 +2576,8 @@ bail: */ static int ocfs_flush (struct file *file) { - LOG_ENTRY_ARGS ("(0x%08x)\n", file); + LOG_ENTRY_ARGS ("(0x%08x, '%*s')\n", file, + file->f_dentry->d_name.len, file->f_dentry->d_name.name); LOG_EXIT_LONG 
(0); return 0; @@ -2648,7 +2589,8 @@ static int ocfs_flush (struct file *file */ static int ocfs_sync_file (struct file *file, struct dentry *dentry, int datasync) { - LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %d)\n", file, dentry, datasync); + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, %d, '%*s')\n", file, dentry, datasync, + dentry->d_name.len, dentry->d_name.name); LOG_EXIT_LONG (0); return 0; @@ -2686,7 +2628,8 @@ static int ocfs_readdir (struct file *fi ub8 rootOff; int ret = 0; - LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", filp, dirent); + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, '%*s')\n", filp, dirent, + filp->f_dentry->d_name.len, filp->f_dentry->d_name.name); if (!filp || !filp->f_dentry || @@ -2810,7 +2753,7 @@ static void ocfs_clear_inode (struct ino ocfs_file *ofile; struct list_head *iter; - LOG_TRACE_STR ("inode with oin : clear inode\n"); + LOG_TRACE_STR ("inode with oin : clear inode"); oin = ((ocfs_inode *)inode->u.generic_ip); osb = (ocfs_super *) oin->osb; @@ -2858,7 +2801,7 @@ static void ocfs_clear_inode (struct ino root_sect_node), &(fileOff), sizeof (ub8)); - kmem_cache_free (OcfsGlobalCtxt.lockres_cache, FoundResource); + ocfs_free_lockres(FoundResource); } } else { LOG_TRACE_STR @@ -2905,8 +2848,10 @@ static int ocfs_setattr (struct dentry * ocfs_inode *oin = NULL; ocfs_super *osb = NULL; ub8 parentOff, fileOff; + ocfs_file_entry *fe=NULL; - LOG_ENTRY_ARGS ("(0x%08x)\n", dentry); + LOG_ENTRY_ARGS ("(0x%08x, '%*s')\n", dentry, + dentry->d_name.len, dentry->d_name.name); osb = ((ocfs_super *)(inode->i_sb->u.generic_sbp)); @@ -2914,6 +2859,11 @@ static int ocfs_setattr (struct dentry * LOG_ERROR_STR ("bad inode or root inode"); goto bail2; } + if (dentry == inode->i_sb->s_root) { + LOG_ERROR_STR("changes to root inode not allowed"); + goto bail2; + } + parentInode = dentry->d_parent->d_inode; atomic_inc (&parentInode->i_count); newsize = attr->ia_size; @@ -2948,6 +2898,8 @@ static int ocfs_setattr (struct dentry * } if (attr->ia_valid & ATTR_SIZE) { + ub4 flags; + if (oin 
!= NULL) { ocfs_down_sem (&(oin->main_res), true); if (OIN_NEEDS_VERIFICATION (oin)) { @@ -2965,9 +2917,16 @@ static int ocfs_setattr (struct dentry * ocfs_up_sem (&(oin->main_res)); } - status = - ocfs_create_modify_file (osb, parentOff, oin, NULL, newsize, - &fileOff, FLAG_FILE_EXTEND, NULL, NULL); +#ifdef RECLAIM_SPACE_ON_TRUNCATE + if (inode->i_size > newsize) + flags = FLAG_FILE_TRUNCATE; + else +#endif + flags = FLAG_FILE_EXTEND; + + status = ocfs_create_modify_file (osb, parentOff, oin, + NULL, newsize, &fileOff, + flags, NULL, NULL); if (status < 0) { LOG_TRACE_ARGS ("Failed to extend file to %u.%u !!!\n", @@ -2978,6 +2937,11 @@ static int ocfs_setattr (struct dentry * if (oin != NULL) { ocfs_down_sem (&(oin->main_res), true); +#ifdef RECLAIM_SPACE_ON_TRUNCATE + if (flags==FLAG_FILE_TRUNCATE) { + ocfs_delete_all_extent_maps(oin); + } +#endif inode->i_size = newsize; ocfs_up_sem (&(oin->main_res)); } @@ -2988,7 +2952,7 @@ static int ocfs_setattr (struct dentry * /* if directory, put FILE_ENTRY ptr into fileOff */ if (S_ISDIR (inode->i_mode)) - ocfs_linux_get_dir_entry_offset (osb, &fileOff, parentOff, &(dentry->d_name), NULL); + ocfs_linux_get_dir_entry_offset (osb, &fileOff, parentOff, &(dentry->d_name), &fe); status = -EFAIL; if (fileOff != -1) @@ -3006,6 +2970,8 @@ static int ocfs_setattr (struct dentry * bail: atomic_dec (&parentInode->i_count); bail2: + if (fe!=NULL) + ocfs_release_file_entry(fe); LOG_EXIT_LONG (error); return error; } /* ocfs_setattr */ @@ -3020,7 +2986,8 @@ static int ocfs_getattr (struct dentry * struct inode *inode; struct super_block *sb = dentry->d_inode->i_sb; - LOG_ENTRY_ARGS ("(0x%08x, 0x%08x)\n", dentry, attr); + LOG_ENTRY_ARGS ("(0x%08x, 0x%08x, '%*s')\n", dentry, attr, + dentry->d_name.len, dentry->d_name.name); inode = dentry->d_inode; if (inode == NULL || !inode_data_is_oin (inode)) @@ -3042,118 +3009,123 @@ static int ocfs_getattr (struct dentry * */ static int ocfs_dentry_revalidate (struct dentry *dentry, int flags) { - 
int ret = 0; - - return 1; - - LOG_ENTRY_ARGS ("(0x%08x, %d)\n", dentry, flags); -#if 0 - ocfs_file_entry *fe; + int ret = 0; /* if all else fails, just return false */ + ocfs_file_entry *fe = NULL; struct inode *inode; ocfs_inode *oin; ocfs_super *osb; ub8 off; ocfs_find_inode_args args; + struct qstr q; + + LOG_ENTRY_ARGS ("(0x%08x, %d, '%*s')\n", dentry, flags, + dentry->d_name.len, dentry->d_name.name); - if ((inode = dentry->d_inode) != NULL) { - osb = ((ocfs_super *)(inode->i_sb->u.generic_sbp); - if (inode_data_is_oin (inode)) { - oin = ((ocfs_inode *)inode->u.generic_ip); - ocfs_down_sem (&(oin->main_res), true); - oin->bNeedsVerification = true; - ocfs_up_sem (&(oin->main_res)); - UPDATE_OIN (oin); - ret = 1; - goto bail; - } else if (S_ISDIR (inode->i_mode)) { - ub8 parentOff; - - ret = 0; - if (osb->oin_root_dir->inode == inode) - goto bail; + if ((inode = dentry->d_inode) == NULL || + (osb = (ocfs_super *)(inode->i_sb->u.generic_sbp)) == NULL) + goto bail; - if (ocfs_linux_get_inode_offset - (dentry->d_parent->d_inode, &parentOff, NULL)) { - if (ocfs_linux_get_dir_entry_offset - (osb, &off, parentOff, &(dentry->d_name), &fe)) { - args.offset = fe->this_sector; - args.entry = fe; - ocfs_read_inode2 (inode, - (void *) &args); - ocfs_release_file_entry (fe); - ret = 1; - } - } - goto bail; - } else { - if (ocfs_linux_get_inode_offset (inode, &off, NULL)) { - ret = 0; - fe = ocfs_allocate_file_entry (); - if (fe) { - if (ocfs_read_file_entry - ((ocfs_super *)(inode->i_sb->u.generic_sbp), - fe, - off) >= 0) { - args.offset = fe->this_sector; - args.entry = fe; - ocfs_read_inode2 (inode, - (void *) - &args); - ret = 1; - } - ocfs_release_file_entry (fe); - } - goto bail; - } - } - } + /* check for oin */ + if (inode_data_is_oin (inode)) { + ocfs_lock_res *res; + ret = 1; /* with an oin we cannot fail revalidate */ + oin = ((ocfs_inode *)inode->u.generic_ip); - ret = 0; /* just return false */ + if (ocfs_lookup_sector_node (osb, oin->file_disk_off, &res)==0) { 
+ /* if I hold cache lock, no revalidate needed */ + if (res->lock_type == OCFS_DLM_ENABLE_CACHE_LOCK && + res->master_node_num == osb->node_num) { + goto bail; + } + } + /* hit the disk */ + /* TODO: optimize */ + ocfs_down_sem (&(oin->main_res), true); + oin->needs_verification = true; + ocfs_up_sem (&(oin->main_res)); + UPDATE_OIN (oin); + goto bail; + } + + /* no oin for this dentry, must hit the disk */ + /* TODO: optimize */ + if (S_ISDIR (inode->i_mode)) { + ub8 parentOff; + struct inode *parent = dentry->d_parent->d_inode; + + /* shouldn't be revalidating root dir, need offset to parent, and fe for this dir */ + if (osb->oin_root_dir->inode == inode || + !ocfs_linux_get_inode_offset (parent, &parentOff, NULL) || + !ocfs_linux_get_dir_entry_offset (osb, &off, parentOff, &(dentry->d_name), &fe)) + goto bail; + } else if (ocfs_linux_get_inode_offset (inode, &off, NULL)) { + if (ocfs_force_get_file_entry (osb, &fe, off, true) < 0) + goto bail; + } else { + /* icky failure case :( */ + goto bail; + } - bail: - return ret; -#endif + /* we now have a file entry to call read_inode */ + q.name = fe->filename; + q.len = strlen(fe->filename); + if (fe->sync_flags & OCFS_SYNC_FLAG_MARK_FOR_DELETION || + fe->sync_flags & OCFS_SYNC_FLAG_NAME_DELETED || + fe->sync_flags & OCFS_SYNC_FLAG_DELETED || + ocfs_compare_qstr(&dentry->d_name, &q) != 0) { + LOG_TRACE_STR("found the file entry, but it has been deleted or renamed!"); + ret = 0; /* it is now officially stale :) */ + } else { + args.offset = fe->this_sector; + args.entry = fe; + ocfs_read_inode2 (inode, (void *) &args); + ret = 1; + } + +bail: + if (fe) + ocfs_release_file_entry (fe); LOG_EXIT_LONG (ret); return ret; } /* ocfs_dentry_revalidate */ #if 0 -/* - * ocfs_inode_revalidate() - * - */ -static int ocfs_inode_revalidate (struct dentry *dentry) -{ - int ret; /* -ESTALE */ - struct inode *inode; - ocfs_inode *oin; - int status; - - LOG_ENTRY_ARGS ("(0x%08x)\n", dentry); - - ret = 0; - inode = dentry->d_inode; - if 
(inode == NULL || !inode_data_is_oin (inode)) - goto bail; - oin = ((ocfs_inode *)inode->u.generic_ip); - if (oin == (ocfs_super *)(dentry->d_inode->i_sb->u.generic_sbp)->oin_root_dir) - goto bail; - if (oin != NULL) -// if (OIN_NEEDS_VERIFICATION(oin)) - { - LOG_TRACE_STR ("OIN needs verification"); - status = ocfs_verify_update_oin (oin->osb, oin); - if (status < 0) { - LOG_ERROR_STR ("ocfs_verify_update_oin failed!"); - ret = -ESTALE; - goto bail; - } - } - - bail: - LOG_EXIT_LONG (ret); - return ret; -} /* ocfs_inode_revalidate */ + /* + * ocfs_inode_revalidate() + * + */ + static int ocfs_inode_revalidate (struct dentry *dentry) + { + int ret; /* -ESTALE */ + struct inode *inode; + ocfs_inode *oin; + int status; + + LOG_ENTRY_ARGS ("(0x%08x)\n", dentry); + + ret = 0; + inode = dentry->d_inode; + if (inode == NULL || !inode_data_is_oin (inode)) + goto bail; + oin = ((ocfs_inode *)inode->u.generic_ip); + if (oin == (ocfs_super *)(dentry->d_inode->i_sb->u.generic_sbp)->oin_root_dir) + goto bail; + if (oin != NULL) + // if (OIN_NEEDS_VERIFICATION(oin)) + { + LOG_TRACE_STR ("OIN needs verification"); + status = ocfs_verify_update_oin (oin->osb, oin); + if (status < 0) { + LOG_ERROR_STR ("ocfs_verify_update_oin failed!"); + ret = -ESTALE; + goto bail; + } + } + + bail: + LOG_EXIT_LONG (ret); + return ret; + } /* ocfs_inode_revalidate */ #endif /* @@ -3165,6 +3137,13 @@ void ocfs_release_cached_oin (ocfs_super bool bAcquiredOIN = false; ocfs_lock_res *lockResource = NULL; static spinlock_t lamelock = SPIN_LOCK_UNLOCKED; + ub4 vallen; + ocfs_lock_res *val=NULL; + struct dentry *dentry; + struct list_head *iter; + struct inode *inode; + int refcount = 0; + LOG_ENTRY_ARGS ("(oin = 0x%08x)\n", oin); @@ -3174,8 +3153,18 @@ void ocfs_release_cached_oin (ocfs_super ocfs_down_sem (&(oin->main_res), true); bAcquiredOIN = true; + inode = oin->inode; + + if (inode) { + list_for_each (iter, &(inode->i_dentry)) { + dentry = list_entry (iter, struct dentry, d_alias); + refcount += 
atomic_read(&dentry->d_count); + } + } + - if (oin->open_hndl_cnt != 0 || oin->oin_flags & OCFS_OIN_IN_USE) { + if (refcount != 0 || oin->open_hndl_cnt != 0 || + oin->oin_flags & OCFS_OIN_IN_USE) { if (bAcquiredOIN) { ocfs_up_sem (&(oin->main_res)); bAcquiredOIN = false; @@ -3184,58 +3173,50 @@ void ocfs_release_cached_oin (ocfs_super } else { OCFS_SET_FLAG (oin->oin_flags, OCFS_OIN_IN_TEARDOWN); - ocfs_delete_name (oin); - if (bAcquiredOIN) { ocfs_up_sem (&(oin->main_res)); bAcquiredOIN = false; } - lockResource = (ocfs_lock_res *) oin->lock_res; - if (lockResource != NULL) { - if (lockResource->signature != 0x55AA) { - LOG_ERROR_STR("invalid lock resource"); - goto finito; - } + if (lockResource == NULL) + goto bail; - if (lockResource->sector_num != 0) { - if (lockResource->oin == oin) { - lockResource->oin = NULL; - if (lockResource->in_cache_list) { - list_del (& - (lockResource-> - cache_list)); - lockResource->in_cache_list = - false; - } + if (lockResource->signature != 0x55AA) { + LOG_ERROR_STR("Invalid lock resource"); + goto bail; + } - ocfs_safefree (lockResource->voted_event); + if (lockResource->sector_num == 0 || lockResource->oin != oin) + goto bail; - if (!HASHTABLE_DESTROYED - (&(osb->root_sect_node))) { - ocfs_hash_del (& - (osb-> - root_sect_node), - &(lockResource-> - sector_num), - sizeof (ub8)); - } else { - LOG_TRACE_STR - ("hashtable already destroyed! continuing."); - } - kmem_cache_free (OcfsGlobalCtxt.lockres_cache, lockResource); + lockResource->oin = NULL; + if (lockResource->in_cache_list) { + list_del (& (lockResource-> cache_list)); + lockResource->in_cache_list = false; + } - oin->lock_res = NULL; - } - } + if (HASHTABLE_DESTROYED(&(osb->root_sect_node))) { + LOG_TRACE_STR ("hashtable already destroyed! 
Continuing."); + oin->lock_res = NULL; + goto bail; } - finito: + + if (ocfs_hash_get(&(osb->root_sect_node), + &(lockResource->sector_num), sizeof (ub8), + (void **)&val, &vallen) && val==lockResource) { + ocfs_hash_del (&(osb->root_sect_node), + &(val->sector_num), sizeof (ub8)); + ocfs_free_lockres(val); + } else { + LOG_ERROR_ARGS("lockres (0x%08x) is not in the hash! " \ + "(val=%p)\n", lockResource, val); + } + oin->lock_res = NULL; } bail: - spin_unlock (&lamelock); - + spin_unlock (&lamelock); LOG_EXIT (); return; } /* ocfs_release_cached_oin */ @@ -3246,24 +3227,23 @@ void ocfs_release_cached_oin (ocfs_super */ int ocfs_initialize_mem_lists (void) { - OcfsGlobalCtxt.oin_cache = - kmem_cache_create ("oin_cache", - sizeof (ocfs_inode) + OCFS_POINTER_SIZE, - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); - OcfsGlobalCtxt.ofile_cache = - kmem_cache_create ("ofile_cache", - sizeof (ocfs_file) + OCFS_POINTER_SIZE, - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); - OcfsGlobalCtxt.lockres_cache = - kmem_cache_create ("lockres_cache", - sizeof (ocfs_lock_res) + OCFS_POINTER_SIZE, - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + OcfsGlobalCtxt.oin_cache = kmem_cache_create ("oin_cache", + sizeof (ocfs_inode) + OCFS_POINTER_SIZE, 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + + OcfsGlobalCtxt.ofile_cache = kmem_cache_create ("ofile_cache", + sizeof (ocfs_file) + OCFS_POINTER_SIZE, 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + + OcfsGlobalCtxt.lockres_cache = kmem_cache_create ("lockres_cache", + sizeof (ocfs_lock_res) + OCFS_POINTER_SIZE, 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + OcfsGlobalCtxt.fe_cache = kmem_cache_create ("fileentry_cache", - OCFS_SECTOR_SIZE, - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + OCFS_SECTOR_SIZE, 0, SLAB_HWCACHE_ALIGN, NULL, NULL); OCFS_SET_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_MEM_LISTS_INITIALIZED); + return 0; } /* ocfs_initialize_mem_lists */ @@ -3317,10 +3297,18 @@ int ocfs_remount (struct super_block *sb gid_t gid = current->fsgid; int length; bool c; + bool reclaim_id; + 
ocfs_file_entry *fe = NULL; LOG_ENTRY (); - ocfs_parse_options (data, &uid, &gid, &c); + fe = ocfs_allocate_file_entry(); + if (fe == NULL) { + ret = -ENOMEM; + goto bail; + } + + ocfs_parse_options (data, &uid, &gid, &c, &reclaim_id); osb = (ocfs_super *)(sb->u.generic_sbp); if (!c) { @@ -3340,10 +3328,9 @@ int ocfs_remount (struct super_block *sb goto bail; } - status = - ocfs_acquire_lock (osb, OCFS_VOLUME_LOCK_OFFSET, - OCFS_DLM_EXCLUSIVE_LOCK, FLAG_FILE_CREATE, &lr, - NULL); + status = ocfs_acquire_lock (osb, OCFS_VOLUME_LOCK_OFFSET, + OCFS_DLM_EXCLUSIVE_LOCK, FLAG_FILE_CREATE, + &lr, fe); if (status < 0) { LOG_ERROR_STR ("failed to get lock on OCFS_VOLUME_LOCK_OFFSET"); ret = -EBUSY; @@ -3355,9 +3342,8 @@ int ocfs_remount (struct super_block *sb bool save = osb->cache_fs; osb->cache_fs = false; - status = - ocfs_read_disk (osb, buffer, length, - osb->vol_layout.publ_sect_off); + status = ocfs_read_disk (osb, buffer, length, + osb->vol_layout.publ_sect_off); osb->cache_fs = save; } if (status >= 0) { @@ -3384,14 +3370,16 @@ int ocfs_remount (struct super_block *sb } ocfs_safefree (buffer); - status = - ocfs_release_lock (osb, OCFS_VOLUME_LOCK_OFFSET, - OCFS_DLM_EXCLUSIVE_LOCK, 0, lr); + status = ocfs_release_lock (osb, OCFS_VOLUME_LOCK_OFFSET, + OCFS_DLM_EXCLUSIVE_LOCK, 0, lr, fe); if (ret == 0) { osb->cache_fs = true; } bail: + if (fe) + ocfs_release_file_entry(fe); + LOG_EXIT_LONG (ret); return ret; } /* ocfs_remount */ diff -urNp x-ref/fs/ocfs/Linux/ocfsmount.c x/fs/ocfs/Linux/ocfsmount.c --- x-ref/fs/ocfs/Linux/ocfsmount.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Linux/ocfsmount.c Mon Oct 21 04:41:19 2002 @@ -145,7 +145,7 @@ static bool is_exclusive_node_alive (str * ocfs_mount_volume() * */ -int ocfs_mount_volume (struct super_block *sb) +int ocfs_mount_volume (struct super_block *sb, bool reclaim_id) { int status = 0; ocfs_super *osb; @@ -153,15 +153,12 @@ int ocfs_mount_volume (struct super_bloc ocfs_vol_disk_hdr *volDiskHdr; ocfs_vol_label *volLabel; int 
sectsize; - -#if defined(DLM_THREAD_PER_VOLUME) int child_pid; -#endif LOG_ENTRY (); /* TODO: not using this yet, EVERYTHING assumes 512! */ - sectsize = 512; + sectsize = OCFS_SECTOR_SIZE; status = ocfs_read_disk_header (&buffer, sb); if (status < 0) { @@ -173,15 +170,16 @@ int ocfs_mount_volume (struct super_bloc if (volDiskHdr->excl_mount != NOT_MOUNTED_EXCLUSIVE) { if (is_exclusive_node_alive (sb, volDiskHdr)) { - LOG_ERROR_ARGS - ("Cannot mount. Another node (%d) has this volume mounted exclusive.\n", - volDiskHdr->excl_mount); + LOG_ERROR_ARGS ("Cannot mount. Node %d has this " + "volume mounted exclusive.\n", + volDiskHdr->excl_mount); status = -EACCES; goto leave; } else { - LOG_ERROR_ARGS - ("Cannot mount. Node %d mounted this volume exclusive, but has DIED! Please recover.\n", - volDiskHdr->excl_mount); + LOG_ERROR_ARGS ("Cannot mount. Node %d mounted this " + "volume exclusive, but has DIED! " + "Please recover.\n", + volDiskHdr->excl_mount); status = -EACCES; goto leave; } @@ -223,11 +221,20 @@ int ocfs_mount_volume (struct super_bloc sb->u.generic_sbp = (void *)osb; osb->sb = sb; + osb->reclaim_id = reclaim_id; + status = ocfs_initialize_osb (osb, volDiskHdr, volLabel, sectsize); if (status < 0) { LOG_ERROR_STATUS (status); goto leave; } + + if (osb->vol_layout.root_start_off == 0 && osb->node_num != 0) { + LOG_ERROR_ARGS("The volume must be mounted by node 0 before it can " + "be used and you are node %u\n", osb->node_num); + status = -EINVAL; + goto leave; + } osb->sect_size = sectsize; @@ -243,9 +250,8 @@ int ocfs_mount_volume (struct super_bloc } spin_unlock (&ProtectOSBId); -#if defined(DLM_THREAD_PER_VOLUME) - ocfs_down_sem (&(osb->osb_res), true); /* Launch the DLM thread for the mounted volume */ + ocfs_down_sem (&(osb->osb_res), true); child_pid = kernel_thread (ocfs_volume_thread, osb, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); if (child_pid < 0) { @@ -259,7 +265,6 @@ int ocfs_mount_volume (struct super_bloc osb->dlm_task = NULL; } ocfs_up_sem 
(&(osb->osb_res)); -#endif /* Add proc entry for this volume */ ocfs_proc_add_volume (osb); @@ -269,7 +274,7 @@ int ocfs_mount_volume (struct super_bloc GlobalMountCount++; if (GlobalMountCount == 1) { /* Start the ipcdlm */ - ocfs_init_ipc_dlm (NULL, OCFS_UDP); + ocfs_init_ipc_dlm (OCFS_UDP); OcfsIpcCtxt.init = true; } spin_unlock (&ProtectMountCount); @@ -299,13 +304,17 @@ int ocfs_mount_volume (struct super_bloc osb->vol_state = VOLUME_MOUNTED; leave: - /* Delete Device on Failure */ ocfs_safefree (buffer); LOG_EXIT_STATUS (status); return status; } /* ocfs_mount_volume */ +static void lockres_hash_free_func (const void *p) +{ + ocfs_free_lockres((ocfs_lock_res *)p); +} + /* * ocfs_dismount_volume() * @@ -316,9 +325,7 @@ int ocfs_dismount_volume (struct super_b bool AcquiredOSB = false; ocfs_super *osb = NULL; ocfs_inode *rootoin; -#if defined(DLM_THREAD_PER_VOLUME) int i; -#endif LOG_ENTRY_ARGS ("(0x%08x)\n", sb); @@ -366,7 +373,7 @@ int ocfs_dismount_volume (struct super_b ocfs_release_oin (rootoin, true); /* Destroy the Hash table */ - ocfs_hash_destroy (&(osb->root_sect_node), ocfs_free); + ocfs_hash_destroy (&(osb->root_sect_node), lockres_hash_free_func); /* Remove the proc element for this volume */ ocfs_proc_remove_volume (osb); @@ -375,7 +382,6 @@ int ocfs_dismount_volume (struct super_b OCFS_SET_FLAG (osb->osb_flags, OCFS_OSB_FLAGS_BEING_DISMOUNTED); osb->vol_state = VOLUME_BEING_DISMOUNTED; -#if defined(DLM_THREAD_PER_VOLUME) /* Wait for this volume's NM thread to exit */ if (osb->dlm_task) { LOG_TRACE_STR ("Waiting for nmthread to exit...."); @@ -383,7 +389,6 @@ int ocfs_dismount_volume (struct super_b wait_for_completion (&(osb->complete)); osb->dlm_task = NULL; } -#endif ocfs_down_sem (&(OcfsGlobalCtxt.res), true); vfree (osb->cluster_bitmap.buf); diff -urNp x-ref/fs/ocfs/Linux/ocfsport.c x/fs/ocfs/Linux/ocfsport.c --- x-ref/fs/ocfs/Linux/ocfsport.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Linux/ocfsport.c Mon Oct 21 04:41:19 2002 @@ -34,7 +34,6 @@ 
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_PORT #ifndef DEBUGOCFS -extern struct list_head item_list; static int get_overlap_type (ub8 new, ub8 newend, ub8 exist, ub8 existend); static bool OcfsCoalesceExtentMapEntry (ocfs_extent_map * map, sb8 virtual, sb8 physical, sb8 sectorcount); @@ -63,6 +62,7 @@ void ocfs_init_sem (ocfs_sem * res) * sem, it blocks waiting for the sem to be released. * ocfs_up_sem() decrements the count by 1, if the owning * process releases the sem. The sem is released when the counter hits 0. + * NT Port leftover, we want to get rid of this as soon as possible */ bool ocfs_down_sem (ocfs_sem * res, bool wait) { @@ -75,23 +75,32 @@ bool ocfs_down_sem (ocfs_sem * res, bool goto bail; } +#define WAIT_TILL_ACQUIRE(a) \ + do { \ + down(&((a)->sem)); \ + (a)->pid = current->pid; \ + (a)->count = 1; \ + } while(0) + if (res->pid == 0) { - down (&(res->sem)); - res->pid = current->pid; - res->count = 1; + if (wait) + WAIT_TILL_ACQUIRE(res); + else { + if (!down_trylock(&(res->sem))) { + res->pid = current->pid; + res->count = 1; + } + else + ret = false; + } } else { if (res->pid == current->pid) { res->count++; } else { - if (wait) { - /* wait till acquire */ - down (&(res->sem)); - res->pid = current->pid; - res->count = 1; - } else { + if (wait) + WAIT_TILL_ACQUIRE(res); + else ret = false; - goto bail; - } } } @@ -105,6 +114,8 @@ bool ocfs_down_sem (ocfs_sem * res, bool * * ocfs_up_sem() decrements the count by 1, if the owning * process releases the sem. The sem is released when the counter hits 0. + * Remained of NT port, we really really do not want this nesting + * but for now it's there, we'll clean it up */ void ocfs_up_sem (ocfs_sem * res) { @@ -140,43 +151,19 @@ int ocfs_del_sem (ocfs_sem * res) } /* ocfs_del_sem */ /* - * ocfs_wait() - * - * Timeout is in ms. 
- */ -int ocfs_wait (void *Object, bool Alertable, ub4 Timeout) -{ - ub4 remjiffies; - ub4 numjiffies = 0; - int status = 0; - - if (Timeout) { - /* 10ms = 1 jiffy, minimum resolution is one jiffy */ - numjiffies = (Timeout / 10); - numjiffies = (numjiffies < 1) ? 1 : numjiffies; - - remjiffies = - interruptible_sleep_on_timeout ((wait_queue_head_t *) - Object, numjiffies); - if (remjiffies == 0) - status = -ETIMEDOUT; - } else { - interruptible_sleep_on ((wait_queue_head_t *) Object); - } - - return status; -} /* ocfs_wait */ - -/* * ocfs_daemonize() * */ -void ocfs_daemonize (char *name) +void ocfs_daemonize (char *name, int len) { sigset_t tmpsig; - sprintf (current->comm, name); daemonize (); + reparent_to_init (); + + if (len > 15) + BUG(); + strncpy (current->comm, name, len); /* Block all signals except SIGKILL, SIGSTOP, SIGHUP and SIGINT */ spin_lock_irq (¤t->sigmask_lock); @@ -195,15 +182,21 @@ void ocfs_daemonize (char *name) bool ocfs_get_task (pid_t pid, struct task_struct ** task) { struct task_struct *p; + int ret = false; + #warning This is slow. + #warning this needs to go in a cleanup + read_lock(&tasklist_lock); for_each_task (p) { if (p->pid == pid) { *task = p; - return true; + ret = true; + break; } } + read_unlock(&tasklist_lock); - return false; + return ret; } /* ocfs_get_task */ @@ -211,6 +204,9 @@ bool ocfs_get_task (pid_t pid, struct ta * ocfs_sleep() * * The interval time is in milliseconds + * + * This function needs to be removed. + * Instead call schedule_timeout() directly and handle signals. */ int ocfs_sleep (ub4 ms) { @@ -219,13 +215,11 @@ int ocfs_sleep (ub4 ms) LOG_ENTRY (); /* 10ms = 1 jiffy, minimum resolution is one jiffy */ - numJiffies = ms / 10; + numJiffies = ms * HZ / 1000; numJiffies = (numJiffies < 1) ? 
1 : numJiffies; - while (numJiffies) { - set_current_state (TASK_INTERRUPTIBLE); - numJiffies = schedule_timeout (numJiffies); - } + set_current_state (TASK_INTERRUPTIBLE); + numJiffies = schedule_timeout (numJiffies); LOG_EXIT (); return 0; @@ -246,11 +240,79 @@ void ocfs_print_qstr (struct qstr *x) printk ("%s", tmp); } /* ocfs_print_qstr */ + #ifdef OCFS_LINUX_MEM_DEBUG #define SUPER_VERBOSE_MEM_DEBUG 1 #endif /* + * ocfs_dbg_slab_alloc() + * + */ +void *ocfs_dbg_slab_alloc (kmem_cache_t *slab, char *file, int line) +{ + void *m; + m = kmem_cache_alloc(slab, GFP_NOFS); +#ifdef OCFS_LINUX_MEM_DEBUG + if (m == NULL) { + LOG_ERROR_ARGS("failed to alloc from slab = %p\n", slab); + } else { + alloc_item *new; + new = kmalloc (sizeof (alloc_item), GFP_NOFS); + new->type = SLAB_ITEM; + new->address = m; + new->u.slab = slab; + snprintf (new->tag, 30, "%d:%s", line, file); + new->tag[29] = '\0'; + list_add (&new->list, &OcfsGlobalCtxt.item_list); +#ifdef SUPER_VERBOSE_MEM_DEBUG + LOG_TRACE_ARGS (" + %x (%p, '%s')\n", m, slab, new->tag); +#endif + } +#endif + return m; +} /* ocfs_dbg_slab_alloc */ + +/* + * ocfs_dbg_slab_free() + * + */ +void ocfs_dbg_slab_free (kmem_cache_t *slab, void *m) +{ + +#ifdef OCFS_LINUX_MEM_DEBUG + struct list_head *iter; + alloc_item *item = NULL; + bool do_free = false; + + list_for_each (iter, &OcfsGlobalCtxt.item_list) { + item = list_entry (iter, alloc_item, list); + + if (item->address == m && item->type == SLAB_ITEM) { +#ifdef SUPER_VERBOSE_MEM_DEBUG + LOG_TRACE_ARGS (" - %x (%p, '%s')\n", m, item->u.slab, item->tag); +#endif + list_del (&item->list); + do_free = true; + break; + } + } + + if (do_free) { + kmem_cache_free(slab, m); + kfree (item); + return; + } + LOG_ERROR_ARGS ("tried to free mem never allocated: %x\n", m); +#endif +#ifndef OCFS_LINUX_MEM_DEBUG + kmem_cache_free(slab, m); +#endif +} /* ocfs_dbg_slab_free */ + + + +/* * ocfs_linux_dbg_alloc() * */ @@ -258,19 +320,19 @@ void *ocfs_linux_dbg_alloc (int Size, ch { void *m; 
- m = kmalloc (Size, GFP_KERNEL); + m = kmalloc (Size, GFP_NOFS); #ifdef OCFS_LINUX_MEM_DEBUG if (m == NULL) { LOG_ERROR_ARGS ("failed! (size=%d)\n", Size); } else { alloc_item *new; - - new = kmalloc (sizeof (alloc_item), GFP_KERNEL); + new = kmalloc (sizeof (alloc_item), GFP_NOFS); + new->type = KMALLOC_ITEM; new->address = m; - new->length = Size; + new->u.length = Size; snprintf (new->tag, 30, "%d:%s", line, file); new->tag[29] = '\0'; - list_add (&new->list, &item_list); + list_add (&new->list, &OcfsGlobalCtxt.item_list); #ifdef SUPER_VERBOSE_MEM_DEBUG LOG_TRACE_ARGS (" + %x (%d, '%s')\n", m, Size, new->tag); #endif @@ -288,21 +350,27 @@ void ocfs_linux_dbg_free (const void *Bu #ifdef OCFS_LINUX_MEM_DEBUG struct list_head *iter; + alloc_item *item = NULL; + bool do_free = false; - list_for_each (iter, &item_list) { - alloc_item *item = list_entry (iter, alloc_item, list); + list_for_each (iter, &OcfsGlobalCtxt.item_list) { + item = list_entry (iter, alloc_item, list); - if (item->address == Buffer) { + if (item->address == Buffer && item->type == KMALLOC_ITEM) { #ifdef SUPER_VERBOSE_MEM_DEBUG LOG_TRACE_ARGS (" - %x (%d, '%s')\n", Buffer, - item->length, item->tag); + item->u.length, item->tag); #endif - kfree (Buffer); list_del (&item->list); - kfree (item); - return; + do_free = true; + break; } } + if (do_free) { + kfree (Buffer); + kfree (item); + return; + } LOG_ERROR_ARGS ("tried to free mem never allocated: %x\n", Buffer); #endif #ifndef OCFS_LINUX_MEM_DEBUG @@ -369,7 +437,7 @@ bool ocfs_linux_get_dir_entry_offset (oc if (fileEntry) *fileEntry = ent; else - ocfs_safefree (ent); + ocfs_release_file_entry (ent); } return (*off != -1); } /* ocfs_linux_get_dir_entry_offset */ @@ -444,6 +512,7 @@ void ocfs_extent_map_destroy (ocfs_exten OCFS_ASSERT (map != NULL); if (map->initialized) { spin_lock(&(map->lock)); + #warning RACE! need to retest map->initialized here! 
map->capacity = 0; map->count = 0; ocfs_safefree (map->buf); @@ -466,6 +535,7 @@ ub4 ocfs_extent_map_get_count (ocfs_exte LOG_ENTRY (); OCFS_ASSERT (map != NULL); + #warning this locking almost has to be a bug spin_lock(&(map->lock)); ret = map->count; spin_unlock(&(map->lock)); diff -urNp x-ref/fs/ocfs/Linux/ocfsproc.c x/fs/ocfs/Linux/ocfsproc.c --- x-ref/fs/ocfs/Linux/ocfsproc.c Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Linux/ocfsproc.c Mon Oct 21 04:41:19 2002 @@ -31,10 +31,6 @@ /* Tracing */ #define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_PROC -#ifdef OCFS_LINUX_MEM_DEBUG -extern struct list_head item_list; -#endif - /* * ocfs_proc_init() * @@ -49,18 +45,12 @@ int ocfs_proc_init (void) } *p, ProcList[] = { - { - "ocfs/version", NULL, ocfs_proc_version} - , { - "ocfs/nodename", NULL, ocfs_proc_nodename} - , + { "ocfs/version", NULL, ocfs_proc_version }, + { "ocfs/nodename", NULL, ocfs_proc_nodename }, #ifdef OCFS_LINUX_MEM_DEBUG - { - "ocfs/memallocs", NULL, ocfs_proc_memallocs} - , + { "ocfs/memallocs", NULL, ocfs_proc_memallocs }, #endif - { - NULL,} + { NULL, } }; LOG_ENTRY (); @@ -126,31 +116,58 @@ static int ocfs_proc_calc_metrics (char static int ocfs_proc_memallocs (char *page, char **start, off_t off, int count, int *eof, void *data) { - int ret; + int ret, proc_overflow=0; struct list_head *iter; alloc_item *item; int len = 0; + char *slabname; LOG_ENTRY (); - ret = sprintf ((char *) (page + len), "Pointer \tSize\tLine:File\n"); + ret = sprintf ((char *) (page + len), "%8s %10s %30s\n", + "Pointer", "Size/Slab", "Line:File"); + printk("%8s %10s %30s\n", "Pointer", "Size/Slab", "Line:File"); len += ret; - list_for_each (iter, &item_list) { + list_for_each (iter, &OcfsGlobalCtxt.item_list) { if (len >= 4096) { + proc_overflow = 1; LOG_ERROR_STR ("proc file truncated!\n"); - break; } item = list_entry (iter, alloc_item, list); - ret = - snprintf ((char *) (page + len), (4096 - len), - "%x\t%d\t%s\n", item->address, item->length, - item->tag); + switch 
(item->type) + { + case SLAB_ITEM: + if (item->u.slab==OcfsGlobalCtxt.oin_cache) + slabname="oin"; + else if (item->u.slab==OcfsGlobalCtxt.ofile_cache) + slabname="ofile"; + else if (item->u.slab==OcfsGlobalCtxt.lockres_cache) + slabname="lockres"; + else if (item->u.slab==OcfsGlobalCtxt.fe_cache) + slabname="fe"; + else + slabname="unknown"; + if (!proc_overflow) + ret = snprintf ((char *) (page + len), (4096 - len), + "%08x %8s %s\n", item->address, slabname, item->tag); + printk("%08x %8s %s\n", item->address, slabname, item->tag); + break; + case KMALLOC_ITEM: + case VMALLOC_ITEM: + default: + if (!proc_overflow) + ret = snprintf ((char *) (page + len), (4096 - len), + "%08x %8d %s\n", item->address, item->u.length, item->tag); + printk("%08x %8d %s\n", item->address, item->u.length, item->tag); + break; + } if (ret < 0) { LOG_ERROR_STR ("uh oh failed to sprintf!\n"); - break; + proc_overflow = 1; } - len += ret; + if (!proc_overflow) + len += ret; } ret = ocfs_proc_calc_metrics (page, start, off, count, eof, len); diff -urNp x-ref/fs/ocfs/Makefile x/fs/ocfs/Makefile --- x-ref/fs/ocfs/Makefile Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/Makefile Mon Oct 21 04:41:19 2002 @@ -20,10 +20,9 @@ CPARTNER = \ Linux/ocfsproc.c \ Linux/ocfsioctl.c \ Linux/ocfshash.c \ - Linux/ocfsconf.c \ - Linux/ocfsfilp.c \ Linux/ocfsipc.c \ Common/ocfsgennm.c \ + Common/ocfsheartbeat.c \ Common/ocfsgenvote.c \ Common/ocfsgendlm.c diff -urNp x-ref/fs/ocfs/ocfs.conf x/fs/ocfs/ocfs.conf --- x-ref/fs/ocfs/ocfs.conf Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/ocfs.conf Mon Oct 21 04:41:19 2002 @@ -1,32 +1,38 @@ # -# ocfs ipcdlm config +# ocfs config # Ensure this file exists in /etc -# Each ipcdelm section contains the information for the local node only. -# Currently, the dlm will pick up upto three local transports... -# will use only the active ones. 
# -ipcdlm: - ip_address = - ip_port = - subnet_mask = - type = udp - hostname = - active = yes - -ipcdlm: - ip_address = - ip_port = - subnet_mask = - type = udp - hostname = - active = no - -ipcdlm: - ip_address = - ip_port = - subnet_mask = - type = udp - hostname = - active = no +# node_name serves as a human readable node identifier. +# Even though the value should be unique in a cluster, ocfs does +# not require it as it does not use it to identify a node. +node_name = +# node_number allows the admin to specify the preferred node +# number for the node in the cluster. However, it works only +# as a suggestion, because ocfs will not fail the mount if the +# specified node number is already occupied. Note that the +# node number of a node can vary across mounts. +node_number = + +# debug_context is used for debugging. It will work only if the +# driver is built with -DTRACE +debug_context = + +# debug_level is used for debugging. It will work only if the +# driver is built with -DTRACE +debug_level = + +# ip_address specifies the host's ip address used by ocfs listener. +ip_address = + +# ip_port specifies the host's ip port used by ocfs listener. +ip_port = + +# guid parameter is added by the ocfs_uid_gen script and is used +# by ocfs to uniquely identify a node. The guid holds within it +# the mac address for the interface identified by the ip address. +# Run "ocfs_uid_gen -c" to generate the guid the first time. +# If the mac address changes, run "ocfs_uid_gen -r" to get a +# new number guid, followed by "mount -o reclaimid" to reclaim the +# original node number for each volume. diff -urNp x-ref/fs/ocfs/ocfsver.sh x/fs/ocfs/ocfsver.sh --- x-ref/fs/ocfs/ocfsver.sh Mon Oct 21 04:38:41 2002 +++ x/fs/ocfs/ocfsver.sh Mon Oct 21 04:41:19 2002 @@ -25,7 +25,7 @@ cat > $VERFILE <