Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw

* git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw: (57 commits) [GFS2] Accept old format NFS filehandles [GFS2] Small fixes to logging code [DLM] dump more lock values [GFS2] Remove i_mode passing from NFS File Handle [GFS2] Obtaining no_formal_ino from directory entry [GFS2] git-gfs2-nmw-build-fix [GFS2] System won't suspend with GFS2 file system mounted [GFS2] remounting w/o acl option leaves acls enabled [GFS2] inode size inconsistency [DLM] Telnet to port 21064 can stop all lockspaces [GFS2] Fix gfs2_block_truncate_page err return [GFS2] Addendum to the journaled file/unmount patch [GFS2] Simplify multiple glock aquisition [GFS2] assertion failure after writing to journaled file, umount [GFS2] Use zero_user_page() in stuffed_readpage() [GFS2] Remove bogus '\0' in rgrp.c [GFS2] Journaled file write/unstuff bug [DLM] don't require FS flag on all nodes [GFS2] Fix deallocation issues [GFS2] return conflicts for GETLK ...
2007-07-10 13:56:13 -07:00 · 2007-07-10 13:56:13 -07:00 · 1b21f458dd
--- a/fs/dlm/Makefile
+++ b/fs/dlm/Makefile
@ -8,6 +8,7 @@ dlm-y :=			ast.o \
 				member.o \
 				memory.o \
 				midcomms.o \
+				netlink.o \
 				lowcomms.o \
 				rcom.o \
 				recover.o \
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@ -90,6 +90,7 @@ struct cluster {
 	unsigned int cl_scan_secs;
 	unsigned int cl_log_debug;
 	unsigned int cl_protocol;
+	unsigned int cl_timewarn_cs;
 };

 enum {
@ -103,6 +104,7 @@ enum {
 	CLUSTER_ATTR_SCAN_SECS,
 	CLUSTER_ATTR_LOG_DEBUG,
 	CLUSTER_ATTR_PROTOCOL,
+	CLUSTER_ATTR_TIMEWARN_CS,
 };

 struct cluster_attribute {
@ -162,6 +164,7 @@ CLUSTER_ATTR(toss_secs, 1);
 CLUSTER_ATTR(scan_secs, 1);
 CLUSTER_ATTR(log_debug, 0);
 CLUSTER_ATTR(protocol, 0);
+CLUSTER_ATTR(timewarn_cs, 1);

 static struct configfs_attribute *cluster_attrs[] = {
 	[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@ -174,6 +177,7 @@ static struct configfs_attribute *cluster_attrs[] = {
 	[CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
 	[CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
 	[CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
+	[CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
 	NULL,
 };

@ -429,6 +433,8 @@ static struct config_group *make_cluster(struct config_group *g,
 	cl->cl_toss_secs = dlm_config.ci_toss_secs;
 	cl->cl_scan_secs = dlm_config.ci_scan_secs;
 	cl->cl_log_debug = dlm_config.ci_log_debug;
+	cl->cl_protocol = dlm_config.ci_protocol;
+	cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;

 	space_list = &sps->ss_group;
 	comm_list = &cms->cs_group;
@ -748,9 +754,16 @@ static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len)

 static struct space *get_space(char *name)
 {
+	struct config_item *i;
+
 	if (!space_list)
 		return NULL;
-	return to_space(config_group_find_obj(space_list, name));
+
+	down(&space_list->cg_subsys->su_sem);
+	i = config_group_find_obj(space_list, name);
+	up(&space_list->cg_subsys->su_sem);
+
+	return to_space(i);
 }

 static void put_space(struct space *sp)
@ -776,20 +789,20 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
 			if (cm->nodeid != nodeid)
 				continue;
 			found = 1;
+			config_item_get(i);
 			break;
 		} else {
 			if (!cm->addr_count ||
 			    memcmp(cm->addr[0], addr, sizeof(*addr)))
 				continue;
 			found = 1;
+			config_item_get(i);
 			break;
 		}
 	}
 	up(&clusters_root.subsys.su_sem);

-	if (found)
-		config_item_get(i);
-	else
+	if (!found)
 		cm = NULL;
 	return cm;
 }
@ -909,6 +922,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
 #define DEFAULT_SCAN_SECS          5
 #define DEFAULT_LOG_DEBUG          0
 #define DEFAULT_PROTOCOL           0
+#define DEFAULT_TIMEWARN_CS      500 /* 5 sec = 500 centiseconds */

 struct dlm_config_info dlm_config = {
 	.ci_tcp_port = DEFAULT_TCP_PORT,
@ -920,6 +934,7 @@ struct dlm_config_info dlm_config = {
 	.ci_toss_secs = DEFAULT_TOSS_SECS,
 	.ci_scan_secs = DEFAULT_SCAN_SECS,
 	.ci_log_debug = DEFAULT_LOG_DEBUG,
-	.ci_protocol = DEFAULT_PROTOCOL
+	.ci_protocol = DEFAULT_PROTOCOL,
+	.ci_timewarn_cs = DEFAULT_TIMEWARN_CS
 };

--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@ -27,6 +27,7 @@ struct dlm_config_info {
 	int ci_scan_secs;
 	int ci_log_debug;
 	int ci_protocol;
+	int ci_timewarn_cs;
 };

 extern struct dlm_config_info dlm_config;
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@ -17,6 +17,7 @@
 #include <linux/debugfs.h>

 #include "dlm_internal.h"
+#include "lock.h"

 #define DLM_DEBUG_BUF_LEN 4096
 static char debug_buf[DLM_DEBUG_BUF_LEN];
@ -26,6 +27,8 @@ static struct dentry *dlm_root;

 struct rsb_iter {
 	int entry;
+	int locks;
+	int header;
 	struct dlm_ls *ls;
 	struct list_head *next;
 	struct dlm_rsb *rsb;
@ -57,8 +60,8 @@ static char *print_lockmode(int mode)
 	}
 }

-static void print_lock(struct seq_file *s, struct dlm_lkb *lkb,
-		       struct dlm_rsb *res)
+static void print_resource_lock(struct seq_file *s, struct dlm_lkb *lkb,
+				struct dlm_rsb *res)
 {
 	seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode));

@ -85,6 +88,8 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
 	struct dlm_lkb *lkb;
 	int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list;

+	lock_rsb(res);
+
 	seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length);
 	for (i = 0; i < res->res_length; i++) {
 		if (isprint(res->res_name[i]))
@ -129,15 +134,15 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
 	/* Print the locks attached to this resource */
 	seq_printf(s, "Granted Queue\n");
 	list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue)
-		print_lock(s, lkb, res);
+		print_resource_lock(s, lkb, res);

 	seq_printf(s, "Conversion Queue\n");
 	list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue)
-		print_lock(s, lkb, res);
+		print_resource_lock(s, lkb, res);

 	seq_printf(s, "Waiting Queue\n");
 	list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue)
-		print_lock(s, lkb, res);
+		print_resource_lock(s, lkb, res);

 	if (list_empty(&res->res_lookup))
 		goto out;
@ -151,6 +156,61 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
 		seq_printf(s, "\n");
 	}
 out:
+	unlock_rsb(res);
+	return 0;
+}
+
+static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, struct dlm_rsb *r)
+{
+	struct dlm_user_args *ua;
+	unsigned int waiting = 0;
+	uint64_t xid = 0;
+
+	if (lkb->lkb_flags & DLM_IFL_USER) {
+		ua = (struct dlm_user_args *) lkb->lkb_astparam;
+		if (ua)
+			xid = ua->xid;
+	}
+
+	if (lkb->lkb_timestamp)
+		waiting = jiffies_to_msecs(jiffies - lkb->lkb_timestamp);
+
+	/* id nodeid remid pid xid exflags flags sts grmode rqmode time_ms
+	   r_nodeid r_len r_name */
+
+	seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %u %u %d \"%s\"\n",
+		   lkb->lkb_id,
+		   lkb->lkb_nodeid,
+		   lkb->lkb_remid,
+		   lkb->lkb_ownpid,
+		   (unsigned long long)xid,
+		   lkb->lkb_exflags,
+		   lkb->lkb_flags,
+		   lkb->lkb_status,
+		   lkb->lkb_grmode,
+		   lkb->lkb_rqmode,
+		   waiting,
+		   r->res_nodeid,
+		   r->res_length,
+		   r->res_name);
+}
+
+static int print_locks(struct dlm_rsb *r, struct seq_file *s)
+{
+	struct dlm_lkb *lkb;
+
+	lock_rsb(r);
+
+	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue)
+		print_lock(s, lkb, r);
+
+	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue)
+		print_lock(s, lkb, r);
+
+	list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue)
+		print_lock(s, lkb, r);
+
+	unlock_rsb(r);
 	return 0;
 }

@ -166,6 +226,9 @@ static int rsb_iter_next(struct rsb_iter *ri)
 			read_lock(&ls->ls_rsbtbl[i].lock);
 			if (!list_empty(&ls->ls_rsbtbl[i].list)) {
 				ri->next = ls->ls_rsbtbl[i].list.next;
+				ri->rsb = list_entry(ri->next, struct dlm_rsb,
+							res_hashchain);
+				dlm_hold_rsb(ri->rsb);
 				read_unlock(&ls->ls_rsbtbl[i].lock);
 				break;
 			}
@ -176,6 +239,7 @@ static int rsb_iter_next(struct rsb_iter *ri)
 		if (ri->entry >= ls->ls_rsbtbl_size)
 			return 1;
 	} else {
+		struct dlm_rsb *old = ri->rsb;
 		i = ri->entry;
 		read_lock(&ls->ls_rsbtbl[i].lock);
 		ri->next = ri->next->next;
@ -184,11 +248,14 @@ static int rsb_iter_next(struct rsb_iter *ri)
 			ri->next = NULL;
 			ri->entry++;
 			read_unlock(&ls->ls_rsbtbl[i].lock);
+			dlm_put_rsb(old);
 			goto top;
                }
+		ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
+		dlm_hold_rsb(ri->rsb);
 		read_unlock(&ls->ls_rsbtbl[i].lock);
+		dlm_put_rsb(old);
 	}
-	ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);

 	return 0;
 }
@ -202,7 +269,7 @@ static struct rsb_iter *rsb_iter_init(struct dlm_ls *ls)
 {
 	struct rsb_iter *ri;

-	ri = kmalloc(sizeof *ri, GFP_KERNEL);
+	ri = kzalloc(sizeof *ri, GFP_KERNEL);
 	if (!ri)
 		return NULL;

@ -260,7 +327,17 @@ static int rsb_seq_show(struct seq_file *file, void *iter_ptr)
 {
 	struct rsb_iter *ri = iter_ptr;

-	print_resource(ri->rsb, file);
+	if (ri->locks) {
+		if (ri->header) {
+			seq_printf(file, "id nodeid remid pid xid exflags flags "
+					 "sts grmode rqmode time_ms r_nodeid "
+					 "r_len r_name\n");
+			ri->header = 0;
+		}
+		print_locks(ri->rsb, file);
+	} else {
+		print_resource(ri->rsb, file);
+	}

 	return 0;
 }
@ -295,6 +372,83 @@ static const struct file_operations rsb_fops = {
 	.release = seq_release
 };

+/*
+ * Dump state in compact per-lock listing
+ */
+
+static struct rsb_iter *locks_iter_init(struct dlm_ls *ls, loff_t *pos)
+{
+	struct rsb_iter *ri;
+
+	ri = kzalloc(sizeof *ri, GFP_KERNEL);
+	if (!ri)
+		return NULL;
+
+	ri->ls = ls;
+	ri->entry = 0;
+	ri->next = NULL;
+	ri->locks = 1;
+
+	if (*pos == 0)
+		ri->header = 1;
+
+	if (rsb_iter_next(ri)) {
+		rsb_iter_free(ri);
+		return NULL;
+	}
+
+	return ri;
+}
+
+static void *locks_seq_start(struct seq_file *file, loff_t *pos)
+{
+	struct rsb_iter *ri;
+	loff_t n = *pos;
+
+	ri = locks_iter_init(file->private, pos);
+	if (!ri)
+		return NULL;
+
+	while (n--) {
+		if (rsb_iter_next(ri)) {
+			rsb_iter_free(ri);
+			return NULL;
+		}
+	}
+
+	return ri;
+}
+
+static struct seq_operations locks_seq_ops = {
+	.start = locks_seq_start,
+	.next  = rsb_seq_next,
+	.stop  = rsb_seq_stop,
+	.show  = rsb_seq_show,
+};
+
+static int locks_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int ret;
+
+	ret = seq_open(file, &locks_seq_ops);
+	if (ret)
+		return ret;
+
+	seq = file->private_data;
+	seq->private = inode->i_private;
+
+	return 0;
+}
+
+static const struct file_operations locks_fops = {
+	.owner   = THIS_MODULE,
+	.open    = locks_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release
+};
+
 /*
 * dump lkb's on the ls_waiters list
 */
@ -362,6 +516,20 @@ int dlm_create_debug_file(struct dlm_ls *ls)
 		return -ENOMEM;
 	}

+	memset(name, 0, sizeof(name));
+	snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_locks", ls->ls_name);
+
+	ls->ls_debug_locks_dentry = debugfs_create_file(name,
+							S_IFREG | S_IRUGO,
+							dlm_root,
+							ls,
+							&locks_fops);
+	if (!ls->ls_debug_locks_dentry) {
+		debugfs_remove(ls->ls_debug_waiters_dentry);
+		debugfs_remove(ls->ls_debug_rsb_dentry);
+		return -ENOMEM;
+	}
+
 	return 0;
 }

@ -371,6 +539,8 @@ void dlm_delete_debug_file(struct dlm_ls *ls)
 		debugfs_remove(ls->ls_debug_rsb_dentry);
 	if (ls->ls_debug_waiters_dentry)
 		debugfs_remove(ls->ls_debug_waiters_dentry);
+	if (ls->ls_debug_locks_dentry)
+		debugfs_remove(ls->ls_debug_locks_dentry);
 }

 int dlm_register_debugfs(void)
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@ -151,6 +151,7 @@ struct dlm_args {
 	void			*bastaddr;
 	int			mode;
 	struct dlm_lksb		*lksb;
+	unsigned long		timeout;
 };


@ -213,6 +214,9 @@ struct dlm_args {
 #define DLM_IFL_OVERLAP_UNLOCK  0x00080000
 #define DLM_IFL_OVERLAP_CANCEL  0x00100000
 #define DLM_IFL_ENDOFLIFE	0x00200000
+#define DLM_IFL_WATCH_TIMEWARN	0x00400000
+#define DLM_IFL_TIMEOUT_CANCEL	0x00800000
+#define DLM_IFL_DEADLOCK_CANCEL	0x01000000
 #define DLM_IFL_USER		0x00000001
 #define DLM_IFL_ORPHAN		0x00000002

@ -243,6 +247,9 @@ struct dlm_lkb {
 	struct list_head	lkb_wait_reply;	/* waiting for remote reply */
 	struct list_head	lkb_astqueue;	/* need ast to be sent */
 	struct list_head	lkb_ownqueue;	/* list of locks for a process */
+	struct list_head	lkb_time_list;
+	unsigned long		lkb_timestamp;
+	unsigned long		lkb_timeout_cs;

 	char			*lkb_lvbptr;
 	struct dlm_lksb		*lkb_lksb;      /* caller's status block */
@ -447,12 +454,16 @@ struct dlm_ls {
 	struct mutex		ls_orphans_mutex;
 	struct list_head	ls_orphans;

+	struct mutex		ls_timeout_mutex;
+	struct list_head	ls_timeout;
+
 	struct list_head	ls_nodes;	/* current nodes in ls */
 	struct list_head	ls_nodes_gone;	/* dead node list, recovery */
 	int			ls_num_nodes;	/* number of nodes in ls */
 	int			ls_low_nodeid;
 	int			ls_total_weight;
 	int			*ls_node_array;
+	gfp_t			ls_allocation;

 	struct dlm_rsb		ls_stub_rsb;	/* for returning errors */
 	struct dlm_lkb		ls_stub_lkb;	/* for returning errors */
@ -460,9 +471,12 @@ struct dlm_ls {

 	struct dentry		*ls_debug_rsb_dentry; /* debugfs */
 	struct dentry		*ls_debug_waiters_dentry; /* debugfs */
+	struct dentry		*ls_debug_locks_dentry; /* debugfs */

 	wait_queue_head_t	ls_uevent_wait;	/* user part of join/leave */
 	int			ls_uevent_result;
+	struct completion	ls_members_done;
+	int			ls_members_result;

 	struct miscdevice       ls_device;

@ -472,6 +486,7 @@ struct dlm_ls {
 	struct task_struct	*ls_recoverd_task;
 	struct mutex		ls_recoverd_active;
 	spinlock_t		ls_recover_lock;
+	unsigned long		ls_recover_begin; /* jiffies timestamp */
 	uint32_t		ls_recover_status; /* DLM_RS_ */
 	uint64_t		ls_recover_seq;
 	struct dlm_recover	*ls_recover_args;
@ -501,6 +516,7 @@ struct dlm_ls {
 #define LSFL_RCOM_READY		3
 #define LSFL_RCOM_WAIT		4
 #define LSFL_UEVENT_WAIT	5
+#define LSFL_TIMEWARN		6

 /* much of this is just saving user space pointers associated with the
   lock that we pass back to the user lib with an ast */
@ -518,6 +534,7 @@ struct dlm_user_args {
 	void __user		*castaddr;
 	void __user		*bastparam;
 	void __user		*bastaddr;
+	uint64_t		xid;
 };

 #define DLM_PROC_FLAGS_CLOSING 1
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@ -82,10 +82,13 @@ static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode);
 static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb);
 static int send_remove(struct dlm_rsb *r);
 static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
+static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
 static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
 				    struct dlm_message *ms);
 static int receive_extralen(struct dlm_message *ms);
 static void do_purge(struct dlm_ls *ls, int nodeid, int pid);
+static void del_timeout(struct dlm_lkb *lkb);
+void dlm_timeout_warn(struct dlm_lkb *lkb);

 /*
 * Lock compatibilty matrix - thanks Steve
@ -194,17 +197,17 @@ void dlm_dump_rsb(struct dlm_rsb *r)

 /* Threads cannot use the lockspace while it's being recovered */

-static inline void lock_recovery(struct dlm_ls *ls)
+static inline void dlm_lock_recovery(struct dlm_ls *ls)
 {
 	down_read(&ls->ls_in_recovery);
 }

-static inline void unlock_recovery(struct dlm_ls *ls)
+void dlm_unlock_recovery(struct dlm_ls *ls)
 {
 	up_read(&ls->ls_in_recovery);
 }

-static inline int lock_recovery_try(struct dlm_ls *ls)
+int dlm_lock_recovery_try(struct dlm_ls *ls)
 {
 	return down_read_trylock(&ls->ls_in_recovery);
 }
@ -286,8 +289,22 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
 	if (is_master_copy(lkb))
 		return;

+	del_timeout(lkb);
+
 	DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););

+	/* if the operation was a cancel, then return -DLM_ECANCEL, if a
+	   timeout caused the cancel then return -ETIMEDOUT */
+	if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) {
+		lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL;
+		rv = -ETIMEDOUT;
+	}
+
+	if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) {
+		lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL;
+		rv = -EDEADLK;
+	}
+
 	lkb->lkb_lksb->sb_status = rv;
 	lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags;

@ -581,6 +598,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
 	kref_init(&lkb->lkb_ref);
 	INIT_LIST_HEAD(&lkb->lkb_ownqueue);
 	INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
+	INIT_LIST_HEAD(&lkb->lkb_time_list);

 	get_random_bytes(&bucket, sizeof(bucket));
 	bucket &= (ls->ls_lkbtbl_size - 1);
@ -985,15 +1003,136 @@ void dlm_scan_rsbs(struct dlm_ls *ls)
 {
 	int i;

-	if (dlm_locking_stopped(ls))
-		return;
-
 	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
 		shrink_bucket(ls, i);
+		if (dlm_locking_stopped(ls))
+			break;
 		cond_resched();
 	}
 }

+static void add_timeout(struct dlm_lkb *lkb)
+{
+	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+
+	if (is_master_copy(lkb)) {
+		lkb->lkb_timestamp = jiffies;
+		return;
+	}
+
+	if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) &&
+	    !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
+		lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN;
+		goto add_it;
+	}
+	if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
+		goto add_it;
+	return;
+
+ add_it:
+	DLM_ASSERT(list_empty(&lkb->lkb_time_list), dlm_print_lkb(lkb););
+	mutex_lock(&ls->ls_timeout_mutex);
+	hold_lkb(lkb);
+	lkb->lkb_timestamp = jiffies;
+	list_add_tail(&lkb->lkb_time_list, &ls->ls_timeout);
+	mutex_unlock(&ls->ls_timeout_mutex);
+}
+
+static void del_timeout(struct dlm_lkb *lkb)
+{
+	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+
+	mutex_lock(&ls->ls_timeout_mutex);
+	if (!list_empty(&lkb->lkb_time_list)) {
+		list_del_init(&lkb->lkb_time_list);
+		unhold_lkb(lkb);
+	}
+	mutex_unlock(&ls->ls_timeout_mutex);
+}
+
+/* FIXME: is it safe to look at lkb_exflags, lkb_flags, lkb_timestamp, and
+   lkb_lksb_timeout without lock_rsb?  Note: we can't lock timeout_mutex
+   and then lock rsb because of lock ordering in add_timeout.  We may need
+   to specify some special timeout-related bits in the lkb that are just to
+   be accessed under the timeout_mutex. */
+
+void dlm_scan_timeout(struct dlm_ls *ls)
+{
+	struct dlm_rsb *r;
+	struct dlm_lkb *lkb;
+	int do_cancel, do_warn;
+
+	for (;;) {
+		if (dlm_locking_stopped(ls))
+			break;
+
+		do_cancel = 0;
+		do_warn = 0;
+		mutex_lock(&ls->ls_timeout_mutex);
+		list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) {
+
+			if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) &&
+			    time_after_eq(jiffies, lkb->lkb_timestamp +
+					  lkb->lkb_timeout_cs * HZ/100))
+				do_cancel = 1;
+
+			if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
+			    time_after_eq(jiffies, lkb->lkb_timestamp +
+				   	   dlm_config.ci_timewarn_cs * HZ/100))
+				do_warn = 1;
+
+			if (!do_cancel && !do_warn)
+				continue;
+			hold_lkb(lkb);
+			break;
+		}
+		mutex_unlock(&ls->ls_timeout_mutex);
+
+		if (!do_cancel && !do_warn)
+			break;
+
+		r = lkb->lkb_resource;
+		hold_rsb(r);
+		lock_rsb(r);
+
+		if (do_warn) {
+			/* clear flag so we only warn once */
+			lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
+			if (!(lkb->lkb_exflags & DLM_LKF_TIMEOUT))
+				del_timeout(lkb);
+			dlm_timeout_warn(lkb);
+		}
+
+		if (do_cancel) {
+			log_debug(ls, "timeout cancel %x node %d %s",
+				  lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+			lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
+			lkb->lkb_flags |= DLM_IFL_TIMEOUT_CANCEL;
+			del_timeout(lkb);
+			_cancel_lock(r, lkb);
+		}
+
+		unlock_rsb(r);
+		unhold_rsb(r);
+		dlm_put_lkb(lkb);
+	}
+}
+
+/* This is only called by dlm_recoverd, and we rely on dlm_ls_stop() stopping
+   dlm_recoverd before checking/setting ls_recover_begin. */
+
+void dlm_adjust_timeouts(struct dlm_ls *ls)
+{
+	struct dlm_lkb *lkb;
+	long adj = jiffies - ls->ls_recover_begin;
+
+	ls->ls_recover_begin = 0;
+	mutex_lock(&ls->ls_timeout_mutex);
+	list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
+		lkb->lkb_timestamp += adj;
+	mutex_unlock(&ls->ls_timeout_mutex);
+}
+
 /* lkb is master or local copy */

 static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
@ -1275,10 +1414,8 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
 * queue for one resource.  The granted mode of each lock blocks the requested
 * mode of the other lock."
 *
- * Part 2: if the granted mode of lkb is preventing the first lkb in the
- * convert queue from being granted, then demote lkb (set grmode to NL).
- * This second form requires that we check for conv-deadlk even when
- * now == 0 in _can_be_granted().
+ * Part 2: if the granted mode of lkb is preventing an earlier lkb in the
+ * convert queue from being granted, then deadlk/demote lkb.
 *
 * Example:
 * Granted Queue: empty
@ -1287,41 +1424,52 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
 *
 * The first lock can't be granted because of the granted mode of the second
 * lock and the second lock can't be granted because it's not first in the
- * list.  We demote the granted mode of the second lock (the lkb passed to this
- * function).
+ * list.  We either cancel lkb's conversion (PR->EX) and return EDEADLK, or we
+ * demote the granted mode of lkb (from PR to NL) if it has the CONVDEADLK
+ * flag set and return DEMOTED in the lksb flags.
 *
- * After the resolution, the "grant pending" function needs to go back and try
- * to grant locks on the convert queue again since the first lock can now be
- * granted.
+ * Originally, this function detected conv-deadlk in a more limited scope:
+ * - if !modes_compat(lkb1, lkb2) && !modes_compat(lkb2, lkb1), or
+ * - if lkb1 was the first entry in the queue (not just earlier), and was
+ *   blocked by the granted mode of lkb2, and there was nothing on the
+ *   granted queue preventing lkb1 from being granted immediately, i.e.
+ *   lkb2 was the only thing preventing lkb1 from being granted.
+ *
+ * That second condition meant we'd only say there was conv-deadlk if
+ * resolving it (by demotion) would lead to the first lock on the convert
+ * queue being granted right away.  It allowed conversion deadlocks to exist
+ * between locks on the convert queue while they couldn't be granted anyway.
+ *
+ * Now, we detect and take action on conversion deadlocks immediately when
+ * they're created, even if they may not be immediately consequential.  If
+ * lkb1 exists anywhere in the convert queue and lkb2 comes in with a granted
+ * mode that would prevent lkb1's conversion from being granted, we do a
+ * deadlk/demote on lkb2 right away and don't let it onto the convert queue.
+ * I think this means that the lkb_is_ahead condition below should always
+ * be zero, i.e. there will never be conv-deadlk between two locks that are
+ * both already on the convert queue.
 */

-static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb)
+static int conversion_deadlock_detect(struct dlm_rsb *r, struct dlm_lkb *lkb2)
 {
-	struct dlm_lkb *this, *first = NULL, *self = NULL;
+	struct dlm_lkb *lkb1;
+	int lkb_is_ahead = 0;

-	list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) {
-		if (!first)
-			first = this;
-		if (this == lkb) {
-			self = lkb;
+	list_for_each_entry(lkb1, &r->res_convertqueue, lkb_statequeue) {
+		if (lkb1 == lkb2) {
+			lkb_is_ahead = 1;
 			continue;
 		}

-		if (!modes_compat(this, lkb) && !modes_compat(lkb, this))
-			return 1;
+		if (!lkb_is_ahead) {
+			if (!modes_compat(lkb2, lkb1))
+				return 1;
+		} else {
+			if (!modes_compat(lkb2, lkb1) &&
+			    !modes_compat(lkb1, lkb2))
+				return 1;
+		}
 	}
-
-	/* if lkb is on the convert queue and is preventing the first
-	   from being granted, then there's deadlock and we demote lkb.
-	   multiple converting locks may need to do this before the first
-	   converting lock can be granted. */
-
-	if (self && self != first) {
-		if (!modes_compat(lkb, first) &&
-		    !queue_conflict(&rsb->res_grantqueue, first))
-			return 1;
-	}
-
 	return 0;
 }

@ -1450,42 +1598,57 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
 	if (!now && !conv && list_empty(&r->res_convertqueue) &&
 	    first_in_list(lkb, &r->res_waitqueue))
 		return 1;
-
 out:
-	/*
-	 * The following, enabled by CONVDEADLK, departs from VMS.
-	 */
-
-	if (conv && (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) &&
-	    conversion_deadlock_detect(r, lkb)) {
-		lkb->lkb_grmode = DLM_LOCK_NL;
-		lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
-	}
-
 	return 0;
 }

-/*
- * The ALTPR and ALTCW flags aren't traditional lock manager flags, but are a
- * simple way to provide a big optimization to applications that can use them.
- */
-
-static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
+static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now,
+			  int *err)
 {
-	uint32_t flags = lkb->lkb_exflags;
 	int rv;
 	int8_t alt = 0, rqmode = lkb->lkb_rqmode;
+	int8_t is_convert = (lkb->lkb_grmode != DLM_LOCK_IV);
+
+	if (err)
+		*err = 0;

 	rv = _can_be_granted(r, lkb, now);
 	if (rv)
 		goto out;

-	if (lkb->lkb_sbflags & DLM_SBF_DEMOTED)
-		goto out;
+	/*
+	 * The CONVDEADLK flag is non-standard and tells the dlm to resolve
+	 * conversion deadlocks by demoting grmode to NL, otherwise the dlm
+	 * cancels one of the locks.
+	 */

-	if (rqmode != DLM_LOCK_PR && flags & DLM_LKF_ALTPR)
+	if (is_convert && can_be_queued(lkb) &&
+	    conversion_deadlock_detect(r, lkb)) {
+		if (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) {
+			lkb->lkb_grmode = DLM_LOCK_NL;
+			lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
+		} else if (!(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
+			if (err)
+				*err = -EDEADLK;
+			else {
+				log_print("can_be_granted deadlock %x now %d",
+					  lkb->lkb_id, now);
+				dlm_dump_rsb(r);
+			}
+		}
+		goto out;
+	}
+
+	/*
+	 * The ALTPR and ALTCW flags are non-standard and tell the dlm to try
+	 * to grant a request in a mode other than the normal rqmode.  It's a
+	 * simple way to provide a big optimization to applications that can
+	 * use them.
+	 */
+
+	if (rqmode != DLM_LOCK_PR && (lkb->lkb_exflags & DLM_LKF_ALTPR))
 		alt = DLM_LOCK_PR;
-	else if (rqmode != DLM_LOCK_CW && flags & DLM_LKF_ALTCW)
+	else if (rqmode != DLM_LOCK_CW && (lkb->lkb_exflags & DLM_LKF_ALTCW))
 		alt = DLM_LOCK_CW;

 	if (alt) {
@ -1500,10 +1663,20 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
 	return rv;
 }

+/* FIXME: I don't think that can_be_granted() can/will demote or find deadlock
+   for locks pending on the convert list.  Once verified (watch for these
+   log_prints), we should be able to just call _can_be_granted() and not
+   bother with the demote/deadlk cases here (and there's no easy way to deal
+   with a deadlk here, we'd have to generate something like grant_lock with
+   the deadlk error.) */
+
+/* returns the highest requested mode of all blocked conversions */
+
 static int grant_pending_convert(struct dlm_rsb *r, int high)
 {
 	struct dlm_lkb *lkb, *s;
 	int hi, demoted, quit, grant_restart, demote_restart;
+	int deadlk;

 	quit = 0;
 restart:
@ -1513,14 +1686,29 @@ static int grant_pending_convert(struct dlm_rsb *r, int high)

 	list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) {
 		demoted = is_demoted(lkb);
-		if (can_be_granted(r, lkb, 0)) {
+		deadlk = 0;
+
+		if (can_be_granted(r, lkb, 0, &deadlk)) {
 			grant_lock_pending(r, lkb);
 			grant_restart = 1;
-		} else {
-			hi = max_t(int, lkb->lkb_rqmode, hi);
-			if (!demoted && is_demoted(lkb))
-				demote_restart = 1;
+			continue;
 		}
+
+		if (!demoted && is_demoted(lkb)) {
+			log_print("WARN: pending demoted %x node %d %s",
+				  lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+			demote_restart = 1;
+			continue;
+		}
+
+		if (deadlk) {
+			log_print("WARN: pending deadlock %x node %d %s",
+				  lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+			dlm_dump_rsb(r);
+			continue;
+		}
+
+		hi = max_t(int, lkb->lkb_rqmode, hi);
 	}

 	if (grant_restart)
@ -1538,7 +1726,7 @@ static int grant_pending_wait(struct dlm_rsb *r, int high)
 	struct dlm_lkb *lkb, *s;

 	list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) {
-		if (can_be_granted(r, lkb, 0))
+		if (can_be_granted(r, lkb, 0, NULL))
 			grant_lock_pending(r, lkb);
                else
 			high = max_t(int, lkb->lkb_rqmode, high);
@ -1733,7 +1921,7 @@ static void confirm_master(struct dlm_rsb *r, int error)
 }

 static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
-			 int namelen, uint32_t parent_lkid, void *ast,
+			 int namelen, unsigned long timeout_cs, void *ast,
 			 void *astarg, void *bast, struct dlm_args *args)
 {
 	int rv = -EINVAL;
@ -1776,10 +1964,6 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
 	if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr)
 		goto out;

-	/* parent/child locks not yet supported */
-	if (parent_lkid)
-		goto out;
-
 	if (flags & DLM_LKF_CONVERT && !lksb->sb_lkid)
 		goto out;

@ -1791,6 +1975,7 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
 	args->astaddr = ast;
 	args->astparam = (long) astarg;
 	args->bastaddr = bast;
+	args->timeout = timeout_cs;
 	args->mode = mode;
 	args->lksb = lksb;
 	rv = 0;
@ -1845,6 +2030,7 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
 	lkb->lkb_lksb = args->lksb;
 	lkb->lkb_lvbptr = args->lksb->sb_lvbptr;
 	lkb->lkb_ownpid = (int) current->pid;
+	lkb->lkb_timeout_cs = args->timeout;
 	rv = 0;
 out:
 	return rv;
@ -1903,6 +2089,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
 		if (is_overlap(lkb))
 			goto out;

+		/* don't let scand try to do a cancel */
+		del_timeout(lkb);
+
 		if (lkb->lkb_flags & DLM_IFL_RESEND) {
 			lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
 			rv = -EBUSY;
@ -1934,6 +2123,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
 		if (is_overlap_unlock(lkb))
 			goto out;

+		/* don't let scand try to do a cancel */
+		del_timeout(lkb);
+
 		if (lkb->lkb_flags & DLM_IFL_RESEND) {
 			lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
 			rv = -EBUSY;
@ -1984,7 +2176,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
 {
 	int error = 0;

-	if (can_be_granted(r, lkb, 1)) {
+	if (can_be_granted(r, lkb, 1, NULL)) {
 		grant_lock(r, lkb);
 		queue_cast(r, lkb, 0);
 		goto out;
@ -1994,6 +2186,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
 		error = -EINPROGRESS;
 		add_lkb(r, lkb, DLM_LKSTS_WAITING);
 		send_blocking_asts(r, lkb);
+		add_timeout(lkb);
 		goto out;
 	}

@ -2009,16 +2202,32 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
 static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
 {
 	int error = 0;
+	int deadlk = 0;

 	/* changing an existing lock may allow others to be granted */

-	if (can_be_granted(r, lkb, 1)) {
+	if (can_be_granted(r, lkb, 1, &deadlk)) {
 		grant_lock(r, lkb);
 		queue_cast(r, lkb, 0);
 		grant_pending_locks(r);
 		goto out;
 	}

+	/* can_be_granted() detected that this lock would block in a conversion
+	   deadlock, so we leave it on the granted queue and return EDEADLK in
+	   the ast for the convert. */
+
+	if (deadlk) {
+		/* it's left on the granted queue */
+		log_debug(r->res_ls, "deadlock %x node %d sts%d g%d r%d %s",
+			  lkb->lkb_id, lkb->lkb_nodeid, lkb->lkb_status,
+			  lkb->lkb_grmode, lkb->lkb_rqmode, r->res_name);
+		revert_lock(r, lkb);
+		queue_cast(r, lkb, -EDEADLK);
+		error = -EDEADLK;
+		goto out;
+	}
+
 	/* is_demoted() means the can_be_granted() above set the grmode
 	   to NL, and left us on the granted queue.  This auto-demotion
 	   (due to CONVDEADLK) might mean other locks, and/or this lock, are
@ -2041,6 +2250,7 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
 		del_lkb(r, lkb);
 		add_lkb(r, lkb, DLM_LKSTS_CONVERT);
 		send_blocking_asts(r, lkb);
+		add_timeout(lkb);
 		goto out;
 	}

@ -2274,7 +2484,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
 	if (!ls)
 		return -EINVAL;

-	lock_recovery(ls);
+	dlm_lock_recovery(ls);

 	if (convert)
 		error = find_lkb(ls, lksb->sb_lkid, &lkb);
@ -2284,7 +2494,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
 	if (error)
 		goto out;

-	error = set_lock_args(mode, lksb, flags, namelen, parent_lkid, ast,
+	error = set_lock_args(mode, lksb, flags, namelen, 0, ast,
 			      astarg, bast, &args);
 	if (error)
 		goto out_put;
@ -2299,10 +2509,10 @@ int dlm_lock(dlm_lockspace_t *lockspace,
 out_put:
 	if (convert || error)
 		__put_lkb(ls, lkb);
-	if (error == -EAGAIN)
+	if (error == -EAGAIN || error == -EDEADLK)
 		error = 0;
 out:
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 	dlm_put_lockspace(ls);
 	return error;
 }
@ -2322,7 +2532,7 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
 	if (!ls)
 		return -EINVAL;

-	lock_recovery(ls);
+	dlm_lock_recovery(ls);

 	error = find_lkb(ls, lkid, &lkb);
 	if (error)
@ -2344,7 +2554,7 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
 out_put:
 	dlm_put_lkb(lkb);
 out:
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 	dlm_put_lockspace(ls);
 	return error;
 }
@ -2384,7 +2594,7 @@ static int _create_message(struct dlm_ls *ls, int mb_len,
 	   pass into lowcomms_commit and a message buffer (mb) that we
 	   write our data into */

-	mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb);
+	mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
 	if (!mh)
 		return -ENOBUFS;

@ -3111,9 +3321,10 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
 		lkb->lkb_remid = ms->m_lkid;
 		if (is_altmode(lkb))
 			munge_altmode(lkb, ms);
-		if (result)
+		if (result) {
 			add_lkb(r, lkb, DLM_LKSTS_WAITING);
-		else {
+			add_timeout(lkb);
+		} else {
 			grant_lock_pc(r, lkb, ms);
 			queue_cast(r, lkb, 0);
 		}
@ -3172,6 +3383,12 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
 		queue_cast(r, lkb, -EAGAIN);
 		break;

+	case -EDEADLK:
+		receive_flags_reply(lkb, ms);
+		revert_lock_pc(r, lkb);
+		queue_cast(r, lkb, -EDEADLK);
+		break;
+
 	case -EINPROGRESS:
 		/* convert was queued on remote master */
 		receive_flags_reply(lkb, ms);
@ -3179,6 +3396,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
 			munge_demoted(lkb, ms);
 		del_lkb(r, lkb);
 		add_lkb(r, lkb, DLM_LKSTS_CONVERT);
+		add_timeout(lkb);
 		break;

 	case 0:
@ -3298,8 +3516,7 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
 	case -DLM_ECANCEL:
 		receive_flags_reply(lkb, ms);
 		revert_lock_pc(r, lkb);
-		if (ms->m_result)
-			queue_cast(r, lkb, -DLM_ECANCEL);
+		queue_cast(r, lkb, -DLM_ECANCEL);
 		break;
 	case 0:
 		break;
@ -3424,7 +3641,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
 			}
 		}

-		if (lock_recovery_try(ls))
+		if (dlm_lock_recovery_try(ls))
 			break;
 		schedule();
 	}
@ -3503,7 +3720,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
 		log_error(ls, "unknown message type %d", ms->m_type);
 	}

-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 out:
 	dlm_put_lockspace(ls);
 	dlm_astd_wake();
@ -4034,13 +4251,13 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)

 int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
 		     int mode, uint32_t flags, void *name, unsigned int namelen,
-		     uint32_t parent_lkid)
+		     unsigned long timeout_cs)
 {
 	struct dlm_lkb *lkb;
 	struct dlm_args args;
 	int error;

-	lock_recovery(ls);
+	dlm_lock_recovery(ls);

 	error = create_lkb(ls, &lkb);
 	if (error) {
@ -4062,7 +4279,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
 	   When DLM_IFL_USER is set, the dlm knows that this is a userspace
 	   lock and that lkb_astparam is the dlm_user_args structure. */

-	error = set_lock_args(mode, &ua->lksb, flags, namelen, parent_lkid,
+	error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
 			      DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
 	lkb->lkb_flags |= DLM_IFL_USER;
 	ua->old_mode = DLM_LOCK_IV;
@ -4094,19 +4311,20 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
 	list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks);
 	spin_unlock(&ua->proc->locks_spin);
 out:
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 	return error;
 }

 int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
-		     int mode, uint32_t flags, uint32_t lkid, char *lvb_in)
+		     int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
+		     unsigned long timeout_cs)
 {
 	struct dlm_lkb *lkb;
 	struct dlm_args args;
 	struct dlm_user_args *ua;
 	int error;

-	lock_recovery(ls);
+	dlm_lock_recovery(ls);

 	error = find_lkb(ls, lkid, &lkb);
 	if (error)
@ -4127,6 +4345,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	if (lvb_in && ua->lksb.sb_lvbptr)
 		memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);

+	ua->xid = ua_tmp->xid;
 	ua->castparam = ua_tmp->castparam;
 	ua->castaddr = ua_tmp->castaddr;
 	ua->bastparam = ua_tmp->bastparam;
@ -4134,19 +4353,19 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	ua->user_lksb = ua_tmp->user_lksb;
 	ua->old_mode = lkb->lkb_grmode;

-	error = set_lock_args(mode, &ua->lksb, flags, 0, 0, DLM_FAKE_USER_AST,
-			      ua, DLM_FAKE_USER_AST, &args);
+	error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs,
+			      DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
 	if (error)
 		goto out_put;

 	error = convert_lock(ls, lkb, &args);

-	if (error == -EINPROGRESS || error == -EAGAIN)
+	if (error == -EINPROGRESS || error == -EAGAIN || error == -EDEADLK)
 		error = 0;
 out_put:
 	dlm_put_lkb(lkb);
 out:
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 	kfree(ua_tmp);
 	return error;
 }
@ -4159,7 +4378,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	struct dlm_user_args *ua;
 	int error;

-	lock_recovery(ls);
+	dlm_lock_recovery(ls);

 	error = find_lkb(ls, lkid, &lkb);
 	if (error)
@ -4194,7 +4413,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 out_put:
 	dlm_put_lkb(lkb);
 out:
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 	kfree(ua_tmp);
 	return error;
 }
@ -4207,7 +4426,7 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	struct dlm_user_args *ua;
 	int error;

-	lock_recovery(ls);
+	dlm_lock_recovery(ls);

 	error = find_lkb(ls, lkid, &lkb);
 	if (error)
@ -4231,11 +4450,59 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 out_put:
 	dlm_put_lkb(lkb);
 out:
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 	kfree(ua_tmp);
 	return error;
 }

+int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid)
+{
+	struct dlm_lkb *lkb;
+	struct dlm_args args;
+	struct dlm_user_args *ua;
+	struct dlm_rsb *r;
+	int error;
+
+	dlm_lock_recovery(ls);
+
+	error = find_lkb(ls, lkid, &lkb);
+	if (error)
+		goto out;
+
+	ua = (struct dlm_user_args *)lkb->lkb_astparam;
+
+	error = set_unlock_args(flags, ua, &args);
+	if (error)
+		goto out_put;
+
+	/* same as cancel_lock(), but set DEADLOCK_CANCEL after lock_rsb */
+
+	r = lkb->lkb_resource;
+	hold_rsb(r);
+	lock_rsb(r);
+
+	error = validate_unlock_args(lkb, &args);
+	if (error)
+		goto out_r;
+	lkb->lkb_flags |= DLM_IFL_DEADLOCK_CANCEL;
+
+	error = _cancel_lock(r, lkb);
+ out_r:
+	unlock_rsb(r);
+	put_rsb(r);
+
+	if (error == -DLM_ECANCEL)
+		error = 0;
+	/* from validate_unlock_args() */
+	if (error == -EBUSY)
+		error = 0;
+ out_put:
+	dlm_put_lkb(lkb);
+ out:
+	dlm_unlock_recovery(ls);
+	return error;
+}
+
 /* lkb's that are removed from the waiters list by revert are just left on the
   orphans list with the granted orphan locks, to be freed by purge */

@ -4314,12 +4581,13 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
 {
 	struct dlm_lkb *lkb, *safe;

-	lock_recovery(ls);
+	dlm_lock_recovery(ls);

 	while (1) {
 		lkb = del_proc_lock(ls, proc);
 		if (!lkb)
 			break;
+		del_timeout(lkb);
 		if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
 			orphan_proc_lock(ls, lkb);
 		else
@ -4347,7 +4615,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
 	}

 	mutex_unlock(&ls->ls_clear_proc_locks);
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 }

 static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
@ -4429,12 +4697,12 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
 	if (nodeid != dlm_our_nodeid()) {
 		error = send_purge(ls, nodeid, pid);
 	} else {
-		lock_recovery(ls);
+		dlm_lock_recovery(ls);
 		if (pid == current->pid)
 			purge_proc_locks(ls, proc);
 		else
 			do_purge(ls, nodeid, pid);
-		unlock_recovery(ls);
+		dlm_unlock_recovery(ls);
 	}
 	return error;
 }
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@ -24,6 +24,10 @@ void dlm_put_rsb(struct dlm_rsb *r);
 void dlm_hold_rsb(struct dlm_rsb *r);
 int dlm_put_lkb(struct dlm_lkb *lkb);
 void dlm_scan_rsbs(struct dlm_ls *ls);
+int dlm_lock_recovery_try(struct dlm_ls *ls);
+void dlm_unlock_recovery(struct dlm_ls *ls);
+void dlm_scan_timeout(struct dlm_ls *ls);
+void dlm_adjust_timeouts(struct dlm_ls *ls);

 int dlm_purge_locks(struct dlm_ls *ls);
 void dlm_purge_mstcpy_locks(struct dlm_rsb *r);
@ -34,15 +38,18 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
 int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc);

 int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
-	uint32_t flags, void *name, unsigned int namelen, uint32_t parent_lkid);
+	uint32_t flags, void *name, unsigned int namelen,
+	unsigned long timeout_cs);
 int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
-	int mode, uint32_t flags, uint32_t lkid, char *lvb_in);
+	int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
+	unsigned long timeout_cs);
 int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	uint32_t flags, uint32_t lkid, char *lvb_in);
 int dlm_user_cancel(struct dlm_ls *ls,  struct dlm_user_args *ua_tmp,
 	uint32_t flags, uint32_t lkid);
 int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
 	int nodeid, int pid);
+int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid);
 void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc);

 static inline int is_master(struct dlm_rsb *r)
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@ -197,13 +197,24 @@ static int do_uevent(struct dlm_ls *ls, int in)
 	else
 		kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);

+	log_debug(ls, "%s the lockspace group...", in ? "joining" : "leaving");
+
+	/* dlm_controld will see the uevent, do the necessary group management
+	   and then write to sysfs to wake us */
+
 	error = wait_event_interruptible(ls->ls_uevent_wait,
 			test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
+
+	log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result);
+
 	if (error)
 		goto out;

 	error = ls->ls_uevent_result;
 out:
+	if (error)
+		log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
+			  error, ls->ls_uevent_result);
 	return error;
 }

@ -234,8 +245,13 @@ static int dlm_scand(void *data)
 	struct dlm_ls *ls;

 	while (!kthread_should_stop()) {
-		list_for_each_entry(ls, &lslist, ls_list)
-			dlm_scan_rsbs(ls);
+		list_for_each_entry(ls, &lslist, ls_list) {
+			if (dlm_lock_recovery_try(ls)) {
+				dlm_scan_rsbs(ls);
+				dlm_scan_timeout(ls);
+				dlm_unlock_recovery(ls);
+			}
+		}
 		schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
 	}
 	return 0;
@ -395,6 +411,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 {
 	struct dlm_ls *ls;
 	int i, size, error = -ENOMEM;
+	int do_unreg = 0;

 	if (namelen > DLM_LOCKSPACE_LEN)
 		return -EINVAL;
@ -417,11 +434,22 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 		goto out;
 	memcpy(ls->ls_name, name, namelen);
 	ls->ls_namelen = namelen;
-	ls->ls_exflags = flags;
 	ls->ls_lvblen = lvblen;
 	ls->ls_count = 0;
 	ls->ls_flags = 0;

+	if (flags & DLM_LSFL_TIMEWARN)
+		set_bit(LSFL_TIMEWARN, &ls->ls_flags);
+
+	if (flags & DLM_LSFL_FS)
+		ls->ls_allocation = GFP_NOFS;
+	else
+		ls->ls_allocation = GFP_KERNEL;
+
+	/* ls_exflags are forced to match among nodes, and we don't
+	   need to require all nodes to have TIMEWARN or FS set */
+	ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS));
+
 	size = dlm_config.ci_rsbtbl_size;
 	ls->ls_rsbtbl_size = size;

@ -461,6 +489,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 	mutex_init(&ls->ls_waiters_mutex);
 	INIT_LIST_HEAD(&ls->ls_orphans);
 	mutex_init(&ls->ls_orphans_mutex);
+	INIT_LIST_HEAD(&ls->ls_timeout);
+	mutex_init(&ls->ls_timeout_mutex);

 	INIT_LIST_HEAD(&ls->ls_nodes);
 	INIT_LIST_HEAD(&ls->ls_nodes_gone);
@ -477,6 +507,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,

 	init_waitqueue_head(&ls->ls_uevent_wait);
 	ls->ls_uevent_result = 0;
+	init_completion(&ls->ls_members_done);
+	ls->ls_members_result = -1;

 	ls->ls_recoverd_task = NULL;
 	mutex_init(&ls->ls_recoverd_active);
@ -513,32 +545,49 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 	error = dlm_recoverd_start(ls);
 	if (error) {
 		log_error(ls, "can't start dlm_recoverd %d", error);
-		goto out_rcomfree;
+		goto out_delist;
 	}

-	dlm_create_debug_file(ls);
-
 	error = kobject_setup(ls);
 	if (error)
-		goto out_del;
+		goto out_stop;

 	error = kobject_register(&ls->ls_kobj);
 	if (error)
-		goto out_del;
+		goto out_stop;
+
+	/* let kobject handle freeing of ls if there's an error */
+	do_unreg = 1;
+
+	/* This uevent triggers dlm_controld in userspace to add us to the
+	   group of nodes that are members of this lockspace (managed by the
+	   cluster infrastructure.)  Once it's done that, it tells us who the
+	   current lockspace members are (via configfs) and then tells the
+	   lockspace to start running (via sysfs) in dlm_ls_start(). */

 	error = do_uevent(ls, 1);
 	if (error)
-		goto out_unreg;
+		goto out_stop;
+
+	wait_for_completion(&ls->ls_members_done);
+	error = ls->ls_members_result;
+	if (error)
+		goto out_members;
+
+	dlm_create_debug_file(ls);
+
+	log_debug(ls, "join complete");

 	*lockspace = ls;
 	return 0;

- out_unreg:
-	kobject_unregister(&ls->ls_kobj);
- out_del:
-	dlm_delete_debug_file(ls);
+ out_members:
+	do_uevent(ls, 0);
+	dlm_clear_members(ls);
+	kfree(ls->ls_node_array);
+ out_stop:
 	dlm_recoverd_stop(ls);
- out_rcomfree:
+ out_delist:
 	spin_lock(&lslist_lock);
 	list_del(&ls->ls_list);
 	spin_unlock(&lslist_lock);
@ -550,7 +599,10 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 out_rsbfree:
 	kfree(ls->ls_rsbtbl);
 out_lsfree:
-	kfree(ls);
+	if (do_unreg)
+		kobject_unregister(&ls->ls_kobj);
+	else
+		kfree(ls);
 out:
 	module_put(THIS_MODULE);
 	return error;
@ -570,6 +622,8 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace,
 	error = new_lockspace(name, namelen, lockspace, flags, lvblen);
 	if (!error)
 		ls_count++;
+	else if (!ls_count)
+		threads_stop();
 out:
 	mutex_unlock(&ls_lock);
 	return error;
@ -696,7 +750,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
 	dlm_clear_members_gone(ls);
 	kfree(ls->ls_node_array);
 	kobject_unregister(&ls->ls_kobj);
-        /* The ls structure will be freed when the kobject is done with */
+	/* The ls structure will be freed when the kobject is done with */

 	mutex_lock(&ls_lock);
 	ls_count--;
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@ -260,7 +260,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
 static void lowcomms_data_ready(struct sock *sk, int count_unused)
 {
 	struct connection *con = sock2con(sk);
-	if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
+	if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags))
 		queue_work(recv_workqueue, &con->rwork);
 }

@ -268,7 +268,7 @@ static void lowcomms_write_space(struct sock *sk)
 {
 	struct connection *con = sock2con(sk);

-	if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
+	if (con && !test_and_set_bit(CF_WRITE_PENDING, &con->flags))
 		queue_work(send_workqueue, &con->swork);
 }

@ -720,11 +720,17 @@ static int tcp_accept_from_sock(struct connection *con)
 			INIT_WORK(&othercon->rwork, process_recv_sockets);
 			set_bit(CF_IS_OTHERCON, &othercon->flags);
 			newcon->othercon = othercon;
+			othercon->sock = newsock;
+			newsock->sk->sk_user_data = othercon;
+			add_sock(newsock, othercon);
+			addcon = othercon;
+		}
+		else {
+			printk("Extra connection from node %d attempted\n", nodeid);
+			result = -EAGAIN;
+			mutex_unlock(&newcon->sock_mutex);
+			goto accept_err;
 		}
-		othercon->sock = newsock;
-		newsock->sk->sk_user_data = othercon;
-		add_sock(newsock, othercon);
-		addcon = othercon;
 	}
 	else {
 		newsock->sk->sk_user_data = newcon;
@ -1400,8 +1406,11 @@ void dlm_lowcomms_stop(void)
 	down(&connections_lock);
 	for (i = 0; i <= max_nodeid; i++) {
 		con = __nodeid2con(i, 0);
-		if (con)
+		if (con) {
 			con->flags |= 0xFF;
+			if (con->sock)
+				con->sock->sk->sk_user_data = NULL;
+		}
 	}
 	up(&connections_lock);

--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@ -25,6 +25,8 @@ void dlm_unregister_debugfs(void);
 static inline int dlm_register_debugfs(void) { return 0; }
 static inline void dlm_unregister_debugfs(void) { }
 #endif
+int dlm_netlink_init(void);
+void dlm_netlink_exit(void);

 static int __init init_dlm(void)
 {
@ -50,10 +52,16 @@ static int __init init_dlm(void)
 	if (error)
 		goto out_debug;

+	error = dlm_netlink_init();
+	if (error)
+		goto out_user;
+
 	printk("DLM (built %s %s) installed\n", __DATE__, __TIME__);

 	return 0;

+ out_user:
+	dlm_user_exit();
 out_debug:
 	dlm_unregister_debugfs();
 out_config:
@ -68,6 +76,7 @@ static int __init init_dlm(void)

 static void __exit exit_dlm(void)
 {
+	dlm_netlink_exit();
 	dlm_user_exit();
 	dlm_config_exit();
 	dlm_memory_exit();
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@ -233,6 +233,12 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
 	*neg_out = neg;

 	error = ping_members(ls);
+	if (!error || error == -EPROTO) {
+		/* new_lockspace() may be waiting to know if the config
+		   is good or bad */
+		ls->ls_members_result = error;
+		complete(&ls->ls_members_done);
+	}
 	if (error)
 		goto out;

@ -284,6 +290,9 @@ int dlm_ls_stop(struct dlm_ls *ls)
 	dlm_recoverd_suspend(ls);
 	ls->ls_recover_status = 0;
 	dlm_recoverd_resume(ls);
+
+	if (!ls->ls_recover_begin)
+		ls->ls_recover_begin = jiffies;
 	return 0;
 }

--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2007 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <net/genetlink.h>
+#include <linux/dlm.h>
+#include <linux/dlm_netlink.h>
+
+#include "dlm_internal.h"
+
+static uint32_t dlm_nl_seqnum;
+static uint32_t listener_nlpid;
+
+static struct genl_family family = {
+	.id		= GENL_ID_GENERATE,
+	.name		= DLM_GENL_NAME,
+	.version	= DLM_GENL_VERSION,
+};
+
+static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size)
+{
+	struct sk_buff *skb;
+	void *data;
+
+	skb = genlmsg_new(size, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	/* add the message headers */
+	data = genlmsg_put(skb, 0, dlm_nl_seqnum++, &family, 0, cmd);
+	if (!data) {
+		nlmsg_free(skb);
+		return -EINVAL;
+	}
+
+	*skbp = skb;
+	return 0;
+}
+
+static struct dlm_lock_data *mk_data(struct sk_buff *skb)
+{
+	struct nlattr *ret;
+
+	ret = nla_reserve(skb, DLM_TYPE_LOCK, sizeof(struct dlm_lock_data));
+	if (!ret)
+		return NULL;
+	return nla_data(ret);
+}
+
+static int send_data(struct sk_buff *skb)
+{
+	struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
+	void *data = genlmsg_data(genlhdr);
+	int rv;
+
+	rv = genlmsg_end(skb, data);
+	if (rv < 0) {
+		nlmsg_free(skb);
+		return rv;
+	}
+
+	return genlmsg_unicast(skb, listener_nlpid);
+}
+
+static int user_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	listener_nlpid = info->snd_pid;
+	printk("user_cmd nlpid %u\n", listener_nlpid);
+	return 0;
+}
+
+static struct genl_ops dlm_nl_ops = {
+	.cmd		= DLM_CMD_HELLO,
+	.doit		= user_cmd,
+};
+
+int dlm_netlink_init(void)
+{
+	int rv;
+
+	rv = genl_register_family(&family);
+	if (rv)
+		return rv;
+
+	rv = genl_register_ops(&family, &dlm_nl_ops);
+	if (rv < 0)
+		goto err;
+	return 0;
+ err:
+	genl_unregister_family(&family);
+	return rv;
+}
+
+void dlm_netlink_exit(void)
+{
+	genl_unregister_ops(&family, &dlm_nl_ops);
+	genl_unregister_family(&family);
+}
+
+static void fill_data(struct dlm_lock_data *data, struct dlm_lkb *lkb)
+{
+	struct dlm_rsb *r = lkb->lkb_resource;
+	struct dlm_user_args *ua = (struct dlm_user_args *) lkb->lkb_astparam;
+
+	memset(data, 0, sizeof(struct dlm_lock_data));
+
+	data->version = DLM_LOCK_DATA_VERSION;
+	data->nodeid = lkb->lkb_nodeid;
+	data->ownpid = lkb->lkb_ownpid;
+	data->id = lkb->lkb_id;
+	data->remid = lkb->lkb_remid;
+	data->status = lkb->lkb_status;
+	data->grmode = lkb->lkb_grmode;
+	data->rqmode = lkb->lkb_rqmode;
+	data->timestamp = lkb->lkb_timestamp;
+	if (ua)
+		data->xid = ua->xid;
+	if (r) {
+		data->lockspace_id = r->res_ls->ls_global_id;
+		data->resource_namelen = r->res_length;
+		memcpy(data->resource_name, r->res_name, r->res_length);
+	}
+}
+
+void dlm_timeout_warn(struct dlm_lkb *lkb)
+{
+	struct dlm_lock_data *data;
+	struct sk_buff *send_skb;
+	size_t size;
+	int rv;
+
+	size = nla_total_size(sizeof(struct dlm_lock_data)) +
+	       nla_total_size(0); /* why this? */
+
+	rv = prepare_data(DLM_CMD_TIMEOUT, &send_skb, size);
+	if (rv < 0)
+		return;
+
+	data = mk_data(send_skb);
+	if (!data) {
+		nlmsg_free(send_skb);
+		return;
+	}
+
+	fill_data(data, lkb);
+
+	send_data(send_skb);
+}
+
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@ -38,7 +38,7 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
 	char *mb;
 	int mb_len = sizeof(struct dlm_rcom) + len;

-	mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb);
+	mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
 	if (!mh) {
 		log_print("create_rcom to %d type %d len %d ENOBUFS",
 			  to_nodeid, type, len);
@ -90,7 +90,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
 		log_error(ls, "version mismatch: %x nodeid %d: %x",
 			  DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
 			  rc->rc_header.h_version);
-		return -EINVAL;
+		return -EPROTO;
 	}

 	if (rf->rf_lvblen != ls->ls_lvblen ||
@ -98,7 +98,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
 		log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
 			  ls->ls_lvblen, ls->ls_exflags,
 			  nodeid, rf->rf_lvblen, rf->rf_lsflags);
-		return -EINVAL;
+		return -EPROTO;
 	}
 	return 0;
 }
@ -386,7 +386,8 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 	dlm_recover_process_copy(ls, rc_in);
 }

-static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
+static int send_ls_not_ready(struct dlm_ls *ls, int nodeid,
+			     struct dlm_rcom *rc_in)
 {
 	struct dlm_rcom *rc;
 	struct rcom_config *rf;
@ -394,7 +395,7 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
 	char *mb;
 	int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config);

-	mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_KERNEL, &mb);
+	mh = dlm_lowcomms_get_buffer(nodeid, mb_len, ls->ls_allocation, &mb);
 	if (!mh)
 		return -ENOBUFS;
 	memset(mb, 0, mb_len);
@ -464,7 +465,7 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
 		log_print("lockspace %x from %d type %x not found",
 			  hd->h_lockspace, nodeid, rc->rc_type);
 		if (rc->rc_type == DLM_RCOM_STATUS)
-			send_ls_not_ready(nodeid, rc);
+			send_ls_not_ready(ls, nodeid, rc);
 		return;
 	}

--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@ -190,6 +190,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)

 	dlm_clear_members_gone(ls);

+	dlm_adjust_timeouts(ls);
+
 	error = enable_locking(ls, rv->seq);
 	if (error) {
 		log_debug(ls, "enable_locking failed %d", error);
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@ -33,16 +33,17 @@ static const struct file_operations device_fops;
 struct dlm_lock_params32 {
 	__u8 mode;
 	__u8 namelen;
-	__u16 flags;
+	__u16 unused;
+	__u32 flags;
 	__u32 lkid;
 	__u32 parent;
-
+	__u64 xid;
+	__u64 timeout;
 	__u32 castparam;
 	__u32 castaddr;
 	__u32 bastparam;
 	__u32 bastaddr;
 	__u32 lksb;
-
 	char lvb[DLM_USER_LVB_LEN];
 	char name[0];
 };
@ -68,6 +69,7 @@ struct dlm_lksb32 {
 };

 struct dlm_lock_result32 {
+	__u32 version[3];
 	__u32 length;
 	__u32 user_astaddr;
 	__u32 user_astparam;
@ -102,6 +104,8 @@ static void compat_input(struct dlm_write_request *kb,
 		kb->i.lock.flags = kb32->i.lock.flags;
 		kb->i.lock.lkid = kb32->i.lock.lkid;
 		kb->i.lock.parent = kb32->i.lock.parent;
+		kb->i.lock.xid = kb32->i.lock.xid;
+		kb->i.lock.timeout = kb32->i.lock.timeout;
 		kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam;
 		kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr;
 		kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam;
@ -115,6 +119,10 @@ static void compat_input(struct dlm_write_request *kb,
 static void compat_output(struct dlm_lock_result *res,
 			  struct dlm_lock_result32 *res32)
 {
+	res32->version[0] = res->version[0];
+	res32->version[1] = res->version[1];
+	res32->version[2] = res->version[2];
+
 	res32->user_astaddr = (__u32)(long)res->user_astaddr;
 	res32->user_astparam = (__u32)(long)res->user_astparam;
 	res32->user_lksb = (__u32)(long)res->user_lksb;
@ -130,6 +138,36 @@ static void compat_output(struct dlm_lock_result *res,
 }
 #endif

+/* Figure out if this lock is at the end of its life and no longer
+   available for the application to use.  The lkb still exists until
+   the final ast is read.  A lock becomes EOL in three situations:
+     1. a noqueue request fails with EAGAIN
+     2. an unlock completes with EUNLOCK
+     3. a cancel of a waiting request completes with ECANCEL/EDEADLK
+   An EOL lock needs to be removed from the process's list of locks.
+   And we can't allow any new operation on an EOL lock.  This is
+   not related to the lifetime of the lkb struct which is managed
+   entirely by refcount. */
+
+static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type)
+{
+	switch (sb_status) {
+	case -DLM_EUNLOCK:
+		return 1;
+	case -DLM_ECANCEL:
+	case -ETIMEDOUT:
+	case -EDEADLK:
+		if (lkb->lkb_grmode == DLM_LOCK_IV)
+			return 1;
+		break;
+	case -EAGAIN:
+		if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV)
+			return 1;
+		break;
+	}
+	return 0;
+}
+
 /* we could possibly check if the cancel of an orphan has resulted in the lkb
   being removed and then remove that lkb from the orphans list and free it */

@ -176,25 +214,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
 		log_debug(ls, "ast overlap %x status %x %x",
 			  lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags);

-	/* Figure out if this lock is at the end of its life and no longer
-	   available for the application to use.  The lkb still exists until
-	   the final ast is read.  A lock becomes EOL in three situations:
-	     1. a noqueue request fails with EAGAIN
-	     2. an unlock completes with EUNLOCK
-	     3. a cancel of a waiting request completes with ECANCEL
-	   An EOL lock needs to be removed from the process's list of locks.
-	   And we can't allow any new operation on an EOL lock.  This is
-	   not related to the lifetime of the lkb struct which is managed
-	   entirely by refcount. */
-
-	if (type == AST_COMP &&
-	    lkb->lkb_grmode == DLM_LOCK_IV &&
-	    ua->lksb.sb_status == -EAGAIN)
-		eol = 1;
-	else if (ua->lksb.sb_status == -DLM_EUNLOCK ||
-	    (ua->lksb.sb_status == -DLM_ECANCEL &&
-	     lkb->lkb_grmode == DLM_LOCK_IV))
-		eol = 1;
+	eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type);
 	if (eol) {
 		lkb->lkb_ast_type &= ~AST_BAST;
 		lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
@ -252,16 +272,18 @@ static int device_user_lock(struct dlm_user_proc *proc,
 	ua->castaddr = params->castaddr;
 	ua->bastparam = params->bastparam;
 	ua->bastaddr = params->bastaddr;
+	ua->xid = params->xid;

 	if (params->flags & DLM_LKF_CONVERT)
 		error = dlm_user_convert(ls, ua,
 				         params->mode, params->flags,
-				         params->lkid, params->lvb);
+				         params->lkid, params->lvb,
+					 (unsigned long) params->timeout);
 	else {
 		error = dlm_user_request(ls, ua,
 					 params->mode, params->flags,
 					 params->name, params->namelen,
-					 params->parent);
+					 (unsigned long) params->timeout);
 		if (!error)
 			error = ua->lksb.sb_lkid;
 	}
@ -299,6 +321,22 @@ static int device_user_unlock(struct dlm_user_proc *proc,
 	return error;
 }

+static int device_user_deadlock(struct dlm_user_proc *proc,
+				struct dlm_lock_params *params)
+{
+	struct dlm_ls *ls;
+	int error;
+
+	ls = dlm_find_lockspace_local(proc->lockspace);
+	if (!ls)
+		return -ENOENT;
+
+	error = dlm_user_deadlock(ls, params->flags, params->lkid);
+
+	dlm_put_lockspace(ls);
+	return error;
+}
+
 static int create_misc_device(struct dlm_ls *ls, char *name)
 {
 	int error, len;
@ -348,7 +386,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params)
 		return -EPERM;

 	error = dlm_new_lockspace(params->name, strlen(params->name),
-				  &lockspace, 0, DLM_USER_LVB_LEN);
+				  &lockspace, params->flags, DLM_USER_LVB_LEN);
 	if (error)
 		return error;

@ -524,6 +562,14 @@ static ssize_t device_write(struct file *file, const char __user *buf,
 		error = device_user_unlock(proc, &kbuf->i.lock);
 		break;

+	case DLM_USER_DEADLOCK:
+		if (!proc) {
+			log_print("no locking on control device");
+			goto out_sig;
+		}
+		error = device_user_deadlock(proc, &kbuf->i.lock);
+		break;
+
 	case DLM_USER_CREATE_LOCKSPACE:
 		if (proc) {
 			log_print("create/remove only on control device");
@ -641,6 +687,9 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
 	int struct_len;

 	memset(&result, 0, sizeof(struct dlm_lock_result));
+	result.version[0] = DLM_DEVICE_VERSION_MAJOR;
+	result.version[1] = DLM_DEVICE_VERSION_MINOR;
+	result.version[2] = DLM_DEVICE_VERSION_PATCH;
 	memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb));
 	result.user_lksb = ua->user_lksb;

@ -699,6 +748,20 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
 	return error;
 }

+static int copy_version_to_user(char __user *buf, size_t count)
+{
+	struct dlm_device_version ver;
+
+	memset(&ver, 0, sizeof(struct dlm_device_version));
+	ver.version[0] = DLM_DEVICE_VERSION_MAJOR;
+	ver.version[1] = DLM_DEVICE_VERSION_MINOR;
+	ver.version[2] = DLM_DEVICE_VERSION_PATCH;
+
+	if (copy_to_user(buf, &ver, sizeof(struct dlm_device_version)))
+		return -EFAULT;
+	return sizeof(struct dlm_device_version);
+}
+
 /* a read returns a single ast described in a struct dlm_lock_result */

 static ssize_t device_read(struct file *file, char __user *buf, size_t count,
@ -710,6 +773,16 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
 	DECLARE_WAITQUEUE(wait, current);
 	int error, type=0, bmode=0, removed = 0;

+	if (count == sizeof(struct dlm_device_version)) {
+		error = copy_version_to_user(buf, count);
+		return error;
+	}
+
+	if (!proc) {
+		log_print("non-version read from control device %zu", count);
+		return -EINVAL;
+	}
+
 #ifdef CONFIG_COMPAT
 	if (count < sizeof(struct dlm_lock_result32))
 #else
@ -747,11 +820,6 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
 		}
 	}

-	if (list_empty(&proc->asts)) {
-		spin_unlock(&proc->asts_spin);
-		return -EAGAIN;
-	}
-
 	/* there may be both completion and blocking asts to return for
 	   the lkb, don't remove lkb from asts list unless no asts remain */

@ -823,6 +891,7 @@ static const struct file_operations device_fops = {
 static const struct file_operations ctl_device_fops = {
 	.open    = ctl_device_open,
 	.release = ctl_device_close,
+	.read    = device_read,
 	.write   = device_write,
 	.owner   = THIS_MODULE,
 };
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@ -1,7 +1,7 @@
 obj-$(CONFIG_GFS2_FS) += gfs2.o
 gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \
 	glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \
-	mount.o ondisk.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
+	mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
 	ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \
 	recovery.o rgrp.o super.o sys.o trans.o util.o

--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@ -718,7 +718,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 	for (x = 0; x < rlist.rl_rgrps; x++) {
 		struct gfs2_rgrpd *rgd;
 		rgd = rlist.rl_ghs[x].gh_gl->gl_object;
-		rg_blocks += rgd->rd_ri.ri_length;
+		rg_blocks += rgd->rd_length;
 	}

 	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
@ -772,7 +772,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 			gfs2_free_data(ip, bstart, blen);
 	}

-	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;

 	gfs2_dinode_out(ip, dibh->b_data);

@ -824,7 +824,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
 		goto out_gunlock_q;

 	error = gfs2_trans_begin(sdp,
-			sdp->sd_max_height + al->al_rgd->rd_ri.ri_length +
+			sdp->sd_max_height + al->al_rgd->rd_length +
 			RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0);
 	if (error)
 		goto out_ipres;
@ -847,7 +847,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
 	}

 	ip->i_di.di_size = size;
-	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;

 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (error)
@ -885,7 +885,6 @@ static int gfs2_block_truncate_page(struct address_space *mapping)
 	unsigned blocksize, iblock, length, pos;
 	struct buffer_head *bh;
 	struct page *page;
-	void *kaddr;
 	int err;

 	page = grab_cache_page(mapping, index);
@ -928,15 +927,13 @@ static int gfs2_block_truncate_page(struct address_space *mapping)
 		/* Uhhuh. Read error. Complain and punt. */
 		if (!buffer_uptodate(bh))
 			goto unlock;
+		err = 0;
 	}

 	if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
 		gfs2_trans_add_bh(ip->i_gl, bh, 0);

-	kaddr = kmap_atomic(page, KM_USER0);
-	memset(kaddr + offset, 0, length);
-	flush_dcache_page(page);
-	kunmap_atomic(kaddr, KM_USER0);
+	zero_user_page(page, offset, length, KM_USER0);

 unlock:
 	unlock_page(page);
@ -962,7 +959,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)

 	if (gfs2_is_stuffed(ip)) {
 		ip->i_di.di_size = size;
-		ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+		ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
@ -974,7 +971,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)

 		if (!error) {
 			ip->i_di.di_size = size;
-			ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+			ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 			ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
 			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 			gfs2_dinode_out(ip, dibh->b_data);
@ -1044,10 +1041,10 @@ static int trunc_end(struct gfs2_inode *ip)
 		ip->i_di.di_height = 0;
 		ip->i_di.di_goal_meta =
 			ip->i_di.di_goal_data =
-			ip->i_num.no_addr;
+			ip->i_no_addr;
 		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
 	}
-	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 	ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;

 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
--- a/fs/gfs2/daemon.c
+++ b/fs/gfs2/daemon.c
@ -16,6 +16,7 @@
 #include <linux/delay.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/lm_interface.h>
+#include <linux/freezer.h>

 #include "gfs2.h"
 #include "incore.h"
@ -49,6 +50,8 @@ int gfs2_scand(void *data)
 	while (!kthread_should_stop()) {
 		gfs2_scand_internal(sdp);
 		t = gfs2_tune_get(sdp, gt_scand_secs) * HZ;
+		if (freezing(current))
+			refrigerator();
 		schedule_timeout_interruptible(t);
 	}

@ -74,6 +77,8 @@ int gfs2_glockd(void *data)
 		wait_event_interruptible(sdp->sd_reclaim_wq,
 					 (atomic_read(&sdp->sd_reclaim_count) ||
 					 kthread_should_stop()));
+		if (freezing(current))
+			refrigerator();
 	}

 	return 0;
@ -93,6 +98,8 @@ int gfs2_recoverd(void *data)
 	while (!kthread_should_stop()) {
 		gfs2_check_journals(sdp);
 		t = gfs2_tune_get(sdp,  gt_recoverd_secs) * HZ;
+		if (freezing(current))
+			refrigerator();
 		schedule_timeout_interruptible(t);
 	}

@ -141,6 +148,8 @@ int gfs2_logd(void *data)
 		}

 		t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
+		if (freezing(current))
+			refrigerator();
 		schedule_timeout_interruptible(t);
 	}

@ -191,6 +200,8 @@ int gfs2_quotad(void *data)
 		gfs2_quota_scan(sdp);

 		t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ;
+		if (freezing(current))
+			refrigerator();
 		schedule_timeout_interruptible(t);
 	}

--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@ -130,7 +130,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
 	memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
 	if (ip->i_di.di_size < offset + size)
 		ip->i_di.di_size = offset + size;
-	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 	gfs2_dinode_out(ip, dibh->b_data);

 	brelse(dibh);
@ -228,7 +228,7 @@ out:

 	if (ip->i_di.di_size < offset + copied)
 		ip->i_di.di_size = offset + copied;
-	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;

 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
@ -1456,7 +1456,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
 		if (dip->i_di.di_entries != g.offset) {
 			fs_warn(sdp, "Number of entries corrupt in dir %llu, "
 				"ip->i_di.di_entries (%u) != g.offset (%u)\n",
-				(unsigned long long)dip->i_num.no_addr,
+				(unsigned long long)dip->i_no_addr,
 				dip->i_di.di_entries,
 				g.offset);
 			error = -EIO;
@ -1488,24 +1488,55 @@ out:
 * Returns: errno
 */

-int gfs2_dir_search(struct inode *dir, const struct qstr *name,
-		    struct gfs2_inum_host *inum, unsigned int *type)
+struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
 {
 	struct buffer_head *bh;
 	struct gfs2_dirent *dent;
+	struct inode *inode;
+
+	dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
+	if (dent) {
+		if (IS_ERR(dent))
+			return ERR_PTR(PTR_ERR(dent));
+		inode = gfs2_inode_lookup(dir->i_sb, 
+				be16_to_cpu(dent->de_type),
+				be64_to_cpu(dent->de_inum.no_addr),
+				be64_to_cpu(dent->de_inum.no_formal_ino));
+		brelse(bh);
+		return inode;
+	}
+	return ERR_PTR(-ENOENT);
+}
+
+int gfs2_dir_check(struct inode *dir, const struct qstr *name,
+		   const struct gfs2_inode *ip)
+{
+	struct buffer_head *bh;
+	struct gfs2_dirent *dent;
+	int ret = -ENOENT;

 	dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
 	if (dent) {
 		if (IS_ERR(dent))
 			return PTR_ERR(dent);
-		if (inum)
-			gfs2_inum_in(inum, (char *)&dent->de_inum);
-		if (type)
-			*type = be16_to_cpu(dent->de_type);
+		if (ip) {
+			if (be64_to_cpu(dent->de_inum.no_addr) != ip->i_no_addr)
+				goto out;
+			if (be64_to_cpu(dent->de_inum.no_formal_ino) !=
+			    ip->i_no_formal_ino)
+				goto out;
+			if (unlikely(IF2DT(ip->i_inode.i_mode) !=
+			    be16_to_cpu(dent->de_type))) {
+				gfs2_consist_inode(GFS2_I(dir));
+				ret = -EIO;
+				goto out;
+			}
+		}
+		ret = 0;
+out:
 		brelse(bh);
-		return 0;
 	}
-	return -ENOENT;
+	return ret;
 }

 static int dir_new_leaf(struct inode *inode, const struct qstr *name)
@ -1565,7 +1596,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
 */

 int gfs2_dir_add(struct inode *inode, const struct qstr *name,
-		 const struct gfs2_inum_host *inum, unsigned type)
+		 const struct gfs2_inode *nip, unsigned type)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct buffer_head *bh;
@ -1580,7 +1611,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
 			if (IS_ERR(dent))
 				return PTR_ERR(dent);
 			dent = gfs2_init_dirent(inode, dent, name, bh);
-			gfs2_inum_out(inum, (char *)&dent->de_inum);
+			gfs2_inum_out(nip, dent);
 			dent->de_type = cpu_to_be16(type);
 			if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
 				leaf = (struct gfs2_leaf *)bh->b_data;
@ -1592,7 +1623,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
 				break;
 			gfs2_trans_add_bh(ip->i_gl, bh, 1);
 			ip->i_di.di_entries++;
-			ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+			ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 			gfs2_dinode_out(ip, bh->b_data);
 			brelse(bh);
 			error = 0;
@ -1678,7 +1709,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
 		gfs2_consist_inode(dip);
 	gfs2_trans_add_bh(dip->i_gl, bh, 1);
 	dip->i_di.di_entries--;
-	dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
+	dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
 	gfs2_dinode_out(dip, bh->b_data);
 	brelse(bh);
 	mark_inode_dirty(&dip->i_inode);
@ -1700,7 +1731,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
 */

 int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
-		   struct gfs2_inum_host *inum, unsigned int new_type)
+		   const struct gfs2_inode *nip, unsigned int new_type)
 {
 	struct buffer_head *bh;
 	struct gfs2_dirent *dent;
@ -1715,7 +1746,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
 		return PTR_ERR(dent);

 	gfs2_trans_add_bh(dip->i_gl, bh, 1);
-	gfs2_inum_out(inum, (char *)&dent->de_inum);
+	gfs2_inum_out(nip, dent);
 	dent->de_type = cpu_to_be16(new_type);

 	if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
@ -1726,7 +1757,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
 		gfs2_trans_add_bh(dip->i_gl, bh, 1);
 	}

-	dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
+	dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
 	gfs2_dinode_out(dip, bh->b_data);
 	brelse(bh);
 	return 0;
@ -1867,7 +1898,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
 	for (x = 0; x < rlist.rl_rgrps; x++) {
 		struct gfs2_rgrpd *rgd;
 		rgd = rlist.rl_ghs[x].gh_gl->gl_object;
-		rg_blocks += rgd->rd_ri.ri_length;
+		rg_blocks += rgd->rd_length;
 	}

 	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@ -16,15 +16,16 @@ struct inode;
 struct gfs2_inode;
 struct gfs2_inum;

-int gfs2_dir_search(struct inode *dir, const struct qstr *filename,
-		    struct gfs2_inum_host *inum, unsigned int *type);
+struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *filename);
+int gfs2_dir_check(struct inode *dir, const struct qstr *filename,
+		   const struct gfs2_inode *ip);
 int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
-		 const struct gfs2_inum_host *inum, unsigned int type);
+		 const struct gfs2_inode *ip, unsigned int type);
 int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
 int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
 		  filldir_t filldir);
 int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
-		   struct gfs2_inum_host *new_inum, unsigned int new_type);
+		   const struct gfs2_inode *nip, unsigned int new_type);

 int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);

--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@ -254,7 +254,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 	if (error)
 		return error;

-	error = gfs2_trans_begin(sdp, rgd->rd_ri.ri_length + RES_DINODE +
+	error = gfs2_trans_begin(sdp, rgd->rd_length + RES_DINODE +
 				 RES_EATTR + RES_STATFS + RES_QUOTA, blks);
 	if (error)
 		goto out_gunlock;
@ -300,7 +300,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,

 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (!error) {
-		ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+		ip->i_inode.i_ctime = CURRENT_TIME;
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
@ -700,7 +700,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 		goto out_gunlock_q;

 	error = gfs2_trans_begin(GFS2_SB(&ip->i_inode),
-				 blks + al->al_rgd->rd_ri.ri_length +
+				 blks + al->al_rgd->rd_length +
 				 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
 	if (error)
 		goto out_ipres;
@ -717,7 +717,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 					    (er->er_mode & S_IFMT));
 			ip->i_inode.i_mode = er->er_mode;
 		}
-		ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+		ip->i_inode.i_ctime = CURRENT_TIME;
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
@ -852,7 +852,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
 			(ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT));
 		ip->i_inode.i_mode = er->er_mode;
 	}
-	ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+	ip->i_inode.i_ctime = CURRENT_TIME;
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
@ -1133,7 +1133,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)

 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (!error) {
-		ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+		ip->i_inode.i_ctime = CURRENT_TIME;
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
@ -1352,7 +1352,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
 	for (x = 0; x < rlist.rl_rgrps; x++) {
 		struct gfs2_rgrpd *rgd;
 		rgd = rlist.rl_ghs[x].gh_gl->gl_object;
-		rg_blocks += rgd->rd_ri.ri_length;
+		rg_blocks += rgd->rd_length;
 	}

 	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@ -422,11 +422,11 @@ void gfs2_holder_uninit(struct gfs2_holder *gh)
 static void gfs2_holder_wake(struct gfs2_holder *gh)
 {
 	clear_bit(HIF_WAIT, &gh->gh_iflags);
-	smp_mb();
+	smp_mb__after_clear_bit();
 	wake_up_bit(&gh->gh_iflags, HIF_WAIT);
 }

-static int holder_wait(void *word)
+static int just_schedule(void *word)
 {
        schedule();
        return 0;
@ -435,7 +435,20 @@ static int holder_wait(void *word)
 static void wait_on_holder(struct gfs2_holder *gh)
 {
 	might_sleep();
-	wait_on_bit(&gh->gh_iflags, HIF_WAIT, holder_wait, TASK_UNINTERRUPTIBLE);
+	wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE);
+}
+
+static void gfs2_demote_wake(struct gfs2_glock *gl)
+{
+        clear_bit(GLF_DEMOTE, &gl->gl_flags);
+        smp_mb__after_clear_bit();
+        wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
+}
+
+static void wait_on_demote(struct gfs2_glock *gl)
+{
+	might_sleep();
+	wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE);
 }

 /**
@ -528,7 +541,7 @@ static int rq_demote(struct gfs2_glock *gl)

 	if (gl->gl_state == gl->gl_demote_state ||
 	    gl->gl_state == LM_ST_UNLOCKED) {
-		clear_bit(GLF_DEMOTE, &gl->gl_flags);
+		gfs2_demote_wake(gl);
 		return 0;
 	}
 	set_bit(GLF_LOCK, &gl->gl_flags);
@ -666,12 +679,22 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
 * practise: LM_ST_SHARED and LM_ST_UNLOCKED
 */

-static void handle_callback(struct gfs2_glock *gl, unsigned int state)
+static void handle_callback(struct gfs2_glock *gl, unsigned int state, int remote)
 {
 	spin_lock(&gl->gl_spin);
 	if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) {
 		gl->gl_demote_state = state;
 		gl->gl_demote_time = jiffies;
+		if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN &&
+		    gl->gl_object) {
+			struct inode *inode = igrab(gl->gl_object);
+			spin_unlock(&gl->gl_spin);
+			if (inode) {
+				d_prune_aliases(inode);
+				iput(inode);
+			}
+			return;
+		}
 	} else if (gl->gl_demote_state != LM_ST_UNLOCKED) {
 		gl->gl_demote_state = state;
 	}
@ -740,7 +763,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
 		if (ret & LM_OUT_CANCELED)
 			op_done = 0;
 		else
-			clear_bit(GLF_DEMOTE, &gl->gl_flags);
+			gfs2_demote_wake(gl);
 	} else {
 		spin_lock(&gl->gl_spin);
 		list_del_init(&gh->gh_list);
@ -848,7 +871,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
 	gfs2_assert_warn(sdp, !ret);

 	state_change(gl, LM_ST_UNLOCKED);
-	clear_bit(GLF_DEMOTE, &gl->gl_flags);
+	gfs2_demote_wake(gl);

 	if (glops->go_inval)
 		glops->go_inval(gl, DIO_METADATA);
@ -1174,7 +1197,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
 	const struct gfs2_glock_operations *glops = gl->gl_ops;

 	if (gh->gh_flags & GL_NOCACHE)
-		handle_callback(gl, LM_ST_UNLOCKED);
+		handle_callback(gl, LM_ST_UNLOCKED, 0);

 	gfs2_glmutex_lock(gl);

@ -1196,6 +1219,13 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
 	spin_unlock(&gl->gl_spin);
 }

+void gfs2_glock_dq_wait(struct gfs2_holder *gh)
+{
+	struct gfs2_glock *gl = gh->gh_gl;
+	gfs2_glock_dq(gh);
+	wait_on_demote(gl);
+}
+
 /**
 * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
 * @gh: the holder structure
@ -1297,10 +1327,6 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
- * Figure out how big an impact this function has.  Either:
- * 1) Replace this code with code that calls gfs2_glock_prefetch()
- * 2) Forget async stuff and just call nq_m_sync()
- * 3) Leave it like it is
 *
 * Returns: 0 on success (all glocks acquired),
 *          errno on failure (no glocks acquired)
@ -1308,62 +1334,28 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,

 int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
 {
-	int *e;
-	unsigned int x;
-	int borked = 0, serious = 0;
+	struct gfs2_holder *tmp[4];
+	struct gfs2_holder **pph = tmp;
 	int error = 0;

-	if (!num_gh)
+	switch(num_gh) {
+	case 0:
 		return 0;
-
-	if (num_gh == 1) {
+	case 1:
 		ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
 		return gfs2_glock_nq(ghs);
-	}
-
-	e = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
-	if (!e)
-		return -ENOMEM;
-
-	for (x = 0; x < num_gh; x++) {
-		ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC;
-		error = gfs2_glock_nq(&ghs[x]);
-		if (error) {
-			borked = 1;
-			serious = error;
-			num_gh = x;
+	default:
+		if (num_gh <= 4)
 			break;
-		}
+		pph = kmalloc(num_gh * sizeof(struct gfs2_holder *), GFP_NOFS);
+		if (!pph)
+			return -ENOMEM;
 	}

-	for (x = 0; x < num_gh; x++) {
-		error = e[x] = glock_wait_internal(&ghs[x]);
-		if (error) {
-			borked = 1;
-			if (error != GLR_TRYFAILED && error != GLR_CANCELED)
-				serious = error;
-		}
-	}
+	error = nq_m_sync(num_gh, ghs, pph);

-	if (!borked) {
-		kfree(e);
-		return 0;
-	}
-
-	for (x = 0; x < num_gh; x++)
-		if (!e[x])
-			gfs2_glock_dq(&ghs[x]);
-
-	if (serious)
-		error = serious;
-	else {
-		for (x = 0; x < num_gh; x++)
-			gfs2_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags,
-					  &ghs[x]);
-		error = nq_m_sync(num_gh, ghs, (struct gfs2_holder **)e);
-	}
-
-	kfree(e);
+	if (pph != tmp)
+		kfree(pph);

 	return error;
 }
@ -1456,7 +1448,7 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
 	if (!gl)
 		return;

-	handle_callback(gl, state);
+	handle_callback(gl, state, 1);

 	spin_lock(&gl->gl_spin);
 	run_queue(gl);
@ -1596,7 +1588,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
 	if (gfs2_glmutex_trylock(gl)) {
 		if (list_empty(&gl->gl_holders) &&
 		    gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
-			handle_callback(gl, LM_ST_UNLOCKED);
+			handle_callback(gl, LM_ST_UNLOCKED, 0);
 		gfs2_glmutex_unlock(gl);
 	}

@ -1709,7 +1701,7 @@ static void clear_glock(struct gfs2_glock *gl)
 	if (gfs2_glmutex_trylock(gl)) {
 		if (list_empty(&gl->gl_holders) &&
 		    gl->gl_state != LM_ST_UNLOCKED)
-			handle_callback(gl, LM_ST_UNLOCKED);
+			handle_callback(gl, LM_ST_UNLOCKED, 0);
 		gfs2_glmutex_unlock(gl);
 	}
 }
@ -1823,7 +1815,8 @@ static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip)

 	print_dbg(gi, "  Inode:\n");
 	print_dbg(gi, "    num = %llu/%llu\n",
-		    ip->i_num.no_formal_ino, ip->i_num.no_addr);
+		  (unsigned long long)ip->i_no_formal_ino,
+		  (unsigned long long)ip->i_no_addr);
 	print_dbg(gi, "    type = %u\n", IF2DT(ip->i_inode.i_mode));
 	print_dbg(gi, "    i_flags =");
 	for (x = 0; x < 32; x++)
@ -1909,8 +1902,8 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl)
 	}
 	if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
 		print_dbg(gi, "  Demotion req to state %u (%llu uS ago)\n",
-			  gl->gl_demote_state,
-			  (u64)(jiffies - gl->gl_demote_time)*(1000000/HZ));
+			  gl->gl_demote_state, (unsigned long long)
+			  (jiffies - gl->gl_demote_time)*(1000000/HZ));
 	}
 	if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) {
 		if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@ -87,6 +87,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh);
 int gfs2_glock_poll(struct gfs2_holder *gh);
 int gfs2_glock_wait(struct gfs2_holder *gh);
 void gfs2_glock_dq(struct gfs2_holder *gh);
+void gfs2_glock_dq_wait(struct gfs2_holder *gh);

 void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
 int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@ -156,9 +156,9 @@ static void inode_go_sync(struct gfs2_glock *gl)
 		ip = NULL;

 	if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
-		gfs2_log_flush(gl->gl_sbd, gl);
 		if (ip)
 			filemap_fdatawrite(ip->i_inode.i_mapping);
+		gfs2_log_flush(gl->gl_sbd, gl);
 		gfs2_meta_sync(gl);
 		if (ip) {
 			struct address_space *mapping = ip->i_inode.i_mapping;
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@ -28,6 +28,14 @@ struct gfs2_sbd;

 typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);

+struct gfs2_log_header_host {
+	u64 lh_sequence;	/* Sequence number of this transaction */
+	u32 lh_flags;		/* GFS2_LOG_HEAD_... */
+	u32 lh_tail;		/* Block number of log tail */
+	u32 lh_blkno;
+	u32 lh_hash;
+};
+
 /*
 * Structure of operations that are associated with each
 * type of element in the log.
@ -60,12 +68,23 @@ struct gfs2_bitmap {
 	u32 bi_len;
 };

+struct gfs2_rgrp_host {
+	u32 rg_flags;
+	u32 rg_free;
+	u32 rg_dinodes;
+	u64 rg_igeneration;
+};
+
 struct gfs2_rgrpd {
 	struct list_head rd_list;	/* Link with superblock */
 	struct list_head rd_list_mru;
 	struct list_head rd_recent;	/* Recently used rgrps */
 	struct gfs2_glock *rd_gl;	/* Glock for this rgrp */
-	struct gfs2_rindex_host rd_ri;
+	u64 rd_addr;			/* grp block disk address */
+	u64 rd_data0;			/* first data location */
+	u32 rd_length;			/* length of rgrp header in fs blocks */
+	u32 rd_data;			/* num of data blocks in rgrp */
+	u32 rd_bitbytes;		/* number of bytes in data bitmaps */
 	struct gfs2_rgrp_host rd_rg;
 	u64 rd_rg_vn;
 	struct gfs2_bitmap *rd_bits;
@ -76,6 +95,8 @@ struct gfs2_rgrpd {
 	u32 rd_last_alloc_data;
 	u32 rd_last_alloc_meta;
 	struct gfs2_sbd *rd_sbd;
+	unsigned long rd_flags;
+#define GFS2_RDF_CHECK        0x0001          /* Need to check for unlinked inodes */
 };

 enum gfs2_state_bits {
@ -211,10 +232,24 @@ enum {
 	GIF_SW_PAGED		= 3,
 };

+struct gfs2_dinode_host {
+	u64 di_size;		/* number of bytes in file */
+	u64 di_blocks;		/* number of blocks in file */
+	u64 di_goal_meta;	/* rgrp to alloc from next */
+	u64 di_goal_data;	/* data block goal */
+	u64 di_generation;	/* generation number for NFS */
+	u32 di_flags;		/* GFS2_DIF_... */
+	u16 di_height;		/* height of metadata */
+	/* These only apply to directories  */
+	u16 di_depth;		/* Number of bits in the table */
+	u32 di_entries;		/* The number of entries in the directory */
+	u64 di_eattr;		/* extended attribute block number */
+};
+
 struct gfs2_inode {
 	struct inode i_inode;
-	struct gfs2_inum_host i_num;
-
+	u64 i_no_addr;
+	u64 i_no_formal_ino;
 	unsigned long i_flags;		/* GIF_... */

 	struct gfs2_dinode_host i_di; /* To be replaced by ref to block */
@ -275,14 +310,6 @@ enum {
 	QDF_LOCKED		= 2,
 };

-struct gfs2_quota_lvb {
-        __be32 qb_magic;
-        u32 __pad;
-        __be64 qb_limit;      /* Hard limit of # blocks to alloc */
-        __be64 qb_warn;       /* Warn user when alloc is above this # */
-        __be64 qb_value;       /* Current # blocks allocated */
-};
-
 struct gfs2_quota_data {
 	struct list_head qd_list;
 	unsigned int qd_count;
@ -327,7 +354,9 @@ struct gfs2_trans {

 	unsigned int tr_num_buf;
 	unsigned int tr_num_buf_new;
+	unsigned int tr_num_databuf_new;
 	unsigned int tr_num_buf_rm;
+	unsigned int tr_num_databuf_rm;
 	struct list_head tr_list_buf;

 	unsigned int tr_num_revoke;
@ -354,6 +383,12 @@ struct gfs2_jdesc {
 	unsigned int jd_blocks;
 };

+struct gfs2_statfs_change_host {
+	s64 sc_total;
+	s64 sc_free;
+	s64 sc_dinodes;
+};
+
 #define GFS2_GLOCKD_DEFAULT	1
 #define GFS2_GLOCKD_MAX		16

@ -426,6 +461,28 @@ enum {

 #define GFS2_FSNAME_LEN		256

+struct gfs2_inum_host {
+	u64 no_formal_ino;
+	u64 no_addr;
+};
+
+struct gfs2_sb_host {
+	u32 sb_magic;
+	u32 sb_type;
+	u32 sb_format;
+
+	u32 sb_fs_format;
+	u32 sb_multihost_format;
+	u32 sb_bsize;
+	u32 sb_bsize_shift;
+
+	struct gfs2_inum_host sb_master_dir;
+	struct gfs2_inum_host sb_root_dir;
+
+	char sb_lockproto[GFS2_LOCKNAME_LEN];
+	char sb_locktable[GFS2_LOCKNAME_LEN];
+};
+
 struct gfs2_sbd {
 	struct super_block *sd_vfs;
 	struct super_block *sd_vfs_meta;
@ -544,6 +601,7 @@ struct gfs2_sbd {

 	unsigned int sd_log_blks_reserved;
 	unsigned int sd_log_commited_buf;
+	unsigned int sd_log_commited_databuf;
 	unsigned int sd_log_commited_revoke;

 	unsigned int sd_log_num_gl;
@ -552,7 +610,6 @@ struct gfs2_sbd {
 	unsigned int sd_log_num_rg;
 	unsigned int sd_log_num_databuf;
 	unsigned int sd_log_num_jdata;
-	unsigned int sd_log_num_hdrs;

 	struct list_head sd_log_le_gl;
 	struct list_head sd_log_le_buf;
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@ -38,12 +38,17 @@
 #include "trans.h"
 #include "util.h"

+struct gfs2_inum_range_host {
+	u64 ir_start;
+	u64 ir_length;
+};
+
 static int iget_test(struct inode *inode, void *opaque)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_inum_host *inum = opaque;
+	u64 *no_addr = opaque;

-	if (ip->i_num.no_addr == inum->no_addr &&
+	if (ip->i_no_addr == *no_addr &&
 	    inode->i_private != NULL)
 		return 1;

@ -53,37 +58,70 @@ static int iget_test(struct inode *inode, void *opaque)
 static int iget_set(struct inode *inode, void *opaque)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_inum_host *inum = opaque;
+	u64 *no_addr = opaque;

-	ip->i_num = *inum;
-	inode->i_ino = inum->no_addr;
+	inode->i_ino = (unsigned long)*no_addr;
+	ip->i_no_addr = *no_addr;
 	return 0;
 }

-struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum)
+struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr)
 {
-	return ilookup5(sb, (unsigned long)inum->no_addr,
-			iget_test, inum);
+	unsigned long hash = (unsigned long)no_addr;
+	return ilookup5(sb, hash, iget_test, &no_addr);
 }

-static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum_host *inum)
+static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
 {
-	return iget5_locked(sb, (unsigned long)inum->no_addr,
-		     iget_test, iget_set, inum);
+	unsigned long hash = (unsigned long)no_addr;
+	return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
+}
+
+/**
+ * GFS2 lookup code fills in vfs inode contents based on info obtained
+ * from directory entry inside gfs2_inode_lookup(). This has caused issues
+ * with NFS code path since its get_dentry routine doesn't have the relevant
+ * directory entry when gfs2_inode_lookup() is invoked. Part of the code
+ * segment inside gfs2_inode_lookup code needs to get moved around.
+ *
+ * Clean up I_LOCK and I_NEW as well.
+ **/
+
+void gfs2_set_iop(struct inode *inode)
+{
+	umode_t mode = inode->i_mode;
+
+	if (S_ISREG(mode)) {
+		inode->i_op = &gfs2_file_iops;
+		inode->i_fop = &gfs2_file_fops;
+		inode->i_mapping->a_ops = &gfs2_file_aops;
+	} else if (S_ISDIR(mode)) {
+		inode->i_op = &gfs2_dir_iops;
+		inode->i_fop = &gfs2_dir_fops;
+	} else if (S_ISLNK(mode)) {
+		inode->i_op = &gfs2_symlink_iops;
+	} else {
+		inode->i_op = &gfs2_dev_iops;
+	}
+
+	unlock_new_inode(inode);
 }

 /**
 * gfs2_inode_lookup - Lookup an inode
 * @sb: The super block
- * @inum: The inode number
+ * @no_addr: The inode number
 * @type: The type of the inode
 *
 * Returns: A VFS inode, or an error
 */

-struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned int type)
+struct inode *gfs2_inode_lookup(struct super_block *sb, 
+				unsigned int type,
+				u64 no_addr,
+				u64 no_formal_ino)
 {
-	struct inode *inode = gfs2_iget(sb, inum);
+	struct inode *inode = gfs2_iget(sb, no_addr);
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_glock *io_gl;
 	int error;
@ -93,29 +131,15 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *i

 	if (inode->i_state & I_NEW) {
 		struct gfs2_sbd *sdp = GFS2_SB(inode);
-		umode_t mode = DT2IF(type);
 		inode->i_private = ip;
-		inode->i_mode = mode;
+		ip->i_no_formal_ino = no_formal_ino;

-		if (S_ISREG(mode)) {
-			inode->i_op = &gfs2_file_iops;
-			inode->i_fop = &gfs2_file_fops;
-			inode->i_mapping->a_ops = &gfs2_file_aops;
-		} else if (S_ISDIR(mode)) {
-			inode->i_op = &gfs2_dir_iops;
-			inode->i_fop = &gfs2_dir_fops;
-		} else if (S_ISLNK(mode)) {
-			inode->i_op = &gfs2_symlink_iops;
-		} else {
-			inode->i_op = &gfs2_dev_iops;
-		}
-
-		error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
+		error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
 		if (unlikely(error))
 			goto fail;
 		ip->i_gl->gl_object = ip;

-		error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
+		error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
 		if (unlikely(error))
 			goto fail_put;

@ -123,12 +147,38 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *i
 		error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
 		if (unlikely(error))
 			goto fail_iopen;
+		ip->i_iopen_gh.gh_gl->gl_object = ip;

 		gfs2_glock_put(io_gl);
-		unlock_new_inode(inode);
+
+		if ((type == DT_UNKNOWN) && (no_formal_ino == 0))
+			goto gfs2_nfsbypass;
+
+		inode->i_mode = DT2IF(type);
+
+		/*
+		 * We must read the inode in order to work out its type in
+		 * this case. Note that this doesn't happen often as we normally
+		 * know the type beforehand. This code path only occurs during
+		 * unlinked inode recovery (where it is safe to do this glock,
+		 * which is not true in the general case).
+		 */
+		if (type == DT_UNKNOWN) {
+			struct gfs2_holder gh;
+			error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+			if (unlikely(error))
+				goto fail_glock;
+			/* Inode is now uptodate */
+			gfs2_glock_dq_uninit(&gh);
+		}
+
+		gfs2_set_iop(inode);
 	}

+gfs2_nfsbypass:
 	return inode;
+fail_glock:
+	gfs2_glock_dq(&ip->i_iopen_gh);
 fail_iopen:
 	gfs2_glock_put(io_gl);
 fail_put:
@ -144,14 +194,12 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	struct gfs2_dinode_host *di = &ip->i_di;
 	const struct gfs2_dinode *str = buf;

-	if (ip->i_num.no_addr != be64_to_cpu(str->di_num.no_addr)) {
+	if (ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)) {
 		if (gfs2_consist_inode(ip))
 			gfs2_dinode_print(ip);
 		return -EIO;
 	}
-	if (ip->i_num.no_formal_ino != be64_to_cpu(str->di_num.no_formal_ino))
-		return -ESTALE;
-
+	ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
 	ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
 	ip->i_inode.i_rdev = 0;
 	switch (ip->i_inode.i_mode & S_IFMT) {
@ -175,11 +223,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	di->di_blocks = be64_to_cpu(str->di_blocks);
 	gfs2_set_inode_blocks(&ip->i_inode);
 	ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime);
-	ip->i_inode.i_atime.tv_nsec = 0;
+	ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
 	ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
-	ip->i_inode.i_mtime.tv_nsec = 0;
+	ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
 	ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
-	ip->i_inode.i_ctime.tv_nsec = 0;
+	ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);

 	di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
 	di->di_goal_data = be64_to_cpu(str->di_goal_data);
@ -247,7 +295,7 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip)
 	if (error)
 		goto out_qs;

-	rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
 	if (!rgd) {
 		gfs2_consist_inode(ip);
 		error = -EIO;
@ -314,7 +362,7 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
 	else
 		drop_nlink(&ip->i_inode);

-	ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+	ip->i_inode.i_ctime = CURRENT_TIME;

 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
@ -366,9 +414,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
 	struct super_block *sb = dir->i_sb;
 	struct gfs2_inode *dip = GFS2_I(dir);
 	struct gfs2_holder d_gh;
-	struct gfs2_inum_host inum;
-	unsigned int type;
-	int error;
+	int error = 0;
 	struct inode *inode = NULL;
 	int unlock = 0;

@ -395,12 +441,9 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
 			goto out;
 	}

-	error = gfs2_dir_search(dir, name, &inum, &type);
-	if (error)
-		goto out;
-
-	inode = gfs2_inode_lookup(sb, &inum, type);
-
+	inode = gfs2_dir_search(dir, name);
+	if (IS_ERR(inode))
+		error = PTR_ERR(inode);
 out:
 	if (unlock)
 		gfs2_glock_dq_uninit(&d_gh);
@ -409,6 +452,22 @@ out:
 	return inode ? inode : ERR_PTR(error);
 }

+static void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf)
+{
+	const struct gfs2_inum_range *str = buf;
+
+	ir->ir_start = be64_to_cpu(str->ir_start);
+	ir->ir_length = be64_to_cpu(str->ir_length);
+}
+
+static void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
+{
+	struct gfs2_inum_range *str = buf;
+
+	str->ir_start = cpu_to_be64(ir->ir_start);
+	str->ir_length = cpu_to_be64(ir->ir_length);
+}
+
 static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
 {
 	struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
@ -548,7 +607,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
 	if (!dip->i_inode.i_nlink)
 		return -EPERM;

-	error = gfs2_dir_search(&dip->i_inode, name, NULL, NULL);
+	error = gfs2_dir_check(&dip->i_inode, name, NULL);
 	switch (error) {
 	case -ENOENT:
 		error = 0;
@ -588,8 +647,7 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
 		*gid = current->fsgid;
 }

-static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum_host *inum,
-			u64 *generation)
+static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	int error;
@ -605,7 +663,7 @@ static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum_host *inum,
 	if (error)
 		goto out_ipreserv;

-	inum->no_addr = gfs2_alloc_di(dip, generation);
+	*no_addr = gfs2_alloc_di(dip, generation);

 	gfs2_trans_end(sdp);

@ -635,6 +693,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	struct gfs2_dinode *di;
 	struct buffer_head *dibh;
+	struct timespec tv = CURRENT_TIME;

 	dibh = gfs2_meta_new(gl, inum->no_addr);
 	gfs2_trans_add_bh(gl, dibh, 1);
@ -650,7 +709,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	di->di_nlink = 0;
 	di->di_size = 0;
 	di->di_blocks = cpu_to_be64(1);
-	di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds());
+	di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec);
 	di->di_major = cpu_to_be32(MAJOR(dev));
 	di->di_minor = cpu_to_be32(MINOR(dev));
 	di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
@ -680,6 +739,9 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	di->di_entries = 0;
 	memset(&di->__pad4, 0, sizeof(di->__pad4));
 	di->di_eattr = 0;
+	di->di_atime_nsec = cpu_to_be32(tv.tv_nsec);
+	di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
+	di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
 	memset(&di->di_reserved, 0, sizeof(di->di_reserved));

 	brelse(dibh);
@ -749,7 +811,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
 			goto fail_quota_locks;

 		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-					 al->al_rgd->rd_ri.ri_length +
+					 al->al_rgd->rd_length +
 					 2 * RES_DINODE +
 					 RES_STATFS + RES_QUOTA, 0);
 		if (error)
@ -760,7 +822,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
 			goto fail_quota_locks;
 	}

-	error = gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_inode.i_mode));
+	error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode));
 	if (error)
 		goto fail_end_trans;

@ -840,11 +902,11 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
 struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 			   unsigned int mode, dev_t dev)
 {
-	struct inode *inode;
+	struct inode *inode = NULL;
 	struct gfs2_inode *dip = ghs->gh_gl->gl_object;
 	struct inode *dir = &dip->i_inode;
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
-	struct gfs2_inum_host inum;
+	struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
 	int error;
 	u64 generation;

@ -864,7 +926,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 	if (error)
 		goto fail_gunlock;

-	error = alloc_dinode(dip, &inum, &generation);
+	error = alloc_dinode(dip, &inum.no_addr, &generation);
 	if (error)
 		goto fail_gunlock;

@ -877,34 +939,36 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 	if (error)
 		goto fail_gunlock2;

-	inode = gfs2_inode_lookup(dir->i_sb, &inum, IF2DT(mode));
+	inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode),
+					inum.no_addr,
+					inum.no_formal_ino);
 	if (IS_ERR(inode))
 		goto fail_gunlock2;

 	error = gfs2_inode_refresh(GFS2_I(inode));
 	if (error)
-		goto fail_iput;
+		goto fail_gunlock2;

 	error = gfs2_acl_create(dip, GFS2_I(inode));
 	if (error)
-		goto fail_iput;
+		goto fail_gunlock2;

 	error = gfs2_security_init(dip, GFS2_I(inode));
 	if (error)
-		goto fail_iput;
+		goto fail_gunlock2;

 	error = link_dinode(dip, name, GFS2_I(inode));
 	if (error)
-		goto fail_iput;
+		goto fail_gunlock2;

 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 	return inode;

-fail_iput:
-	iput(inode);
 fail_gunlock2:
 	gfs2_glock_dq_uninit(ghs + 1);
+	if (inode)
+		iput(inode);
 fail_gunlock:
 	gfs2_glock_dq(ghs);
 fail:
@ -976,10 +1040,8 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
 */

 int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
-		   struct gfs2_inode *ip)
+		   const struct gfs2_inode *ip)
 {
-	struct gfs2_inum_host inum;
-	unsigned int type;
 	int error;

 	if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
@ -997,18 +1059,10 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
 	if (error)
 		return error;

-	error = gfs2_dir_search(&dip->i_inode, name, &inum, &type);
+	error = gfs2_dir_check(&dip->i_inode, name, ip);
 	if (error)
 		return error;

-	if (!gfs2_inum_equal(&inum, &ip->i_num))
-		return -ENOENT;
-
-	if (IF2DT(ip->i_inode.i_mode) != type) {
-		gfs2_consist_inode(dip);
-		return -EIO;
-	}
-
 	return 0;
 }

@ -1132,10 +1186,11 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
 	struct gfs2_glock *gl = gh->gh_gl;
 	struct gfs2_sbd *sdp = gl->gl_sbd;
 	struct gfs2_inode *ip = gl->gl_object;
-	s64 curtime, quantum = gfs2_tune_get(sdp, gt_atime_quantum);
+	s64 quantum = gfs2_tune_get(sdp, gt_atime_quantum);
 	unsigned int state;
 	int flags;
 	int error;
+	struct timespec tv = CURRENT_TIME;

 	if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
 	    gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
@ -1153,8 +1208,7 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
 	    (sdp->sd_vfs->s_flags & MS_RDONLY))
 		return 0;

-	curtime = get_seconds();
-	if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) {
+	if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
 		gfs2_glock_dq(gh);
 		gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY,
 				   gh);
@ -1165,8 +1219,8 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
 		/* Verify that atime hasn't been updated while we were
 		   trying to get exclusive lock. */

-		curtime = get_seconds();
-		if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) {
+		tv = CURRENT_TIME;
+		if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
 			struct buffer_head *dibh;
 			struct gfs2_dinode *di;

@ -1180,11 +1234,12 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
 			if (error)
 				goto fail_end_trans;

-			ip->i_inode.i_atime.tv_sec = curtime;
+			ip->i_inode.i_atime = tv;

 			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 			di = (struct gfs2_dinode *)dibh->b_data;
 			di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
+			di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
 			brelse(dibh);

 			gfs2_trans_end(sdp);
@ -1252,3 +1307,66 @@ int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
 	return error;
 }

+void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
+{
+	const struct gfs2_dinode_host *di = &ip->i_di;
+	struct gfs2_dinode *str = buf;
+
+	str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
+	str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
+	str->di_header.__pad0 = 0;
+	str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
+	str->di_header.__pad1 = 0;
+	str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
+	str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
+	str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
+	str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
+	str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
+	str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
+	str->di_size = cpu_to_be64(di->di_size);
+	str->di_blocks = cpu_to_be64(di->di_blocks);
+	str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
+	str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
+	str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
+
+	str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
+	str->di_goal_data = cpu_to_be64(di->di_goal_data);
+	str->di_generation = cpu_to_be64(di->di_generation);
+
+	str->di_flags = cpu_to_be32(di->di_flags);
+	str->di_height = cpu_to_be16(di->di_height);
+	str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
+					     !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
+					     GFS2_FORMAT_DE : 0);
+	str->di_depth = cpu_to_be16(di->di_depth);
+	str->di_entries = cpu_to_be32(di->di_entries);
+
+	str->di_eattr = cpu_to_be64(di->di_eattr);
+	str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
+	str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
+	str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
+}
+
+void gfs2_dinode_print(const struct gfs2_inode *ip)
+{
+	const struct gfs2_dinode_host *di = &ip->i_di;
+
+	printk(KERN_INFO "  no_formal_ino = %llu\n",
+	       (unsigned long long)ip->i_no_formal_ino);
+	printk(KERN_INFO "  no_addr = %llu\n",
+	       (unsigned long long)ip->i_no_addr);
+	printk(KERN_INFO "  di_size = %llu\n", (unsigned long long)di->di_size);
+	printk(KERN_INFO "  di_blocks = %llu\n",
+	       (unsigned long long)di->di_blocks);
+	printk(KERN_INFO "  di_goal_meta = %llu\n",
+	       (unsigned long long)di->di_goal_meta);
+	printk(KERN_INFO "  di_goal_data = %llu\n",
+	       (unsigned long long)di->di_goal_data);
+	printk(KERN_INFO "  di_flags = 0x%.8X\n", di->di_flags);
+	printk(KERN_INFO "  di_height = %u\n", di->di_height);
+	printk(KERN_INFO "  di_depth = %u\n", di->di_depth);
+	printk(KERN_INFO "  di_entries = %u\n", di->di_entries);
+	printk(KERN_INFO "  di_eattr = %llu\n",
+	       (unsigned long long)di->di_eattr);
+}
+
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@ -10,17 +10,17 @@
 #ifndef __INODE_DOT_H__
 #define __INODE_DOT_H__

-static inline int gfs2_is_stuffed(struct gfs2_inode *ip)
+static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
 {
 	return !ip->i_di.di_height;
 }

-static inline int gfs2_is_jdata(struct gfs2_inode *ip)
+static inline int gfs2_is_jdata(const struct gfs2_inode *ip)
 {
 	return ip->i_di.di_flags & GFS2_DIF_JDATA;
 }

-static inline int gfs2_is_dir(struct gfs2_inode *ip)
+static inline int gfs2_is_dir(const struct gfs2_inode *ip)
 {
 	return S_ISDIR(ip->i_inode.i_mode);
 }
@ -32,9 +32,25 @@ static inline void gfs2_set_inode_blocks(struct inode *inode)
 		(GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
 }

+static inline int gfs2_check_inum(const struct gfs2_inode *ip, u64 no_addr,
+				  u64 no_formal_ino)
+{
+	return ip->i_no_addr == no_addr && ip->i_no_formal_ino == no_formal_ino;
+}
+
+static inline void gfs2_inum_out(const struct gfs2_inode *ip,
+				 struct gfs2_dirent *dent)
+{
+	dent->de_inum.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
+	dent->de_inum.no_addr = cpu_to_be64(ip->i_no_addr);
+}
+
+
 void gfs2_inode_attr_in(struct gfs2_inode *ip);
-struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned type);
-struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum);
+void gfs2_set_iop(struct inode *inode);
+struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
+				u64 no_addr, u64 no_formal_ino);
+struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);

 int gfs2_inode_refresh(struct gfs2_inode *ip);

@ -47,12 +63,14 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
 		struct gfs2_inode *ip);
 int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
-		   struct gfs2_inode *ip);
+		   const struct gfs2_inode *ip);
 int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
 int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
 int gfs2_glock_nq_atime(struct gfs2_holder *gh);
 int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
 struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
+void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
+void gfs2_dinode_print(const struct gfs2_inode *ip);

 #endif /* __INODE_DOT_H__ */

--- a/fs/gfs2/locking/dlm/lock.c
+++ b/fs/gfs2/locking/dlm/lock.c
@ -174,7 +174,6 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
 	lp->cur = DLM_LOCK_IV;
 	lp->lvb = NULL;
 	lp->hold_null = NULL;
-	init_completion(&lp->ast_wait);
 	INIT_LIST_HEAD(&lp->clist);
 	INIT_LIST_HEAD(&lp->blist);
 	INIT_LIST_HEAD(&lp->delay_list);
@ -399,6 +398,12 @@ static void gdlm_del_lvb(struct gdlm_lock *lp)
 	lp->lksb.sb_lvbptr = NULL;
 }

+static int gdlm_ast_wait(void *word)
+{
+	schedule();
+	return 0;
+}
+
 /* This can do a synchronous dlm request (requiring a lock_dlm thread to get
   the completion) because gfs won't call hold_lvb() during a callback (from
   the context of a lock_dlm thread). */
@ -424,10 +429,10 @@ static int hold_null_lock(struct gdlm_lock *lp)
 	lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
 	set_bit(LFL_NOBAST, &lpn->flags);
 	set_bit(LFL_INLOCK, &lpn->flags);
+	set_bit(LFL_AST_WAIT, &lpn->flags);

-	init_completion(&lpn->ast_wait);
 	gdlm_do_lock(lpn);
-	wait_for_completion(&lpn->ast_wait);
+	wait_on_bit(&lpn->flags, LFL_AST_WAIT, gdlm_ast_wait, TASK_UNINTERRUPTIBLE);
 	error = lpn->lksb.sb_status;
 	if (error) {
 		printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n",
--- a/fs/gfs2/locking/dlm/lock_dlm.h
+++ b/fs/gfs2/locking/dlm/lock_dlm.h
@ -101,6 +101,7 @@ enum {
 	LFL_NOBAST		= 10,
 	LFL_HEADQUE		= 11,
 	LFL_UNLOCK_DELETE	= 12,
+	LFL_AST_WAIT		= 13,
 };

 struct gdlm_lock {
@ -117,7 +118,6 @@ struct gdlm_lock {
 	unsigned long		flags;		/* lock_dlm flags LFL_ */

 	int			bast_mode;	/* protected by async_lock */
-	struct completion	ast_wait;

 	struct list_head	clist;		/* complete */
 	struct list_head	blist;		/* blocking */
--- a/fs/gfs2/locking/dlm/mount.c
+++ b/fs/gfs2/locking/dlm/mount.c
@ -147,7 +147,7 @@ static int gdlm_mount(char *table_name, char *host_data,

 	error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
 				  &ls->dlm_lockspace,
-				  nodir ? DLM_LSFL_NODIR : 0,
+				  DLM_LSFL_FS | (nodir ? DLM_LSFL_NODIR : 0),
 				  GDLM_LVB_SIZE);
 	if (error) {
 		log_error("dlm_new_lockspace error %d", error);
--- a/fs/gfs2/locking/dlm/plock.c
+++ b/fs/gfs2/locking/dlm/plock.c
@ -242,7 +242,7 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
 	op->info.number		= name->ln_number;
 	op->info.start		= fl->fl_start;
 	op->info.end		= fl->fl_end;
-
+	op->info.owner		= (__u64)(long) fl->fl_owner;

 	send_op(op);
 	wait_event(recv_wq, (op->done != 0));
@ -254,16 +254,20 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
 	}
 	spin_unlock(&ops_lock);

+	/* info.rv from userspace is 1 for conflict, 0 for no-conflict,
+	   -ENOENT if there are no locks on the file */
+
 	rv = op->info.rv;

 	fl->fl_type = F_UNLCK;
 	if (rv == -ENOENT)
 		rv = 0;
-	else if (rv == 0 && op->info.pid != fl->fl_pid) {
+	else if (rv > 0) {
 		fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
 		fl->fl_pid = op->info.pid;
 		fl->fl_start = op->info.start;
 		fl->fl_end = op->info.end;
+		rv = 0;
 	}

 	kfree(op);
--- a/fs/gfs2/locking/dlm/thread.c
+++ b/fs/gfs2/locking/dlm/thread.c
@ -44,6 +44,13 @@ static void process_blocking(struct gdlm_lock *lp, int bast_mode)
 	ls->fscb(ls->sdp, cb, &lp->lockname);
 }

+static void wake_up_ast(struct gdlm_lock *lp)
+{
+	clear_bit(LFL_AST_WAIT, &lp->flags);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&lp->flags, LFL_AST_WAIT);
+}
+
 static void process_complete(struct gdlm_lock *lp)
 {
 	struct gdlm_ls *ls = lp->ls;
@ -136,7 +143,7 @@ static void process_complete(struct gdlm_lock *lp)
 	 */

 	if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
-		complete(&lp->ast_wait);
+		wake_up_ast(lp);
 		return;
 	}

@ -214,7 +221,7 @@ out:
 	if (test_bit(LFL_INLOCK, &lp->flags)) {
 		clear_bit(LFL_NOBLOCK, &lp->flags);
 		lp->cur = lp->req;
-		complete(&lp->ast_wait);
+		wake_up_ast(lp);
 		return;
 	}

--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@ -83,6 +83,11 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)

 			gfs2_assert(sdp, bd->bd_ail == ai);

+			if (!bh){
+				list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
+                                continue;
+                        }
+
 			if (!buffer_busy(bh)) {
 				if (!buffer_uptodate(bh)) {
 					gfs2_log_unlock(sdp);
@ -125,6 +130,11 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
 					 bd_ail_st_list) {
 		bh = bd->bd_bh;

+		if (!bh){
+			list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
+			continue;
+		}
+
 		gfs2_assert(sdp, bd->bd_ail == ai);

 		if (buffer_busy(bh)) {
@ -262,8 +272,8 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
 * @sdp: The GFS2 superblock
 * @blks: The number of blocks to reserve
 *
- * Note that we never give out the last 6 blocks of the journal. Thats
- * due to the fact that there is are a small number of header blocks
+ * Note that we never give out the last few blocks of the journal. Thats
+ * due to the fact that there is a small number of header blocks
 * associated with each log flush. The exact number can't be known until
 * flush time, so we ensure that we have just enough free blocks at all
 * times to avoid running out during a log flush.
@ -274,6 +284,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
 int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
 {
 	unsigned int try = 0;
+	unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize);

 	if (gfs2_assert_warn(sdp, blks) ||
 	    gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
@ -281,7 +292,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)

 	mutex_lock(&sdp->sd_log_reserve_mutex);
 	gfs2_log_lock(sdp);
-	while(sdp->sd_log_blks_free <= (blks + 6)) {
+	while(sdp->sd_log_blks_free <= (blks + reserved_blks)) {
 		gfs2_log_unlock(sdp);
 		gfs2_ail1_empty(sdp, 0);
 		gfs2_log_flush(sdp, NULL);
@ -357,6 +368,58 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer
 	return dist;
 }

+/**
+ * calc_reserved - Calculate the number of blocks to reserve when
+ *                 refunding a transaction's unused buffers.
+ * @sdp: The GFS2 superblock
+ *
+ * This is complex.  We need to reserve room for all our currently used
+ * metadata buffers (e.g. normal file I/O rewriting file time stamps) and 
+ * all our journaled data buffers for journaled files (e.g. files in the 
+ * meta_fs like rindex, or files for which chattr +j was done.)
+ * If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush
+ * will count it as free space (sd_log_blks_free) and corruption will follow.
+ *
+ * We can have metadata bufs and jdata bufs in the same journal.  So each
+ * type gets its own log header, for which we need to reserve a block.
+ * In fact, each type has the potential for needing more than one header 
+ * in cases where we have more buffers than will fit on a journal page.
+ * Metadata journal entries take up half the space of journaled buffer entries.
+ * Thus, metadata entries have buf_limit (502) and journaled buffers have
+ * databuf_limit (251) before they cause a wrap around.
+ *
+ * Also, we need to reserve blocks for revoke journal entries and one for an
+ * overall header for the lot.
+ *
+ * Returns: the number of blocks reserved
+ */
+static unsigned int calc_reserved(struct gfs2_sbd *sdp)
+{
+	unsigned int reserved = 0;
+	unsigned int mbuf_limit, metabufhdrs_needed;
+	unsigned int dbuf_limit, databufhdrs_needed;
+	unsigned int revokes = 0;
+
+	mbuf_limit = buf_limit(sdp);
+	metabufhdrs_needed = (sdp->sd_log_commited_buf +
+			      (mbuf_limit - 1)) / mbuf_limit;
+	dbuf_limit = databuf_limit(sdp);
+	databufhdrs_needed = (sdp->sd_log_commited_databuf +
+			      (dbuf_limit - 1)) / dbuf_limit;
+
+	if (sdp->sd_log_commited_revoke)
+		revokes = gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
+					  sizeof(u64));
+
+	reserved = sdp->sd_log_commited_buf + metabufhdrs_needed +
+		sdp->sd_log_commited_databuf + databufhdrs_needed +
+		revokes;
+	/* One for the overall header */
+	if (reserved)
+		reserved++;
+	return reserved;
+}
+
 static unsigned int current_tail(struct gfs2_sbd *sdp)
 {
 	struct gfs2_ail *ai;
@ -447,14 +510,14 @@ struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
 	return bh;
 }

-static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail, int pull)
+static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
 {
 	unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);

 	ail2_empty(sdp, new_tail);

 	gfs2_log_lock(sdp);
-	sdp->sd_log_blks_free += dist - (pull ? 1 : 0);
+	sdp->sd_log_blks_free += dist;
 	gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
 	gfs2_log_unlock(sdp);

@ -504,7 +567,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
 	brelse(bh);

 	if (sdp->sd_log_tail != tail)
-		log_pull_tail(sdp, tail, pull);
+		log_pull_tail(sdp, tail);
 	else
 		gfs2_assert_withdraw(sdp, !pull);

@ -517,6 +580,7 @@ static void log_flush_commit(struct gfs2_sbd *sdp)
 	struct list_head *head = &sdp->sd_log_flush_list;
 	struct gfs2_log_buf *lb;
 	struct buffer_head *bh;
+	int flushcount = 0;

 	while (!list_empty(head)) {
 		lb = list_entry(head->next, struct gfs2_log_buf, lb_list);
@ -533,9 +597,20 @@ static void log_flush_commit(struct gfs2_sbd *sdp)
 		} else
 			brelse(bh);
 		kfree(lb);
+		flushcount++;
 	}

-	log_write_header(sdp, 0, 0);
+	/* If nothing was journaled, the header is unplanned and unwanted. */
+	if (flushcount) {
+		log_write_header(sdp, 0, 0);
+	} else {
+		unsigned int tail;
+		tail = current_tail(sdp);
+
+		gfs2_ail1_empty(sdp, 0);
+		if (sdp->sd_log_tail != tail)
+			log_pull_tail(sdp, tail);
+	}
 }

 /**
@ -565,7 +640,10 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 	INIT_LIST_HEAD(&ai->ai_ail1_list);
 	INIT_LIST_HEAD(&ai->ai_ail2_list);

-	gfs2_assert_withdraw(sdp, sdp->sd_log_num_buf == sdp->sd_log_commited_buf);
+	gfs2_assert_withdraw(sdp,
+			     sdp->sd_log_num_buf + sdp->sd_log_num_jdata ==
+			     sdp->sd_log_commited_buf +
+			     sdp->sd_log_commited_databuf);
 	gfs2_assert_withdraw(sdp,
 			sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);

@ -576,16 +654,19 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 	lops_before_commit(sdp);
 	if (!list_empty(&sdp->sd_log_flush_list))
 		log_flush_commit(sdp);
-	else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle)
+	else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
+		gfs2_log_lock(sdp);
+		sdp->sd_log_blks_free--; /* Adjust for unreserved buffer */
+		gfs2_log_unlock(sdp);
 		log_write_header(sdp, 0, PULL);
+	}
 	lops_after_commit(sdp, ai);

 	gfs2_log_lock(sdp);
 	sdp->sd_log_head = sdp->sd_log_flush_head;
-	sdp->sd_log_blks_free -= sdp->sd_log_num_hdrs;
 	sdp->sd_log_blks_reserved = 0;
 	sdp->sd_log_commited_buf = 0;
-	sdp->sd_log_num_hdrs = 0;
+	sdp->sd_log_commited_databuf = 0;
 	sdp->sd_log_commited_revoke = 0;

 	if (!list_empty(&ai->ai_ail1_list)) {
@ -602,32 +683,26 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)

 static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 {
-	unsigned int reserved = 0;
+	unsigned int reserved;
 	unsigned int old;

 	gfs2_log_lock(sdp);

 	sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm;
-	gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_buf) >= 0);
+	sdp->sd_log_commited_databuf += tr->tr_num_databuf_new -
+		tr->tr_num_databuf_rm;
+	gfs2_assert_withdraw(sdp, (((int)sdp->sd_log_commited_buf) >= 0) ||
+			     (((int)sdp->sd_log_commited_databuf) >= 0));
 	sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
 	gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0);
-
-	if (sdp->sd_log_commited_buf)
-		reserved += sdp->sd_log_commited_buf;
-	if (sdp->sd_log_commited_revoke)
-		reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
-					    sizeof(u64));
-	if (reserved)
-		reserved++;
-
+	reserved = calc_reserved(sdp);
 	old = sdp->sd_log_blks_free;
 	sdp->sd_log_blks_free += tr->tr_reserved -
 				 (reserved - sdp->sd_log_blks_reserved);

 	gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= old);
-	gfs2_assert_withdraw(sdp,
-			     sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks +
-			     sdp->sd_log_num_hdrs);
+	gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <=
+			     sdp->sd_jdesc->jd_blocks);

 	sdp->sd_log_blks_reserved = reserved;

@ -673,13 +748,13 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
 	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
 	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
 	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
-	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_hdrs);
 	gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));

 	sdp->sd_log_flush_head = sdp->sd_log_head;
 	sdp->sd_log_flush_wrapped = 0;

-	log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 0);
+	log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT,
+			 (sdp->sd_log_tail == current_tail(sdp)) ? 0 : PULL);

 	gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks);
 	gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@ -17,6 +17,7 @@

 #include "gfs2.h"
 #include "incore.h"
+#include "inode.h"
 #include "glock.h"
 #include "log.h"
 #include "lops.h"
@ -117,15 +118,13 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
 	struct gfs2_log_descriptor *ld;
 	struct gfs2_bufdata *bd1 = NULL, *bd2;
 	unsigned int total = sdp->sd_log_num_buf;
-	unsigned int offset = sizeof(struct gfs2_log_descriptor);
+	unsigned int offset = BUF_OFFSET;
 	unsigned int limit;
 	unsigned int num;
 	unsigned n;
 	__be64 *ptr;

-	offset += sizeof(__be64) - 1;
-	offset &= ~(sizeof(__be64) - 1);
-	limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
+	limit = buf_limit(sdp);
 	/* for 4k blocks, limit = 503 */

 	bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
@ -134,7 +133,6 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
 		if (total > limit)
 			num = limit;
 		bh = gfs2_log_get_buf(sdp);
-		sdp->sd_log_num_hdrs++;
 		ld = (struct gfs2_log_descriptor *)bh->b_data;
 		ptr = (__be64 *)(bh->b_data + offset);
 		ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
@ -469,25 +467,28 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
 	struct gfs2_inode *ip = GFS2_I(mapping->host);

 	gfs2_log_lock(sdp);
+	if (!list_empty(&bd->bd_list_tr)) {
+		gfs2_log_unlock(sdp);
+		return;
+	}
 	tr->tr_touched = 1;
-	if (list_empty(&bd->bd_list_tr) &&
-	    (ip->i_di.di_flags & GFS2_DIF_JDATA)) {
+	if (gfs2_is_jdata(ip)) {
 		tr->tr_num_buf++;
 		list_add(&bd->bd_list_tr, &tr->tr_list_buf);
-		gfs2_log_unlock(sdp);
-		gfs2_pin(sdp, bd->bd_bh);
-		tr->tr_num_buf_new++;
-	} else {
-		gfs2_log_unlock(sdp);
 	}
+	gfs2_log_unlock(sdp);
+	if (!list_empty(&le->le_list))
+		return;
+
 	gfs2_trans_add_gl(bd->bd_gl);
-	gfs2_log_lock(sdp);
-	if (list_empty(&le->le_list)) {
-		if (ip->i_di.di_flags & GFS2_DIF_JDATA)
-			sdp->sd_log_num_jdata++;
-		sdp->sd_log_num_databuf++;
-		list_add(&le->le_list, &sdp->sd_log_le_databuf);
+	if (gfs2_is_jdata(ip)) {
+		sdp->sd_log_num_jdata++;
+		gfs2_pin(sdp, bd->bd_bh);
+		tr->tr_num_databuf_new++;
 	}
+	sdp->sd_log_num_databuf++;
+	gfs2_log_lock(sdp);
+	list_add(&le->le_list, &sdp->sd_log_le_databuf);
 	gfs2_log_unlock(sdp);
 }

@ -520,7 +521,6 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 	LIST_HEAD(started);
 	struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
 	struct buffer_head *bh = NULL,*bh1 = NULL;
-	unsigned int offset = sizeof(struct gfs2_log_descriptor);
 	struct gfs2_log_descriptor *ld;
 	unsigned int limit;
 	unsigned int total_dbuf = sdp->sd_log_num_databuf;
@ -528,9 +528,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 	unsigned int num, n;
 	__be64 *ptr = NULL;

-	offset += 2*sizeof(__be64) - 1;
-	offset &= ~(2*sizeof(__be64) - 1);
-	limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
+	limit = databuf_limit(sdp);

 	/*
 	 * Start writing ordered buffers, write journaled buffers
@ -581,10 +579,10 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 				gfs2_log_unlock(sdp);
 				if (!bh) {
 					bh = gfs2_log_get_buf(sdp);
-					sdp->sd_log_num_hdrs++;
 					ld = (struct gfs2_log_descriptor *)
 					     bh->b_data;
-					ptr = (__be64 *)(bh->b_data + offset);
+					ptr = (__be64 *)(bh->b_data +
+							 DATABUF_OFFSET);
 					ld->ld_header.mh_magic =
 						cpu_to_be32(GFS2_MAGIC);
 					ld->ld_header.mh_type =
@ -605,7 +603,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 				if (unlikely(magic != 0))
 					set_buffer_escaped(bh1);
 				gfs2_log_lock(sdp);
-				if (n++ > num)
+				if (++n >= num)
 					break;
 			} else if (!bh1) {
 				total_dbuf--;
@ -622,6 +620,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 		}
 		gfs2_log_unlock(sdp);
 		if (bh) {
+			set_buffer_mapped(bh);
 			set_buffer_dirty(bh);
 			ll_rw_block(WRITE, 1, &bh);
 			bh = NULL;
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@ -13,6 +13,13 @@
 #include <linux/list.h>
 #include "incore.h"

+#define BUF_OFFSET \
+	((sizeof(struct gfs2_log_descriptor) + sizeof(__be64) - 1) & \
+	 ~(sizeof(__be64) - 1))
+#define DATABUF_OFFSET \
+	((sizeof(struct gfs2_log_descriptor) + (2 * sizeof(__be64) - 1)) & \
+	 ~(2 * sizeof(__be64) - 1))
+
 extern const struct gfs2_log_operations gfs2_glock_lops;
 extern const struct gfs2_log_operations gfs2_buf_lops;
 extern const struct gfs2_log_operations gfs2_revoke_lops;
@ -21,6 +28,22 @@ extern const struct gfs2_log_operations gfs2_databuf_lops;

 extern const struct gfs2_log_operations *gfs2_log_ops[];

+static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
+{
+	unsigned int limit;
+
+	limit = (sdp->sd_sb.sb_bsize - BUF_OFFSET) / sizeof(__be64);
+	return limit;
+}
+
+static inline unsigned int databuf_limit(struct gfs2_sbd *sdp)
+{
+	unsigned int limit;
+
+	limit = (sdp->sd_sb.sb_bsize - DATABUF_OFFSET) / (2 * sizeof(__be64));
+	return limit;
+}
+
 static inline void lops_init_le(struct gfs2_log_element *le,
 				const struct gfs2_log_operations *lops)
 {
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@ -387,12 +387,18 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)

 			if (test_clear_buffer_pinned(bh)) {
 				struct gfs2_trans *tr = current->journal_info;
+				struct gfs2_inode *bh_ip =
+					GFS2_I(bh->b_page->mapping->host);
+
 				gfs2_log_lock(sdp);
 				list_del_init(&bd->bd_le.le_list);
 				gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
 				sdp->sd_log_num_buf--;
 				gfs2_log_unlock(sdp);
-				tr->tr_num_buf_rm++;
+				if (bh_ip->i_inode.i_private != NULL)
+					tr->tr_num_databuf_rm++;
+				else
+					tr->tr_num_buf_rm++;
 				brelse(bh);
 			}
 			if (bd) {
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@ -63,7 +63,7 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
 static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
 					 struct buffer_head **bhp)
 {
-	return gfs2_meta_indirect_buffer(ip, 0, ip->i_num.no_addr, 0, bhp);
+	return gfs2_meta_indirect_buffer(ip, 0, ip->i_no_addr, 0, bhp);
 }

 struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@ -82,20 +82,19 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
 	char *options, *o, *v;
 	int error = 0;

-	if (!remount) {
-		/*  If someone preloaded options, use those instead  */
-		spin_lock(&gfs2_sys_margs_lock);
-		if (gfs2_sys_margs) {
-			data = gfs2_sys_margs;
-			gfs2_sys_margs = NULL;
-		}
-		spin_unlock(&gfs2_sys_margs_lock);
-
-		/*  Set some defaults  */
-		args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
-		args->ar_quota = GFS2_QUOTA_DEFAULT;
-		args->ar_data = GFS2_DATA_DEFAULT;
+	/*  If someone preloaded options, use those instead  */
+	spin_lock(&gfs2_sys_margs_lock);
+	if (!remount && gfs2_sys_margs) {
+		data = gfs2_sys_margs;
+		gfs2_sys_margs = NULL;
 	}
+	spin_unlock(&gfs2_sys_margs_lock);
+
+	/*  Set some defaults  */
+	memset(args, 0, sizeof(struct gfs2_args));
+	args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
+	args->ar_quota = GFS2_QUOTA_DEFAULT;
+	args->ar_data = GFS2_DATA_DEFAULT;

 	/* Split the options into tokens with the "," character and
 	   process them */
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@ -1,251 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-
-#include "gfs2.h"
-#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
-#include "incore.h"
-
-#define pv(struct, member, fmt) printk(KERN_INFO "  "#member" = "fmt"\n", \
-				       struct->member);
-
-/*
- * gfs2_xxx_in - read in an xxx struct
- * first arg: the cpu-order structure
- * buf: the disk-order buffer
- *
- * gfs2_xxx_out - write out an xxx struct
- * first arg: the cpu-order structure
- * buf: the disk-order buffer
- *
- * gfs2_xxx_print - print out an xxx struct
- * first arg: the cpu-order structure
- */
-
-void gfs2_inum_in(struct gfs2_inum_host *no, const void *buf)
-{
-	const struct gfs2_inum *str = buf;
-
-	no->no_formal_ino = be64_to_cpu(str->no_formal_ino);
-	no->no_addr = be64_to_cpu(str->no_addr);
-}
-
-void gfs2_inum_out(const struct gfs2_inum_host *no, void *buf)
-{
-	struct gfs2_inum *str = buf;
-
-	str->no_formal_ino = cpu_to_be64(no->no_formal_ino);
-	str->no_addr = cpu_to_be64(no->no_addr);
-}
-
-static void gfs2_inum_print(const struct gfs2_inum_host *no)
-{
-	printk(KERN_INFO "  no_formal_ino = %llu\n", (unsigned long long)no->no_formal_ino);
-	printk(KERN_INFO "  no_addr = %llu\n", (unsigned long long)no->no_addr);
-}
-
-static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf)
-{
-	const struct gfs2_meta_header *str = buf;
-
-	mh->mh_magic = be32_to_cpu(str->mh_magic);
-	mh->mh_type = be32_to_cpu(str->mh_type);
-	mh->mh_format = be32_to_cpu(str->mh_format);
-}
-
-void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
-{
-	const struct gfs2_sb *str = buf;
-
-	gfs2_meta_header_in(&sb->sb_header, buf);
-
-	sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
-	sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
-	sb->sb_bsize = be32_to_cpu(str->sb_bsize);
-	sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
-
-	gfs2_inum_in(&sb->sb_master_dir, (char *)&str->sb_master_dir);
-	gfs2_inum_in(&sb->sb_root_dir, (char *)&str->sb_root_dir);
-
-	memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
-	memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
-}
-
-void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf)
-{
-	const struct gfs2_rindex *str = buf;
-
-	ri->ri_addr = be64_to_cpu(str->ri_addr);
-	ri->ri_length = be32_to_cpu(str->ri_length);
-	ri->ri_data0 = be64_to_cpu(str->ri_data0);
-	ri->ri_data = be32_to_cpu(str->ri_data);
-	ri->ri_bitbytes = be32_to_cpu(str->ri_bitbytes);
-
-}
-
-void gfs2_rindex_print(const struct gfs2_rindex_host *ri)
-{
-	printk(KERN_INFO "  ri_addr = %llu\n", (unsigned long long)ri->ri_addr);
-	pv(ri, ri_length, "%u");
-
-	printk(KERN_INFO "  ri_data0 = %llu\n", (unsigned long long)ri->ri_data0);
-	pv(ri, ri_data, "%u");
-
-	pv(ri, ri_bitbytes, "%u");
-}
-
-void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf)
-{
-	const struct gfs2_rgrp *str = buf;
-
-	rg->rg_flags = be32_to_cpu(str->rg_flags);
-	rg->rg_free = be32_to_cpu(str->rg_free);
-	rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
-	rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
-}
-
-void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
-{
-	struct gfs2_rgrp *str = buf;
-
-	str->rg_flags = cpu_to_be32(rg->rg_flags);
-	str->rg_free = cpu_to_be32(rg->rg_free);
-	str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
-	str->__pad = cpu_to_be32(0);
-	str->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
-	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
-}
-
-void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
-{
-	const struct gfs2_quota *str = buf;
-
-	qu->qu_limit = be64_to_cpu(str->qu_limit);
-	qu->qu_warn = be64_to_cpu(str->qu_warn);
-	qu->qu_value = be64_to_cpu(str->qu_value);
-}
-
-void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
-{
-	const struct gfs2_dinode_host *di = &ip->i_di;
-	struct gfs2_dinode *str = buf;
-
-	str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
-	str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
-	str->di_header.__pad0 = 0;
-	str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
-	str->di_header.__pad1 = 0;
-
-	gfs2_inum_out(&ip->i_num, &str->di_num);
-
-	str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
-	str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
-	str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
-	str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
-	str->di_size = cpu_to_be64(di->di_size);
-	str->di_blocks = cpu_to_be64(di->di_blocks);
-	str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
-	str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
-	str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
-
-	str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
-	str->di_goal_data = cpu_to_be64(di->di_goal_data);
-	str->di_generation = cpu_to_be64(di->di_generation);
-
-	str->di_flags = cpu_to_be32(di->di_flags);
-	str->di_height = cpu_to_be16(di->di_height);
-	str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
-					     !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
-					     GFS2_FORMAT_DE : 0);
-	str->di_depth = cpu_to_be16(di->di_depth);
-	str->di_entries = cpu_to_be32(di->di_entries);
-
-	str->di_eattr = cpu_to_be64(di->di_eattr);
-}
-
-void gfs2_dinode_print(const struct gfs2_inode *ip)
-{
-	const struct gfs2_dinode_host *di = &ip->i_di;
-
-	gfs2_inum_print(&ip->i_num);
-
-	printk(KERN_INFO "  di_size = %llu\n", (unsigned long long)di->di_size);
-	printk(KERN_INFO "  di_blocks = %llu\n", (unsigned long long)di->di_blocks);
-	printk(KERN_INFO "  di_goal_meta = %llu\n", (unsigned long long)di->di_goal_meta);
-	printk(KERN_INFO "  di_goal_data = %llu\n", (unsigned long long)di->di_goal_data);
-
-	pv(di, di_flags, "0x%.8X");
-	pv(di, di_height, "%u");
-
-	pv(di, di_depth, "%u");
-	pv(di, di_entries, "%u");
-
-	printk(KERN_INFO "  di_eattr = %llu\n", (unsigned long long)di->di_eattr);
-}
-
-void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
-{
-	const struct gfs2_log_header *str = buf;
-
-	gfs2_meta_header_in(&lh->lh_header, buf);
-	lh->lh_sequence = be64_to_cpu(str->lh_sequence);
-	lh->lh_flags = be32_to_cpu(str->lh_flags);
-	lh->lh_tail = be32_to_cpu(str->lh_tail);
-	lh->lh_blkno = be32_to_cpu(str->lh_blkno);
-	lh->lh_hash = be32_to_cpu(str->lh_hash);
-}
-
-void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf)
-{
-	const struct gfs2_inum_range *str = buf;
-
-	ir->ir_start = be64_to_cpu(str->ir_start);
-	ir->ir_length = be64_to_cpu(str->ir_length);
-}
-
-void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
-{
-	struct gfs2_inum_range *str = buf;
-
-	str->ir_start = cpu_to_be64(ir->ir_start);
-	str->ir_length = cpu_to_be64(ir->ir_length);
-}
-
-void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
-{
-	const struct gfs2_statfs_change *str = buf;
-
-	sc->sc_total = be64_to_cpu(str->sc_total);
-	sc->sc_free = be64_to_cpu(str->sc_free);
-	sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
-}
-
-void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
-{
-	struct gfs2_statfs_change *str = buf;
-
-	str->sc_total = cpu_to_be64(sc->sc_total);
-	str->sc_free = cpu_to_be64(sc->sc_free);
-	str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
-}
-
-void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf)
-{
-	const struct gfs2_quota_change *str = buf;
-
-	qc->qc_change = be64_to_cpu(str->qc_change);
-	qc->qc_flags = be32_to_cpu(str->qc_flags);
-	qc->qc_id = be32_to_cpu(str->qc_id);
-}
-
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@ -1,6 +1,6 @@
 /*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
@ -32,6 +32,7 @@
 #include "trans.h"
 #include "rgrp.h"
 #include "ops_file.h"
+#include "super.h"
 #include "util.h"
 #include "glops.h"

@ -49,6 +50,8 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
 		end = start + bsize;
 		if (end <= from || start >= to)
 			continue;
+		if (gfs2_is_jdata(ip))
+			set_buffer_uptodate(bh);
 		gfs2_trans_add_bh(ip->i_gl, bh, 0);
 	}
 }
@ -134,7 +137,9 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
 		return 0; /* don't care */
 	}

-	if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) {
+	if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) &&
+	    PageChecked(page)) {
+		ClearPageChecked(page);
 		error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
 		if (error)
 			goto out_ignore;
@ -203,11 +208,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
 	 * so we need to supply one here. It doesn't happen often.
 	 */
 	if (unlikely(page->index)) {
-		kaddr = kmap_atomic(page, KM_USER0);
-		memset(kaddr, 0, PAGE_CACHE_SIZE);
-		kunmap_atomic(kaddr, KM_USER0);
-		flush_dcache_page(page);
-		SetPageUptodate(page);
+		zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
 		return 0;
 	}

@ -449,6 +450,31 @@ out_uninit:
 	return error;
 }

+/**
+ * adjust_fs_space - Adjusts the free space available due to gfs2_grow
+ * @inode: the rindex inode
+ */
+static void adjust_fs_space(struct inode *inode)
+{
+	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
+	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
+	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
+	u64 fs_total, new_free;
+
+	/* Total up the file system space, according to the latest rindex. */
+	fs_total = gfs2_ri_total(sdp);
+
+	spin_lock(&sdp->sd_statfs_spin);
+	if (fs_total > (m_sc->sc_total + l_sc->sc_total))
+		new_free = fs_total - (m_sc->sc_total + l_sc->sc_total);
+	else
+		new_free = 0;
+	spin_unlock(&sdp->sd_statfs_spin);
+	fs_warn(sdp, "File system extended by %llu blocks.\n",
+		(unsigned long long)new_free);
+	gfs2_statfs_change(sdp, new_free, new_free, 0);
+}
+
 /**
 * gfs2_commit_write - Commit write to a file
 * @file: The file to write to
@ -511,6 +537,9 @@ static int gfs2_commit_write(struct file *file, struct page *page,
 		di->di_size = cpu_to_be64(inode->i_size);
 	}

+	if (inode == sdp->sd_rindex)
+		adjust_fs_space(inode);
+
 	brelse(dibh);
 	gfs2_trans_end(sdp);
 	if (al->al_requested) {
@ -542,6 +571,23 @@ fail_nounlock:
 	return error;
 }

+/**
+ * gfs2_set_page_dirty - Page dirtying function
+ * @page: The page to dirty
+ *
+ * Returns: 1 if it dirtyed the page, or 0 otherwise
+ */
+ 
+static int gfs2_set_page_dirty(struct page *page)
+{
+	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
+	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
+
+	if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
+		SetPageChecked(page);
+	return __set_page_dirty_buffers(page);
+}
+
 /**
 * gfs2_bmap - Block map function
 * @mapping: Address space info
@ -578,6 +624,8 @@ static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh)
 	if (bd) {
 		bd->bd_bh = NULL;
 		bh->b_private = NULL;
+		if (!bd->bd_ail && list_empty(&bd->bd_le.le_list))
+			kmem_cache_free(gfs2_bufdata_cachep, bd);
 	}
 	gfs2_log_unlock(sdp);

@ -598,6 +646,8 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset)
 	unsigned int curr_off = 0;

 	BUG_ON(!PageLocked(page));
+	if (offset == 0)
+		ClearPageChecked(page);
 	if (!page_has_buffers(page))
 		return;

@ -728,8 +778,8 @@ static unsigned limit = 0;
 			return;

 		fs_warn(sdp, "ip = %llu %llu\n",
-			(unsigned long long)ip->i_num.no_formal_ino,
-			(unsigned long long)ip->i_num.no_addr);
+			(unsigned long long)ip->i_no_formal_ino,
+			(unsigned long long)ip->i_no_addr);

 		for (x = 0; x < GFS2_MAX_META_HEIGHT; x++)
 			fs_warn(sdp, "ip->i_cache[%u] = %s\n",
@ -810,6 +860,7 @@ const struct address_space_operations gfs2_file_aops = {
 	.sync_page = block_sync_page,
 	.prepare_write = gfs2_prepare_write,
 	.commit_write = gfs2_commit_write,
+	.set_page_dirty = gfs2_set_page_dirty,
 	.bmap = gfs2_bmap,
 	.invalidatepage = gfs2_invalidatepage,
 	.releasepage = gfs2_releasepage,
--- a/fs/gfs2/ops_address.h
+++ b/fs/gfs2/ops_address.h
@ -1,6 +1,6 @@
 /*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@ -21,6 +21,7 @@
 #include "glock.h"
 #include "ops_dentry.h"
 #include "util.h"
+#include "inode.h"

 /**
 * gfs2_drevalidate - Check directory lookup consistency
@ -40,14 +41,15 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
 	struct gfs2_inode *dip = GFS2_I(parent->d_inode);
 	struct inode *inode = dentry->d_inode;
 	struct gfs2_holder d_gh;
-	struct gfs2_inode *ip;
-	struct gfs2_inum_host inum;
-	unsigned int type;
+	struct gfs2_inode *ip = NULL;
 	int error;
 	int had_lock=0;

-	if (inode && is_bad_inode(inode))
-		goto invalid;
+	if (inode) {
+		if (is_bad_inode(inode))
+			goto invalid;
+		ip = GFS2_I(inode);
+	}

 	if (sdp->sd_args.ar_localcaching)
 		goto valid;
@ -59,7 +61,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
 			goto fail;
 	} 

-	error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type);
+	error = gfs2_dir_check(parent->d_inode, &dentry->d_name, ip);
 	switch (error) {
 	case 0:
 		if (!inode)
@ -73,16 +75,6 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
 		goto fail_gunlock;
 	}

-	ip = GFS2_I(inode);
-
-	if (!gfs2_inum_equal(&ip->i_num, &inum))
-		goto invalid_gunlock;
-
-	if (IF2DT(ip->i_inode.i_mode) != type) {
-		gfs2_consist_inode(dip);
-		goto fail_gunlock;
-	}
-
 valid_gunlock:
 	if (!had_lock)
 		gfs2_glock_dq_uninit(&d_gh);
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@ -22,10 +22,14 @@
 #include "glops.h"
 #include "inode.h"
 #include "ops_dentry.h"
-#include "ops_export.h"
+#include "ops_fstype.h"
 #include "rgrp.h"
 #include "util.h"

+#define GFS2_SMALL_FH_SIZE 4
+#define GFS2_LARGE_FH_SIZE 8
+#define GFS2_OLD_FH_SIZE 10
+
 static struct dentry *gfs2_decode_fh(struct super_block *sb,
 				     __u32 *p,
 				     int fh_len,
@ -35,31 +39,28 @@ static struct dentry *gfs2_decode_fh(struct super_block *sb,
 				     void *context)
 {
 	__be32 *fh = (__force __be32 *)p;
-	struct gfs2_fh_obj fh_obj;
-	struct gfs2_inum_host *this, parent;
+	struct gfs2_inum_host inum, parent;

-	this 		= &fh_obj.this;
-	fh_obj.imode 	= DT_UNKNOWN;
 	memset(&parent, 0, sizeof(struct gfs2_inum));

 	switch (fh_len) {
 	case GFS2_LARGE_FH_SIZE:
+	case GFS2_OLD_FH_SIZE:
 		parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32;
 		parent.no_formal_ino |= be32_to_cpu(fh[5]);
 		parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32;
 		parent.no_addr |= be32_to_cpu(fh[7]);
-		fh_obj.imode = be32_to_cpu(fh[8]);
 	case GFS2_SMALL_FH_SIZE:
-		this->no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32;
-		this->no_formal_ino |= be32_to_cpu(fh[1]);
-		this->no_addr = ((u64)be32_to_cpu(fh[2])) << 32;
-		this->no_addr |= be32_to_cpu(fh[3]);
+		inum.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32;
+		inum.no_formal_ino |= be32_to_cpu(fh[1]);
+		inum.no_addr = ((u64)be32_to_cpu(fh[2])) << 32;
+		inum.no_addr |= be32_to_cpu(fh[3]);
 		break;
 	default:
 		return NULL;
 	}

-	return gfs2_export_ops.find_exported_dentry(sb, &fh_obj, &parent,
+	return gfs2_export_ops.find_exported_dentry(sb, &inum, &parent,
 						    acceptable, context);
 }

@ -75,10 +76,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
 	    (connectable && *len < GFS2_LARGE_FH_SIZE))
 		return 255;

-	fh[0] = cpu_to_be32(ip->i_num.no_formal_ino >> 32);
-	fh[1] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF);
-	fh[2] = cpu_to_be32(ip->i_num.no_addr >> 32);
-	fh[3] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF);
+	fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
+	fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
+	fh[2] = cpu_to_be32(ip->i_no_addr >> 32);
+	fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
 	*len = GFS2_SMALL_FH_SIZE;

 	if (!connectable || inode == sb->s_root->d_inode)
@ -90,13 +91,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
 	igrab(inode);
 	spin_unlock(&dentry->d_lock);

-	fh[4] = cpu_to_be32(ip->i_num.no_formal_ino >> 32);
-	fh[5] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF);
-	fh[6] = cpu_to_be32(ip->i_num.no_addr >> 32);
-	fh[7] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF);
-
-	fh[8]  = cpu_to_be32(inode->i_mode);
-	fh[9]  = 0;	/* pad to double word */
+	fh[4] = cpu_to_be32(ip->i_no_formal_ino >> 32);
+	fh[5] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
+	fh[6] = cpu_to_be32(ip->i_no_addr >> 32);
+	fh[7] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
 	*len = GFS2_LARGE_FH_SIZE;

 	iput(inode);
@ -144,7 +142,8 @@ static int gfs2_get_name(struct dentry *parent, char *name,
 	ip = GFS2_I(inode);

 	*name = 0;
-	gnfd.inum = ip->i_num;
+	gnfd.inum.no_addr = ip->i_no_addr;
+	gnfd.inum.no_formal_ino = ip->i_no_formal_ino;
 	gnfd.name = name;

 	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
@ -192,8 +191,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
 static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
-	struct gfs2_fh_obj *fh_obj = (struct gfs2_fh_obj *)inum_obj;
-	struct gfs2_inum_host *inum = &fh_obj->this;
+	struct gfs2_inum_host *inum = inum_obj;
 	struct gfs2_holder i_gh, ri_gh, rgd_gh;
 	struct gfs2_rgrpd *rgd;
 	struct inode *inode;
@ -202,9 +200,9 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)

 	/* System files? */

-	inode = gfs2_ilookup(sb, inum);
+	inode = gfs2_ilookup(sb, inum->no_addr);
 	if (inode) {
-		if (GFS2_I(inode)->i_num.no_formal_ino != inum->no_formal_ino) {
+		if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
 			iput(inode);
 			return ERR_PTR(-ESTALE);
 		}
@ -236,7 +234,9 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
 	gfs2_glock_dq_uninit(&rgd_gh);
 	gfs2_glock_dq_uninit(&ri_gh);

-	inode = gfs2_inode_lookup(sb, inum, fh_obj->imode);
+	inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
+					inum->no_addr,
+					0);
 	if (!inode)
 		goto fail;
 	if (IS_ERR(inode)) {
@ -250,6 +250,15 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
 		goto fail;
 	}

+	/* Pick up the works we bypass in gfs2_inode_lookup */
+	if (inode->i_state & I_NEW) 
+		gfs2_set_iop(inode);
+
+	if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
+		iput(inode);
+		goto fail;
+	}
+
 	error = -EIO;
 	if (GFS2_I(inode)->i_di.di_flags & GFS2_DIF_SYSTEM) {
 		iput(inode);
--- a/fs/gfs2/ops_export.h
+++ b/fs/gfs2/ops_export.h
@ -1,22 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#ifndef __OPS_EXPORT_DOT_H__
-#define __OPS_EXPORT_DOT_H__
-
-#define GFS2_SMALL_FH_SIZE 4
-#define GFS2_LARGE_FH_SIZE 10
-
-extern struct export_operations gfs2_export_ops;
-struct gfs2_fh_obj {
-	struct gfs2_inum_host this;
-	__u32            imode;
-};
-
-#endif /* __OPS_EXPORT_DOT_H__ */
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@ -502,7 +502,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
 	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
 	struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
 	struct lm_lockname name =
-		{ .ln_number = ip->i_num.no_addr,
+		{ .ln_number = ip->i_no_addr,
 		  .ln_type = LM_TYPE_PLOCK };

 	if (!(fl->fl_flags & FL_POSIX))
@ -557,7 +557,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
 		gfs2_glock_dq_uninit(fl_gh);
 	} else {
 		error = gfs2_glock_get(GFS2_SB(&ip->i_inode),
-				      ip->i_num.no_addr, &gfs2_flock_glops,
+				      ip->i_no_addr, &gfs2_flock_glops,
 				      CREATE, &gl);
 		if (error)
 			goto out;
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@ -27,7 +27,6 @@
 #include "inode.h"
 #include "lm.h"
 #include "mount.h"
-#include "ops_export.h"
 #include "ops_fstype.h"
 #include "ops_super.h"
 #include "recovery.h"
@ -105,6 +104,7 @@ static void init_vfs(struct super_block *sb, unsigned noatime)
 	sb->s_magic = GFS2_MAGIC;
 	sb->s_op = &gfs2_super_ops;
 	sb->s_export_op = &gfs2_export_ops;
+	sb->s_time_gran = 1;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;

 	if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
@ -116,7 +116,6 @@ static void init_vfs(struct super_block *sb, unsigned noatime)

 static int init_names(struct gfs2_sbd *sdp, int silent)
 {
-	struct page *page;
 	char *proto, *table;
 	int error = 0;

@ -126,14 +125,9 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
 	/*  Try to autodetect  */

 	if (!proto[0] || !table[0]) {
-		struct gfs2_sb *sb;
-		page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
-		if (!page)
-			return -ENOBUFS;
-		sb = kmap(page);
-		gfs2_sb_in(&sdp->sd_sb, sb);
-		kunmap(page);
-		__free_page(page);
+		error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+		if (error)
+			return error;

 		error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
 		if (error)
@ -151,6 +145,9 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
 	snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto);
 	snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table);

+	while ((table = strchr(sdp->sd_table_name, '/')))
+		*table = '_';
+
 out:
 	return error;
 }
@ -236,17 +233,17 @@ fail:
 	return error;
 }

-static struct inode *gfs2_lookup_root(struct super_block *sb,
-				      struct gfs2_inum_host *inum)
+static inline struct inode *gfs2_lookup_root(struct super_block *sb,
+					     u64 no_addr)
 {
-	return gfs2_inode_lookup(sb, inum, DT_DIR);
+	return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
 }

 static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
 {
 	struct super_block *sb = sdp->sd_vfs;
 	struct gfs2_holder sb_gh;
-	struct gfs2_inum_host *inum;
+	u64 no_addr;
 	struct inode *inode;
 	int error = 0;

@ -289,10 +286,10 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
 	sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);

 	/* Get the root inode */
-	inum = &sdp->sd_sb.sb_root_dir;
+	no_addr = sdp->sd_sb.sb_root_dir.no_addr;
 	if (sb->s_type == &gfs2meta_fs_type)
-		inum = &sdp->sd_sb.sb_master_dir;
-	inode = gfs2_lookup_root(sb, inum);
+		no_addr = sdp->sd_sb.sb_master_dir.no_addr;
+	inode = gfs2_lookup_root(sb, no_addr);
 	if (IS_ERR(inode)) {
 		error = PTR_ERR(inode);
 		fs_err(sdp, "can't read in root inode: %d\n", error);
@ -449,7 +446,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
 	if (undo)
 		goto fail_qinode;

-	inode = gfs2_lookup_root(sdp->sd_vfs, &sdp->sd_sb.sb_master_dir);
+	inode = gfs2_lookup_root(sdp->sd_vfs, sdp->sd_sb.sb_master_dir.no_addr);
 	if (IS_ERR(inode)) {
 		error = PTR_ERR(inode);
 		fs_err(sdp, "can't read in master directory: %d\n", error);
--- a/fs/gfs2/ops_fstype.h
+++ b/fs/gfs2/ops_fstype.h
@ -14,5 +14,6 @@

 extern struct file_system_type gfs2_fs_type;
 extern struct file_system_type gfs2meta_fs_type;
+extern struct export_operations gfs2_export_ops;

 #endif /* __OPS_FSTYPE_DOT_H__ */
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@ -157,7 +157,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	if (error)
 		goto out_gunlock;

-	error = gfs2_dir_search(dir, &dentry->d_name, NULL, NULL);
+	error = gfs2_dir_check(dir, &dentry->d_name, NULL);
 	switch (error) {
 	case -ENOENT:
 		break;
@ -206,7 +206,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 			goto out_gunlock_q;

 		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-					 al->al_rgd->rd_ri.ri_length +
+					 al->al_rgd->rd_length +
 					 2 * RES_DINODE + RES_STATFS +
 					 RES_QUOTA, 0);
 		if (error)
@ -217,8 +217,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 			goto out_ipres;
 	}

-	error = gfs2_dir_add(dir, &dentry->d_name, &ip->i_num,
-			     IF2DT(inode->i_mode));
+	error = gfs2_dir_add(dir, &dentry->d_name, ip, IF2DT(inode->i_mode));
 	if (error)
 		goto out_end_trans;

@ -275,7 +274,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
 	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
 	gfs2_holder_init(ip->i_gl,  LM_ST_EXCLUSIVE, 0, ghs + 1);

-	rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
 	gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);


@ -420,7 +419,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 		dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
 		gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);

-		gfs2_inum_out(&dip->i_num, &dent->de_inum);
+		gfs2_inum_out(dip, dent);
 		dent->de_type = cpu_to_be16(DT_DIR);

 		gfs2_dinode_out(ip, di);
@ -472,7 +471,7 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
 	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);

-	rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
 	gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);

 	error = gfs2_glock_nq_m(3, ghs);
@ -614,7 +613,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		 * this is the case of the target file already existing
 		 * so we unlink before doing the rename
 		 */
-		nrgd = gfs2_blk2rgrpd(sdp, nip->i_num.no_addr);
+		nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
 		if (nrgd)
 			gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
 	}
@ -653,7 +652,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		if (error)
 			goto out_gunlock;

-		error = gfs2_dir_search(ndir, &ndentry->d_name, NULL, NULL);
+		error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
 		switch (error) {
 		case -ENOENT:
 			error = 0;
@ -712,7 +711,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 			goto out_gunlock_q;

 		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-					 al->al_rgd->rd_ri.ri_length +
+					 al->al_rgd->rd_length +
 					 4 * RES_DINODE + 4 * RES_LEAF +
 					 RES_STATFS + RES_QUOTA + 4, 0);
 		if (error)
@ -750,7 +749,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		if (error)
 			goto out_end_trans;

-		error = gfs2_dir_mvino(ip, &name, &ndip->i_num, DT_DIR);
+		error = gfs2_dir_mvino(ip, &name, ndip, DT_DIR);
 		if (error)
 			goto out_end_trans;
 	} else {
@ -758,7 +757,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		error = gfs2_meta_inode_buffer(ip, &dibh);
 		if (error)
 			goto out_end_trans;
-		ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+		ip->i_inode.i_ctime = CURRENT_TIME;
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
@ -768,8 +767,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 	if (error)
 		goto out_end_trans;

-	error = gfs2_dir_add(ndir, &ndentry->d_name, &ip->i_num,
-			     IF2DT(ip->i_inode.i_mode));
+	error = gfs2_dir_add(ndir, &ndentry->d_name, ip, IF2DT(ip->i_inode.i_mode));
 	if (error)
 		goto out_end_trans;

@ -905,8 +903,8 @@ static int setattr_size(struct inode *inode, struct iattr *attr)
 	}

 	error = gfs2_truncatei(ip, attr->ia_size);
-	if (error)
-		return error;
+	if (error && (inode->i_size != ip->i_di.di_size))
+		i_size_write(inode, ip->i_di.di_size);

 	return error;
 }
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@ -326,8 +326,10 @@ static void gfs2_clear_inode(struct inode *inode)
 		gfs2_glock_schedule_for_reclaim(ip->i_gl);
 		gfs2_glock_put(ip->i_gl);
 		ip->i_gl = NULL;
-		if (ip->i_iopen_gh.gh_gl)
+		if (ip->i_iopen_gh.gh_gl) {
+			ip->i_iopen_gh.gh_gl->gl_object = NULL;
 			gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+		}
 	}
 }

@ -422,13 +424,13 @@ static void gfs2_delete_inode(struct inode *inode)
 	if (!inode->i_private)
 		goto out;

-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &gh);
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
 	if (unlikely(error)) {
 		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
 		goto out;
 	}

-	gfs2_glock_dq(&ip->i_iopen_gh);
+	gfs2_glock_dq_wait(&ip->i_iopen_gh);
 	gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
 	error = gfs2_glock_nq(&ip->i_iopen_gh);
 	if (error)
--- a/fs/gfs2/ops_vm.c
+++ b/fs/gfs2/ops_vm.c
@ -66,7 +66,7 @@ static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
 	if (error)
 		goto out_gunlock_q;

-	error = gfs2_trans_begin(sdp, al->al_rgd->rd_ri.ri_length +
+	error = gfs2_trans_begin(sdp, al->al_rgd->rd_length +
 				 ind_blocks + RES_DINODE +
 				 RES_STATFS + RES_QUOTA, 0);
 	if (error)
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@ -66,6 +66,18 @@
 #define QUOTA_USER 1
 #define QUOTA_GROUP 0

+struct gfs2_quota_host {
+	u64 qu_limit;
+	u64 qu_warn;
+	s64 qu_value;
+};
+
+struct gfs2_quota_change_host {
+	u64 qc_change;
+	u32 qc_flags; /* GFS2_QCF_... */
+	u32 qc_id;
+};
+
 static u64 qd2offset(struct gfs2_quota_data *qd)
 {
 	u64 offset;
@ -561,6 +573,25 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
 	mutex_unlock(&sdp->sd_quota_mutex);
 }

+static void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
+{
+	const struct gfs2_quota *str = buf;
+
+	qu->qu_limit = be64_to_cpu(str->qu_limit);
+	qu->qu_warn = be64_to_cpu(str->qu_warn);
+	qu->qu_value = be64_to_cpu(str->qu_value);
+}
+
+static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf)
+{
+	struct gfs2_quota *str = buf;
+
+	str->qu_limit = cpu_to_be64(qu->qu_limit);
+	str->qu_warn = cpu_to_be64(qu->qu_warn);
+	str->qu_value = cpu_to_be64(qu->qu_value);
+	memset(&str->qu_reserved, 0, sizeof(str->qu_reserved));
+}
+
 /**
 * gfs2_adjust_quota
 *
@ -573,12 +604,13 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
 	struct inode *inode = &ip->i_inode;
 	struct address_space *mapping = inode->i_mapping;
 	unsigned long index = loc >> PAGE_CACHE_SHIFT;
-	unsigned offset = loc & (PAGE_CACHE_SHIFT - 1);
+	unsigned offset = loc & (PAGE_CACHE_SIZE - 1);
 	unsigned blocksize, iblock, pos;
 	struct buffer_head *bh;
 	struct page *page;
 	void *kaddr;
-	__be64 *ptr;
+	char *ptr;
+	struct gfs2_quota_host qp;
 	s64 value;
 	int err = -EIO;

@ -620,13 +652,17 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,

 	kaddr = kmap_atomic(page, KM_USER0);
 	ptr = kaddr + offset;
-	value = (s64)be64_to_cpu(*ptr) + change;
-	*ptr = cpu_to_be64(value);
+	gfs2_quota_in(&qp, ptr);
+	qp.qu_value += change;
+	value = qp.qu_value;
+	gfs2_quota_out(&qp, ptr);
 	flush_dcache_page(page);
 	kunmap_atomic(kaddr, KM_USER0);
 	err = 0;
 	qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC);
 	qd->qd_qb.qb_value = cpu_to_be64(value);
+	((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_magic = cpu_to_be32(GFS2_MAGIC);
+	((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_value = cpu_to_be64(value);
 unlock:
 	unlock_page(page);
 	page_cache_release(page);
@ -689,7 +725,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 			goto out_alloc;

 		error = gfs2_trans_begin(sdp,
-					 al->al_rgd->rd_ri.ri_length +
+					 al->al_rgd->rd_length +
 					 num_qd * data_blocks +
 					 nalloc * ind_blocks +
 					 RES_DINODE + num_qd +
@ -709,7 +745,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 		offset = qd2offset(qd);
 		error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync,
 					  (struct gfs2_quota_data *)
-					  qd->qd_gl->gl_lvb);
+					  qd);
 		if (error)
 			goto out_end_trans;

@ -1050,6 +1086,15 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
 	return error;
 }

+static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf)
+{
+	const struct gfs2_quota_change *str = buf;
+
+	qc->qc_change = be64_to_cpu(str->qc_change);
+	qc->qc_flags = be32_to_cpu(str->qc_flags);
+	qc->qc_id = be32_to_cpu(str->qc_id);
+}
+
 int gfs2_quota_init(struct gfs2_sbd *sdp)
 {
 	struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@ -116,6 +116,22 @@ void gfs2_revoke_clean(struct gfs2_sbd *sdp)
 	}
 }

+static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
+{
+	const struct gfs2_log_header *str = buf;
+
+	if (str->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
+	    str->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH))
+		return 1;
+
+	lh->lh_sequence = be64_to_cpu(str->lh_sequence);
+	lh->lh_flags = be32_to_cpu(str->lh_flags);
+	lh->lh_tail = be32_to_cpu(str->lh_tail);
+	lh->lh_blkno = be32_to_cpu(str->lh_blkno);
+	lh->lh_hash = be32_to_cpu(str->lh_hash);
+	return 0;
+}
+
 /**
 * get_log_header - read the log header for a given segment
 * @jd: the journal
@ -147,12 +163,10 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
 					     sizeof(u32));
 	hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing));
 	hash ^= (u32)~0;
-	gfs2_log_header_in(&lh, bh->b_data);
+	error = gfs2_log_header_in(&lh, bh->b_data);
 	brelse(bh);

-	if (lh.lh_header.mh_magic != GFS2_MAGIC ||
-	    lh.lh_header.mh_type != GFS2_METATYPE_LH ||
-	    lh.lh_blkno != blk || lh.lh_hash != hash)
+	if (error || lh.lh_blkno != blk || lh.lh_hash != hash)
 		return 1;

 	*head = lh;
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@ -1,6 +1,6 @@
 /*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
@ -28,6 +28,7 @@
 #include "ops_file.h"
 #include "util.h"
 #include "log.h"
+#include "inode.h"

 #define BFITNOENT ((u32)~0)

@ -50,6 +51,9 @@ static const char valid_change[16] = {
 	        1, 0, 0, 0
 };

+static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
+                        unsigned char old_state, unsigned char new_state);
+
 /**
 * gfs2_setbit - Set a bit in the bitmaps
 * @buffer: the buffer that holds the bitmaps
@ -204,7 +208,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 {
 	struct gfs2_sbd *sdp = rgd->rd_sbd;
 	struct gfs2_bitmap *bi = NULL;
-	u32 length = rgd->rd_ri.ri_length;
+	u32 length = rgd->rd_length;
 	u32 count[4], tmp;
 	int buf, x;

@ -227,7 +231,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 		return;
 	}

-	tmp = rgd->rd_ri.ri_data -
+	tmp = rgd->rd_data -
 		rgd->rd_rg.rg_free -
 		rgd->rd_rg.rg_dinodes;
 	if (count[1] + count[2] != tmp) {
@ -253,10 +257,10 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)

 }

-static inline int rgrp_contains_block(struct gfs2_rindex_host *ri, u64 block)
+static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
 {
-	u64 first = ri->ri_data0;
-	u64 last = first + ri->ri_data;
+	u64 first = rgd->rd_data0;
+	u64 last = first + rgd->rd_data;
 	return first <= block && block < last;
 }

@ -275,7 +279,7 @@ struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk)
 	spin_lock(&sdp->sd_rindex_spin);

 	list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) {
-		if (rgrp_contains_block(&rgd->rd_ri, blk)) {
+		if (rgrp_contains_block(rgd, blk)) {
 			list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
 			spin_unlock(&sdp->sd_rindex_spin);
 			return rgd;
@ -354,6 +358,15 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 	mutex_unlock(&sdp->sd_rindex_mutex);
 }

+static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
+{
+	printk(KERN_INFO "  ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
+	printk(KERN_INFO "  ri_length = %u\n", rgd->rd_length);
+	printk(KERN_INFO "  ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
+	printk(KERN_INFO "  ri_data = %u\n", rgd->rd_data);
+	printk(KERN_INFO "  ri_bitbytes = %u\n", rgd->rd_bitbytes);
+}
+
 /**
 * gfs2_compute_bitstructs - Compute the bitmap sizes
 * @rgd: The resource group descriptor
@ -367,7 +380,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 {
 	struct gfs2_sbd *sdp = rgd->rd_sbd;
 	struct gfs2_bitmap *bi;
-	u32 length = rgd->rd_ri.ri_length; /* # blocks in hdr & bitmap */
+	u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
 	u32 bytes_left, bytes;
 	int x;

@ -378,7 +391,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 	if (!rgd->rd_bits)
 		return -ENOMEM;

-	bytes_left = rgd->rd_ri.ri_bitbytes;
+	bytes_left = rgd->rd_bitbytes;

 	for (x = 0; x < length; x++) {
 		bi = rgd->rd_bits + x;
@ -399,14 +412,14 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 		} else if (x + 1 == length) {
 			bytes = bytes_left;
 			bi->bi_offset = sizeof(struct gfs2_meta_header);
-			bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
+			bi->bi_start = rgd->rd_bitbytes - bytes_left;
 			bi->bi_len = bytes;
 		/* other blocks */
 		} else {
 			bytes = sdp->sd_sb.sb_bsize -
 				sizeof(struct gfs2_meta_header);
 			bi->bi_offset = sizeof(struct gfs2_meta_header);
-			bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
+			bi->bi_start = rgd->rd_bitbytes - bytes_left;
 			bi->bi_len = bytes;
 		}

@ -418,9 +431,9 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 		return -EIO;
 	}
 	bi = rgd->rd_bits + (length - 1);
-	if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_ri.ri_data) {
+	if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
 		if (gfs2_consist_rgrpd(rgd)) {
-			gfs2_rindex_print(&rgd->rd_ri);
+			gfs2_rindex_print(rgd);
 			fs_err(sdp, "start=%u len=%u offset=%u\n",
 			       bi->bi_start, bi->bi_len, bi->bi_offset);
 		}
@ -431,9 +444,104 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 }

 /**
- * gfs2_ri_update - Pull in a new resource index from the disk
+ * gfs2_ri_total - Total up the file system space, according to the rindex.
+ *
+ */
+u64 gfs2_ri_total(struct gfs2_sbd *sdp)
+{
+	u64 total_data = 0;	
+	struct inode *inode = sdp->sd_rindex;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	char buf[sizeof(struct gfs2_rindex)];
+	struct file_ra_state ra_state;
+	int error, rgrps;
+
+	mutex_lock(&sdp->sd_rindex_mutex);
+	file_ra_state_init(&ra_state, inode->i_mapping);
+	for (rgrps = 0;; rgrps++) {
+		loff_t pos = rgrps * sizeof(struct gfs2_rindex);
+
+		if (pos + sizeof(struct gfs2_rindex) >= ip->i_di.di_size)
+			break;
+		error = gfs2_internal_read(ip, &ra_state, buf, &pos,
+					   sizeof(struct gfs2_rindex));
+		if (error != sizeof(struct gfs2_rindex))
+			break;
+		total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
+	}
+	mutex_unlock(&sdp->sd_rindex_mutex);
+	return total_data;
+}
+
+static void gfs2_rindex_in(struct gfs2_rgrpd *rgd, const void *buf)
+{
+	const struct gfs2_rindex *str = buf;
+
+	rgd->rd_addr = be64_to_cpu(str->ri_addr);
+	rgd->rd_length = be32_to_cpu(str->ri_length);
+	rgd->rd_data0 = be64_to_cpu(str->ri_data0);
+	rgd->rd_data = be32_to_cpu(str->ri_data);
+	rgd->rd_bitbytes = be32_to_cpu(str->ri_bitbytes);
+}
+
+/**
+ * read_rindex_entry - Pull in a new resource index entry from the disk
 * @gl: The glock covering the rindex inode
 *
+ * Returns: 0 on success, error code otherwise
+ */
+
+static int read_rindex_entry(struct gfs2_inode *ip,
+			     struct file_ra_state *ra_state)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
+	char buf[sizeof(struct gfs2_rindex)];
+	int error;
+	struct gfs2_rgrpd *rgd;
+
+	error = gfs2_internal_read(ip, ra_state, buf, &pos,
+				   sizeof(struct gfs2_rindex));
+	if (!error)
+		return 0;
+	if (error != sizeof(struct gfs2_rindex)) {
+		if (error > 0)
+			error = -EIO;
+		return error;
+	}
+
+	rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS);
+	error = -ENOMEM;
+	if (!rgd)
+		return error;
+
+	mutex_init(&rgd->rd_mutex);
+	lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
+	rgd->rd_sbd = sdp;
+
+	list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
+	list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
+
+	gfs2_rindex_in(rgd, buf);
+	error = compute_bitstructs(rgd);
+	if (error)
+		return error;
+
+	error = gfs2_glock_get(sdp, rgd->rd_addr,
+			       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
+	if (error)
+		return error;
+
+	rgd->rd_gl->gl_object = rgd;
+	rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
+	rgd->rd_flags |= GFS2_RDF_CHECK;
+	return error;
+}
+
+/**
+ * gfs2_ri_update - Pull in a new resource index from the disk
+ * @ip: pointer to the rindex inode
+ *
 * Returns: 0 on successful update, error code otherwise
 */

@ -441,13 +549,11 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct inode *inode = &ip->i_inode;
-	struct gfs2_rgrpd *rgd;
-	char buf[sizeof(struct gfs2_rindex)];
 	struct file_ra_state ra_state;
-	u64 junk = ip->i_di.di_size;
+	u64 rgrp_count = ip->i_di.di_size;
 	int error;

-	if (do_div(junk, sizeof(struct gfs2_rindex))) {
+	if (do_div(rgrp_count, sizeof(struct gfs2_rindex))) {
 		gfs2_consist_inode(ip);
 		return -EIO;
 	}
@ -455,50 +561,50 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
 	clear_rgrpdi(sdp);

 	file_ra_state_init(&ra_state, inode->i_mapping);
-	for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
-		loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
-		error = gfs2_internal_read(ip, &ra_state, buf, &pos,
-					    sizeof(struct gfs2_rindex));
-		if (!error)
-			break;
-		if (error != sizeof(struct gfs2_rindex)) {
-			if (error > 0)
-				error = -EIO;
-			goto fail;
+	for (sdp->sd_rgrps = 0; sdp->sd_rgrps < rgrp_count; sdp->sd_rgrps++) {
+		error = read_rindex_entry(ip, &ra_state);
+		if (error) {
+			clear_rgrpdi(sdp);
+			return error;
 		}
-
-		rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS);
-		error = -ENOMEM;
-		if (!rgd)
-			goto fail;
-
-		mutex_init(&rgd->rd_mutex);
-		lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
-		rgd->rd_sbd = sdp;
-
-		list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
-		list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
-
-		gfs2_rindex_in(&rgd->rd_ri, buf);
-		error = compute_bitstructs(rgd);
-		if (error)
-			goto fail;
-
-		error = gfs2_glock_get(sdp, rgd->rd_ri.ri_addr,
-				       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
-		if (error)
-			goto fail;
-
-		rgd->rd_gl->gl_object = rgd;
-		rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
 	}

 	sdp->sd_rindex_vn = ip->i_gl->gl_vn;
 	return 0;
+}

-fail:
-	clear_rgrpdi(sdp);
-	return error;
+/**
+ * gfs2_ri_update_special - Pull in a new resource index from the disk
+ *
+ * This is a special version that's safe to call from gfs2_inplace_reserve_i.
+ * In this case we know that we don't have any resource groups in memory yet.
+ *
+ * @ip: pointer to the rindex inode
+ *
+ * Returns: 0 on successful update, error code otherwise
+ */
+static int gfs2_ri_update_special(struct gfs2_inode *ip)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	struct inode *inode = &ip->i_inode;
+	struct file_ra_state ra_state;
+	int error;
+
+	file_ra_state_init(&ra_state, inode->i_mapping);
+	for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
+		/* Ignore partials */
+		if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) >
+		    ip->i_di.di_size)
+			break;
+		error = read_rindex_entry(ip, &ra_state);
+		if (error) {
+			clear_rgrpdi(sdp);
+			return error;
+		}
+	}
+
+	sdp->sd_rindex_vn = ip->i_gl->gl_vn;
+	return 0;
 }

 /**
@ -543,6 +649,28 @@ int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
 	return error;
 }

+static void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf)
+{
+	const struct gfs2_rgrp *str = buf;
+
+	rg->rg_flags = be32_to_cpu(str->rg_flags);
+	rg->rg_free = be32_to_cpu(str->rg_free);
+	rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
+	rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
+}
+
+static void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
+{
+	struct gfs2_rgrp *str = buf;
+
+	str->rg_flags = cpu_to_be32(rg->rg_flags);
+	str->rg_free = cpu_to_be32(rg->rg_free);
+	str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
+	str->__pad = cpu_to_be32(0);
+	str->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
+	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
+}
+
 /**
 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
 * @rgd: the struct gfs2_rgrpd describing the RG to read in
@ -557,7 +685,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
 {
 	struct gfs2_sbd *sdp = rgd->rd_sbd;
 	struct gfs2_glock *gl = rgd->rd_gl;
-	unsigned int length = rgd->rd_ri.ri_length;
+	unsigned int length = rgd->rd_length;
 	struct gfs2_bitmap *bi;
 	unsigned int x, y;
 	int error;
@ -575,7 +703,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)

 	for (x = 0; x < length; x++) {
 		bi = rgd->rd_bits + x;
-		error = gfs2_meta_read(gl, rgd->rd_ri.ri_addr + x, 0, &bi->bi_bh);
+		error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
 		if (error)
 			goto fail;
 	}
@ -637,7 +765,7 @@ void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd)
 void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
 {
 	struct gfs2_sbd *sdp = rgd->rd_sbd;
-	int x, length = rgd->rd_ri.ri_length;
+	int x, length = rgd->rd_length;

 	spin_lock(&sdp->sd_rindex_spin);
 	gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
@ -660,7 +788,7 @@ void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
 void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
 {
 	struct gfs2_sbd *sdp = rgd->rd_sbd;
-	unsigned int length = rgd->rd_ri.ri_length;
+	unsigned int length = rgd->rd_length;
 	unsigned int x;

 	for (x = 0; x < length; x++) {
@ -721,6 +849,38 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
 	return ret;
 }

+/**
+ * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
+ * @rgd: The rgrp
+ *
+ * Returns: The inode, if one has been found
+ */
+
+static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
+{
+	struct inode *inode;
+	u32 goal = 0;
+	u64 no_addr;
+
+	for(;;) {
+		goal = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
+				    GFS2_BLKST_UNLINKED);
+		if (goal == 0)
+			return 0;
+		no_addr = goal + rgd->rd_data0;
+		if (no_addr <= *last_unlinked)
+			continue;
+		*last_unlinked = no_addr;
+		inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN,
+					no_addr, -1);
+		if (!IS_ERR(inode))
+			return inode;
+	}
+
+	rgd->rd_flags &= ~GFS2_RDF_CHECK;
+	return NULL;
+}
+
 /**
 * recent_rgrp_first - get first RG from "recent" list
 * @sdp: The GFS2 superblock
@ -743,7 +903,7 @@ static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp,
 		goto first;

 	list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
-		if (rgd->rd_ri.ri_addr == rglast)
+		if (rgd->rd_addr == rglast)
 			goto out;
 	}

@ -882,8 +1042,9 @@ static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
 * Returns: errno
 */

-static int get_local_rgrp(struct gfs2_inode *ip)
+static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
 {
+	struct inode *inode = NULL;
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_rgrpd *rgd, *begin = NULL;
 	struct gfs2_alloc *al = &ip->i_alloc;
@ -903,7 +1064,11 @@ static int get_local_rgrp(struct gfs2_inode *ip)
 		case 0:
 			if (try_rgrp_fit(rgd, al))
 				goto out;
+			if (rgd->rd_flags & GFS2_RDF_CHECK)
+				inode = try_rgrp_unlink(rgd, last_unlinked);
 			gfs2_glock_dq_uninit(&al->al_rgd_gh);
+			if (inode)
+				return inode;
 			rgd = recent_rgrp_next(rgd, 1);
 			break;

@ -912,7 +1077,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
 			break;

 		default:
-			return error;
+			return ERR_PTR(error);
 		}
 	}

@ -927,7 +1092,11 @@ static int get_local_rgrp(struct gfs2_inode *ip)
 		case 0:
 			if (try_rgrp_fit(rgd, al))
 				goto out;
+			if (rgd->rd_flags & GFS2_RDF_CHECK)
+				inode = try_rgrp_unlink(rgd, last_unlinked);
 			gfs2_glock_dq_uninit(&al->al_rgd_gh);
+			if (inode)
+				return inode;
 			break;

 		case GLR_TRYFAILED:
@ -935,7 +1104,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
 			break;

 		default:
-			return error;
+			return ERR_PTR(error);
 		}

 		rgd = gfs2_rgrpd_get_next(rgd);
@ -944,7 +1113,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)

 		if (rgd == begin) {
 			if (++loops >= 3)
-				return -ENOSPC;
+				return ERR_PTR(-ENOSPC);
 			if (!skipped)
 				loops++;
 			flags = 0;
@ -954,7 +1123,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
 	}

 out:
-	ip->i_last_rg_alloc = rgd->rd_ri.ri_addr;
+	ip->i_last_rg_alloc = rgd->rd_addr;

 	if (begin) {
 		recent_rgrp_add(rgd);
@ -964,7 +1133,7 @@ out:
 		forward_rgrp_set(sdp, rgd);
 	}

-	return 0;
+	return NULL;
 }

 /**
@ -978,19 +1147,33 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_alloc *al = &ip->i_alloc;
-	int error;
+	struct inode *inode;
+	int error = 0;
+	u64 last_unlinked = 0;

 	if (gfs2_assert_warn(sdp, al->al_requested))
 		return -EINVAL;

-	error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
+try_again:
+	/* We need to hold the rindex unless the inode we're using is
+	   the rindex itself, in which case it's already held. */
+	if (ip != GFS2_I(sdp->sd_rindex))
+		error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
+	else if (!sdp->sd_rgrps) /* We may not have the rindex read in, so: */
+		error = gfs2_ri_update_special(ip);
+
 	if (error)
 		return error;

-	error = get_local_rgrp(ip);
-	if (error) {
-		gfs2_glock_dq_uninit(&al->al_ri_gh);
-		return error;
+	inode = get_local_rgrp(ip, &last_unlinked);
+	if (inode) {
+		if (ip != GFS2_I(sdp->sd_rindex))
+			gfs2_glock_dq_uninit(&al->al_ri_gh);
+		if (IS_ERR(inode))
+			return PTR_ERR(inode);
+		iput(inode);
+		gfs2_log_flush(sdp, NULL);
+		goto try_again;
 	}

 	al->al_file = file;
@ -1019,7 +1202,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip)

 	al->al_rgd = NULL;
 	gfs2_glock_dq_uninit(&al->al_rgd_gh);
-	gfs2_glock_dq_uninit(&al->al_ri_gh);
+	if (ip != GFS2_I(sdp->sd_rindex))
+		gfs2_glock_dq_uninit(&al->al_ri_gh);
 }

 /**
@ -1037,8 +1221,8 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
 	unsigned int buf;
 	unsigned char type;

-	length = rgd->rd_ri.ri_length;
-	rgrp_block = block - rgd->rd_ri.ri_data0;
+	length = rgd->rd_length;
+	rgrp_block = block - rgd->rd_data0;

 	for (buf = 0; buf < length; buf++) {
 		bi = rgd->rd_bits + buf;
@ -1077,10 +1261,10 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
 */

 static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
-			     unsigned char old_state, unsigned char new_state)
+			unsigned char old_state, unsigned char new_state)
 {
 	struct gfs2_bitmap *bi = NULL;
-	u32 length = rgd->rd_ri.ri_length;
+	u32 length = rgd->rd_length;
 	u32 blk = 0;
 	unsigned int buf, x;

@ -1118,17 +1302,18 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
 		goal = 0;
 	}

-	if (gfs2_assert_withdraw(rgd->rd_sbd, x <= length))
-		blk = 0;
+	if (old_state != new_state) {
+		gfs2_assert_withdraw(rgd->rd_sbd, blk != BFITNOENT);

-	gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
-	gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
-		    bi->bi_len, blk, new_state);
-	if (bi->bi_clone)
-		gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
+		gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
+		gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
 			    bi->bi_len, blk, new_state);
+		if (bi->bi_clone)
+			gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
+				    bi->bi_len, blk, new_state);
+	}

-	return bi->bi_start * GFS2_NBBY + blk;
+	return (blk == BFITNOENT) ? 0 : (bi->bi_start * GFS2_NBBY) + blk;
 }

 /**
@ -1156,9 +1341,9 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
 		return NULL;
 	}

-	length = rgd->rd_ri.ri_length;
+	length = rgd->rd_length;

-	rgrp_blk = bstart - rgd->rd_ri.ri_data0;
+	rgrp_blk = bstart - rgd->rd_data0;

 	while (blen--) {
 		for (buf = 0; buf < length; buf++) {
@ -1202,15 +1387,15 @@ u64 gfs2_alloc_data(struct gfs2_inode *ip)
 	u32 goal, blk;
 	u64 block;

-	if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_data))
-		goal = ip->i_di.di_goal_data - rgd->rd_ri.ri_data0;
+	if (rgrp_contains_block(rgd, ip->i_di.di_goal_data))
+		goal = ip->i_di.di_goal_data - rgd->rd_data0;
 	else
 		goal = rgd->rd_last_alloc_data;

 	blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
 	rgd->rd_last_alloc_data = blk;

-	block = rgd->rd_ri.ri_data0 + blk;
+	block = rgd->rd_data0 + blk;
 	ip->i_di.di_goal_data = block;

 	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
@ -1246,15 +1431,15 @@ u64 gfs2_alloc_meta(struct gfs2_inode *ip)
 	u32 goal, blk;
 	u64 block;

-	if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_meta))
-		goal = ip->i_di.di_goal_meta - rgd->rd_ri.ri_data0;
+	if (rgrp_contains_block(rgd, ip->i_di.di_goal_meta))
+		goal = ip->i_di.di_goal_meta - rgd->rd_data0;
 	else
 		goal = rgd->rd_last_alloc_meta;

 	blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
 	rgd->rd_last_alloc_meta = blk;

-	block = rgd->rd_ri.ri_data0 + blk;
+	block = rgd->rd_data0 + blk;
 	ip->i_di.di_goal_meta = block;

 	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
@ -1296,7 +1481,7 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)

 	rgd->rd_last_alloc_meta = blk;

-	block = rgd->rd_ri.ri_data0 + blk;
+	block = rgd->rd_data0 + blk;

 	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
 	rgd->rd_rg.rg_free--;
@ -1379,7 +1564,7 @@ void gfs2_unlink_di(struct inode *inode)
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct gfs2_rgrpd *rgd;
-	u64 blkno = ip->i_num.no_addr;
+	u64 blkno = ip->i_no_addr;

 	rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
 	if (!rgd)
@ -1414,9 +1599,9 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)

 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 {
-	gfs2_free_uninit_di(rgd, ip->i_num.no_addr);
+	gfs2_free_uninit_di(rgd, ip->i_no_addr);
 	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
-	gfs2_meta_wipe(ip, ip->i_num.no_addr, 1);
+	gfs2_meta_wipe(ip, ip->i_no_addr, 1);
 }

 /**
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@ -65,5 +65,6 @@ void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
 void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
 		      int flags);
 void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
+u64 gfs2_ri_total(struct gfs2_sbd *sdp);

 #endif /* __RGRP_DOT_H__ */
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@ -95,8 +95,8 @@ int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
 {
 	unsigned int x;

-	if (sb->sb_header.mh_magic != GFS2_MAGIC ||
-	    sb->sb_header.mh_type != GFS2_METATYPE_SB) {
+	if (sb->sb_magic != GFS2_MAGIC ||
+	    sb->sb_type != GFS2_METATYPE_SB) {
 		if (!silent)
 			printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
 		return -EINVAL;
@ -174,10 +174,31 @@ static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
 	return 0;
 }

+static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
+{
+	const struct gfs2_sb *str = buf;
+
+	sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
+	sb->sb_type = be32_to_cpu(str->sb_header.mh_type);
+	sb->sb_format = be32_to_cpu(str->sb_header.mh_format);
+	sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
+	sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
+	sb->sb_bsize = be32_to_cpu(str->sb_bsize);
+	sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
+	sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr);
+	sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino);
+	sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr);
+	sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino);
+
+	memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
+	memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
+}
+
 /**
 * gfs2_read_super - Read the gfs2 super block from disk
- * @sb: The VFS super block
+ * @sdp: The GFS2 super block
 * @sector: The location of the super block
+ * @error: The error code to return
 *
 * This uses the bio functions to read the super block from disk
 * because we want to be 100% sure that we never read cached data.
@ -189,17 +210,19 @@ static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
 * the master directory (contains pointers to journals etc) and the
 * root directory.
 *
- * Returns: A page containing the sb or NULL
+ * Returns: 0 on success or error
 */

-struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
+int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
 {
+	struct super_block *sb = sdp->sd_vfs;
+	struct gfs2_sb *p;
 	struct page *page;
 	struct bio *bio;

 	page = alloc_page(GFP_KERNEL);
 	if (unlikely(!page))
-		return NULL;
+		return -ENOBUFS;

 	ClearPageUptodate(page);
 	ClearPageDirty(page);
@ -208,7 +231,7 @@ struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
 	bio = bio_alloc(GFP_KERNEL, 1);
 	if (unlikely(!bio)) {
 		__free_page(page);
-		return NULL;
+		return -ENOBUFS;
 	}

 	bio->bi_sector = sector * (sb->s_blocksize >> 9);
@ -222,9 +245,13 @@ struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
 	bio_put(bio);
 	if (!PageUptodate(page)) {
 		__free_page(page);
-		return NULL;
+		return -EIO;
 	}
-	return page;
+	p = kmap(page);
+	gfs2_sb_in(&sdp->sd_sb, p);
+	kunmap(page);
+	__free_page(page);
+	return 0;
 }

 /**
@ -241,19 +268,13 @@ int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
 	u32 tmp_blocks;
 	unsigned int x;
 	int error;
-	struct page *page;
-	char *sb;

-	page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
-	if (!page) {
+	error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+	if (error) {
 		if (!silent)
 			fs_err(sdp, "can't read superblock\n");
-		return -EIO;
+		return error;
 	}
-	sb = kmap(page);
-	gfs2_sb_in(&sdp->sd_sb, sb);
-	kunmap(page);
-	__free_page(page);

 	error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
 	if (error)
@ -360,7 +381,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
 		name.len = sprintf(buf, "journal%u", sdp->sd_journals);
 		name.hash = gfs2_disk_hash(name.name, name.len);

-		error = gfs2_dir_search(sdp->sd_jindex, &name, NULL, NULL);
+		error = gfs2_dir_check(sdp->sd_jindex, &name, NULL);
 		if (error == -ENOENT) {
 			error = 0;
 			break;
@ -593,6 +614,24 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 	return error;
 }

+static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
+{
+	const struct gfs2_statfs_change *str = buf;
+
+	sc->sc_total = be64_to_cpu(str->sc_total);
+	sc->sc_free = be64_to_cpu(str->sc_free);
+	sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
+}
+
+static void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
+{
+	struct gfs2_statfs_change *str = buf;
+
+	str->sc_total = cpu_to_be64(sc->sc_total);
+	str->sc_free = cpu_to_be64(sc->sc_free);
+	str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
+}
+
 int gfs2_statfs_init(struct gfs2_sbd *sdp)
 {
 	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
@ -772,7 +811,7 @@ static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
 			    struct gfs2_statfs_change_host *sc)
 {
 	gfs2_rgrp_verify(rgd);
-	sc->sc_total += rgd->rd_ri.ri_data;
+	sc->sc_total += rgd->rd_data;
 	sc->sc_free += rgd->rd_rg.rg_free;
 	sc->sc_dinodes += rgd->rd_rg.rg_dinodes;
 	return 0;
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@ -16,7 +16,7 @@ void gfs2_tune_init(struct gfs2_tune *gt);

 int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent);
 int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
-struct page *gfs2_read_super(struct super_block *sb, sector_t sector);
+int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector);

 static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
 {
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@ -115,8 +115,8 @@ int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
 		"GFS2: fsid=%s:   inode = %llu %llu\n"
 		"GFS2: fsid=%s:   function = %s, file = %s, line = %u\n",
 		sdp->sd_fsname,
-		sdp->sd_fsname, (unsigned long long)ip->i_num.no_formal_ino,
-		(unsigned long long)ip->i_num.no_addr,
+		sdp->sd_fsname, (unsigned long long)ip->i_no_formal_ino,
+		(unsigned long long)ip->i_no_addr,
 		sdp->sd_fsname, function, file, line);
 	return rv;
 }
@ -137,7 +137,7 @@ int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
 		"GFS2: fsid=%s:   RG = %llu\n"
 		"GFS2: fsid=%s:   function = %s, file = %s, line = %u\n",
 		sdp->sd_fsname,
-		sdp->sd_fsname, (unsigned long long)rgd->rd_ri.ri_addr,
+		sdp->sd_fsname, (unsigned long long)rgd->rd_addr,
 		sdp->sd_fsname, function, file, line);
 	return rv;
 }
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@ -49,6 +49,7 @@ header-y += consolemap.h
 header-y += const.h
 header-y += cycx_cfm.h
 header-y += dlm_device.h
+header-y += dlm_netlink.h
 header-y += dm-ioctl.h
 header-y += dn.h
 header-y += dqblk_v1.h
--- a/include/linux/dlm.h
+++ b/include/linux/dlm.h
@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@ -85,7 +85,11 @@
 * Only relevant to locks originating in userspace.  A persistent lock will not
 * be removed if the process holding the lock exits.
 *
- * DLM_LKF_NODLKWT
+ * DLM_LKF_NODLCKWT
+ *
+ * Do not cancel the lock if it gets into conversion deadlock.
+ * Exclude this lock from being monitored due to DLM_LSFL_TIMEWARN.
+ *
 * DLM_LKF_NODLCKBLK
 *
 * net yet implemented
@ -149,6 +153,7 @@
 #define DLM_LKF_ALTPR		0x00008000
 #define DLM_LKF_ALTCW		0x00010000
 #define DLM_LKF_FORCEUNLOCK	0x00020000
+#define DLM_LKF_TIMEOUT		0x00040000

 /*
 * Some return codes that are not in errno.h
@ -199,11 +204,12 @@ struct dlm_lksb {
 	char *	 sb_lvbptr;
 };

+#define DLM_LSFL_NODIR		0x00000001
+#define DLM_LSFL_TIMEWARN	0x00000002
+#define DLM_LSFL_FS     	0x00000004

 #ifdef __KERNEL__

-#define DLM_LSFL_NODIR		0x00000001
-
 /*
 * dlm_new_lockspace
 *
--- a/include/linux/dlm_device.h
+++ b/include/linux/dlm_device.h
@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@ -18,21 +18,24 @@
 #define DLM_USER_LVB_LEN	32

 /* Version of the device interface */
-#define DLM_DEVICE_VERSION_MAJOR 5
-#define DLM_DEVICE_VERSION_MINOR 1
+#define DLM_DEVICE_VERSION_MAJOR 6
+#define DLM_DEVICE_VERSION_MINOR 0
 #define DLM_DEVICE_VERSION_PATCH 0

 /* struct passed to the lock write */
 struct dlm_lock_params {
 	__u8 mode;
 	__u8 namelen;
-	__u16 flags;
+	__u16 unused;
+	__u32 flags;
 	__u32 lkid;
 	__u32 parent;
-        void __user *castparam;
+	__u64 xid;
+	__u64 timeout;
+	void __user *castparam;
 	void __user *castaddr;
 	void __user *bastparam;
-        void __user *bastaddr;
+	void __user *bastaddr;
 	struct dlm_lksb __user *lksb;
 	char lvb[DLM_USER_LVB_LEN];
 	char name[0];
@ -62,9 +65,15 @@ struct dlm_write_request {
 	} i;
 };

+struct dlm_device_version {
+	__u32 version[3];
+};
+
 /* struct read from the "device" fd,
   consists mainly of userspace pointers for the library to use */
+
 struct dlm_lock_result {
+	__u32 version[3];
 	__u32 length;
 	void __user * user_astaddr;
 	void __user * user_astparam;
@ -83,6 +92,7 @@ struct dlm_lock_result {
 #define DLM_USER_CREATE_LOCKSPACE  4
 #define DLM_USER_REMOVE_LOCKSPACE  5
 #define DLM_USER_PURGE        6
+#define DLM_USER_DEADLOCK     7

 /* Arbitrary length restriction */
 #define MAX_LS_NAME_LEN 64
--- a/include/linux/dlm_netlink.h
+++ b/include/linux/dlm_netlink.h
@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2007 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef _DLM_NETLINK_H
+#define _DLM_NETLINK_H
+
+enum {
+	DLM_STATUS_WAITING = 1,
+	DLM_STATUS_GRANTED = 2,
+	DLM_STATUS_CONVERT = 3,
+};
+
+#define DLM_LOCK_DATA_VERSION 1
+
+struct dlm_lock_data {
+	uint16_t version;
+	uint32_t lockspace_id;
+	int nodeid;
+	int ownpid;
+	uint32_t id;
+	uint32_t remid;
+	uint64_t xid;
+	int8_t status;
+	int8_t grmode;
+	int8_t rqmode;
+	unsigned long timestamp;
+	int resource_namelen;
+	char resource_name[DLM_RESNAME_MAXLEN];
+};
+
+enum {
+	DLM_CMD_UNSPEC = 0,
+	DLM_CMD_HELLO,		/* user->kernel */
+	DLM_CMD_TIMEOUT,	/* kernel->user */
+	__DLM_CMD_MAX,
+};
+
+#define DLM_CMD_MAX (__DLM_CMD_MAX - 1)
+
+enum {
+	DLM_TYPE_UNSPEC = 0,
+	DLM_TYPE_LOCK,
+	__DLM_TYPE_MAX,
+};
+
+#define DLM_TYPE_MAX (__DLM_TYPE_MAX - 1)
+
+#define DLM_GENL_VERSION 0x1
+#define DLM_GENL_NAME "DLM"
+
+#endif /* _DLM_NETLINK_H */
--- a/include/linux/gfs2_ondisk.h
+++ b/include/linux/gfs2_ondisk.h
@ -54,18 +54,6 @@ struct gfs2_inum {
 	__be64 no_addr;
 };

-struct gfs2_inum_host {
-	__u64 no_formal_ino;
-	__u64 no_addr;
-};
-
-static inline int gfs2_inum_equal(const struct gfs2_inum_host *ino1,
-				  const struct gfs2_inum_host *ino2)
-{
-	return ino1->no_formal_ino == ino2->no_formal_ino &&
-	       ino1->no_addr == ino2->no_addr;
-}
-
 /*
 * Generic metadata head structure
 * Every inplace buffer logged in the journal must start with this.
@ -94,12 +82,6 @@ struct gfs2_meta_header {
 	__be32 __pad1;		/* Was incarnation number in gfs1 */
 };

-struct gfs2_meta_header_host {
-	__u32 mh_magic;
-	__u32 mh_type;
-	__u32 mh_format;
-};
-
 /*
 * super-block structure
 *
@ -139,23 +121,6 @@ struct gfs2_sb {
 	/* In gfs1, quota and license dinodes followed */
 };

-struct gfs2_sb_host {
-	struct gfs2_meta_header_host sb_header;
-
-	__u32 sb_fs_format;
-	__u32 sb_multihost_format;
-
-	__u32 sb_bsize;
-	__u32 sb_bsize_shift;
-
-	struct gfs2_inum_host sb_master_dir; /* Was jindex dinode in gfs1 */
-	struct gfs2_inum_host sb_root_dir;
-
-	char sb_lockproto[GFS2_LOCKNAME_LEN];
-	char sb_locktable[GFS2_LOCKNAME_LEN];
-	/* In gfs1, quota and license dinodes followed */
-};
-
 /*
 * resource index structure
 */
@ -173,14 +138,6 @@ struct gfs2_rindex {
 	__u8 ri_reserved[64];
 };

-struct gfs2_rindex_host {
-	__u64 ri_addr;	/* grp block disk address */
-	__u64 ri_data0;	/* first data location */
-	__u32 ri_length;	/* length of rgrp header in fs blocks */
-	__u32 ri_data;	/* num of data blocks in rgrp */
-	__u32 ri_bitbytes;	/* number of bytes in data bitmaps */
-};
-
 /*
 * resource group header structure
 */
@ -212,13 +169,6 @@ struct gfs2_rgrp {
 	__u8 rg_reserved[80]; /* Several fields from gfs1 now reserved */
 };

-struct gfs2_rgrp_host {
-	__u32 rg_flags;
-	__u32 rg_free;
-	__u32 rg_dinodes;
-	__u64 rg_igeneration;
-};
-
 /*
 * quota structure
 */
@ -230,12 +180,6 @@ struct gfs2_quota {
 	__u8 qu_reserved[64];
 };

-struct gfs2_quota_host {
-	__u64 qu_limit;
-	__u64 qu_warn;
-	__u64 qu_value;
-};
-
 /*
 * dinode structure
 */
@ -315,29 +259,11 @@ struct gfs2_dinode {
 	struct gfs2_inum __pad4; /* Unused even in current gfs1 */

 	__be64 di_eattr;	/* extended attribute block number */
+	__be32 di_atime_nsec;   /* nsec portion of atime */
+	__be32 di_mtime_nsec;   /* nsec portion of mtime */
+	__be32 di_ctime_nsec;   /* nsec portion of ctime */

-	__u8 di_reserved[56];
-};
-
-struct gfs2_dinode_host {
-	__u64 di_size;	/* number of bytes in file */
-	__u64 di_blocks;	/* number of blocks in file */
-
-	/* This section varies from gfs1. Padding added to align with
-         * remainder of dinode
-	 */
-	__u64 di_goal_meta;	/* rgrp to alloc from next */
-	__u64 di_goal_data;	/* data block goal */
-	__u64 di_generation;	/* generation number for NFS */
-
-	__u32 di_flags;	/* GFS2_DIF_... */
-	__u16 di_height;	/* height of metadata */
-
-	/* These only apply to directories  */
-	__u16 di_depth;	/* Number of bits in the table */
-	__u32 di_entries;	/* The number of entries in the directory */
-
-	__u64 di_eattr;	/* extended attribute block number */
+	__u8 di_reserved[44];
 };

 /*
@ -414,16 +340,6 @@ struct gfs2_log_header {
 	__be32 lh_hash;
 };

-struct gfs2_log_header_host {
-	struct gfs2_meta_header_host lh_header;
-
-	__u64 lh_sequence;	/* Sequence number of this transaction */
-	__u32 lh_flags;	/* GFS2_LOG_HEAD_... */
-	__u32 lh_tail;		/* Block number of log tail */
-	__u32 lh_blkno;
-	__u32 lh_hash;
-};
-
 /*
 * Log type descriptor
 */
@ -464,11 +380,6 @@ struct gfs2_inum_range {
 	__be64 ir_length;
 };

-struct gfs2_inum_range_host {
-	__u64 ir_start;
-	__u64 ir_length;
-};
-
 /*
 * Statfs change
 * Describes an change to the pool of free and allocated
@ -481,12 +392,6 @@ struct gfs2_statfs_change {
 	__be64 sc_dinodes;
 };

-struct gfs2_statfs_change_host {
-	__u64 sc_total;
-	__u64 sc_free;
-	__u64 sc_dinodes;
-};
-
 /*
 * Quota change
 * Describes an allocation change for a particular
@ -501,39 +406,12 @@ struct gfs2_quota_change {
 	__be32 qc_id;
 };

-struct gfs2_quota_change_host {
-	__u64 qc_change;
-	__u32 qc_flags;	/* GFS2_QCF_... */
-	__u32 qc_id;
+struct gfs2_quota_lvb {
+        __be32 qb_magic;
+        __u32 __pad;
+        __be64 qb_limit;      /* Hard limit of # blocks to alloc */
+        __be64 qb_warn;       /* Warn user when alloc is above this # */
+        __be64 qb_value;       /* Current # blocks allocated */
 };

-#ifdef __KERNEL__
-/* Translation functions */
-
-extern void gfs2_inum_in(struct gfs2_inum_host *no, const void *buf);
-extern void gfs2_inum_out(const struct gfs2_inum_host *no, void *buf);
-extern void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf);
-extern void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf);
-extern void gfs2_rindex_out(const struct gfs2_rindex_host *ri, void *buf);
-extern void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf);
-extern void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf);
-extern void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf);
-struct gfs2_inode;
-extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
-extern void gfs2_ea_header_in(struct gfs2_ea_header *ea, const void *buf);
-extern void gfs2_ea_header_out(const struct gfs2_ea_header *ea, void *buf);
-extern void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf);
-extern void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf);
-extern void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf);
-extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf);
-extern void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf);
-extern void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf);
-
-/* Printing functions */
-
-extern void gfs2_rindex_print(const struct gfs2_rindex_host *ri);
-extern void gfs2_dinode_print(const struct gfs2_inode *ip);
-
-#endif /* __KERNEL__ */
-
 #endif /* __GFS2_ONDISK_DOT_H__ */