ocfs2/cluster: Make fence method configurable - v2
By default, o2cb fences the box by calling emergency_restart(). While this scheme works well in production, it gets in the way during testing as it does not let the tester take stack/core dumps for analysis. This patch allows the user to dynamically change the fence method to panic() by: # echo "panic" > /sys/kernel/config/cluster/<clustername>/fence_method Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com> Signed-off-by: Joel Becker <joel.becker@oracle.com>
This commit is contained in:
Родитель
57b09bb5e4
Коммит
f6656d26d1
|
@ -35,6 +35,10 @@
|
|||
* cluster references throughout where nodes are looked up */
|
||||
struct o2nm_cluster *o2nm_single_cluster = NULL;
|
||||
|
||||
char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = {
|
||||
"reset", /* O2NM_FENCE_RESET */
|
||||
"panic", /* O2NM_FENCE_PANIC */
|
||||
};
|
||||
|
||||
struct o2nm_node *o2nm_get_node_by_num(u8 node_num)
|
||||
{
|
||||
|
@ -579,6 +583,43 @@ static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write(
|
|||
return o2nm_cluster_attr_write(page, count,
|
||||
&cluster->cl_reconnect_delay_ms);
|
||||
}
|
||||
|
||||
static ssize_t o2nm_cluster_attr_fence_method_read(
|
||||
struct o2nm_cluster *cluster, char *page)
|
||||
{
|
||||
ssize_t ret = 0;
|
||||
|
||||
if (cluster)
|
||||
ret = sprintf(page, "%s\n",
|
||||
o2nm_fence_method_desc[cluster->cl_fence_method]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t o2nm_cluster_attr_fence_method_write(
|
||||
struct o2nm_cluster *cluster, const char *page, size_t count)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
if (page[count - 1] != '\n')
|
||||
goto bail;
|
||||
|
||||
for (i = 0; i < O2NM_FENCE_METHODS; ++i) {
|
||||
if (count != strlen(o2nm_fence_method_desc[i]) + 1)
|
||||
continue;
|
||||
if (strncasecmp(page, o2nm_fence_method_desc[i], count - 1))
|
||||
continue;
|
||||
if (cluster->cl_fence_method != i) {
|
||||
printk(KERN_INFO "ocfs2: Changing fence method to %s\n",
|
||||
o2nm_fence_method_desc[i]);
|
||||
cluster->cl_fence_method = i;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
bail:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = {
|
||||
.attr = { .ca_owner = THIS_MODULE,
|
||||
.ca_name = "idle_timeout_ms",
|
||||
|
@ -603,10 +644,19 @@ static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = {
|
|||
.store = o2nm_cluster_attr_reconnect_delay_ms_write,
|
||||
};
|
||||
|
||||
static struct o2nm_cluster_attribute o2nm_cluster_attr_fence_method = {
|
||||
.attr = { .ca_owner = THIS_MODULE,
|
||||
.ca_name = "fence_method",
|
||||
.ca_mode = S_IRUGO | S_IWUSR },
|
||||
.show = o2nm_cluster_attr_fence_method_read,
|
||||
.store = o2nm_cluster_attr_fence_method_write,
|
||||
};
|
||||
|
||||
static struct configfs_attribute *o2nm_cluster_attrs[] = {
|
||||
&o2nm_cluster_attr_idle_timeout_ms.attr,
|
||||
&o2nm_cluster_attr_keepalive_delay_ms.attr,
|
||||
&o2nm_cluster_attr_reconnect_delay_ms.attr,
|
||||
&o2nm_cluster_attr_fence_method.attr,
|
||||
NULL,
|
||||
};
|
||||
static ssize_t o2nm_cluster_show(struct config_item *item,
|
||||
|
@ -778,6 +828,7 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
|
|||
cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT;
|
||||
cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT;
|
||||
cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT;
|
||||
cluster->cl_fence_method = O2NM_FENCE_RESET;
|
||||
|
||||
ret = &cluster->cl_group;
|
||||
o2nm_single_cluster = cluster;
|
||||
|
|
|
@ -33,6 +33,12 @@
|
|||
#include <linux/configfs.h>
|
||||
#include <linux/rbtree.h>
|
||||
|
||||
/*
 * Ways a node can fence itself.  The values are used as indexes into
 * o2nm_fence_method_desc[], so O2NM_FENCE_METHODS must stay last.
 */
enum o2nm_fence_method {
	O2NM_FENCE_RESET = 0,	/* fence by emergency_restart() */
	O2NM_FENCE_PANIC,	/* fence by panic() */
	O2NM_FENCE_METHODS,	/* Number of fence methods */
};
|
||||
|
||||
struct o2nm_node {
|
||||
spinlock_t nd_lock;
|
||||
struct config_item nd_item;
|
||||
|
@ -58,6 +64,7 @@ struct o2nm_cluster {
|
|||
unsigned int cl_idle_timeout_ms;
|
||||
unsigned int cl_keepalive_delay_ms;
|
||||
unsigned int cl_reconnect_delay_ms;
|
||||
enum o2nm_fence_method cl_fence_method;
|
||||
|
||||
/* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
|
||||
unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||
|
|
|
@ -74,8 +74,20 @@ static void o2quo_fence_self(void)
|
|||
* threads can still schedule, etc, etc */
|
||||
o2hb_stop_all_regions();
|
||||
|
||||
printk("ocfs2 is very sorry to be fencing this system by restarting\n");
|
||||
emergency_restart();
|
||||
switch (o2nm_single_cluster->cl_fence_method) {
|
||||
case O2NM_FENCE_PANIC:
|
||||
panic("*** ocfs2 is very sorry to be fencing this system by "
|
||||
"panicing ***\n");
|
||||
break;
|
||||
default:
|
||||
WARN_ON(o2nm_single_cluster->cl_fence_method >=
|
||||
O2NM_FENCE_METHODS);
|
||||
case O2NM_FENCE_RESET:
|
||||
printk(KERN_ERR "*** ocfs2 is very sorry to be fencing this "
|
||||
"system by restarting ***\n");
|
||||
emergency_restart();
|
||||
break;
|
||||
};
|
||||
}
|
||||
|
||||
/* Indicate that a timeout occurred on a heartbeat region write. The
|
||||
|
|
Загрузка…
Ссылка в новой задаче