netfilter: ipset: Prepare the ipset core to use RCU at set level
Replace rwlock_t with spinlock_t in "struct ip_set" and change the locking accordingly. Convert the comment extension into an RCU-aware object. Also, simplify the timeout routines. Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
This commit is contained in:
Родитель
bd55389cc3
Коммит
b57b2d1fa5
|
@ -108,8 +108,13 @@ struct ip_set_counter {
|
||||||
atomic64_t packets;
|
atomic64_t packets;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ip_set_comment_rcu {
|
||||||
|
struct rcu_head rcu;
|
||||||
|
char str[0];
|
||||||
|
};
|
||||||
|
|
||||||
struct ip_set_comment {
|
struct ip_set_comment {
|
||||||
char *str;
|
struct ip_set_comment_rcu __rcu *c;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ip_set_skbinfo {
|
struct ip_set_skbinfo {
|
||||||
|
@ -226,7 +231,7 @@ struct ip_set {
|
||||||
/* The name of the set */
|
/* The name of the set */
|
||||||
char name[IPSET_MAXNAMELEN];
|
char name[IPSET_MAXNAMELEN];
|
||||||
/* Lock protecting the set data */
|
/* Lock protecting the set data */
|
||||||
rwlock_t lock;
|
spinlock_t lock;
|
||||||
/* References to the set */
|
/* References to the set */
|
||||||
u32 ref;
|
u32 ref;
|
||||||
/* The core set type */
|
/* The core set type */
|
||||||
|
|
|
@ -16,41 +16,57 @@ ip_set_comment_uget(struct nlattr *tb)
|
||||||
return nla_data(tb);
|
return nla_data(tb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Called from uadd only, protected by the set spinlock.
|
||||||
|
* The kadt functions don't use the comment extensions in any way.
|
||||||
|
*/
|
||||||
static inline void
|
static inline void
|
||||||
ip_set_init_comment(struct ip_set_comment *comment,
|
ip_set_init_comment(struct ip_set_comment *comment,
|
||||||
const struct ip_set_ext *ext)
|
const struct ip_set_ext *ext)
|
||||||
{
|
{
|
||||||
|
struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
|
||||||
size_t len = ext->comment ? strlen(ext->comment) : 0;
|
size_t len = ext->comment ? strlen(ext->comment) : 0;
|
||||||
|
|
||||||
if (unlikely(comment->str)) {
|
if (unlikely(c)) {
|
||||||
kfree(comment->str);
|
kfree_rcu(c, rcu);
|
||||||
comment->str = NULL;
|
rcu_assign_pointer(comment->c, NULL);
|
||||||
}
|
}
|
||||||
if (!len)
|
if (!len)
|
||||||
return;
|
return;
|
||||||
if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
|
if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
|
||||||
len = IPSET_MAX_COMMENT_SIZE;
|
len = IPSET_MAX_COMMENT_SIZE;
|
||||||
comment->str = kzalloc(len + 1, GFP_ATOMIC);
|
c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
|
||||||
if (unlikely(!comment->str))
|
if (unlikely(!c))
|
||||||
return;
|
return;
|
||||||
strlcpy(comment->str, ext->comment, len + 1);
|
strlcpy(c->str, ext->comment, len + 1);
|
||||||
|
rcu_assign_pointer(comment->c, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Used only when dumping a set, protected by rcu_read_lock_bh() */
|
||||||
static inline int
|
static inline int
|
||||||
ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment)
|
ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment)
|
||||||
{
|
{
|
||||||
if (!comment->str)
|
struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c);
|
||||||
|
|
||||||
|
if (!c)
|
||||||
return 0;
|
return 0;
|
||||||
return nla_put_string(skb, IPSET_ATTR_COMMENT, comment->str);
|
return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Called from uadd/udel, flush or the garbage collectors protected
|
||||||
|
* by the set spinlock.
|
||||||
|
* Called when the set is destroyed and when there can't be any user
|
||||||
|
* of the set data anymore.
|
||||||
|
*/
|
||||||
static inline void
|
static inline void
|
||||||
ip_set_comment_free(struct ip_set_comment *comment)
|
ip_set_comment_free(struct ip_set_comment *comment)
|
||||||
{
|
{
|
||||||
if (unlikely(!comment->str))
|
struct ip_set_comment_rcu *c;
|
||||||
|
|
||||||
|
c = rcu_dereference_protected(comment->c, 1);
|
||||||
|
if (unlikely(!c))
|
||||||
return;
|
return;
|
||||||
kfree(comment->str);
|
kfree_rcu(c, rcu);
|
||||||
comment->str = NULL;
|
rcu_assign_pointer(comment->c, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -40,31 +40,26 @@ ip_set_timeout_uget(struct nlattr *tb)
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool
|
static inline bool
|
||||||
ip_set_timeout_test(unsigned long timeout)
|
ip_set_timeout_expired(unsigned long *t)
|
||||||
{
|
{
|
||||||
return timeout == IPSET_ELEM_PERMANENT ||
|
return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t);
|
||||||
time_is_after_jiffies(timeout);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bool
|
|
||||||
ip_set_timeout_expired(unsigned long *timeout)
|
|
||||||
{
|
|
||||||
return *timeout != IPSET_ELEM_PERMANENT &&
|
|
||||||
time_is_before_jiffies(*timeout);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
ip_set_timeout_set(unsigned long *timeout, u32 t)
|
ip_set_timeout_set(unsigned long *timeout, u32 value)
|
||||||
{
|
{
|
||||||
if (!t) {
|
unsigned long t;
|
||||||
|
|
||||||
|
if (!value) {
|
||||||
*timeout = IPSET_ELEM_PERMANENT;
|
*timeout = IPSET_ELEM_PERMANENT;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
*timeout = msecs_to_jiffies(t * MSEC_PER_SEC) + jiffies;
|
t = msecs_to_jiffies(value * MSEC_PER_SEC) + jiffies;
|
||||||
if (*timeout == IPSET_ELEM_PERMANENT)
|
if (t == IPSET_ELEM_PERMANENT)
|
||||||
/* Bingo! :-) */
|
/* Bingo! :-) */
|
||||||
(*timeout)--;
|
t--;
|
||||||
|
*timeout = t;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline u32
|
static inline u32
|
||||||
|
|
|
@ -209,15 +209,15 @@ ip_set_type_register(struct ip_set_type *type)
|
||||||
pr_warn("ip_set type %s, family %s with revision min %u already registered!\n",
|
pr_warn("ip_set type %s, family %s with revision min %u already registered!\n",
|
||||||
type->name, family_name(type->family),
|
type->name, family_name(type->family),
|
||||||
type->revision_min);
|
type->revision_min);
|
||||||
ret = -EINVAL;
|
ip_set_type_unlock();
|
||||||
goto unlock;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
list_add_rcu(&type->list, &ip_set_type_list);
|
list_add_rcu(&type->list, &ip_set_type_list);
|
||||||
pr_debug("type %s, family %s, revision %u:%u registered.\n",
|
pr_debug("type %s, family %s, revision %u:%u registered.\n",
|
||||||
type->name, family_name(type->family),
|
type->name, family_name(type->family),
|
||||||
type->revision_min, type->revision_max);
|
type->revision_min, type->revision_max);
|
||||||
unlock:
|
|
||||||
ip_set_type_unlock();
|
ip_set_type_unlock();
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(ip_set_type_register);
|
EXPORT_SYMBOL_GPL(ip_set_type_register);
|
||||||
|
@ -231,12 +231,12 @@ ip_set_type_unregister(struct ip_set_type *type)
|
||||||
pr_warn("ip_set type %s, family %s with revision min %u not registered\n",
|
pr_warn("ip_set type %s, family %s with revision min %u not registered\n",
|
||||||
type->name, family_name(type->family),
|
type->name, family_name(type->family),
|
||||||
type->revision_min);
|
type->revision_min);
|
||||||
goto unlock;
|
ip_set_type_unlock();
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
list_del_rcu(&type->list);
|
list_del_rcu(&type->list);
|
||||||
pr_debug("type %s, family %s with revision min %u unregistered.\n",
|
pr_debug("type %s, family %s with revision min %u unregistered.\n",
|
||||||
type->name, family_name(type->family), type->revision_min);
|
type->name, family_name(type->family), type->revision_min);
|
||||||
unlock:
|
|
||||||
ip_set_type_unlock();
|
ip_set_type_unlock();
|
||||||
|
|
||||||
synchronize_rcu();
|
synchronize_rcu();
|
||||||
|
@ -531,16 +531,16 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
|
||||||
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
|
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
read_lock_bh(&set->lock);
|
rcu_read_lock_bh();
|
||||||
ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
|
ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
|
||||||
read_unlock_bh(&set->lock);
|
rcu_read_unlock_bh();
|
||||||
|
|
||||||
if (ret == -EAGAIN) {
|
if (ret == -EAGAIN) {
|
||||||
/* Type requests element to be completed */
|
/* Type requests element to be completed */
|
||||||
pr_debug("element must be completed, ADD is triggered\n");
|
pr_debug("element must be completed, ADD is triggered\n");
|
||||||
write_lock_bh(&set->lock);
|
spin_lock_bh(&set->lock);
|
||||||
set->variant->kadt(set, skb, par, IPSET_ADD, opt);
|
set->variant->kadt(set, skb, par, IPSET_ADD, opt);
|
||||||
write_unlock_bh(&set->lock);
|
spin_unlock_bh(&set->lock);
|
||||||
ret = 1;
|
ret = 1;
|
||||||
} else {
|
} else {
|
||||||
/* --return-nomatch: invert matched element */
|
/* --return-nomatch: invert matched element */
|
||||||
|
@ -570,9 +570,9 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
|
||||||
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
|
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
|
||||||
return -IPSET_ERR_TYPE_MISMATCH;
|
return -IPSET_ERR_TYPE_MISMATCH;
|
||||||
|
|
||||||
write_lock_bh(&set->lock);
|
spin_lock_bh(&set->lock);
|
||||||
ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
|
ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
|
||||||
write_unlock_bh(&set->lock);
|
spin_unlock_bh(&set->lock);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -593,9 +593,9 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
|
||||||
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
|
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
|
||||||
return -IPSET_ERR_TYPE_MISMATCH;
|
return -IPSET_ERR_TYPE_MISMATCH;
|
||||||
|
|
||||||
write_lock_bh(&set->lock);
|
spin_lock_bh(&set->lock);
|
||||||
ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
|
ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
|
||||||
write_unlock_bh(&set->lock);
|
spin_unlock_bh(&set->lock);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -880,7 +880,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
|
||||||
set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
|
set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
|
||||||
if (!set)
|
if (!set)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
rwlock_init(&set->lock);
|
spin_lock_init(&set->lock);
|
||||||
strlcpy(set->name, name, IPSET_MAXNAMELEN);
|
strlcpy(set->name, name, IPSET_MAXNAMELEN);
|
||||||
set->family = family;
|
set->family = family;
|
||||||
set->revision = revision;
|
set->revision = revision;
|
||||||
|
@ -1062,9 +1062,9 @@ ip_set_flush_set(struct ip_set *set)
|
||||||
{
|
{
|
||||||
pr_debug("set: %s\n", set->name);
|
pr_debug("set: %s\n", set->name);
|
||||||
|
|
||||||
write_lock_bh(&set->lock);
|
spin_lock_bh(&set->lock);
|
||||||
set->variant->flush(set);
|
set->variant->flush(set);
|
||||||
write_unlock_bh(&set->lock);
|
spin_unlock_bh(&set->lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -1377,9 +1377,9 @@ dump_last:
|
||||||
set->variant->uref(set, cb, true);
|
set->variant->uref(set, cb, true);
|
||||||
/* Fall through and add elements */
|
/* Fall through and add elements */
|
||||||
default:
|
default:
|
||||||
read_lock_bh(&set->lock);
|
rcu_read_lock_bh();
|
||||||
ret = set->variant->list(set, skb, cb);
|
ret = set->variant->list(set, skb, cb);
|
||||||
read_unlock_bh(&set->lock);
|
rcu_read_unlock_bh();
|
||||||
if (!cb->args[IPSET_CB_ARG0])
|
if (!cb->args[IPSET_CB_ARG0])
|
||||||
/* Set is done, proceed with next one */
|
/* Set is done, proceed with next one */
|
||||||
goto next_set;
|
goto next_set;
|
||||||
|
@ -1462,9 +1462,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
|
||||||
bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
|
bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
write_lock_bh(&set->lock);
|
spin_lock_bh(&set->lock);
|
||||||
ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
|
ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
|
||||||
write_unlock_bh(&set->lock);
|
spin_unlock_bh(&set->lock);
|
||||||
retried = true;
|
retried = true;
|
||||||
} while (ret == -EAGAIN &&
|
} while (ret == -EAGAIN &&
|
||||||
set->variant->resize &&
|
set->variant->resize &&
|
||||||
|
@ -1644,9 +1644,9 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
|
||||||
set->type->adt_policy))
|
set->type->adt_policy))
|
||||||
return -IPSET_ERR_PROTOCOL;
|
return -IPSET_ERR_PROTOCOL;
|
||||||
|
|
||||||
read_lock_bh(&set->lock);
|
rcu_read_lock_bh();
|
||||||
ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0);
|
ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0);
|
||||||
read_unlock_bh(&set->lock);
|
rcu_read_unlock_bh();
|
||||||
/* Userspace can't trigger element to be re-added */
|
/* Userspace can't trigger element to be re-added */
|
||||||
if (ret == -EAGAIN)
|
if (ret == -EAGAIN)
|
||||||
ret = 1;
|
ret = 1;
|
||||||
|
|
Загрузка…
Ссылка в новой задаче