ipvs: remove rs_lock by using RCU
rs_lock was used to protect rs_table (hash table) from updaters (under global mutex) and readers (packet handlers). We can remove rs_lock by using RCU lock for readers. Reclaiming dest only with kfree_rcu is enough because the readers access only fields from the ip_vs_dest structure. Use hlist for rs_table. As we are now using hlist_del_rcu, introduce in_rs_table flag as replacement for the list_empty checks which do not work with RCU. It is needed because only NAT dests are in the rs_table. Signed-off-by: Julian Anastasov <ja@ssi.bg> Signed-off by: Hans Schillstrom <hans@schillstrom.com> Signed-off-by: Simon Horman <horms@verge.net.au>
This commit is contained in:
Родитель
363c97d743
Коммит
276472eae0
|
@ -738,7 +738,7 @@ struct ip_vs_dest_dst {
|
||||||
*/
|
*/
|
||||||
struct ip_vs_dest {
|
struct ip_vs_dest {
|
||||||
struct list_head n_list; /* for the dests in the service */
|
struct list_head n_list; /* for the dests in the service */
|
||||||
struct list_head d_list; /* for table with all the dests */
|
struct hlist_node d_list; /* for table with all the dests */
|
||||||
|
|
||||||
u16 af; /* address family */
|
u16 af; /* address family */
|
||||||
__be16 port; /* port number of the server */
|
__be16 port; /* port number of the server */
|
||||||
|
@ -767,6 +767,9 @@ struct ip_vs_dest {
|
||||||
__be16 vport; /* virtual port number */
|
__be16 vport; /* virtual port number */
|
||||||
union nf_inet_addr vaddr; /* virtual IP address */
|
union nf_inet_addr vaddr; /* virtual IP address */
|
||||||
__u32 vfwmark; /* firewall mark of service */
|
__u32 vfwmark; /* firewall mark of service */
|
||||||
|
|
||||||
|
struct rcu_head rcu_head;
|
||||||
|
unsigned int in_rs_table:1; /* we are in rs_table */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -897,7 +900,7 @@ struct netns_ipvs {
|
||||||
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
|
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
|
||||||
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
|
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
|
||||||
|
|
||||||
struct list_head rs_table[IP_VS_RTAB_SIZE];
|
struct hlist_head rs_table[IP_VS_RTAB_SIZE];
|
||||||
/* ip_vs_app */
|
/* ip_vs_app */
|
||||||
struct list_head app_list;
|
struct list_head app_list;
|
||||||
/* ip_vs_proto */
|
/* ip_vs_proto */
|
||||||
|
@ -933,7 +936,6 @@ struct netns_ipvs {
|
||||||
|
|
||||||
int num_services; /* no of virtual services */
|
int num_services; /* no of virtual services */
|
||||||
|
|
||||||
rwlock_t rs_lock; /* real services table */
|
|
||||||
/* Trash for destinations */
|
/* Trash for destinations */
|
||||||
struct list_head dest_trash;
|
struct list_head dest_trash;
|
||||||
/* Service counters */
|
/* Service counters */
|
||||||
|
@ -1376,9 +1378,9 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc)
|
||||||
atomic_dec(&svc->usecnt);
|
atomic_dec(&svc->usecnt);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern struct ip_vs_dest *
|
extern bool
|
||||||
ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
|
ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
|
||||||
const union nf_inet_addr *daddr, __be16 dport);
|
const union nf_inet_addr *daddr, __be16 dport);
|
||||||
|
|
||||||
extern int ip_vs_use_count_inc(void);
|
extern int ip_vs_use_count_inc(void);
|
||||||
extern void ip_vs_use_count_dec(void);
|
extern void ip_vs_use_count_dec(void);
|
||||||
|
|
|
@ -1161,9 +1161,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
|
||||||
sizeof(_ports), _ports, &iph);
|
sizeof(_ports), _ports, &iph);
|
||||||
if (pptr == NULL)
|
if (pptr == NULL)
|
||||||
return NF_ACCEPT; /* Not for me */
|
return NF_ACCEPT; /* Not for me */
|
||||||
if (ip_vs_lookup_real_service(net, af, iph.protocol,
|
if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr,
|
||||||
&iph.saddr,
|
pptr[0])) {
|
||||||
pptr[0])) {
|
|
||||||
/*
|
/*
|
||||||
* Notify the real server: there is no
|
* Notify the real server: there is no
|
||||||
* existing entry if it is not RST
|
* existing entry if it is not RST
|
||||||
|
|
|
@ -508,17 +508,13 @@ static inline unsigned int ip_vs_rs_hashkey(int af,
|
||||||
& IP_VS_RTAB_MASK;
|
& IP_VS_RTAB_MASK;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/* Hash ip_vs_dest in rs_table by <proto,addr,port>. */
|
||||||
* Hashes ip_vs_dest in rs_table by <proto,addr,port>.
|
static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
|
||||||
* should be called with locked tables.
|
|
||||||
*/
|
|
||||||
static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
|
|
||||||
{
|
{
|
||||||
unsigned int hash;
|
unsigned int hash;
|
||||||
|
|
||||||
if (!list_empty(&dest->d_list)) {
|
if (dest->in_rs_table)
|
||||||
return 0;
|
return;
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Hash by proto,addr,port,
|
* Hash by proto,addr,port,
|
||||||
|
@ -526,60 +522,47 @@ static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
|
||||||
*/
|
*/
|
||||||
hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
|
hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
|
||||||
|
|
||||||
list_add(&dest->d_list, &ipvs->rs_table[hash]);
|
hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
|
||||||
|
dest->in_rs_table = 1;
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/* Unhash ip_vs_dest from rs_table. */
|
||||||
* UNhashes ip_vs_dest from rs_table.
|
static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
|
||||||
* should be called with locked tables.
|
|
||||||
*/
|
|
||||||
static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
|
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Remove it from the rs_table table.
|
* Remove it from the rs_table table.
|
||||||
*/
|
*/
|
||||||
if (!list_empty(&dest->d_list)) {
|
if (dest->in_rs_table) {
|
||||||
list_del_init(&dest->d_list);
|
hlist_del_rcu(&dest->d_list);
|
||||||
|
dest->in_rs_table = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/* Check if real service by <proto,addr,port> is present */
|
||||||
* Lookup real service by <proto,addr,port> in the real service table.
|
bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
|
||||||
*/
|
const union nf_inet_addr *daddr, __be16 dport)
|
||||||
struct ip_vs_dest *
|
|
||||||
ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
|
|
||||||
const union nf_inet_addr *daddr,
|
|
||||||
__be16 dport)
|
|
||||||
{
|
{
|
||||||
struct netns_ipvs *ipvs = net_ipvs(net);
|
struct netns_ipvs *ipvs = net_ipvs(net);
|
||||||
unsigned int hash;
|
unsigned int hash;
|
||||||
struct ip_vs_dest *dest;
|
struct ip_vs_dest *dest;
|
||||||
|
|
||||||
/*
|
/* Check for "full" addressed entries */
|
||||||
* Check for "full" addressed entries
|
|
||||||
* Return the first found entry
|
|
||||||
*/
|
|
||||||
hash = ip_vs_rs_hashkey(af, daddr, dport);
|
hash = ip_vs_rs_hashkey(af, daddr, dport);
|
||||||
|
|
||||||
read_lock(&ipvs->rs_lock);
|
rcu_read_lock();
|
||||||
list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
|
hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
|
||||||
if ((dest->af == af)
|
if (dest->port == dport &&
|
||||||
&& ip_vs_addr_equal(af, &dest->addr, daddr)
|
dest->af == af &&
|
||||||
&& (dest->port == dport)
|
ip_vs_addr_equal(af, &dest->addr, daddr) &&
|
||||||
&& ((dest->protocol == protocol) ||
|
(dest->protocol == protocol || dest->vfwmark)) {
|
||||||
dest->vfwmark)) {
|
|
||||||
/* HIT */
|
/* HIT */
|
||||||
read_unlock(&ipvs->rs_lock);
|
rcu_read_unlock();
|
||||||
return dest;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
read_unlock(&ipvs->rs_lock);
|
rcu_read_unlock();
|
||||||
|
|
||||||
return NULL;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -612,9 +595,6 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
|
||||||
* the backup synchronization daemon. It finds the
|
* the backup synchronization daemon. It finds the
|
||||||
* destination to be bound to the received connection
|
* destination to be bound to the received connection
|
||||||
* on the backup.
|
* on the backup.
|
||||||
*
|
|
||||||
* ip_vs_lookup_real_service() looked promissing, but
|
|
||||||
* seems not working as expected.
|
|
||||||
*/
|
*/
|
||||||
struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
|
struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
|
||||||
const union nf_inet_addr *daddr,
|
const union nf_inet_addr *daddr,
|
||||||
|
@ -715,7 +695,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
|
||||||
__ip_vs_dst_cache_reset(dest);
|
__ip_vs_dst_cache_reset(dest);
|
||||||
__ip_vs_unbind_svc(dest);
|
__ip_vs_unbind_svc(dest);
|
||||||
free_percpu(dest->stats.cpustats);
|
free_percpu(dest->stats.cpustats);
|
||||||
kfree(dest);
|
kfree_rcu(dest, rcu_head);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -742,7 +722,7 @@ static void ip_vs_trash_cleanup(struct net *net)
|
||||||
__ip_vs_dst_cache_reset(dest);
|
__ip_vs_dst_cache_reset(dest);
|
||||||
__ip_vs_unbind_svc(dest);
|
__ip_vs_unbind_svc(dest);
|
||||||
free_percpu(dest->stats.cpustats);
|
free_percpu(dest->stats.cpustats);
|
||||||
kfree(dest);
|
kfree_rcu(dest, rcu_head);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -807,9 +787,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
|
||||||
* Put the real service in rs_table if not present.
|
* Put the real service in rs_table if not present.
|
||||||
* For now only for NAT!
|
* For now only for NAT!
|
||||||
*/
|
*/
|
||||||
write_lock_bh(&ipvs->rs_lock);
|
|
||||||
ip_vs_rs_hash(ipvs, dest);
|
ip_vs_rs_hash(ipvs, dest);
|
||||||
write_unlock_bh(&ipvs->rs_lock);
|
|
||||||
}
|
}
|
||||||
atomic_set(&dest->conn_flags, conn_flags);
|
atomic_set(&dest->conn_flags, conn_flags);
|
||||||
|
|
||||||
|
@ -905,7 +883,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
|
||||||
atomic_set(&dest->persistconns, 0);
|
atomic_set(&dest->persistconns, 0);
|
||||||
atomic_set(&dest->refcnt, 1);
|
atomic_set(&dest->refcnt, 1);
|
||||||
|
|
||||||
INIT_LIST_HEAD(&dest->d_list);
|
INIT_HLIST_NODE(&dest->d_list);
|
||||||
spin_lock_init(&dest->dst_lock);
|
spin_lock_init(&dest->dst_lock);
|
||||||
spin_lock_init(&dest->stats.lock);
|
spin_lock_init(&dest->stats.lock);
|
||||||
__ip_vs_update_dest(svc, dest, udest, 1);
|
__ip_vs_update_dest(svc, dest, udest, 1);
|
||||||
|
@ -1045,9 +1023,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
|
||||||
/*
|
/*
|
||||||
* Remove it from the d-linked list with the real services.
|
* Remove it from the d-linked list with the real services.
|
||||||
*/
|
*/
|
||||||
write_lock_bh(&ipvs->rs_lock);
|
|
||||||
ip_vs_rs_unhash(dest);
|
ip_vs_rs_unhash(dest);
|
||||||
write_unlock_bh(&ipvs->rs_lock);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Decrease the refcnt of the dest, and free the dest
|
* Decrease the refcnt of the dest, and free the dest
|
||||||
|
@ -1067,7 +1043,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
|
||||||
time, so the operation here is OK */
|
time, so the operation here is OK */
|
||||||
atomic_dec(&dest->svc->refcnt);
|
atomic_dec(&dest->svc->refcnt);
|
||||||
free_percpu(dest->stats.cpustats);
|
free_percpu(dest->stats.cpustats);
|
||||||
kfree(dest);
|
kfree_rcu(dest, rcu_head);
|
||||||
} else {
|
} else {
|
||||||
IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
|
IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
|
||||||
"dest->refcnt=%d\n",
|
"dest->refcnt=%d\n",
|
||||||
|
@ -3811,11 +3787,9 @@ int __net_init ip_vs_control_net_init(struct net *net)
|
||||||
int idx;
|
int idx;
|
||||||
struct netns_ipvs *ipvs = net_ipvs(net);
|
struct netns_ipvs *ipvs = net_ipvs(net);
|
||||||
|
|
||||||
rwlock_init(&ipvs->rs_lock);
|
|
||||||
|
|
||||||
/* Initialize rs_table */
|
/* Initialize rs_table */
|
||||||
for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
|
for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
|
||||||
INIT_LIST_HEAD(&ipvs->rs_table[idx]);
|
INIT_HLIST_HEAD(&ipvs->rs_table[idx]);
|
||||||
|
|
||||||
INIT_LIST_HEAD(&ipvs->dest_trash);
|
INIT_LIST_HEAD(&ipvs->dest_trash);
|
||||||
atomic_set(&ipvs->ftpsvc_counter, 0);
|
atomic_set(&ipvs->ftpsvc_counter, 0);
|
||||||
|
@ -3892,7 +3866,7 @@ int __init ip_vs_control_init(void)
|
||||||
|
|
||||||
EnterFunction(2);
|
EnterFunction(2);
|
||||||
|
|
||||||
/* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
|
/* Initialize svc_table, ip_vs_svc_fwm_table */
|
||||||
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
|
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
|
||||||
INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
|
INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
|
||||||
INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
|
INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
|
||||||
|
|
Загрузка…
Ссылка в новой задаче