rds: Changing IP address internal representation to struct in6_addr
This patch changes the internal representation of an IP address to use struct in6_addr. An IPv4 address is stored as an IPv4-mapped IPv6 address. All functions that take an IP address as an argument are also changed to use struct in6_addr. However, the RDS socket layer is not modified, so it still does not accept IPv6 addresses from an application, and the RDS layer neither accepts nor initiates IPv6 connections. v2: Fixed sparse warnings. Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com> Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Родитель
a6c90dd321
Коммит
eee2fa6ab3
124
net/rds/af_rds.c
124
net/rds/af_rds.c
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2006 Oracle. All rights reserved.
|
* Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* This software is available to you under a choice of one of two
|
* This software is available to you under a choice of one of two
|
||||||
* licenses. You may choose to be licensed under the terms of the GNU
|
* licenses. You may choose to be licensed under the terms of the GNU
|
||||||
|
@ -35,6 +35,7 @@
|
||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
#include <linux/gfp.h>
|
#include <linux/gfp.h>
|
||||||
#include <linux/in.h>
|
#include <linux/in.h>
|
||||||
|
#include <linux/ipv6.h>
|
||||||
#include <linux/poll.h>
|
#include <linux/poll.h>
|
||||||
#include <net/sock.h>
|
#include <net/sock.h>
|
||||||
|
|
||||||
|
@ -113,26 +114,63 @@ void rds_wake_sk_sleep(struct rds_sock *rs)
|
||||||
static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
|
static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
|
||||||
int peer)
|
int peer)
|
||||||
{
|
{
|
||||||
struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
|
|
||||||
struct rds_sock *rs = rds_sk_to_rs(sock->sk);
|
struct rds_sock *rs = rds_sk_to_rs(sock->sk);
|
||||||
|
struct sockaddr_in6 *sin6;
|
||||||
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
|
struct sockaddr_in *sin;
|
||||||
|
int uaddr_len;
|
||||||
|
|
||||||
/* racey, don't care */
|
/* racey, don't care */
|
||||||
if (peer) {
|
if (peer) {
|
||||||
if (!rs->rs_conn_addr)
|
if (ipv6_addr_any(&rs->rs_conn_addr))
|
||||||
return -ENOTCONN;
|
return -ENOTCONN;
|
||||||
|
|
||||||
|
if (ipv6_addr_v4mapped(&rs->rs_conn_addr)) {
|
||||||
|
sin = (struct sockaddr_in *)uaddr;
|
||||||
|
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
|
||||||
|
sin->sin_family = AF_INET;
|
||||||
sin->sin_port = rs->rs_conn_port;
|
sin->sin_port = rs->rs_conn_port;
|
||||||
sin->sin_addr.s_addr = rs->rs_conn_addr;
|
sin->sin_addr.s_addr = rs->rs_conn_addr_v4;
|
||||||
|
uaddr_len = sizeof(*sin);
|
||||||
} else {
|
} else {
|
||||||
|
sin6 = (struct sockaddr_in6 *)uaddr;
|
||||||
|
sin6->sin6_family = AF_INET6;
|
||||||
|
sin6->sin6_port = rs->rs_conn_port;
|
||||||
|
sin6->sin6_addr = rs->rs_conn_addr;
|
||||||
|
sin6->sin6_flowinfo = 0;
|
||||||
|
/* scope_id is the same as in the bound address. */
|
||||||
|
sin6->sin6_scope_id = rs->rs_bound_scope_id;
|
||||||
|
uaddr_len = sizeof(*sin6);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* If socket is not yet bound, set the return address family
|
||||||
|
* to be AF_UNSPEC (value 0) and the address size to be that
|
||||||
|
* of an IPv4 address.
|
||||||
|
*/
|
||||||
|
if (ipv6_addr_any(&rs->rs_bound_addr)) {
|
||||||
|
sin = (struct sockaddr_in *)uaddr;
|
||||||
|
memset(sin, 0, sizeof(*sin));
|
||||||
|
sin->sin_family = AF_UNSPEC;
|
||||||
|
return sizeof(*sin);
|
||||||
|
}
|
||||||
|
if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) {
|
||||||
|
sin = (struct sockaddr_in *)uaddr;
|
||||||
|
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
|
||||||
|
sin->sin_family = AF_INET;
|
||||||
sin->sin_port = rs->rs_bound_port;
|
sin->sin_port = rs->rs_bound_port;
|
||||||
sin->sin_addr.s_addr = rs->rs_bound_addr;
|
sin->sin_addr.s_addr = rs->rs_bound_addr_v4;
|
||||||
|
uaddr_len = sizeof(*sin);
|
||||||
|
} else {
|
||||||
|
sin6 = (struct sockaddr_in6 *)uaddr;
|
||||||
|
sin6->sin6_family = AF_INET6;
|
||||||
|
sin6->sin6_port = rs->rs_bound_port;
|
||||||
|
sin6->sin6_addr = rs->rs_bound_addr;
|
||||||
|
sin6->sin6_flowinfo = 0;
|
||||||
|
sin6->sin6_scope_id = rs->rs_bound_scope_id;
|
||||||
|
uaddr_len = sizeof(*sin6);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sin->sin_family = AF_INET;
|
return uaddr_len;
|
||||||
|
|
||||||
return sizeof(*sin);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -203,11 +241,12 @@ static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
|
||||||
static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
|
static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
|
||||||
int len)
|
int len)
|
||||||
{
|
{
|
||||||
|
struct sockaddr_in6 sin6;
|
||||||
struct sockaddr_in sin;
|
struct sockaddr_in sin;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
/* racing with another thread binding seems ok here */
|
/* racing with another thread binding seems ok here */
|
||||||
if (rs->rs_bound_addr == 0) {
|
if (ipv6_addr_any(&rs->rs_bound_addr)) {
|
||||||
ret = -ENOTCONN; /* XXX not a great errno */
|
ret = -ENOTCONN; /* XXX not a great errno */
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
@ -215,14 +254,23 @@ static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
|
||||||
if (len < sizeof(struct sockaddr_in)) {
|
if (len < sizeof(struct sockaddr_in)) {
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
} else if (len < sizeof(struct sockaddr_in6)) {
|
||||||
|
/* Assume IPv4 */
|
||||||
if (copy_from_user(&sin, optval, sizeof(sin))) {
|
if (copy_from_user(&sin, optval, sizeof(struct sockaddr_in))) {
|
||||||
ret = -EFAULT;
|
ret = -EFAULT;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
ipv6_addr_set_v4mapped(sin.sin_addr.s_addr, &sin6.sin6_addr);
|
||||||
|
sin6.sin6_port = sin.sin_port;
|
||||||
|
} else {
|
||||||
|
if (copy_from_user(&sin6, optval,
|
||||||
|
sizeof(struct sockaddr_in6))) {
|
||||||
|
ret = -EFAULT;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
rds_send_drop_to(rs, &sin);
|
rds_send_drop_to(rs, &sin6);
|
||||||
out:
|
out:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -435,31 +483,41 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
|
||||||
int addr_len, int flags)
|
int addr_len, int flags)
|
||||||
{
|
{
|
||||||
struct sock *sk = sock->sk;
|
struct sock *sk = sock->sk;
|
||||||
struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
|
struct sockaddr_in *sin;
|
||||||
struct rds_sock *rs = rds_sk_to_rs(sk);
|
struct rds_sock *rs = rds_sk_to_rs(sk);
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
lock_sock(sk);
|
lock_sock(sk);
|
||||||
|
|
||||||
if (addr_len != sizeof(struct sockaddr_in)) {
|
switch (addr_len) {
|
||||||
ret = -EINVAL;
|
case sizeof(struct sockaddr_in):
|
||||||
goto out;
|
sin = (struct sockaddr_in *)uaddr;
|
||||||
}
|
|
||||||
|
|
||||||
if (sin->sin_family != AF_INET) {
|
if (sin->sin_family != AF_INET) {
|
||||||
ret = -EAFNOSUPPORT;
|
ret = -EAFNOSUPPORT;
|
||||||
goto out;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
|
if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
|
||||||
ret = -EDESTADDRREQ;
|
ret = -EDESTADDRREQ;
|
||||||
goto out;
|
break;
|
||||||
|
}
|
||||||
|
if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) ||
|
||||||
|
sin->sin_addr.s_addr == htonl(INADDR_BROADCAST)) {
|
||||||
|
ret = -EINVAL;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &rs->rs_conn_addr);
|
||||||
|
rs->rs_conn_port = sin->sin_port;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case sizeof(struct sockaddr_in6):
|
||||||
|
ret = -EPROTONOSUPPORT;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
ret = -EINVAL;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
rs->rs_conn_addr = sin->sin_addr.s_addr;
|
|
||||||
rs->rs_conn_port = sin->sin_port;
|
|
||||||
|
|
||||||
out:
|
|
||||||
release_sock(sk);
|
release_sock(sk);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -578,8 +636,10 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len,
|
||||||
list_for_each_entry(inc, &rs->rs_recv_queue, i_item) {
|
list_for_each_entry(inc, &rs->rs_recv_queue, i_item) {
|
||||||
total++;
|
total++;
|
||||||
if (total <= len)
|
if (total <= len)
|
||||||
rds_inc_info_copy(inc, iter, inc->i_saddr,
|
rds_inc_info_copy(inc, iter,
|
||||||
rs->rs_bound_addr, 1);
|
inc->i_saddr.s6_addr32[3],
|
||||||
|
rs->rs_bound_addr_v4,
|
||||||
|
1);
|
||||||
}
|
}
|
||||||
|
|
||||||
read_unlock(&rs->rs_recv_lock);
|
read_unlock(&rs->rs_recv_lock);
|
||||||
|
@ -608,8 +668,8 @@ static void rds_sock_info(struct socket *sock, unsigned int len,
|
||||||
list_for_each_entry(rs, &rds_sock_list, rs_item) {
|
list_for_each_entry(rs, &rds_sock_list, rs_item) {
|
||||||
sinfo.sndbuf = rds_sk_sndbuf(rs);
|
sinfo.sndbuf = rds_sk_sndbuf(rs);
|
||||||
sinfo.rcvbuf = rds_sk_rcvbuf(rs);
|
sinfo.rcvbuf = rds_sk_rcvbuf(rs);
|
||||||
sinfo.bound_addr = rs->rs_bound_addr;
|
sinfo.bound_addr = rs->rs_bound_addr_v4;
|
||||||
sinfo.connected_addr = rs->rs_conn_addr;
|
sinfo.connected_addr = rs->rs_conn_addr_v4;
|
||||||
sinfo.bound_port = rs->rs_bound_port;
|
sinfo.bound_port = rs->rs_bound_port;
|
||||||
sinfo.connected_port = rs->rs_conn_port;
|
sinfo.connected_port = rs->rs_conn_port;
|
||||||
sinfo.inum = sock_i_ino(rds_rs_to_sk(rs));
|
sinfo.inum = sock_i_ino(rds_rs_to_sk(rs));
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2006 Oracle. All rights reserved.
|
* Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* This software is available to you under a choice of one of two
|
* This software is available to you under a choice of one of two
|
||||||
* licenses. You may choose to be licensed under the terms of the GNU
|
* licenses. You may choose to be licensed under the terms of the GNU
|
||||||
|
@ -33,6 +33,7 @@
|
||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
#include <net/sock.h>
|
#include <net/sock.h>
|
||||||
#include <linux/in.h>
|
#include <linux/in.h>
|
||||||
|
#include <linux/ipv6.h>
|
||||||
#include <linux/if_arp.h>
|
#include <linux/if_arp.h>
|
||||||
#include <linux/jhash.h>
|
#include <linux/jhash.h>
|
||||||
#include <linux/ratelimit.h>
|
#include <linux/ratelimit.h>
|
||||||
|
@ -42,42 +43,58 @@ static struct rhashtable bind_hash_table;
|
||||||
|
|
||||||
static const struct rhashtable_params ht_parms = {
|
static const struct rhashtable_params ht_parms = {
|
||||||
.nelem_hint = 768,
|
.nelem_hint = 768,
|
||||||
.key_len = sizeof(u64),
|
.key_len = RDS_BOUND_KEY_LEN,
|
||||||
.key_offset = offsetof(struct rds_sock, rs_bound_key),
|
.key_offset = offsetof(struct rds_sock, rs_bound_key),
|
||||||
.head_offset = offsetof(struct rds_sock, rs_bound_node),
|
.head_offset = offsetof(struct rds_sock, rs_bound_node),
|
||||||
.max_size = 16384,
|
.max_size = 16384,
|
||||||
.min_size = 1024,
|
.min_size = 1024,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Create a key for the bind hash table manipulation. Port is in network byte
|
||||||
|
* order.
|
||||||
|
*/
|
||||||
|
static inline void __rds_create_bind_key(u8 *key, const struct in6_addr *addr,
|
||||||
|
__be16 port, __u32 scope_id)
|
||||||
|
{
|
||||||
|
memcpy(key, addr, sizeof(*addr));
|
||||||
|
key += sizeof(*addr);
|
||||||
|
memcpy(key, &port, sizeof(port));
|
||||||
|
key += sizeof(port);
|
||||||
|
memcpy(key, &scope_id, sizeof(scope_id));
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return the rds_sock bound at the given local address.
|
* Return the rds_sock bound at the given local address.
|
||||||
*
|
*
|
||||||
* The rx path can race with rds_release. We notice if rds_release() has
|
* The rx path can race with rds_release. We notice if rds_release() has
|
||||||
* marked this socket and don't return a rs ref to the rx path.
|
* marked this socket and don't return a rs ref to the rx path.
|
||||||
*/
|
*/
|
||||||
struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
|
struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port,
|
||||||
|
__u32 scope_id)
|
||||||
{
|
{
|
||||||
u64 key = ((u64)addr << 32) | port;
|
u8 key[RDS_BOUND_KEY_LEN];
|
||||||
struct rds_sock *rs;
|
struct rds_sock *rs;
|
||||||
|
|
||||||
rs = rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms);
|
__rds_create_bind_key(key, addr, port, scope_id);
|
||||||
|
rs = rhashtable_lookup_fast(&bind_hash_table, key, ht_parms);
|
||||||
if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
|
if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
|
||||||
rds_sock_addref(rs);
|
rds_sock_addref(rs);
|
||||||
else
|
else
|
||||||
rs = NULL;
|
rs = NULL;
|
||||||
|
|
||||||
rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr,
|
rdsdebug("returning rs %p for %pI6c:%u\n", rs, addr,
|
||||||
ntohs(port));
|
ntohs(port));
|
||||||
|
|
||||||
return rs;
|
return rs;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* returns -ve errno or +ve port */
|
/* returns -ve errno or +ve port */
|
||||||
static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
|
static int rds_add_bound(struct rds_sock *rs, const struct in6_addr *addr,
|
||||||
|
__be16 *port, __u32 scope_id)
|
||||||
{
|
{
|
||||||
int ret = -EADDRINUSE;
|
int ret = -EADDRINUSE;
|
||||||
u16 rover, last;
|
u16 rover, last;
|
||||||
u64 key;
|
u8 key[RDS_BOUND_KEY_LEN];
|
||||||
|
|
||||||
if (*port != 0) {
|
if (*port != 0) {
|
||||||
rover = be16_to_cpu(*port);
|
rover = be16_to_cpu(*port);
|
||||||
|
@ -95,12 +112,13 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
|
||||||
|
|
||||||
if (rover == RDS_FLAG_PROBE_PORT)
|
if (rover == RDS_FLAG_PROBE_PORT)
|
||||||
continue;
|
continue;
|
||||||
key = ((u64)addr << 32) | cpu_to_be16(rover);
|
__rds_create_bind_key(key, addr, cpu_to_be16(rover),
|
||||||
if (rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms))
|
scope_id);
|
||||||
|
if (rhashtable_lookup_fast(&bind_hash_table, key, ht_parms))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
rs->rs_bound_key = key;
|
memcpy(rs->rs_bound_key, key, sizeof(rs->rs_bound_key));
|
||||||
rs->rs_bound_addr = addr;
|
rs->rs_bound_addr = *addr;
|
||||||
net_get_random_once(&rs->rs_hash_initval,
|
net_get_random_once(&rs->rs_hash_initval,
|
||||||
sizeof(rs->rs_hash_initval));
|
sizeof(rs->rs_hash_initval));
|
||||||
rs->rs_bound_port = cpu_to_be16(rover);
|
rs->rs_bound_port = cpu_to_be16(rover);
|
||||||
|
@ -114,7 +132,7 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
|
||||||
rs, &addr, (int)ntohs(*port));
|
rs, &addr, (int)ntohs(*port));
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
rs->rs_bound_addr = 0;
|
rs->rs_bound_addr = in6addr_any;
|
||||||
rds_sock_put(rs);
|
rds_sock_put(rs);
|
||||||
ret = -ENOMEM;
|
ret = -ENOMEM;
|
||||||
break;
|
break;
|
||||||
|
@ -127,44 +145,61 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
|
||||||
void rds_remove_bound(struct rds_sock *rs)
|
void rds_remove_bound(struct rds_sock *rs)
|
||||||
{
|
{
|
||||||
|
|
||||||
if (!rs->rs_bound_addr)
|
if (ipv6_addr_any(&rs->rs_bound_addr))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
rdsdebug("rs %p unbinding from %pI4:%d\n",
|
rdsdebug("rs %p unbinding from %pI6c:%d\n",
|
||||||
rs, &rs->rs_bound_addr,
|
rs, &rs->rs_bound_addr,
|
||||||
ntohs(rs->rs_bound_port));
|
ntohs(rs->rs_bound_port));
|
||||||
|
|
||||||
rhashtable_remove_fast(&bind_hash_table, &rs->rs_bound_node, ht_parms);
|
rhashtable_remove_fast(&bind_hash_table, &rs->rs_bound_node, ht_parms);
|
||||||
rds_sock_put(rs);
|
rds_sock_put(rs);
|
||||||
rs->rs_bound_addr = 0;
|
rs->rs_bound_addr = in6addr_any;
|
||||||
}
|
}
|
||||||
|
|
||||||
int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
|
int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
|
||||||
{
|
{
|
||||||
struct sock *sk = sock->sk;
|
struct sock *sk = sock->sk;
|
||||||
struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
|
|
||||||
struct rds_sock *rs = rds_sk_to_rs(sk);
|
struct rds_sock *rs = rds_sk_to_rs(sk);
|
||||||
|
struct in6_addr v6addr, *binding_addr;
|
||||||
struct rds_transport *trans;
|
struct rds_transport *trans;
|
||||||
|
__u32 scope_id = 0;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
__be16 port;
|
||||||
|
|
||||||
|
/* We only allow an RDS socket to be bound to an IPv4 address. IPv6
|
||||||
|
* address support will be added later.
|
||||||
|
*/
|
||||||
|
if (addr_len == sizeof(struct sockaddr_in)) {
|
||||||
|
struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
|
||||||
|
|
||||||
|
if (sin->sin_family != AF_INET ||
|
||||||
|
sin->sin_addr.s_addr == htonl(INADDR_ANY))
|
||||||
|
return -EINVAL;
|
||||||
|
ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &v6addr);
|
||||||
|
binding_addr = &v6addr;
|
||||||
|
port = sin->sin_port;
|
||||||
|
} else if (addr_len == sizeof(struct sockaddr_in6)) {
|
||||||
|
return -EPROTONOSUPPORT;
|
||||||
|
} else {
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
lock_sock(sk);
|
lock_sock(sk);
|
||||||
|
|
||||||
if (addr_len != sizeof(struct sockaddr_in) ||
|
/* RDS socket does not allow re-binding. */
|
||||||
sin->sin_family != AF_INET ||
|
if (!ipv6_addr_any(&rs->rs_bound_addr)) {
|
||||||
rs->rs_bound_addr ||
|
|
||||||
sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
|
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = rds_add_bound(rs, sin->sin_addr.s_addr, &sin->sin_port);
|
ret = rds_add_bound(rs, binding_addr, &port, scope_id);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
if (rs->rs_transport) { /* previously bound */
|
if (rs->rs_transport) { /* previously bound */
|
||||||
trans = rs->rs_transport;
|
trans = rs->rs_transport;
|
||||||
if (trans->laddr_check(sock_net(sock->sk),
|
if (trans->laddr_check(sock_net(sock->sk),
|
||||||
sin->sin_addr.s_addr) != 0) {
|
binding_addr, scope_id) != 0) {
|
||||||
ret = -ENOPROTOOPT;
|
ret = -ENOPROTOOPT;
|
||||||
rds_remove_bound(rs);
|
rds_remove_bound(rs);
|
||||||
} else {
|
} else {
|
||||||
|
@ -172,13 +207,13 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
|
||||||
}
|
}
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
trans = rds_trans_get_preferred(sock_net(sock->sk),
|
trans = rds_trans_get_preferred(sock_net(sock->sk), binding_addr,
|
||||||
sin->sin_addr.s_addr);
|
scope_id);
|
||||||
if (!trans) {
|
if (!trans) {
|
||||||
ret = -EADDRNOTAVAIL;
|
ret = -EADDRNOTAVAIL;
|
||||||
rds_remove_bound(rs);
|
rds_remove_bound(rs);
|
||||||
pr_info_ratelimited("RDS: %s could not find a transport for %pI4, load rds_tcp or rds_rdma?\n",
|
pr_info_ratelimited("RDS: %s could not find a transport for %pI6c, load rds_tcp or rds_rdma?\n",
|
||||||
__func__, &sin->sin_addr.s_addr);
|
__func__, binding_addr);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2007 Oracle. All rights reserved.
|
* Copyright (c) 2007, 2017 Oracle and/or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* This software is available to you under a choice of one of two
|
* This software is available to you under a choice of one of two
|
||||||
* licenses. You may choose to be licensed under the terms of the GNU
|
* licenses. You may choose to be licensed under the terms of the GNU
|
||||||
|
@ -101,7 +101,7 @@ static DEFINE_RWLOCK(rds_cong_monitor_lock);
|
||||||
static DEFINE_SPINLOCK(rds_cong_lock);
|
static DEFINE_SPINLOCK(rds_cong_lock);
|
||||||
static struct rb_root rds_cong_tree = RB_ROOT;
|
static struct rb_root rds_cong_tree = RB_ROOT;
|
||||||
|
|
||||||
static struct rds_cong_map *rds_cong_tree_walk(__be32 addr,
|
static struct rds_cong_map *rds_cong_tree_walk(const struct in6_addr *addr,
|
||||||
struct rds_cong_map *insert)
|
struct rds_cong_map *insert)
|
||||||
{
|
{
|
||||||
struct rb_node **p = &rds_cong_tree.rb_node;
|
struct rb_node **p = &rds_cong_tree.rb_node;
|
||||||
|
@ -109,12 +109,15 @@ static struct rds_cong_map *rds_cong_tree_walk(__be32 addr,
|
||||||
struct rds_cong_map *map;
|
struct rds_cong_map *map;
|
||||||
|
|
||||||
while (*p) {
|
while (*p) {
|
||||||
|
int diff;
|
||||||
|
|
||||||
parent = *p;
|
parent = *p;
|
||||||
map = rb_entry(parent, struct rds_cong_map, m_rb_node);
|
map = rb_entry(parent, struct rds_cong_map, m_rb_node);
|
||||||
|
|
||||||
if (addr < map->m_addr)
|
diff = rds_addr_cmp(addr, &map->m_addr);
|
||||||
|
if (diff < 0)
|
||||||
p = &(*p)->rb_left;
|
p = &(*p)->rb_left;
|
||||||
else if (addr > map->m_addr)
|
else if (diff > 0)
|
||||||
p = &(*p)->rb_right;
|
p = &(*p)->rb_right;
|
||||||
else
|
else
|
||||||
return map;
|
return map;
|
||||||
|
@ -132,7 +135,7 @@ static struct rds_cong_map *rds_cong_tree_walk(__be32 addr,
|
||||||
* these bitmaps in the process getting pointers to them. The bitmaps are only
|
* these bitmaps in the process getting pointers to them. The bitmaps are only
|
||||||
* ever freed as the module is removed after all connections have been freed.
|
* ever freed as the module is removed after all connections have been freed.
|
||||||
*/
|
*/
|
||||||
static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
|
static struct rds_cong_map *rds_cong_from_addr(const struct in6_addr *addr)
|
||||||
{
|
{
|
||||||
struct rds_cong_map *map;
|
struct rds_cong_map *map;
|
||||||
struct rds_cong_map *ret = NULL;
|
struct rds_cong_map *ret = NULL;
|
||||||
|
@ -144,7 +147,7 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
|
||||||
if (!map)
|
if (!map)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
map->m_addr = addr;
|
map->m_addr = *addr;
|
||||||
init_waitqueue_head(&map->m_waitq);
|
init_waitqueue_head(&map->m_waitq);
|
||||||
INIT_LIST_HEAD(&map->m_conn_list);
|
INIT_LIST_HEAD(&map->m_conn_list);
|
||||||
|
|
||||||
|
@ -171,7 +174,7 @@ out:
|
||||||
kfree(map);
|
kfree(map);
|
||||||
}
|
}
|
||||||
|
|
||||||
rdsdebug("map %p for addr %x\n", ret, be32_to_cpu(addr));
|
rdsdebug("map %p for addr %pI6c\n", ret, addr);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -202,8 +205,8 @@ void rds_cong_remove_conn(struct rds_connection *conn)
|
||||||
|
|
||||||
int rds_cong_get_maps(struct rds_connection *conn)
|
int rds_cong_get_maps(struct rds_connection *conn)
|
||||||
{
|
{
|
||||||
conn->c_lcong = rds_cong_from_addr(conn->c_laddr);
|
conn->c_lcong = rds_cong_from_addr(&conn->c_laddr);
|
||||||
conn->c_fcong = rds_cong_from_addr(conn->c_faddr);
|
conn->c_fcong = rds_cong_from_addr(&conn->c_faddr);
|
||||||
|
|
||||||
if (!(conn->c_lcong && conn->c_fcong))
|
if (!(conn->c_lcong && conn->c_fcong))
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
@ -353,7 +356,7 @@ void rds_cong_remove_socket(struct rds_sock *rs)
|
||||||
|
|
||||||
/* update congestion map for now-closed port */
|
/* update congestion map for now-closed port */
|
||||||
spin_lock_irqsave(&rds_cong_lock, flags);
|
spin_lock_irqsave(&rds_cong_lock, flags);
|
||||||
map = rds_cong_tree_walk(rs->rs_bound_addr, NULL);
|
map = rds_cong_tree_walk(&rs->rs_bound_addr, NULL);
|
||||||
spin_unlock_irqrestore(&rds_cong_lock, flags);
|
spin_unlock_irqrestore(&rds_cong_lock, flags);
|
||||||
|
|
||||||
if (map && rds_cong_test_bit(map, rs->rs_bound_port)) {
|
if (map && rds_cong_test_bit(map, rs->rs_bound_port)) {
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2006 Oracle. All rights reserved.
|
* Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* This software is available to you under a choice of one of two
|
* This software is available to you under a choice of one of two
|
||||||
* licenses. You may choose to be licensed under the terms of the GNU
|
* licenses. You may choose to be licensed under the terms of the GNU
|
||||||
|
@ -34,7 +34,8 @@
|
||||||
#include <linux/list.h>
|
#include <linux/list.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/export.h>
|
#include <linux/export.h>
|
||||||
#include <net/inet_hashtables.h>
|
#include <net/ipv6.h>
|
||||||
|
#include <net/inet6_hashtables.h>
|
||||||
|
|
||||||
#include "rds.h"
|
#include "rds.h"
|
||||||
#include "loop.h"
|
#include "loop.h"
|
||||||
|
@ -49,18 +50,21 @@ static unsigned long rds_conn_count;
|
||||||
static struct hlist_head rds_conn_hash[RDS_CONNECTION_HASH_ENTRIES];
|
static struct hlist_head rds_conn_hash[RDS_CONNECTION_HASH_ENTRIES];
|
||||||
static struct kmem_cache *rds_conn_slab;
|
static struct kmem_cache *rds_conn_slab;
|
||||||
|
|
||||||
static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr)
|
static struct hlist_head *rds_conn_bucket(const struct in6_addr *laddr,
|
||||||
|
const struct in6_addr *faddr)
|
||||||
{
|
{
|
||||||
|
static u32 rds6_hash_secret __read_mostly;
|
||||||
static u32 rds_hash_secret __read_mostly;
|
static u32 rds_hash_secret __read_mostly;
|
||||||
|
|
||||||
unsigned long hash;
|
u32 lhash, fhash, hash;
|
||||||
|
|
||||||
net_get_random_once(&rds_hash_secret, sizeof(rds_hash_secret));
|
net_get_random_once(&rds_hash_secret, sizeof(rds_hash_secret));
|
||||||
|
net_get_random_once(&rds6_hash_secret, sizeof(rds6_hash_secret));
|
||||||
|
|
||||||
|
lhash = (__force u32)laddr->s6_addr32[3];
|
||||||
|
fhash = __ipv6_addr_jhash(faddr, rds6_hash_secret);
|
||||||
|
hash = __inet6_ehashfn(lhash, 0, fhash, 0, rds_hash_secret);
|
||||||
|
|
||||||
/* Pass NULL, don't need struct net for hash */
|
|
||||||
hash = __inet_ehashfn(be32_to_cpu(laddr), 0,
|
|
||||||
be32_to_cpu(faddr), 0,
|
|
||||||
rds_hash_secret);
|
|
||||||
return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK];
|
return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -72,20 +76,25 @@ static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr)
|
||||||
/* rcu read lock must be held or the connection spinlock */
|
/* rcu read lock must be held or the connection spinlock */
|
||||||
static struct rds_connection *rds_conn_lookup(struct net *net,
|
static struct rds_connection *rds_conn_lookup(struct net *net,
|
||||||
struct hlist_head *head,
|
struct hlist_head *head,
|
||||||
__be32 laddr, __be32 faddr,
|
const struct in6_addr *laddr,
|
||||||
struct rds_transport *trans)
|
const struct in6_addr *faddr,
|
||||||
|
struct rds_transport *trans,
|
||||||
|
int dev_if)
|
||||||
{
|
{
|
||||||
struct rds_connection *conn, *ret = NULL;
|
struct rds_connection *conn, *ret = NULL;
|
||||||
|
|
||||||
hlist_for_each_entry_rcu(conn, head, c_hash_node) {
|
hlist_for_each_entry_rcu(conn, head, c_hash_node) {
|
||||||
if (conn->c_faddr == faddr && conn->c_laddr == laddr &&
|
if (ipv6_addr_equal(&conn->c_faddr, faddr) &&
|
||||||
conn->c_trans == trans && net == rds_conn_net(conn)) {
|
ipv6_addr_equal(&conn->c_laddr, laddr) &&
|
||||||
|
conn->c_trans == trans &&
|
||||||
|
net == rds_conn_net(conn) &&
|
||||||
|
conn->c_dev_if == dev_if) {
|
||||||
ret = conn;
|
ret = conn;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
rdsdebug("returning conn %p for %pI4 -> %pI4\n", ret,
|
rdsdebug("returning conn %p for %pI6c -> %pI6c\n", ret,
|
||||||
&laddr, &faddr);
|
laddr, faddr);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -99,7 +108,7 @@ static void rds_conn_path_reset(struct rds_conn_path *cp)
|
||||||
{
|
{
|
||||||
struct rds_connection *conn = cp->cp_conn;
|
struct rds_connection *conn = cp->cp_conn;
|
||||||
|
|
||||||
rdsdebug("connection %pI4 to %pI4 reset\n",
|
rdsdebug("connection %pI6c to %pI6c reset\n",
|
||||||
&conn->c_laddr, &conn->c_faddr);
|
&conn->c_laddr, &conn->c_faddr);
|
||||||
|
|
||||||
rds_stats_inc(s_conn_reset);
|
rds_stats_inc(s_conn_reset);
|
||||||
|
@ -142,9 +151,12 @@ static void __rds_conn_path_init(struct rds_connection *conn,
|
||||||
* are torn down as the module is removed, if ever.
|
* are torn down as the module is removed, if ever.
|
||||||
*/
|
*/
|
||||||
static struct rds_connection *__rds_conn_create(struct net *net,
|
static struct rds_connection *__rds_conn_create(struct net *net,
|
||||||
__be32 laddr, __be32 faddr,
|
const struct in6_addr *laddr,
|
||||||
struct rds_transport *trans, gfp_t gfp,
|
const struct in6_addr *faddr,
|
||||||
int is_outgoing)
|
struct rds_transport *trans,
|
||||||
|
gfp_t gfp,
|
||||||
|
int is_outgoing,
|
||||||
|
int dev_if)
|
||||||
{
|
{
|
||||||
struct rds_connection *conn, *parent = NULL;
|
struct rds_connection *conn, *parent = NULL;
|
||||||
struct hlist_head *head = rds_conn_bucket(laddr, faddr);
|
struct hlist_head *head = rds_conn_bucket(laddr, faddr);
|
||||||
|
@ -154,9 +166,12 @@ static struct rds_connection *__rds_conn_create(struct net *net,
|
||||||
int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
|
int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
conn = rds_conn_lookup(net, head, laddr, faddr, trans);
|
conn = rds_conn_lookup(net, head, laddr, faddr, trans, dev_if);
|
||||||
if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport &&
|
if (conn &&
|
||||||
laddr == faddr && !is_outgoing) {
|
conn->c_loopback &&
|
||||||
|
conn->c_trans != &rds_loop_transport &&
|
||||||
|
ipv6_addr_equal(laddr, faddr) &&
|
||||||
|
!is_outgoing) {
|
||||||
/* This is a looped back IB connection, and we're
|
/* This is a looped back IB connection, and we're
|
||||||
* called by the code handling the incoming connect.
|
* called by the code handling the incoming connect.
|
||||||
* We need a second connection object into which we
|
* We need a second connection object into which we
|
||||||
|
@ -181,8 +196,10 @@ static struct rds_connection *__rds_conn_create(struct net *net,
|
||||||
}
|
}
|
||||||
|
|
||||||
INIT_HLIST_NODE(&conn->c_hash_node);
|
INIT_HLIST_NODE(&conn->c_hash_node);
|
||||||
conn->c_laddr = laddr;
|
conn->c_laddr = *laddr;
|
||||||
conn->c_faddr = faddr;
|
conn->c_isv6 = !ipv6_addr_v4mapped(laddr);
|
||||||
|
conn->c_faddr = *faddr;
|
||||||
|
conn->c_dev_if = dev_if;
|
||||||
|
|
||||||
rds_conn_net_set(conn, net);
|
rds_conn_net_set(conn, net);
|
||||||
|
|
||||||
|
@ -199,7 +216,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
|
||||||
* can bind to the destination address then we'd rather the messages
|
* can bind to the destination address then we'd rather the messages
|
||||||
* flow through loopback rather than either transport.
|
* flow through loopback rather than either transport.
|
||||||
*/
|
*/
|
||||||
loop_trans = rds_trans_get_preferred(net, faddr);
|
loop_trans = rds_trans_get_preferred(net, faddr, conn->c_dev_if);
|
||||||
if (loop_trans) {
|
if (loop_trans) {
|
||||||
rds_trans_put(loop_trans);
|
rds_trans_put(loop_trans);
|
||||||
conn->c_loopback = 1;
|
conn->c_loopback = 1;
|
||||||
|
@ -233,10 +250,10 @@ static struct rds_connection *__rds_conn_create(struct net *net,
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n",
|
rdsdebug("allocated conn %p for %pI6c -> %pI6c over %s %s\n",
|
||||||
conn, &laddr, &faddr,
|
conn, laddr, faddr,
|
||||||
strnlen(trans->t_name, sizeof(trans->t_name)) ? trans->t_name :
|
strnlen(trans->t_name, sizeof(trans->t_name)) ?
|
||||||
"[unknown]", is_outgoing ? "(outgoing)" : "");
|
trans->t_name : "[unknown]", is_outgoing ? "(outgoing)" : "");
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Since we ran without holding the conn lock, someone could
|
* Since we ran without holding the conn lock, someone could
|
||||||
|
@ -262,7 +279,8 @@ static struct rds_connection *__rds_conn_create(struct net *net,
|
||||||
/* Creating normal conn */
|
/* Creating normal conn */
|
||||||
struct rds_connection *found;
|
struct rds_connection *found;
|
||||||
|
|
||||||
found = rds_conn_lookup(net, head, laddr, faddr, trans);
|
found = rds_conn_lookup(net, head, laddr, faddr, trans,
|
||||||
|
dev_if);
|
||||||
if (found) {
|
if (found) {
|
||||||
struct rds_conn_path *cp;
|
struct rds_conn_path *cp;
|
||||||
int i;
|
int i;
|
||||||
|
@ -295,18 +313,22 @@ out:
|
||||||
}
|
}
|
||||||
|
|
||||||
struct rds_connection *rds_conn_create(struct net *net,
|
struct rds_connection *rds_conn_create(struct net *net,
|
||||||
__be32 laddr, __be32 faddr,
|
const struct in6_addr *laddr,
|
||||||
struct rds_transport *trans, gfp_t gfp)
|
const struct in6_addr *faddr,
|
||||||
|
struct rds_transport *trans, gfp_t gfp,
|
||||||
|
int dev_if)
|
||||||
{
|
{
|
||||||
return __rds_conn_create(net, laddr, faddr, trans, gfp, 0);
|
return __rds_conn_create(net, laddr, faddr, trans, gfp, 0, dev_if);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(rds_conn_create);
|
EXPORT_SYMBOL_GPL(rds_conn_create);
|
||||||
|
|
||||||
struct rds_connection *rds_conn_create_outgoing(struct net *net,
|
struct rds_connection *rds_conn_create_outgoing(struct net *net,
|
||||||
__be32 laddr, __be32 faddr,
|
const struct in6_addr *laddr,
|
||||||
struct rds_transport *trans, gfp_t gfp)
|
const struct in6_addr *faddr,
|
||||||
|
struct rds_transport *trans,
|
||||||
|
gfp_t gfp, int dev_if)
|
||||||
{
|
{
|
||||||
return __rds_conn_create(net, laddr, faddr, trans, gfp, 1);
|
return __rds_conn_create(net, laddr, faddr, trans, gfp, 1, dev_if);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
|
EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
|
||||||
|
|
||||||
|
@ -502,12 +524,17 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
|
||||||
|
|
||||||
/* XXX too lazy to maintain counts.. */
|
/* XXX too lazy to maintain counts.. */
|
||||||
list_for_each_entry(rm, list, m_conn_item) {
|
list_for_each_entry(rm, list, m_conn_item) {
|
||||||
|
__be32 laddr;
|
||||||
|
__be32 faddr;
|
||||||
|
|
||||||
total++;
|
total++;
|
||||||
|
laddr = conn->c_laddr.s6_addr32[3];
|
||||||
|
faddr = conn->c_faddr.s6_addr32[3];
|
||||||
if (total <= len)
|
if (total <= len)
|
||||||
rds_inc_info_copy(&rm->m_inc,
|
rds_inc_info_copy(&rm->m_inc,
|
||||||
iter,
|
iter,
|
||||||
conn->c_laddr,
|
laddr,
|
||||||
conn->c_faddr,
|
faddr,
|
||||||
0);
|
0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -584,7 +611,6 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
|
||||||
struct hlist_head *head;
|
struct hlist_head *head;
|
||||||
struct rds_connection *conn;
|
struct rds_connection *conn;
|
||||||
size_t i;
|
size_t i;
|
||||||
int j;
|
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
|
|
||||||
|
@ -595,17 +621,20 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
|
||||||
i++, head++) {
|
i++, head++) {
|
||||||
hlist_for_each_entry_rcu(conn, head, c_hash_node) {
|
hlist_for_each_entry_rcu(conn, head, c_hash_node) {
|
||||||
struct rds_conn_path *cp;
|
struct rds_conn_path *cp;
|
||||||
int npaths;
|
|
||||||
|
|
||||||
npaths = (conn->c_trans->t_mp_capable ?
|
/* XXX We only copy the information from the first
|
||||||
RDS_MPATH_WORKERS : 1);
|
* path for now. The problem is that if there is
|
||||||
for (j = 0; j < npaths; j++) {
|
* more than one underlying path, we cannot report
|
||||||
cp = &conn->c_path[j];
|
* information of all of them using the existing
|
||||||
|
* API. For example, there is only one next_tx_seq,
|
||||||
|
* which path's next_tx_seq should we report? It is
|
||||||
|
* a bug in the design of MPRDS.
|
||||||
|
*/
|
||||||
|
cp = conn->c_path;
|
||||||
|
|
||||||
/* XXX no cp_lock usage.. */
|
/* XXX no cp_lock usage.. */
|
||||||
if (!visitor(cp, buffer))
|
if (!visitor(cp, buffer))
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
|
|
||||||
/* We copy as much as we can fit in the buffer,
|
/* We copy as much as we can fit in the buffer,
|
||||||
* but we count all items so that the caller
|
* but we count all items so that the caller
|
||||||
|
@ -624,12 +653,13 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
|
||||||
static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer)
|
static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer)
|
||||||
{
|
{
|
||||||
struct rds_info_connection *cinfo = buffer;
|
struct rds_info_connection *cinfo = buffer;
|
||||||
|
struct rds_connection *conn = cp->cp_conn;
|
||||||
|
|
||||||
cinfo->next_tx_seq = cp->cp_next_tx_seq;
|
cinfo->next_tx_seq = cp->cp_next_tx_seq;
|
||||||
cinfo->next_rx_seq = cp->cp_next_rx_seq;
|
cinfo->next_rx_seq = cp->cp_next_rx_seq;
|
||||||
cinfo->laddr = cp->cp_conn->c_laddr;
|
cinfo->laddr = conn->c_laddr.s6_addr32[3];
|
||||||
cinfo->faddr = cp->cp_conn->c_faddr;
|
cinfo->faddr = conn->c_faddr.s6_addr32[3];
|
||||||
strncpy(cinfo->transport, cp->cp_conn->c_trans->t_name,
|
strncpy(cinfo->transport, conn->c_trans->t_name,
|
||||||
sizeof(cinfo->transport));
|
sizeof(cinfo->transport));
|
||||||
cinfo->flags = 0;
|
cinfo->flags = 0;
|
||||||
|
|
||||||
|
|
15
net/rds/ib.c
15
net/rds/ib.c
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2006 Oracle. All rights reserved.
|
* Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* This software is available to you under a choice of one of two
|
* This software is available to you under a choice of one of two
|
||||||
* licenses. You may choose to be licensed under the terms of the GNU
|
* licenses. You may choose to be licensed under the terms of the GNU
|
||||||
|
@ -296,8 +296,8 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
|
||||||
if (conn->c_trans != &rds_ib_transport)
|
if (conn->c_trans != &rds_ib_transport)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
iinfo->src_addr = conn->c_laddr;
|
iinfo->src_addr = conn->c_laddr.s6_addr32[3];
|
||||||
iinfo->dst_addr = conn->c_faddr;
|
iinfo->dst_addr = conn->c_faddr.s6_addr32[3];
|
||||||
|
|
||||||
memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
|
memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
|
||||||
memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
|
memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
|
||||||
|
@ -341,7 +341,8 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len,
|
||||||
* allowed to influence which paths have priority. We could call userspace
|
* allowed to influence which paths have priority. We could call userspace
|
||||||
* asserting this policy "routing".
|
* asserting this policy "routing".
|
||||||
*/
|
*/
|
||||||
static int rds_ib_laddr_check(struct net *net, __be32 addr)
|
static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr,
|
||||||
|
__u32 scope_id)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
struct rdma_cm_id *cm_id;
|
struct rdma_cm_id *cm_id;
|
||||||
|
@ -357,7 +358,7 @@ static int rds_ib_laddr_check(struct net *net, __be32 addr)
|
||||||
|
|
||||||
memset(&sin, 0, sizeof(sin));
|
memset(&sin, 0, sizeof(sin));
|
||||||
sin.sin_family = AF_INET;
|
sin.sin_family = AF_INET;
|
||||||
sin.sin_addr.s_addr = addr;
|
sin.sin_addr.s_addr = addr->s6_addr32[3];
|
||||||
|
|
||||||
/* rdma_bind_addr will only succeed for IB & iWARP devices */
|
/* rdma_bind_addr will only succeed for IB & iWARP devices */
|
||||||
ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
|
ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
|
||||||
|
@ -367,8 +368,8 @@ static int rds_ib_laddr_check(struct net *net, __be32 addr)
|
||||||
cm_id->device->node_type != RDMA_NODE_IB_CA)
|
cm_id->device->node_type != RDMA_NODE_IB_CA)
|
||||||
ret = -EADDRNOTAVAIL;
|
ret = -EADDRNOTAVAIL;
|
||||||
|
|
||||||
rdsdebug("addr %pI4 ret %d node type %d\n",
|
rdsdebug("addr %pI6c ret %d node type %d\n",
|
||||||
&addr, ret,
|
addr, ret,
|
||||||
cm_id->device ? cm_id->device->node_type : -1);
|
cm_id->device ? cm_id->device->node_type : -1);
|
||||||
|
|
||||||
rdma_destroy_id(cm_id);
|
rdma_destroy_id(cm_id);
|
||||||
|
|
47
net/rds/ib.h
47
net/rds/ib.h
|
@ -57,16 +57,44 @@ struct rds_ib_refill_cache {
|
||||||
struct list_head *ready;
|
struct list_head *ready;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* This is the common structure for the IB private data exchange in setting up
|
||||||
|
* an RDS connection. The exchange is different for IPv4 and IPv6 connections.
|
||||||
|
* The reason is that the address size is different and the addresses
|
||||||
|
* exchanged are at the beginning of the structure. Hence interoperability
|
||||||
|
* is not possible if the same structure is used.
|
||||||
|
*/
|
||||||
|
struct rds_ib_conn_priv_cmn {
|
||||||
|
u8 ricpc_protocol_major;
|
||||||
|
u8 ricpc_protocol_minor;
|
||||||
|
__be16 ricpc_protocol_minor_mask; /* bitmask */
|
||||||
|
__be32 ricpc_reserved1;
|
||||||
|
__be64 ricpc_ack_seq;
|
||||||
|
__be32 ricpc_credit; /* non-zero enables flow ctl */
|
||||||
|
};
|
||||||
|
|
||||||
struct rds_ib_connect_private {
|
struct rds_ib_connect_private {
|
||||||
/* Add new fields at the end, and don't permute existing fields. */
|
/* Add new fields at the end, and don't permute existing fields. */
|
||||||
__be32 dp_saddr;
|
__be32 dp_saddr;
|
||||||
__be32 dp_daddr;
|
__be32 dp_daddr;
|
||||||
u8 dp_protocol_major;
|
struct rds_ib_conn_priv_cmn dp_cmn;
|
||||||
u8 dp_protocol_minor;
|
};
|
||||||
__be16 dp_protocol_minor_mask; /* bitmask */
|
|
||||||
__be32 dp_reserved1;
|
struct rds6_ib_connect_private {
|
||||||
__be64 dp_ack_seq;
|
/* Add new fields at the end, and don't permute existing fields. */
|
||||||
__be32 dp_credit; /* non-zero enables flow ctl */
|
struct in6_addr dp_saddr;
|
||||||
|
struct in6_addr dp_daddr;
|
||||||
|
struct rds_ib_conn_priv_cmn dp_cmn;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define dp_protocol_major dp_cmn.ricpc_protocol_major
|
||||||
|
#define dp_protocol_minor dp_cmn.ricpc_protocol_minor
|
||||||
|
#define dp_protocol_minor_mask dp_cmn.ricpc_protocol_minor_mask
|
||||||
|
#define dp_ack_seq dp_cmn.ricpc_ack_seq
|
||||||
|
#define dp_credit dp_cmn.ricpc_credit
|
||||||
|
|
||||||
|
union rds_ib_conn_priv {
|
||||||
|
struct rds_ib_connect_private ricp_v4;
|
||||||
|
struct rds6_ib_connect_private ricp_v6;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct rds_ib_send_work {
|
struct rds_ib_send_work {
|
||||||
|
@ -351,8 +379,8 @@ void rds_ib_listen_stop(void);
|
||||||
__printf(2, 3)
|
__printf(2, 3)
|
||||||
void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...);
|
void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...);
|
||||||
int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
|
int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
|
||||||
struct rdma_cm_event *event);
|
struct rdma_cm_event *event, bool isv6);
|
||||||
int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id);
|
int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6);
|
||||||
void rds_ib_cm_connect_complete(struct rds_connection *conn,
|
void rds_ib_cm_connect_complete(struct rds_connection *conn,
|
||||||
struct rdma_cm_event *event);
|
struct rdma_cm_event *event);
|
||||||
|
|
||||||
|
@ -361,7 +389,8 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn,
|
||||||
__rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt)
|
__rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt)
|
||||||
|
|
||||||
/* ib_rdma.c */
|
/* ib_rdma.c */
|
||||||
int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
|
int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev,
|
||||||
|
struct in6_addr *ipaddr);
|
||||||
void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
|
void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
|
||||||
void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
|
void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
|
||||||
void rds_ib_destroy_nodev_conns(void);
|
void rds_ib_destroy_nodev_conns(void);
|
||||||
|
|
285
net/rds/ib_cm.c
285
net/rds/ib_cm.c
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2006 Oracle. All rights reserved.
|
* Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* This software is available to you under a choice of one of two
|
* This software is available to you under a choice of one of two
|
||||||
* licenses. You may choose to be licensed under the terms of the GNU
|
* licenses. You may choose to be licensed under the terms of the GNU
|
||||||
|
@ -35,6 +35,7 @@
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/vmalloc.h>
|
#include <linux/vmalloc.h>
|
||||||
#include <linux/ratelimit.h>
|
#include <linux/ratelimit.h>
|
||||||
|
#include <net/addrconf.h>
|
||||||
|
|
||||||
#include "rds_single_path.h"
|
#include "rds_single_path.h"
|
||||||
#include "rds.h"
|
#include "rds.h"
|
||||||
|
@ -95,25 +96,45 @@ rds_ib_tune_rnr(struct rds_ib_connection *ic, struct ib_qp_attr *attr)
|
||||||
*/
|
*/
|
||||||
void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event)
|
void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event)
|
||||||
{
|
{
|
||||||
const struct rds_ib_connect_private *dp = NULL;
|
|
||||||
struct rds_ib_connection *ic = conn->c_transport_data;
|
struct rds_ib_connection *ic = conn->c_transport_data;
|
||||||
|
const union rds_ib_conn_priv *dp = NULL;
|
||||||
struct ib_qp_attr qp_attr;
|
struct ib_qp_attr qp_attr;
|
||||||
|
__be64 ack_seq = 0;
|
||||||
|
__be32 credit = 0;
|
||||||
|
u8 major = 0;
|
||||||
|
u8 minor = 0;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
if (event->param.conn.private_data_len >= sizeof(*dp)) {
|
|
||||||
dp = event->param.conn.private_data;
|
dp = event->param.conn.private_data;
|
||||||
|
if (conn->c_isv6) {
|
||||||
|
if (event->param.conn.private_data_len >=
|
||||||
|
sizeof(struct rds6_ib_connect_private)) {
|
||||||
|
major = dp->ricp_v6.dp_protocol_major;
|
||||||
|
minor = dp->ricp_v6.dp_protocol_minor;
|
||||||
|
credit = dp->ricp_v6.dp_credit;
|
||||||
|
/* dp structure start is not guaranteed to be 8 bytes
|
||||||
|
* aligned. Since dp_ack_seq is 64-bit extended load
|
||||||
|
* operations can be used so go through get_unaligned
|
||||||
|
* to avoid unaligned errors.
|
||||||
|
*/
|
||||||
|
ack_seq = get_unaligned(&dp->ricp_v6.dp_ack_seq);
|
||||||
|
}
|
||||||
|
} else if (event->param.conn.private_data_len >=
|
||||||
|
sizeof(struct rds_ib_connect_private)) {
|
||||||
|
major = dp->ricp_v4.dp_protocol_major;
|
||||||
|
minor = dp->ricp_v4.dp_protocol_minor;
|
||||||
|
credit = dp->ricp_v4.dp_credit;
|
||||||
|
ack_seq = get_unaligned(&dp->ricp_v4.dp_ack_seq);
|
||||||
|
}
|
||||||
|
|
||||||
/* make sure it isn't empty data */
|
/* make sure it isn't empty data */
|
||||||
if (dp->dp_protocol_major) {
|
if (major) {
|
||||||
rds_ib_set_protocol(conn,
|
rds_ib_set_protocol(conn, RDS_PROTOCOL(major, minor));
|
||||||
RDS_PROTOCOL(dp->dp_protocol_major,
|
rds_ib_set_flow_control(conn, be32_to_cpu(credit));
|
||||||
dp->dp_protocol_minor));
|
|
||||||
rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (conn->c_version < RDS_PROTOCOL(3, 1)) {
|
if (conn->c_version < RDS_PROTOCOL(3, 1)) {
|
||||||
pr_notice("RDS/IB: Connection <%pI4,%pI4> version %u.%u no longer supported\n",
|
pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n",
|
||||||
&conn->c_laddr, &conn->c_faddr,
|
&conn->c_laddr, &conn->c_faddr,
|
||||||
RDS_PROTOCOL_MAJOR(conn->c_version),
|
RDS_PROTOCOL_MAJOR(conn->c_version),
|
||||||
RDS_PROTOCOL_MINOR(conn->c_version));
|
RDS_PROTOCOL_MINOR(conn->c_version));
|
||||||
|
@ -121,7 +142,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
|
||||||
rds_conn_destroy(conn);
|
rds_conn_destroy(conn);
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
pr_notice("RDS/IB: %s conn connected <%pI4,%pI4> version %u.%u%s\n",
|
pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c> version %u.%u%s\n",
|
||||||
ic->i_active_side ? "Active" : "Passive",
|
ic->i_active_side ? "Active" : "Passive",
|
||||||
&conn->c_laddr, &conn->c_faddr,
|
&conn->c_laddr, &conn->c_faddr,
|
||||||
RDS_PROTOCOL_MAJOR(conn->c_version),
|
RDS_PROTOCOL_MAJOR(conn->c_version),
|
||||||
|
@ -150,7 +171,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
|
||||||
printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err);
|
printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err);
|
||||||
|
|
||||||
/* update ib_device with this local ipaddr */
|
/* update ib_device with this local ipaddr */
|
||||||
err = rds_ib_update_ipaddr(ic->rds_ibdev, conn->c_laddr);
|
err = rds_ib_update_ipaddr(ic->rds_ibdev, &conn->c_laddr);
|
||||||
if (err)
|
if (err)
|
||||||
printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n",
|
printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n",
|
||||||
err);
|
err);
|
||||||
|
@ -158,14 +179,8 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
|
||||||
/* If the peer gave us the last packet it saw, process this as if
|
/* If the peer gave us the last packet it saw, process this as if
|
||||||
* we had received a regular ACK. */
|
* we had received a regular ACK. */
|
||||||
if (dp) {
|
if (dp) {
|
||||||
/* dp structure start is not guaranteed to be 8 bytes aligned.
|
if (ack_seq)
|
||||||
* Since dp_ack_seq is 64-bit extended load operations can be
|
rds_send_drop_acked(conn, be64_to_cpu(ack_seq),
|
||||||
* used so go through get_unaligned to avoid unaligned errors.
|
|
||||||
*/
|
|
||||||
__be64 dp_ack_seq = get_unaligned(&dp->dp_ack_seq);
|
|
||||||
|
|
||||||
if (dp_ack_seq)
|
|
||||||
rds_send_drop_acked(conn, be64_to_cpu(dp_ack_seq),
|
|
||||||
NULL);
|
NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -174,10 +189,11 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
|
||||||
|
|
||||||
static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
|
static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
|
||||||
struct rdma_conn_param *conn_param,
|
struct rdma_conn_param *conn_param,
|
||||||
struct rds_ib_connect_private *dp,
|
union rds_ib_conn_priv *dp,
|
||||||
u32 protocol_version,
|
u32 protocol_version,
|
||||||
u32 max_responder_resources,
|
u32 max_responder_resources,
|
||||||
u32 max_initiator_depth)
|
u32 max_initiator_depth,
|
||||||
|
bool isv6)
|
||||||
{
|
{
|
||||||
struct rds_ib_connection *ic = conn->c_transport_data;
|
struct rds_ib_connection *ic = conn->c_transport_data;
|
||||||
struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
|
struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
|
||||||
|
@ -193,24 +209,49 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
|
||||||
|
|
||||||
if (dp) {
|
if (dp) {
|
||||||
memset(dp, 0, sizeof(*dp));
|
memset(dp, 0, sizeof(*dp));
|
||||||
dp->dp_saddr = conn->c_laddr;
|
if (isv6) {
|
||||||
dp->dp_daddr = conn->c_faddr;
|
dp->ricp_v6.dp_saddr = conn->c_laddr;
|
||||||
dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version);
|
dp->ricp_v6.dp_daddr = conn->c_faddr;
|
||||||
dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version);
|
dp->ricp_v6.dp_protocol_major =
|
||||||
dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
|
RDS_PROTOCOL_MAJOR(protocol_version);
|
||||||
dp->dp_ack_seq = cpu_to_be64(rds_ib_piggyb_ack(ic));
|
dp->ricp_v6.dp_protocol_minor =
|
||||||
|
RDS_PROTOCOL_MINOR(protocol_version);
|
||||||
|
dp->ricp_v6.dp_protocol_minor_mask =
|
||||||
|
cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
|
||||||
|
dp->ricp_v6.dp_ack_seq =
|
||||||
|
cpu_to_be64(rds_ib_piggyb_ack(ic));
|
||||||
|
|
||||||
|
conn_param->private_data = &dp->ricp_v6;
|
||||||
|
conn_param->private_data_len = sizeof(dp->ricp_v6);
|
||||||
|
} else {
|
||||||
|
dp->ricp_v4.dp_saddr = conn->c_laddr.s6_addr32[3];
|
||||||
|
dp->ricp_v4.dp_daddr = conn->c_faddr.s6_addr32[3];
|
||||||
|
dp->ricp_v4.dp_protocol_major =
|
||||||
|
RDS_PROTOCOL_MAJOR(protocol_version);
|
||||||
|
dp->ricp_v4.dp_protocol_minor =
|
||||||
|
RDS_PROTOCOL_MINOR(protocol_version);
|
||||||
|
dp->ricp_v4.dp_protocol_minor_mask =
|
||||||
|
cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
|
||||||
|
dp->ricp_v4.dp_ack_seq =
|
||||||
|
cpu_to_be64(rds_ib_piggyb_ack(ic));
|
||||||
|
|
||||||
|
conn_param->private_data = &dp->ricp_v4;
|
||||||
|
conn_param->private_data_len = sizeof(dp->ricp_v4);
|
||||||
|
}
|
||||||
|
|
||||||
/* Advertise flow control */
|
/* Advertise flow control */
|
||||||
if (ic->i_flowctl) {
|
if (ic->i_flowctl) {
|
||||||
unsigned int credits;
|
unsigned int credits;
|
||||||
|
|
||||||
credits = IB_GET_POST_CREDITS(atomic_read(&ic->i_credits));
|
credits = IB_GET_POST_CREDITS
|
||||||
dp->dp_credit = cpu_to_be32(credits);
|
(atomic_read(&ic->i_credits));
|
||||||
atomic_sub(IB_SET_POST_CREDITS(credits), &ic->i_credits);
|
if (isv6)
|
||||||
|
dp->ricp_v6.dp_credit = cpu_to_be32(credits);
|
||||||
|
else
|
||||||
|
dp->ricp_v4.dp_credit = cpu_to_be32(credits);
|
||||||
|
atomic_sub(IB_SET_POST_CREDITS(credits),
|
||||||
|
&ic->i_credits);
|
||||||
}
|
}
|
||||||
|
|
||||||
conn_param->private_data = dp;
|
|
||||||
conn_param->private_data_len = sizeof(*dp);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -349,7 +390,7 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
rdsdebug("Fatal QP Event %u (%s) "
|
rdsdebug("Fatal QP Event %u (%s) "
|
||||||
"- connection %pI4->%pI4, reconnecting\n",
|
"- connection %pI6c->%pI6c, reconnecting\n",
|
||||||
event->event, ib_event_msg(event->event),
|
event->event, ib_event_msg(event->event),
|
||||||
&conn->c_laddr, &conn->c_faddr);
|
&conn->c_laddr, &conn->c_faddr);
|
||||||
rds_conn_drop(conn);
|
rds_conn_drop(conn);
|
||||||
|
@ -580,11 +621,13 @@ out:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event)
|
static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6)
|
||||||
{
|
{
|
||||||
const struct rds_ib_connect_private *dp = event->param.conn.private_data;
|
const union rds_ib_conn_priv *dp = event->param.conn.private_data;
|
||||||
u16 common;
|
u8 data_len, major, minor;
|
||||||
u32 version = 0;
|
u32 version = 0;
|
||||||
|
__be16 mask;
|
||||||
|
u16 common;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* rdma_cm private data is odd - when there is any private data in the
|
* rdma_cm private data is odd - when there is any private data in the
|
||||||
|
@ -603,51 +646,126 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (isv6) {
|
||||||
|
data_len = sizeof(struct rds6_ib_connect_private);
|
||||||
|
major = dp->ricp_v6.dp_protocol_major;
|
||||||
|
minor = dp->ricp_v6.dp_protocol_minor;
|
||||||
|
mask = dp->ricp_v6.dp_protocol_minor_mask;
|
||||||
|
} else {
|
||||||
|
data_len = sizeof(struct rds_ib_connect_private);
|
||||||
|
major = dp->ricp_v4.dp_protocol_major;
|
||||||
|
minor = dp->ricp_v4.dp_protocol_minor;
|
||||||
|
mask = dp->ricp_v4.dp_protocol_minor_mask;
|
||||||
|
}
|
||||||
|
|
||||||
/* Even if len is crap *now* I still want to check it. -ASG */
|
/* Even if len is crap *now* I still want to check it. -ASG */
|
||||||
if (event->param.conn.private_data_len < sizeof (*dp) ||
|
if (event->param.conn.private_data_len < data_len || major == 0)
|
||||||
dp->dp_protocol_major == 0)
|
|
||||||
return RDS_PROTOCOL_3_0;
|
return RDS_PROTOCOL_3_0;
|
||||||
|
|
||||||
common = be16_to_cpu(dp->dp_protocol_minor_mask) & RDS_IB_SUPPORTED_PROTOCOLS;
|
common = be16_to_cpu(mask) & RDS_IB_SUPPORTED_PROTOCOLS;
|
||||||
if (dp->dp_protocol_major == 3 && common) {
|
if (major == 3 && common) {
|
||||||
version = RDS_PROTOCOL_3_0;
|
version = RDS_PROTOCOL_3_0;
|
||||||
while ((common >>= 1) != 0)
|
while ((common >>= 1) != 0)
|
||||||
version++;
|
version++;
|
||||||
} else
|
} else {
|
||||||
|
if (isv6)
|
||||||
|
printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI6c using incompatible protocol version %u.%u\n",
|
||||||
|
&dp->ricp_v6.dp_saddr, major, minor);
|
||||||
|
else
|
||||||
printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using incompatible protocol version %u.%u\n",
|
printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using incompatible protocol version %u.%u\n",
|
||||||
&dp->dp_saddr,
|
&dp->ricp_v4.dp_saddr, major, minor);
|
||||||
dp->dp_protocol_major,
|
}
|
||||||
dp->dp_protocol_minor);
|
|
||||||
return version;
|
return version;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Given an IPv6 address, find the IB net_device which hosts that address and
|
||||||
|
* return its index. This is used by the rds_ib_cm_handle_connect() code to
|
||||||
|
* find the interface index of where an incoming request comes from when
|
||||||
|
* the request is using a link local address.
|
||||||
|
*
|
||||||
|
* Note one problem in this search. It is possible that two interfaces have
|
||||||
|
* the same link local address. Unfortunately, this cannot be solved unless
|
||||||
|
* the underlying layer gives us the interface which an incoming RDMA connect
|
||||||
|
* request comes from.
|
||||||
|
*/
|
||||||
|
static u32 __rds_find_ifindex(struct net *net, const struct in6_addr *addr)
|
||||||
|
{
|
||||||
|
struct net_device *dev;
|
||||||
|
int idx = 0;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
for_each_netdev_rcu(net, dev) {
|
||||||
|
if (dev->type == ARPHRD_INFINIBAND &&
|
||||||
|
ipv6_chk_addr(net, addr, dev, 0)) {
|
||||||
|
idx = dev->ifindex;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
|
|
||||||
int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
|
int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
|
||||||
struct rdma_cm_event *event)
|
struct rdma_cm_event *event, bool isv6)
|
||||||
{
|
{
|
||||||
__be64 lguid = cm_id->route.path_rec->sgid.global.interface_id;
|
__be64 lguid = cm_id->route.path_rec->sgid.global.interface_id;
|
||||||
__be64 fguid = cm_id->route.path_rec->dgid.global.interface_id;
|
__be64 fguid = cm_id->route.path_rec->dgid.global.interface_id;
|
||||||
const struct rds_ib_connect_private *dp = event->param.conn.private_data;
|
const struct rds_ib_conn_priv_cmn *dp_cmn;
|
||||||
struct rds_ib_connect_private dp_rep;
|
|
||||||
struct rds_connection *conn = NULL;
|
struct rds_connection *conn = NULL;
|
||||||
struct rds_ib_connection *ic = NULL;
|
struct rds_ib_connection *ic = NULL;
|
||||||
struct rdma_conn_param conn_param;
|
struct rdma_conn_param conn_param;
|
||||||
|
const union rds_ib_conn_priv *dp;
|
||||||
|
union rds_ib_conn_priv dp_rep;
|
||||||
|
struct in6_addr s_mapped_addr;
|
||||||
|
struct in6_addr d_mapped_addr;
|
||||||
|
const struct in6_addr *saddr6;
|
||||||
|
const struct in6_addr *daddr6;
|
||||||
|
int destroy = 1;
|
||||||
|
u32 ifindex = 0;
|
||||||
u32 version;
|
u32 version;
|
||||||
int err = 1, destroy = 1;
|
int err = 1;
|
||||||
|
|
||||||
/* Check whether the remote protocol version matches ours. */
|
/* Check whether the remote protocol version matches ours. */
|
||||||
version = rds_ib_protocol_compatible(event);
|
version = rds_ib_protocol_compatible(event, isv6);
|
||||||
if (!version)
|
if (!version)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
rdsdebug("saddr %pI4 daddr %pI4 RDSv%u.%u lguid 0x%llx fguid "
|
dp = event->param.conn.private_data;
|
||||||
"0x%llx\n", &dp->dp_saddr, &dp->dp_daddr,
|
if (isv6) {
|
||||||
|
dp_cmn = &dp->ricp_v6.dp_cmn;
|
||||||
|
saddr6 = &dp->ricp_v6.dp_saddr;
|
||||||
|
daddr6 = &dp->ricp_v6.dp_daddr;
|
||||||
|
/* If the local address is link local, need to find the
|
||||||
|
* interface index in order to create a proper RDS
|
||||||
|
* connection.
|
||||||
|
*/
|
||||||
|
if (ipv6_addr_type(daddr6) & IPV6_ADDR_LINKLOCAL) {
|
||||||
|
/* Using init_net for now .. */
|
||||||
|
ifindex = __rds_find_ifindex(&init_net, daddr6);
|
||||||
|
/* No index found... Need to bail out. */
|
||||||
|
if (ifindex == 0) {
|
||||||
|
err = -EOPNOTSUPP;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
dp_cmn = &dp->ricp_v4.dp_cmn;
|
||||||
|
ipv6_addr_set_v4mapped(dp->ricp_v4.dp_saddr, &s_mapped_addr);
|
||||||
|
ipv6_addr_set_v4mapped(dp->ricp_v4.dp_daddr, &d_mapped_addr);
|
||||||
|
saddr6 = &s_mapped_addr;
|
||||||
|
daddr6 = &d_mapped_addr;
|
||||||
|
}
|
||||||
|
|
||||||
|
rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid "
|
||||||
|
"0x%llx\n", saddr6, daddr6,
|
||||||
RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version),
|
RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version),
|
||||||
(unsigned long long)be64_to_cpu(lguid),
|
(unsigned long long)be64_to_cpu(lguid),
|
||||||
(unsigned long long)be64_to_cpu(fguid));
|
(unsigned long long)be64_to_cpu(fguid));
|
||||||
|
|
||||||
/* RDS/IB is not currently netns aware, thus init_net */
|
/* RDS/IB is not currently netns aware, thus init_net */
|
||||||
conn = rds_conn_create(&init_net, dp->dp_daddr, dp->dp_saddr,
|
conn = rds_conn_create(&init_net, daddr6, saddr6,
|
||||||
&rds_ib_transport, GFP_KERNEL);
|
&rds_ib_transport, GFP_KERNEL, ifindex);
|
||||||
if (IS_ERR(conn)) {
|
if (IS_ERR(conn)) {
|
||||||
rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
|
rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
|
||||||
conn = NULL;
|
conn = NULL;
|
||||||
|
@ -678,12 +796,13 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
|
||||||
ic = conn->c_transport_data;
|
ic = conn->c_transport_data;
|
||||||
|
|
||||||
rds_ib_set_protocol(conn, version);
|
rds_ib_set_protocol(conn, version);
|
||||||
rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
|
rds_ib_set_flow_control(conn, be32_to_cpu(dp_cmn->ricpc_credit));
|
||||||
|
|
||||||
/* If the peer gave us the last packet it saw, process this as if
|
/* If the peer gave us the last packet it saw, process this as if
|
||||||
* we had received a regular ACK. */
|
* we had received a regular ACK. */
|
||||||
if (dp->dp_ack_seq)
|
if (dp_cmn->ricpc_ack_seq)
|
||||||
rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);
|
rds_send_drop_acked(conn, be64_to_cpu(dp_cmn->ricpc_ack_seq),
|
||||||
|
NULL);
|
||||||
|
|
||||||
BUG_ON(cm_id->context);
|
BUG_ON(cm_id->context);
|
||||||
BUG_ON(ic->i_cm_id);
|
BUG_ON(ic->i_cm_id);
|
||||||
|
@ -703,7 +822,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
|
||||||
|
|
||||||
rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version,
|
rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version,
|
||||||
event->param.conn.responder_resources,
|
event->param.conn.responder_resources,
|
||||||
event->param.conn.initiator_depth);
|
event->param.conn.initiator_depth, isv6);
|
||||||
|
|
||||||
/* rdma_accept() calls rdma_reject() internally if it fails */
|
/* rdma_accept() calls rdma_reject() internally if it fails */
|
||||||
if (rdma_accept(cm_id, &conn_param))
|
if (rdma_accept(cm_id, &conn_param))
|
||||||
|
@ -718,12 +837,12 @@ out:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id)
|
int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6)
|
||||||
{
|
{
|
||||||
struct rds_connection *conn = cm_id->context;
|
struct rds_connection *conn = cm_id->context;
|
||||||
struct rds_ib_connection *ic = conn->c_transport_data;
|
struct rds_ib_connection *ic = conn->c_transport_data;
|
||||||
struct rdma_conn_param conn_param;
|
struct rdma_conn_param conn_param;
|
||||||
struct rds_ib_connect_private dp;
|
union rds_ib_conn_priv dp;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
/* If the peer doesn't do protocol negotiation, we must
|
/* If the peer doesn't do protocol negotiation, we must
|
||||||
|
@ -738,7 +857,7 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id)
|
||||||
}
|
}
|
||||||
|
|
||||||
rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION,
|
rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION,
|
||||||
UINT_MAX, UINT_MAX);
|
UINT_MAX, UINT_MAX, isv6);
|
||||||
ret = rdma_connect(cm_id, &conn_param);
|
ret = rdma_connect(cm_id, &conn_param);
|
||||||
if (ret)
|
if (ret)
|
||||||
rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret);
|
rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret);
|
||||||
|
@ -758,13 +877,17 @@ out:
|
||||||
int rds_ib_conn_path_connect(struct rds_conn_path *cp)
|
int rds_ib_conn_path_connect(struct rds_conn_path *cp)
|
||||||
{
|
{
|
||||||
struct rds_connection *conn = cp->cp_conn;
|
struct rds_connection *conn = cp->cp_conn;
|
||||||
struct rds_ib_connection *ic = conn->c_transport_data;
|
struct sockaddr_storage src, dest;
|
||||||
struct sockaddr_in src, dest;
|
rdma_cm_event_handler handler;
|
||||||
|
struct rds_ib_connection *ic;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
ic = conn->c_transport_data;
|
||||||
|
|
||||||
/* XXX I wonder what effect the port space has */
|
/* XXX I wonder what effect the port space has */
|
||||||
/* delegate cm event handler to rdma_transport */
|
/* delegate cm event handler to rdma_transport */
|
||||||
ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn,
|
handler = rds_rdma_cm_event_handler;
|
||||||
|
ic->i_cm_id = rdma_create_id(&init_net, handler, conn,
|
||||||
RDMA_PS_TCP, IB_QPT_RC);
|
RDMA_PS_TCP, IB_QPT_RC);
|
||||||
if (IS_ERR(ic->i_cm_id)) {
|
if (IS_ERR(ic->i_cm_id)) {
|
||||||
ret = PTR_ERR(ic->i_cm_id);
|
ret = PTR_ERR(ic->i_cm_id);
|
||||||
|
@ -775,13 +898,33 @@ int rds_ib_conn_path_connect(struct rds_conn_path *cp)
|
||||||
|
|
||||||
rdsdebug("created cm id %p for conn %p\n", ic->i_cm_id, conn);
|
rdsdebug("created cm id %p for conn %p\n", ic->i_cm_id, conn);
|
||||||
|
|
||||||
src.sin_family = AF_INET;
|
if (ipv6_addr_v4mapped(&conn->c_faddr)) {
|
||||||
src.sin_addr.s_addr = (__force u32)conn->c_laddr;
|
struct sockaddr_in *sin;
|
||||||
src.sin_port = (__force u16)htons(0);
|
|
||||||
|
|
||||||
dest.sin_family = AF_INET;
|
sin = (struct sockaddr_in *)&src;
|
||||||
dest.sin_addr.s_addr = (__force u32)conn->c_faddr;
|
sin->sin_family = AF_INET;
|
||||||
dest.sin_port = (__force u16)htons(RDS_PORT);
|
sin->sin_addr.s_addr = conn->c_laddr.s6_addr32[3];
|
||||||
|
sin->sin_port = 0;
|
||||||
|
|
||||||
|
sin = (struct sockaddr_in *)&dest;
|
||||||
|
sin->sin_family = AF_INET;
|
||||||
|
sin->sin_addr.s_addr = conn->c_faddr.s6_addr32[3];
|
||||||
|
sin->sin_port = htons(RDS_PORT);
|
||||||
|
} else {
|
||||||
|
struct sockaddr_in6 *sin6;
|
||||||
|
|
||||||
|
sin6 = (struct sockaddr_in6 *)&src;
|
||||||
|
sin6->sin6_family = AF_INET6;
|
||||||
|
sin6->sin6_addr = conn->c_laddr;
|
||||||
|
sin6->sin6_port = 0;
|
||||||
|
sin6->sin6_scope_id = conn->c_dev_if;
|
||||||
|
|
||||||
|
sin6 = (struct sockaddr_in6 *)&dest;
|
||||||
|
sin6->sin6_family = AF_INET6;
|
||||||
|
sin6->sin6_addr = conn->c_faddr;
|
||||||
|
sin6->sin6_port = htons(RDS_CM_PORT);
|
||||||
|
sin6->sin6_scope_id = conn->c_dev_if;
|
||||||
|
}
|
||||||
|
|
||||||
ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src,
|
ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src,
|
||||||
(struct sockaddr *)&dest,
|
(struct sockaddr *)&dest,
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2006 Oracle. All rights reserved.
|
* Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* This software is available to you under a choice of one of two
|
* This software is available to you under a choice of one of two
|
||||||
* licenses. You may choose to be licensed under the terms of the GNU
|
* licenses. You may choose to be licensed under the terms of the GNU
|
||||||
|
@ -100,18 +100,19 @@ static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
|
||||||
kfree_rcu(to_free, rcu);
|
kfree_rcu(to_free, rcu);
|
||||||
}
|
}
|
||||||
|
|
||||||
int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
|
int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev,
|
||||||
|
struct in6_addr *ipaddr)
|
||||||
{
|
{
|
||||||
struct rds_ib_device *rds_ibdev_old;
|
struct rds_ib_device *rds_ibdev_old;
|
||||||
|
|
||||||
rds_ibdev_old = rds_ib_get_device(ipaddr);
|
rds_ibdev_old = rds_ib_get_device(ipaddr->s6_addr32[3]);
|
||||||
if (!rds_ibdev_old)
|
if (!rds_ibdev_old)
|
||||||
return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
|
return rds_ib_add_ipaddr(rds_ibdev, ipaddr->s6_addr32[3]);
|
||||||
|
|
||||||
if (rds_ibdev_old != rds_ibdev) {
|
if (rds_ibdev_old != rds_ibdev) {
|
||||||
rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr);
|
rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr->s6_addr32[3]);
|
||||||
rds_ib_dev_put(rds_ibdev_old);
|
rds_ib_dev_put(rds_ibdev_old);
|
||||||
return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
|
return rds_ib_add_ipaddr(rds_ibdev, ipaddr->s6_addr32[3]);
|
||||||
}
|
}
|
||||||
rds_ib_dev_put(rds_ibdev_old);
|
rds_ib_dev_put(rds_ibdev_old);
|
||||||
|
|
||||||
|
@ -544,7 +545,7 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
|
||||||
struct rds_ib_connection *ic = rs->rs_conn->c_transport_data;
|
struct rds_ib_connection *ic = rs->rs_conn->c_transport_data;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
rds_ibdev = rds_ib_get_device(rs->rs_bound_addr);
|
rds_ibdev = rds_ib_get_device(rs->rs_bound_addr.s6_addr32[3]);
|
||||||
if (!rds_ibdev) {
|
if (!rds_ibdev) {
|
||||||
ret = -ENODEV;
|
ret = -ENODEV;
|
||||||
goto out;
|
goto out;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2006 Oracle. All rights reserved.
|
* Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* This software is available to you under a choice of one of two
|
* This software is available to you under a choice of one of two
|
||||||
* licenses. You may choose to be licensed under the terms of the GNU
|
* licenses. You may choose to be licensed under the terms of the GNU
|
||||||
|
@ -266,7 +266,7 @@ static struct rds_ib_incoming *rds_ib_refill_one_inc(struct rds_ib_connection *i
|
||||||
rds_ib_stats_inc(s_ib_rx_total_incs);
|
rds_ib_stats_inc(s_ib_rx_total_incs);
|
||||||
}
|
}
|
||||||
INIT_LIST_HEAD(&ibinc->ii_frags);
|
INIT_LIST_HEAD(&ibinc->ii_frags);
|
||||||
rds_inc_init(&ibinc->ii_inc, ic->conn, ic->conn->c_faddr);
|
rds_inc_init(&ibinc->ii_inc, ic->conn, &ic->conn->c_faddr);
|
||||||
|
|
||||||
return ibinc;
|
return ibinc;
|
||||||
}
|
}
|
||||||
|
@ -418,7 +418,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
|
||||||
ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
|
ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
rds_ib_conn_error(conn, "recv post on "
|
rds_ib_conn_error(conn, "recv post on "
|
||||||
"%pI4 returned %d, disconnecting and "
|
"%pI6c returned %d, disconnecting and "
|
||||||
"reconnecting\n", &conn->c_faddr,
|
"reconnecting\n", &conn->c_faddr,
|
||||||
ret);
|
ret);
|
||||||
break;
|
break;
|
||||||
|
@ -848,7 +848,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
|
||||||
|
|
||||||
if (data_len < sizeof(struct rds_header)) {
|
if (data_len < sizeof(struct rds_header)) {
|
||||||
rds_ib_conn_error(conn, "incoming message "
|
rds_ib_conn_error(conn, "incoming message "
|
||||||
"from %pI4 didn't include a "
|
"from %pI6c didn't include a "
|
||||||
"header, disconnecting and "
|
"header, disconnecting and "
|
||||||
"reconnecting\n",
|
"reconnecting\n",
|
||||||
&conn->c_faddr);
|
&conn->c_faddr);
|
||||||
|
@ -861,7 +861,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
|
||||||
/* Validate the checksum. */
|
/* Validate the checksum. */
|
||||||
if (!rds_message_verify_checksum(ihdr)) {
|
if (!rds_message_verify_checksum(ihdr)) {
|
||||||
rds_ib_conn_error(conn, "incoming message "
|
rds_ib_conn_error(conn, "incoming message "
|
||||||
"from %pI4 has corrupted header - "
|
"from %pI6c has corrupted header - "
|
||||||
"forcing a reconnect\n",
|
"forcing a reconnect\n",
|
||||||
&conn->c_faddr);
|
&conn->c_faddr);
|
||||||
rds_stats_inc(s_recv_drop_bad_checksum);
|
rds_stats_inc(s_recv_drop_bad_checksum);
|
||||||
|
@ -941,10 +941,10 @@ static void rds_ib_process_recv(struct rds_connection *conn,
|
||||||
ic->i_recv_data_rem = 0;
|
ic->i_recv_data_rem = 0;
|
||||||
ic->i_ibinc = NULL;
|
ic->i_ibinc = NULL;
|
||||||
|
|
||||||
if (ibinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
|
if (ibinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP) {
|
||||||
rds_ib_cong_recv(conn, ibinc);
|
rds_ib_cong_recv(conn, ibinc);
|
||||||
else {
|
} else {
|
||||||
rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr,
|
rds_recv_incoming(conn, &conn->c_faddr, &conn->c_laddr,
|
||||||
&ibinc->ii_inc, GFP_ATOMIC);
|
&ibinc->ii_inc, GFP_ATOMIC);
|
||||||
state->ack_next = be64_to_cpu(hdr->h_sequence);
|
state->ack_next = be64_to_cpu(hdr->h_sequence);
|
||||||
state->ack_next_valid = 1;
|
state->ack_next_valid = 1;
|
||||||
|
@ -988,7 +988,7 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
|
||||||
} else {
|
} else {
|
||||||
/* We expect errors as the qp is drained during shutdown */
|
/* We expect errors as the qp is drained during shutdown */
|
||||||
if (rds_conn_up(conn) || rds_conn_connecting(conn))
|
if (rds_conn_up(conn) || rds_conn_connecting(conn))
|
||||||
rds_ib_conn_error(conn, "recv completion on <%pI4,%pI4> had status %u (%s), disconnecting and reconnecting\n",
|
rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n",
|
||||||
&conn->c_laddr, &conn->c_faddr,
|
&conn->c_laddr, &conn->c_faddr,
|
||||||
wc->status,
|
wc->status,
|
||||||
ib_wc_status_msg(wc->status));
|
ib_wc_status_msg(wc->status));
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2006 Oracle. All rights reserved.
|
* Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* This software is available to you under a choice of one of two
|
* This software is available to you under a choice of one of two
|
||||||
* licenses. You may choose to be licensed under the terms of the GNU
|
* licenses. You may choose to be licensed under the terms of the GNU
|
||||||
|
@ -305,7 +305,7 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
|
||||||
|
|
||||||
/* We expect errors as the qp is drained during shutdown */
|
/* We expect errors as the qp is drained during shutdown */
|
||||||
if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) {
|
if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) {
|
||||||
rds_ib_conn_error(conn, "send completion on <%pI4,%pI4> had status %u (%s), disconnecting and reconnecting\n",
|
rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n",
|
||||||
&conn->c_laddr, &conn->c_faddr, wc->status,
|
&conn->c_laddr, &conn->c_faddr, wc->status,
|
||||||
ib_wc_status_msg(wc->status));
|
ib_wc_status_msg(wc->status));
|
||||||
}
|
}
|
||||||
|
@ -730,7 +730,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
|
||||||
first, &first->s_wr, ret, failed_wr);
|
first, &first->s_wr, ret, failed_wr);
|
||||||
BUG_ON(failed_wr != &first->s_wr);
|
BUG_ON(failed_wr != &first->s_wr);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
printk(KERN_WARNING "RDS/IB: ib_post_send to %pI4 "
|
printk(KERN_WARNING "RDS/IB: ib_post_send to %pI6c "
|
||||||
"returned %d\n", &conn->c_faddr, ret);
|
"returned %d\n", &conn->c_faddr, ret);
|
||||||
rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
|
rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
|
||||||
rds_ib_sub_signaled(ic, nr_sig);
|
rds_ib_sub_signaled(ic, nr_sig);
|
||||||
|
@ -827,7 +827,7 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
|
||||||
send, &send->s_atomic_wr, ret, failed_wr);
|
send, &send->s_atomic_wr, ret, failed_wr);
|
||||||
BUG_ON(failed_wr != &send->s_atomic_wr.wr);
|
BUG_ON(failed_wr != &send->s_atomic_wr.wr);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 "
|
printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI6c "
|
||||||
"returned %d\n", &conn->c_faddr, ret);
|
"returned %d\n", &conn->c_faddr, ret);
|
||||||
rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
|
rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
|
||||||
rds_ib_sub_signaled(ic, nr_sig);
|
rds_ib_sub_signaled(ic, nr_sig);
|
||||||
|
@ -967,7 +967,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
|
||||||
first, &first->s_rdma_wr.wr, ret, failed_wr);
|
first, &first->s_rdma_wr.wr, ret, failed_wr);
|
||||||
BUG_ON(failed_wr != &first->s_rdma_wr.wr);
|
BUG_ON(failed_wr != &first->s_rdma_wr.wr);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 "
|
printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI6c "
|
||||||
"returned %d\n", &conn->c_faddr, ret);
|
"returned %d\n", &conn->c_faddr, ret);
|
||||||
rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
|
rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
|
||||||
rds_ib_sub_signaled(ic, nr_sig);
|
rds_ib_sub_signaled(ic, nr_sig);
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2006 Oracle. All rights reserved.
|
* Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* This software is available to you under a choice of one of two
|
* This software is available to you under a choice of one of two
|
||||||
* licenses. You may choose to be licensed under the terms of the GNU
|
* licenses. You may choose to be licensed under the terms of the GNU
|
||||||
|
@ -35,6 +35,7 @@
|
||||||
#include <linux/in.h>
|
#include <linux/in.h>
|
||||||
#include <net/net_namespace.h>
|
#include <net/net_namespace.h>
|
||||||
#include <net/netns/generic.h>
|
#include <net/netns/generic.h>
|
||||||
|
#include <linux/ipv6.h>
|
||||||
|
|
||||||
#include "rds_single_path.h"
|
#include "rds_single_path.h"
|
||||||
#include "rds.h"
|
#include "rds.h"
|
||||||
|
@ -88,11 +89,11 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
|
||||||
|
|
||||||
BUG_ON(hdr_off || sg || off);
|
BUG_ON(hdr_off || sg || off);
|
||||||
|
|
||||||
rds_inc_init(&rm->m_inc, conn, conn->c_laddr);
|
rds_inc_init(&rm->m_inc, conn, &conn->c_laddr);
|
||||||
/* For the embedded inc. Matching put is in loop_inc_free() */
|
/* For the embedded inc. Matching put is in loop_inc_free() */
|
||||||
rds_message_addref(rm);
|
rds_message_addref(rm);
|
||||||
|
|
||||||
rds_recv_incoming(conn, conn->c_laddr, conn->c_faddr, &rm->m_inc,
|
rds_recv_incoming(conn, &conn->c_laddr, &conn->c_faddr, &rm->m_inc,
|
||||||
GFP_KERNEL);
|
GFP_KERNEL);
|
||||||
|
|
||||||
rds_send_drop_acked(conn, be64_to_cpu(rm->m_inc.i_hdr.h_sequence),
|
rds_send_drop_acked(conn, be64_to_cpu(rm->m_inc.i_hdr.h_sequence),
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2007 Oracle. All rights reserved.
|
* Copyright (c) 2007, 2017 Oracle and/or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* This software is available to you under a choice of one of two
|
* This software is available to you under a choice of one of two
|
||||||
* licenses. You may choose to be licensed under the terms of the GNU
|
* licenses. You may choose to be licensed under the terms of the GNU
|
||||||
|
@ -183,7 +183,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
|
||||||
long i;
|
long i;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (rs->rs_bound_addr == 0 || !rs->rs_transport) {
|
if (ipv6_addr_any(&rs->rs_bound_addr) || !rs->rs_transport) {
|
||||||
ret = -ENOTCONN; /* XXX not a great errno */
|
ret = -ENOTCONN; /* XXX not a great errno */
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
@ -574,7 +574,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
|
||||||
|
|
||||||
args = CMSG_DATA(cmsg);
|
args = CMSG_DATA(cmsg);
|
||||||
|
|
||||||
if (rs->rs_bound_addr == 0) {
|
if (ipv6_addr_any(&rs->rs_bound_addr)) {
|
||||||
ret = -ENOTCONN; /* XXX not a great errno */
|
ret = -ENOTCONN; /* XXX not a great errno */
|
||||||
goto out_ret;
|
goto out_ret;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 Oracle. All rights reserved.
|
* Copyright (c) 2009, 2018 Oracle and/or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* This software is available to you under a choice of one of two
|
* This software is available to you under a choice of one of two
|
||||||
* licenses. You may choose to be licensed under the terms of the GNU
|
* licenses. You may choose to be licensed under the terms of the GNU
|
||||||
|
@ -39,8 +39,9 @@
|
||||||
|
|
||||||
static struct rdma_cm_id *rds_rdma_listen_id;
|
static struct rdma_cm_id *rds_rdma_listen_id;
|
||||||
|
|
||||||
int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
|
static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
|
||||||
struct rdma_cm_event *event)
|
struct rdma_cm_event *event,
|
||||||
|
bool isv6)
|
||||||
{
|
{
|
||||||
/* this can be null in the listening path */
|
/* this can be null in the listening path */
|
||||||
struct rds_connection *conn = cm_id->context;
|
struct rds_connection *conn = cm_id->context;
|
||||||
|
@ -72,7 +73,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
|
||||||
|
|
||||||
switch (event->event) {
|
switch (event->event) {
|
||||||
case RDMA_CM_EVENT_CONNECT_REQUEST:
|
case RDMA_CM_EVENT_CONNECT_REQUEST:
|
||||||
ret = trans->cm_handle_connect(cm_id, event);
|
ret = trans->cm_handle_connect(cm_id, event, isv6);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case RDMA_CM_EVENT_ADDR_RESOLVED:
|
case RDMA_CM_EVENT_ADDR_RESOLVED:
|
||||||
|
@ -90,7 +91,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
|
||||||
|
|
||||||
ibic = conn->c_transport_data;
|
ibic = conn->c_transport_data;
|
||||||
if (ibic && ibic->i_cm_id == cm_id)
|
if (ibic && ibic->i_cm_id == cm_id)
|
||||||
ret = trans->cm_initiate_connect(cm_id);
|
ret = trans->cm_initiate_connect(cm_id, isv6);
|
||||||
else
|
else
|
||||||
rds_conn_drop(conn);
|
rds_conn_drop(conn);
|
||||||
}
|
}
|
||||||
|
@ -116,14 +117,14 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
|
||||||
|
|
||||||
case RDMA_CM_EVENT_DISCONNECTED:
|
case RDMA_CM_EVENT_DISCONNECTED:
|
||||||
rdsdebug("DISCONNECT event - dropping connection "
|
rdsdebug("DISCONNECT event - dropping connection "
|
||||||
"%pI4->%pI4\n", &conn->c_laddr,
|
"%pI6c->%pI6c\n", &conn->c_laddr,
|
||||||
&conn->c_faddr);
|
&conn->c_faddr);
|
||||||
rds_conn_drop(conn);
|
rds_conn_drop(conn);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
|
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
|
||||||
if (conn) {
|
if (conn) {
|
||||||
pr_info("RDS: RDMA_CM_EVENT_TIMEWAIT_EXIT event: dropping connection %pI4->%pI4\n",
|
pr_info("RDS: RDMA_CM_EVENT_TIMEWAIT_EXIT event: dropping connection %pI6c->%pI6c\n",
|
||||||
&conn->c_laddr, &conn->c_faddr);
|
&conn->c_laddr, &conn->c_faddr);
|
||||||
rds_conn_drop(conn);
|
rds_conn_drop(conn);
|
||||||
}
|
}
|
||||||
|
@ -146,13 +147,20 @@ out:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int rds_rdma_listen_init(void)
|
int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
|
||||||
|
struct rdma_cm_event *event)
|
||||||
|
{
|
||||||
|
return rds_rdma_cm_event_handler_cmn(cm_id, event, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int rds_rdma_listen_init_common(rdma_cm_event_handler handler,
|
||||||
|
struct sockaddr *sa,
|
||||||
|
struct rdma_cm_id **ret_cm_id)
|
||||||
{
|
{
|
||||||
struct sockaddr_in sin;
|
|
||||||
struct rdma_cm_id *cm_id;
|
struct rdma_cm_id *cm_id;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, NULL,
|
cm_id = rdma_create_id(&init_net, handler, NULL,
|
||||||
RDMA_PS_TCP, IB_QPT_RC);
|
RDMA_PS_TCP, IB_QPT_RC);
|
||||||
if (IS_ERR(cm_id)) {
|
if (IS_ERR(cm_id)) {
|
||||||
ret = PTR_ERR(cm_id);
|
ret = PTR_ERR(cm_id);
|
||||||
|
@ -161,15 +169,11 @@ static int rds_rdma_listen_init(void)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
sin.sin_family = AF_INET;
|
|
||||||
sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY);
|
|
||||||
sin.sin_port = (__force u16)htons(RDS_PORT);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* XXX I bet this binds the cm_id to a device. If we want to support
|
* XXX I bet this binds the cm_id to a device. If we want to support
|
||||||
* fail-over we'll have to take this into consideration.
|
* fail-over we'll have to take this into consideration.
|
||||||
*/
|
*/
|
||||||
ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
|
ret = rdma_bind_addr(cm_id, sa);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
printk(KERN_ERR "RDS/RDMA: failed to setup listener, "
|
printk(KERN_ERR "RDS/RDMA: failed to setup listener, "
|
||||||
"rdma_bind_addr() returned %d\n", ret);
|
"rdma_bind_addr() returned %d\n", ret);
|
||||||
|
@ -185,7 +189,7 @@ static int rds_rdma_listen_init(void)
|
||||||
|
|
||||||
rdsdebug("cm %p listening on port %u\n", cm_id, RDS_PORT);
|
rdsdebug("cm %p listening on port %u\n", cm_id, RDS_PORT);
|
||||||
|
|
||||||
rds_rdma_listen_id = cm_id;
|
*ret_cm_id = cm_id;
|
||||||
cm_id = NULL;
|
cm_id = NULL;
|
||||||
out:
|
out:
|
||||||
if (cm_id)
|
if (cm_id)
|
||||||
|
@ -193,6 +197,26 @@ out:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Initialize the RDS RDMA listeners. We create two listeners for
|
||||||
|
* compatibility reasons. The one on RDS_PORT is used for IPv4
|
||||||
|
* requests only. The one on RDS_CM_PORT is used for IPv6 requests
|
||||||
|
* only. So only an IPv6-enabled RDS module will communicate using this
|
||||||
|
* port.
|
||||||
|
*/
|
||||||
|
static int rds_rdma_listen_init(void)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct sockaddr_in sin;
|
||||||
|
|
||||||
|
sin.sin_family = PF_INET;
|
||||||
|
sin.sin_addr.s_addr = htonl(INADDR_ANY);
|
||||||
|
sin.sin_port = htons(RDS_PORT);
|
||||||
|
ret = rds_rdma_listen_init_common(rds_rdma_cm_event_handler,
|
||||||
|
(struct sockaddr *)&sin,
|
||||||
|
&rds_rdma_listen_id);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static void rds_rdma_listen_stop(void)
|
static void rds_rdma_listen_stop(void)
|
||||||
{
|
{
|
||||||
if (rds_rdma_listen_id) {
|
if (rds_rdma_listen_id) {
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
#include <linux/rds.h>
|
#include <linux/rds.h>
|
||||||
#include <linux/rhashtable.h>
|
#include <linux/rhashtable.h>
|
||||||
#include <linux/refcount.h>
|
#include <linux/refcount.h>
|
||||||
|
#include <linux/in6.h>
|
||||||
|
|
||||||
#include "info.h"
|
#include "info.h"
|
||||||
|
|
||||||
|
@ -30,6 +31,7 @@
|
||||||
* userspace from listening.
|
* userspace from listening.
|
||||||
*/
|
*/
|
||||||
#define RDS_PORT 18634
|
#define RDS_PORT 18634
|
||||||
|
#define RDS_CM_PORT 16385
|
||||||
|
|
||||||
#ifdef ATOMIC64_INIT
|
#ifdef ATOMIC64_INIT
|
||||||
#define KERNEL_HAS_ATOMIC64
|
#define KERNEL_HAS_ATOMIC64
|
||||||
|
@ -61,7 +63,7 @@ void rdsdebug(char *fmt, ...)
|
||||||
|
|
||||||
struct rds_cong_map {
|
struct rds_cong_map {
|
||||||
struct rb_node m_rb_node;
|
struct rb_node m_rb_node;
|
||||||
__be32 m_addr;
|
struct in6_addr m_addr;
|
||||||
wait_queue_head_t m_waitq;
|
wait_queue_head_t m_waitq;
|
||||||
struct list_head m_conn_list;
|
struct list_head m_conn_list;
|
||||||
unsigned long m_page_addrs[RDS_CONG_MAP_PAGES];
|
unsigned long m_page_addrs[RDS_CONG_MAP_PAGES];
|
||||||
|
@ -136,11 +138,13 @@ struct rds_conn_path {
|
||||||
/* One rds_connection per RDS address pair */
|
/* One rds_connection per RDS address pair */
|
||||||
struct rds_connection {
|
struct rds_connection {
|
||||||
struct hlist_node c_hash_node;
|
struct hlist_node c_hash_node;
|
||||||
__be32 c_laddr;
|
struct in6_addr c_laddr;
|
||||||
__be32 c_faddr;
|
struct in6_addr c_faddr;
|
||||||
|
int c_dev_if; /* c_laddr's interface index */
|
||||||
unsigned int c_loopback:1,
|
unsigned int c_loopback:1,
|
||||||
|
c_isv6:1,
|
||||||
c_ping_triggered:1,
|
c_ping_triggered:1,
|
||||||
c_pad_to_32:30;
|
c_pad_to_32:29;
|
||||||
int c_npaths;
|
int c_npaths;
|
||||||
struct rds_connection *c_passive;
|
struct rds_connection *c_passive;
|
||||||
struct rds_transport *c_trans;
|
struct rds_transport *c_trans;
|
||||||
|
@ -269,7 +273,7 @@ struct rds_incoming {
|
||||||
struct rds_conn_path *i_conn_path;
|
struct rds_conn_path *i_conn_path;
|
||||||
struct rds_header i_hdr;
|
struct rds_header i_hdr;
|
||||||
unsigned long i_rx_jiffies;
|
unsigned long i_rx_jiffies;
|
||||||
__be32 i_saddr;
|
struct in6_addr i_saddr;
|
||||||
|
|
||||||
rds_rdma_cookie_t i_rdma_cookie;
|
rds_rdma_cookie_t i_rdma_cookie;
|
||||||
struct timeval i_rx_tstamp;
|
struct timeval i_rx_tstamp;
|
||||||
|
@ -386,7 +390,7 @@ struct rds_message {
|
||||||
struct list_head m_conn_item;
|
struct list_head m_conn_item;
|
||||||
struct rds_incoming m_inc;
|
struct rds_incoming m_inc;
|
||||||
u64 m_ack_seq;
|
u64 m_ack_seq;
|
||||||
__be32 m_daddr;
|
struct in6_addr m_daddr;
|
||||||
unsigned long m_flags;
|
unsigned long m_flags;
|
||||||
|
|
||||||
/* Never access m_rs without holding m_rs_lock.
|
/* Never access m_rs without holding m_rs_lock.
|
||||||
|
@ -519,7 +523,8 @@ struct rds_transport {
|
||||||
t_mp_capable:1;
|
t_mp_capable:1;
|
||||||
unsigned int t_type;
|
unsigned int t_type;
|
||||||
|
|
||||||
int (*laddr_check)(struct net *net, __be32 addr);
|
int (*laddr_check)(struct net *net, const struct in6_addr *addr,
|
||||||
|
__u32 scope_id);
|
||||||
int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp);
|
int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp);
|
||||||
void (*conn_free)(void *data);
|
void (*conn_free)(void *data);
|
||||||
int (*conn_path_connect)(struct rds_conn_path *cp);
|
int (*conn_path_connect)(struct rds_conn_path *cp);
|
||||||
|
@ -535,8 +540,8 @@ struct rds_transport {
|
||||||
void (*inc_free)(struct rds_incoming *inc);
|
void (*inc_free)(struct rds_incoming *inc);
|
||||||
|
|
||||||
int (*cm_handle_connect)(struct rdma_cm_id *cm_id,
|
int (*cm_handle_connect)(struct rdma_cm_id *cm_id,
|
||||||
struct rdma_cm_event *event);
|
struct rdma_cm_event *event, bool isv6);
|
||||||
int (*cm_initiate_connect)(struct rdma_cm_id *cm_id);
|
int (*cm_initiate_connect)(struct rdma_cm_id *cm_id, bool isv6);
|
||||||
void (*cm_connect_complete)(struct rds_connection *conn,
|
void (*cm_connect_complete)(struct rds_connection *conn,
|
||||||
struct rdma_cm_event *event);
|
struct rdma_cm_event *event);
|
||||||
|
|
||||||
|
@ -551,6 +556,12 @@ struct rds_transport {
|
||||||
bool (*t_unloading)(struct rds_connection *conn);
|
bool (*t_unloading)(struct rds_connection *conn);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Bind hash table key length. It is the sum of the size of a struct
|
||||||
|
* in6_addr, a scope_id and a port.
|
||||||
|
*/
|
||||||
|
#define RDS_BOUND_KEY_LEN \
|
||||||
|
(sizeof(struct in6_addr) + sizeof(__u32) + sizeof(__be16))
|
||||||
|
|
||||||
struct rds_sock {
|
struct rds_sock {
|
||||||
struct sock rs_sk;
|
struct sock rs_sk;
|
||||||
|
|
||||||
|
@ -562,10 +573,14 @@ struct rds_sock {
|
||||||
* support.
|
* support.
|
||||||
*/
|
*/
|
||||||
struct rhash_head rs_bound_node;
|
struct rhash_head rs_bound_node;
|
||||||
u64 rs_bound_key;
|
u8 rs_bound_key[RDS_BOUND_KEY_LEN];
|
||||||
__be32 rs_bound_addr;
|
struct sockaddr_in6 rs_bound_sin6;
|
||||||
__be32 rs_conn_addr;
|
#define rs_bound_addr rs_bound_sin6.sin6_addr
|
||||||
__be16 rs_bound_port;
|
#define rs_bound_addr_v4 rs_bound_sin6.sin6_addr.s6_addr32[3]
|
||||||
|
#define rs_bound_port rs_bound_sin6.sin6_port
|
||||||
|
#define rs_bound_scope_id rs_bound_sin6.sin6_scope_id
|
||||||
|
struct in6_addr rs_conn_addr;
|
||||||
|
#define rs_conn_addr_v4 rs_conn_addr.s6_addr32[3]
|
||||||
__be16 rs_conn_port;
|
__be16 rs_conn_port;
|
||||||
struct rds_transport *rs_transport;
|
struct rds_transport *rs_transport;
|
||||||
|
|
||||||
|
@ -701,7 +716,8 @@ extern wait_queue_head_t rds_poll_waitq;
|
||||||
/* bind.c */
|
/* bind.c */
|
||||||
int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
|
int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
|
||||||
void rds_remove_bound(struct rds_sock *rs);
|
void rds_remove_bound(struct rds_sock *rs);
|
||||||
struct rds_sock *rds_find_bound(__be32 addr, __be16 port);
|
struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port,
|
||||||
|
__u32 scope_id);
|
||||||
int rds_bind_lock_init(void);
|
int rds_bind_lock_init(void);
|
||||||
void rds_bind_lock_destroy(void);
|
void rds_bind_lock_destroy(void);
|
||||||
|
|
||||||
|
@ -725,11 +741,15 @@ extern u32 rds_gen_num;
|
||||||
int rds_conn_init(void);
|
int rds_conn_init(void);
|
||||||
void rds_conn_exit(void);
|
void rds_conn_exit(void);
|
||||||
struct rds_connection *rds_conn_create(struct net *net,
|
struct rds_connection *rds_conn_create(struct net *net,
|
||||||
__be32 laddr, __be32 faddr,
|
const struct in6_addr *laddr,
|
||||||
struct rds_transport *trans, gfp_t gfp);
|
const struct in6_addr *faddr,
|
||||||
|
struct rds_transport *trans, gfp_t gfp,
|
||||||
|
int dev_if);
|
||||||
struct rds_connection *rds_conn_create_outgoing(struct net *net,
|
struct rds_connection *rds_conn_create_outgoing(struct net *net,
|
||||||
__be32 laddr, __be32 faddr,
|
const struct in6_addr *laddr,
|
||||||
struct rds_transport *trans, gfp_t gfp);
|
const struct in6_addr *faddr,
|
||||||
|
struct rds_transport *trans,
|
||||||
|
gfp_t gfp, int dev_if);
|
||||||
void rds_conn_shutdown(struct rds_conn_path *cpath);
|
void rds_conn_shutdown(struct rds_conn_path *cpath);
|
||||||
void rds_conn_destroy(struct rds_connection *conn);
|
void rds_conn_destroy(struct rds_connection *conn);
|
||||||
void rds_conn_drop(struct rds_connection *conn);
|
void rds_conn_drop(struct rds_connection *conn);
|
||||||
|
@ -840,11 +860,12 @@ void rds_page_exit(void);
|
||||||
|
|
||||||
/* recv.c */
|
/* recv.c */
|
||||||
void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
|
void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
|
||||||
__be32 saddr);
|
struct in6_addr *saddr);
|
||||||
void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *conn,
|
void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *conn,
|
||||||
__be32 saddr);
|
struct in6_addr *saddr);
|
||||||
void rds_inc_put(struct rds_incoming *inc);
|
void rds_inc_put(struct rds_incoming *inc);
|
||||||
void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
|
void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr,
|
||||||
|
struct in6_addr *daddr,
|
||||||
struct rds_incoming *inc, gfp_t gfp);
|
struct rds_incoming *inc, gfp_t gfp);
|
||||||
int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
|
int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
|
||||||
int msg_flags);
|
int msg_flags);
|
||||||
|
@ -859,7 +880,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len);
|
||||||
void rds_send_path_reset(struct rds_conn_path *conn);
|
void rds_send_path_reset(struct rds_conn_path *conn);
|
||||||
int rds_send_xmit(struct rds_conn_path *cp);
|
int rds_send_xmit(struct rds_conn_path *cp);
|
||||||
struct sockaddr_in;
|
struct sockaddr_in;
|
||||||
void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest);
|
void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in6 *dest);
|
||||||
typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack);
|
typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack);
|
||||||
void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
|
void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
|
||||||
is_acked_func is_acked);
|
is_acked_func is_acked);
|
||||||
|
@ -946,11 +967,14 @@ void rds_send_worker(struct work_struct *);
|
||||||
void rds_recv_worker(struct work_struct *);
|
void rds_recv_worker(struct work_struct *);
|
||||||
void rds_connect_path_complete(struct rds_conn_path *conn, int curr);
|
void rds_connect_path_complete(struct rds_conn_path *conn, int curr);
|
||||||
void rds_connect_complete(struct rds_connection *conn);
|
void rds_connect_complete(struct rds_connection *conn);
|
||||||
|
int rds_addr_cmp(const struct in6_addr *a1, const struct in6_addr *a2);
|
||||||
|
|
||||||
/* transport.c */
|
/* transport.c */
|
||||||
void rds_trans_register(struct rds_transport *trans);
|
void rds_trans_register(struct rds_transport *trans);
|
||||||
void rds_trans_unregister(struct rds_transport *trans);
|
void rds_trans_unregister(struct rds_transport *trans);
|
||||||
struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr);
|
struct rds_transport *rds_trans_get_preferred(struct net *net,
|
||||||
|
const struct in6_addr *addr,
|
||||||
|
__u32 scope_id);
|
||||||
void rds_trans_put(struct rds_transport *trans);
|
void rds_trans_put(struct rds_transport *trans);
|
||||||
unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
|
unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
|
||||||
unsigned int avail);
|
unsigned int avail);
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2006 Oracle. All rights reserved.
|
* Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* This software is available to you under a choice of one of two
|
* This software is available to you under a choice of one of two
|
||||||
* licenses. You may choose to be licensed under the terms of the GNU
|
* licenses. You may choose to be licensed under the terms of the GNU
|
||||||
|
@ -41,14 +41,14 @@
|
||||||
#include "rds.h"
|
#include "rds.h"
|
||||||
|
|
||||||
void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
|
void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
|
||||||
__be32 saddr)
|
struct in6_addr *saddr)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
refcount_set(&inc->i_refcount, 1);
|
refcount_set(&inc->i_refcount, 1);
|
||||||
INIT_LIST_HEAD(&inc->i_item);
|
INIT_LIST_HEAD(&inc->i_item);
|
||||||
inc->i_conn = conn;
|
inc->i_conn = conn;
|
||||||
inc->i_saddr = saddr;
|
inc->i_saddr = *saddr;
|
||||||
inc->i_rdma_cookie = 0;
|
inc->i_rdma_cookie = 0;
|
||||||
inc->i_rx_tstamp.tv_sec = 0;
|
inc->i_rx_tstamp.tv_sec = 0;
|
||||||
inc->i_rx_tstamp.tv_usec = 0;
|
inc->i_rx_tstamp.tv_usec = 0;
|
||||||
|
@ -59,13 +59,13 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
|
||||||
EXPORT_SYMBOL_GPL(rds_inc_init);
|
EXPORT_SYMBOL_GPL(rds_inc_init);
|
||||||
|
|
||||||
void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *cp,
|
void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *cp,
|
||||||
__be32 saddr)
|
struct in6_addr *saddr)
|
||||||
{
|
{
|
||||||
refcount_set(&inc->i_refcount, 1);
|
refcount_set(&inc->i_refcount, 1);
|
||||||
INIT_LIST_HEAD(&inc->i_item);
|
INIT_LIST_HEAD(&inc->i_item);
|
||||||
inc->i_conn = cp->cp_conn;
|
inc->i_conn = cp->cp_conn;
|
||||||
inc->i_conn_path = cp;
|
inc->i_conn_path = cp;
|
||||||
inc->i_saddr = saddr;
|
inc->i_saddr = *saddr;
|
||||||
inc->i_rdma_cookie = 0;
|
inc->i_rdma_cookie = 0;
|
||||||
inc->i_rx_tstamp.tv_sec = 0;
|
inc->i_rx_tstamp.tv_sec = 0;
|
||||||
inc->i_rx_tstamp.tv_usec = 0;
|
inc->i_rx_tstamp.tv_usec = 0;
|
||||||
|
@ -110,7 +110,7 @@ static void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk,
|
||||||
|
|
||||||
now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs);
|
now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs);
|
||||||
|
|
||||||
rdsdebug("rs %p (%pI4:%u) recv bytes %d buf %d "
|
rdsdebug("rs %p (%pI6c:%u) recv bytes %d buf %d "
|
||||||
"now_cong %d delta %d\n",
|
"now_cong %d delta %d\n",
|
||||||
rs, &rs->rs_bound_addr,
|
rs, &rs->rs_bound_addr,
|
||||||
ntohs(rs->rs_bound_port), rs->rs_rcv_bytes,
|
ntohs(rs->rs_bound_port), rs->rs_rcv_bytes,
|
||||||
|
@ -260,7 +260,7 @@ static void rds_start_mprds(struct rds_connection *conn)
|
||||||
struct rds_conn_path *cp;
|
struct rds_conn_path *cp;
|
||||||
|
|
||||||
if (conn->c_npaths > 1 &&
|
if (conn->c_npaths > 1 &&
|
||||||
IS_CANONICAL(conn->c_laddr, conn->c_faddr)) {
|
rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) < 0) {
|
||||||
for (i = 0; i < conn->c_npaths; i++) {
|
for (i = 0; i < conn->c_npaths; i++) {
|
||||||
cp = &conn->c_path[i];
|
cp = &conn->c_path[i];
|
||||||
rds_conn_path_connect_if_down(cp);
|
rds_conn_path_connect_if_down(cp);
|
||||||
|
@ -284,7 +284,8 @@ static void rds_start_mprds(struct rds_connection *conn)
|
||||||
* conn. This lets loopback, who only has one conn for both directions,
|
* conn. This lets loopback, who only has one conn for both directions,
|
||||||
* tell us which roles the addrs in the conn are playing for this message.
|
* tell us which roles the addrs in the conn are playing for this message.
|
||||||
*/
|
*/
|
||||||
void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
|
void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr,
|
||||||
|
struct in6_addr *daddr,
|
||||||
struct rds_incoming *inc, gfp_t gfp)
|
struct rds_incoming *inc, gfp_t gfp)
|
||||||
{
|
{
|
||||||
struct rds_sock *rs = NULL;
|
struct rds_sock *rs = NULL;
|
||||||
|
@ -339,7 +340,8 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
|
||||||
|
|
||||||
if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) {
|
if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) {
|
||||||
if (inc->i_hdr.h_sport == 0) {
|
if (inc->i_hdr.h_sport == 0) {
|
||||||
rdsdebug("ignore ping with 0 sport from 0x%x\n", saddr);
|
rdsdebug("ignore ping with 0 sport from %pI6c\n",
|
||||||
|
saddr);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
rds_stats_inc(s_recv_ping);
|
rds_stats_inc(s_recv_ping);
|
||||||
|
@ -362,7 +364,7 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
rs = rds_find_bound(daddr, inc->i_hdr.h_dport);
|
rs = rds_find_bound(daddr, inc->i_hdr.h_dport, conn->c_dev_if);
|
||||||
if (!rs) {
|
if (!rs) {
|
||||||
rds_stats_inc(s_recv_drop_no_sock);
|
rds_stats_inc(s_recv_drop_no_sock);
|
||||||
goto out;
|
goto out;
|
||||||
|
@ -625,6 +627,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
|
||||||
struct rds_sock *rs = rds_sk_to_rs(sk);
|
struct rds_sock *rs = rds_sk_to_rs(sk);
|
||||||
long timeo;
|
long timeo;
|
||||||
int ret = 0, nonblock = msg_flags & MSG_DONTWAIT;
|
int ret = 0, nonblock = msg_flags & MSG_DONTWAIT;
|
||||||
|
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
|
||||||
DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
|
DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
|
||||||
struct rds_incoming *inc = NULL;
|
struct rds_incoming *inc = NULL;
|
||||||
|
|
||||||
|
@ -673,7 +676,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
rdsdebug("copying inc %p from %pI4:%u to user\n", inc,
|
rdsdebug("copying inc %p from %pI6c:%u to user\n", inc,
|
||||||
&inc->i_conn->c_faddr,
|
&inc->i_conn->c_faddr,
|
||||||
ntohs(inc->i_hdr.h_sport));
|
ntohs(inc->i_hdr.h_sport));
|
||||||
ret = inc->i_conn->c_trans->inc_copy_to_user(inc, &msg->msg_iter);
|
ret = inc->i_conn->c_trans->inc_copy_to_user(inc, &msg->msg_iter);
|
||||||
|
@ -707,12 +710,26 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
|
||||||
|
|
||||||
rds_stats_inc(s_recv_delivered);
|
rds_stats_inc(s_recv_delivered);
|
||||||
|
|
||||||
if (sin) {
|
if (msg->msg_name) {
|
||||||
|
if (ipv6_addr_v4mapped(&inc->i_saddr)) {
|
||||||
|
sin = (struct sockaddr_in *)msg->msg_name;
|
||||||
|
|
||||||
sin->sin_family = AF_INET;
|
sin->sin_family = AF_INET;
|
||||||
sin->sin_port = inc->i_hdr.h_sport;
|
sin->sin_port = inc->i_hdr.h_sport;
|
||||||
sin->sin_addr.s_addr = inc->i_saddr;
|
sin->sin_addr.s_addr =
|
||||||
|
inc->i_saddr.s6_addr32[3];
|
||||||
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
|
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
|
||||||
msg->msg_namelen = sizeof(*sin);
|
msg->msg_namelen = sizeof(*sin);
|
||||||
|
} else {
|
||||||
|
sin6 = (struct sockaddr_in6 *)msg->msg_name;
|
||||||
|
|
||||||
|
sin6->sin6_family = AF_INET6;
|
||||||
|
sin6->sin6_port = inc->i_hdr.h_sport;
|
||||||
|
sin6->sin6_addr = inc->i_saddr;
|
||||||
|
sin6->sin6_flowinfo = 0;
|
||||||
|
sin6->sin6_scope_id = rs->rs_bound_scope_id;
|
||||||
|
msg->msg_namelen = sizeof(*sin6);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -709,7 +709,7 @@ void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(rds_send_drop_acked);
|
EXPORT_SYMBOL_GPL(rds_send_drop_acked);
|
||||||
|
|
||||||
void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
|
void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in6 *dest)
|
||||||
{
|
{
|
||||||
struct rds_message *rm, *tmp;
|
struct rds_message *rm, *tmp;
|
||||||
struct rds_connection *conn;
|
struct rds_connection *conn;
|
||||||
|
@ -721,8 +721,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
|
||||||
spin_lock_irqsave(&rs->rs_lock, flags);
|
spin_lock_irqsave(&rs->rs_lock, flags);
|
||||||
|
|
||||||
list_for_each_entry_safe(rm, tmp, &rs->rs_send_queue, m_sock_item) {
|
list_for_each_entry_safe(rm, tmp, &rs->rs_send_queue, m_sock_item) {
|
||||||
if (dest && (dest->sin_addr.s_addr != rm->m_daddr ||
|
if (dest &&
|
||||||
dest->sin_port != rm->m_inc.i_hdr.h_dport))
|
(!ipv6_addr_equal(&dest->sin6_addr, &rm->m_daddr) ||
|
||||||
|
dest->sin6_port != rm->m_inc.i_hdr.h_dport))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
list_move(&rm->m_sock_item, &list);
|
list_move(&rm->m_sock_item, &list);
|
||||||
|
@ -1059,8 +1060,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
|
||||||
{
|
{
|
||||||
struct sock *sk = sock->sk;
|
struct sock *sk = sock->sk;
|
||||||
struct rds_sock *rs = rds_sk_to_rs(sk);
|
struct rds_sock *rs = rds_sk_to_rs(sk);
|
||||||
|
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
|
||||||
DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
|
DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
|
||||||
__be32 daddr;
|
|
||||||
__be16 dport;
|
__be16 dport;
|
||||||
struct rds_message *rm = NULL;
|
struct rds_message *rm = NULL;
|
||||||
struct rds_connection *conn;
|
struct rds_connection *conn;
|
||||||
|
@ -1069,10 +1070,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
|
||||||
int nonblock = msg->msg_flags & MSG_DONTWAIT;
|
int nonblock = msg->msg_flags & MSG_DONTWAIT;
|
||||||
long timeo = sock_sndtimeo(sk, nonblock);
|
long timeo = sock_sndtimeo(sk, nonblock);
|
||||||
struct rds_conn_path *cpath;
|
struct rds_conn_path *cpath;
|
||||||
|
struct in6_addr daddr;
|
||||||
|
__u32 scope_id = 0;
|
||||||
size_t total_payload_len = payload_len, rdma_payload_len = 0;
|
size_t total_payload_len = payload_len, rdma_payload_len = 0;
|
||||||
bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) &&
|
bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) &&
|
||||||
sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY));
|
sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY));
|
||||||
int num_sgs = ceil(payload_len, PAGE_SIZE);
|
int num_sgs = ceil(payload_len, PAGE_SIZE);
|
||||||
|
int namelen;
|
||||||
|
|
||||||
/* Mirror Linux UDP mirror of BSD error message compatibility */
|
/* Mirror Linux UDP mirror of BSD error message compatibility */
|
||||||
/* XXX: Perhaps MSG_MORE someday */
|
/* XXX: Perhaps MSG_MORE someday */
|
||||||
|
@ -1081,27 +1085,59 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (msg->msg_namelen) {
|
namelen = msg->msg_namelen;
|
||||||
/* XXX fail non-unicast destination IPs? */
|
if (namelen != 0) {
|
||||||
if (msg->msg_namelen < sizeof(*usin) || usin->sin_family != AF_INET) {
|
if (namelen < sizeof(*usin)) {
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
daddr = usin->sin_addr.s_addr;
|
switch (namelen) {
|
||||||
|
case sizeof(*usin):
|
||||||
|
if (usin->sin_family != AF_INET ||
|
||||||
|
usin->sin_addr.s_addr == htonl(INADDR_ANY) ||
|
||||||
|
usin->sin_addr.s_addr == htonl(INADDR_BROADCAST) ||
|
||||||
|
IN_MULTICAST(ntohl(usin->sin_addr.s_addr))) {
|
||||||
|
ret = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
ipv6_addr_set_v4mapped(usin->sin_addr.s_addr, &daddr);
|
||||||
dport = usin->sin_port;
|
dport = usin->sin_port;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case sizeof(*sin6): {
|
||||||
|
ret = -EPROTONOSUPPORT;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
default:
|
||||||
|
ret = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
/* We only care about consistency with ->connect() */
|
/* We only care about consistency with ->connect() */
|
||||||
lock_sock(sk);
|
lock_sock(sk);
|
||||||
daddr = rs->rs_conn_addr;
|
daddr = rs->rs_conn_addr;
|
||||||
dport = rs->rs_conn_port;
|
dport = rs->rs_conn_port;
|
||||||
|
scope_id = rs->rs_bound_scope_id;
|
||||||
release_sock(sk);
|
release_sock(sk);
|
||||||
}
|
}
|
||||||
|
|
||||||
lock_sock(sk);
|
lock_sock(sk);
|
||||||
if (daddr == 0 || rs->rs_bound_addr == 0) {
|
if (ipv6_addr_any(&rs->rs_bound_addr) || ipv6_addr_any(&daddr)) {
|
||||||
release_sock(sk);
|
release_sock(sk);
|
||||||
ret = -ENOTCONN; /* XXX not a great errno */
|
ret = -ENOTCONN;
|
||||||
goto out;
|
goto out;
|
||||||
|
} else if (namelen != 0) {
|
||||||
|
/* Cannot send to an IPv4 address using an IPv6 source
|
||||||
|
* address and cannot send to an IPv6 address using an
|
||||||
|
* IPv4 source address.
|
||||||
|
*/
|
||||||
|
if (ipv6_addr_v4mapped(&daddr) ^
|
||||||
|
ipv6_addr_v4mapped(&rs->rs_bound_addr)) {
|
||||||
|
release_sock(sk);
|
||||||
|
ret = -EOPNOTSUPP;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
release_sock(sk);
|
release_sock(sk);
|
||||||
|
|
||||||
|
@ -1155,13 +1191,14 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
|
||||||
|
|
||||||
/* rds_conn_create has a spinlock that runs with IRQ off.
|
/* rds_conn_create has a spinlock that runs with IRQ off.
|
||||||
* Caching the conn in the socket helps a lot. */
|
* Caching the conn in the socket helps a lot. */
|
||||||
if (rs->rs_conn && rs->rs_conn->c_faddr == daddr)
|
if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr))
|
||||||
conn = rs->rs_conn;
|
conn = rs->rs_conn;
|
||||||
else {
|
else {
|
||||||
conn = rds_conn_create_outgoing(sock_net(sock->sk),
|
conn = rds_conn_create_outgoing(sock_net(sock->sk),
|
||||||
rs->rs_bound_addr, daddr,
|
&rs->rs_bound_addr, &daddr,
|
||||||
rs->rs_transport,
|
rs->rs_transport,
|
||||||
sock->sk->sk_allocation);
|
sock->sk->sk_allocation,
|
||||||
|
scope_id);
|
||||||
if (IS_ERR(conn)) {
|
if (IS_ERR(conn)) {
|
||||||
ret = PTR_ERR(conn);
|
ret = PTR_ERR(conn);
|
||||||
goto out;
|
goto out;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2006 Oracle. All rights reserved.
|
* Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* This software is available to you under a choice of one of two
|
* This software is available to you under a choice of one of two
|
||||||
* licenses. You may choose to be licensed under the terms of the GNU
|
* licenses. You may choose to be licensed under the terms of the GNU
|
||||||
|
@ -37,6 +37,8 @@
|
||||||
#include <net/tcp.h>
|
#include <net/tcp.h>
|
||||||
#include <net/net_namespace.h>
|
#include <net/net_namespace.h>
|
||||||
#include <net/netns/generic.h>
|
#include <net/netns/generic.h>
|
||||||
|
#include <net/tcp.h>
|
||||||
|
#include <net/addrconf.h>
|
||||||
|
|
||||||
#include "rds.h"
|
#include "rds.h"
|
||||||
#include "tcp.h"
|
#include "tcp.h"
|
||||||
|
@ -262,9 +264,33 @@ out:
|
||||||
spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags);
|
spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int rds_tcp_laddr_check(struct net *net, __be32 addr)
|
static int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr,
|
||||||
|
__u32 scope_id)
|
||||||
{
|
{
|
||||||
if (inet_addr_type(net, addr) == RTN_LOCAL)
|
struct net_device *dev = NULL;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (ipv6_addr_v4mapped(addr)) {
|
||||||
|
if (inet_addr_type(net, addr->s6_addr32[3]) == RTN_LOCAL)
|
||||||
|
return 0;
|
||||||
|
return -EADDRNOTAVAIL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If the scope_id is specified, check only those addresses
|
||||||
|
* hosted on the specified interface.
|
||||||
|
*/
|
||||||
|
if (scope_id != 0) {
|
||||||
|
rcu_read_lock();
|
||||||
|
dev = dev_get_by_index_rcu(net, scope_id);
|
||||||
|
/* scope_id is not valid... */
|
||||||
|
if (!dev) {
|
||||||
|
rcu_read_unlock();
|
||||||
|
return -EADDRNOTAVAIL;
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
}
|
||||||
|
ret = ipv6_chk_addr(net, addr, dev, 0);
|
||||||
|
if (ret)
|
||||||
return 0;
|
return 0;
|
||||||
return -EADDRNOTAVAIL;
|
return -EADDRNOTAVAIL;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2006 Oracle. All rights reserved.
|
* Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* This software is available to you under a choice of one of two
|
* This software is available to you under a choice of one of two
|
||||||
* licenses. You may choose to be licensed under the terms of the GNU
|
* licenses. You may choose to be licensed under the terms of the GNU
|
||||||
|
@ -66,7 +66,8 @@ void rds_tcp_state_change(struct sock *sk)
|
||||||
* RDS connection as RDS_CONN_UP until the reconnect,
|
* RDS connection as RDS_CONN_UP until the reconnect,
|
||||||
* to avoid RDS datagram loss.
|
* to avoid RDS datagram loss.
|
||||||
*/
|
*/
|
||||||
if (!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr) &&
|
if (rds_addr_cmp(&cp->cp_conn->c_laddr,
|
||||||
|
&cp->cp_conn->c_faddr) >= 0 &&
|
||||||
rds_conn_path_transition(cp, RDS_CONN_CONNECTING,
|
rds_conn_path_transition(cp, RDS_CONN_CONNECTING,
|
||||||
RDS_CONN_ERROR)) {
|
RDS_CONN_ERROR)) {
|
||||||
rds_conn_path_drop(cp, false);
|
rds_conn_path_drop(cp, false);
|
||||||
|
@ -88,7 +89,9 @@ out:
|
||||||
int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
|
int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
|
||||||
{
|
{
|
||||||
struct socket *sock = NULL;
|
struct socket *sock = NULL;
|
||||||
struct sockaddr_in src, dest;
|
struct sockaddr_in sin;
|
||||||
|
struct sockaddr *addr;
|
||||||
|
int addrlen;
|
||||||
int ret;
|
int ret;
|
||||||
struct rds_connection *conn = cp->cp_conn;
|
struct rds_connection *conn = cp->cp_conn;
|
||||||
struct rds_tcp_connection *tc = cp->cp_transport_data;
|
struct rds_tcp_connection *tc = cp->cp_transport_data;
|
||||||
|
@ -112,30 +115,33 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
|
||||||
|
|
||||||
rds_tcp_tune(sock);
|
rds_tcp_tune(sock);
|
||||||
|
|
||||||
src.sin_family = AF_INET;
|
sin.sin_family = AF_INET;
|
||||||
src.sin_addr.s_addr = (__force u32)conn->c_laddr;
|
sin.sin_addr.s_addr = conn->c_laddr.s6_addr32[3];
|
||||||
src.sin_port = (__force u16)htons(0);
|
sin.sin_port = 0;
|
||||||
|
addr = (struct sockaddr *)&sin;
|
||||||
|
addrlen = sizeof(sin);
|
||||||
|
|
||||||
ret = sock->ops->bind(sock, (struct sockaddr *)&src, sizeof(src));
|
ret = sock->ops->bind(sock, addr, addrlen);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
rdsdebug("bind failed with %d at address %pI4\n",
|
rdsdebug("bind failed with %d at address %pI6c\n",
|
||||||
ret, &conn->c_laddr);
|
ret, &conn->c_laddr);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
dest.sin_family = AF_INET;
|
sin.sin_family = AF_INET;
|
||||||
dest.sin_addr.s_addr = (__force u32)conn->c_faddr;
|
sin.sin_addr.s_addr = conn->c_faddr.s6_addr32[3];
|
||||||
dest.sin_port = (__force u16)htons(RDS_TCP_PORT);
|
sin.sin_port = htons(RDS_TCP_PORT);
|
||||||
|
addr = (struct sockaddr *)&sin;
|
||||||
|
addrlen = sizeof(sin);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* once we call connect() we can start getting callbacks and they
|
* once we call connect() we can start getting callbacks and they
|
||||||
* own the socket
|
* own the socket
|
||||||
*/
|
*/
|
||||||
rds_tcp_set_callbacks(sock, cp);
|
rds_tcp_set_callbacks(sock, cp);
|
||||||
ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest),
|
ret = sock->ops->connect(sock, addr, addrlen, O_NONBLOCK);
|
||||||
O_NONBLOCK);
|
|
||||||
|
|
||||||
rdsdebug("connect to address %pI4 returned %d\n", &conn->c_faddr, ret);
|
rdsdebug("connect to address %pI6c returned %d\n", &conn->c_faddr, ret);
|
||||||
if (ret == -EINPROGRESS)
|
if (ret == -EINPROGRESS)
|
||||||
ret = 0;
|
ret = 0;
|
||||||
if (ret == 0) {
|
if (ret == 0) {
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2006, 2018 Oracle. All rights reserved.
|
* Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* This software is available to you under a choice of one of two
|
* This software is available to you under a choice of one of two
|
||||||
* licenses. You may choose to be licensed under the terms of the GNU
|
* licenses. You may choose to be licensed under the terms of the GNU
|
||||||
|
@ -83,13 +83,12 @@ static
|
||||||
struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn)
|
struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
bool peer_is_smaller = IS_CANONICAL(conn->c_faddr, conn->c_laddr);
|
|
||||||
int npaths = max_t(int, 1, conn->c_npaths);
|
int npaths = max_t(int, 1, conn->c_npaths);
|
||||||
|
|
||||||
/* for mprds, all paths MUST be initiated by the peer
|
/* for mprds, all paths MUST be initiated by the peer
|
||||||
* with the smaller address.
|
* with the smaller address.
|
||||||
*/
|
*/
|
||||||
if (!peer_is_smaller) {
|
if (rds_addr_cmp(&conn->c_faddr, &conn->c_laddr) >= 0) {
|
||||||
/* Make sure we initiate at least one path if this
|
/* Make sure we initiate at least one path if this
|
||||||
* has not already been done; rds_start_mprds() will
|
* has not already been done; rds_start_mprds() will
|
||||||
* take care of additional paths, if necessary.
|
* take care of additional paths, if necessary.
|
||||||
|
@ -164,13 +163,16 @@ int rds_tcp_accept_one(struct socket *sock)
|
||||||
|
|
||||||
inet = inet_sk(new_sock->sk);
|
inet = inet_sk(new_sock->sk);
|
||||||
|
|
||||||
rdsdebug("accepted tcp %pI4:%u -> %pI4:%u\n",
|
rdsdebug("accepted tcp %pI6c:%u -> %pI6c:%u\n",
|
||||||
&inet->inet_saddr, ntohs(inet->inet_sport),
|
&new_sock->sk->sk_v6_rcv_saddr, ntohs(inet->inet_sport),
|
||||||
&inet->inet_daddr, ntohs(inet->inet_dport));
|
&new_sock->sk->sk_v6_daddr, ntohs(inet->inet_dport));
|
||||||
|
|
||||||
conn = rds_conn_create(sock_net(sock->sk),
|
conn = rds_conn_create(sock_net(sock->sk),
|
||||||
inet->inet_saddr, inet->inet_daddr,
|
&new_sock->sk->sk_v6_rcv_saddr,
|
||||||
&rds_tcp_transport, GFP_KERNEL);
|
&new_sock->sk->sk_v6_daddr,
|
||||||
|
&rds_tcp_transport, GFP_KERNEL,
|
||||||
|
new_sock->sk->sk_bound_dev_if);
|
||||||
|
|
||||||
if (IS_ERR(conn)) {
|
if (IS_ERR(conn)) {
|
||||||
ret = PTR_ERR(conn);
|
ret = PTR_ERR(conn);
|
||||||
goto out;
|
goto out;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2006 Oracle. All rights reserved.
|
* Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* This software is available to you under a choice of one of two
|
* This software is available to you under a choice of one of two
|
||||||
* licenses. You may choose to be licensed under the terms of the GNU
|
* licenses. You may choose to be licensed under the terms of the GNU
|
||||||
|
@ -179,7 +179,7 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
|
||||||
tc->t_tinc = tinc;
|
tc->t_tinc = tinc;
|
||||||
rdsdebug("alloced tinc %p\n", tinc);
|
rdsdebug("alloced tinc %p\n", tinc);
|
||||||
rds_inc_path_init(&tinc->ti_inc, cp,
|
rds_inc_path_init(&tinc->ti_inc, cp,
|
||||||
cp->cp_conn->c_faddr);
|
&cp->cp_conn->c_faddr);
|
||||||
tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
|
tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
|
||||||
local_clock();
|
local_clock();
|
||||||
|
|
||||||
|
@ -239,8 +239,9 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
|
||||||
if (tinc->ti_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
|
if (tinc->ti_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
|
||||||
rds_tcp_cong_recv(conn, tinc);
|
rds_tcp_cong_recv(conn, tinc);
|
||||||
else
|
else
|
||||||
rds_recv_incoming(conn, conn->c_faddr,
|
rds_recv_incoming(conn, &conn->c_faddr,
|
||||||
conn->c_laddr, &tinc->ti_inc,
|
&conn->c_laddr,
|
||||||
|
&tinc->ti_inc,
|
||||||
arg->gfp);
|
arg->gfp);
|
||||||
|
|
||||||
tc->t_tinc_hdr_rem = sizeof(struct rds_header);
|
tc->t_tinc_hdr_rem = sizeof(struct rds_header);
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2006 Oracle. All rights reserved.
|
* Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* This software is available to you under a choice of one of two
|
* This software is available to you under a choice of one of two
|
||||||
* licenses. You may choose to be licensed under the terms of the GNU
|
* licenses. You may choose to be licensed under the terms of the GNU
|
||||||
|
@ -153,7 +153,7 @@ out:
|
||||||
* an incoming RST.
|
* an incoming RST.
|
||||||
*/
|
*/
|
||||||
if (rds_conn_path_up(cp)) {
|
if (rds_conn_path_up(cp)) {
|
||||||
pr_warn("RDS/tcp: send to %pI4 on cp [%d]"
|
pr_warn("RDS/tcp: send to %pI6c on cp [%d]"
|
||||||
"returned %d, "
|
"returned %d, "
|
||||||
"disconnecting and reconnecting\n",
|
"disconnecting and reconnecting\n",
|
||||||
&conn->c_faddr, cp->cp_index, ret);
|
&conn->c_faddr, cp->cp_index, ret);
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2006 Oracle. All rights reserved.
|
* Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* This software is available to you under a choice of one of two
|
* This software is available to you under a choice of one of two
|
||||||
* licenses. You may choose to be licensed under the terms of the GNU
|
* licenses. You may choose to be licensed under the terms of the GNU
|
||||||
|
@ -82,7 +82,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
rdsdebug("conn %p for %pI4 to %pI4 complete\n",
|
rdsdebug("conn %p for %pI6c to %pI6c complete\n",
|
||||||
cp->cp_conn, &cp->cp_conn->c_laddr, &cp->cp_conn->c_faddr);
|
cp->cp_conn, &cp->cp_conn->c_laddr, &cp->cp_conn->c_faddr);
|
||||||
|
|
||||||
cp->cp_reconnect_jiffies = 0;
|
cp->cp_reconnect_jiffies = 0;
|
||||||
|
@ -125,13 +125,13 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
|
||||||
unsigned long rand;
|
unsigned long rand;
|
||||||
struct rds_connection *conn = cp->cp_conn;
|
struct rds_connection *conn = cp->cp_conn;
|
||||||
|
|
||||||
rdsdebug("conn %p for %pI4 to %pI4 reconnect jiffies %lu\n",
|
rdsdebug("conn %p for %pI6c to %pI6c reconnect jiffies %lu\n",
|
||||||
conn, &conn->c_laddr, &conn->c_faddr,
|
conn, &conn->c_laddr, &conn->c_faddr,
|
||||||
cp->cp_reconnect_jiffies);
|
cp->cp_reconnect_jiffies);
|
||||||
|
|
||||||
/* let peer with smaller addr initiate reconnect, to avoid duels */
|
/* let peer with smaller addr initiate reconnect, to avoid duels */
|
||||||
if (conn->c_trans->t_type == RDS_TRANS_TCP &&
|
if (conn->c_trans->t_type == RDS_TRANS_TCP &&
|
||||||
!IS_CANONICAL(conn->c_laddr, conn->c_faddr))
|
rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) >= 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
|
set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
|
||||||
|
@ -145,7 +145,7 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
|
||||||
}
|
}
|
||||||
|
|
||||||
get_random_bytes(&rand, sizeof(rand));
|
get_random_bytes(&rand, sizeof(rand));
|
||||||
rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n",
|
rdsdebug("%lu delay %lu ceil conn %p for %pI6c -> %pI6c\n",
|
||||||
rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
|
rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
|
||||||
conn, &conn->c_laddr, &conn->c_faddr);
|
conn, &conn->c_laddr, &conn->c_faddr);
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
|
@ -167,13 +167,13 @@ void rds_connect_worker(struct work_struct *work)
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (cp->cp_index > 0 &&
|
if (cp->cp_index > 0 &&
|
||||||
!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr))
|
rds_addr_cmp(&cp->cp_conn->c_laddr, &cp->cp_conn->c_faddr) >= 0)
|
||||||
return;
|
return;
|
||||||
clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
|
clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
|
||||||
ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
|
ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
ret = conn->c_trans->conn_path_connect(cp);
|
ret = conn->c_trans->conn_path_connect(cp);
|
||||||
rdsdebug("conn %p for %pI4 to %pI4 dispatched, ret %d\n",
|
rdsdebug("conn %p for %pI6c to %pI6c dispatched, ret %d\n",
|
||||||
conn, &conn->c_laddr, &conn->c_faddr, ret);
|
conn, &conn->c_laddr, &conn->c_faddr, ret);
|
||||||
|
|
||||||
if (ret) {
|
if (ret) {
|
||||||
|
@ -259,3 +259,50 @@ int rds_threads_init(void)
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Compare two IPv6 addresses. Return 0 if the two addresses are equal.
|
||||||
|
* Return 1 if the first is greater. Return -1 if the second is greater.
|
||||||
|
*/
|
||||||
|
int rds_addr_cmp(const struct in6_addr *addr1,
|
||||||
|
const struct in6_addr *addr2)
|
||||||
|
{
|
||||||
|
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
|
||||||
|
const __be64 *a1, *a2;
|
||||||
|
u64 x, y;
|
||||||
|
|
||||||
|
a1 = (__be64 *)addr1;
|
||||||
|
a2 = (__be64 *)addr2;
|
||||||
|
|
||||||
|
if (*a1 != *a2) {
|
||||||
|
if (be64_to_cpu(*a1) < be64_to_cpu(*a2))
|
||||||
|
return -1;
|
||||||
|
else
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
x = be64_to_cpu(*++a1);
|
||||||
|
y = be64_to_cpu(*++a2);
|
||||||
|
if (x < y)
|
||||||
|
return -1;
|
||||||
|
else if (x > y)
|
||||||
|
return 1;
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
u32 a, b;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < 4; i++) {
|
||||||
|
if (addr1->s6_addr32[i] != addr2->s6_addr32[i]) {
|
||||||
|
a = ntohl(addr1->s6_addr32[i]);
|
||||||
|
b = ntohl(addr2->s6_addr32[i]);
|
||||||
|
if (a < b)
|
||||||
|
return -1;
|
||||||
|
else if (a > b)
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(rds_addr_cmp);
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2006 Oracle. All rights reserved.
|
* Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* This software is available to you under a choice of one of two
|
* This software is available to you under a choice of one of two
|
||||||
* licenses. You may choose to be licensed under the terms of the GNU
|
* licenses. You may choose to be licensed under the terms of the GNU
|
||||||
|
@ -33,6 +33,7 @@
|
||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
#include <linux/in.h>
|
#include <linux/in.h>
|
||||||
|
#include <linux/ipv6.h>
|
||||||
|
|
||||||
#include "rds.h"
|
#include "rds.h"
|
||||||
#include "loop.h"
|
#include "loop.h"
|
||||||
|
@ -75,20 +76,26 @@ void rds_trans_put(struct rds_transport *trans)
|
||||||
module_put(trans->t_owner);
|
module_put(trans->t_owner);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr)
|
struct rds_transport *rds_trans_get_preferred(struct net *net,
|
||||||
|
const struct in6_addr *addr,
|
||||||
|
__u32 scope_id)
|
||||||
{
|
{
|
||||||
struct rds_transport *ret = NULL;
|
struct rds_transport *ret = NULL;
|
||||||
struct rds_transport *trans;
|
struct rds_transport *trans;
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
|
|
||||||
if (IN_LOOPBACK(ntohl(addr)))
|
if (ipv6_addr_v4mapped(addr)) {
|
||||||
|
if (*(u_int8_t *)&addr->s6_addr32[3] == IN_LOOPBACKNET)
|
||||||
return &rds_loop_transport;
|
return &rds_loop_transport;
|
||||||
|
} else if (ipv6_addr_loopback(addr)) {
|
||||||
|
return &rds_loop_transport;
|
||||||
|
}
|
||||||
|
|
||||||
down_read(&rds_trans_sem);
|
down_read(&rds_trans_sem);
|
||||||
for (i = 0; i < RDS_TRANS_COUNT; i++) {
|
for (i = 0; i < RDS_TRANS_COUNT; i++) {
|
||||||
trans = transports[i];
|
trans = transports[i];
|
||||||
|
|
||||||
if (trans && (trans->laddr_check(net, addr) == 0) &&
|
if (trans && (trans->laddr_check(net, addr, scope_id) == 0) &&
|
||||||
(!trans->t_owner || try_module_get(trans->t_owner))) {
|
(!trans->t_owner || try_module_get(trans->t_owner))) {
|
||||||
ret = trans;
|
ret = trans;
|
||||||
break;
|
break;
|
||||||
|
|
Загрузка…
Ссылка в новой задаче