Merge branch 'sctp-fully-support-for-dscp-and-flowlabel-per-transport'

Xin Long says:

====================
sctp: full support for dscp and flowlabel per transport

Currently dscp and flowlabel are taken from the sock when sending packets,
but because SCTP is multi-homed, it also supports a dscp and flowlabel
per transport, as described in Section 8.1.12 of RFC 6458.

v1->v2:
  - define ip_queue_xmit as inline in net/ip.h, instead of exporting
    it in Patch 1/5 according to David's suggestion.
  - fix the param len check in sctp_s/getsockopt_peer_addr_params()
    in Patch 3/5 to guarantee that an old app built with old kernel
    headers could work on the newer kernel per Marcelo's point.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2018-07-04 11:36:55 +09:00
Родитель 05bd97fc55 0999f021c9
Коммит 2bdea157b9
9 изменённых файлов: 254 добавлений и 17 удалений

Просмотреть файл

@ -801,4 +801,11 @@ struct sctp_strreset_resptsn {
__be32 receivers_next_tsn;
};
/* Bit masks for the SCTP dscp and flowlabel fields. Each field packs a
 * "value has been set" flag together with the value itself, so users test
 * the SET mask first and then extract the value with the VAL mask.
 */
enum {
	/* dscp: bit 0 is the "set" flag, the six most significant bits of
	 * the byte (0xfc) carry the DSCP value.
	 */
	SCTP_DSCP_SET_MASK	= 1 << 0,
	SCTP_DSCP_VAL_MASK	= 0x3f << 2,

	/* flowlabel: bit 20 (0x100000) is the "set" flag, the 20 least
	 * significant bits (0xfffff) carry the flow label value.
	 */
	SCTP_FLOWLABEL_SET_MASK	= 1 << 20,
	SCTP_FLOWLABEL_VAL_MASK	= (1 << 20) - 1
};
#endif /* __LINUX_SCTP_H__ */

Просмотреть файл

@ -148,7 +148,8 @@ void ip_send_check(struct iphdr *ip);
int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb);
int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb);
int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl);
int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
__u8 tos);
void ip_init(void);
int ip_append_data(struct sock *sk, struct flowi4 *fl4,
int getfrag(void *from, char *to, int offset, int len,
@ -174,6 +175,12 @@ struct sk_buff *ip_make_skb(struct sock *sk, struct flowi4 *fl4,
struct ipcm_cookie *ipc, struct rtable **rtp,
struct inet_cork *cork, unsigned int flags);
/* Convenience wrapper around __ip_queue_xmit() for callers that do not need
 * to override the TOS: it passes the socket's own inet_sk(sk)->tos as the
 * tos argument and returns whatever __ip_queue_xmit() returns.
 */
static inline int ip_queue_xmit(struct sock *sk, struct sk_buff *skb,
struct flowi *fl)
{
return __ip_queue_xmit(sk, skb, fl, inet_sk(sk)->tos);
}
static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4)
{
return __ip_make_skb(sk, fl4, &sk->sk_write_queue, &inet_sk(sk)->cork.base);

Просмотреть файл

@ -193,6 +193,9 @@ struct sctp_sock {
/* This is the max_retrans value for new associations. */
__u16 pathmaxrxt;
__u32 flowlabel;
__u8 dscp;
/* The initial Path MTU to use for new associations. */
__u32 pathmtu;
@ -895,6 +898,9 @@ struct sctp_transport {
*/
__u16 pathmaxrxt;
__u32 flowlabel;
__u8 dscp;
/* This is the partially failed retrans value for the transport
* and will be initialized from the assocs value. This can be changed
* using the SCTP_PEER_ADDR_THLDS socket option
@ -1772,6 +1778,9 @@ struct sctp_association {
*/
__u16 pathmaxrxt;
__u32 flowlabel;
__u8 dscp;
/* Flag that path mtu update is pending */
__u8 pmtu_pending;

Просмотреть файл

@ -763,6 +763,8 @@ enum sctp_spp_flags {
SPP_SACKDELAY_DISABLE = 1<<6, /*Disable SACK*/
SPP_SACKDELAY = SPP_SACKDELAY_ENABLE | SPP_SACKDELAY_DISABLE,
SPP_HB_TIME_IS_ZERO = 1<<7, /* Set HB delay to 0 */
SPP_IPV6_FLOWLABEL = 1<<8,
SPP_DSCP = 1<<9,
};
struct sctp_paddrparams {
@ -773,6 +775,8 @@ struct sctp_paddrparams {
__u32 spp_pathmtu;
__u32 spp_sackdelay;
__u32 spp_flags;
__u32 spp_ipv6_flowlabel;
__u8 spp_dscp;
} __attribute__((packed, aligned(4)));
/*

Просмотреть файл

@ -423,7 +423,8 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4)
}
/* Note: skb->sk can be different from sk, in case of tunnels */
int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
__u8 tos)
{
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
@ -462,7 +463,7 @@ int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
inet->inet_dport,
inet->inet_sport,
sk->sk_protocol,
RT_CONN_FLAGS(sk),
RT_CONN_FLAGS_TOS(sk, tos),
sk->sk_bound_dev_if);
if (IS_ERR(rt))
goto no_route;
@ -478,7 +479,7 @@ packet_routed:
skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0));
skb_reset_network_header(skb);
iph = ip_hdr(skb);
*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (tos & 0xff));
if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df)
iph->frag_off = htons(IP_DF);
else
@ -511,7 +512,7 @@ no_route:
kfree_skb(skb);
return -EHOSTUNREACH;
}
EXPORT_SYMBOL(ip_queue_xmit);
EXPORT_SYMBOL(__ip_queue_xmit);
static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{

Просмотреть файл

@ -115,6 +115,9 @@ static struct sctp_association *sctp_association_init(
/* Initialize path max retrans value. */
asoc->pathmaxrxt = sp->pathmaxrxt;
asoc->flowlabel = sp->flowlabel;
asoc->dscp = sp->dscp;
/* Initialize default path MTU. */
asoc->pathmtu = sp->pathmtu;
@ -647,6 +650,18 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
peer->sackdelay = asoc->sackdelay;
peer->sackfreq = asoc->sackfreq;
if (addr->sa.sa_family == AF_INET6) {
__be32 info = addr->v6.sin6_flowinfo;
if (info) {
peer->flowlabel = ntohl(info & IPV6_FLOWLABEL_MASK);
peer->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
} else {
peer->flowlabel = asoc->flowlabel;
}
}
peer->dscp = asoc->dscp;
/* Enable/disable heartbeat, SACK delay, and path MTU discovery
* based on association setting.
*/

Просмотреть файл

@ -209,12 +209,17 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
struct sock *sk = skb->sk;
struct ipv6_pinfo *np = inet6_sk(sk);
struct flowi6 *fl6 = &transport->fl.u.ip6;
__u8 tclass = np->tclass;
int res;
pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb,
skb->len, &fl6->saddr, &fl6->daddr);
IP6_ECN_flow_xmit(sk, fl6->flowlabel);
if (transport->dscp & SCTP_DSCP_SET_MASK)
tclass = transport->dscp & SCTP_DSCP_VAL_MASK;
if (INET_ECN_is_capable(tclass))
IP6_ECN_flow_xmit(sk, fl6->flowlabel);
if (!(transport->param_flags & SPP_PMTUD_ENABLE))
skb->ignore_df = 1;
@ -223,7 +228,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
rcu_read_lock();
res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt),
np->tclass);
tclass);
rcu_read_unlock();
return res;
}
@ -254,6 +259,17 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
fl6->flowi6_oif = daddr->v6.sin6_scope_id;
else if (asoc)
fl6->flowi6_oif = asoc->base.sk->sk_bound_dev_if;
if (t->flowlabel & SCTP_FLOWLABEL_SET_MASK)
fl6->flowlabel = htonl(t->flowlabel & SCTP_FLOWLABEL_VAL_MASK);
if (np->sndflow && (fl6->flowlabel & IPV6_FLOWLABEL_MASK)) {
struct ip6_flowlabel *flowlabel;
flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
if (!flowlabel)
goto out;
fl6_sock_release(flowlabel);
}
pr_debug("%s: dst=%pI6 ", __func__, &fl6->daddr);

Просмотреть файл

@ -426,13 +426,16 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
struct dst_entry *dst = NULL;
union sctp_addr *daddr = &t->ipaddr;
union sctp_addr dst_saddr;
__u8 tos = inet_sk(sk)->tos;
if (t->dscp & SCTP_DSCP_SET_MASK)
tos = t->dscp & SCTP_DSCP_VAL_MASK;
memset(fl4, 0x0, sizeof(struct flowi4));
fl4->daddr = daddr->v4.sin_addr.s_addr;
fl4->fl4_dport = daddr->v4.sin_port;
fl4->flowi4_proto = IPPROTO_SCTP;
if (asoc) {
fl4->flowi4_tos = RT_CONN_FLAGS(asoc->base.sk);
fl4->flowi4_tos = RT_CONN_FLAGS_TOS(asoc->base.sk, tos);
fl4->flowi4_oif = asoc->base.sk->sk_bound_dev_if;
fl4->fl4_sport = htons(asoc->base.bind_addr.port);
}
@ -495,7 +498,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
fl4->fl4_sport = laddr->a.v4.sin_port;
flowi4_update_output(fl4,
asoc->base.sk->sk_bound_dev_if,
RT_CONN_FLAGS(asoc->base.sk),
RT_CONN_FLAGS_TOS(asoc->base.sk, tos),
daddr->v4.sin_addr.s_addr,
laddr->a.v4.sin_addr.s_addr);
@ -971,16 +974,21 @@ static inline int sctp_v4_xmit(struct sk_buff *skb,
struct sctp_transport *transport)
{
struct inet_sock *inet = inet_sk(skb->sk);
__u8 dscp = inet->tos;
pr_debug("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n", __func__, skb,
skb->len, &transport->fl.u.ip4.saddr, &transport->fl.u.ip4.daddr);
skb->len, &transport->fl.u.ip4.saddr,
&transport->fl.u.ip4.daddr);
if (transport->dscp & SCTP_DSCP_SET_MASK)
dscp = transport->dscp & SCTP_DSCP_VAL_MASK;
inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ?
IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS);
return ip_queue_xmit(&inet->sk, skb, &transport->fl);
return __ip_queue_xmit(&inet->sk, skb, &transport->fl, dscp);
}
static struct sctp_af sctp_af_inet;

Просмотреть файл

@ -1697,6 +1697,7 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
struct sctp_association *asoc;
enum sctp_scope scope;
struct cmsghdr *cmsg;
__be32 flowinfo = 0;
struct sctp_af *af;
int err;
@ -1781,6 +1782,9 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
if (!cmsgs->addrs_msg)
return 0;
if (daddr->sa.sa_family == AF_INET6)
flowinfo = daddr->v6.sin6_flowinfo;
/* sendv addr list parse */
for_each_cmsghdr(cmsg, cmsgs->addrs_msg) {
struct sctp_transport *transport;
@ -1813,6 +1817,7 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
}
dlen = sizeof(struct in6_addr);
daddr->v6.sin6_flowinfo = flowinfo;
daddr->v6.sin6_family = AF_INET6;
daddr->v6.sin6_port = htons(asoc->peer.port);
memcpy(&daddr->v6.sin6_addr, CMSG_DATA(cmsg), dlen);
@ -2393,6 +2398,8 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
* uint32_t spp_pathmtu;
* uint32_t spp_sackdelay;
* uint32_t spp_flags;
* uint32_t spp_ipv6_flowlabel;
* uint8_t spp_dscp;
* };
*
* spp_assoc_id - (one-to-many style socket) This is filled in the
@ -2472,6 +2479,45 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
* also that this field is mutually exclusive to
* SPP_SACKDELAY_ENABLE, setting both will have undefined
* results.
*
* SPP_IPV6_FLOWLABEL: Setting this flag enables the
* setting of the IPV6 flow label value. The value is
* contained in the spp_ipv6_flowlabel field.
* Upon retrieval, this flag will be set to indicate that
* the spp_ipv6_flowlabel field has a valid value returned.
* If a specific destination address is set (in the
* spp_address field), then the value returned is that of
* the address. If just an association is specified (and
* no address), then the association's default flow label
* is returned. If neither an association nor a destination
* is specified, then the socket's default flow label is
* returned. For non-IPv6 sockets, this flag will be left
* cleared.
*
* SPP_DSCP: Setting this flag enables the setting of the
* Differentiated Services Code Point (DSCP) value
* associated with either the association or a specific
* address. The value is obtained in the spp_dscp field.
* Upon retrieval, this flag will be set to indicate that
* the spp_dscp field has a valid value returned. If a
* specific destination address is set when called (in the
* spp_address field), then that specific destination
* address's DSCP value is returned. If just an association
* is specified, then the association's default DSCP is
* returned. If neither an association nor a destination is
* specified, then the socket's default DSCP is returned.
*
* spp_ipv6_flowlabel
* - This field is used in conjunction with the
* SPP_IPV6_FLOWLABEL flag and contains the IPv6 flow label.
* The 20 least significant bits are used for the flow
* label. This setting has precedence over any IPv6-layer
* setting.
*
* spp_dscp - This field is used in conjunction with the SPP_DSCP flag
* and contains the DSCP. The 6 most significant bits are
* used for the DSCP. This setting has precedence over any
* IPv4- or IPv6- layer setting.
*/
static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
struct sctp_transport *trans,
@ -2611,6 +2657,51 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
}
}
if (params->spp_flags & SPP_IPV6_FLOWLABEL) {
if (trans && trans->ipaddr.sa.sa_family == AF_INET6) {
trans->flowlabel = params->spp_ipv6_flowlabel &
SCTP_FLOWLABEL_VAL_MASK;
trans->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
} else if (asoc) {
list_for_each_entry(trans,
&asoc->peer.transport_addr_list,
transports) {
if (trans->ipaddr.sa.sa_family != AF_INET6)
continue;
trans->flowlabel = params->spp_ipv6_flowlabel &
SCTP_FLOWLABEL_VAL_MASK;
trans->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
}
asoc->flowlabel = params->spp_ipv6_flowlabel &
SCTP_FLOWLABEL_VAL_MASK;
asoc->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
} else if (sctp_opt2sk(sp)->sk_family == AF_INET6) {
sp->flowlabel = params->spp_ipv6_flowlabel &
SCTP_FLOWLABEL_VAL_MASK;
sp->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
}
}
if (params->spp_flags & SPP_DSCP) {
if (trans) {
trans->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK;
trans->dscp |= SCTP_DSCP_SET_MASK;
} else if (asoc) {
list_for_each_entry(trans,
&asoc->peer.transport_addr_list,
transports) {
trans->dscp = params->spp_dscp &
SCTP_DSCP_VAL_MASK;
trans->dscp |= SCTP_DSCP_SET_MASK;
}
asoc->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK;
asoc->dscp |= SCTP_DSCP_SET_MASK;
} else {
sp->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK;
sp->dscp |= SCTP_DSCP_SET_MASK;
}
}
return 0;
}
@ -2625,11 +2716,18 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk,
int error;
int hb_change, pmtud_change, sackdelay_change;
if (optlen != sizeof(struct sctp_paddrparams))
if (optlen == sizeof(params)) {
if (copy_from_user(&params, optval, optlen))
return -EFAULT;
} else if (optlen == ALIGN(offsetof(struct sctp_paddrparams,
spp_ipv6_flowlabel), 4)) {
if (copy_from_user(&params, optval, optlen))
return -EFAULT;
if (params.spp_flags & (SPP_DSCP | SPP_IPV6_FLOWLABEL))
return -EINVAL;
} else {
return -EINVAL;
if (copy_from_user(&params, optval, optlen))
return -EFAULT;
}
/* Validate flags and value parameters. */
hb_change = params.spp_flags & SPP_HB;
@ -5453,6 +5551,45 @@ out:
* also that this field is mutually exclusive to
* SPP_SACKDELAY_ENABLE, setting both will have undefined
* results.
*
* SPP_IPV6_FLOWLABEL: Setting this flag enables the
* setting of the IPV6 flow label value. The value is
* contained in the spp_ipv6_flowlabel field.
* Upon retrieval, this flag will be set to indicate that
* the spp_ipv6_flowlabel field has a valid value returned.
* If a specific destination address is set (in the
* spp_address field), then the value returned is that of
* the address. If just an association is specified (and
* no address), then the association's default flow label
* is returned. If neither an association nor a destination
* is specified, then the socket's default flow label is
* returned. For non-IPv6 sockets, this flag will be left
* cleared.
*
* SPP_DSCP: Setting this flag enables the setting of the
* Differentiated Services Code Point (DSCP) value
* associated with either the association or a specific
* address. The value is obtained in the spp_dscp field.
* Upon retrieval, this flag will be set to indicate that
* the spp_dscp field has a valid value returned. If a
* specific destination address is set when called (in the
* spp_address field), then that specific destination
* address's DSCP value is returned. If just an association
* is specified, then the association's default DSCP is
* returned. If neither an association nor a destination is
* specified, then the socket's default DSCP is returned.
*
* spp_ipv6_flowlabel
* - This field is used in conjunction with the
* SPP_IPV6_FLOWLABEL flag and contains the IPv6 flow label.
* The 20 least significant bits are used for the flow
* label. This setting has precedence over any IPv6-layer
* setting.
*
* spp_dscp - This field is used in conjunction with the SPP_DSCP flag
* and contains the DSCP. The 6 most significant bits are
* used for the DSCP. This setting has precedence over any
* IPv4- or IPv6- layer setting.
*/
static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
char __user *optval, int __user *optlen)
@ -5462,9 +5599,15 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
struct sctp_association *asoc = NULL;
struct sctp_sock *sp = sctp_sk(sk);
if (len < sizeof(struct sctp_paddrparams))
if (len >= sizeof(params))
len = sizeof(params);
else if (len >= ALIGN(offsetof(struct sctp_paddrparams,
spp_ipv6_flowlabel), 4))
len = ALIGN(offsetof(struct sctp_paddrparams,
spp_ipv6_flowlabel), 4);
else
return -EINVAL;
len = sizeof(struct sctp_paddrparams);
if (copy_from_user(&params, optval, len))
return -EFAULT;
@ -5499,6 +5642,15 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
/*draft-11 doesn't say what to return in spp_flags*/
params.spp_flags = trans->param_flags;
if (trans->flowlabel & SCTP_FLOWLABEL_SET_MASK) {
params.spp_ipv6_flowlabel = trans->flowlabel &
SCTP_FLOWLABEL_VAL_MASK;
params.spp_flags |= SPP_IPV6_FLOWLABEL;
}
if (trans->dscp & SCTP_DSCP_SET_MASK) {
params.spp_dscp = trans->dscp & SCTP_DSCP_VAL_MASK;
params.spp_flags |= SPP_DSCP;
}
} else if (asoc) {
/* Fetch association values. */
params.spp_hbinterval = jiffies_to_msecs(asoc->hbinterval);
@ -5508,6 +5660,15 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
/*draft-11 doesn't say what to return in spp_flags*/
params.spp_flags = asoc->param_flags;
if (asoc->flowlabel & SCTP_FLOWLABEL_SET_MASK) {
params.spp_ipv6_flowlabel = asoc->flowlabel &
SCTP_FLOWLABEL_VAL_MASK;
params.spp_flags |= SPP_IPV6_FLOWLABEL;
}
if (asoc->dscp & SCTP_DSCP_SET_MASK) {
params.spp_dscp = asoc->dscp & SCTP_DSCP_VAL_MASK;
params.spp_flags |= SPP_DSCP;
}
} else {
/* Fetch socket values. */
params.spp_hbinterval = sp->hbinterval;
@ -5517,6 +5678,15 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
/*draft-11 doesn't say what to return in spp_flags*/
params.spp_flags = sp->param_flags;
if (sp->flowlabel & SCTP_FLOWLABEL_SET_MASK) {
params.spp_ipv6_flowlabel = sp->flowlabel &
SCTP_FLOWLABEL_VAL_MASK;
params.spp_flags |= SPP_IPV6_FLOWLABEL;
}
if (sp->dscp & SCTP_DSCP_SET_MASK) {
params.spp_dscp = sp->dscp & SCTP_DSCP_VAL_MASK;
params.spp_flags |= SPP_DSCP;
}
}
if (copy_to_user(optval, &params, len))