SCTP has this pecualiarity that its packets cannot be just segmented to
(P)MTU. Its chunks must be contained in IP segments, padding respected.
So we can't just generate a big skb, set gso_size to the fragmentation
point and deliver it to IP layer.

This patch takes a different approach. SCTP will now build a skb as it
would be if it was received using GRO. That is, there will be a cover
skb with protocol headers and children ones containing the actual
segments, already segmented to a way that respects SCTP RFCs.

With that, we can tell skb_segment() to just split based on frag_list,
trusting its sizes are already in accordance.

This way SCTP can benefit from GSO and instead of passing several
packets through the stack, it can pass a single large packet.

v2:
- Added support for receiving GSO frames, as requested by Dave Miller.
- Clear skb->cb if packet is GSO (otherwise it's not used by SCTP)
- Added heuristics similar to what we have in TCP for not generating
  single GSO packets that fills cwnd.
v3:
- consider sctphdr size in skb_gso_transport_seglen()
- rebased due to 5c7cdf339a ("gso: Remove arbitrary checks for
  unsupported GSO")

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Tested-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Marcelo Ricardo Leitner 2016-06-02 15:05:43 -03:00 коммит произвёл David S. Miller
Родитель 3acb50c18d
Коммит 90017accff
14 изменённых файлов: 428 добавлений и 125 удалений

Просмотреть файл

@ -53,8 +53,9 @@ enum {
* headers in software.
*/
NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */
NETIF_F_GSO_SCTP_BIT, /* ... SCTP fragmentation */
/**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */
NETIF_F_GSO_TUNNEL_REMCSUM_BIT,
NETIF_F_GSO_SCTP_BIT,
NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */
NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */
@ -128,6 +129,7 @@ enum {
#define NETIF_F_TSO_MANGLEID __NETIF_F(TSO_MANGLEID)
#define NETIF_F_GSO_PARTIAL __NETIF_F(GSO_PARTIAL)
#define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM)
#define NETIF_F_GSO_SCTP __NETIF_F(GSO_SCTP)
#define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
#define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX)
#define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX)
@ -166,7 +168,8 @@ enum {
NETIF_F_FSO)
/* List of features with software fallbacks. */
#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO)
#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO | \
NETIF_F_GSO_SCTP)
/*
* If one device supports one of these features, then enable them

Просмотреть файл

@ -4012,6 +4012,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_SCTP != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT));
return (features & feature) == feature;
}

Просмотреть файл

@ -487,6 +487,8 @@ enum {
SKB_GSO_PARTIAL = 1 << 13,
SKB_GSO_TUNNEL_REMCSUM = 1 << 14,
SKB_GSO_SCTP = 1 << 15,
};
#if BITS_PER_LONG > 32

Просмотреть файл

@ -186,6 +186,10 @@ void sctp_assocs_proc_exit(struct net *net);
int sctp_remaddr_proc_init(struct net *net);
void sctp_remaddr_proc_exit(struct net *net);
/*
* sctp/offload.c
*/
int sctp_offload_init(void);
/*
* Module global variables

Просмотреть файл

@ -566,6 +566,9 @@ struct sctp_chunk {
/* This points to the sk_buff containing the actual data. */
struct sk_buff *skb;
/* In case of GSO packets, this will store the head one */
struct sk_buff *head_skb;
/* These are the SCTP headers by reverse order in a packet.
* Note that some of these may happen more than once. In that
* case, we point at the "current" one, whatever that means
@ -696,6 +699,8 @@ struct sctp_packet {
size_t overhead;
/* This is the total size of all chunks INCLUDING padding. */
size_t size;
/* This is the maximum size this packet may have */
size_t max_size;
/* The packet is destined for this transport address.
* The function we finally use to pass down to the next lower

Просмотреть файл

@ -89,6 +89,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
[NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
[NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
[NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",

Просмотреть файл

@ -49,6 +49,7 @@
#include <linux/slab.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/sctp.h>
#include <linux/netdevice.h>
#ifdef CONFIG_NET_CLS_ACT
#include <net/pkt_sched.h>
@ -4383,6 +4384,8 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
thlen += inner_tcp_hdrlen(skb);
} else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
thlen = tcp_hdrlen(skb);
} else if (unlikely(shinfo->gso_type & SKB_GSO_SCTP)) {
thlen = sizeof(struct sctphdr);
}
/* UFO sets gso_size to the size of the fragmentation
* payload, i.e. the size of the L4 (UDP) header is already

Просмотреть файл

@ -11,7 +11,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
transport.o chunk.o sm_make_chunk.o ulpevent.o \
inqueue.o outqueue.o ulpqueue.o \
tsnmap.o bind_addr.o socket.o primitive.o \
output.o input.o debug.o ssnmap.o auth.o
output.o input.o debug.o ssnmap.o auth.o \
offload.o
sctp_probe-y := probe.o

Просмотреть файл

@ -139,7 +139,9 @@ int sctp_rcv(struct sk_buff *skb)
skb->csum_valid = 0; /* Previous value not applicable */
if (skb_csum_unnecessary(skb))
__skb_decr_checksum_unnecessary(skb);
else if (!sctp_checksum_disable && sctp_rcv_checksum(net, skb) < 0)
else if (!sctp_checksum_disable &&
!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) &&
sctp_rcv_checksum(net, skb) < 0)
goto discard_it;
skb->csum_valid = 1;
@ -1175,6 +1177,14 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
{
sctp_chunkhdr_t *ch;
/* We do not allow GSO frames here as we need to linearize and
* then cannot guarantee frame boundaries. This shouldn't be an
* issue as packets hitting this are mostly INIT or INIT-ACK and
* those cannot be on GSO-style anyway.
*/
if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP)
return NULL;
if (skb_linearize(skb))
return NULL;

Просмотреть файл

@ -138,6 +138,17 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
if (chunk->singleton ||
chunk->end_of_packet ||
chunk->pdiscard) {
if (chunk->head_skb == chunk->skb) {
chunk->skb = skb_shinfo(chunk->skb)->frag_list;
goto new_skb;
}
if (chunk->skb->next) {
chunk->skb = chunk->skb->next;
goto new_skb;
}
if (chunk->head_skb)
chunk->skb = chunk->head_skb;
sctp_chunk_free(chunk);
chunk = queue->in_progress = NULL;
} else {
@ -155,15 +166,15 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
next_chunk:
/* Is the queue empty? */
if (list_empty(&queue->in_chunk_list))
entry = sctp_list_dequeue(&queue->in_chunk_list);
if (!entry)
return NULL;
entry = queue->in_chunk_list.next;
chunk = list_entry(entry, struct sctp_chunk, list);
list_del_init(entry);
/* Linearize if it's not GSO */
if (skb_is_nonlinear(chunk->skb)) {
if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) != SKB_GSO_SCTP &&
skb_is_nonlinear(chunk->skb)) {
if (skb_linearize(chunk->skb)) {
__SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS);
sctp_chunk_free(chunk);
@ -174,15 +185,39 @@ next_chunk:
chunk->sctp_hdr = sctp_hdr(chunk->skb);
}
queue->in_progress = chunk;
if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) {
/* GSO-marked skbs but without frags, handle
* them normally
*/
if (skb_shinfo(chunk->skb)->frag_list)
chunk->head_skb = chunk->skb;
/* This is the first chunk in the packet. */
chunk->singleton = 1;
ch = (sctp_chunkhdr_t *) chunk->skb->data;
chunk->data_accepted = 0;
/* skbs with "cover letter" */
if (chunk->head_skb && chunk->skb->data_len == chunk->skb->len)
chunk->skb = skb_shinfo(chunk->skb)->frag_list;
if (WARN_ON(!chunk->skb)) {
__SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS);
sctp_chunk_free(chunk);
goto next_chunk;
}
}
if (chunk->asoc)
sock_rps_save_rxhash(chunk->asoc->base.sk, chunk->skb);
queue->in_progress = chunk;
new_skb:
/* This is the first chunk in the packet. */
ch = (sctp_chunkhdr_t *) chunk->skb->data;
chunk->singleton = 1;
chunk->data_accepted = 0;
chunk->pdiscard = 0;
chunk->auth = 0;
chunk->has_asconf = 0;
chunk->end_of_packet = 0;
chunk->ecn_ce_done = 0;
}
chunk->chunk_hdr = ch;

98
net/sctp/offload.c Normal file
Просмотреть файл

@ -0,0 +1,98 @@
/*
* sctp_offload - GRO/GSO Offloading for SCTP
*
* Copyright (C) 2015, Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/socket.h>
#include <linux/sctp.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/kfifo.h>
#include <linux/time.h>
#include <net/net_namespace.h>
#include <linux/skbuff.h>
#include <net/sctp/sctp.h>
#include <net/sctp/checksum.h>
#include <net/protocol.h>
static __le32 sctp_gso_make_checksum(struct sk_buff *skb)
{
skb->ip_summed = CHECKSUM_NONE;
return sctp_compute_cksum(skb, skb_transport_offset(skb));
}
static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct sctphdr *sh;
sh = sctp_hdr(skb);
if (!pskb_may_pull(skb, sizeof(*sh)))
goto out;
__skb_pull(skb, sizeof(*sh));
if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
/* Packet is from an untrusted source, reset gso_segs. */
struct skb_shared_info *pinfo = skb_shinfo(skb);
struct sk_buff *frag_iter;
pinfo->gso_segs = 0;
if (skb->len != skb->data_len) {
/* Means we have chunks in here too */
pinfo->gso_segs++;
}
skb_walk_frags(skb, frag_iter)
pinfo->gso_segs++;
segs = NULL;
goto out;
}
segs = skb_segment(skb, features | NETIF_F_HW_CSUM);
if (IS_ERR(segs))
goto out;
/* All that is left is update SCTP CRC if necessary */
if (!(features & NETIF_F_SCTP_CRC)) {
for (skb = segs; skb; skb = skb->next) {
if (skb->ip_summed == CHECKSUM_PARTIAL) {
sh = sctp_hdr(skb);
sh->checksum = sctp_gso_make_checksum(skb);
}
}
}
out:
return segs;
}
static const struct net_offload sctp_offload = {
.callbacks = {
.gso_segment = sctp_gso_segment,
},
};
int __init sctp_offload_init(void)
{
return inet_add_offload(&sctp_offload, IPPROTO_SCTP);
}

Просмотреть файл

@ -84,18 +84,42 @@ static void sctp_packet_reset(struct sctp_packet *packet)
struct sctp_packet *sctp_packet_config(struct sctp_packet *packet,
__u32 vtag, int ecn_capable)
{
struct sctp_chunk *chunk = NULL;
struct sctp_transport *tp = packet->transport;
struct sctp_association *asoc = tp->asoc;
pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag);
packet->vtag = vtag;
if (asoc && tp->dst) {
struct sock *sk = asoc->base.sk;
rcu_read_lock();
if (__sk_dst_get(sk) != tp->dst) {
dst_hold(tp->dst);
sk_setup_caps(sk, tp->dst);
}
if (sk_can_gso(sk)) {
struct net_device *dev = tp->dst->dev;
packet->max_size = dev->gso_max_size;
} else {
packet->max_size = asoc->pathmtu;
}
rcu_read_unlock();
} else {
packet->max_size = tp->pathmtu;
}
if (ecn_capable && sctp_packet_empty(packet)) {
chunk = sctp_get_ecne_prepend(packet->transport->asoc);
struct sctp_chunk *chunk;
/* If there a is a prepend chunk stick it on the list before
* any other chunks get appended.
*/
chunk = sctp_get_ecne_prepend(asoc);
if (chunk)
sctp_packet_append_chunk(packet, chunk);
}
@ -381,12 +405,15 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
struct sctp_transport *tp = packet->transport;
struct sctp_association *asoc = tp->asoc;
struct sctphdr *sh;
struct sk_buff *nskb;
struct sk_buff *nskb = NULL, *head = NULL;
struct sctp_chunk *chunk, *tmp;
struct sock *sk;
int err = 0;
int padding; /* How much padding do we need? */
int pkt_size;
__u8 has_data = 0;
int gso = 0;
int pktcount = 0;
struct dst_entry *dst;
unsigned char *auth = NULL; /* pointer to auth in skb data */
@ -400,18 +427,37 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
sk = chunk->skb->sk;
/* Allocate the new skb. */
nskb = alloc_skb(packet->size + MAX_HEADER, gfp);
if (!nskb)
/* Allocate the head skb, or main one if not in GSO */
if (packet->size > tp->pathmtu && !packet->ipfragok) {
if (sk_can_gso(sk)) {
gso = 1;
pkt_size = packet->overhead;
} else {
/* If this happens, we trash this packet and try
* to build a new one, hopefully correct this
* time. Application may notice this error.
*/
pr_err_once("Trying to GSO but underlying device doesn't support it.");
goto nomem;
}
} else {
pkt_size = packet->size;
}
head = alloc_skb(pkt_size + MAX_HEADER, gfp);
if (!head)
goto nomem;
if (gso) {
NAPI_GRO_CB(head)->last = head;
skb_shinfo(head)->gso_type = sk->sk_gso_type;
}
/* Make sure the outbound skb has enough header room reserved. */
skb_reserve(nskb, packet->overhead + MAX_HEADER);
skb_reserve(head, packet->overhead + MAX_HEADER);
/* Set the owning socket so that we know where to get the
* destination IP address.
*/
sctp_packet_set_owner_w(nskb, sk);
sctp_packet_set_owner_w(head, sk);
if (!sctp_transport_dst_check(tp)) {
sctp_transport_route(tp, NULL, sctp_sk(sk));
@ -422,11 +468,11 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
dst = dst_clone(tp->dst);
if (!dst)
goto no_route;
skb_dst_set(nskb, dst);
skb_dst_set(head, dst);
/* Build the SCTP header. */
sh = (struct sctphdr *)skb_push(nskb, sizeof(struct sctphdr));
skb_reset_transport_header(nskb);
sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr));
skb_reset_transport_header(head);
sh->source = htons(packet->source_port);
sh->dest = htons(packet->destination_port);
@ -441,90 +487,133 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
sh->vtag = htonl(packet->vtag);
sh->checksum = 0;
/**
* 6.10 Bundling
*
* An endpoint bundles chunks by simply including multiple
* chunks in one outbound SCTP packet. ...
*/
/**
* 3.2 Chunk Field Descriptions
*
* The total length of a chunk (including Type, Length and
* Value fields) MUST be a multiple of 4 bytes. If the length
* of the chunk is not a multiple of 4 bytes, the sender MUST
* pad the chunk with all zero bytes and this padding is not
* included in the chunk length field. The sender should
* never pad with more than 3 bytes.
*
* [This whole comment explains WORD_ROUND() below.]
*/
pr_debug("***sctp_transmit_packet***\n");
list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
list_del_init(&chunk->list);
if (sctp_chunk_is_data(chunk)) {
/* 6.3.1 C4) When data is in flight and when allowed
* by rule C5, a new RTT measurement MUST be made each
* round trip. Furthermore, new RTT measurements
* SHOULD be made no more than once per round-trip
* for a given destination transport address.
*/
do {
/* Set up convenience variables... */
chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
pktcount++;
if (!chunk->resent && !tp->rto_pending) {
chunk->rtt_in_progress = 1;
tp->rto_pending = 1;
/* Calculate packet size, so it fits in PMTU. Leave
* other chunks for the next packets.
*/
if (gso) {
pkt_size = packet->overhead;
list_for_each_entry(chunk, &packet->chunk_list, list) {
int padded = WORD_ROUND(chunk->skb->len);
if (pkt_size + padded > tp->pathmtu)
break;
pkt_size += padded;
}
has_data = 1;
/* Allocate a new skb. */
nskb = alloc_skb(pkt_size + MAX_HEADER, gfp);
if (!nskb)
goto nomem;
/* Make sure the outbound skb has enough header
* room reserved.
*/
skb_reserve(nskb, packet->overhead + MAX_HEADER);
} else {
nskb = head;
}
padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len;
if (padding)
memset(skb_put(chunk->skb, padding), 0, padding);
/* if this is the auth chunk that we are adding,
* store pointer where it will be added and put
* the auth into the packet.
/**
* 3.2 Chunk Field Descriptions
*
* The total length of a chunk (including Type, Length and
* Value fields) MUST be a multiple of 4 bytes. If the length
* of the chunk is not a multiple of 4 bytes, the sender MUST
* pad the chunk with all zero bytes and this padding is not
* included in the chunk length field. The sender should
* never pad with more than 3 bytes.
*
* [This whole comment explains WORD_ROUND() below.]
*/
if (chunk == packet->auth)
auth = skb_tail_pointer(nskb);
memcpy(skb_put(nskb, chunk->skb->len),
pkt_size -= packet->overhead;
list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
list_del_init(&chunk->list);
if (sctp_chunk_is_data(chunk)) {
/* 6.3.1 C4) When data is in flight and when allowed
* by rule C5, a new RTT measurement MUST be made each
* round trip. Furthermore, new RTT measurements
* SHOULD be made no more than once per round-trip
* for a given destination transport address.
*/
if (!chunk->resent && !tp->rto_pending) {
chunk->rtt_in_progress = 1;
tp->rto_pending = 1;
}
has_data = 1;
}
padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len;
if (padding)
memset(skb_put(chunk->skb, padding), 0, padding);
/* if this is the auth chunk that we are adding,
* store pointer where it will be added and put
* the auth into the packet.
*/
if (chunk == packet->auth)
auth = skb_tail_pointer(nskb);
memcpy(skb_put(nskb, chunk->skb->len),
chunk->skb->data, chunk->skb->len);
pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, "
"rtt_in_progress:%d\n", chunk,
sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)),
chunk->has_tsn ? "TSN" : "No TSN",
chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0,
ntohs(chunk->chunk_hdr->length), chunk->skb->len,
chunk->rtt_in_progress);
pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, rtt_in_progress:%d\n",
chunk,
sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)),
chunk->has_tsn ? "TSN" : "No TSN",
chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0,
ntohs(chunk->chunk_hdr->length), chunk->skb->len,
chunk->rtt_in_progress);
/*
* If this is a control chunk, this is our last
* reference. Free data chunks after they've been
* acknowledged or have failed.
/* If this is a control chunk, this is our last
* reference. Free data chunks after they've been
* acknowledged or have failed.
* Re-queue auth chunks if needed.
*/
pkt_size -= WORD_ROUND(chunk->skb->len);
if (chunk == packet->auth && !list_empty(&packet->chunk_list))
list_add(&chunk->list, &packet->chunk_list);
else if (!sctp_chunk_is_data(chunk))
sctp_chunk_free(chunk);
if (!pkt_size)
break;
}
/* SCTP-AUTH, Section 6.2
* The sender MUST calculate the MAC as described in RFC2104 [2]
* using the hash function H as described by the MAC Identifier and
* the shared association key K based on the endpoint pair shared key
* described by the shared key identifier. The 'data' used for the
* computation of the AUTH-chunk is given by the AUTH chunk with its
* HMAC field set to zero (as shown in Figure 6) followed by all
* chunks that are placed after the AUTH chunk in the SCTP packet.
*/
if (!sctp_chunk_is_data(chunk))
sctp_chunk_free(chunk);
}
if (auth)
sctp_auth_calculate_hmac(asoc, nskb,
(struct sctp_auth_chunk *)auth,
gfp);
/* SCTP-AUTH, Section 6.2
* The sender MUST calculate the MAC as described in RFC2104 [2]
* using the hash function H as described by the MAC Identifier and
* the shared association key K based on the endpoint pair shared key
* described by the shared key identifier. The 'data' used for the
* computation of the AUTH-chunk is given by the AUTH chunk with its
* HMAC field set to zero (as shown in Figure 6) followed by all
* chunks that are placed after the AUTH chunk in the SCTP packet.
*/
if (auth)
sctp_auth_calculate_hmac(asoc, nskb,
(struct sctp_auth_chunk *)auth,
gfp);
if (!gso)
break;
if (skb_gro_receive(&head, nskb))
goto nomem;
nskb = NULL;
if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >=
sk->sk_gso_max_segs))
goto nomem;
} while (!list_empty(&packet->chunk_list));
/* 2) Calculate the Adler-32 checksum of the whole packet,
* including the SCTP common header and all the
@ -532,16 +621,18 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
*
* Note: Adler-32 is no longer applicable, as has been replaced
* by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>.
*
* If it's a GSO packet, it's postponed to sctp_skb_segment.
*/
if (!sctp_checksum_disable) {
if (!(dst->dev->features & NETIF_F_SCTP_CRC) ||
(dst_xfrm(dst) != NULL) || packet->ipfragok) {
sh->checksum = sctp_compute_cksum(nskb, 0);
if (!sctp_checksum_disable || gso) {
if (!gso && (!(dst->dev->features & NETIF_F_SCTP_CRC) ||
dst_xfrm(dst) || packet->ipfragok)) {
sh->checksum = sctp_compute_cksum(head, 0);
} else {
/* no need to seed pseudo checksum for SCTP */
nskb->ip_summed = CHECKSUM_PARTIAL;
nskb->csum_start = skb_transport_header(nskb) - nskb->head;
nskb->csum_offset = offsetof(struct sctphdr, checksum);
head->ip_summed = CHECKSUM_PARTIAL;
head->csum_start = skb_transport_header(head) - head->head;
head->csum_offset = offsetof(struct sctphdr, checksum);
}
}
@ -557,7 +648,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
* Note: The works for IPv6 layer checks this bit too later
* in transmission. See IP6_ECN_flow_xmit().
*/
tp->af_specific->ecn_capable(nskb->sk);
tp->af_specific->ecn_capable(sk);
/* Set up the IP options. */
/* BUG: not implemented
@ -566,7 +657,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
/* Dump that on IP! */
if (asoc) {
asoc->stats.opackets++;
asoc->stats.opackets += pktcount;
if (asoc->peer.last_sent_to != tp)
/* Considering the multiple CPU scenario, this is a
* "correcter" place for last_sent_to. --xguo
@ -589,16 +680,36 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
}
}
pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len);
pr_debug("***sctp_transmit_packet*** skb->len:%d\n", head->len);
nskb->ignore_df = packet->ipfragok;
tp->af_specific->sctp_xmit(nskb, tp);
if (gso) {
/* Cleanup our debris for IP stacks */
memset(head->cb, 0, max(sizeof(struct inet_skb_parm),
sizeof(struct inet6_skb_parm)));
skb_shinfo(head)->gso_segs = pktcount;
skb_shinfo(head)->gso_size = GSO_BY_FRAGS;
/* We have to refresh this in case we are xmiting to
* more than one transport at a time
*/
rcu_read_lock();
if (__sk_dst_get(sk) != tp->dst) {
dst_hold(tp->dst);
sk_setup_caps(sk, tp->dst);
}
rcu_read_unlock();
}
head->ignore_df = packet->ipfragok;
tp->af_specific->sctp_xmit(head, tp);
out:
sctp_packet_reset(packet);
return err;
no_route:
kfree_skb(nskb);
kfree_skb(head);
if (nskb != head)
kfree_skb(nskb);
if (asoc)
IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
@ -751,39 +862,63 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
struct sctp_chunk *chunk,
u16 chunk_len)
{
size_t psize;
size_t pmtu;
int too_big;
size_t psize, pmtu;
sctp_xmit_t retval = SCTP_XMIT_OK;
psize = packet->size;
pmtu = ((packet->transport->asoc) ?
(packet->transport->asoc->pathmtu) :
(packet->transport->pathmtu));
too_big = (psize + chunk_len > pmtu);
if (packet->transport->asoc)
pmtu = packet->transport->asoc->pathmtu;
else
pmtu = packet->transport->pathmtu;
/* Decide if we need to fragment or resubmit later. */
if (too_big) {
/* It's OK to fragmet at IP level if any one of the following
if (psize + chunk_len > pmtu) {
/* It's OK to fragment at IP level if any one of the following
* is true:
* 1. The packet is empty (meaning this chunk is greater
* the MTU)
* 2. The chunk we are adding is a control chunk
* 3. The packet doesn't have any data in it yet and data
* requires authentication.
* 1. The packet is empty (meaning this chunk is greater
* the MTU)
* 2. The packet doesn't have any data in it yet and data
* requires authentication.
*/
if (sctp_packet_empty(packet) || !sctp_chunk_is_data(chunk) ||
if (sctp_packet_empty(packet) ||
(!packet->has_data && chunk->auth)) {
/* We no longer do re-fragmentation.
* Just fragment at the IP layer, if we
* actually hit this condition
*/
packet->ipfragok = 1;
} else {
retval = SCTP_XMIT_PMTU_FULL;
goto out;
}
/* It is also okay to fragment if the chunk we are
* adding is a control chunk, but only if current packet
* is not a GSO one otherwise it causes fragmentation of
* a large frame. So in this case we allow the
* fragmentation by forcing it to be in a new packet.
*/
if (!sctp_chunk_is_data(chunk) && packet->has_data)
retval = SCTP_XMIT_PMTU_FULL;
if (psize + chunk_len > packet->max_size)
/* Hit GSO/PMTU limit, gotta flush */
retval = SCTP_XMIT_PMTU_FULL;
if (!packet->transport->burst_limited &&
psize + chunk_len > (packet->transport->cwnd >> 1))
/* Do not allow a single GSO packet to use more
* than half of cwnd.
*/
retval = SCTP_XMIT_PMTU_FULL;
if (packet->transport->burst_limited &&
psize + chunk_len > (packet->transport->burst_limited >> 1))
/* Do not allow a single GSO packet to use more
* than half of original cwnd.
*/
retval = SCTP_XMIT_PMTU_FULL;
/* Otherwise it will fit in the GSO packet */
}
out:
return retval;
}

Просмотреть файл

@ -1516,6 +1516,9 @@ static __init int sctp_init(void)
if (status)
goto err_v6_add_protocol;
if (sctp_offload_init() < 0)
pr_crit("%s: Cannot add SCTP protocol offload\n", __func__);
out:
return status;
err_v6_add_protocol:

Просмотреть файл

@ -4003,6 +4003,8 @@ static int sctp_init_sock(struct sock *sk)
return -ESOCKTNOSUPPORT;
}
sk->sk_gso_type = SKB_GSO_SCTP;
/* Initialize default send parameters. These parameters can be
* modified with the SCTP_DEFAULT_SEND_PARAM socket option.
*/