inet: Decrease overhead of on-stack inet_cork.

When we fast-path datagram sends to avoid locking by putting
the inet_cork on the stack, we use up a lot of stack space
unnecessarily.

This is because inet_cork contains a "struct flowi" which isn't
used in these code paths.

Split inet_cork into two parts, "inet_cork" and "inet_cork_full".
Only the latter has the "struct flowi", and it is what is
stored in inet_sock.

Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
This commit is contained in:
David S. Miller 2011-05-06 15:02:07 -07:00
Родитель ad638bd16d
Коммит bdc712b4c2
5 изменённых файлов: 41 добавлений и 33 удалений

Просмотреть файл

@ -96,17 +96,21 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
struct inet_cork { struct inet_cork {
unsigned int flags; unsigned int flags;
unsigned int fragsize; __be32 addr;
struct ip_options *opt; struct ip_options *opt;
unsigned int fragsize;
struct dst_entry *dst; struct dst_entry *dst;
int length; /* Total length of all frames */ int length; /* Total length of all frames */
__be32 addr;
struct flowi fl;
struct page *page; struct page *page;
u32 off; u32 off;
u8 tx_flags; u8 tx_flags;
}; };
struct inet_cork_full {
struct inet_cork base;
struct flowi fl;
};
struct ip_mc_socklist; struct ip_mc_socklist;
struct ipv6_pinfo; struct ipv6_pinfo;
struct rtable; struct rtable;
@ -164,7 +168,7 @@ struct inet_sock {
int mc_index; int mc_index;
__be32 mc_addr; __be32 mc_addr;
struct ip_mc_socklist __rcu *mc_list; struct ip_mc_socklist __rcu *mc_list;
struct inet_cork cork; struct inet_cork_full cork;
}; };
#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ #define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */

Просмотреть файл

@ -132,7 +132,7 @@ extern struct sk_buff *ip_make_skb(struct sock *sk,
static inline struct sk_buff *ip_finish_skb(struct sock *sk) static inline struct sk_buff *ip_finish_skb(struct sock *sk)
{ {
return __ip_make_skb(sk, &sk->sk_write_queue, &inet_sk(sk)->cork); return __ip_make_skb(sk, &sk->sk_write_queue, &inet_sk(sk)->cork.base);
} }
/* datagram.c */ /* datagram.c */

Просмотреть файл

@ -1096,14 +1096,14 @@ int ip_append_data(struct sock *sk,
return 0; return 0;
if (skb_queue_empty(&sk->sk_write_queue)) { if (skb_queue_empty(&sk->sk_write_queue)) {
err = ip_setup_cork(sk, &inet->cork, ipc, rtp); err = ip_setup_cork(sk, &inet->cork.base, ipc, rtp);
if (err) if (err)
return err; return err;
} else { } else {
transhdrlen = 0; transhdrlen = 0;
} }
return __ip_append_data(sk, &sk->sk_write_queue, &inet->cork, getfrag, return __ip_append_data(sk, &sk->sk_write_queue, &inet->cork.base, getfrag,
from, length, transhdrlen, flags); from, length, transhdrlen, flags);
} }
@ -1114,6 +1114,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
struct sk_buff *skb; struct sk_buff *skb;
struct rtable *rt; struct rtable *rt;
struct ip_options *opt = NULL; struct ip_options *opt = NULL;
struct inet_cork *cork;
int hh_len; int hh_len;
int mtu; int mtu;
int len; int len;
@ -1129,20 +1130,21 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
if (skb_queue_empty(&sk->sk_write_queue)) if (skb_queue_empty(&sk->sk_write_queue))
return -EINVAL; return -EINVAL;
rt = (struct rtable *)inet->cork.dst; cork = &inet->cork.base;
if (inet->cork.flags & IPCORK_OPT) rt = (struct rtable *)cork->dst;
opt = inet->cork.opt; if (cork->flags & IPCORK_OPT)
opt = cork->opt;
if (!(rt->dst.dev->features&NETIF_F_SG)) if (!(rt->dst.dev->features&NETIF_F_SG))
return -EOPNOTSUPP; return -EOPNOTSUPP;
hh_len = LL_RESERVED_SPACE(rt->dst.dev); hh_len = LL_RESERVED_SPACE(rt->dst.dev);
mtu = inet->cork.fragsize; mtu = cork->fragsize;
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
if (inet->cork.length + size > 0xFFFF - fragheaderlen) { if (cork->length + size > 0xFFFF - fragheaderlen) {
ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, mtu); ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, mtu);
return -EMSGSIZE; return -EMSGSIZE;
} }
@ -1150,7 +1152,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
return -EINVAL; return -EINVAL;
inet->cork.length += size; cork->length += size;
if ((size + skb->len > mtu) && if ((size + skb->len > mtu) &&
(sk->sk_protocol == IPPROTO_UDP) && (sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO)) { (rt->dst.dev->features & NETIF_F_UFO)) {
@ -1245,7 +1247,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
return 0; return 0;
error: error:
inet->cork.length -= size; cork->length -= size;
IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
return err; return err;
} }
@ -1396,7 +1398,7 @@ static void __ip_flush_pending_frames(struct sock *sk,
void ip_flush_pending_frames(struct sock *sk) void ip_flush_pending_frames(struct sock *sk)
{ {
__ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork); __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork.base);
} }
struct sk_buff *ip_make_skb(struct sock *sk, struct sk_buff *ip_make_skb(struct sock *sk,

Просмотреть файл

@ -1150,6 +1150,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
{ {
struct inet_sock *inet = inet_sk(sk); struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_cork *cork;
struct sk_buff *skb; struct sk_buff *skb;
unsigned int maxfraglen, fragheaderlen; unsigned int maxfraglen, fragheaderlen;
int exthdrlen; int exthdrlen;
@ -1163,6 +1164,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
if (flags&MSG_PROBE) if (flags&MSG_PROBE)
return 0; return 0;
cork = &inet->cork.base;
if (skb_queue_empty(&sk->sk_write_queue)) { if (skb_queue_empty(&sk->sk_write_queue)) {
/* /*
* setup for corking * setup for corking
@ -1202,7 +1204,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
/* need source address above miyazawa*/ /* need source address above miyazawa*/
} }
dst_hold(&rt->dst); dst_hold(&rt->dst);
inet->cork.dst = &rt->dst; cork->dst = &rt->dst;
inet->cork.fl.u.ip6 = *fl6; inet->cork.fl.u.ip6 = *fl6;
np->cork.hop_limit = hlimit; np->cork.hop_limit = hlimit;
np->cork.tclass = tclass; np->cork.tclass = tclass;
@ -1212,10 +1214,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
if (np->frag_size) if (np->frag_size)
mtu = np->frag_size; mtu = np->frag_size;
} }
inet->cork.fragsize = mtu; cork->fragsize = mtu;
if (dst_allfrag(rt->dst.path)) if (dst_allfrag(rt->dst.path))
inet->cork.flags |= IPCORK_ALLFRAG; cork->flags |= IPCORK_ALLFRAG;
inet->cork.length = 0; cork->length = 0;
sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_page = NULL;
sk->sk_sndmsg_off = 0; sk->sk_sndmsg_off = 0;
exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) - exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
@ -1223,12 +1225,12 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
length += exthdrlen; length += exthdrlen;
transhdrlen += exthdrlen; transhdrlen += exthdrlen;
} else { } else {
rt = (struct rt6_info *)inet->cork.dst; rt = (struct rt6_info *)cork->dst;
fl6 = &inet->cork.fl.u.ip6; fl6 = &inet->cork.fl.u.ip6;
opt = np->cork.opt; opt = np->cork.opt;
transhdrlen = 0; transhdrlen = 0;
exthdrlen = 0; exthdrlen = 0;
mtu = inet->cork.fragsize; mtu = cork->fragsize;
} }
hh_len = LL_RESERVED_SPACE(rt->dst.dev); hh_len = LL_RESERVED_SPACE(rt->dst.dev);
@ -1238,7 +1240,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
return -EMSGSIZE; return -EMSGSIZE;
} }
@ -1267,7 +1269,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
* --yoshfuji * --yoshfuji
*/ */
inet->cork.length += length; cork->length += length;
if (length > mtu) { if (length > mtu) {
int proto = sk->sk_protocol; int proto = sk->sk_protocol;
if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
@ -1292,7 +1294,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
while (length > 0) { while (length > 0) {
/* Check if the remaining data fits into current packet. */ /* Check if the remaining data fits into current packet. */
copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
if (copy < length) if (copy < length)
copy = maxfraglen - skb->len; copy = maxfraglen - skb->len;
@ -1317,7 +1319,7 @@ alloc_new_skb:
* we know we need more fragment(s). * we know we need more fragment(s).
*/ */
datalen = length + fraggap; datalen = length + fraggap;
if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
datalen = maxfraglen - fragheaderlen; datalen = maxfraglen - fragheaderlen;
fraglen = datalen + fragheaderlen; fraglen = datalen + fragheaderlen;
@ -1481,7 +1483,7 @@ alloc_new_skb:
} }
return 0; return 0;
error: error:
inet->cork.length -= length; cork->length -= length;
IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
return err; return err;
} }
@ -1497,10 +1499,10 @@ static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
np->cork.opt = NULL; np->cork.opt = NULL;
} }
if (inet->cork.dst) { if (inet->cork.base.dst) {
dst_release(inet->cork.dst); dst_release(inet->cork.base.dst);
inet->cork.dst = NULL; inet->cork.base.dst = NULL;
inet->cork.flags &= ~IPCORK_ALLFRAG; inet->cork.base.flags &= ~IPCORK_ALLFRAG;
} }
memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
} }
@ -1515,7 +1517,7 @@ int ip6_push_pending_frames(struct sock *sk)
struct net *net = sock_net(sk); struct net *net = sock_net(sk);
struct ipv6hdr *hdr; struct ipv6hdr *hdr;
struct ipv6_txoptions *opt = np->cork.opt; struct ipv6_txoptions *opt = np->cork.opt;
struct rt6_info *rt = (struct rt6_info *)inet->cork.dst; struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
struct flowi6 *fl6 = &inet->cork.fl.u.ip6; struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
unsigned char proto = fl6->flowi6_proto; unsigned char proto = fl6->flowi6_proto;
int err = 0; int err = 0;

Просмотреть файл

@ -542,8 +542,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
goto out; goto out;
offset = rp->offset; offset = rp->offset;
total_len = inet_sk(sk)->cork.length - (skb_network_header(skb) - total_len = inet_sk(sk)->cork.base.length - (skb_network_header(skb) -
skb->data); skb->data);
if (offset >= total_len - 1) { if (offset >= total_len - 1) {
err = -EINVAL; err = -EINVAL;
ip6_flush_pending_frames(sk); ip6_flush_pending_frames(sk);