From d5cc4a73a5b5c8374b810d5371e9e7ed05c1e02c Mon Sep 17 00:00:00 2001
From: Robert Olsson
Date: Fri, 16 Mar 2007 15:00:07 -0700
Subject: [PATCH 1/4] [IPV4]: Do not disable preemption in trie_leaf_remove().

Hello,

Just discussed this with Patrick...

We have two users of trie_leaf_remove, fn_trie_flush and fn_trie_delete,
and both are holding RTNL, so there shouldn't be a need for this preempt
stuff. This is assumed to be a leftover from an older RCU take.

> Mhh .. I think I just remembered something - me incorrectly suggesting
> to add it there while we were talking about this at OLS :) IIRC the
> idea was to make sure tnode_free (which at that time didn't use
> call_rcu) wouldn't free memory while still in use in an RCU read-side
> critical section. It should have been synchronize_rcu of course,
> but with tnode_free using call_rcu it seems to be completely
> unnecessary. So I guess we can simply remove it.

Signed-off-by: Robert Olsson
Signed-off-by: Patrick McHardy
Signed-off-by: David S. Miller
---
 net/ipv4/fib_trie.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 72b3036bbc09..ada9b3db507d 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1527,7 +1527,6 @@ static int trie_leaf_remove(struct trie *t, t_key key)
 	t->revision++;
 	t->size--;
 
-	preempt_disable();
 	tp = NODE_PARENT(n);
 	tnode_free((struct tnode *) n);
 
@@ -1537,7 +1536,6 @@ static int trie_leaf_remove(struct trie *t, t_key key)
 		rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
 	} else
 		rcu_assign_pointer(t->trie, NULL);
-	preempt_enable();
 
 	return 1;
 }

From 3e6b3b2e34ac2c72fa05b5e4b20bd88d64c298dc Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan
Date: Fri, 16 Mar 2007 15:00:46 -0700
Subject: [PATCH 2/4] [NET]: Copy mac_len in skb_clone() as well

ANK says: "It is rarely used, that's why it was not noticed. But in the
places where it is used, it should be a disaster."

Signed-off-by: Alexey Dobriyan
Signed-off-by: David S. Miller
---
 net/core/skbuff.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 820761f9eeef..702fa8f08747 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -463,6 +463,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 	memcpy(n->cb, skb->cb, sizeof(skb->cb));
 	C(len);
 	C(data_len);
+	C(mac_len);
 	C(csum);
 	C(local_df);
 	n->cloned = 1;

From 53cdcc04c1e85d4e423b2822b66149b6f2e52c2c Mon Sep 17 00:00:00 2001
From: John Heffner
Date: Fri, 16 Mar 2007 15:04:03 -0700
Subject: [PATCH 3/4] [TCP]: Fix tcp_mem[] initialization.

Change the tcp_mem initialization function. The fraction of total memory
is now a continuous function of memory size, and independent of page
size.

Signed-off-by: John Heffner
Signed-off-by: David S. Miller
---
 net/ipv4/tcp.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 74c4d103ebc2..3834b10b5115 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2458,11 +2458,18 @@ void __init tcp_init(void)
 		sysctl_max_syn_backlog = 128;
 	}
 
-	/* Allow no more than 3/4 kernel memory (usually less) allocated to TCP */
-	sysctl_tcp_mem[0] = (1536 / sizeof (struct inet_bind_hashbucket)) << order;
-	sysctl_tcp_mem[1] = sysctl_tcp_mem[0] * 4 / 3;
+	/* Set the pressure threshold to be a fraction of global memory that
+	 * is up to 1/2 at 256 MB, decreasing toward zero with the amount of
+	 * memory, with a floor of 128 pages.
+	 */
+	limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
+	limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
+	limit = max(limit, 128UL);
+	sysctl_tcp_mem[0] = limit / 4 * 3;
+	sysctl_tcp_mem[1] = limit;
 	sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;
 
+	/* Set per-socket limits to no more than 1/128 the pressure threshold */
 	limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
 	max_share = min(4UL*1024*1024, limit);
 
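For a rough sense of what the new formula computes, here is a standalone
sketch (assumptions: 4 KB pages, i.e. PAGE_SHIFT = 12, and a hypothetical
256 MB machine; illustrative userspace code, not the kernel's):

/*
 * Standalone sketch (not kernel code): mirrors the new tcp_mem arithmetic
 * for a hypothetical 256 MB machine with 4 KB pages (PAGE_SHIFT = 12).
 */
#include <stdio.h>

int main(void)
{
	const unsigned long page_shift = 12;                     /* 4 KB pages */
	unsigned long nr_all_pages = 256UL << (20 - page_shift); /* 256 MB of RAM */
	unsigned long limit, tcp_mem[3];

	/* Count at most 256 MB of memory toward the threshold... */
	limit = nr_all_pages;
	if (limit > (1UL << (28 - page_shift)))
		limit = 1UL << (28 - page_shift);
	limit >>= 20 - page_shift;                               /* convert to MB */

	/* ...then scale by total memory so the fraction shrinks on small boxes */
	limit = (limit * (nr_all_pages >> (20 - page_shift))) >> (page_shift - 11);
	if (limit < 128UL)
		limit = 128UL;                                   /* floor of 128 pages */

	tcp_mem[0] = limit / 4 * 3;
	tcp_mem[1] = limit;
	tcp_mem[2] = tcp_mem[0] * 2;

	/* Prints 24576 32768 49152 (pages), i.e. 96 MB / 128 MB / 192 MB */
	printf("%lu %lu %lu\n", tcp_mem[0], tcp_mem[1], tcp_mem[2]);
	return 0;
}

At 256 MB the pressure threshold sysctl_tcp_mem[1] works out to half of
memory (128 MB), the maximum fraction named in the comment; smaller
machines get a proportionally smaller fraction, and the 128-page floor
keeps very small configurations from ending up with an unusably low limit.
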
From d35690beda1429544d46c8eb34b2e3a8c37ab299 Mon Sep 17 00:00:00 2001
From: Masayuki Nakagawa
Date: Fri, 16 Mar 2007 16:14:03 -0700
Subject: [PATCH 4/4] [IPV6]: ipv6_fl_socklist is inadvertently shared.

The ipv6_fl_socklist from the listening socket is inadvertently shared
with the new socket created for a connection. This leads to a variety
of interesting, but fatal, bugs. For example, removing one of the
sockets may lead to the other socket encountering a page fault when the
now-freed list is referenced.

The fix is to not share the flow label list with the new socket.

Signed-off-by: Masayuki Nakagawa
Signed-off-by: David S. Miller
---
 net/ipv6/tcp_ipv6.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f57a9baa6b27..92f99927d12d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1453,6 +1453,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	   First: no IPv4 options.
 	 */
 	newinet->opt = NULL;
+	newnp->ipv6_fl_list = NULL;
 
 	/* Clone RX bits */
 	newnp->rxopt.all = np->rxopt.all;
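
To make the failure mode concrete, here is a minimal standalone sketch of
the pattern (the struct and function names below are invented for
illustration; only tcp_v6_syn_recv_sock() and newnp->ipv6_fl_list come
from the patch): the new socket's state is copied wholesale from the
listener's, so the copied list head points at the same memory, and both
sockets would end up freeing the same list unless the child's copy is
cleared.

#include <stdlib.h>

struct flow_label {                     /* invented stand-in for a flow label entry */
	struct flow_label *next;
};

struct sock_state {                     /* invented stand-in for per-socket IPv6 state */
	struct flow_label *fl_list;     /* list head, analogous to ipv6_fl_list */
};

static void free_fl_list(struct flow_label *fl)
{
	while (fl) {
		struct flow_label *next = fl->next;
		free(fl);
		fl = next;
	}
}

int main(void)
{
	struct sock_state parent;
	struct sock_state child;

	parent.fl_list = calloc(1, sizeof(*parent.fl_list));

	child = parent;                 /* struct copy shares the pointer member */
	child.fl_list = NULL;           /* the fix: the child must not share the list */

	free_fl_list(child.fl_list);    /* no-op with the fix */
	free_fl_list(parent.fl_list);   /* without the fix this would be a double free */
	return 0;
}

Clearing the pointer in the child, as the one-line fix does for
newnp->ipv6_fl_list, leaves the listening socket as the sole owner of its
flow label list.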