Diffstat (limited to 'patches/cve/CVE-2018-5390-tcp-free-batches-of-packets-in-tcp_prune_ofo_queue.patch')
-rw-r--r--   patches/cve/CVE-2018-5390-tcp-free-batches-of-packets-in-tcp_prune_ofo_queue.patch   163
1 file changed, 39 insertions(+), 124 deletions(-)
diff --git a/patches/cve/CVE-2018-5390-tcp-free-batches-of-packets-in-tcp_prune_ofo_queue.patch b/patches/cve/CVE-2018-5390-tcp-free-batches-of-packets-in-tcp_prune_ofo_queue.patch
index b8f95d0..35d1eec 100644
--- a/patches/cve/CVE-2018-5390-tcp-free-batches-of-packets-in-tcp_prune_ofo_queue.patch
+++ b/patches/cve/CVE-2018-5390-tcp-free-batches-of-packets-in-tcp_prune_ofo_queue.patch
@@ -1,134 +1,60 @@
-Date: Mon, 15 Oct 2018 06:07:41 +0200
-Subject: [PATCH] Merge branch 'tcp-robust-ooo'
+From 2d08921c8da26bdce3d8848ef6f32068f594d7d4 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 23 Jul 2018 09:28:17 -0700
+Subject: [PATCH] tcp: free batches of packets in tcp_prune_ofo_queue()
 
-Eric Dumazet says:
+[ Upstream commit 72cd43ba64fc172a443410ce01645895850844c8 ]
 
-====================
 Juha-Matti Tilli reported that malicious peers could inject tiny
 packets in out_of_order_queue, forcing very expensive calls
 to tcp_collapse_ofo_queue() and tcp_prune_ofo_queue() for
-every incoming packet.
+every incoming packet. out_of_order_queue rb-tree can contain
+thousands of nodes, iterating over all of them is not nice.
 
-With tcp_rmem[2] default of 6MB, the ooo queue could
-contain ~7000 nodes.
+Before linux-4.9, we would have pruned all packets in ofo_queue
+in one go, every XXXX packets. XXXX depends on sk_rcvbuf and skbs
+truesize, but is about 7000 packets with tcp_rmem[2] default of 6 MB.
 
-This patch series makes sure we cut cpu cycles enough to
-render the attack not critical.
+Since we plan to increase tcp_rmem[2] in the future to cope with
+modern BDP, can not revert to the old behavior, without great pain.
 
-We might in the future go further, like disconnecting
-or black-holing proven malicious flows.
-====================
+Strategy taken in this patch is to purge ~12.5 % of the queue capacity.
+
+Fixes: 36a6503fedda ("tcp: refine tcp_prune_ofo_queue() to not drop all packets")
 
 CVE: CVE-2018-5390
 Upstream-Status: Backport
 
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Juha-Matti Tilli <juha-matti.tilli@iki.fi>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
 Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 Signed-off-by: Andreas Wellving <andreas.wellving@enea.com>
 ---
- net/ipv4/tcp_input.c | 60 +++++++++++++++++++++++++++++++++++++++++++---------
- 1 file changed, 50 insertions(+), 10 deletions(-)
+ include/linux/skbuff.h | 2 ++
+ net/ipv4/tcp_input.c | 15 +++++++++++----
+ 2 files changed, 13 insertions(+), 4 deletions(-)
 
-diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
-index 2f107e4..4838b5f 100644
---- a/net/ipv4/tcp_input.c
-+++ b/net/ipv4/tcp_input.c
-@@ -4355,6 +4355,23 @@ static bool tcp_try_coalesce(struct sock *sk,
- return true;
+diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
+index b048d3d..1f207dd 100644
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -2982,6 +2982,8 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)
+ return __skb_grow(skb, len);
  }
 
-+static bool tcp_ooo_try_coalesce(struct sock *sk,
-+ struct sk_buff *to,
-+ struct sk_buff *from,
-+ bool *fragstolen)
-+{
-+ bool res = tcp_try_coalesce(sk, to, from, fragstolen);
++#define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode)
 +
-+ /* In case tcp_drop() is called later, update to->gso_segs */
-+ if (res) {
-+ u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
-+ max_t(u16, 1, skb_shinfo(from)->gso_segs);
-+
-+ skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
-+ }
-+ return res;
-+}
-+
- static void tcp_drop(struct sock *sk, struct sk_buff *skb)
- {
- sk_drops_add(sk, skb);
-@@ -4478,7 +4495,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
- /* In the typical case, we are adding an skb to the end of the list.
- * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
- */
-- if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) {
-+ if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
-+ skb, &fragstolen)) {
- coalesce_done:
- tcp_grow_window(sk, skb);
- kfree_skb_partial(skb, fragstolen);
-@@ -4506,7 +4524,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
- /* All the bits are present. Drop. */
- NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPOFOMERGE);
-- __kfree_skb(skb);
-+ tcp_drop(sk, skb);
- skb = NULL;
- tcp_dsack_set(sk, seq, end_seq);
- goto add_sack;
-@@ -4525,10 +4543,11 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
- TCP_SKB_CB(skb1)->end_seq);
- NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPOFOMERGE);
-- __kfree_skb(skb1);
-+ tcp_drop(sk, skb1);
- goto merge_right;
- }
-- } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
-+ } else if (tcp_ooo_try_coalesce(sk, skb1,
-+ skb, &fragstolen)) {
- goto coalesce_done;
- }
- p = &parent->rb_right;
-@@ -4907,6 +4926,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
- static void tcp_collapse_ofo_queue(struct sock *sk)
- {
- struct tcp_sock *tp = tcp_sk(sk);
-+ u32 range_truesize, sum_tiny = 0;
- struct sk_buff *skb, *head;
- struct rb_node *p;
- u32 start, end;
-@@ -4925,6 +4945,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
- }
- start = TCP_SKB_CB(skb)->seq;
- end = TCP_SKB_CB(skb)->end_seq;
-+ range_truesize = skb->truesize;
-
- for (head = skb;;) {
- skb = tcp_skb_next(skb, NULL);
-@@ -4935,11 +4956,20 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
- if (!skb ||
- after(TCP_SKB_CB(skb)->seq, end) ||
- before(TCP_SKB_CB(skb)->end_seq, start)) {
-- tcp_collapse(sk, NULL, &tp->out_of_order_queue,
-- head, skb, start, end);
-+ /* Do not attempt collapsing tiny skbs */
-+ if (range_truesize != head->truesize ||
-+ end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
-+ tcp_collapse(sk, NULL, &tp->out_of_order_queue,
-+ head, skb, start, end);
-+ } else {
-+ sum_tiny += range_truesize;
-+ if (sum_tiny > sk->sk_rcvbuf >> 3)
-+ return;
-+ }
- goto new_range;
- }
-
-+ range_truesize += skb->truesize;
- if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
- start = TCP_SKB_CB(skb)->seq;
- if (after(TCP_SKB_CB(skb)->end_seq, end))
-@@ -4954,6 +4984,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
+ #define skb_queue_walk(queue, skb) \
+ for (skb = (queue)->next; \
+ skb != (struct sk_buff *)(queue); \
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 71f2b09..2eabf21 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -4965,6 +4965,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
  * 2) not add too big latencies if thousands of packets sit there.
  * (But if application shrinks SO_RCVBUF, we could still end up
  * freeing whole queue here)
@@ -136,7 +62,7 @@ index 2f107e4..4838b5f 100644
  *
  * Return true if queue has shrunk.
  */
-@@ -4961,20 +4992,26 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
+@@ -4972,20 +4973,26 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
  {
  struct tcp_sock *tp = tcp_sk(sk);
  struct rb_node *node, *prev;
@@ -167,16 +93,5 @@ index 2f107e4..4838b5f 100644
  node = prev;
  } while (node);
  tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
-@@ -5009,6 +5046,9 @@ static int tcp_prune_queue(struct sock *sk)
- else if (tcp_under_memory_pressure(sk))
- tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
-
-+ if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
-+ return 0;
-+
- tcp_collapse_ofo_queue(sk);
- if (!skb_queue_empty(&sk->sk_receive_queue))
- tcp_collapse(sk, &sk->sk_receive_queue, NULL,
---
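
Note on the change: the updated patch file backports upstream commit 72cd43ba64fc, whose stated strategy (see the new commit message above) is to purge roughly 12.5 % of the queue capacity per pruning pass instead of reclaiming after every single freed skb. Below is a minimal standalone sketch of that batching arithmetic only; it is an illustration under assumed numbers, not the kernel code. The buffer sizes, the per-skb truesize, and the counters are made-up stand-ins, and the stop-once-under-budget check mirrors the behavior the pre-existing pruning code is described as having.

/*
 * Sketch (assumptions, not kernel code): drop tiny skbs from the tail of
 * an out-of-order queue, but only "reclaim" once per batch of roughly
 * sk_rcvbuf >> 3 bytes of truesize, stopping when memory use is back
 * under the receive-buffer budget.
 */
#include <stdio.h>

#define SK_RCVBUF     (6 * 1024 * 1024)  /* tcp_rmem[2] default of 6 MB (from the commit message) */
#define TINY_TRUESIZE 768                /* assumed truesize of one tiny attacker skb */

int main(void)
{
	long rmem_alloc = 8 * 1024 * 1024;   /* assumed: queue has overshot the budget */
	long goal = SK_RCVBUF >> 3;          /* ~12.5 % of the queue capacity per batch */
	int freed = 0, reclaims = 0;

	while (rmem_alloc > 0) {
		rmem_alloc -= TINY_TRUESIZE;     /* drop one skb from the tail */
		goal -= TINY_TRUESIZE;           /* charge its truesize against the batch */
		freed++;

		if (goal <= 0) {
			reclaims++;                  /* the kernel would reclaim socket memory here, once per batch */
			if (rmem_alloc <= SK_RCVBUF)
				break;                   /* back under budget: stop pruning */
			goal = SK_RCVBUF >> 3;       /* start the next batch */
		}
	}
	printf("freed %d skbs in %d reclaim passes\n", freed, reclaims);
	return 0;
}

With these assumed numbers the loop frees a few thousand tiny skbs but performs only a handful of reclaim passes, which is the per-packet cost reduction the backported patch aims for.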