author     Andreas Wellving <andreas.wellving@enea.com>    2018-10-22 16:17:43 +0200
committer  Andreas Wellving <Andreas.Wellving@enea.com>    2018-10-23 10:45:18 +0200
commit     bca16600c0a6e7fa6ebc3abd076c06d54d34c1dc (patch)
tree       f2e2482232343f852a5528c8837d803629d221cd
parent     d6a8203b07080c46a47121b450b0e45f65e02bd4 (diff)
download   enea-kernel-cache-bca16600c0a6e7fa6ebc3abd076c06d54d34c1dc.tar.gz
Modify CVE-2018-5390, CVE-2018-10876 and CVE-2018-10879
Correct the CVE-2018-5390 patch to match the kernel version, and remove
the CVE-2018-10876 and CVE-2018-10879 patches.
CVE-2018-5390 References:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=linux-4.9.y&id=2d08921c8da26bdce3d8848ef6f32068f594d7d4
Change-Id: I83d497af94f3e521cece3ab7d19a20e5d0157421
Signed-off-by: Andreas Wellving <andreas.wellving@enea.com>
4 files changed, 39 insertions(+), 344 deletions(-)
diff --git a/patches/cve/4.9.x.scc b/patches/cve/4.9.x.scc
index 5fe1ee7..cf11548 100644
--- a/patches/cve/4.9.x.scc
+++ b/patches/cve/4.9.x.scc
@@ -27,10 +27,8 @@ patch CVE-2017-18255-perf-core-Fix-the-perf_cpu_time_max_percent-check.patch
 patch CVE-2018-9518-NFC-llcp-Limit-size-of-SDP-URI.patch
 
 #CVEs fixed in 4.9.112:
-patch CVE-2018-10876-ext4-only-look-at-the-bg_flags-field-if-it-is-valid.patch
 patch CVE-2018-10877-ext4-verify-the-depth-of-extent-tree-in-ext4_find_ex.patch
 patch CVE-2018-10878-ext4-always-check-block-group-bounds-in-ext4_init_bl.patch
-patch CVE-2018-10879-ext4-make-sure-bitmaps-and-the-inode-table-don-t-ove.patch
 patch CVE-2018-10881-ext4-clear-i_data-in-ext4_inode_info-when-removing-i.patch
 patch CVE-2018-10882-ext4-add-more-inode-number-paranoia-checks.patch
 patch CVE-2018-9516-HID-debug-check-length-before-copy_to_user.patch
diff --git a/patches/cve/CVE-2018-10876-ext4-only-look-at-the-bg_flags-field-if-it-is-valid.patch b/patches/cve/CVE-2018-10876-ext4-only-look-at-the-bg_flags-field-if-it-is-valid.patch
deleted file mode 100644
index fe172e6..0000000
--- a/patches/cve/CVE-2018-10876-ext4-only-look-at-the-bg_flags-field-if-it-is-valid.patch
+++ /dev/null
@@ -1,135 +0,0 @@
-Date: Fri, 12 Oct 2018 13:56:16 +0200
-Subject: [PATCH] ext4: only look at the bg_flags field if it is valid The
- bg_flags field in the block group descripts is only valid if the uninit_bg or
- metadata_csum feature is enabled. We were not consistently looking at this
- field; fix this.
-
-Also block group #0 must never have uninitialized allocation bitmaps,
-or need to be zeroed, since that's where the root inode, and other
-special inodes are set up. Check for these conditions and mark the
-file system as corrupted if they are detected.
-
-This addresses CVE-2018-10876.
-Upstream-Status: Backport
-
-https://bugzilla.kernel.org/show_bug.cgi?id=199403
-
-Signed-off-by: Theodore Ts'o <tytso@mit.edu>
-Cc: stable@kernel.org
-Signed-off-by: Andreas Wellving <andreas.wellving@enea.com>
----
- fs/ext4/balloc.c  | 11 ++++++++++-
- fs/ext4/ialloc.c  | 14 ++++++++++++--
- fs/ext4/mballoc.c |  6 ++++--
- fs/ext4/super.c   | 11 ++++++++++-
- 4 files changed, 36 insertions(+), 6 deletions(-)
-
-diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
-index e04ec86..10e18f7 100644
---- a/fs/ext4/balloc.c
-+++ b/fs/ext4/balloc.c
-@@ -443,7 +443,16 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
- 		goto verify;
- 	}
- 	ext4_lock_group(sb, block_group);
--	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
-+	if (ext4_has_group_desc_csum(sb) &&
-+	    (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
-+		if (block_group == 0) {
-+			ext4_unlock_group(sb, block_group);
-+			unlock_buffer(bh);
-+			ext4_error(sb, "Block bitmap for bg 0 marked "
-+				   "uninitialized");
-+			err = -EFSCORRUPTED;
-+			goto out;
-+		}
- 		err = ext4_init_block_bitmap(sb, bh, block_group, desc);
- 		set_bitmap_uptodate(bh);
- 		set_buffer_uptodate(bh);
-diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
-index 2d94e85..e937aad 100644
---- a/fs/ext4/ialloc.c
-+++ b/fs/ext4/ialloc.c
-@@ -183,7 +183,16 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
- 	}
- 
- 	ext4_lock_group(sb, block_group);
--	if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
-+	if (ext4_has_group_desc_csum(sb) &&
-+	    (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) {
-+		if (block_group == 0) {
-+			ext4_unlock_group(sb, block_group);
-+			unlock_buffer(bh);
-+			ext4_error(sb, "Inode bitmap for bg 0 marked "
-+				   "uninitialized");
-+			err = -EFSCORRUPTED;
-+			goto out;
-+		}
- 		err = ext4_init_inode_bitmap(sb, bh, block_group, desc);
- 		set_bitmap_uptodate(bh);
- 		set_buffer_uptodate(bh);
-@@ -960,7 +969,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
- 
- 	/* recheck and clear flag under lock if we still need to */
- 	ext4_lock_group(sb, group);
--	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
-+	if (ext4_has_group_desc_csum(sb) &&
-+	    (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
- 		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
- 		ext4_free_group_clusters_set(sb, gdp,
- 			ext4_free_clusters_after_init(sb, group, gdp));
-diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
-index 64056c6..4869946 100644
---- a/fs/ext4/mballoc.c
-+++ b/fs/ext4/mballoc.c
-@@ -2444,7 +2444,8 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
- 	 * initialize bb_free to be able to skip
- 	 * empty groups without initialization
- 	 */
--	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
-+	if (ext4_has_group_desc_csum(sb) &&
-+	    (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
- 		meta_group_info[i]->bb_free =
- 			ext4_free_clusters_after_init(sb, group, desc);
- 	} else {
-@@ -2969,7 +2970,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
- #endif
- 	ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
- 		      ac->ac_b_ex.fe_len);
--	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
-+	if (ext4_has_group_desc_csum(sb) &&
-+	    (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
- 		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
- 		ext4_free_group_clusters_set(sb, gdp,
- 					     ext4_free_clusters_after_init(sb,
-diff --git a/fs/ext4/super.c b/fs/ext4/super.c
-index 1f58179..97d322b 100644
---- a/fs/ext4/super.c
-+++ b/fs/ext4/super.c
-@@ -2991,13 +2991,22 @@ static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
- 	ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
- 	struct ext4_group_desc *gdp = NULL;
- 
-+	if (!ext4_has_group_desc_csum(sb))
-+		return ngroups;
-+
- 	for (group = 0; group < ngroups; group++) {
- 		gdp = ext4_get_group_desc(sb, group, NULL);
- 		if (!gdp)
- 			continue;
- 
--		if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
-+		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
-+			continue;
-+		if (group != 0)
- 			break;
-+		ext4_error(sb, "Inode table for bg 0 marked as "
-+			   "needing zeroing");
-+		if (sb_rdonly(sb))
-+			return ngroups;
- 	}
- 
- 	return group;
---
-
-
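The rule the removed patch enforced can be summarized outside the kernel: the bg_flags field may only be trusted when the uninit_bg or metadata_csum feature is present, and block group 0 can never legitimately be "uninitialized", since it holds the root and other special inodes. Below is a minimal standalone C sketch of that check; it is illustrative only, and the FEATURE_* constants, struct bg_desc and bg_needs_init() are invented stand-ins, not the ext4 code.

/* Illustrative sketch only -- not the ext4 implementation. */
#include <stdbool.h>
#include <stdio.h>

#define FEATURE_GDT_CSUM      0x1  /* stand-in for the uninit_bg feature */
#define FEATURE_METADATA_CSUM 0x2  /* stand-in for metadata_csum */
#define BG_BLOCK_UNINIT       0x1  /* stand-in for EXT4_BG_BLOCK_UNINIT */

struct bg_desc {
	unsigned short bg_flags;
};

/* Returns true when the group's bitmap needs on-the-fly initialization.
 * Sets *corrupted when group 0 carries the uninit flag, which is never
 * legitimate: group 0 is always fully initialized at mkfs time. */
static bool bg_needs_init(unsigned int features, unsigned int group,
			  const struct bg_desc *desc, bool *corrupted)
{
	*corrupted = false;

	/* Without these features, bg_flags is meaningless on-disk
	 * data and must be ignored entirely. */
	if (!(features & (FEATURE_GDT_CSUM | FEATURE_METADATA_CSUM)))
		return false;
	if (!(desc->bg_flags & BG_BLOCK_UNINIT))
		return false;
	if (group == 0) {
		*corrupted = true;  /* treat the filesystem as corrupted */
		return false;
	}
	return true;
}

int main(void)
{
	struct bg_desc d = { .bg_flags = BG_BLOCK_UNINIT };
	bool corrupted;

	printf("group 5 needs init: %d\n",
	       bg_needs_init(FEATURE_METADATA_CSUM, 5, &d, &corrupted));
	bg_needs_init(FEATURE_METADATA_CSUM, 0, &d, &corrupted);
	printf("group 0 corrupted: %d\n", corrupted);
	return 0;
}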
diff --git a/patches/cve/CVE-2018-10879-ext4-make-sure-bitmaps-and-the-inode-table-don-t-ove.patch b/patches/cve/CVE-2018-10879-ext4-make-sure-bitmaps-and-the-inode-table-don-t-ove.patch
deleted file mode 100644
index 439c9e9..0000000
--- a/patches/cve/CVE-2018-10879-ext4-make-sure-bitmaps-and-the-inode-table-don-t-ove.patch
+++ /dev/null
@@ -1,83 +0,0 @@
-From 77260807d1170a8cf35dbb06e07461a655f67eee Mon Sep 17 00:00:00 2001
-From: Theodore Ts'o <tytso@mit.edu>
-Date: Wed, 13 Jun 2018 23:08:26 -0400
-Subject: [PATCH] ext4: make sure bitmaps and the inode table don't overlap
- with bg descriptors
-
-It's really bad when the allocation bitmaps and the inode table
-overlap with the block group descriptors, since it causes random
-corruption of the bg descriptors. So we really want to head those off
-at the pass.
-
-https://bugzilla.kernel.org/show_bug.cgi?id=199865
-
-CVE: CVE-2018-10879
-Upstream-Status: Backport
-
-Signed-off-by: Theodore Ts'o <tytso@mit.edu>
-Cc: stable@kernel.org
-Signed-off-by: Andreas Wellving <andreas.wellving@enea.com>
----
- fs/ext4/super.c | 25 +++++++++++++++++++++++++
- 1 file changed, 25 insertions(+)
-
-diff --git a/fs/ext4/super.c b/fs/ext4/super.c
-index c8b7b83..c61675d 100644
---- a/fs/ext4/super.c
-+++ b/fs/ext4/super.c
-@@ -2348,6 +2348,7 @@ static int ext4_check_descriptors(struct super_block *sb,
- 	struct ext4_sb_info *sbi = EXT4_SB(sb);
- 	ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
- 	ext4_fsblk_t last_block;
-+	ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0) + 1;
- 	ext4_fsblk_t block_bitmap;
- 	ext4_fsblk_t inode_bitmap;
- 	ext4_fsblk_t inode_table;
-@@ -2380,6 +2381,14 @@ static int ext4_check_descriptors(struct super_block *sb,
- 		if (!sb_rdonly(sb))
- 			return 0;
- 	}
-+	if (block_bitmap >= sb_block + 1 &&
-+	    block_bitmap <= last_bg_block) {
-+		ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
-+			 "Block bitmap for group %u overlaps "
-+			 "block group descriptors", i);
-+		if (!sb_rdonly(sb))
-+			return 0;
-+	}
- 	if (block_bitmap < first_block || block_bitmap > last_block) {
- 		ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
- 			 "Block bitmap for group %u not in group "
-@@ -2394,6 +2403,14 @@ static int ext4_check_descriptors(struct super_block *sb,
- 		if (!sb_rdonly(sb))
- 			return 0;
- 	}
-+	if (inode_bitmap >= sb_block + 1 &&
-+	    inode_bitmap <= last_bg_block) {
-+		ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
-+			 "Inode bitmap for group %u overlaps "
-+			 "block group descriptors", i);
-+		if (!sb_rdonly(sb))
-+			return 0;
-+	}
- 	if (inode_bitmap < first_block || inode_bitmap > last_block) {
- 		ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
- 			 "Inode bitmap for group %u not in group "
-@@ -2408,6 +2425,14 @@ static int ext4_check_descriptors(struct super_block *sb,
- 		if (!sb_rdonly(sb))
- 			return 0;
- 	}
-+	if (inode_table >= sb_block + 1 &&
-+	    inode_table <= last_bg_block) {
-+		ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
-+			 "Inode table for group %u overlaps "
-+			 "block group descriptors", i);
-+		if (!sb_rdonly(sb))
-+			return 0;
-+	}
- 	if (inode_table < first_block ||
- 	    inode_table + sbi->s_itb_per_group - 1 > last_block) {
- 		ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
---
-2.7.4
-
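The overlap test this removed patch added is a closed-interval check: any bitmap or inode-table block falling in [sb_block + 1, last_bg_block] would sit on top of the group descriptors, so writes to that metadata would corrupt them. A minimal C sketch of the check follows; it is illustrative only, with invented names (the real check lives in ext4_check_descriptors() in fs/ext4/super.c).

/* Illustrative sketch only -- not the ext4 implementation. */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned long long fsblk_t;

/* The descriptor area occupies the blocks right after the superblock:
 * [sb_block + 1, sb_block + num_gdb_blocks + 1]. Metadata placed in
 * that range would be written over the descriptors at runtime. */
static bool overlaps_bg_descriptors(fsblk_t block, fsblk_t sb_block,
				    fsblk_t num_gdb_blocks)
{
	fsblk_t last_bg_block = sb_block + num_gdb_blocks + 1;

	return block >= sb_block + 1 && block <= last_bg_block;
}

int main(void)
{
	fsblk_t sb_block = 0, num_gdb = 2;

	/* A bitmap claimed at block 2 lands inside the descriptor area. */
	printf("bitmap@2 overlaps: %d\n",
	       overlaps_bg_descriptors(2, sb_block, num_gdb));
	/* A bitmap at block 10 is safely past the descriptors. */
	printf("bitmap@10 overlaps: %d\n",
	       overlaps_bg_descriptors(10, sb_block, num_gdb));
	return 0;
}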
diff --git a/patches/cve/CVE-2018-5390-tcp-free-batches-of-packets-in-tcp_prune_ofo_queue.patch b/patches/cve/CVE-2018-5390-tcp-free-batches-of-packets-in-tcp_prune_ofo_queue.patch
index b8f95d0..35d1eec 100644
--- a/patches/cve/CVE-2018-5390-tcp-free-batches-of-packets-in-tcp_prune_ofo_queue.patch
+++ b/patches/cve/CVE-2018-5390-tcp-free-batches-of-packets-in-tcp_prune_ofo_queue.patch
@@ -1,134 +1,60 @@
-Date: Mon, 15 Oct 2018 06:07:41 +0200
-Subject: [PATCH] Merge branch 'tcp-robust-ooo'
+From 2d08921c8da26bdce3d8848ef6f32068f594d7d4 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 23 Jul 2018 09:28:17 -0700
+Subject: [PATCH] tcp: free batches of packets in tcp_prune_ofo_queue()
 
-Eric Dumazet says:
+[ Upstream commit 72cd43ba64fc172a443410ce01645895850844c8 ]
 
-====================
 Juha-Matti Tilli reported that malicious peers could inject tiny
 packets in out_of_order_queue, forcing very expensive calls
 to tcp_collapse_ofo_queue() and tcp_prune_ofo_queue() for
-every incoming packet.
+every incoming packet. out_of_order_queue rb-tree can contain
+thousands of nodes, iterating over all of them is not nice.
 
-With tcp_rmem[2] default of 6MB, the ooo queue could
-contain ~7000 nodes.
+Before linux-4.9, we would have pruned all packets in ofo_queue
+in one go, every XXXX packets. XXXX depends on sk_rcvbuf and skbs
+truesize, but is about 7000 packets with tcp_rmem[2] default of 6 MB.
 
-This patch series makes sure we cut cpu cycles enough to
-render the attack not critical.
+Since we plan to increase tcp_rmem[2] in the future to cope with
+modern BDP, can not revert to the old behavior, without great pain.
 
-We might in the future go further, like disconnecting
-or black-holing proven malicious flows.
-====================
+Strategy taken in this patch is to purge ~12.5 % of the queue capacity.
+
+Fixes: 36a6503fedda ("tcp: refine tcp_prune_ofo_queue() to not drop all packets")
 
 CVE: CVE-2018-5390
 Upstream-Status: Backport
 
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Juha-Matti Tilli <juha-matti.tilli@iki.fi>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
 Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 Signed-off-by: Andreas Wellving <andreas.wellving@enea.com>
 ---
- net/ipv4/tcp_input.c | 60 +++++++++++++++++++++++++++++++++++++++++++---------
- 1 file changed, 50 insertions(+), 10 deletions(-)
+ include/linux/skbuff.h |  2 ++
+ net/ipv4/tcp_input.c   | 15 +++++++++++----
+ 2 files changed, 13 insertions(+), 4 deletions(-)
 
-diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
-index 2f107e4..4838b5f 100644
---- a/net/ipv4/tcp_input.c
-+++ b/net/ipv4/tcp_input.c
-@@ -4355,6 +4355,23 @@ static bool tcp_try_coalesce(struct sock *sk,
- 	return true;
+diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
+index b048d3d..1f207dd 100644
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -2982,6 +2982,8 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)
+ 	return __skb_grow(skb, len);
  }
 
-+static bool tcp_ooo_try_coalesce(struct sock *sk,
-+			     struct sk_buff *to,
-+			     struct sk_buff *from,
-+			     bool *fragstolen)
-+{
-+	bool res = tcp_try_coalesce(sk, to, from, fragstolen);
++#define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode)
 +
-+	/* In case tcp_drop() is called later, update to->gso_segs */
-+	if (res) {
-+		u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
-+			       max_t(u16, 1, skb_shinfo(from)->gso_segs);
-+
-+		skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
-+	}
-+	return res;
-+}
-+
- static void tcp_drop(struct sock *sk, struct sk_buff *skb)
- {
- 	sk_drops_add(sk, skb);
-@@ -4478,7 +4495,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
- 	/* In the typical case, we are adding an skb to the end of the list.
- 	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
- 	 */
--	if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) {
-+	if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
-+				 skb, &fragstolen)) {
- coalesce_done:
- 		tcp_grow_window(sk, skb);
- 		kfree_skb_partial(skb, fragstolen);
-@@ -4506,7 +4524,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
- 		/* All the bits are present. Drop. */
- 		NET_INC_STATS(sock_net(sk),
- 			      LINUX_MIB_TCPOFOMERGE);
--		__kfree_skb(skb);
-+		tcp_drop(sk, skb);
- 		skb = NULL;
- 		tcp_dsack_set(sk, seq, end_seq);
- 		goto add_sack;
-@@ -4525,10 +4543,11 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
- 				 TCP_SKB_CB(skb1)->end_seq);
- 			NET_INC_STATS(sock_net(sk),
- 				      LINUX_MIB_TCPOFOMERGE);
--			__kfree_skb(skb1);
-+			tcp_drop(sk, skb1);
- 			goto merge_right;
- 		}
--	} else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
-+	} else if (tcp_ooo_try_coalesce(sk, skb1,
-+					skb, &fragstolen)) {
- 		goto coalesce_done;
- 	}
- 	p = &parent->rb_right;
-@@ -4907,6 +4926,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
- static void tcp_collapse_ofo_queue(struct sock *sk)
- {
- 	struct tcp_sock *tp = tcp_sk(sk);
-+	u32 range_truesize, sum_tiny = 0;
- 	struct sk_buff *skb, *head;
- 	struct rb_node *p;
- 	u32 start, end;
-@@ -4925,6 +4945,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
- 		}
- 		start = TCP_SKB_CB(skb)->seq;
- 		end = TCP_SKB_CB(skb)->end_seq;
-+		range_truesize = skb->truesize;
- 
- 		for (head = skb;;) {
- 			skb = tcp_skb_next(skb, NULL);
-@@ -4935,11 +4956,20 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
- 			if (!skb ||
- 			    after(TCP_SKB_CB(skb)->seq, end) ||
- 			    before(TCP_SKB_CB(skb)->end_seq, start)) {
--				tcp_collapse(sk, NULL, &tp->out_of_order_queue,
--					     head, skb, start, end);
-+				/* Do not attempt collapsing tiny skbs */
-+				if (range_truesize != head->truesize ||
-+				    end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
-+					tcp_collapse(sk, NULL, &tp->out_of_order_queue,
-+						     head, skb, start, end);
-+				} else {
-+					sum_tiny += range_truesize;
-+					if (sum_tiny > sk->sk_rcvbuf >> 3)
-+						return;
-+				}
- 				goto new_range;
- 			}
- 
-+			range_truesize += skb->truesize;
- 			if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
- 				start = TCP_SKB_CB(skb)->seq;
- 			if (after(TCP_SKB_CB(skb)->end_seq, end))
-@@ -4954,6 +4984,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
+ #define skb_queue_walk(queue, skb) \
+ 	for (skb = (queue)->next; \
+ 	     skb != (struct sk_buff *)(queue); \
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 71f2b09..2eabf21 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -4965,6 +4965,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
  	 * 2) not add too big latencies if thousands of packets sit there.
  	 *    (But if application shrinks SO_RCVBUF, we could still end up
  	 *    freeing whole queue here)
@@ -136,7 +62,7 @@ index 2f107e4..4838b5f 100644
  	 *
  	 * Return true if queue has shrunk.
  	 */
-@@ -4961,20 +4992,26 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
+@@ -4972,20 +4973,26 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
  {
  	struct tcp_sock *tp = tcp_sk(sk);
  	struct rb_node *node, *prev;
@@ -167,16 +93,5 @@ index 2f107e4..4838b5f 100644
  		node = prev;
  	} while (node);
  	tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
-@@ -5009,6 +5046,9 @@ static int tcp_prune_queue(struct sock *sk)
- 	else if (tcp_under_memory_pressure(sk))
- 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
- 
-+	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
-+		return 0;
-+
- 	tcp_collapse_ofo_queue(sk);
- 	if (!skb_queue_empty(&sk->sk_receive_queue))
- 		tcp_collapse(sk, &sk->sk_receive_queue, NULL,
---
 
 
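The replacement backport changes the pruning strategy: rather than touching every node of the out-of-order rb-tree on each incoming packet, tcp_prune_ofo_queue() frees packets from the tail until roughly one eighth of the receive buffer (sk_rcvbuf >> 3) has been reclaimed. The simplified userspace C sketch below shows that budget loop; it is illustrative only — an array stands in for the rb-tree, prune_ofo() is an invented name, and the real code calls tcp_drop() on each victim.

/* Illustrative sketch only -- not the kernel implementation. */
#include <stdio.h>

struct pkt {
	unsigned int truesize;  /* memory charged to the socket */
};

/* Free packets from the tail (highest sequence numbers first) until
 * about 12.5% of the receive buffer has been reclaimed; returns the
 * number of packets freed. */
static unsigned int prune_ofo(struct pkt *queue, unsigned int n,
			      unsigned int rcvbuf)
{
	unsigned int goal = rcvbuf >> 3;  /* ~12.5% of queue capacity */
	unsigned int freed = 0;

	while (n > 0 && goal > 0) {
		struct pkt *victim = &queue[--n];

		if (victim->truesize >= goal)
			goal = 0;
		else
			goal -= victim->truesize;
		freed++;
	}
	return freed;
}

int main(void)
{
	struct pkt q[8] = {
		{1000}, {1000}, {1000}, {1000},
		{1000}, {1000}, {1000}, {1000},
	};

	/* With a 16000-byte rcvbuf the goal is 2000 bytes: two packets. */
	printf("freed %u packets\n", prune_ofo(q, 8, 16000));
	return 0;
}

Because the budget is proportional to sk_rcvbuf, a flood of tiny packets costs the attacker a bounded amount of victim CPU per prune instead of a full-queue walk, which is what made CVE-2018-5390 (SegmentSmack) expensive to exploit after this change.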