diff options
Diffstat (limited to 'patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch')
-rw-r--r-- | patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch | 184 |
1 files changed, 0 insertions, 184 deletions
diff --git a/patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch b/patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch deleted file mode 100644 index d3a1553..0000000 --- a/patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch +++ /dev/null | |||
@@ -1,184 +0,0 @@ | |||
1 | From 771ee703122aa119bb662208066040f8b9356986 Mon Sep 17 00:00:00 2001 | ||
2 | From: Sebastien Boeuf <sebastien.boeuf@intel.com> | ||
3 | Date: Mon, 23 Jan 2017 15:08:55 -0800 | ||
4 | Subject: [PATCH 152/154] x86: kvm: Notify host to release pages | ||
5 | |||
6 | In the context of hypervisors managing several virtual machines, we | ||
7 | want those virtual machines to give the memory they used back to | ||
8 | the host when they don't need it anymore. | ||
9 | |||
10 | This patch introduces a new hypercall KVM_HC_RETURN_MEM, allowing | ||
11 | the guest kernel to notify the host kernel when such an event occurs. | ||
12 | When the host receives the new hypercall, it calls the do_madvise() | ||
13 | function that we have previously exported. | ||
14 | |||
15 | Use of do_madvise() with MADV_DONTNEED flag will allow the guest to | ||
16 | ask for a new page without going through a new hypercall. Instead, | ||
17 | it will be able to start using that memory again as it will get | ||
18 | faulted back in as a fresh new page. That's why do_madvise() is more | ||
19 | efficient than doing vm_unmap() to return some memory to the host. | ||
20 | |||
21 | This patch also introduces a new sysctl, kvm_madv_instant_free, | ||
22 | allowing the user to select MADV_FREE advice instead of MADV_DONTNEED. | ||
23 | MADV_FREE performs better than MADV_DONTNEED because the pages do | ||
24 | not have to be zeroed if the kernel has not actually freed the | ||
25 | memory. This can happen when there was no need for the | ||
26 | kernel to get this memory back, meaning it was keeping those pages | ||
27 | in the right state to be re-used by the same application. | ||
28 | MADV_FREE being a very recent advice introduced in kernel 4.5, we | ||
29 | only want to enable it through a sysctl in case the user wants to | ||
30 | use it. | ||
31 | |||
32 | Suggested-by: Arjan van de Ven <arjan.van.de.ven@intel.com> | ||
33 | Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com> | ||
34 | |||
35 | Modified for aufs4 enabled kernel | ||
36 | Signed-off-by: Martin Borg <martin.borg@enea.com> | ||
37 | --- | ||
38 | arch/x86/kvm/x86.c | 17 +++++++++++++++++ | ||
39 | include/linux/mm.h | 5 +++++ | ||
40 | include/uapi/linux/kvm_para.h | 3 +++ | ||
41 | kernel/sysctl.c | 7 +++++++ | ||
42 | mm/Makefile | 2 +- | ||
43 | mm/kvm.c | 26 ++++++++++++++++++++++++++ | ||
44 | 6 files changed, 59 insertions(+), 1 deletion(-) | ||
45 | create mode 100644 mm/kvm.c | ||
46 | |||
47 | diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c | ||
48 | index 03869eb7fcd6..628bad46b8ad 100644 | ||
49 | --- a/arch/x86/kvm/x86.c | ||
50 | +++ b/arch/x86/kvm/x86.c | ||
51 | @@ -45,6 +45,7 @@ | ||
52 | #include <linux/user-return-notifier.h> | ||
53 | #include <linux/srcu.h> | ||
54 | #include <linux/slab.h> | ||
55 | +#include <linux/mm.h> | ||
56 | #include <linux/perf_event.h> | ||
57 | #include <linux/uaccess.h> | ||
58 | #include <linux/hash.h> | ||
59 | @@ -6253,6 +6254,19 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) | ||
60 | kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); | ||
61 | } | ||
62 | |||
63 | +static int kvm_pv_return_mem_op(struct kvm *kvm, gpa_t gpa, size_t len) | ||
64 | +{ | ||
65 | + unsigned long start = gfn_to_hva(kvm, gpa_to_gfn(gpa)); | ||
66 | + | ||
67 | + if (len > KVM_MAX_RET_MEM_SIZE) | ||
68 | + return KVM_EPERM; | ||
69 | + | ||
70 | + if (kvm_is_error_hva(start + len)) | ||
71 | + return KVM_EFAULT; | ||
72 | + | ||
73 | + return do_madvise(start, len, kvm_ret_mem_advice); | ||
74 | +} | ||
75 | + | ||
76 | void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) | ||
77 | { | ||
78 | vcpu->arch.apicv_active = false; | ||
79 | @@ -6304,6 +6318,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | ||
80 | ret = kvm_pv_clock_pairing(vcpu, a0, a1); | ||
81 | break; | ||
82 | #endif | ||
83 | + case KVM_HC_RETURN_MEM: | ||
84 | + ret = kvm_pv_return_mem_op(vcpu->kvm, a0, a1); | ||
85 | + break; | ||
86 | default: | ||
87 | ret = -KVM_ENOSYS; | ||
88 | break; | ||
89 | diff --git a/include/linux/mm.h b/include/linux/mm.h | ||
90 | index c3153e9ee7ea..15e02bf3a6b3 100644 | ||
91 | --- a/include/linux/mm.h | ||
92 | +++ b/include/linux/mm.h | ||
93 | @@ -2452,6 +2452,11 @@ extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm); | ||
94 | extern int sysctl_drop_caches; | ||
95 | int drop_caches_sysctl_handler(struct ctl_table *, int, | ||
96 | void __user *, size_t *, loff_t *); | ||
97 | +extern int sysctl_kvm_madv_instant_free; | ||
98 | +extern int kvm_ret_mem_advice; | ||
99 | +int kvm_madv_instant_free_sysctl_handler(struct ctl_table *table, int write, | ||
100 | + void __user *buffer, size_t *length, | ||
101 | + loff_t *ppos); | ||
102 | #endif | ||
103 | |||
104 | void drop_slab(void); | ||
105 | diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h | ||
106 | index dcf629dd2889..85f9422fe59c 100644 | ||
107 | --- a/include/uapi/linux/kvm_para.h | ||
108 | +++ b/include/uapi/linux/kvm_para.h | ||
109 | @@ -26,6 +26,9 @@ | ||
110 | #define KVM_HC_MIPS_EXIT_VM 7 | ||
111 | #define KVM_HC_MIPS_CONSOLE_OUTPUT 8 | ||
112 | #define KVM_HC_CLOCK_PAIRING 9 | ||
113 | +#define KVM_HC_RETURN_MEM 10 | ||
114 | + | ||
115 | +#define KVM_MAX_RET_MEM_SIZE (1 << 22) // 4MiB | ||
116 | |||
117 | /* | ||
118 | * hypercalls use architecture specific | ||
119 | diff --git a/kernel/sysctl.c b/kernel/sysctl.c | ||
120 | index d9c31bc2eaea..9a1611f92a2a 100644 | ||
121 | --- a/kernel/sysctl.c | ||
122 | +++ b/kernel/sysctl.c | ||
123 | @@ -1410,6 +1410,13 @@ static struct ctl_table vm_table[] = { | ||
124 | .extra1 = &one, | ||
125 | .extra2 = &four, | ||
126 | }, | ||
127 | + { | ||
128 | + .procname = "kvm_madv_instant_free", | ||
129 | + .data = &sysctl_kvm_madv_instant_free, | ||
130 | + .maxlen = sizeof(int), | ||
131 | + .mode = 0644, | ||
132 | + .proc_handler = kvm_madv_instant_free_sysctl_handler, | ||
133 | + }, | ||
134 | #ifdef CONFIG_COMPACTION | ||
135 | { | ||
136 | .procname = "compact_memory", | ||
137 | diff --git a/mm/Makefile b/mm/Makefile | ||
138 | index 4659b93cba43..77b145de8a55 100644 | ||
139 | --- a/mm/Makefile | ||
140 | +++ b/mm/Makefile | ||
141 | @@ -40,7 +40,7 @@ obj-y := filemap.o mempool.o oom_kill.o \ | ||
142 | mm_init.o mmu_context.o percpu.o slab_common.o \ | ||
143 | compaction.o vmacache.o swap_slots.o \ | ||
144 | interval_tree.o list_lru.o workingset.o \ | ||
145 | - prfile.o debug.o $(mmu-y) | ||
146 | + prfile.o debug.o kvm.o $(mmu-y) | ||
147 | |||
148 | obj-y += init-mm.o | ||
149 | |||
150 | diff --git a/mm/kvm.c b/mm/kvm.c | ||
151 | new file mode 100644 | ||
152 | index 000000000000..1c5600788221 | ||
153 | --- /dev/null | ||
154 | +++ b/mm/kvm.c | ||
155 | @@ -0,0 +1,26 @@ | ||
156 | +#include <linux/mman.h> | ||
157 | +#include <linux/sysctl.h> | ||
158 | + | ||
159 | +int sysctl_kvm_madv_instant_free; | ||
160 | + | ||
161 | +int kvm_ret_mem_advice = MADV_DONTNEED; | ||
162 | +EXPORT_SYMBOL_GPL(kvm_ret_mem_advice); | ||
163 | + | ||
164 | +int kvm_madv_instant_free_sysctl_handler(struct ctl_table *table, int write, | ||
165 | + void __user *buffer, size_t *length, loff_t *ppos) | ||
166 | +{ | ||
167 | + int ret; | ||
168 | + | ||
169 | + ret = proc_dointvec(table, write, buffer, length, ppos); | ||
170 | + if (ret) | ||
171 | + return ret; | ||
172 | + | ||
173 | +#ifdef MADV_FREE | ||
174 | + if (sysctl_kvm_madv_instant_free > 0) | ||
175 | + kvm_ret_mem_advice = MADV_FREE; | ||
176 | + else | ||
177 | + kvm_ret_mem_advice = MADV_DONTNEED; | ||
178 | +#endif | ||
179 | + | ||
180 | + return 0; | ||
181 | +} | ||
182 | -- | ||
183 | 2.15.0 | ||
184 | |||