summaryrefslogtreecommitdiffstats
path: root/patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch
diff options
context:
space:
mode:
Diffstat (limited to 'patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch')
-rw-r--r--patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch184
1 files changed, 0 insertions, 184 deletions
diff --git a/patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch b/patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch
deleted file mode 100644
index d3a1553..0000000
--- a/patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch
+++ /dev/null
@@ -1,184 +0,0 @@
1From 771ee703122aa119bb662208066040f8b9356986 Mon Sep 17 00:00:00 2001
2From: Sebastien Boeuf <sebastien.boeuf@intel.com>
3Date: Mon, 23 Jan 2017 15:08:55 -0800
4Subject: [PATCH 152/154] x86: kvm: Notify host to release pages
5
6In the context of hypervisors managing several virtual machines, we
7want those virtual machines to give the memory they used back to
8the host when they don't need it anymore.
9
10This patch introduces a new hypercall KVM_HC_RETURN_MEM, allowing
11the guest kernel to notify the host kernel when such an event occurs.
12Relying on the do_madvise() function that we have previously exported,
13the host issues a call to this function when it receives the new hypercall.
14
15Use of do_madvise() with the MADV_DONTNEED advice will allow the guest to
16ask for a new page without going through a new hypercall. Instead,
17it will be able to start using that memory again as it will get
18faulted back in as a fresh new page. That's why do_madvise() is more
19efficient than doing vm_unmap() to return some memory to the host.
20
21This patch also introduces a new sysctl, kvm_madv_instant_free,
22allowing the user to select the MADV_FREE advice instead of MADV_DONTNEED.
23MADV_FREE performs better than MADV_DONTNEED
24because it does not zero the pages when the memory has not been
25freed by the kernel. This can happen when there was no need for the
26kernel to get this memory back, meaning it was keeping those pages
27in the right state to be re-used by the same application.
28MADV_FREE being a very recent advice introduced in kernel 4.5, we
29only want to enable it through a sysctl in case the user wants to
30use it.
31
32Suggested-by: Arjan van de Ven <arjan.van.de.ven@intel.com>
33Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
34
35Modified for aufs4 enabled kernel
36Signed-off-by: Martin Borg <martin.borg@enea.com>
37---
38 arch/x86/kvm/x86.c | 17 +++++++++++++++++
39 include/linux/mm.h | 5 +++++
40 include/uapi/linux/kvm_para.h | 3 +++
41 kernel/sysctl.c | 7 +++++++
42 mm/Makefile | 2 +-
43 mm/kvm.c | 26 ++++++++++++++++++++++++++
44 6 files changed, 59 insertions(+), 1 deletion(-)
45 create mode 100644 mm/kvm.c
46
47diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
48index 03869eb7fcd6..628bad46b8ad 100644
49--- a/arch/x86/kvm/x86.c
50+++ b/arch/x86/kvm/x86.c
51@@ -45,6 +45,7 @@
52 #include <linux/user-return-notifier.h>
53 #include <linux/srcu.h>
54 #include <linux/slab.h>
55+#include <linux/mm.h>
56 #include <linux/perf_event.h>
57 #include <linux/uaccess.h>
58 #include <linux/hash.h>
59@@ -6253,6 +6254,19 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
60 kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
61 }
62
63+static int kvm_pv_return_mem_op(struct kvm *kvm, gpa_t gpa, size_t len)
64+{
65+ unsigned long start = gfn_to_hva(kvm, gpa_to_gfn(gpa));
66+
67+ if (len > KVM_MAX_RET_MEM_SIZE)
68+ return KVM_EPERM;
69+
70+ if (kvm_is_error_hva(start + len))
71+ return KVM_EFAULT;
72+
73+ return do_madvise(start, len, kvm_ret_mem_advice);
74+}
75+
76 void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
77 {
78 vcpu->arch.apicv_active = false;
79@@ -6304,6 +6318,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
80 ret = kvm_pv_clock_pairing(vcpu, a0, a1);
81 break;
82 #endif
83+ case KVM_HC_RETURN_MEM:
84+ ret = kvm_pv_return_mem_op(vcpu->kvm, a0, a1);
85+ break;
86 default:
87 ret = -KVM_ENOSYS;
88 break;
89diff --git a/include/linux/mm.h b/include/linux/mm.h
90index c3153e9ee7ea..15e02bf3a6b3 100644
91--- a/include/linux/mm.h
92+++ b/include/linux/mm.h
93@@ -2452,6 +2452,11 @@ extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm);
94 extern int sysctl_drop_caches;
95 int drop_caches_sysctl_handler(struct ctl_table *, int,
96 void __user *, size_t *, loff_t *);
97+extern int sysctl_kvm_madv_instant_free;
98+extern int kvm_ret_mem_advice;
99+int kvm_madv_instant_free_sysctl_handler(struct ctl_table *table, int write,
100+ void __user *buffer, size_t *length,
101+ loff_t *ppos);
102 #endif
103
104 void drop_slab(void);
105diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
106index dcf629dd2889..85f9422fe59c 100644
107--- a/include/uapi/linux/kvm_para.h
108+++ b/include/uapi/linux/kvm_para.h
109@@ -26,6 +26,9 @@
110 #define KVM_HC_MIPS_EXIT_VM 7
111 #define KVM_HC_MIPS_CONSOLE_OUTPUT 8
112 #define KVM_HC_CLOCK_PAIRING 9
113+#define KVM_HC_RETURN_MEM 10
114+
115+#define KVM_MAX_RET_MEM_SIZE (1 << 22) // 4MiB
116
117 /*
118 * hypercalls use architecture specific
119diff --git a/kernel/sysctl.c b/kernel/sysctl.c
120index d9c31bc2eaea..9a1611f92a2a 100644
121--- a/kernel/sysctl.c
122+++ b/kernel/sysctl.c
123@@ -1410,6 +1410,13 @@ static struct ctl_table vm_table[] = {
124 .extra1 = &one,
125 .extra2 = &four,
126 },
127+ {
128+ .procname = "kvm_madv_instant_free",
129+ .data = &sysctl_kvm_madv_instant_free,
130+ .maxlen = sizeof(int),
131+ .mode = 0644,
132+ .proc_handler = kvm_madv_instant_free_sysctl_handler,
133+ },
134 #ifdef CONFIG_COMPACTION
135 {
136 .procname = "compact_memory",
137diff --git a/mm/Makefile b/mm/Makefile
138index 4659b93cba43..77b145de8a55 100644
139--- a/mm/Makefile
140+++ b/mm/Makefile
141@@ -40,7 +40,7 @@ obj-y := filemap.o mempool.o oom_kill.o \
142 mm_init.o mmu_context.o percpu.o slab_common.o \
143 compaction.o vmacache.o swap_slots.o \
144 interval_tree.o list_lru.o workingset.o \
145- prfile.o debug.o $(mmu-y)
146+ prfile.o debug.o kvm.o $(mmu-y)
147
148 obj-y += init-mm.o
149
150diff --git a/mm/kvm.c b/mm/kvm.c
151new file mode 100644
152index 000000000000..1c5600788221
153--- /dev/null
154+++ b/mm/kvm.c
155@@ -0,0 +1,26 @@
156+#include <linux/mman.h>
157+#include <linux/sysctl.h>
158+
159+int sysctl_kvm_madv_instant_free;
160+
161+int kvm_ret_mem_advice = MADV_DONTNEED;
162+EXPORT_SYMBOL_GPL(kvm_ret_mem_advice);
163+
164+int kvm_madv_instant_free_sysctl_handler(struct ctl_table *table, int write,
165+ void __user *buffer, size_t *length, loff_t *ppos)
166+{
167+ int ret;
168+
169+ ret = proc_dointvec(table, write, buffer, length, ppos);
170+ if (ret)
171+ return ret;
172+
173+#ifdef MADV_FREE
174+ if (sysctl_kvm_madv_instant_free > 0)
175+ kvm_ret_mem_advice = MADV_FREE;
176+ else
177+ kvm_ret_mem_advice = MADV_DONTNEED;
178+#endif
179+
180+ return 0;
181+}
182--
1832.15.0
184