From ad4d4e5cbad3ad875eaa59e0a919c014dfb6a39e Mon Sep 17 00:00:00 2001 From: Adrian Calianu Date: Wed, 27 Sep 2017 10:37:36 +0200 Subject: add guest features from yocto branch Signed-off-by: Adrian Calianu --- ...0152-x86-kvm-Notify-host-to-release-pages.patch | 180 +++++++++++++++++++++ 1 file changed, 180 insertions(+) create mode 100644 patches/boot_time_opt_guest/0152-x86-kvm-Notify-host-to-release-pages.patch (limited to 'patches/boot_time_opt_guest/0152-x86-kvm-Notify-host-to-release-pages.patch') diff --git a/patches/boot_time_opt_guest/0152-x86-kvm-Notify-host-to-release-pages.patch b/patches/boot_time_opt_guest/0152-x86-kvm-Notify-host-to-release-pages.patch new file mode 100644 index 0000000..ff9d8c0 --- /dev/null +++ b/patches/boot_time_opt_guest/0152-x86-kvm-Notify-host-to-release-pages.patch @@ -0,0 +1,180 @@ +From d28921b5f797829e4e676f7968ae688ef96b7992 Mon Sep 17 00:00:00 2001 +From: Sebastien Boeuf +Date: Mon, 23 Jan 2017 15:08:55 -0800 +Subject: [PATCH 152/154] x86: kvm: Notify host to release pages + +In context of hypervisors managing several virtual machines, we +want those virtual machines to give the memory they used back to +the host when they don't need it anymore. + +This patch introduces a new hypercall KVM_HC_RETURN_MEM, allowing +the guest kernel to notify the host kernel when such event occurs. +And relying on do_madvise() function that we have previously exported, +it issues a call to this function when it receives the new hypercall. + +Use of do_madvise() with MADV_DONTNEED flag will allow the guest to +ask for a new page without going through a new hypercall. Instead, +it will be able to start using that memory again as it will get +faulted back in as a fresh new page. That's why do_madvise() is more +efficient than doing vm_unmap() to return some memory to the host. + +This patch introduces also a new sysctl kvm_madv_instant_free, +allowing user to set MADV_FREE advice instead of MADV_DONTNEED. +Indeed, MADV_FREE saves more performances than using MADV_DONTNEED +because it does not zero the pages in case the memory has not been +freed by the kernel. This can happen when there was no need for the +kernel to get this memory back, meaning it was keeping those pages +in the right state to be re-used by the same application. +MADV_FREE being a very recent advice introduced in kernel 4.5, we +only want to enable it through a sysctl in case the user want to +use it. + +Suggested-by: Arjan van de Ven +Signed-off-by: Sebastien Boeuf +--- + arch/x86/kvm/x86.c | 17 +++++++++++++++++ + include/linux/mm.h | 5 +++++ + include/uapi/linux/kvm_para.h | 3 +++ + kernel/sysctl.c | 7 +++++++ + mm/Makefile | 2 +- + mm/kvm.c | 25 +++++++++++++++++++++++++ + 6 files changed, 58 insertions(+), 1 deletion(-) + create mode 100644 mm/kvm.c + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 582c75311f95..683a94dd5f03 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -46,6 +46,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -6019,6 +6020,19 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) + kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); + } + ++static int kvm_pv_return_mem_op(struct kvm *kvm, gpa_t gpa, size_t len) ++{ ++ unsigned long start = gfn_to_hva(kvm, gpa_to_gfn(gpa)); ++ ++ if (len > KVM_MAX_RET_MEM_SIZE) ++ return KVM_EPERM; ++ ++ if (kvm_is_error_hva(start + len)) ++ return KVM_EFAULT; ++ ++ return do_madvise(start, len, kvm_ret_mem_advice); ++} ++ + void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) + { + vcpu->arch.apicv_active = false; +@@ -6065,6 +6079,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) + kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1); + ret = 0; + break; ++ case KVM_HC_RETURN_MEM: ++ ret = kvm_pv_return_mem_op(vcpu->kvm, a0, a1); ++ break; + default: + ret = -KVM_ENOSYS; + break; +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 925ec25f99a8..833f23d98baa 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -2303,6 +2303,11 @@ extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm); + extern int sysctl_drop_caches; + int drop_caches_sysctl_handler(struct ctl_table *, int, + void __user *, size_t *, loff_t *); ++extern int sysctl_kvm_madv_instant_free; ++extern int kvm_ret_mem_advice; ++int kvm_madv_instant_free_sysctl_handler(struct ctl_table *table, int write, ++ void __user *buffer, size_t *length, ++ loff_t *ppos); + #endif + + void drop_slab(void); +diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h +index bf6cd7d5cac2..7d90f77d87d0 100644 +--- a/include/uapi/linux/kvm_para.h ++++ b/include/uapi/linux/kvm_para.h +@@ -23,6 +23,9 @@ + #define KVM_HC_MIPS_GET_CLOCK_FREQ 6 + #define KVM_HC_MIPS_EXIT_VM 7 + #define KVM_HC_MIPS_CONSOLE_OUTPUT 8 ++#define KVM_HC_RETURN_MEM 10 ++ ++#define KVM_MAX_RET_MEM_SIZE (1 << 22) // 4MiB + + /* + * hypercalls use architecture specific +diff --git a/kernel/sysctl.c b/kernel/sysctl.c +index c1095cdc0fe2..d8ae774fa042 100644 +--- a/kernel/sysctl.c ++++ b/kernel/sysctl.c +@@ -1398,6 +1398,13 @@ static struct ctl_table vm_table[] = { + .extra1 = &one, + .extra2 = &four, + }, ++ { ++ .procname = "kvm_madv_instant_free", ++ .data = &sysctl_kvm_madv_instant_free, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = kvm_madv_instant_free_sysctl_handler, ++ }, + #ifdef CONFIG_COMPACTION + { + .procname = "compact_memory", +diff --git a/mm/Makefile b/mm/Makefile +index 295bd7a9f76b..651ce0aff140 100644 +--- a/mm/Makefile ++++ b/mm/Makefile +@@ -37,7 +37,7 @@ obj-y := filemap.o mempool.o oom_kill.o \ + mm_init.o mmu_context.o percpu.o slab_common.o \ + compaction.o vmacache.o \ + interval_tree.o list_lru.o workingset.o \ +- prfile.o debug.o $(mmu-y) ++ prfile.o debug.o kvm.o $(mmu-y) + + obj-y += init-mm.o + +diff --git a/mm/kvm.c b/mm/kvm.c +new file mode 100644 +index 000000000000..8945f6a311b9 +--- /dev/null ++++ b/mm/kvm.c +@@ -0,0 +1,25 @@ ++#include ++ ++int sysctl_kvm_madv_instant_free; ++ ++int kvm_ret_mem_advice = MADV_DONTNEED; ++EXPORT_SYMBOL_GPL(kvm_ret_mem_advice); ++ ++int kvm_madv_instant_free_sysctl_handler(struct ctl_table *table, int write, ++ void __user *buffer, size_t *length, loff_t *ppos) ++{ ++ int ret; ++ ++ ret = proc_dointvec(table, write, buffer, length, ppos); ++ if (ret) ++ return ret; ++ ++#ifdef MADV_FREE ++ if (sysctl_kvm_madv_instant_free > 0) ++ kvm_ret_mem_advice = MADV_FREE; ++ else ++ kvm_ret_mem_advice = MADV_DONTNEED; ++#endif ++ ++ return 0; ++} +-- +2.12.1 + -- cgit v1.2.3-54-g00ecf