From e790d9ef6405633b007339d746b709aed43a928d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 21 Aug 2014 18:08:05 +0200 Subject: KVM: add kvm_arch_sched_in MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce preempt notifiers for architecture specific code. Advantage over creating a new notifier in every arch is slightly simpler code and guaranteed call order with respect to kvm_sched_in. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a4c33b34fe3f..ebd723676633 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -624,6 +624,8 @@ void kvm_arch_exit(void); int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu); +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu); + void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 64d831269ccbca1fc6d739a0f3c8aa24afb43a5e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 19 Aug 2014 12:15:00 +0200 Subject: KVM: Introduce gfn_to_hva_memslot_prot To support read-only memory regions on arm and arm64, we have a need to resolve a gfn to an hva given a pointer to a memslot to avoid looping through the memslots twice and to reuse the hva error checking of gfn_to_hva_prot(), add a new gfn_to_hva_memslot_prot() function and refactor gfn_to_hva_prot() to use this function. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ebd723676633..6d8a658ec174 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -528,6 +528,8 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); +unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn, + bool *writable); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); -- cgit v1.2.3 From 656473003bc7e056c3bbd4a4d9832dad01e86f76 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 29 Aug 2014 14:01:17 +0200 Subject: KVM: forward declare structs in kvm_types.h Opaque KVM structs are useful for prototypes in asm/kvm_host.h, to avoid "'struct foo' declared inside parameter list" warnings (and consequent breakage due to conflicting types). Move them from individual files to a generic place in linux/kvm_types.h. Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 6 ------ include/linux/kvm_types.h | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ebd723676633..e1cb915a1096 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -140,8 +140,6 @@ static inline bool is_error_page(struct page *page) #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -struct kvm; -struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; extern spinlock_t kvm_lock; @@ -325,8 +323,6 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; -struct kvm_irq_routing_table; - #ifndef KVM_PRIVATE_MEM_SLOTS #define KVM_PRIVATE_MEM_SLOTS 0 #endif @@ -1036,8 +1032,6 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) extern bool kvm_rebooting; -struct kvm_device_ops; - struct kvm_device { struct kvm_device_ops *ops; struct kvm *kvm; diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index b0bcce0ddc95..b606bb689a3e 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -17,6 +17,20 @@ #ifndef __KVM_TYPES_H__ #define __KVM_TYPES_H__ +struct kvm; +struct kvm_async_pf; +struct kvm_device_ops; +struct kvm_interrupt; +struct kvm_irq_routing_table; +struct kvm_memory_slot; +struct kvm_one_reg; +struct kvm_run; +struct kvm_userspace_memory_region; +struct kvm_vcpu; +struct kvm_vcpu_init; + +enum kvm_mr_change; + #include /* -- cgit v1.2.3 From 13a34e067eab24fec882e1834fbf2cc31911d474 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 28 Aug 2014 15:13:03 +0200 Subject: KVM: remove garbage arg to *hardware_{en,dis}able MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the beggining was on_each_cpu(), which required an unused argument to kvm_arch_ops.hardware_{en,dis}able, but this was soon forgotten. Remove unnecessary arguments that stem from this. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e1cb915a1096..e098dce179df 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -630,8 +630,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); -int kvm_arch_hardware_enable(void *garbage); -void kvm_arch_hardware_disable(void *garbage); +int kvm_arch_hardware_enable(void); +void kvm_arch_hardware_disable(void); int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); void kvm_arch_check_processor_compat(void *rtn); -- cgit v1.2.3 From d60eacb07053142bfb9b41582074a89a790a9d46 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Sep 2014 10:27:33 +0100 Subject: KVM: device: add simple registration mechanism for kvm_device_ops kvm_ioctl_create_device currently has knowledge of all the device types and their associated ops. This is fairly inflexible when adding support for new in-kernel device emulations, so move what we currently have out into a table, which can support dynamic registration of ops by new drivers for virtual hardware. Cc: Alex Williamson Cc: Alex Graf Cc: Gleb Natapov Cc: Paolo Bonzini Cc: Marc Zyngier Acked-by: Cornelia Huck Reviewed-by: Christoffer Dall Signed-off-by: Will Deacon Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e098dce179df..b6e954742951 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1064,6 +1064,7 @@ struct kvm_device_ops { void kvm_device_get(struct kvm_device *dev); void kvm_device_put(struct kvm_device *dev); struct kvm_device *kvm_device_from_filp(struct file *filp); +int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; -- cgit v1.2.3 From c06a841bf36340e9e917ce60d11a6425ac85d0bd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Sep 2014 10:27:34 +0100 Subject: KVM: ARM: vgic: register kvm_device_ops dynamically Now that we have a dynamic means to register kvm_device_ops, use that for the ARM VGIC, instead of relying on the static table. Cc: Gleb Natapov Cc: Paolo Bonzini Acked-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Will Deacon Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b6e954742951..601d321f96e7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1069,7 +1069,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; extern struct kvm_device_ops kvm_vfio_ops; -extern struct kvm_device_ops kvm_arm_vgic_v2_ops; extern struct kvm_device_ops kvm_flic_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT -- cgit v1.2.3 From 84877d93336de21a6251db00b841468a83c65906 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Tue, 2 Sep 2014 10:27:35 +0100 Subject: KVM: s390: register flic ops dynamically Using the new kvm_register_device_ops() interface makes us get rid of an #ifdef in common code. Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Cornelia Huck Signed-off-by: Will Deacon Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 601d321f96e7..7ef088bec715 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1069,7 +1069,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; extern struct kvm_device_ops kvm_vfio_ops; -extern struct kvm_device_ops kvm_flic_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT -- cgit v1.2.3 From 80ce1639727e9d38729c34f162378508c307ca25 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Sep 2014 10:27:36 +0100 Subject: KVM: VFIO: register kvm_device_ops dynamically Now that we have a dynamic means to register kvm_device_ops, use that for the VFIO kvm device, instead of relying on the static table. This is achieved by a module_init call to register the ops with KVM. Cc: Gleb Natapov Cc: Paolo Bonzini Acked-by: Alex Williamson Signed-off-by: Will Deacon Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7ef088bec715..d44a2d640551 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1068,7 +1068,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; -extern struct kvm_device_ops kvm_vfio_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT -- cgit v1.2.3 From 234b239bea395316d7f78018c672f4a88b3cdf0d Mon Sep 17 00:00:00 2001 From: Andres Lagar-Cavilla Date: Wed, 17 Sep 2014 10:51:48 -0700 Subject: kvm: Faults which trigger IO release the mmap_sem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When KVM handles a tdp fault it uses FOLL_NOWAIT. If the guest memory has been swapped out or is behind a filemap, this will trigger async readahead and return immediately. The rationale is that KVM will kick back the guest with an "async page fault" and allow for some other guest process to take over. If async PFs are enabled the fault is retried asap from an async workqueue. If not, it's retried immediately in the same code path. In either case the retry will not relinquish the mmap semaphore and will block on the IO. This is a bad thing, as other mmap semaphore users now stall as a function of swap or filemap latency. This patch ensures both the regular and async PF path re-enter the fault allowing for the mmap semaphore to be relinquished in the case of IO wait. Reviewed-by: Radim Krčmář Signed-off-by: Andres Lagar-Cavilla Acked-by: Andrew Morton Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 11 +++++++++++ include/linux/mm.h | 1 + 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d44a2d640551..45aaeb3360c9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -198,6 +198,17 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif +/* + * Carry out a gup that requires IO. Allow the mm to relinquish the mmap + * semaphore if the filemap/swap has to wait on a page lock. pagep == NULL + * controls whether we retry the gup one more time to completion in that case. + * Typically this is called after a FAULT_FLAG_RETRY_NOWAIT in the main tdp + * handler. + */ +int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm, + unsigned long addr, bool write_fault, + struct page **pagep); + enum { OUTSIDE_GUEST_MODE, IN_GUEST_MODE, diff --git a/include/linux/mm.h b/include/linux/mm.h index 8981cc882ed2..0f4196a0bc20 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1985,6 +1985,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ +#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); -- cgit v1.2.3 From 57128468080a8b6ea452223036d3e417f748af55 Mon Sep 17 00:00:00 2001 From: Andres Lagar-Cavilla Date: Mon, 22 Sep 2014 14:54:42 -0700 Subject: kvm: Fix page ageing bugs 1. We were calling clear_flush_young_notify in unmap_one, but we are within an mmu notifier invalidate range scope. The spte exists no more (due to range_start) and the accessed bit info has already been propagated (due to kvm_pfn_set_accessed). Simply call clear_flush_young. 2. We clear_flush_young on a primary MMU PMD, but this may be mapped as a collection of PTEs by the secondary MMU (e.g. during log-dirty). This required expanding the interface of the clear_flush_young mmu notifier, so a lot of code has been trivially touched. 3. In the absence of shadow_accessed_mask (e.g. EPT A bit), we emulate the access bit by blowing the spte. This requires proper synchronizing with MMU notifier consumers, like every other removal of spte's does. Signed-off-by: Andres Lagar-Cavilla Acked-by: Rik van Riel Signed-off-by: Paolo Bonzini --- include/linux/mmu_notifier.h | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 27288692241e..88787bb4b3b9 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -57,10 +57,13 @@ struct mmu_notifier_ops { * pte. This way the VM will provide proper aging to the * accesses to the page through the secondary MMUs and not * only to the ones through the Linux pte. + * Start-end is necessary in case the secondary MMU is mapping the page + * at a smaller granularity than the primary MMU. */ int (*clear_flush_young)(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long address); + unsigned long start, + unsigned long end); /* * test_young is called to check the young/accessed bitflag in @@ -175,7 +178,8 @@ extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn, extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); extern void __mmu_notifier_release(struct mm_struct *mm); extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long address); + unsigned long start, + unsigned long end); extern int __mmu_notifier_test_young(struct mm_struct *mm, unsigned long address); extern void __mmu_notifier_change_pte(struct mm_struct *mm, @@ -194,10 +198,11 @@ static inline void mmu_notifier_release(struct mm_struct *mm) } static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { if (mm_has_notifiers(mm)) - return __mmu_notifier_clear_flush_young(mm, address); + return __mmu_notifier_clear_flush_young(mm, start, end); return 0; } @@ -255,7 +260,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) unsigned long ___address = __address; \ __young = ptep_clear_flush_young(___vma, ___address, __ptep); \ __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ - ___address); \ + ___address, \ + ___address + \ + PAGE_SIZE); \ __young; \ }) @@ -266,7 +273,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) unsigned long ___address = __address; \ __young = pmdp_clear_flush_young(___vma, ___address, __pmdp); \ __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ - ___address); \ + ___address, \ + ___address + \ + PMD_SIZE); \ __young; \ }) @@ -301,7 +310,8 @@ static inline void mmu_notifier_release(struct mm_struct *mm) } static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { return 0; } -- cgit v1.2.3 From 445b8236959bfe624a5aa9bce89f44a3bec9b2b1 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Wed, 24 Sep 2014 15:57:55 +0800 Subject: kvm: Rename make_all_cpus_request() to kvm_make_all_cpus_request() and make it non-static Different architectures need different requests, and in fact we will use this function in architecture-specific code later. This will be outside kvm_main.c, so make it non-static and rename it to kvm_make_all_cpus_request(). Reviewed-by: Paolo Bonzini Signed-off-by: Tang Chen Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 45aaeb3360c9..12c591fc2571 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -586,6 +586,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm); void kvm_reload_remote_mmus(struct kvm *kvm); void kvm_make_mclock_inprogress_request(struct kvm *kvm); void kvm_make_scan_ioapic_request(struct kvm *kvm); +bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); -- cgit v1.2.3 From 4256f43f9fab91e1c17b5846a240cf4b66a768a8 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Wed, 24 Sep 2014 15:57:54 +0800 Subject: kvm: x86: Add request bit to reload APIC access page address Currently, the APIC access page is pinned by KVM for the entire life of the guest. We want to make it migratable in order to make memory hot-unplug available for machines that run KVM. This patch prepares to handle this in generic code, through a new request bit (that will be set by the MMU notifier) and a new hook that is called whenever the request bit is processed. Signed-off-by: Tang Chen Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 12c591fc2571..d594f9f34429 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -136,6 +136,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_GLOBAL_CLOCK_UPDATE 22 #define KVM_REQ_ENABLE_IBS 23 #define KVM_REQ_DISABLE_IBS 24 +#define KVM_REQ_APIC_PAGE_RELOAD 25 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v1.2.3