Commit e37a07e0 authored by Linus Torvalds

Merge tag 'kvm-4.13-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more KVM updates from Radim Krčmář:
 "Second batch of KVM updates for v4.13

  Common:
   - add uevents for VM creation/destruction
   - annotate and properly access RCU-protected objects

  s390:
   - rename IOCTL added in the first v4.13 merge

  x86:
   - emulate VMLOAD VMSAVE feature in SVM
   - support paravirtual asynchronous page fault while nested
   - add Hyper-V userspace interfaces for better migration
   - improve master clock corner cases
   - extend internal error reporting after EPT misconfig
   - correct single-stepping of emulated instructions in SVM
   - handle MCE during VM entry
   - fix nVMX VM entry checks and nVMX VMCS shadowing"

* tag 'kvm-4.13-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (28 commits)
  kvm: x86: hyperv: make VP_INDEX managed by userspace
  KVM: async_pf: Let guest support delivery of async_pf from guest mode
  KVM: async_pf: Force a nested vmexit if the injected #PF is async_pf
  KVM: async_pf: Add L1 guest async_pf #PF vmexit handler
  KVM: x86: Simplify kvm_x86_ops->queue_exception parameter list
  kvm: x86: hyperv: add KVM_CAP_HYPERV_SYNIC2
  KVM: x86: make backwards_tsc_observed a per-VM variable
  KVM: trigger uevents when creating or destroying a VM
  KVM: SVM: Enable Virtual VMLOAD VMSAVE feature
  KVM: SVM: Add Virtual VMLOAD VMSAVE feature definition
  KVM: SVM: Rename lbr_ctl field in the vmcb control area
  KVM: SVM: Prepare for new bit definition in lbr_ctl
  KVM: SVM: handle singlestep exception when skipping emulated instructions
  KVM: x86: take slots_lock in kvm_free_pit
  KVM: s390: Fix KVM_S390_GET_CMMA_BITS ioctl definition
  kvm: vmx: Properly handle machine check during VM-entry
  KVM: x86: update master clock before computing kvmclock_offset
  kvm: nVMX: Shadow "high" parts of shadowed 64-bit VMCS fields
  kvm: nVMX: Fix nested_vmx_check_msr_bitmap_controls
  kvm: nVMX: Validate the I/O bitmaps on nested VM-entry
  ...
parents a80099a1 d3457c87
......@@ -4329,3 +4329,21 @@ Querying this capability returns a bitmap indicating the possible
virtual SMT modes that can be set using KVM_CAP_PPC_SMT. If bit N
(counting from the right) is set, then a virtual SMT mode of 2^N is
available.
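As a worked illustration (not part of the patch): a returned bitmap of 0x0b — bits 0, 1 and 3 set — would mean virtual SMT modes 1, 2 and 8 are available. A minimal userspace sketch for decoding it, assuming a vm_fd obtained from KVM_CREATE_VM and whatever capability constant this section documents (its heading lies above this excerpt):

	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Hypothetical helper: print the virtual SMT modes advertised by the
	 * bitmap that KVM_CHECK_EXTENSION returns for the capability above. */
	static void print_possible_smt_modes(int vm_fd, int cap)
	{
		int bitmap = ioctl(vm_fd, KVM_CHECK_EXTENSION, cap);

		for (int n = 0; bitmap > 0 && n < 32; n++)
			if (bitmap & (1 << n))
				printf("virtual SMT mode %d is available\n", 1 << n);
	}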
8.11 KVM_CAP_HYPERV_SYNIC2
Architectures: x86
This capability enables a newer version of the Hyper-V Synthetic Interrupt
Controller (SynIC). The only difference from KVM_CAP_HYPERV_SYNIC is that KVM
doesn't clear the SynIC message and event flags pages when they are enabled by
writing to the respective MSRs.
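Like KVM_CAP_HYPERV_SYNIC, this is a per-vcpu capability enabled through KVM_ENABLE_CAP; the kvm_vcpu_ioctl_enable_cap() hunk near the end of this diff requires an in-kernel irqchip and rejects a non-zero args[0]. A minimal userspace sketch, assuming vcpu_fd is an already-created vcpu file descriptor:

	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_HYPERV_SYNIC2,
		/* args[] left zeroed: the new handler returns -EINVAL otherwise */
	};

	if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
		perror("KVM_ENABLE_CAP(KVM_CAP_HYPERV_SYNIC2)");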
8.12 KVM_CAP_HYPERV_VP_INDEX
Architectures: x86
This capability indicates that userspace can load HV_X64_MSR_VP_INDEX msr. Its
value is used to denote the target vcpu for a SynIC interrupt. For
compatibility, KVM initializes this msr to KVM's internal vcpu index. When this
capability is absent, userspace can still query this msr's value.
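With this capability, a VMM restoring state after migration can write the MSR through KVM_SET_MSRS (the kvm_hv_set_msr() hunk below rejects writes that do not come from the host side). A minimal sketch, assuming vcpu_fd is a vcpu file descriptor, desired_vp_index is the value being restored, and HV_X64_MSR_VP_INDEX (0x40000002) is taken from the Hyper-V definitions:

	struct {
		struct kvm_msrs hdr;
		struct kvm_msr_entry entry;
	} msrs = {
		.hdr.nmsrs   = 1,
		.entry.index = HV_X64_MSR_VP_INDEX,
		.entry.data  = desired_vp_index,
	};

	/* KVM_SET_MSRS returns the number of MSRs actually written. */
	if (ioctl(vcpu_fd, KVM_SET_MSRS, &msrs) != 1)
		fprintf(stderr, "failed to restore HV_X64_MSR_VP_INDEX\n");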
......@@ -166,10 +166,11 @@ MSR_KVM_SYSTEM_TIME: 0x12
MSR_KVM_ASYNC_PF_EN: 0x4b564d02
data: Bits 63-6 hold 64-byte aligned physical address of a
64 byte memory area which must be in guest RAM and must be
zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1
zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
when asynchronous page faults are enabled on the vcpu, 0 when
disabled. Bit 1 is 1 if asynchronous page faults can be injected
when vcpu is in cpl == 0.
when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
are delivered to L1 as #PF vmexits.
First 4 bytes of the 64 byte memory location will be written to by
the hypervisor at the time of asynchronous page fault (APF)
......
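To make the bit layout concrete, here is a hedged guest-side sketch that sets all three flags, mirroring the kvm_guest_cpu_init() hunk further down (which additionally probes with wrmsr_safe() because bit 2 is optional); apf_reason stands for the per-cpu 64-byte area required above:

	u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason)); /* bits 63-6: area address */

	pa |= KVM_ASYNC_PF_ENABLED;                  /* bit 0: enable async page faults      */
	pa |= KVM_ASYNC_PF_SEND_ALWAYS;              /* bit 1: allow injection at cpl == 0   */
	pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;    /* bit 2: deliver to L1 as a #PF vmexit */

	wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);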
......@@ -286,6 +286,7 @@
#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
#define X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE (15*32+15) /* Virtual VMLOAD VMSAVE */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
......
......@@ -23,6 +23,7 @@ struct x86_exception {
u16 error_code;
bool nested_page_fault;
u64 address; /* cr2 or nested page fault gpa */
u8 async_page_fault;
};
/*
......
......@@ -462,10 +462,12 @@ struct kvm_vcpu_hv_synic {
DECLARE_BITMAP(auto_eoi_bitmap, 256);
DECLARE_BITMAP(vec_bitmap, 256);
bool active;
bool dont_zero_synic_pages;
};
/* Hyper-V per vcpu emulation context */
struct kvm_vcpu_hv {
u32 vp_index;
u64 hv_vapic;
s64 runtime_offset;
struct kvm_vcpu_hv_synic synic;
......@@ -549,6 +551,7 @@ struct kvm_vcpu_arch {
bool reinject;
u8 nr;
u32 error_code;
u8 nested_apf;
} exception;
struct kvm_queued_interrupt {
......@@ -649,6 +652,9 @@ struct kvm_vcpu_arch {
u64 msr_val;
u32 id;
bool send_user_only;
u32 host_apf_reason;
unsigned long nested_apf_token;
bool delivery_as_pf_vmexit;
} apf;
/* OSVW MSRs (AMD only) */
......@@ -803,6 +809,7 @@ struct kvm_arch {
int audit_point;
#endif
bool backwards_tsc_observed;
bool boot_vcpu_runs_old_kvmclock;
u32 bsp_vcpu_id;
......@@ -952,9 +959,7 @@ struct kvm_x86_ops {
unsigned char *hypercall_addr);
void (*set_irq)(struct kvm_vcpu *vcpu);
void (*set_nmi)(struct kvm_vcpu *vcpu);
void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
bool has_error_code, u32 error_code,
bool reinject);
void (*queue_exception)(struct kvm_vcpu *vcpu);
void (*cancel_injection)(struct kvm_vcpu *vcpu);
int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
int (*nmi_allowed)(struct kvm_vcpu *vcpu);
......
......@@ -83,7 +83,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u32 event_inj;
u32 event_inj_err;
u64 nested_cr3;
u64 lbr_ctl;
u64 virt_ext;
u32 clean;
u32 reserved_5;
u64 next_rip;
......@@ -119,6 +119,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define AVIC_ENABLE_SHIFT 31
#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
#define SVM_INTERRUPT_SHADOW_MASK 1
#define SVM_IOIO_STR_SHIFT 2
......
......@@ -67,6 +67,7 @@ struct kvm_clock_pairing {
#define KVM_ASYNC_PF_ENABLED (1 << 0)
#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2)
/* Operations for KVM_HC_MMU_OP */
#define KVM_MMU_OP_WRITE_PTE 1
......
......@@ -330,7 +330,12 @@ static void kvm_guest_cpu_init(void)
#ifdef CONFIG_PREEMPT
pa |= KVM_ASYNC_PF_SEND_ALWAYS;
#endif
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED);
pa |= KVM_ASYNC_PF_ENABLED;
/* Async page fault support for L1 hypervisor is optional */
if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN,
(pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0)
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
printk(KERN_INFO"KVM setup async PF for cpu %d\n",
smp_processor_id());
......
......@@ -106,14 +106,27 @@ static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
return 0;
}
static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id)
static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
{
struct kvm_vcpu *vcpu = NULL;
int i;
if (vpidx < KVM_MAX_VCPUS)
vcpu = kvm_get_vcpu(kvm, vpidx);
if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
return vcpu;
kvm_for_each_vcpu(i, vcpu, kvm)
if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
return vcpu;
return NULL;
}
static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
{
struct kvm_vcpu *vcpu;
struct kvm_vcpu_hv_synic *synic;
if (vcpu_id >= atomic_read(&kvm->online_vcpus))
return NULL;
vcpu = kvm_get_vcpu(kvm, vcpu_id);
vcpu = get_vcpu_by_vpidx(kvm, vpidx);
if (!vcpu)
return NULL;
synic = vcpu_to_synic(vcpu);
......@@ -221,7 +234,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
synic->version = data;
break;
case HV_X64_MSR_SIEFP:
if (data & HV_SYNIC_SIEFP_ENABLE)
if ((data & HV_SYNIC_SIEFP_ENABLE) && !host &&
!synic->dont_zero_synic_pages)
if (kvm_clear_guest(vcpu->kvm,
data & PAGE_MASK, PAGE_SIZE)) {
ret = 1;
......@@ -232,7 +246,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
synic_exit(synic, msr);
break;
case HV_X64_MSR_SIMP:
if (data & HV_SYNIC_SIMP_ENABLE)
if ((data & HV_SYNIC_SIMP_ENABLE) && !host &&
!synic->dont_zero_synic_pages)
if (kvm_clear_guest(vcpu->kvm,
data & PAGE_MASK, PAGE_SIZE)) {
ret = 1;
......@@ -318,11 +333,11 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
return ret;
}
int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint)
int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
{
struct kvm_vcpu_hv_synic *synic;
synic = synic_get(kvm, vcpu_id);
synic = synic_get(kvm, vpidx);
if (!synic)
return -EINVAL;
......@@ -341,11 +356,11 @@ void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
kvm_hv_notify_acked_sint(vcpu, i);
}
static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vcpu_id, u32 sint, int gsi)
static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi)
{
struct kvm_vcpu_hv_synic *synic;
synic = synic_get(kvm, vcpu_id);
synic = synic_get(kvm, vpidx);
if (!synic)
return -EINVAL;
......@@ -687,14 +702,24 @@ void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
stimer_init(&hv_vcpu->stimer[i], i);
}
int kvm_hv_activate_synic(struct kvm_vcpu *vcpu)
void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
}
int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
{
struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
/*
* Hyper-V SynIC auto EOI SINT's are
* not compatible with APICV, so deactivate APICV
*/
kvm_vcpu_deactivate_apicv(vcpu);
vcpu_to_synic(vcpu)->active = true;
synic->active = true;
synic->dont_zero_synic_pages = dont_zero_synic_pages;
return 0;
}
......@@ -978,6 +1003,11 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
switch (msr) {
case HV_X64_MSR_VP_INDEX:
if (!host)
return 1;
hv->vp_index = (u32)data;
break;
case HV_X64_MSR_APIC_ASSIST_PAGE: {
u64 gfn;
unsigned long addr;
......@@ -1089,18 +1119,9 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
switch (msr) {
case HV_X64_MSR_VP_INDEX: {
int r;
struct kvm_vcpu *v;
kvm_for_each_vcpu(r, v, vcpu->kvm) {
if (v == vcpu) {
data = r;
break;
}
}
case HV_X64_MSR_VP_INDEX:
data = hv->vp_index;
break;
}
case HV_X64_MSR_EOI:
return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
case HV_X64_MSR_ICR:
......
......@@ -56,9 +56,10 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
void kvm_hv_irq_routing_update(struct kvm *kvm);
int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint);
void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector);
int kvm_hv_activate_synic(struct kvm_vcpu *vcpu);
int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages);
void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu);
void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu,
......
......@@ -724,8 +724,10 @@ void kvm_free_pit(struct kvm *kvm)
struct kvm_pit *pit = kvm->arch.vpit;
if (pit) {
mutex_lock(&kvm->slots_lock);
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->speaker_dev);
mutex_unlock(&kvm->slots_lock);
kvm_pit_set_reinject(pit, false);
hrtimer_cancel(&pit->pit_state.timer);
kthread_destroy_worker(pit->worker);
......
......@@ -46,6 +46,7 @@
#include <asm/io.h>
#include <asm/vmx.h>
#include <asm/kvm_page_track.h>
#include "trace.h"
/*
* When setting this variable to true it enables Two-Dimensional-Paging
......@@ -3748,7 +3749,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
kvm_event_needs_reinjection(vcpu)))
return false;
if (is_guest_mode(vcpu))
if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
return false;
return kvm_x86_ops->interrupt_allowed(vcpu);
......@@ -3780,6 +3781,38 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
return false;
}
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len,
bool need_unprotect)
{
int r = 1;
switch (vcpu->arch.apf.host_apf_reason) {
default:
trace_kvm_page_fault(fault_address, error_code);
if (need_unprotect && kvm_event_needs_reinjection(vcpu))
kvm_mmu_unprotect_page_virt(vcpu, fault_address);
r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn,
insn_len);
break;
case KVM_PV_REASON_PAGE_NOT_PRESENT:
vcpu->arch.apf.host_apf_reason = 0;
local_irq_disable();
kvm_async_pf_task_wait(fault_address);
local_irq_enable();
break;
case KVM_PV_REASON_PAGE_READY:
vcpu->arch.apf.host_apf_reason = 0;
local_irq_disable();
kvm_async_pf_task_wake(fault_address);
local_irq_enable();
break;
}
return r;
}
EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
static bool
check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
{
......
......@@ -77,6 +77,9 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool accessed_dirty);
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len,
bool need_unprotect);
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
{
......
......@@ -2422,28 +2422,41 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
* KVM wants to inject page-faults which it got to the guest. This function
* checks whether in a nested guest, we need to inject them to L1 or L2.
*/
static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
unsigned int nr = vcpu->arch.exception.nr;
if (!(vmcs12->exception_bitmap & (1u << nr)))
if (!((vmcs12->exception_bitmap & (1u << nr)) ||
(nr == PF_VECTOR && vcpu->arch.exception.nested_apf)))
return 0;
if (vcpu->arch.exception.nested_apf) {
vmcs_write32(VM_EXIT_INTR_ERROR_CODE, vcpu->arch.exception.error_code);
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
vcpu->arch.apf.nested_apf_token);
return 1;
}
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
vmcs_read32(VM_EXIT_INTR_INFO),
vmcs_readl(EXIT_QUALIFICATION));
return 1;
}
static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
bool has_error_code, u32 error_code,
bool reinject)
static void vmx_queue_exception(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned nr = vcpu->arch.exception.nr;
bool has_error_code = vcpu->arch.exception.has_error_code;
bool reinject = vcpu->arch.exception.reinject;
u32 error_code = vcpu->arch.exception.error_code;
u32 intr_info = nr | INTR_INFO_VALID_MASK;
if (!reinject && is_guest_mode(vcpu) &&
nested_vmx_check_exception(vcpu, nr))
nested_vmx_check_exception(vcpu))
return;
if (has_error_code) {
......@@ -3764,6 +3777,25 @@ static void free_kvm_area(void)
}
}
enum vmcs_field_type {
VMCS_FIELD_TYPE_U16 = 0,
VMCS_FIELD_TYPE_U64 = 1,
VMCS_FIELD_TYPE_U32 = 2,
VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
};
static inline int vmcs_field_type(unsigned long field)
{
if (0x1 & field) /* the *_HIGH fields are all 32 bit */
return VMCS_FIELD_TYPE_U32;
return (field >> 13) & 0x3 ;
}
static inline int vmcs_field_readonly(unsigned long field)
{
return (((field >> 10) & 0x3) == 1);
}
static void init_vmcs_shadow_fields(void)
{
int i, j;
......@@ -3789,14 +3821,22 @@ static void init_vmcs_shadow_fields(void)
/* shadowed fields guest access without vmexit */
for (i = 0; i < max_shadow_read_write_fields; i++) {
clear_bit(shadow_read_write_fields[i],
vmx_vmwrite_bitmap);
clear_bit(shadow_read_write_fields[i],
vmx_vmread_bitmap);
unsigned long field = shadow_read_write_fields[i];
clear_bit(field, vmx_vmwrite_bitmap);
clear_bit(field, vmx_vmread_bitmap);
if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64) {
clear_bit(field + 1, vmx_vmwrite_bitmap);
clear_bit(field + 1, vmx_vmread_bitmap);
}
}
for (i = 0; i < max_shadow_read_only_fields; i++) {
unsigned long field = shadow_read_only_fields[i];
clear_bit(field, vmx_vmread_bitmap);
if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64)
clear_bit(field + 1, vmx_vmread_bitmap);
}
for (i = 0; i < max_shadow_read_only_fields; i++)
clear_bit(shadow_read_only_fields[i],
vmx_vmread_bitmap);
}
static __init int alloc_kvm_area(void)
......@@ -4634,6 +4674,11 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu)
return true;
}
static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
{
return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
}
static int init_rmode_tss(struct kvm *kvm)
{
gfn_t fn;
......@@ -5664,14 +5709,11 @@ static int handle_exception(struct kvm_vcpu *vcpu)
}
if (is_page_fault(intr_info)) {
/* EPT won't cause page fault directly */
BUG_ON(enable_ept);
cr2 = vmcs_readl(EXIT_QUALIFICATION);
trace_kvm_page_fault(cr2, error_code);
if (kvm_event_needs_reinjection(vcpu))
kvm_mmu_unprotect_page_virt(vcpu, cr2);
return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0);
/* EPT won't cause page fault directly */
WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept);
return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0,
true);
}
ex_no = intr_info & INTR_INFO_VECTOR_MASK;
......@@ -7214,25 +7256,6 @@ static int handle_vmresume(struct kvm_vcpu *vcpu)
return nested_vmx_run(vcpu, false);
}
enum vmcs_field_type {
VMCS_FIELD_TYPE_U16 = 0,
VMCS_FIELD_TYPE_U64 = 1,
VMCS_FIELD_TYPE_U32 = 2,
VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
};
static inline int vmcs_field_type(unsigned long field)
{
if (0x1 & field) /* the *_HIGH fields are all 32 bit */
return VMCS_FIELD_TYPE_U32;
return (field >> 13) & 0x3 ;
}
static inline int vmcs_field_readonly(unsigned long field)
{
return (((field >> 10) & 0x3) == 1);
}
/*
* Read a vmcs12 field. Since these can have varying lengths and we return
* one type, we chose the biggest type (u64) and zero-extend the return value
......@@ -8014,7 +8037,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
if (is_nmi(intr_info))
return false;
else if (is_page_fault(intr_info))
return enable_ept;
return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept;
else if (is_no_device(intr_info) &&
!(vmcs12->guest_cr0 & X86_CR0_TS))
return false;
......@@ -8418,9 +8441,15 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
exit_reason != EXIT_REASON_TASK_SWITCH)) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
vcpu->run->internal.ndata = 2;
vcpu->run->internal.ndata = 3;
vcpu->run->internal.data[0] = vectoring_info;
vcpu->run->internal.data[1] = exit_reason;
vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
if (exit_reason == EXIT_REASON_EPT_MISCONFIG) {
vcpu->run->internal.ndata++;
vcpu->run->internal.data[3] =
vmcs_read64(GUEST_PHYSICAL_ADDRESS);
}
return 0;
}
......@@ -8611,17 +8640,24 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
{
u32 exit_intr_info;
u32 exit_intr_info = 0;
u16 basic_exit_reason = (u16)vmx->exit_reason;
if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
|| vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI))
if (!(basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
|| basic_exit_reason == EXIT_REASON_EXCEPTION_NMI))
return;
vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
exit_intr_info = vmx->exit_intr_info;
if (!(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
vmx->exit_intr_info = exit_intr_info;
/* if exit due to PF check for async PF */
if (is_page_fault(exit_intr_info))
vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
/* Handle machine checks before interrupts are enabled */
if (is_machine_check(exit_intr_info))
if (basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY ||
is_machine_check(exit_intr_info))
kvm_machine_check();
/* We need to handle NMIs before interrupts are enabled */
......@@ -9589,23 +9625,26 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
}
static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
return 0;
if (!page_address_valid(vcpu, vmcs12->io_bitmap_a) ||
!page_address_valid(vcpu, vmcs12->io_bitmap_b))
return -EINVAL;
return 0;
}
static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
int maxphyaddr;
u64 addr;
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
return 0;
if (vmcs12_read_any(vcpu, MSR_BITMAP, &addr)) {
WARN_ON(1);
return -EINVAL;
}
maxphyaddr = cpuid_maxphyaddr(vcpu);
if (!PAGE_ALIGNED(vmcs12->msr_bitmap) ||
((addr + PAGE_SIZE) >> maxphyaddr))
if (!page_address_valid(vcpu, vmcs12->msr_bitmap))
return -EINVAL;
return 0;
......@@ -10293,6 +10332,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
if (nested_vmx_check_io_bitmap_controls(vcpu, vmcs12))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
......@@ -10429,8 +10471,6 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
return 1;
}
vmcs12->launch_state = 1;
/*
* Note no nested_vmx_succeed or nested_vmx_fail here. At this point
* we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
......@@ -10804,6 +10844,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
vmcs12->launch_state = 1;
/* vm_entry_intr_info_field is cleared on exit. Emulate this
* instead of reading the real value. */
vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
......
......@@ -134,8 +134,6 @@ module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
static bool __read_mostly vector_hashing = true;
module_param(vector_hashing, bool, S_IRUGO);
static bool __read_mostly backwards_tsc_observed = false;
#define KVM_NR_SHARED_MSRS 16
struct kvm_shared_msrs_global {
......@@ -452,7 +450,12 @@ EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
++vcpu->stat.pf_guest;
vcpu->arch.cr2 = fault->address;
vcpu->arch.exception.nested_apf =
is_guest_mode(vcpu) && fault->async_page_fault;
if (vcpu->arch.exception.nested_apf)
vcpu->arch.apf.nested_apf_token = fault->address;
else
vcpu->arch.cr2 = fault->address;
kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
}
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
......@@ -1719,7 +1722,7 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
&ka->master_cycle_now);
ka->use_master_clock = host_tsc_clocksource && vcpus_matched
&& !backwards_tsc_observed
&& !ka->backwards_tsc_observed
&& !ka->boot_vcpu_runs_old_kvmclock;
if (ka->use_master_clock)
......@@ -2060,8 +2063,8 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
{
gpa_t gpa = data & ~0x3f;
/* Bits 2:5 are reserved, Should be zero */
if (data & 0x3c)
/* Bits 3:5 are reserved, Should be zero */
if (data & 0x38)
return 1;
vcpu->arch.apf.msr_val = data;
......@@ -2077,6 +2080,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
return 1;
vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
kvm_async_pf_wakeup_all(vcpu);
return 0;
}
......@@ -2661,6 +2665,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_HYPERV_VAPIC:
case KVM_CAP_HYPERV_SPIN:
case KVM_CAP_HYPERV_SYNIC:
case KVM_CAP_HYPERV_SYNIC2:
case KVM_CAP_HYPERV_VP_INDEX:
case KVM_CAP_PCI_SEGMENT:
case KVM_CAP_DEBUGREGS:
case KVM_CAP_X86_ROBUST_SINGLESTEP:
......@@ -3384,10 +3390,14 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return -EINVAL;
switch (cap->cap) {
case KVM_CAP_HYPERV_SYNIC2:
if (cap->args[0])
return -EINVAL;
case KVM_CAP_HYPERV_SYNIC:
if (!irqchip_in_kernel(vcpu->kvm))
return -EINVAL;
return kvm_hv_activate_synic(vcpu);
return kvm_hv_activate_synic(vcpu, cap->cap ==
KVM_CAP_HYPERV_SYNIC2);
default:
return -EINVAL;
}
......@@ -4188,9 +4198,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out;
r = 0;
/*
* TODO: userspace has to take care of races with VCPU_RUN, so
* kvm_gen_update_masterclock() can be cut down to locked