Commit e61cf2e3 authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull first set of KVM updates from Paolo Bonzini:
 "PPC:
   - minor code cleanups

  x86:
   - PCID emulation and CR3 caching for shadow page tables
   - nested VMX live migration
   - nested VMCS shadowing
   - optimized IPI hypercall
   - some optimizations

  ARM will come next week"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (85 commits)
  kvm: x86: Set highest physical address bits in non-present/reserved SPTEs
  KVM/x86: Use CC_SET()/CC_OUT in arch/x86/kvm/vmx.c
  KVM: X86: Implement PV IPIs in linux guest
  KVM: X86: Add kvm hypervisor init time platform setup callback
  KVM: X86: Implement "send IPI" hypercall
  KVM/x86: Move X86_CR4_OSXSAVE check into kvm_valid_sregs()
  KVM: x86: Skip pae_root shadow allocation if tdp enabled
  KVM/MMU: Combine flushing remote tlb in mmu_set_spte()
  KVM: vmx: skip VMWRITE of HOST_{FS,GS}_BASE when possible
  KVM: vmx: skip VMWRITE of HOST_{FS,GS}_SEL when possible
  KVM: vmx: always initialize HOST_{FS,GS}_BASE to zero during setup
  KVM: vmx: move struct host_state usage to struct loaded_vmcs
  KVM: vmx: compute need to reload FS/GS/LDT on demand
  KVM: nVMX: remove a misleading comment regarding vmcs02 fields
  KVM: vmx: rename __vmx_load_host_state() and vmx_save_host_state()
  KVM: vmx: add dedicated utility to access guest's kernel_gs_base
  KVM: vmx: track host_state.loaded using a loaded_vmcs pointer
  KVM: vmx: refactor segmentation code in vmx_save_host_state()
  kvm: nVMX: Fix fault priority for VMX operations
  kvm: nVMX: Fix fault vector for VMX operation at CPL > 0
  ...
parents 1009aa12 28a1f3ac
......@@ -3561,6 +3561,62 @@ Returns: 0 on success,
-ENOENT on deassign if the conn_id isn't registered
-EEXIST on assign if the conn_id is already registered
4.114 KVM_GET_NESTED_STATE
Capability: KVM_CAP_NESTED_STATE
Architectures: x86
Type: vcpu ioctl
Parameters: struct kvm_nested_state (in/out)
Returns: 0 on success, -1 on error
Errors:
E2BIG: the total state size (including the fixed-size part of struct
kvm_nested_state) exceeds the value of 'size' specified by
the user; the size required will be written into size.
struct kvm_nested_state {
__u16 flags;
__u16 format;
__u32 size;
union {
struct kvm_vmx_nested_state vmx;
struct kvm_svm_nested_state svm;
__u8 pad[120];
};
__u8 data[0];
};
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
struct kvm_vmx_nested_state {
__u64 vmxon_pa;
__u64 vmcs_pa;
struct {
__u16 flags;
} smm;
};
This ioctl copies the vcpu's nested virtualization state from the kernel to
userspace.
The maximum size of the state, including the fixed-size part of struct
kvm_nested_state, can be retrieved by passing KVM_CAP_NESTED_STATE to
the KVM_CHECK_EXTENSION ioctl().
4.115 KVM_SET_NESTED_STATE
Capability: KVM_CAP_NESTED_STATE
Architectures: x86
Type: vcpu ioctl
Parameters: struct kvm_nested_state (in)
Returns: 0 on success, -1 on error
This copies the vcpu's kvm_nested_state struct from userspace to the kernel. For
the definition of struct kvm_nested_state, see KVM_GET_NESTED_STATE.
5. The kvm_run structure
------------------------
......
......@@ -62,6 +62,10 @@ KVM_FEATURE_ASYNC_PF_VMEXIT || 10 || paravirtualized async PF VM exit
|| || can be enabled by setting bit 2
|| || when writing to msr 0x4b564d02
------------------------------------------------------------------------------
KVM_FEATURE_PV_SEND_IPI || 11 || guest checks this feature bit
|| || before using paravirtualized
|| || send IPIs.
------------------------------------------------------------------------------
KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side
|| || per-cpu warps are expected in
|| || kvmclock.
......
......@@ -121,3 +121,23 @@ compute the CLOCK_REALTIME for its clock, at the same instant.
Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
6. KVM_HC_SEND_IPI
------------------------
Architecture: x86
Status: active
Purpose: Send IPIs to multiple vCPUs.
a0: lower part of the bitmap of destination APIC IDs
a1: higher part of the bitmap of destination APIC IDs
a2: the lowest APIC ID in bitmap
a3: APIC ICR
The hypercall lets a guest send multicast IPIs, with at most 128
destinations per hypercall in 64-bit mode and 64 vCPUs per
hypercall in 32-bit mode. The destinations are represented by a
bitmap contained in the first two arguments (a0 and a1). Bit 0 of
a0 corresponds to the APIC ID in the third argument (a2), bit 1
corresponds to the APIC ID a2+1, and so on.
Returns the number of CPUs to which the IPIs were delivered successfully.
......@@ -390,4 +390,51 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
#define SPLIT_HACK_MASK 0xff000000
#define SPLIT_HACK_OFFS 0xfb000000
/*
 * This packs a VCPU ID from the [0..KVM_MAX_VCPU_ID) space down to the
 * [0..KVM_MAX_VCPUS) space, using knowledge of the guest's core stride
 * (but not its actual threading mode, which is not available) to avoid
 * collisions.
 *
 * The implementation leaves VCPU IDs from the range [0..KVM_MAX_VCPUS) (block
 * 0) unchanged: if the guest is filling each VCORE completely then it will be
 * using consecutive IDs and it will fill the space without any packing.
 *
 * For higher VCPU IDs, the packed ID is based on the VCPU ID modulo
 * KVM_MAX_VCPUS (effectively masking off the top bits) and then an offset is
 * added to avoid collisions.
 *
 * VCPU IDs in the range [KVM_MAX_VCPUS..(KVM_MAX_VCPUS*2)) (block 1) are only
 * possible if the guest is leaving at least 1/2 of each VCORE empty, so IDs
 * can be safely packed into the second half of each VCORE by adding an offset
 * of (stride / 2).
 *
 * Similarly, if VCPU IDs in the range [(KVM_MAX_VCPUS*2)..(KVM_MAX_VCPUS*4))
 * (blocks 2 and 3) are seen, the guest must be leaving at least 3/4 of each
 * VCORE empty so packed IDs can be offset by (stride / 4) and (stride * 3 / 4).
 *
 * Finally, VCPU IDs from blocks 4..7 will only be seen if the guest is using a
 * stride of 8 and 1 thread per core so the remaining offsets of 1, 5, 3 and 7
 * must be free to use.
 *
 * (The offsets for each block are stored in block_offsets[], indexed by the
 * block number if the stride is 8. For cases where the guest's stride is less
 * than 8, we can re-use the block_offsets array by multiplying the block
 * number by (MAX_SMT_THREADS / stride) to reach the correct entry.)
 *
 * @kvm: VM whose kvm->arch.emul_smt_mode is used as the core stride
 * @id:  VCPU ID to pack
 *
 * Returns the packed ID. Returns 0 after a one-time warning if the scaled
 * block number falls outside block_offsets[] or if the packed result is not
 * below KVM_MAX_VCPUS.
 */
static inline u32 kvmppc_pack_vcpu_id(struct kvm *kvm, u32 id)
{
	/* Read-only lookup table: keep a single copy rather than a per-call one */
	static const int block_offsets[MAX_SMT_THREADS] = {0, 4, 2, 6, 1, 5, 3, 7};
	int stride = kvm->arch.emul_smt_mode;
	/* Scale the block number so smaller strides index the stride-8 table */
	int block = (id / KVM_MAX_VCPUS) * (MAX_SMT_THREADS / stride);
	u32 packed_id;

	if (WARN_ONCE(block >= MAX_SMT_THREADS, "VCPU ID too large to pack"))
		return 0;

	packed_id = (id % KVM_MAX_VCPUS) + block_offsets[block];
	if (WARN_ONCE(packed_id >= KVM_MAX_VCPUS, "VCPU ID packing failed"))
		return 0;

	return packed_id;
}
#endif /* __ASM_KVM_BOOK3S_H__ */
......@@ -42,7 +42,14 @@
#define KVM_USER_MEM_SLOTS 512
#include <asm/cputhreads.h>
#define KVM_MAX_VCPU_ID (threads_per_subcore * KVM_MAX_VCORES)
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
#include <asm/kvm_book3s_asm.h> /* for MAX_SMT_THREADS */
#define KVM_MAX_VCPU_ID (MAX_SMT_THREADS * KVM_MAX_VCORES)
#else
#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
......@@ -672,7 +679,7 @@ struct kvm_vcpu_arch {
gva_t vaddr_accessed;
pgd_t *pgdir;
u8 io_gpr; /* GPR used as IO source/target */
u16 io_gpr; /* GPR used as IO source/target */
u8 mmio_host_swabbed;
u8 mmio_sign_extend;
/* conversion between single and double precision */
......@@ -688,7 +695,6 @@ struct kvm_vcpu_arch {
*/
u8 mmio_vsx_copy_nums;
u8 mmio_vsx_offset;
u8 mmio_vsx_tx_sx_enabled;
u8 mmio_vmx_copy_nums;
u8 mmio_vmx_offset;
u8 mmio_copy_type;
......@@ -801,14 +807,14 @@ struct kvm_vcpu_arch {
#define KVMPPC_VCPU_BUSY_IN_HOST 2
/* Values for vcpu->arch.io_gpr */
#define KVM_MMIO_REG_MASK 0x001f
#define KVM_MMIO_REG_EXT_MASK 0xffe0
#define KVM_MMIO_REG_MASK 0x003f
#define KVM_MMIO_REG_EXT_MASK 0xffc0
#define KVM_MMIO_REG_GPR 0x0000
#define KVM_MMIO_REG_FPR 0x0020
#define KVM_MMIO_REG_QPR 0x0040
#define KVM_MMIO_REG_FQPR 0x0060
#define KVM_MMIO_REG_VSX 0x0080
#define KVM_MMIO_REG_VMX 0x00c0
#define KVM_MMIO_REG_FPR 0x0040
#define KVM_MMIO_REG_QPR 0x0080
#define KVM_MMIO_REG_FQPR 0x00c0
#define KVM_MMIO_REG_VSX 0x0100
#define KVM_MMIO_REG_VMX 0x0180
#define __KVM_HAVE_ARCH_WQP
#define __KVM_HAVE_CREATE_DEVICE
......
......@@ -163,7 +163,7 @@
#define PSSCR_ESL 0x00200000 /* Enable State Loss */
#define PSSCR_SD 0x00400000 /* Status Disable */
#define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */
#define PSSCR_GUEST_VIS 0xf0000000000003ff /* Guest-visible PSSCR fields */
#define PSSCR_GUEST_VIS 0xf0000000000003ffUL /* Guest-visible PSSCR fields */
#define PSSCR_FAKE_SUSPEND 0x00000400 /* Fake-suspend bit (P9 DD2.2) */
#define PSSCR_FAKE_SUSPEND_LG 10 /* Fake-suspend bit position */
......
......@@ -179,7 +179,7 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
if ((tbltmp->it_page_shift <= stt->page_shift) &&
(tbltmp->it_offset << tbltmp->it_page_shift ==
stt->offset << stt->page_shift) &&
(tbltmp->it_size << tbltmp->it_page_shift ==
(tbltmp->it_size << tbltmp->it_page_shift >=
stt->size << stt->page_shift)) {
/*
* Reference the table to avoid races with
......@@ -295,7 +295,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
{
struct kvmppc_spapr_tce_table *stt = NULL;
struct kvmppc_spapr_tce_table *siter;
unsigned long npages, size;
unsigned long npages, size = args->size;
int ret = -ENOMEM;
int i;
......@@ -303,7 +303,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
return -EINVAL;
size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
npages = kvmppc_tce_pages(size);
ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
if (ret)
......
......@@ -127,14 +127,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
* and SPURR count and should be set according to the number of
* online threads in the vcore being run.
*/
#define RWMR_RPA_P8_1THREAD 0x164520C62609AECA
#define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9
#define RWMR_RPA_P8_3THREAD 0x164520C62609AECA
#define RWMR_RPA_P8_4THREAD 0x199A421245058DA9
#define RWMR_RPA_P8_5THREAD 0x164520C62609AECA
#define RWMR_RPA_P8_6THREAD 0x164520C62609AECA
#define RWMR_RPA_P8_7THREAD 0x164520C62609AECA
#define RWMR_RPA_P8_8THREAD 0x164520C62609AECA
#define RWMR_RPA_P8_1THREAD 0x164520C62609AECAUL
#define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9UL
#define RWMR_RPA_P8_3THREAD 0x164520C62609AECAUL
#define RWMR_RPA_P8_4THREAD 0x199A421245058DA9UL
#define RWMR_RPA_P8_5THREAD 0x164520C62609AECAUL
#define RWMR_RPA_P8_6THREAD 0x164520C62609AECAUL
#define RWMR_RPA_P8_7THREAD 0x164520C62609AECAUL
#define RWMR_RPA_P8_8THREAD 0x164520C62609AECAUL
static unsigned long p8_rwmr_values[MAX_SMT_THREADS + 1] = {
RWMR_RPA_P8_1THREAD,
......@@ -1807,7 +1807,7 @@ static int threads_per_vcore(struct kvm *kvm)
return threads_per_subcore;
}
static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)
{
struct kvmppc_vcore *vcore;
......@@ -1821,7 +1821,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
init_swait_queue_head(&vcore->wq);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
vcore->first_vcpuid = core * kvm->arch.smt_mode;
vcore->first_vcpuid = id;
vcore->kvm = kvm;
INIT_LIST_HEAD(&vcore->preempt_list);
......@@ -2037,12 +2037,26 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
mutex_lock(&kvm->lock);
vcore = NULL;
err = -EINVAL;
core = id / kvm->arch.smt_mode;
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
if (id >= (KVM_MAX_VCPUS * kvm->arch.emul_smt_mode)) {
pr_devel("KVM: VCPU ID too high\n");
core = KVM_MAX_VCORES;
} else {
BUG_ON(kvm->arch.smt_mode != 1);
core = kvmppc_pack_vcpu_id(kvm, id);
}
} else {
core = id / kvm->arch.smt_mode;
}
if (core < KVM_MAX_VCORES) {
vcore = kvm->arch.vcores[core];
if (!vcore) {
if (vcore && cpu_has_feature(CPU_FTR_ARCH_300)) {
pr_devel("KVM: collision on id %u", id);
vcore = NULL;
} else if (!vcore) {
err = -ENOMEM;
vcore = kvmppc_vcore_create(kvm, core);
vcore = kvmppc_vcore_create(kvm,
id & ~(kvm->arch.smt_mode - 1));
kvm->arch.vcores[core] = vcore;
kvm->arch.online_vcores++;
}
......@@ -4550,6 +4564,8 @@ static int kvmppc_book3s_init_hv(void)
pr_err("KVM-HV: Cannot determine method for accessing XICS\n");
return -ENODEV;
}
/* presence of intc confirmed - node can be dropped again */
of_node_put(np);
}
#endif
......
......@@ -317,6 +317,11 @@ static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
return -EBUSY;
}
/*
 * Compute the VP number for a guest server number: the server number is
 * packed with kvmppc_pack_vcpu_id() and the result is added to xive->vp_base.
 */
static u32 xive_vp(struct kvmppc_xive *xive, u32 server)
{
	u32 packed_server = kvmppc_pack_vcpu_id(xive->kvm, server);

	return xive->vp_base + packed_server;
}
static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
struct kvmppc_xive_src_block *sb,
struct kvmppc_xive_irq_state *state)
......@@ -362,7 +367,7 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
*/
if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
xive_native_configure_irq(hw_num,
xive->vp_base + state->act_server,
xive_vp(xive, state->act_server),
MASKED, state->number);
/* set old_p so we can track if an H_EOI was done */
state->old_p = true;
......@@ -418,7 +423,7 @@ static void xive_finish_unmask(struct kvmppc_xive *xive,
*/
if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
xive_native_configure_irq(hw_num,
xive->vp_base + state->act_server,
xive_vp(xive, state->act_server),
state->act_priority, state->number);
/* If an EOI is needed, do it here */
if (!state->old_p)
......@@ -495,7 +500,7 @@ static int xive_target_interrupt(struct kvm *kvm,
kvmppc_xive_select_irq(state, &hw_num, NULL);
return xive_native_configure_irq(hw_num,
xive->vp_base + server,
xive_vp(xive, server),
prio, state->number);
}
......@@ -883,7 +888,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
* which is fine for a never started interrupt.
*/
xive_native_configure_irq(hw_irq,
xive->vp_base + state->act_server,
xive_vp(xive, state->act_server),
state->act_priority, state->number);
/*
......@@ -959,7 +964,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
/* Reconfigure the IPI */
xive_native_configure_irq(state->ipi_number,
xive->vp_base + state->act_server,
xive_vp(xive, state->act_server),
state->act_priority, state->number);
/*
......@@ -1084,7 +1089,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
pr_devel("Duplicate !\n");
return -EEXIST;
}
if (cpu >= KVM_MAX_VCPUS) {
if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
pr_devel("Out of bounds !\n");
return -EINVAL;
}
......@@ -1098,7 +1103,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
xc->xive = xive;
xc->vcpu = vcpu;
xc->server_num = cpu;
xc->vp_id = xive->vp_base + cpu;
xc->vp_id = xive_vp(xive, cpu);
xc->mfrr = 0xff;
xc->valid = true;
......
......@@ -106,7 +106,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
* if mmio_vsx_tx_sx_enabled == 1, copy data between
* VSR[32..63] and memory
*/
vcpu->arch.mmio_vsx_tx_sx_enabled = get_tx_or_sx(inst);
vcpu->arch.mmio_vsx_copy_nums = 0;
vcpu->arch.mmio_vsx_offset = 0;
vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE;
......@@ -242,8 +241,8 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
}
emulated = kvmppc_handle_vsx_load(run, vcpu,
KVM_MMIO_REG_VSX | (op.reg & 0x1f),
io_size_each, 1, op.type & SIGNEXT);
KVM_MMIO_REG_VSX|op.reg, io_size_each,
1, op.type & SIGNEXT);
break;
}
#endif
......@@ -363,7 +362,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
}
emulated = kvmppc_handle_vsx_store(run, vcpu,
op.reg & 0x1f, io_size_each, 1);
op.reg, io_size_each, 1);
break;
}
#endif
......
......@@ -879,10 +879,10 @@ static inline void kvmppc_set_vsr_dword(struct kvm_vcpu *vcpu,
if (offset == -1)
return;
if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
val.vval = VCPU_VSX_VR(vcpu, index);
if (index >= 32) {
val.vval = VCPU_VSX_VR(vcpu, index - 32);
val.vsxval[offset] = gpr;
VCPU_VSX_VR(vcpu, index) = val.vval;
VCPU_VSX_VR(vcpu, index - 32) = val.vval;
} else {
VCPU_VSX_FPR(vcpu, index, offset) = gpr;
}
......@@ -894,11 +894,11 @@ static inline void kvmppc_set_vsr_dword_dump(struct kvm_vcpu *vcpu,
union kvmppc_one_reg val;
int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
val.vval = VCPU_VSX_VR(vcpu, index);
if (index >= 32) {
val.vval = VCPU_VSX_VR(vcpu, index - 32);
val.vsxval[0] = gpr;
val.vsxval[1] = gpr;
VCPU_VSX_VR(vcpu, index) = val.vval;
VCPU_VSX_VR(vcpu, index - 32) = val.vval;
} else {
VCPU_VSX_FPR(vcpu, index, 0) = gpr;
VCPU_VSX_FPR(vcpu, index, 1) = gpr;
......@@ -911,12 +911,12 @@ static inline void kvmppc_set_vsr_word_dump(struct kvm_vcpu *vcpu,
union kvmppc_one_reg val;
int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
if (index >= 32) {
val.vsx32val[0] = gpr;
val.vsx32val[1] = gpr;
val.vsx32val[2] = gpr;
val.vsx32val[3] = gpr;
VCPU_VSX_VR(vcpu, index) = val.vval;
VCPU_VSX_VR(vcpu, index - 32) = val.vval;
} else {
val.vsx32val[0] = gpr;
val.vsx32val[1] = gpr;
......@@ -936,10 +936,10 @@ static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu,
if (offset == -1)
return;
if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
val.vval = VCPU_VSX_VR(vcpu, index);
if (index >= 32) {
val.vval = VCPU_VSX_VR(vcpu, index - 32);
val.vsx32val[offset] = gpr32;
VCPU_VSX_VR(vcpu, index) = val.vval;
VCPU_VSX_VR(vcpu, index - 32) = val.vval;
} else {
dword_offset = offset / 2;
word_offset = offset % 2;
......@@ -1360,10 +1360,10 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
break;
}
if (!vcpu->arch.mmio_vsx_tx_sx_enabled) {
if (rs < 32) {
*val = VCPU_VSX_FPR(vcpu, rs, vsx_offset);
} else {
reg.vval = VCPU_VSX_VR(vcpu, rs);
reg.vval = VCPU_VSX_VR(vcpu, rs - 32);
*val = reg.vsxval[vsx_offset];
}
break;
......@@ -1377,13 +1377,13 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
break;
}
if (!vcpu->arch.mmio_vsx_tx_sx_enabled) {
if (rs < 32) {
dword_offset = vsx_offset / 2;
word_offset = vsx_offset % 2;
reg.vsxval[0] = VCPU_VSX_FPR(vcpu, rs, dword_offset);
*val = reg.vsx32val[word_offset];
} else {
reg.vval = VCPU_VSX_VR(vcpu, rs);
reg.vval = VCPU_VSX_VR(vcpu, rs - 32);
*val = reg.vsx32val[vsx_offset];
}
break;
......
......@@ -269,6 +269,7 @@ struct kvm_s390_sie_block {
__u8 reserved1c0[8]; /* 0x01c0 */
#define ECD_HOSTREGMGMT 0x20000000
#define ECD_MEF 0x08000000
#define ECD_ETOKENF 0x02000000
__u32 ecd; /* 0x01c8 */
__u8 reserved1cc[18]; /* 0x01cc */
__u64 pp; /* 0x01de */
......@@ -655,6 +656,7 @@ struct kvm_vcpu_arch {
seqcount_t cputm_seqcount;
__u64 cputm_start;
bool gs_enabled;
bool skey_enabled;
};
struct kvm_vm_stat {
......@@ -793,12 +795,6 @@ struct kvm_s390_vsie {
struct page *pages[KVM_MAX_VCPUS];
};
struct kvm_s390_migration_state {
unsigned long bitmap_size; /* in bits (number of guest pages) */
atomic64_t dirty_pages; /* number of dirty pages */
unsigned long *pgste_bitmap;
};
struct kvm_arch{
void *sca;
int use_esca;
......@@ -828,7 +824,8 @@ struct kvm_arch{
struct kvm_s390_vsie vsie;
u8 epdx;
u64 epoch;
struct kvm_s390_migration_state *migration_state;
int migration_mode;
atomic64_t cmma_dirty_pages;
/* subset of available cpu features enabled by user space */
DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
struct kvm_s390_gisa *gisa;
......
......@@ -4,7 +4,7 @@
/*
* KVM s390 specific structures and definitions
*
* Copyright IBM Corp. 2008
* Copyright IBM Corp. 2008, 2018
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
......@@ -225,6 +225,7 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_FPRS (1UL << 8)
#define KVM_SYNC_GSCB (1UL << 9)
#define KVM_SYNC_BPBC (1UL << 10)
#define KVM_SYNC_ETOKEN (1UL << 11)
/* length and alignment of the sdnx as a power of two */
#define SDNXC 8
#define SDNXL (1UL << SDNXC)
......@@ -258,6 +259,8 @@ struct kvm_sync_regs {
struct {
__u64 reserved1[2];
__u64 gscb[4];
__u64 etoken;
__u64 etoken_extension;
};
};
};
......
This diff is collapsed.
......@@ -205,13 +205,10 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
{
int rc;
struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
trace_kvm_s390_skey_related_inst(vcpu);
/* Already enabled? */
if (vcpu->kvm->arch.use_skf &&
!(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) &&
!kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
if (vcpu->arch.skey_enabled)
return 0;
rc = s390_enable_skey();
......@@ -222,9 +219,10 @@ int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS);
if (!vcpu->kvm->arch.use_skf)
sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
else
sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
vcpu->arch.skey_enabled = true;
return 0;
}
......@@ -987,7 +985,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
if (clear_user((void __user *)vmaddr, PAGE_SIZE))
if (kvm_clear_guest(vcpu->kvm, start, PAGE_SIZE))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
}
......@@ -1024,9 +1022,11 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
return 0;
}
static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
/*
* Must be called with relevant read locks held (kvm->mm->mmap_sem, kvm->srcu)
*/
static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
{
struct kvm_s390_migration_state *ms = vcpu->kvm->arch.migration_state;
int r1, r2, nappended, entries;
unsigned long gfn, hva, res, pgstev, ptev;
unsigned long *cbrlo;
......@@ -1076,10 +1076,12 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
cbrlo[entries] = gfn << PAGE_SHIFT;
}
if (orc && gfn < ms->bitmap_size) {
/* increment only if we are really flipping the bit to 1 */
if (!test_and_set_bit(gfn, ms->pgste_bitmap))
atomic64_inc(&ms->dirty_pages);
if (orc) {
struct kvm_memory_slot *ms = gfn_to_memslot(vcpu->kvm, gfn);
/* Increment only if we are really flipping the bit */
if (ms && !test_and_set_bit(gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
atomic64_inc(&vcpu->kvm->arch.cmma_dirty_pages);
}
return nappended;
......@@ -1108,7 +1110,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
: ESSA_SET_STABLE_IF_RESIDENT))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
if (likely(!vcpu->kvm->arch.migration_state)) {
if (!vcpu->kvm->arch.migration_mode) {
/*
* CMMA is enabled in the KVM settings, but is disabled in
* the SIE block and in the mm_context, and we are not doing
......@@ -1136,10 +1138,16 @@ static int handle_essa(struct kvm_vcpu *vcpu)
/* Retry the ESSA instruction */
kvm_s390_retry_instr(vcpu);
} else {
/* Account for the possible extra cbrl entry */
i = do_essa(vcpu, orc);
int srcu_idx;
down_read(&vcpu->kvm->mm->mmap_sem);
srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
i = __do_essa(vcpu, orc);
srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
up_read(&vcpu->kvm->mm->mmap_sem);
if (i < 0)
return i;
/* Account for the possible extra cbrl entry */
entries += i;
}
vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
......
......@@ -2,7 +2,7 @@
/*
* kvm nested virtualization support for s390x
*
* Copyright IBM Corp. 2016
* Copyright IBM Corp. 2016, 2018
*
* Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
*/
......@@ -378,6 +378,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (test_kvm_facility(vcpu->kvm, 139))
scb_s->ecd |= scb_o->ecd & ECD_MEF;
/* etoken */
if (test_kvm_facility(vcpu->kvm, 156))
scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;
prepare_ibc(vcpu, vsie_page);
rc = shadow_crycb(vcpu, vsie_page);
out:
......@@ -627,7 +631,8 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
vsie_page->riccbd_gpa = gpa;
scb_s->riccbd = hpa;
}
if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
if (((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) ||
(scb_s->ecd & ECD_ETOKENF)) {
unsigned long sdnxc;
gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL;
......@@ -818,6 +823,8 @@ static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
* - < 0 if an error occurred
*/
static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
__releases(vcpu->kvm->srcu)
__acquires(vcpu->kvm->srcu)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
......
......@@ -4,7 +4,7 @@
* numbering scheme from the Principles of Operations: most significant bit
* has bit number 0.
*
* Copyright IBM Corp. 2015
* Copyright IBM Corp. 2015, 2018
*