VE原理探索
Intel处理器下,KVM利用VMX root和non-root模式来区分hypervisor和guest。
- VMEntry:在内核模式下,由KVM执行VMLAUNCH/VMRESUME指令触发,guest运行所需的状态信息预先填充在CPU的VMCS中。
- VMExit:guest OS执行需要特权的操作(例如访问硬件)或发生中断时触发,由硬件自动完成VMExit,exit reason等信息记录在VMCS中,KVM据此采取下一步行动。
NOTE:#VE指的是virtualization exception,并不是VM EXIT
VMExit
/* => ret, --> refer to*/
VMExit => vmx_vcpu_run
=> vcpu_enter_guest
--> r = kvm_x86_ops->handle_exit(vcpu)
<=> vmx_handle_exit
--> kvm_vmx_exit_handlers
当Guest OS访问硬件时,就会触发VM exit,返回到vmx_vcpu_run;vmx保存好VMCS并记录下VM_EXIT_REASON后,返回到调用该函数的vcpu_enter_guest。在vcpu_enter_guest函数末尾调用了r = kvm_x86_ops->handle_exit(vcpu),该函数对应于vmx_handle_exit函数(Intel CPU架构下的对应关系可以查看vmx.c文件中的static struct kvm_x86_ops vmx_x86_ops):
static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
...
.handle_exit = vmx_handle_exit,
...
}
vmx_handle_exit调用kvm_vmx_exit_handlers[exit_reason](vcpu),该语句根据exit_reason调用不同的处理函数。该数据结构定义如下:
/*
 * Dispatch table of VM-exit handlers, indexed by the EXIT_REASON_* value.
 * Each entry takes the vCPU and returns an int. Slots without a designated
 * initializer are NULL, as for any partially initialized C array.
 */
static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_EXCEPTION_NMI] = handle_exception,
[EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
[EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
[EXIT_REASON_NMI_WINDOW] = handle_nmi_window,
[EXIT_REASON_IO_INSTRUCTION] = handle_io,
[EXIT_REASON_CR_ACCESS] = handle_cr,
[EXIT_REASON_DR_ACCESS] = handle_dr,
[EXIT_REASON_CPUID] = handle_cpuid,
[EXIT_REASON_MSR_READ] = handle_rdmsr,
[EXIT_REASON_MSR_WRITE] = handle_wrmsr,
[EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
[EXIT_REASON_HLT] = handle_halt,
[EXIT_REASON_INVD] = handle_invd,
[EXIT_REASON_INVLPG] = handle_invlpg,
[EXIT_REASON_RDPMC] = handle_rdpmc,
[EXIT_REASON_VMCALL] = handle_vmcall,
[EXIT_REASON_VMCLEAR] = handle_vmclear,
[EXIT_REASON_VMLAUNCH] = handle_vmlaunch,
[EXIT_REASON_VMPTRLD] = handle_vmptrld,
[EXIT_REASON_VMPTRST] = handle_vmptrst,
[EXIT_REASON_VMREAD] = handle_vmread,
[EXIT_REASON_VMRESUME] = handle_vmresume,
[EXIT_REASON_VMWRITE] = handle_vmwrite,
[EXIT_REASON_VMOFF] = handle_vmoff,
[EXIT_REASON_VMON] = handle_vmon,
[EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
[EXIT_REASON_APIC_ACCESS] = handle_apic_access,
[EXIT_REASON_APIC_WRITE] = handle_apic_write,
[EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced,
[EXIT_REASON_WBINVD] = handle_wbinvd,
[EXIT_REASON_XSETBV] = handle_xsetbv,
[EXIT_REASON_TASK_SWITCH] = handle_task_switch,
[EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
/* EPT violation: the entry these notes focus on. */
[EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
[EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
[EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
[EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait,
[EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap,
[EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
[EXIT_REASON_INVEPT] = handle_invept,
[EXIT_REASON_INVVPID] = handle_invvpid,
[EXIT_REASON_XSAVES] = handle_xsaves,
[EXIT_REASON_XRSTORS] = handle_xrstors,
[EXIT_REASON_PML_FULL] = handle_pml_full,
[EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
};
exit_reason为EXIT_REASON_EPT_VIOLATION时,根据表项
[EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
调用handle_ept_violation()。
目标:
查看kvm中handle_v2e_ept_violation这个函数,重点关注它设置的exit_reason。
这个函数是从KVM退出(返回用户态QEMU)的出口。
需要理清楚:为什么这样写就能退出KVM。
目标函数
if(!handle_v2e_ept_violation(vcpu,gpa,exit_qualification))
{
return 0;
}
handle_v2e_ept_violation()
/*
 * handle_v2e_ept_violation - hook run on an EPT violation before the MMU
 * fault path.
 * @vcpu:               vCPU that took the EPT violation
 * @gpa:                guest-physical address that faulted
 * @exit_qualification: raw exit qualification of the violation
 *
 * Only instruction-fetch faults are examined. The faulting page is looked up
 * in its memslot's access_bitmaps array (one u8 per page, indexed relative to
 * the slot's base gfn). If the entry equals ACCESS_MALWARE, a message is
 * logged and vcpu->run->exit_reason is set to KVM_EXIT_EMULATE.
 *
 * Return: 0 when the page was flagged and exit_reason was set; handle_ept_violation
 *         then returns 0 itself. 1 in every other case, letting the normal fault
 *         handling continue.
 *
 * NOTE(review): ACCESS_MALWARE, KVM_EXIT_EMULATE and slot->access_bitmaps are
 * not defined in this view; their exact semantics should be confirmed.
 */
static int handle_v2e_ept_violation(struct kvm_vcpu *vcpu,gpa_t gpa,unsigned long exit_qualification)
{
	struct kvm_memory_slot *memslot;
	unsigned long page_idx;
	gfn_t fault_gfn;
	u8 *page_flags;

	/* Anything other than an instruction fetch is not our concern. */
	if (!((exit_qualification << 2) & PFERR_FETCH_MASK))
		return 1;

	fault_gfn = gpa >> PAGE_SHIFT;
	memslot = gfn_to_memslot(vcpu->kvm, fault_gfn);

	/* No backing slot, or the slot carries no per-page flags. */
	if (!memslot || !memslot->access_bitmaps)
		return 1;

	page_flags = memslot->access_bitmaps;
	page_idx = fault_gfn - memslot->base_gfn;
	if (page_flags[page_idx] != ACCESS_MALWARE)
		return 1;

	printk(KERN_ERR "halting in the handle_v2e_ept_violation\n");
	vcpu->run->exit_reason = KVM_EXIT_EMULATE;
	return 0;
}
这个函数的主要目的是设置
vcpu->run->exit_reason=KVM_EXIT_EMULATE;
并返回0。退回QEMU之后,QEMU的主循环根据run->exit_reason处理这次退出:
/*
 * Userspace vCPU run loop (illustrative fragment): every iteration issues
 * the KVM_RUN ioctl on the vCPU fd and, once it returns, dispatches on
 * run->exit_reason.
 * NOTE(review): presumably `run` is the struct kvm_run shared with the
 * kernel for this vCPU - confirm. The ioctl return value is not checked here.
 */
while (1) {
ioctl(vcpufd, KVM_RUN, NULL);
switch (run->exit_reason) {
/* Handle exit */
}
}
源码
/*
 * handle_ept_violation - VM-exit handler for EXIT_REASON_EPT_VIOLATION.
 * @vcpu: vCPU that took the exit
 *
 * Reads the exit qualification and faulting guest-physical address from the
 * VMCS, derives a PFERR_* style error code, gives handle_v2e_ept_violation()
 * a chance to intercept the fault, and otherwise forwards it to
 * kvm_mmu_page_fault().
 *
 * Return: 0 when the gla_validity check fails (exit_reason is set to
 *         KVM_EXIT_UNKNOWN) or when handle_v2e_ept_violation() returns 0;
 *         otherwise the result of kvm_mmu_page_fault().
 *
 * NOTE(review): presumably a return of 0 makes the vCPU run loop go back to
 * userspace; that caller is not visible here - confirm.
 */
static int handle_ept_violation(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification;
gpa_t gpa;
u32 error_code;
int gla_validity;
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
/* Bits 8:7 of the exit qualification. */
gla_validity = (exit_qualification >> 7) & 0x3;
/*
 * NOTE(review): the value 0x2 is treated as unhandleable; confirm the
 * meaning of this encoding against the Intel SDM.
 */
if (gla_validity == 0x2) {
printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
(long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
vmcs_readl(GUEST_LINEAR_ADDRESS));
printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
(long unsigned int)exit_qualification);
vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION;
return 0;
}
/*
 * EPT violation happened while executing iret from NMI,
 * "blocked by NMI" bit has to be set before next VM entry.
 * There are errata that may cause this bit to not be set:
 * AAK134, BY25.
 */
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
cpu_has_virtual_nmis() &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
trace_kvm_page_fault(gpa, exit_qualification);
/*
 * Build error_code from the exit qualification. The shifts below
 * appear to line individual qualification bits up with the PFERR_*
 * mask positions - verify against the PFERR_* definitions.
 */
/* it is a read fault? */
error_code = (exit_qualification << 2) & PFERR_USER_MASK;
/* it is a write fault? */
error_code |= exit_qualification & PFERR_WRITE_MASK;
/* It is a fetch fault? */
error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK;
/* ept page table is present? */
/* Sets bit 0 of error_code when any of qualification bits 5:3 is set. */
error_code |= (exit_qualification & 0x38) != 0;
vcpu->arch.exit_qualification = exit_qualification;
/*
 * Custom hook: when it returns 0 it has already set
 * vcpu->run->exit_reason, so skip the MMU fault path and return 0.
 */
if(!handle_v2e_ept_violation(vcpu,gpa,exit_qualification))
{
return 0;
}
return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
}