初始化EPT

Posted penghan

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了初始化EPT相关的知识,希望对你有一定的参考价值。

/*
 * Field-wise view of the 64-bit EPT pointer (EPTP).
 *
 * The field widths add up to exactly 64 bits (3 + 3 + 1 + 5 + 40 + 12) and
 * the struct is packed. The bit positions quoted below assume the LSB-first
 * bit-field allocation that GCC uses on x86.
 */
struct eptp_bits {
    unsigned int  memory_type  : 3;   /* bits  2:0  - 0: UC uncacheable, 6: WB writeback */
    unsigned int  pagewalk_len : 3;   /* bits  5:3  - value 1 less than EPT page-walk length */
    unsigned int  dirty        : 1;   /* bit   6    - dirty flag */
    unsigned int  reserved1    : 5;   /* bits 11:7 */
    unsigned long pgd          : 40;  /* bits 51:12 - frame number of the 4-KByte aligned EPT PML4 table */
    unsigned int  reserved2    : 12;  /* bits 63:52 */
} __attribute__((packed));

/*
 * EPTP accessible either field by field (bits) or as the raw 64-bit
 * quantity (value). Both members share the same storage.
 */
union eptp {
    struct eptp_bits bits;
    __u64 value;
};

/*
 * PGD entry (PML4E).
 *
 * addr holds bits (N-1):12 of the physical address of the 4-KByte aligned
 * EPT page-directory-pointer table referenced by this entry.
 *
 * Since no processors supporting the Intel 64 architecture support more than
 * 48 physical-address bits, the size of field "addr" is 36 bits. Ditto for
 * other structures.
 *
 * Bit positions below assume GCC's LSB-first bit-field allocation on x86;
 * the widths sum to 64 bits.
 */
struct ept_pgd_bits {
    unsigned int  read      : 1;    /* bit   0 */
    unsigned int  write     : 1;    /* bit   1 */
    unsigned int  exec      : 1;    /* bit   2 */
    unsigned int  reserved1 : 5;    /* bits  7:3 */
    unsigned int  accessed  : 1;    /* bit   8 */
    unsigned int  ignored1  : 3;    /* bits 11:9 */
    unsigned long addr      : 36;   /* bits 47:12 - frame number of the next-level table */
    unsigned int  reserved2 : 4;    /* bits 51:48 */
    unsigned int  ignored2  : 12;   /* bits 63:52 */
} __attribute__((__packed__));

/*
 * One PGD (PML4) entry, readable field by field (bits) or as the raw
 * 64-bit entry (value). Both members share the same storage.
 */
union ept_pgd {
    struct ept_pgd_bits bits;
    __u64 value;
};

/*
 * PUD entry (PDPTE).
 *
 * Same field layout as struct ept_pgd_bits: three permission bits, an
 * accessed bit and a 36-bit frame number starting at bit 12. Bit positions
 * below assume GCC's LSB-first bit-field allocation on x86; the widths sum
 * to 64 bits.
 */
struct ept_pud_bits {
    unsigned int  read      : 1;    /* bit   0 */
    unsigned int  write     : 1;    /* bit   1 */
    unsigned int  exec      : 1;    /* bit   2 */
    unsigned int  reserved1 : 5;    /* bits  7:3 */
    unsigned int  accessed  : 1;    /* bit   8 */
    unsigned int  ignored1  : 3;    /* bits 11:9 */
    unsigned long addr      : 36;   /* bits 47:12 - frame number of the next-level table */
    unsigned int  reserved2 : 4;    /* bits 51:48 */
    unsigned int  ignored2  : 12;   /* bits 63:52 */
} __attribute__((__packed__));

/*
 * One PUD (PDPT) entry, readable field by field (bits) or as the raw
 * 64-bit entry (value). Both members share the same storage.
 */
union ept_pud {
    struct ept_pud_bits bits;
    __u64 value;
};

/*
 * PMD entry (PDE).
 *
 * Differs from the PGD/PUD layout only in that bit 7 is split out of the
 * reserved range as its own "zero" field. Bit positions below assume GCC's
 * LSB-first bit-field allocation on x86; the widths sum to 64 bits.
 */
struct ept_pmd_bits {
    unsigned int  read      : 1;    /* bit   0 */
    unsigned int  write     : 1;    /* bit   1 */
    unsigned int  exec      : 1;    /* bit   2 */
    unsigned int  reserved1 : 4;    /* bits  6:3 */
    unsigned int  zero      : 1;    /* bit   7 */
    unsigned int  accessed  : 1;    /* bit   8 */
    unsigned int  ignored1  : 3;    /* bits 11:9 */
    unsigned long addr      : 36;   /* bits 47:12 - frame number of the next-level table */
    unsigned int  reserved2 : 4;    /* bits 51:48 */
    unsigned int  ignored2  : 12;   /* bits 63:52 */
} __attribute__((__packed__));

/*
 * One PMD (page-directory) entry, readable field by field (bits) or as the
 * raw 64-bit entry (value). Both members share the same storage.
 */
union ept_pmd {
    struct ept_pmd_bits bits;
    __u64 value;
};

/*
 * Page-directory entry in its page-mapping form.
 *
 * Here addr starts at bit 21 (after 9 reserved bits), i.e. it counts
 * 2-MByte units rather than 4-KByte frames. Bit positions below assume GCC's
 * LSB-first bit-field allocation on x86; the widths sum to 64 bits.
 */
struct ept_pde_bits {
    unsigned int  read      : 1;    /* bit   0 */
    unsigned int  write     : 1;    /* bit   1 */
    unsigned int  exec      : 1;    /* bit   2 */
    unsigned int  memtype   : 3;    /* bits  5:3 */
    unsigned int  ignorpat  : 1;    /* bit   6 */
    unsigned int  ispage    : 1;    /* bit   7 */
    unsigned int  accessed  : 1;    /* bit   8 */
    unsigned int  writed    : 1;    /* bit   9 */
    unsigned int  ignored   : 2;    /* bits 11:10 */
    unsigned int  reserved  : 9;    /* bits 20:12 */
    unsigned long addr      : 30;   /* bits 50:21 */
    unsigned int  reserved2 : 1;    /* bit  51 */
    unsigned int  ignored2  : 12;   /* bits 63:52 */
} __attribute__((__packed__));

/*
 * One page-mapping page-directory entry, readable field by field (bits) or
 * as the raw 64-bit entry (value). Both members share the same storage.
 */
union ept_pde {
    struct ept_pde_bits bits;
    __u64 value;
};


/*
 * PTE.
 *
 * Carries a 36-bit frame number at bit 12 like the table-pointing entries,
 * plus memory-type, dirty and suppress_ve fields. Bit positions below assume
 * GCC's LSB-first bit-field allocation on x86; the widths sum to 64 bits.
 */
struct ept_pte_bits {
    unsigned int  read        : 1;    /* bit   0 */
    unsigned int  write       : 1;    /* bit   1 */
    unsigned int  exec        : 1;    /* bit   2 */
    unsigned int  mem_type    : 3;    /* bits  5:3 */
    unsigned int  ignore_pat  : 1;    /* bit   6 */
    unsigned int  ignored1    : 1;    /* bit   7 */
    unsigned int  accessed    : 1;    /* bit   8 */
    unsigned int  dirty       : 1;    /* bit   9 */
    unsigned int  ignored2    : 2;    /* bits 11:10 */
    unsigned long addr        : 36;   /* bits 47:12 */
    unsigned int  reserved1   : 4;    /* bits 51:48 */
    unsigned int  ignored3    : 11;   /* bits 62:52 */
    unsigned int  suppress_ve : 1;    /* bit  63 */
} __attribute__((__packed__));

/*
 * One page-table entry, readable field by field (bits) or as the raw
 * 64-bit entry (value). Both members share the same storage.
 */
union ept_pte {
    struct ept_pte_bits bits;
    __u64 value;
};

/*
 * Table-index helpers: each extracts the 9-bit index (0..511) that selects
 * an entry at one level of the four-level walk.
 *
 * The argument is parenthesized so that an expression such as
 * ept_pte_index(base | offset) is shifted as a whole; without the inner
 * parentheses ">>" would bind to the last operand only.
 */

/* bits 47:39 */
#define ept_pgd_index(x)    (((x) >> 39) & 0x1ff)

/* bits 38:30 */
#define ept_pud_index(x)    (((x) >> 30) & 0x1ff)

/* bits 29:21 */
#define ept_pmd_index(x)    (((x) >> 21) & 0x1ff)

/* bits 20:12 */
#define ept_pte_index(x)    (((x) >> 12) & 0x1ff)

 上面是一些页表的结构体,下面是初始化ept页表。每个页大小为2MB.

static err_t initEpt(void)
{
    union ept_pud* pud;
    union ept_pde* pde;

    int i,j,k, memnum = 0;
    pgd = (union ept_pgd *)get_zeroed_page(GFP_ATOMIC);
    if (NULL == pgd)
    {
        printk("%s: alloc pgd error!\n", __func__);
        goto allocErr;
    }
    eptph.value = 0;
    eptph.bits.memory_type = 6;
    eptph.bits.pagewalk_len = 3;
    eptph.bits.pgd = __pa(pgd) >>PAGE_SHIFT;
    printk("eptp.pgd physical address= 0x%016lx, virtual address = 0x%016lx, pgd = 0x%016lx\n", (unsigned long)(eptph.bits.pgd << PAGE_SHIFT), (unsigned long)__va(eptph.bits.pgd << PAGE_SHIFT), (unsigned long)pgd);

    for(i = 0; i < 512; i++)
    {
        pud = (union ept_pud*)get_zeroed_page(GFP_ATOMIC);
        if (NULL == pud)
        {
            printk("%s: alloc pud error!\n", __func__);
            goto allocErr;
        }
        pgd[i].value = 0;
        pgd[i].bits.read = 1;
        pgd[i].bits.write = 1;
        pgd[i].bits.exec = 1;
        pgd[i].bits.addr = __pa(pud) >>PAGE_SHIFT;
        printk("pgde: va=0x%016lx,  pa=0x%016lx\n", (unsigned long)pud, (unsigned long)pgd[i].bits.addr);
        
        for (j = 0; j < 512; j++)
        {
            pde = (union ept_pde*)get_zeroed_page(GFP_ATOMIC);
            if (NULL == pde)
            {
                printk("%s: alloc pde error!\n", __func__);
                goto allocErr;
            }
            pud[j].value = 0;
            pud[j].bits.read = 1;
            pud[j].bits.write = 1;
            pud[j].bits.exec = 1;
            pud[j].bits.addr = __pa(pde) >>PAGE_SHIFT;
            printk("pude: va=0x%016lx,  pa=0x%016lx\n", (unsigned long)pde, (unsigned long)pud[j].bits.addr);
            for(k = 0; k < 512; k++)
            {
                pde[k].value = 0;
                pde[k].bits.read = 1;
                pde[k].bits.write = 1;
                pde[k].bits.exec = 1;
                pde[k].bits.memtype = 6;
                pde[k].bits.ispage = 1;
                pde[k].bits.addr = memnum++;
                if (memnum >= (1<<18))
                {
                    return SUCCESS;
                }
            }
        }
    }
    return SUCCESS;
    
allocErr:
    printk("initEpt: alloc mem error!\n");
    return ERROR_ALLOC_FAILED;
        
    
}

 对创建的EPT页表做测试。

/*
 * Exercise the EPT remapping done by the VMCALL_MEM_TEST hypercall.
 *
 * Allocates two order-9 (2-MByte) blocks, passes their physical addresses to
 * cpu_vmcs_vmcall_memtest(), then writes a message through one block and
 * prints what is read back through the other, in both directions.
 *
 * Returns SUCCESS, or ERROR_ALLOC_FAILED if either block cannot be
 * allocated (a block that was obtained is released again in that case).
 *
 * NOTE(review): the two blocks are not freed on the success path, as in the
 * original; presumably because the remapping is never undone - confirm
 * before adding a free here.
 */
static err_t doMemTest(void)
{
    /*
     * Named arrays so that the copy length can be sizeof(); the original
     * copied a fixed 30 bytes, which read past the end of both literals.
     */
    static const char first_msg[] = "in our do mem test!\n";
    static const char second_msg[] = "test for test!\n";
    __u64 *page,*page2;
    unsigned char buff[100];

    memset(buff, 0, sizeof(buff));
    page = (__u64*)__get_free_pages(GFP_KERNEL, 9);
    page2 = (__u64*)__get_free_pages(GFP_KERNEL, 9);
    if (page == NULL || page2 == NULL)
    {
        printk("alloc test mem failed!\n");
        /* free_pages() ignores a zero address, so both calls are safe */
        free_pages((unsigned long)page, 9);
        free_pages((unsigned long)page2, 9);
        return ERROR_ALLOC_FAILED;
    }
    printk("page = 0x%016lx, page2 = 0x%016lx\n", (unsigned long int)page, (unsigned long int)page2);
    cpu_vmcs_vmcall_memtest(VMCALL_MEM_TEST, __pa(page), __pa(page2));
    printk("page = 0x%016lx, page2 = 0x%016lx, __pa(page2) = 0x%016lx\n\n\n\n", (unsigned long int)page, (unsigned long int)page2, (unsigned long int)__pa(page2));

    /* write through page, read back through page2 (NUL included) */
    memcpy(page, first_msg, sizeof(first_msg));
    memcpy(buff, page2, sizeof(first_msg));

    printk("buff = %s\n", buff);

    /* and the other way round: write through page2, read through page */
    memcpy(page2, second_msg, sizeof(second_msg));

    printk("page = %s\n", (char *)page);
    return SUCCESS;
}
/*
 * Issue a VMCALL carrying a reason code and two arguments.
 *
 * The operand constraints place reason in RDI, arg0 in RSI and arg1 in RDX.
 * After the instruction the flags are captured with setbe/setb/sete:
 *   error         - set if CF or ZF is set
 *   vmfailinvalid - set if CF is set
 *   vmfailvalid   - set if ZF is set
 * If error is non-zero, vmcall_error() is called with the reason and the two
 * individual flags. Nothing is returned to the caller.
 */
static __always_inline void cpu_vmcs_vmcall_memtest(unsigned long reason,
                        unsigned long arg0, unsigned long arg1)
{
    __u8 error, vmfailinvalid, vmfailvalid;

    asm volatile (
        /* clear ZF and CF, otherwise guest may think that vmcall
         * failed. encapsulated process may trick this by setting rsp to
         * zero, but what's the point? */
        "test    %%rsp, %%rsp\n"

        "vmcall\n"
        "setbe    %0\n"
        "setb    %1\n"
        "sete    %2\n"
        : "=qm"(error), "=qm"(vmfailinvalid), "=qm"(vmfailvalid)
        : "D"(reason), "S"(arg0), "d"(arg1) :"cc", "memory"
    );

    if (error)
        vmcall_error(reason, vmfailinvalid, vmfailvalid);
}

 

static void exit_vmcall_trusted(struct vcpu *vcpu)
{
    unsigned long arg0, arg1, arg2;
    struct shadow_process *shadowp;
    struct capsule_params *params;
    enum vmcall_reason reason;
    struct task_struct *task;
    struct capsule *capsule;
    bool woken_up;

    reason = vcpu->regs.rdi;
    arg0 = vcpu->regs.rsi;

    switch (reason) {
    case VMCALL_MEM_TEST:
        arg1 = vcpu->regs.rdx;
        printk("arg0 = 0x%016lx, arg1 = 0x%016lx\n", arg0, arg1);
        modifyEpt(arg0, arg1);
        break;

 

/*
 * Point the EPT entry covering vaddr2 at the frame currently mapped for
 * vaddr1, logging the frame numbers of both addresses before and after.
 *
 * The frame for vaddr1 is looked up with getPaddr() and written into the
 * entry for vaddr2 with setPaddr(); the result of setPaddr() is not checked.
 *
 * NOTE(review): getPaddr() returns 0 both for "frame 0" and for a failed
 * lookup, so a failed lookup of vaddr1 would remap vaddr2 to frame 0.
 * NOTE(review): no INVEPT is issued after the change (the call is disabled
 * in the original as well).
 */
static void modifyEpt(__u64 vaddr1, __u64 vaddr2)
{
    unsigned long frame_a = getPaddr(vaddr1);
    unsigned long frame_b = getPaddr(vaddr2);

    printk("pfn1 = 0x%016lx, pfn2 = 0x%016lx\n", frame_a, frame_b);

    (void)setPaddr(frame_a, vaddr2);

    /* look both entries up again to show the effect of the update */
    frame_a = getPaddr(vaddr1);
    frame_b = getPaddr(vaddr2);
    printk("pfn1 = 0x%016lx, pfn2 = 0x%016lx\n", frame_a, frame_b);
}

 

/*
 * Look up the EPT entry that maps vaddr and return its addr field.
 *
 * Walks PGD -> PUD -> page directory starting at the global pgd, logging
 * each step. Only a page-directory entry with bit 7 set (the ispage flag of
 * struct ept_pde_bits) is accepted; its bits.addr value is returned as is,
 * i.e. as a frame number, not as a byte address.
 *
 * Returns 0 when any level is not present, when the PUD entry has bit 7
 * set, or when the page-directory entry does not have bit 7 set. Note that
 * 0 is therefore indistinguishable from a successful lookup of frame 0.
 */
static unsigned long getPaddr(__u64 vaddr)
{
    union ept_pgd *pgde;
    union ept_pud *pude;
    union ept_pde *pdee;

    /* Page Global Dir */
    pgde = pgd + ept_pgd_index(vaddr);
    printk("pgde = 0x%016lx, physical address = 0x%016lx, pgde->value = 0x%016lx\n",
        (unsigned long)pgde, (unsigned long)__pa(pgde), (unsigned long)pgde->value);
    /* no entry in EPT PGD? */
    if (!ept_pgd_present(pgde))
    {
        printk("getPaddr : pgd not found!\n");
        return 0;
    }
    printk("pgde get success......\n");
    printk("pgde->bits.addr = 0x%016lx, va = 0x%016lx, pud_index = 0x%016lx\n",
        (unsigned long)pgde->bits.addr, (unsigned long)__va((unsigned long)pgde->bits.addr << PAGE_SHIFT), (unsigned long)ept_pud_index(vaddr));

    /* Page Upper Dir */
    pude = (union ept_pud *)ept_pgd_addr(pgde) + ept_pud_index(vaddr);
    if (!ept_pud_present(pude))
    {
        printk("getPaddr : pud not found!\n");
        return 0;
    }
    printk("pude get success......\n");
    if (pude->value & (1 << 7)) {
        hv_err("BUG: huge pud in gpa_to_hva");
        return 0;
    }

    /* Page Middle Dir */
    /*
     * The bit-field is widened to unsigned long before the shift: GCC
     * evaluates arithmetic on a wide bit-field at the field's own width, so
     * shifting first would drop the upper address bits.
     */
    printk("pude->bits.addr = 0x%016lx, va = 0x%016lx, pmd_index = 0x%016lx\n",
        (unsigned long)pude->bits.addr, (unsigned long)__va((unsigned long)pude->bits.addr << PAGE_SHIFT), (unsigned long)ept_pmd_index(vaddr));
    pdee = (union ept_pde *)ept_pud_addr(pude) + ept_pmd_index(vaddr);
    if (!ept_pmd_present((union ept_pmd *)pdee))
    {
        printk("getPaddr : pde not found!\n");
        return 0;
    }
    if (pdee->value & (1 << 7)) {
        printk("find pmd! pdee->value = 0x%016lx, pdee->bits.addr = 0x%016lx---------------\n", (unsigned long)pdee->value, (unsigned long)pdee->bits.addr);
        printk("pdee->bits.addr = 0x%016lx\n", (unsigned long)pdee->bits.addr);

        return pdee->bits.addr;
    }

    /* entry points to a page table; that level is not handled here */
    printk("find pmd failed!\n");
    return 0;
}


/*
 * Overwrite the addr field of the EPT entry that maps vaddr.
 *
 * Walks PGD -> PUD -> page directory starting at the global pgd, exactly
 * like getPaddr(). Only a page-directory entry with bit 7 set (the ispage
 * flag of struct ept_pde_bits) is modified: paddr is stored into its
 * bits.addr field and the stored value is returned.
 *
 * paddr is a frame number in the same units getPaddr() returns, not a byte
 * address; it is truncated to the 30-bit width of the field.
 *
 * Returns 0 when any level is not present, when the PUD entry has bit 7
 * set, or when the page-directory entry does not have bit 7 set.
 */
static unsigned long setPaddr(unsigned long paddr, __u64 vaddr)
{
    union ept_pgd *pgde;
    union ept_pud *pude;
    union ept_pde *pdee;

    /* Page Global Dir */
    pgde = pgd + ept_pgd_index(vaddr);
    /* no entry in EPT PGD? */
    if (!ept_pgd_present(pgde))
    {
        printk("setPaddr : pgd not found!\n");
        return 0;
    }

    /* Page Upper Dir; same explicit cast as in getPaddr() so both walks index identically */
    pude = (union ept_pud *)ept_pgd_addr(pgde) + ept_pud_index(vaddr);
    if (!ept_pud_present(pude))
    {
        printk("setPaddr : pud not found!\n");
        return 0;
    }

    if (pude->value & (1 << 7)) {
        hv_err("BUG: huge pud in gpa_to_hva");
        return 0;
    }

    /* Page Middle Dir */
    pdee = (union ept_pde *)ept_pud_addr(pude) + ept_pmd_index(vaddr);
    if (!ept_pmd_present((union ept_pmd*)pdee))
    {
        printk("setPaddr : pmd not found!\n");
        return 0;
    }
    if (pdee->value & (1 << 7)) {
        printk("find pmd!,now set it!!!!!!!!111\n");
        pdee->bits.addr = paddr;
        return pdee->bits.addr;
    }

    /* entry points to a page table; that level is not handled here */
    printk("find pmd failed!\n");
    return 0;
}

 

 

以下是与线性区的页相关的那些标志。它们存放在vm_area_struct描述符的vm_flags字段中。一些标志给内核提供有关这个线性区全部页的信息,例如它们包含有什么内容,进程访问每个页的权限是什么。另外的标志描述线性区自身,例如它应该如何增长(这些标志位于include/linux/mm.h):

VM_READ:页是可读的

VM_WRITE:页是可写的

VM_EXEC:页是可执行的

VM_SHARED:页可以由几个进程共享

VM_MAYREAD:可以设置VM_READ标志

VM_MAYWRITE:可以设置VM_WRITE标志

VM_MAYEXEC:可以设置VM_EXEC标志

VM_MAYSHARE:可以设置VM_SHARED标志

VM_GROWSDOWN:线性区可以向低地址扩展

VM_GROWSUP:线性区可以向高地址扩展

VM_SHM:线性区用于IPC的共享内存

VM_DENYWRITE:线性区映射一个不能打开用于写的文件

VM_EXECUTABLE:线性区映射一个可执行文件

VM_LOCKED:线性区中的页被锁住,且不能换出

VM_IO:线性区映射设备的I/O地址空间

VM_SEQ_READ:应用程序顺序地访问页

VM_RAND_READ:应用程序以真正的随机顺序访问页

VM_DONTCOPY:当创建一个新进程时不拷贝线性区

VM_DONTEXPAND:通过mremap()系统调用禁止线性区扩展

VM_RESERVED:线性区是特殊的(如:它映射某个设备的I/O地址空间),因此它的页不能被交换出去

VM_ACCOUNT:创建IPC共享线性区时检查是否有足够的空闲内存用于映射

VM_HUGETLB:通过扩展分页机制处理线性区中的页

VM_NONLINEAR:线性区实现非线性文件映射

因此,要根据以下规则精简由读、写、执行和共享访问权限的16种可能组合: - 如果页具有写和共享两种访问权限,那么,Read/Write位被设置为1。 - 如果页具有读或执行访问权限,但是既没有写也没有共享访问权限,那么,Read/Write位被清0。 - 如果支持NX位,而且页没有执行访问权限,那么,把NX位设置为1。 - 如果页没有任何访问权限,那么,Present位被清0,以便每次访问都产生一个缺页异常。 然而,为了把这种情况与真正的页框不存在的情况相区分,Linux还把Page size位置为1(你可能认为Page size位的这种用法并不正当,因为这个位本来是表示实际页的大小。 但是,Linux可以侥幸逃脱这种骗局,因为80x86芯片在页目录项中检查Page size位,而不是在页表的表项中检查该位。)

do_page_fault()函数接收以下输入参数: - pt_regs结构的地址regs,该结构包含当异常发生时的微处理器寄存器的值。 - 3位的error_code,当异常发生时由控制单元压入栈中(参见第四章中的“中断和异常的硬件处理”一节)。这些位有以下含义:

—— 如果第0位被清0,则异常由访问一个不存在的页所引起(页表项中的Present标志被清0);否则,如果第0位被设置,则异常由无效的访问权限所引起。

—— 如果第1位被清0,则异常由读访问或者执行访问所引起;如果该位被设置,则异常由写访问所引起。

—— 如果第2位被清0,则异常发生在处理器处于内核态时;否则,异常发生在处理器处于用户态时。  

* bit 3 == 1 means use of reserved bit detected  * bit 4 == 1 means fault was an instruction fetch

 

以上是关于初始化EPT的主要内容,如果未能解决你的问题,请参考以下文章

KVM硬件辅助虚拟化之 EPT in Nested Virtualization

关于“ VMware Workstation 16 此平台不支持虚拟化的Intel VT-x/EPT. 不使用虚拟化的Intel VT-x/EPT,是否继续?”的有关问题的总结解答

EPTP 和 EPT 分页结构条目的格式

如何使用 Swift 使用此代码片段为 iOS 应用程序初始化 SDK?

Jekyll 偏移代码片段高亮的初始行

vscode 用户代码片段 vue初始化模板 Snippet #新加入开头注释 自动生成文件名 开发日期时间等内容