云游戏GPU虚拟化技术分析

Posted li_Jiejun

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了云游戏GPU虚拟化技术分析相关的知识,希望对你有一定的参考价值。

GPU全称是Graphics Processing Unit——图形处理器

1. 常规gpu调用逻辑

  接收CPU的指令,并行地进行渲染操作

2. GPU虚拟化

 三种分类方式:虚拟显卡,显卡透传,vgpu虚拟化

 2.1 端游gpu

  端游主要是指在pc(windows)上运行的游戏;
  实现:GPU透传
 2.1.1 一对一透传
 2.1.2 一对多透传

  操作比较简单,技术也比较成熟,具体操作见参考资料。

 2.2 手游gpu

  手游主要是指在安卓手机上运行的游戏;
  实现:通过virtio-gpu模块,实现显卡的一对多虚拟化

3. 部分代码解读

 NOTE:以手游v-gpu为例

virtio-gpu工作流程图

3.1 前端内核驱动层
  在内核中以 virtio-gpu.ko.xz 模块的形式存在,加载方式:modprobe virtio-gpu

virtio工作流程图

内核模块结构体

static struct virtio_driver virtio_gpu_driver = 
        .feature_table = features,
        .feature_table_size = ARRAY_SIZE(features),
        .driver.name = KBUILD_MODNAME,
        .driver.owner = THIS_MODULE,
        .id_table = id_table,
        .probe = virtio_gpu_probe,   //初始化设备,系统启动加载
        .remove = virtio_gpu_remove,
        .config_changed = virtio_gpu_config_changed
;

设备对应的操作接口,供用户层调用

static const struct file_operations virtio_gpu_driver_fops = 
        .owner = THIS_MODULE,
        .open = drm_open,
        .mmap = virtio_gpu_mmap,
        .poll = drm_poll,
        .read = drm_read,
        .unlocked_ioctl = drm_ioctl,
        .release = drm_release,
        .compat_ioctl = drm_compat_ioctl,
        .llseek = noop_llseek,
;

初始化设备

static int virtio_gpu_probe(struct virtio_device *vdev)

        struct drm_device *dev;
        int ret;

        if (vgacon_text_force() && virtio_gpu_modeset == -1)
                return -EINVAL;

        if (virtio_gpu_modeset == 0)
                return -EINVAL;

        dev = drm_dev_alloc(&driver, &vdev->dev);
        if (IS_ERR(dev))
                return PTR_ERR(dev);
        vdev->priv = dev;

        if (!strcmp(vdev->dev.parent->bus->name, "pci")) 
                ret = virtio_gpu_pci_quirk(dev, vdev);
                if (ret)
                        goto err_free;
        


        ret = virtio_gpu_init(dev);  //初始化设备,前后端通信消息队列
        if (ret)
                goto err_free;

        ret = drm_dev_register(dev, 0);
        if (ret)
                goto err_free;

        drm_fbdev_generic_setup(vdev->priv, 32);
        return 0;

err_free:
        drm_dev_put(dev);
        return ret;

    初始化用于图像处理(ctrlq)和光标操作(cursorq)的两个队列,并注册各自的出队回调函数

/* Set up the control queue and the cursor queue, each with its own dequeue callback. */
virtio_gpu_init_vq(&vgdev->ctrlq, virtio_gpu_dequeue_ctrl_func);
virtio_gpu_init_vq(&vgdev->cursorq, virtio_gpu_dequeue_cursor_func);

3.2 后端qemu层
后端设备初始化

/*
 * QOM class initializer for the virtio-gpu backend device: installs the
 * virtio device callbacks, the migration state description and the
 * device properties on the class.
 */
static void virtio_gpu_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
    VirtIOGPUBaseClass *vgc = VIRTIO_GPU_BASE_CLASS(klass);

    vgc->gl_flushed = virtio_gpu_gl_flushed;
    vdc->realize = virtio_gpu_device_realize;  // main entry point of the backend logic
    vdc->reset = virtio_gpu_reset;
    vdc->get_config = virtio_gpu_get_config;
    vdc->set_config = virtio_gpu_set_config;

    dc->vmsd = &vmstate_virtio_gpu;
    device_class_set_props(dc, virtio_gpu_properties);
}

主要操作入口

/*
 * Realize callback of the virtio-gpu backend.
 *
 * Decides whether virgl (3D) is usable, realizes the base device with the
 * control/cursor queue callbacks, then fetches both virtqueues, creates
 * their bottom halves and initializes the resource, command and fence
 * lists. Returns early, without further setup, when the base realize fails.
 */
static void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
    VirtIOGPU *g = VIRTIO_GPU(qdev);
    bool have_virgl;

#if !defined(CONFIG_VIRGL) || defined(HOST_WORDS_BIGENDIAN)
    have_virgl = false;
#else
    have_virgl = display_opengl;
#endif
    if (!have_virgl) {
        /* Clear the virgl-enabled flag when 3D support is unavailable. */
        g->parent_obj.conf.flags &= ~(1 << VIRTIO_GPU_FLAG_VIRGL_ENABLED);
    } else {
#if defined(CONFIG_VIRGL)
        VIRTIO_GPU_BASE(g)->virtio_config.num_capsets =
            virtio_gpu_virgl_get_num_capsets(g);
#endif
    }

    /*
     * Create the control/cursor queues and register their callbacks.
     * Per the article, virtio_gpu_handle_ctrl_cb schedules g->ctrl_bh
     * via qemu_bh_schedule(g->ctrl_bh).
     */
    if (!virtio_gpu_base_device_realize(qdev,
                                        virtio_gpu_handle_ctrl_cb,
                                        virtio_gpu_handle_cursor_cb,
                                        errp)) {
        return;
    }

    g->ctrl_vq = virtio_get_queue(vdev, 0);
    g->cursor_vq = virtio_get_queue(vdev, 1);
    g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g);  // bottom half handler: virtio_gpu_ctrl_bh
    g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g);
    QTAILQ_INIT(&g->reslist);
    QTAILQ_INIT(&g->cmdq);
    QTAILQ_INIT(&g->fenceq);
}

初始化图像/光标处理队列,注册对应的回调函数,并将队列加入任务队列

virtio_gpu_base_device_realize(DeviceState *qdev,
                               VirtIOHandleOutput ctrl_cb,                               
                               VirtIOHandleOutput cursor_cb,
                               Error **errp)

……

    if (virtio_gpu_virgl_enabled(g->conf)) 
        /* use larger control queue in 3d mode */
        virtio_add_queue(vdev, 256, ctrl_cb);  //初始化消息队列vq
        virtio_add_queue(vdev, 16, cursor_cb);
     else 
        virtio_add_queue(vdev, 64, ctrl_cb);
        virtio_add_queue(vdev, 16, cursor_cb);
    

初始化virtioqueue

/*
 * Claim the first unused virtqueue slot of @vdev (a slot whose vring.num
 * is 0), size it to @queue_size and attach @handle_output as its
 * notification handler.
 *
 * Aborts the process when no slot is free or @queue_size exceeds
 * VIRTQUEUE_MAX_SIZE. Returns a pointer to the configured queue.
 */
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            VirtIOHandleOutput handle_output)
{
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].vring.num_default = queue_size;
    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
    vdev->vq[i].handle_output = handle_output;  // the callback that actually handles guest notifications
    vdev->vq[i].handle_aio_output = NULL;
    vdev->vq[i].used_elems = g_malloc0(sizeof(VirtQueueElement) *
                                       queue_size);

    return &vdev->vq[i];
}

到这里qemu准备好了接收前端虚拟机的消息回调函数(handle_output)了

如何接收前端消息?

这个gpu模块是基于virtio-pci实现的,自然继承了virtio-pci的一切特性。virtio-pci在初始化时会注册相应的IO读写操作函数:

/* Initialize the proxy's BAR as an I/O memory region served by virtio_pci_config_ops. */
memory_region_init_io(&proxy->bar, OBJECT(proxy),
                              &virtio_pci_config_ops,  // table of read/write handlers
                              proxy, "virtio-pci", size);



static const MemoryRegionOps virtio_pci_config_ops = 
    .read = virtio_pci_config_read,  //读操作
    .write = virtio_pci_config_write,  //写操作
    .impl = 
        .min_access_size = 1,
        .max_access_size = 4,
    ,
    .endianness = DEVICE_LITTLE_ENDIAN,
;

以写操作为例(virtio_pci_config_write 内部会调用下面的 virtio_ioport_write)

static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)

……

    case VIRTIO_PCI_QUEUE_NOTIFY:   // VIRTIO_PCI_QUEUE_NOTIFY虚拟机内核virtio-gpu驱动发过来的消息指令
        if (val < VIRTIO_QUEUE_MAX) 
            virtio_queue_notify(vdev, val);
        
        break;
……
/*
 * Deliver a guest notification for queue @n of @vdev.
 *
 * Does nothing when the queue has no descriptor table or the device is
 * marked broken. Otherwise signals the host notifier when it is enabled,
 * or else calls the queue's handle_output callback directly and, if
 * start_on_kick is set, marks the device as started.
 */
void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];

    if (unlikely(!vq->vring.desc || vdev->broken)) {
        return;
    }

    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
    if (vq->host_notifier_enabled) {
        event_notifier_set(&vq->host_notifier);
    } else if (vq->handle_output) {
        vq->handle_output(vdev, vq);  // invoke the registered callback

        if (unlikely(vdev->start_on_kick)) {
            virtio_set_started(vdev, true);
        }
    }
}

 handle_output 回调(virtio_gpu_handle_ctrl_cb)只负责调度 g->ctrl_bh,真正执行的下半部处理函数是 virtio_gpu_ctrl_bh

static void virtio_gpu_ctrl_bh(void *opaque)

    VirtIOGPU *g = opaque;
    virtio_gpu_handle_ctrl(&g->parent_obj.parent_obj, g->ctrl_vq);  //处理函数


处理函数

/*
 * Drain the control virtqueue @vq: pop every pending command, append it
 * to g->cmdq, then process the command list. Returns immediately when
 * the queue is not ready.
 *
 * With CONFIG_VIRGL the renderer is initialized on first use, and fences
 * are polled after processing when the virgl renderer is in use.
 */
static void virtio_gpu_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOGPU *g = VIRTIO_GPU(vdev);
    struct virtio_gpu_ctrl_command *cmd;

    if (!virtio_queue_ready(vq)) {
        return;
    }

#ifdef CONFIG_VIRGL
    if (!g->renderer_inited && g->parent_obj.use_virgl_renderer) {
        virtio_gpu_virgl_init(g);
        g->renderer_inited = true;
    }
#endif

    cmd = virtqueue_pop(vq, sizeof(struct virtio_gpu_ctrl_command));  // fetch a command from the queue
    while (cmd) {
        cmd->vq = vq;
        cmd->error = 0;
        cmd->finished = false;
        QTAILQ_INSERT_TAIL(&g->cmdq, cmd, next);  // append each popped command to the g->cmdq list
        cmd = virtqueue_pop(vq, sizeof(struct virtio_gpu_ctrl_command));
    }

    virtio_gpu_process_cmdq(g);  // process the queued commands

#ifdef CONFIG_VIRGL
    if (g->parent_obj.use_virgl_renderer) {
        virtio_gpu_virgl_fence_poll(g);
    }
#endif
}

处理cmd指令

/*
 * Process the commands queued on g->cmdq in order.
 *
 * The processing_cmdq flag makes a nested call return immediately. The
 * loop stops early while the renderer is blocked. A command that is not
 * finished after processing is moved to g->fenceq and counted as
 * in flight; a finished command is freed.
 */
void virtio_gpu_process_cmdq(VirtIOGPU *g)
{
    struct virtio_gpu_ctrl_command *cmd;

    if (g->processing_cmdq) {
        return;
    }
    g->processing_cmdq = true;
    while (!QTAILQ_EMPTY(&g->cmdq)) {    // handle commands one by one
        cmd = QTAILQ_FIRST(&g->cmdq);

        if (g->parent_obj.renderer_blocked) {
            break;
        }

        /* process command */
        VIRGL(g, virtio_gpu_virgl_process_cmd, virtio_gpu_simple_process_cmd,
              g, cmd);  // in the 3D case this calls virtio_gpu_virgl_process_cmd (per the article)

        QTAILQ_REMOVE(&g->cmdq, cmd, next);
        if (virtio_gpu_stats_enabled(g->parent_obj.conf)) {
            g->stats.requests++;
        }

        if (!cmd->finished) {
            QTAILQ_INSERT_TAIL(&g->fenceq, cmd, next);
            g->inflight++;
            if (virtio_gpu_stats_enabled(g->parent_obj.conf)) {
                if (g->stats.max_inflight < g->inflight) {
                    g->stats.max_inflight = g->inflight;
                }

                /* Carriage return so the counter is rewritten on one line. */
                fprintf(stderr, "inflight: %3d (+)\r", g->inflight);
            }
        } else {
            g_free(cmd);
        }
    }
    g->processing_cmdq = false;
}

  具体的每个操作指令

/*
 * Dispatch one control command to its handler in 3D (virgl) mode.
 *
 * After the handler runs:
 *  - a command already marked finished needs no further response;
 *  - a command with an error is logged and answered with that error;
 *  - a command without VIRTIO_GPU_FLAG_FENCE is answered OK_NODATA;
 *  - otherwise a renderer fence is created for the command's fence id.
 * Unknown command types are answered with VIRTIO_GPU_RESP_ERR_UNSPEC.
 */
void virtio_gpu_virgl_process_cmd(VirtIOGPU *g,
                                      struct virtio_gpu_ctrl_command *cmd)
{
    VIRTIO_GPU_FILL_CMD(cmd->cmd_hdr);

    virgl_renderer_force_ctx_0();
    switch (cmd->cmd_hdr.type) {    // per-command handlers; per the article these reach the virgl/mesa interfaces
    case VIRTIO_GPU_CMD_CTX_CREATE:
        virgl_cmd_context_create(g, cmd);
        break;
    case VIRTIO_GPU_CMD_CTX_DESTROY:
        virgl_cmd_context_destroy(g, cmd);
        break;
    case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D:
        virgl_cmd_create_resource_2d(g, cmd);
        break;
    case VIRTIO_GPU_CMD_RESOURCE_CREATE_3D:
        virgl_cmd_create_resource_3d(g, cmd);
        break;
    case VIRTIO_GPU_CMD_SUBMIT_3D:
        virgl_cmd_submit_3d(g, cmd);
        break;
    case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D:
        virgl_cmd_transfer_to_host_2d(g, cmd);
        break;
    case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D:
        virgl_cmd_transfer_to_host_3d(g, cmd);
        break;
    case VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D:
        virgl_cmd_transfer_from_host_3d(g, cmd);
        break;
    case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING:
        virgl_resource_attach_backing(g, cmd);
        break;
    case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING:
        virgl_resource_detach_backing(g, cmd);
        break;
    case VIRTIO_GPU_CMD_SET_SCANOUT:
        virgl_cmd_set_scanout(g, cmd);
        break;
    case VIRTIO_GPU_CMD_RESOURCE_FLUSH:
        virgl_cmd_resource_flush(g, cmd);
        break;
    case VIRTIO_GPU_CMD_RESOURCE_UNREF:
        virgl_cmd_resource_unref(g, cmd);
        break;
    case VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE:
        /* TODO add security */
        virgl_cmd_ctx_attach_resource(g, cmd);
        break;
    case VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE:
        /* TODO add security */
        virgl_cmd_ctx_detach_resource(g, cmd);
        break;
    case VIRTIO_GPU_CMD_GET_CAPSET_INFO:
        virgl_cmd_get_capset_info(g, cmd);
        break;
    case VIRTIO_GPU_CMD_GET_CAPSET:
        virgl_cmd_get_capset(g, cmd);
        break;
    case VIRTIO_GPU_CMD_GET_DISPLAY_INFO:
        virtio_gpu_get_display_info(g, cmd);
        break;
    case VIRTIO_GPU_CMD_GET_EDID:
        virtio_gpu_get_edid(g, cmd);
        break;
    default:
        cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
        break;
    }

    if (cmd->finished) {
        return;
    }
    if (cmd->error) {
        fprintf(stderr, "%s: ctrl 0x%x, error 0x%x\n", __func__,
                cmd->cmd_hdr.type, cmd->error);
        virtio_gpu_ctrl_response_nodata(g, cmd, cmd->error);
        return;
    }

    if (!(cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE)) {
        virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA);
        return;
    }

    trace_virtio_gpu_fence_ctrl(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type);
    virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type);
}


4. 总结

Virtio-gpu分为前后端架构,前后端的消息传递通过virtio架构实现的,前端通过ioevent通知后端,后端通过irq通知前端;前端virtio-gpu驱动捕捉到显卡操作指令/数据放到共享队列上,并通知后端,后端从共享队列上得到数据/指令,然后调用virgl,mesa接口到服务器的真实显卡上处理。

 

以上是关于云游戏GPU虚拟化技术分析的主要内容,如果未能解决你的问题,请参考以下文章

云游戏商业模式探索

云游戏商业模式探索

GPU虚拟化技术

详解GPU虚拟化技术

GPU虚拟化技术和工作原理

分析先进的GPU和虚拟化技术对ADAS平台至关重要