MTKView Drawing Performance

Posted: 2019-09-10 04:23:49

What I'm trying to do

I'm trying to display a filter on top of the camera feed using a Metal view: MTKView. I'm closely following the approach from Apple's sample code Enhancing Live Video by Leveraging TrueDepth Camera Data (link).

What I have so far

The following code works well (mostly adapted from the sample code mentioned above):

    class MetalObject: NSObject, MTKViewDelegate {

            private var metalBufferView         : MTKView?
            private var metalDevice             = MTLCreateSystemDefaultDevice()
            private var metalCommandQueue       : MTLCommandQueue!

            private var ciContext               : CIContext!
            private let colorSpace              = CGColorSpaceCreateDeviceRGB()

            private var videoPixelBuffer        : CVPixelBuffer?

            private let syncQueue               = DispatchQueue(label: "Preview View Sync Queue", qos: .userInitiated, attributes: [], autoreleaseFrequency: .workItem)

            private var textureWidth            : Int             = 0
            private var textureHeight           : Int             = 0
            private var textureMirroring        = false
            private var sampler                 : MTLSamplerState!
            private var renderPipelineState     : MTLRenderPipelineState!
            private var vertexCoordBuffer       : MTLBuffer!
            private var textCoordBuffer         : MTLBuffer!
            private var internalBounds          : CGRect!
            private var textureTranform         : CGAffineTransform?

            private var previewImage            : CIImage?

    init(with frame: CGRect) {
        super.init()

        self.metalBufferView = MTKView(frame: frame, device: self.metalDevice)
        self.metalBufferView!.contentScaleFactor = UIScreen.main.nativeScale
        self.metalBufferView!.framebufferOnly = true
        self.metalBufferView!.colorPixelFormat = .bgra8Unorm
        self.metalBufferView!.isPaused = true
        self.metalBufferView!.enableSetNeedsDisplay = false
        self.metalBufferView!.delegate = self

        self.metalCommandQueue = self.metalDevice!.makeCommandQueue()

        self.ciContext = CIContext(mtlDevice: self.metalDevice!)


        //Configure Metal
        let defaultLibrary = self.metalDevice!.makeDefaultLibrary()!
        let pipelineDescriptor = MTLRenderPipelineDescriptor()
        pipelineDescriptor.colorAttachments[0].pixelFormat = .bgra8Unorm
        pipelineDescriptor.vertexFunction = defaultLibrary.makeFunction(name: "vertexPassThrough")
        pipelineDescriptor.fragmentFunction = defaultLibrary.makeFunction(name: "fragmentPassThrough")

        // To determine how our textures are sampled, we create a sampler descriptor, which
        // will be used to ask for a sampler state object from our device below.
        let samplerDescriptor = MTLSamplerDescriptor()
        samplerDescriptor.sAddressMode = .clampToEdge
        samplerDescriptor.tAddressMode = .clampToEdge
        samplerDescriptor.minFilter = .linear
        samplerDescriptor.magFilter = .linear

        sampler = self.metalDevice!.makeSamplerState(descriptor: samplerDescriptor)

        do {
            renderPipelineState = try self.metalDevice!.makeRenderPipelineState(descriptor: pipelineDescriptor)
        } catch {
            fatalError("Unable to create preview Metal view pipeline state. (\(error))")
        }
    }




    final func update (newVideoPixelBuffer: CVPixelBuffer?) {

        self.syncQueue.async {

            var filteredImage : CIImage

            self.videoPixelBuffer = newVideoPixelBuffer

            //---------
            //Core image filters
            //Strictly CIFilters, chained together
            //---------

            self.previewImage = filteredImage

            //Ask Metal View to draw
            self.metalBufferView?.draw()
        }
    }



    //MARK: - Metal View Delegate
    final func draw(in view: MTKView) {

        print (Thread.current)

        guard let drawable = self.metalBufferView!.currentDrawable,
            let currentRenderPassDescriptor = self.metalBufferView!.currentRenderPassDescriptor,
            let previewImage = self.previewImage else {
                return
        }


        // create a texture for the CI image to render to
        let textureDescriptor = MTLTextureDescriptor.texture2DDescriptor(
            pixelFormat: .bgra8Unorm,
            width: Int(previewImage.extent.width),
            height: Int(previewImage.extent.height),
            mipmapped: false)
        textureDescriptor.usage = [.shaderWrite, .shaderRead]

        let texture = self.metalDevice!.makeTexture(descriptor: textureDescriptor)!

        if texture.width != textureWidth ||
            texture.height != textureHeight ||
            self.metalBufferView!.bounds != internalBounds {
            setupTransform(width: texture.width, height: texture.height, mirroring: mirroring, rotation: rotation)
        }

        // Set up command buffer and encoder
        guard let commandQueue = self.metalCommandQueue else {
            print("Failed to create Metal command queue")
            return
        }

        guard let commandBuffer = commandQueue.makeCommandBuffer() else {
            print("Failed to create Metal command buffer")
            return
        }

        // add rendering of the image to the command buffer
        ciContext.render(previewImage,
                         to: texture,
                         commandBuffer: commandBuffer,
                         bounds: previewImage.extent,
                         colorSpace: self.colorSpace)

        guard let commandEncoder = commandBuffer.makeRenderCommandEncoder(descriptor: currentRenderPassDescriptor) else {
            print("Failed to create Metal command encoder")
            return
        }

        // add vertex and fragment shaders to the command buffer
        commandEncoder.label = "Preview display"
        commandEncoder.setRenderPipelineState(renderPipelineState!)
        commandEncoder.setVertexBuffer(vertexCoordBuffer, offset: 0, index: 0)
        commandEncoder.setVertexBuffer(textCoordBuffer, offset: 0, index: 1)
        commandEncoder.setFragmentTexture(texture, index: 0)
        commandEncoder.setFragmentSamplerState(sampler, index: 0)
        commandEncoder.drawPrimitives(type: .triangleStrip, vertexStart: 0, vertexCount: 4)
        commandEncoder.endEncoding()

        commandBuffer.present(drawable) // Draw to the screen
        commandBuffer.commit()
    }


    final func mtkView(_ view: MTKView, drawableSizeWillChange size: CGSize) {

    }
    } // end of class MetalObject


Notes

The reason for using MTKViewDelegate instead of subclassing MTKView is that with a subclass, the draw call is made on the main thread. With the delegate approach shown above, a different Metal-related thread seems to make the call on each cycle, and this approach appears to give better performance. The details of the CIFilter usage in the update method above had to be redacted; it is simply a heavy chain of stacked CIFilters, and unfortunately there is no room to tweak those filters.
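For context, the kind of filter chain built inside update() typically looks something like the following minimal sketch. The specific filters (CIGaussianBlur, CIPhotoEffectNoir) are placeholders for illustration, not the filters actually used in the post:

    // Hypothetical stand-in for the redacted filter chain in update();
    // the filters and parameters here are placeholders.
    guard let pixelBuffer = newVideoPixelBuffer else { return }
    let input = CIImage(cvPixelBuffer: pixelBuffer)
    let blurred = input.applyingFilter("CIGaussianBlur",
                                       parameters: [kCIInputRadiusKey: 8.0])
    let filteredImage = blurred.applyingFilter("CIPhotoEffectNoir")
    self.previewImage = filteredImage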

Problem

The code above seems to slow down the main thread considerably, making the rest of the app's UI choppy. For example, scrolling a UIScrollView feels slow and stutters.

Goal

Tune the Metal view so it is easier on the CPU and gentler on the main thread, leaving enough headroom for the rest of the UI.

Judging from the profiling graph in the original question, all of the command buffer preparation is done on the CPU until it is presented and committed(?). Is there a way to offload that work from the CPU?

Any hints, feedback, tips, etc. to improve the drawing efficiency would be greatly appreciated.

Comments:

- The right thing to do is measure with Instruments. That said, there are a few obvious problems: you create a texture on every draw. Instead, you should create a small pool of textures up front, grab one for each draw, and return it to the pool once the command buffer has completed. Likewise, you shouldn't create a queue on every draw; you should normally create one queue at app setup time and use it throughout. Since you want to call draw() manually (from a background thread), don't set enableSetNeedsDisplay. There also seems to be no need to clear framebufferOnly.
- Regarding the queue: I create self.metalCommandQueue = self.metalDevice!.makeCommandQueue() at init time. Is that not correct?
- Oh, you're right. Sorry, I misread. I was thrown off by your use of guard and may have claimed you were failing to create the queue inside the draw method.
- So for the simplest setup: framebufferOnly = true and enableSetNeedsDisplay = false?
- Yes, but the texture creation is probably the bigger factor.
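To illustrate the texture-pool suggestion from the comments, a minimal sketch might look like the following. The class name, pool size, and locking strategy are assumptions for illustration, not part of the original discussion:

    import Foundation
    import Metal

    // A tiny reusable texture pool: take a texture per draw and hand it back
    // when the command buffer completes, instead of allocating one every frame.
    final class TexturePool {
        private let descriptor: MTLTextureDescriptor
        private var freeTextures: [MTLTexture] = []
        private let lock = NSLock()

        init(device: MTLDevice, width: Int, height: Int, capacity: Int = 3) {
            descriptor = MTLTextureDescriptor.texture2DDescriptor(
                pixelFormat: .bgra8Unorm, width: width, height: height, mipmapped: false)
            descriptor.usage = [.shaderWrite, .shaderRead]
            for _ in 0..<capacity {
                if let texture = device.makeTexture(descriptor: descriptor) {
                    freeTextures.append(texture)
                }
            }
        }

        func dequeue() -> MTLTexture? {
            lock.lock(); defer { lock.unlock() }
            return freeTextures.popLast()
        }

        func enqueue(_ texture: MTLTexture) {
            lock.lock(); defer { lock.unlock() }
            freeTextures.append(texture)
        }
    }

Inside draw(in:) you would then dequeue a texture instead of calling makeTexture(descriptor:), and return it from the command buffer's completion handler, roughly like this:

    guard let texture = texturePool.dequeue() else { return }
    commandBuffer.addCompletedHandler { _ in
        texturePool.enqueue(texture)
    }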

Answer 1:

There are a few things you can do to improve performance:

- Render directly into the view's drawable instead of rendering into a texture and then rendering again just to draw that texture into the view.
- Use the newer CIRenderDestination API to defer the actual texture retrieval to the moment the view is actually rendered (i.e., when Core Image has finished).

Here is the draw(in view: MTKView) I'm using in my Core Image project, modified for your case:

public func draw(in view: MTKView) {
    if let currentDrawable = view.currentDrawable,
        let commandBuffer = self.commandQueue.makeCommandBuffer() {
        let drawableSize = view.drawableSize

        // optional: scale the image to fit the view
        let scaleX = drawableSize.width / previewImage.extent.width
        let scaleY = drawableSize.height / previewImage.extent.height
        let scale = min(scaleX, scaleY)
        let scaledImage = previewImage.transformed(by: CGAffineTransform(scaleX: scale, y: scale))

        // optional: center in the view
        let originX = max(drawableSize.width - scaledImage.extent.size.width, 0) / 2
        let originY = max(drawableSize.height - scaledImage.extent.size.height, 0) / 2
        let centeredImage = scaledImage.transformed(by: CGAffineTransform(translationX: originX, y: originY))

        // create a render destination that allows to lazily fetch the target texture
        // which allows the encoder to process all CI commands _before_ the texture is actually available;
        // this gives a nice speed boost because the CPU doesn’t need to wait for the GPU to finish
        // before starting to encode the next frame
        let destination = CIRenderDestination(width: Int(drawableSize.width),
                                              height: Int(drawableSize.height),
                                              pixelFormat: view.colorPixelFormat,
                                              commandBuffer: commandBuffer,
                                              mtlTextureProvider: { () -> MTLTexture in
                                                return currentDrawable.texture
        })

        let task = try! self.context.startTask(toRender: centeredImage, to: destination)
        // bonus: you can Quick Look the task to see what’s actually scheduled for the GPU

        commandBuffer.present(currentDrawable)
        commandBuffer.commit()

        // optional: you can wait for the task execution and Quick Look the info object to get insights and metrics
        DispatchQueue.global(qos: .background).async {
            let info = try! task.waitUntilCompleted()
        }
    }
}

If this is still too slow, you can try passing the priorityRequestLow CIContextOption when creating your CIContext to tell Core Image to render at a low priority.
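For reference, that option goes in the context's options dictionary; a sketch using the question's metalDevice might look like this:

    // Ask Core Image to schedule its GPU work at low priority so the UI's own
    // rendering is less likely to be starved.
    self.ciContext = CIContext(mtlDevice: self.metalDevice!,
                               options: [.priorityRequestLow: true])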

Comments:

- This improved the FPS! However, it still seems to slow down the main thread/CPU enough to make the rest of the UI sluggish. Is it possible to do this work in update() before the call instead?
- Note that your UI drawing also happens on the GPU, so when the GPU is too busy, the UI hangs. It doesn't really matter whether the actual Core Image rendering happens in update() or draw(). You could lower the capture device's FPS so your system isn't flooded with frames it can't process anyway. You could also call self.metalBufferView?.setNeedsDisplay() instead of .draw() to let UIKit draw your view when appropriate.
- What about the CIFilters, should they run outside of draw()? Or does running them inside give better performance?
- That doesn't matter, because the CIFilters never actually run until you call one of the relevant drawing functions of the CIContext (startTask(toRender:to:) in this case), which needs to happen inside draw().
- @Gizmodo Did you try setting priorityRequestLow on the context?
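As a sketch of the comment's suggestion to lower the capture frame rate (videoDevice stands in for the app's AVCaptureDevice, and the 30 fps cap is an arbitrary example):

    import AVFoundation

    // Cap the camera at 30 fps so the pipeline isn't flooded with more frames
    // than the GPU can keep up with.
    do {
        try videoDevice.lockForConfiguration()
        videoDevice.activeVideoMinFrameDuration = CMTime(value: 1, timescale: 30)
        videoDevice.activeVideoMaxFrameDuration = CMTime(value: 1, timescale: 30)
        videoDevice.unlockForConfiguration()
    } catch {
        print("Could not lock the capture device for configuration: \(error)")
    }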
