Poor performance on H264 decoding
Posted: 2020-02-04 16:20:35

I am decoding an H264 video stream with Media Foundation. It works, but the performance is really poor on both Windows (desktop) and HoloLens 2 (UWP): decoding a single 1920x1080 frame takes 30 ms on a fairly powerful PC and 100 ms on the HoloLens.
I wonder whether it is using software decoding instead of hardware decoding. Any ideas on how to make sure I am getting hardware decoding, or any other tips on how to improve performance?
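One way to check what the decoder claims (a minimal sketch, not part of the project code below; the helper name and the logging are my own) is to query the decoder MFT's attribute store for MF_SA_D3D11_AWARE and CODECAPI_AVDecVideoAcceleration_H264. Keep in mind these are capability flags only; DXVA is actually used only after a D3D device manager has been handed to the MFT.

// Hypothetical helper: logs whether the MFT is D3D11-aware and whether the
// H.264 hardware-acceleration flag is set. These are claims, not proof that
// GPU decoding is actually happening.
#include <windows.h>
#include <stdio.h>
#include <mfapi.h>
#include <mftransform.h>
#include <codecapi.h>

void ReportDecoderCaps(IMFTransform* pDecoder)
{
    IMFAttributes* pAttrs = NULL;
    if (SUCCEEDED(pDecoder->GetAttributes(&pAttrs)))
    {
        UINT32 d3d11Aware = 0, hwAccel = 0;
        pAttrs->GetUINT32(MF_SA_D3D11_AWARE, &d3d11Aware);                 // can the MFT accept an IMFDXGIDeviceManager?
        pAttrs->GetUINT32(CODECAPI_AVDecVideoAcceleration_H264, &hwAccel); // DXVA switch on the Microsoft H.264 decoder
        char msg[128];
        sprintf_s(msg, "D3D11 aware: %u, H264 HW acceleration flag: %u\n", d3d11Aware, hwAccel);
        OutputDebugStringA(msg);
        pAttrs->Release();
    }
}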
The code is as follows:
Decode.cpp:
#include "MFUtility.h"
#include <stdio.h>
#include <tchar.h>
#include <evr.h>
#include <mfapi.h>
#include <mfplay.h>
#include <mfreadwrite.h>
#include <mferror.h>
#include <wmcodecdsp.h>
#include <Codecapi.h>
#include <fstream>
#pragma comment(lib, "mf.lib")
#pragma comment(lib, "mfplat.lib")
#pragma comment(lib, "mfplay.lib")
#pragma comment(lib, "mfreadwrite.lib")
#pragma comment(lib, "mfuuid.lib")
#pragma comment(lib, "wmcodecdspuuid.lib")
#if _MSC_VER // this is defined when compiling with Visual Studio
#define EXPORT_API __declspec(dllexport) // Visual Studio needs annotating exported functions with this
#else
#define EXPORT_API // XCode does not need annotating exported functions, so define is empty
#endif
IMFTransform* m_pDecoderTransform = NULL; // This is H264 Decoder MFT.
int _sampleCount = 0;
EXTERN_C const CLSID CLSID_CMSH264DecoderMFT;
// Link following functions C-style (required for plugins)
extern "C"
void EXPORT_API DecodeFrame(unsigned char* encodedFrame, int encodedFrameLength, unsigned char* decodedBuffer)
if (m_pDecoderTransform == NULL)
CHECK_HR(MFStartup(MF_VERSION),
"Media Foundation initialisation failed.");
IUnknown* m_pDecTransformUnk = NULL;
IMFMediaType* m_pDecInputMediaType = NULL, * m_pDecOutputMediaType = NULL;
DWORD m_mftStatus = 0;
int width = 1920;
int height = 1080;
int frameRate = 30;
CHECK_HR(CoCreateInstance(CLSID_CMSH264DecoderMFT, NULL, CLSCTX_INPROC_SERVER,
IID_IUnknown, (void**)&m_pDecTransformUnk), "Failed to create H264 decoder MFT.\n");
CHECK_HR(m_pDecTransformUnk->QueryInterface(
IID_PPV_ARGS(&m_pDecoderTransform)),
"Failed to get IMFTransform interface from H264 decoder MFT object.\n");
IMFAttributes* decoderAttributes;
CHECK_HR(m_pDecoderTransform->GetAttributes(&decoderAttributes),
"Can't get attributes.");
CHECK_HR(decoderAttributes->SetUINT32(CODECAPI_AVDecVideoAcceleration_H264, TRUE),
"Failed to enable CODECAPI_AVDecVideoAcceleration_H264");
CHECK_HR(decoderAttributes->SetUINT32(CODECAPI_AVLowLatencyMode, TRUE),
"Failed to enable CODECAPI_AVLowLatencyMode");
decoderAttributes->Release();
MFCreateMediaType(&m_pDecInputMediaType);
m_pDecInputMediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
m_pDecInputMediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264);
CHECK_HR(MFSetAttributeSize(m_pDecInputMediaType, MF_MT_FRAME_SIZE, width, height),
"Failed to set image size");
CHECK_HR(MFSetAttributeRatio(m_pDecInputMediaType, MF_MT_FRAME_RATE, frameRate, 1),
"Failed to set frame rate on H264 MFT out type.\n");
CHECK_HR(MFSetAttributeRatio(m_pDecInputMediaType, MF_MT_PIXEL_ASPECT_RATIO, 1, 1),
"Failed to set aspect ratio on H264 MFT out type.\n");
CHECK_HR(m_pDecoderTransform->SetInputType(0, m_pDecInputMediaType, 0),
"Failed to set input media type on H.264 decoder MFT.\n");
MFCreateMediaType(&m_pDecOutputMediaType);
m_pDecOutputMediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
m_pDecOutputMediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_IYUV);
CHECK_HR(MFSetAttributeSize(m_pDecOutputMediaType, MF_MT_FRAME_SIZE, width, height),
"Failed to set frame size on H264 MFT out type.\n");
CHECK_HR(MFSetAttributeRatio(m_pDecOutputMediaType, MF_MT_FRAME_RATE, frameRate, 1),
"Failed to set frame rate on H264 MFT out type.\n");
CHECK_HR(MFSetAttributeRatio(m_pDecOutputMediaType, MF_MT_PIXEL_ASPECT_RATIO, 1, 1),
"Failed to set aspect ratio on H264 MFT out type.\n");
CHECK_HR(m_pDecoderTransform->SetOutputType(0, m_pDecOutputMediaType, 0),
"Failed to set output media type on H.264 decoder MFT.\n");
CHECK_HR(m_pDecoderTransform->GetInputStatus(0, &m_mftStatus),
"Failed to get input status from H.264 decoder MFT.\n");
if (MFT_INPUT_STATUS_ACCEPT_DATA != m_mftStatus)
{
printf("H.264 decoder MFT is not accepting data.\n");
return;
}
CHECK_HR(m_pDecoderTransform->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, NULL),
"Failed to process FLUSH command on H.264 decoder MFT.\n");
CHECK_HR(m_pDecoderTransform->ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, NULL),
"Failed to process BEGIN_STREAMING command on H.264 decoder MFT.\n");
CHECK_HR(m_pDecoderTransform->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, NULL),
"Failed to process START_OF_STREAM command on H.264 decoder MFT.\n");
// Start processing frames.
IMFSample* pCopyVideoSample = NULL, * pH264DecodeOutSample = NULL;
BOOL h264DecodeTransformFlushed = FALSE;
IMFMediaBuffer* pDstBuffer = NULL;
MFCreateSample(&pCopyVideoSample);
CHECK_HR(MFCreateMemoryBuffer(encodedFrameLength, &pDstBuffer), "Failed to create memory buffer.\n");
CHECK_HR(pCopyVideoSample->AddBuffer(pDstBuffer), "Failed to add buffer to re-constructed sample.\n");
byte* reconByteBuffer;
DWORD reconBuffCurrLen = 0;
DWORD reconBuffMaxLen = 0;
CHECK_HR(pDstBuffer->Lock(&reconByteBuffer, &reconBuffMaxLen, &reconBuffCurrLen), "Error locking recon buffer.\n");
memcpy(reconByteBuffer, encodedFrame, encodedFrameLength);
CHECK_HR(pDstBuffer->Unlock(), "Error unlocking recon buffer.\n");
pDstBuffer->SetCurrentLength(encodedFrameLength);
CHECK_HR(m_pDecoderTransform->ProcessInput(0, pCopyVideoSample, 0),
"The H264 decoder ProcessInput call failed.");
HRESULT getOutputResult = GetTransformOutput(m_pDecoderTransform, &pH264DecodeOutSample, &h264DecodeTransformFlushed);
if (getOutputResult != S_OK && getOutputResult != MF_E_TRANSFORM_NEED_MORE_INPUT)
{
char errMsg[128];
sprintf_s(errMsg, "Error getting H264 decoder transform output, error code %.2X.\n", getOutputResult);
OutputDebugStringA(errMsg);
goto done;
}
if (h264DecodeTransformFlushed == TRUE)
{
// H264 decoder format changed. Clear the capture file and start again.
}
else if (pH264DecodeOutSample != NULL)
{
// Write decoded sample to capture output buffer.
CHECK_HR(WriteSampleToBuffer(pH264DecodeOutSample, decodedBuffer),
"Failed to write sample to buffer.");
}

SAFE_RELEASE(pH264DecodeOutSample);
_sampleCount++;
done:
// Release the per-frame sample and buffer so they do not leak on every call.
SAFE_RELEASE(pDstBuffer);
SAFE_RELEASE(pCopyVideoSample);
return;
}

} // extern "C"
MFUtility.h:
#include <stdio.h>
#include <tchar.h>
#include <mfapi.h>
#include <mfplay.h>
#include <mfreadwrite.h>
#include <mferror.h>
#include <locale>
#include <windows.h> // OutputDebugStringA
#include <stdlib.h>  // exit

// Evaluate the expression once; on failure log the message and the HRESULT, then terminate.
#define CHECK_HR(hr, msg) do { HRESULT hr_ = (hr); if (hr_ != S_OK) { OutputDebugStringA(msg); char msgBuf[1024]; sprintf_s(msgBuf, "ErrorCode: %.2X.\n", hr_); OutputDebugStringA(msgBuf); exit(99); } } while (0)
template <class T> void SAFE_RELEASE(T** ppT)
{
if (*ppT)
{
(*ppT)->Release();
*ppT = NULL;
}
}

template <class T> inline void SAFE_RELEASE(T*& pT)
{
if (pT != NULL)
{
pT->Release();
pT = NULL;
}
}
HRESULT WriteSampleToBuffer(IMFSample* pSample, unsigned char* decodedBuffer)
{
IMFMediaBuffer* buf = NULL;
DWORD bufLength;
HRESULT hr = S_OK;
hr = pSample->ConvertToContiguousBuffer(&buf);
CHECK_HR(hr, "ConvertToContiguousBuffer failed.");
hr = buf->GetCurrentLength(&bufLength);
CHECK_HR(hr, "Get buffer length failed.");
byte* byteBuffer = NULL;
DWORD buffMaxLen = 0, buffCurrLen = 0;
buf->Lock(&byteBuffer, &buffMaxLen, &buffCurrLen);
memcpy(decodedBuffer, byteBuffer, bufLength);
buf->Unlock(); // release the lock taken above before releasing the buffer
SAFE_RELEASE(buf);
return hr;
}
/**
* Creates a new single buffer media sample.
* @param[in] bufferSize: size of the memory buffer to attach to the created media sample.
* @param[out] pSample: pointer to the created single buffer media sample.
* @@Returns S_OK if successful or an error code if not.
*/
HRESULT CreateSingleBufferIMFSample(DWORD bufferSize, IMFSample** pSample)
{
IMFMediaBuffer* pBuffer = NULL;
HRESULT hr = S_OK;
hr = MFCreateSample(pSample);
CHECK_HR(hr, "Failed to create MF sample.");
// Adds a ref count to the pBuffer object.
hr = MFCreateMemoryBuffer(bufferSize, &pBuffer);
CHECK_HR(hr, "Failed to create memory buffer.");
// Adds another ref count to the pBuffer object.
hr = (*pSample)->AddBuffer(pBuffer);
CHECK_HR(hr, "Failed to add sample to buffer.");
// Leave the single ref count that will be removed when the pSample is released.
SAFE_RELEASE(pBuffer);
return hr;
}
/**
* Attempts to get an output sample from an MFT transform.
* @param[in] pTransform: pointer to the media transform to apply.
* @param[out] pOutSample: pointer to the media sample output by the transform. Can be NULL
* if the transform did not produce one.
* @param[out] transformFlushed: if set to true means the transform format changed and the
* contents were flushed. Output format of sample most likely changed.
* @@Returns S_OK if successful or an error code if not.
*/
HRESULT GetTransformOutput(IMFTransform* pTransform, IMFSample** pOutSample, BOOL* transformFlushed)
{
MFT_OUTPUT_STREAM_INFO StreamInfo = { 0 };
MFT_OUTPUT_DATA_BUFFER outputDataBuffer = { 0 };
DWORD processOutputStatus = 0;
IMFMediaType* pChangedOutMediaType = NULL;
HRESULT hr = S_OK;
*transformFlushed = FALSE;
hr = pTransform->GetOutputStreamInfo(0, &StreamInfo);
CHECK_HR(hr, "Failed to get output stream info from MFT.");
outputDataBuffer.dwStreamID = 0;
outputDataBuffer.dwStatus = 0;
outputDataBuffer.pEvents = NULL;
if ((StreamInfo.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES) == 0)
{
hr = CreateSingleBufferIMFSample(StreamInfo.cbSize, pOutSample);
CHECK_HR(hr, "Failed to create new single buffer IMF sample.");
outputDataBuffer.pSample = *pOutSample;
}

auto mftProcessOutput = pTransform->ProcessOutput(0, 1, &outputDataBuffer, &processOutputStatus);
if (mftProcessOutput == S_OK)
{
// Sample is ready and allocated on the transform output buffer.
*pOutSample = outputDataBuffer.pSample;
}
else if (mftProcessOutput == MF_E_TRANSFORM_STREAM_CHANGE)
{
// Format of the input stream has changed. https://docs.microsoft.com/en-us/windows/win32/medfound/handling-stream-changes
if (outputDataBuffer.dwStatus == MFT_OUTPUT_DATA_BUFFER_FORMAT_CHANGE)
{
hr = pTransform->GetOutputAvailableType(0, 0, &pChangedOutMediaType);
CHECK_HR(hr, "Failed to get the MFT output media type after a stream change.");
hr = pChangedOutMediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_IYUV);
CHECK_HR(hr, "Failed to set media sub type.");
hr = pTransform->SetOutputType(0, pChangedOutMediaType, 0);
CHECK_HR(hr, "Failed to set new output media type on MFT.");
hr = pTransform->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, NULL);
CHECK_HR(hr, "Failed to process FLUSH command on MFT.");
*transformFlushed = TRUE;
}
else
{
hr = E_NOTIMPL;
}
SAFE_RELEASE(pOutSample);
*pOutSample = NULL;
}
else if (mftProcessOutput == MF_E_TRANSFORM_NEED_MORE_INPUT)
{
// More input is not an error condition but it means the allocated output sample is empty.
SAFE_RELEASE(pOutSample);
*pOutSample = NULL;
hr = MF_E_TRANSFORM_NEED_MORE_INPUT;
}
else
{
hr = mftProcessOutput;
SAFE_RELEASE(pOutSample);
*pOutSample = NULL;
}

SAFE_RELEASE(pChangedOutMediaType);

return hr;
}
Comments:

- Can you clarify what exactly you are measuring, in particular where the measurement starts and ends?
- Could you explain what you are doing with the IMFSample** pOutSample? Is it meant for GPU rendering?
- The time is measured right before and after the call to DecodeFrame(...). pOutSample is copied into decodedBuffer inside WriteSampleToBuffer(...). I need the YUV pixels of each frame in decodedBuffer so they can be rendered later in Unity.

Answer 1:

To get the best performance you need to feed the decoder a DirectX surface:
MFCreateDXSurfaceBuffer function
This is a specialized IMFMediaBuffer:
DirectX Surface Buffer
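MFCreateDXSurfaceBuffer is the Direct3D 9 route; on Direct3D 11 (which is what Unity and the HoloLens use) the counterpart is MFCreateDXGISurfaceBuffer. Below is a rough sketch of wrapping an existing ID3D11Texture2D in a sample, assuming the texture is NV12 and was created on the same D3D11 device the decoder uses; the function name and parameters are illustrative only.

// Sketch only: wrap a D3D11 texture in an IMFMediaBuffer/IMFSample so the
// Media Foundation pipeline can work against GPU memory instead of a copy.
#include <d3d11.h>
#include <mfapi.h>

HRESULT WrapTextureInSample(ID3D11Texture2D* pTexture, IMFSample** ppSample)
{
    IMFMediaBuffer* pBuffer = NULL;
    HRESULT hr = MFCreateDXGISurfaceBuffer(__uuidof(ID3D11Texture2D), pTexture, 0, FALSE, &pBuffer);
    if (SUCCEEDED(hr)) hr = MFCreateSample(ppSample);
    if (SUCCEEDED(hr)) hr = (*ppSample)->AddBuffer(pBuffer);
    if (pBuffer) pBuffer->Release();
    return hr;
}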
For your use case the DirectX surface will come from Unity3D and has to be compatible with it. I also think you will need to maintain a pool of DirectX surfaces to synchronize decoding and rendering.

Because of the interop between Unity3D and Media Foundation, I cannot guarantee that everything will work as expected.

What is certain, performance-wise, is that the decoded frames must stay on the GPU until they are rendered.

In your current code I suspect the decoded frames come back to system memory and are then uploaded to GPU memory again before rendering, which is not optimal. You might even get better performance with software decoding, because you would avoid this ping-pong between system memory and GPU memory. Keep in mind that Unity3D already uses a lot of GPU bandwidth.
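For completeness, the usual way to let the Microsoft H.264 decoder MFT do DXVA decoding and keep its output in GPU memory is to hand it a DXGI device manager bound to the renderer's D3D11 device. A minimal sketch, assuming pD3DDevice comes from (or is shared with) the renderer and with the helper name being my own:

// Sketch: bind a D3D11 device to the decoder through an IMFDXGIDeviceManager so
// it can use DXVA and output D3D11 textures instead of system-memory buffers.
#include <d3d11.h>
#include <mfapi.h>
#include <mftransform.h>

HRESULT EnableGpuDecoding(IMFTransform* pDecoder, ID3D11Device* pD3DDevice, IMFDXGIDeviceManager** ppManager)
{
    UINT resetToken = 0;
    IMFDXGIDeviceManager* pManager = NULL;

    HRESULT hr = MFCreateDXGIDeviceManager(&resetToken, &pManager);
    if (FAILED(hr)) return hr;

    // Associate the renderer's device with the manager. The device should have
    // multithread protection enabled (ID3D10Multithread::SetMultithreadProtected).
    hr = pManager->ResetDevice(pD3DDevice, resetToken);

    // Hand the manager to the MFT. This only succeeds if the MFT reports MF_SA_D3D11_AWARE.
    if (SUCCEEDED(hr))
        hr = pDecoder->ProcessMessage(MFT_MESSAGE_SET_D3D_MANAGER, (ULONG_PTR)pManager);

    if (FAILED(hr)) { pManager->Release(); return hr; }

    *ppManager = pManager; // keep the manager alive for the lifetime of the decoder
    return S_OK;
}

Once the manager is set, a D3D-aware decoder typically allocates its own output samples (the MFT_OUTPUT_STREAM_PROVIDES_SAMPLES path that GetTransformOutput above already checks for), and each output sample wraps a GPU texture that can go to the renderer without a round trip through system memory.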
Comments:

- Thanks, I will decode into and render from a DirectX buffer directly.