如何将音频添加到使用 SinkWriter 创建的 mp4 文件中?
Posted
技术标签:
【中文标题】如何将音频添加到使用 SinkWriter 创建的 mp4 文件中?【英文标题】:How to add audio to an mp4 file created with a SinkWriter? 【发布时间】:2017-03-07 01:38:30 【问题描述】:我想用 Sink Writer(接收器编写器)生成一个 mp4 文件。按照 MSDN 提供的示例可以生成视频,但无法写入音频。于是我搜索了一下,找到了下面这段源代码,但用它生成的文件同样没有声音。是这段代码本身有问题吗?如果是,错在哪里?
以下是搜索到的代码:
//Audio constants

//Major type set on both the audio output profile and the audio input type
const GUID AUDIO_MAJOR_TYPE = MFMediaType_Audio;
//Subtype of the encoded output stream (AAC)
const GUID AUDIO_ENCODING_FORMAT = MFAudioFormat_AAC;
//Subtype of the samples handed to the sink writer (PCM)
const GUID AUDIO_INPUT_FORMAT = MFAudioFormat_PCM;
//Sampling rate, in samples per second
const UINT32 AUDIO_SAMPLES_PER_SECOND = 44100;
//Average bytes per second set on the encoded (AAC) output profile
const UINT32 AUDIO_AVG_BYTES_PER_SECOND = 16000;
//Channel count: 1 = mono, 2 = stereo
const UINT32 AUDIO_NUM_CHANNELS = 1;
//Bits per sample
const UINT32 AUDIO_BITS_PER_SAMPLE = 16;
//Quantity of buffers per second (not referenced by the code in this listing)
const UINT32 AUDIO_ONE_SECOND = 10;
//Max. buffer size: sample rate * channels * bytes per sample
const UINT32 AUDIO_BUFFER_LENGTH = AUDIO_SAMPLES_PER_SECOND * AUDIO_NUM_CHANNELS * (AUDIO_BITS_PER_SAMPLE / 8);
//Sample duration (presumably in 100-ns units, i.e. one second - TODO confirm; not referenced in this listing)
const LONGLONG AUDIO_SAMPLE_DURATION = 10000000;
//Creation of a template to release pointers
//Calls Release() on *ppT and resets *ppT to NULL.
//ppT: address of the pointer to release. A null address, or a pointer that is
//     already NULL, makes the call a no-op, so it is safe to call repeatedly.
template <class T> void SafeRelease(T **ppT)
{
    if (ppT != NULL && *ppT != NULL)
    {
        (*ppT)->Release();
        *ppT = NULL;
    }
}
//Creation of the Byte Stream
//Opens FileName for writing (deleting any existing file) and wraps it in a byte stream.
//FileName: path of the output file.
//Returns the new byte stream, or NULL when MFCreateFile fails.
IMFByteStream* CreateFileByteStream(LPCWSTR FileName)
{
    IMFByteStream *pByteStream = NULL;
    const HRESULT hr = MFCreateFile(MF_ACCESSMODE_WRITE, MF_OPENMODE_DELETE_IF_EXIST, MF_FILEFLAGS_NONE, FileName, &pByteStream);
    if (FAILED(hr))
    {
        //Never hand a pointer back to the caller after a failed call
        pByteStream = NULL;
    }
    return pByteStream;
}
//Creation of the Video profile (H264)
//NOTE(review): only the signature appears in this listing; the function body was omitted.
//main() treats a NULL return value as failure.
IMFMediaType* CreateVideoProfile()
//Creation of the Audio profile (AAC)
//Builds the media type describing the encoded audio stream from the AUDIO_* constants.
//Returns the new media type, or NULL if any step fails.
IMFMediaType* CreateAudioProfile()
{
    IMFMediaType *pMediaType = NULL;
    HRESULT hr = MFCreateMediaType(&pMediaType);
    if (SUCCEEDED(hr))
    {
        hr = pMediaType->SetGUID(MF_MT_MAJOR_TYPE, AUDIO_MAJOR_TYPE);
    }
    if (SUCCEEDED(hr))
    {
        hr = pMediaType->SetGUID(MF_MT_SUBTYPE, AUDIO_ENCODING_FORMAT);
    }
    if (SUCCEEDED(hr))
    {
        hr = pMediaType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, AUDIO_BITS_PER_SAMPLE);
    }
    if (SUCCEEDED(hr))
    {
        hr = pMediaType->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, AUDIO_SAMPLES_PER_SECOND);
    }
    if (SUCCEEDED(hr))
    {
        hr = pMediaType->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, AUDIO_NUM_CHANNELS);
    }
    if (SUCCEEDED(hr))
    {
        hr = pMediaType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, AUDIO_AVG_BYTES_PER_SECOND);
    }
    if (FAILED(hr))
    {
        //Release the partially configured type; merely nulling the pointer would leak it
        SafeRelease(&pMediaType);
    }
    return pMediaType;
}
//Create an aggregate source (both audio and video)
//pSource1, pSource2: the two media sources to combine.
//pAggSource: passed by value and overwritten immediately, so the caller's
//            argument is ignored; the result is delivered only through the return value.
//Returns the aggregate source, or NULL if any step fails.
IMFMediaSource* CreateAggregatedSource(IMFMediaSource *pSource1, IMFMediaSource *pSource2, IMFMediaSource *pAggSource)
{
    pAggSource = NULL;
    IMFCollection *pCollection = NULL;
    HRESULT hr = MFCreateCollection(&pCollection);
    if (SUCCEEDED(hr))
    {
        hr = pCollection->AddElement(pSource1);
    }
    if (SUCCEEDED(hr))
    {
        hr = pCollection->AddElement(pSource2);
    }
    if (SUCCEEDED(hr))
    {
        hr = MFCreateAggregateSource(pCollection, &pAggSource);
    }
    //The collection is only needed while the aggregate source is being built
    SafeRelease(&pCollection);
    if (FAILED(hr))
    {
        SafeRelease(&pAggSource);
    }
    return pAggSource;
}
//Creation of the MPEG-4 MediaSink
//pByteStream: byte stream the MP4 data is written to.
//pVideoMediaType, pAudioMediaType: media types of the video and audio streams.
//Returns the new media sink, or NULL when MFCreateMPEG4MediaSink fails.
IMFMediaSink* CreateMediaSink(IMFByteStream *pByteStream, IMFMediaType *pVideoMediaType, IMFMediaType *pAudioMediaType)
{
    IMFMediaSink *pMediaSink = NULL;
    const HRESULT hr = MFCreateMPEG4MediaSink(pByteStream, pVideoMediaType, pAudioMediaType, &pMediaSink);
    if (FAILED(hr) || pMediaSink == NULL)
    {
        return NULL;
    }

    //// DEBUG ////
    //Query the sink only after creation succeeded; the previous code dereferenced
    //pMediaSink before checking hr. The values are kept solely for inspection in a debugger.
    DWORD dwCharacteristics = 0;
    DWORD dwStreamSinkCount = 0;
    pMediaSink->GetCharacteristics(&dwCharacteristics);
    pMediaSink->GetStreamSinkCount(&dwStreamSinkCount);

    return pMediaSink;
}
//Creation of the attribute store for a sink writer.
//Sets the MPEG-4 container type, turns hardware transforms off and leaves
//format converters enabled.
//Returns the new attribute store, or NULL if any step fails.
IMFAttributes* CreateAttributesForSinkWriter()
{
    IMFAttributes *pMFAttributes = NULL;
    HRESULT hr = MFCreateAttributes(&pMFAttributes, 100);
    if (SUCCEEDED(hr))
    {
        hr = pMFAttributes->SetGUID(MF_TRANSCODE_CONTAINERTYPE, MFTranscodeContainerType_MPEG4);
    }
    if (SUCCEEDED(hr))
    {
        hr = pMFAttributes->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, FALSE); //no hardware encoding
    }
    if (SUCCEEDED(hr))
    {
        hr = pMFAttributes->SetUINT32(MF_READWRITE_DISABLE_CONVERTERS, FALSE); //enable converting formats
    }
    if (FAILED(hr))
    {
        //Release the partially filled store; merely nulling the pointer would leak it
        SafeRelease(&pMFAttributes);
    }
    return pMFAttributes;
}
//Initialization of the Video SinkWriter...
//NOTE(review): only the signature appears in this listing; the function body was omitted.
//main() passes the addresses of its video writer pointer and stream index plus the shared media sink.
HRESULT InitializeSinkWriterVideo(IMFSinkWriter **ppWriter, DWORD *pStreamIndex, IMFMediaSink *pMediaSink)
//Initialization of the Audio SinkWriter...
//Creates a sink writer on top of pMediaSink, declares the PCM input format for
//the audio stream and calls BeginWriting.
//ppWriter:     receives the sink writer (AddRef'ed; the caller must release it). NULL on failure.
//pStreamIndex: receives the audio stream index. 0 on failure.
//pMediaSink:   media sink that was created with an audio media type.
//Returns S_OK on success, E_POINTER for a null output parameter, otherwise the
//HRESULT of the first failing call (for example MF_E_INVALIDMEDIATYPE,
//MF_E_INVALIDSTREAMNUMBER or MF_E_TOPO_CODEC_NOT_FOUND).
HRESULT InitializeSinkWriterAudio(IMFSinkWriter **ppWriter, DWORD *pStreamIndex, IMFMediaSink *pMediaSink)
{
    if (ppWriter == NULL || pStreamIndex == NULL)
    {
        return E_POINTER;
    }
    *ppWriter = NULL;
    *pStreamIndex = 0;

    IMFSinkWriter *pSinkWriter = NULL;
    IMFMediaType *pMediaTypeIn = NULL;
    //presumably the audio stream sink is index 1 because the sink was created
    //with (video, audio) media types - verify against CreateMediaSink's caller
    const DWORD streamIndex = 1;
    //Derived PCM values: bytes per sample frame and bytes per second
    const UINT32 blockAlignment = AUDIO_NUM_CHANNELS * (AUDIO_BITS_PER_SAMPLE / 8);
    const UINT32 pcmBytesPerSecond = blockAlignment * AUDIO_SAMPLES_PER_SECOND;

    HRESULT hr = MFCreateSinkWriterFromMediaSink(pMediaSink, NULL, &pSinkWriter);

    //input : audio
    if (SUCCEEDED(hr))
    {
        hr = MFCreateMediaType(&pMediaTypeIn);
    }
    if (SUCCEEDED(hr))
    {
        hr = pMediaTypeIn->SetGUID(MF_MT_MAJOR_TYPE, AUDIO_MAJOR_TYPE);
    }
    if (SUCCEEDED(hr))
    {
        hr = pMediaTypeIn->SetGUID(MF_MT_SUBTYPE, AUDIO_INPUT_FORMAT);
    }
    if (SUCCEEDED(hr))
    {
        hr = pMediaTypeIn->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, AUDIO_BITS_PER_SAMPLE);
    }
    if (SUCCEEDED(hr))
    {
        hr = pMediaTypeIn->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, AUDIO_SAMPLES_PER_SECOND);
    }
    if (SUCCEEDED(hr))
    {
        hr = pMediaTypeIn->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, AUDIO_NUM_CHANNELS);
    }
    //Complete the uncompressed-audio description: block alignment and byte rate
    //were missing from the PCM type, leaving it only partially specified.
    if (SUCCEEDED(hr))
    {
        hr = pMediaTypeIn->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, blockAlignment);
    }
    if (SUCCEEDED(hr))
    {
        hr = pMediaTypeIn->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, pcmBytesPerSecond);
    }
    if (SUCCEEDED(hr))
    {
        hr = pSinkWriter->SetInputMediaType(streamIndex, pMediaTypeIn, NULL);
    }

    //Tell the Audio SinkWriter to begin data treatment
    if (SUCCEEDED(hr))
    {
        hr = pSinkWriter->BeginWriting();
    }

    //Returns the pointer of the caller
    if (SUCCEEDED(hr))
    {
        *ppWriter = pSinkWriter;
        (*ppWriter)->AddRef();
        *pStreamIndex = streamIndex;
    }

    //Release pointers (the caller keeps its own reference taken above)
    SafeRelease(&pSinkWriter);
    SafeRelease(&pMediaTypeIn);
    return hr;
}
//Write a video frame
//NOTE(review): only the signature appears in this listing; the function body was omitted.
//main() calls it once per frame with the frame's start time and duration.
HRESULT WriteVideoFrame(IMFSinkWriter *pWriter, DWORD streamIndex, const LONGLONG& rtStart, const LONGLONG& rtDuration)
//Write an audio packet
//Fills a buffer with random bytes (noise), wraps it in a media sample and sends
//it to the sink writer.
//pWriter:     sink writer that receives the sample.
//streamIndex: index of the audio stream on pWriter.
//rtStart:     time stamp of the packet.
//rtDuration:  duration of the packet.
//Quantity:    number of audio sample frames in the packet.
//Returns S_OK on success, E_POINTER for a null writer, otherwise the HRESULT
//of the first failing call.
HRESULT WriteAudioPacket(IMFSinkWriter *pWriter, DWORD streamIndex, const LONGLONG& rtStart, const LONGLONG& rtDuration, UINT32 Quantity)
{
    if (pWriter == NULL)
    {
        return E_POINTER;
    }

    IMFSample *pSample = NULL;
    IMFMediaBuffer *pBuffer = NULL;
    BYTE *pData = NULL;
    //Bytes per sample frame come from the format constants instead of a literal 2,
    //so the size stays correct if the bit depth or channel count changes.
    const DWORD cbBuffer = Quantity * (AUDIO_BITS_PER_SAMPLE / 8) * AUDIO_NUM_CHANNELS;

    //Create a new memory buffer, whose max. size is cbBuffer
    HRESULT hr = MFCreateMemoryBuffer(cbBuffer, &pBuffer);

    //Lock the buffer and copy the audio packet to the buffer
    if (SUCCEEDED(hr))
    {
        hr = pBuffer->Lock(&pData, NULL, NULL);
    }
    if (SUCCEEDED(hr))
    {
        for (DWORD n = 0; n < cbBuffer; n++)
        {
            pData[n] = static_cast<BYTE>(rand() & 0xFF); //generation of random noise
        }
        hr = pBuffer->Unlock();
    }

    // Set the data length of the buffer
    if (SUCCEEDED(hr))
    {
        hr = pBuffer->SetCurrentLength(cbBuffer);
    }

    //Create a media sample and add the buffer to the sample
    if (SUCCEEDED(hr))
    {
        hr = MFCreateSample(&pSample);
    }
    if (SUCCEEDED(hr))
    {
        hr = pSample->AddBuffer(pBuffer);
    }

    //Set the time stamp and the duration
    if (SUCCEEDED(hr))
    {
        hr = pSample->SetSampleTime(rtStart);
    }
    if (SUCCEEDED(hr))
    {
        hr = pSample->SetSampleDuration(rtDuration);
    }

    //Send the sample to the Sink Writer
    if (SUCCEEDED(hr))
    {
        hr = pWriter->WriteSample(streamIndex, pSample);
    }

    //Release pointers
    SafeRelease(&pSample);
    SafeRelease(&pBuffer);
    return hr;
}
// MAIN FUNCTION
void main()
HRESULT hr = S_OK;
IMFByteStream *spByteStream = NULL;
IMFMediaSink *pMediaSink = NULL;
IMFSinkWriter *spSinkWriterVid = NULL;
IMFSinkWriter *spSinkWriterAud = NULL;
IMFMediaType *spVideo = NULL;
IMFMediaType *spAudio = NULL;
//IMFMediaEventGenerator *spMFMediaEvtGene = NULL;
//IMFMediaEvent *spMFMediaEvent = NULL;
IMFAttributes *spAttrib = NULL;
DWORD sindexVid = 0, sindexAud = 0, j = 0;
LPCWSTR str = L"outputfile.mp4";
hr = CoInitialize(NULL);
if (SUCCEEDED(hr))
hr = MFStartup(MF_VERSION);
if (SUCCEEDED(hr))
spByteStream = CreateFileByteStream(str);
if (spByteStream != NULL)
spVideo = CreateVideoProfile();
if (spVideo != NULL)
spAudio = CreateAudioProfile();
if (spAudio != NULL)
pMediaSink = CreateMediaSink(spByteStream, spVideo, spAudio);
if (pMediaSink != NULL)
hr = InitializeSinkWriterVideo(&spSinkWriterVid, &sindexVid, pMediaSink);
if (SUCCEEDED(hr))
LONGLONG rtStartVid = 0;
UINT64 rtDurationVid = 0;
/********************************************************
* VIDEO PART *
********************************************************/
//Calculate the average time per frame, for video
//MFFrameRateToAverageTimePerFrame(VIDEO_FPS, 1, &rtDurationVid);
//loop to treat all the pictures
for (DWORD i = 0; i < VIDEO_FRAME_COUNT; ++i, ++j)
//Picture pixels
for (DWORD k = 0; k < VIDEO_PELS; k++)
if (j>255)
j = 0;
videoFrameBuffer[k] = ((j << 16) & 0x00FF0000) | ((j << 8) & 0x0000FF00) | (j & 0x000000FF);
hr = WriteVideoFrame(spSinkWriterVid, sindexVid, rtStartVid, rtDurationVid);
if (FAILED(hr))
break;
//Update the time stamp value
rtStartVid += rtDurationVid;
//Finalization of writing with the Video SinkWriter
if (SUCCEEDED(hr))
hr = spSinkWriterVid->Finalize();
SafeRelease(&spVideo);
SafeRelease(&spSinkWriterVid);
if (SUCCEEDED(hr))
hr = InitializeSinkWriterAudio(&spSinkWriterAud, &sindexAud, pMediaSink);
if (SUCCEEDED(hr))
LONGLONG rtStartAud = 0;
UINT64 rtDurationAud;
double QtyAudiosamplesPerVideoFrame = 0;
//Calculate the approximate quantity of samples, according to a video frame duration
//44100 Hz -> 1 s
//????? Hz -> 0.04 s (= 40 ms = one video frame duration)
if (VIDEO_FPS != 0)
QtyAudioSamplesPerVideoFrame = ((double)AUDIO_SAMPLES_PER_SECOND / (double)VIDEO_FPS);
else
QtyAudioSamplesPerVideoFrame = 0;
MFFrameRateToAverageTimePerFrame(VIDEO_FPS, 1, &rtDurationAud); //we treat the same duration as the video
//it means that we will treat N audio packets for the last of one picture (=40 ms)
//loop to treat all the audio packets
if (rtDurationAud != 0)
for (DWORD i = 0; i < VIDEO_FRAME_COUNT; ++i)
//Audio packets
hr = WriteAudioPacket(spSinkWriterAud, sindexAud, rtStartAud, rtDurationAud, (UINT32)QtyAudioSamplesPerVideoFrame);
if (FAILED(hr))
break;
//Update the time stamp value
rtStartAud += rtDurationAud;
//Finalization of writing with the Audio SinkWriter
if (SUCCEEDED(hr))
hr = spSinkWriterAud->Finalize();
//Release pointers
SafeRelease(&spByteStream);
SafeRelease(&spAudio);
SafeRelease(&spSinkWriterAud);
SafeRelease(&spAttrib);
//Shutdown the MediaSink (not done by the SinkWriter)
pMediaSink->Shutdown();
SafeRelease(&pMediaSink);
//Shutdown MediaFoundation
MFShutdown();
CoUninitialize();
//CDialog::OnOK();
参考:MSDN 论坛上的How to add audio data to a video file created by a SinkWriter ?
【问题讨论】:
【参考方案1】:在已经写入视频的现有 IMFSinkWriter 上(例如按照 MSDN 教程代码创建的),调用 AddStream 和 SetInputMediaType 为音频初始化一个附加流,这样生成的 MP4 文件中就会包含音轨。由此你会得到一个单独的音频流索引,后续调用都要使用它。与对视频调用 WriteSample 类似,对音频也调用 WriteSample 并传入相应的流索引,在应用程序的主循环中把需要编码/写入的音频数据交给它。
【讨论】:
感谢您的回复,但我没能理解这个答案。我在这段代码中已经使用了 WriteSample,却仍然听不到声音。我应该修改这段代码的哪些部分?
你贴出的代码无法直接运行,你没有贴出错误码(如果有的话),也没有贴出你得到的输出。这段代码大体上(非常粗略地说)是正确的,但存在一个严重的问题:你创建了两个接收器编写器实例(spSinkWriterAud 和 spSinkWriterVid),而实际上只需要一个。你应该按照我上面所说的方式,只使用一个接收器编写器。
以上是关于如何将音频添加到使用 SinkWriter 创建的 mp4 文件中?的主要内容,如果未能解决你的问题,请参考以下文章
在 Windows Media Foundation 中使用 Sink Writer 添加到视频的音频示例
如何使用 Microsoft Media Foundation 将原始 48khz/32 位 PCM 编码为 FLAC?