使用 WASAPI 录制音频流

Posted 2023-02-25

技术标签:

【中文标题】使用 WASAPI 录制音频流【英文标题】：Record an audio stream with WASAPI 【发布时间】：2020-10-12 12:46:20 【问题描述】：

我正在阅读 Capturing a stream 和 Loopback recording 等文档，但找不到一个完整的示例，说明如何使用 Windows WASAPI 从 Loopback 音频设备（有时称为“您听到的声音”（What U Hear）或“立体声混音”（Stereo Mix））录制音频。

您能否提供一个简单的可重现示例，展示如何使用 C++ 从 WASAPI 设备循环录制音频块？

这是 Python 中的一个类似（工作）示例：

import soundcard as sc  # installed with: pip install soundcard

# Pick the first entry of the capture-device list, with loopback devices included in that list.
loopback_device = sc.all_microphones(include_loopback=True)[0]

# Keep the recorder open and print every chunk as it is read, forever.
with loopback_device.recorder(samplerate=44100) as recorder:
    while True:
        chunk = recorder.record(numframes=None)
        print(chunk)    # chunks of audio data (448 samples x 2 channels as an array by default)

【问题讨论】：

“在 Capturing a stream 的代码示例中，RecordAudioStream 函数可以很容易地修改为配置环回模式的捕获流。”你看看按照这部分说明（this part）操作是否有帮助？我看过这个 @RitaHan-MSFT，但我甚至没能编译通过原始代码 docs.microsoft.com/en-us/windows/win32/coreaudio/…：没有 #include，没有构建说明等等。在这种情况下，这些细节并不是那么显而易见。如果我能先把它构建成功，那么，是的，我想我就能把它改成“loopback-mode capture”。更多详细信息请查看此博客：Sample - WASAPI loopback capture (record what you hear)。@RitaHan-MSFT 你指的是 github.com/mvaneerde/blog/tree/develop/loopback-capture/… 吗？看起来很有希望，但它被拆分到许多 cpp 文件中，不太容易理解，不过我会研究一下，你说得对。最好的办法是更新 docs.microsoft.com/en-us/windows/win32/coreaudio/… 这个 MSDN 页面，并提供可复现的代码（包含构建说明、#include、头文件等）。它包含一个 VS 解决方案文件，使用 Visual Studio 很容易构建和运行。 【参考方案1】：

这是环回模式音频捕获的示例。

按照 Loopback recording 文档的说明，对 Capturing a stream 文档中的示例代码做如下修改：

// In the call to the IMMDeviceEnumerator::GetDefaultAudioEndpoint method, change the first parameter (dataFlow) from eCapture to eRender.
// Loopback capture records what a rendering endpoint is playing, so the default render device is requested here.
hr = pEnumerator->GetDefaultAudioEndpoint(
    eRender, eConsole, &pDevice);

...

// In the call to the IAudioClient::Initialize method, change the value of the second parameter (StreamFlags) from 0 to AUDCLNT_STREAMFLAGS_LOOPBACK.
// The remaining arguments are unchanged from the capture sample: shared mode, the requested buffer
// duration, 0 for the periodicity argument, the format in pwfx, and NULL for the session GUID argument.
hr = pAudioClient->Initialize(
    AUDCLNT_SHAREMODE_SHARED,
    AUDCLNT_STREAMFLAGS_LOOPBACK,
    hnsRequestedDuration,
    0,
    pwfx,
    NULL);

文档中缺少的部分：CopyData() 以及写入文件的函数（WriteWaveHeader() 和 FinishWaveFile()）。下面给出这些函数的示例实现。更多详细信息请参阅博客 Sample - WASAPI loopback capture (record what you hear)。

// Appends one captured packet to the open WAVE file and decides when recording should stop.
//
// pData     - packet bytes obtained from IAudioCaptureClient::GetBuffer, or NULL when the
//             caller wants silence written for this packet.
// NumFrames - number of audio frames in the packet; 0 is treated as an error.
// pDone     - set to TRUE once clock() reports more than 10 seconds.
// pwfx      - stream format; nBlockAlign is used as the byte size of one frame.
// hFile     - MMIO handle the audio bytes are written to.
//
// Returns S_OK on success, E_UNEXPECTED for an empty packet or a short write.
HRESULT MyAudioSink::CopyData(BYTE* pData, UINT32 NumFrames, BOOL* pDone, WAVEFORMATEX* pwfx, HMMIO hFile)
{
    if (0 == NumFrames) {
        wprintf(L"IAudioCaptureClient::GetBuffer said to read 0 frames\n");
        return E_UNEXPECTED;
    }

    LONG lBytesToWrite = NumFrames * pwfx->nBlockAlign;
    LONG lBytesWritten = 0;

    if (NULL == pData) {
        // No sample data was supplied: emit zero bytes instead of handing mmioWrite a NULL pointer.
        // NOTE(review): all-zero bytes are silence for signed PCM and float formats; 8-bit
        // unsigned PCM would need 0x80 -- confirm against the format in pwfx.
        static const BYTE silence[4096] = { 0 };
        while (lBytesWritten < lBytesToWrite) {
            LONG lChunk = lBytesToWrite - lBytesWritten;
            if (lChunk > static_cast<LONG>(sizeof(silence))) {
                lChunk = static_cast<LONG>(sizeof(silence));
            }
            LONG lResult = mmioWrite(hFile, reinterpret_cast<const char*>(silence), lChunk);
            if (lResult != lChunk) {
                break; // reported by the size check below
            }
            lBytesWritten += lResult;
        }
    } else {
#pragma prefast(suppress: __WARNING_INCORRECT_ANNOTATION, "IAudioCaptureClient::GetBuffer SAL annotation implies a 1-byte buffer")
        lBytesWritten = mmioWrite(hFile, reinterpret_cast<PCHAR>(pData), lBytesToWrite);
    }

    if (lBytesToWrite != lBytesWritten) {
        // LONG is signed, so print with %ld (mmioWrite signals failure with -1).
        wprintf(L"mmioWrite wrote %ld bytes : expected %ld bytes", lBytesWritten, lBytesToWrite);
        return E_UNEXPECTED;
    }

    static int CallCount = 0;
    cout << "CallCount = " << CallCount++ << "NumFrames: " << NumFrames << endl ;

    if (clock() > 10 * CLOCKS_PER_SEC) { //Record 10 seconds. From the first time call clock() at the beginning of the main().
        *pDone = TRUE;
    }

    return S_OK;
}

// Writes the start of a WAVE file: a RIFF/WAVE chunk containing a 'fmt ' chunk, a 'fact'
// chunk and an opened (still empty) 'data' chunk.
//
// hFile   - MMIO handle opened for writing.
// pwfx    - format written into 'fmt '; sizeof(WAVEFORMATEX) + cbSize bytes are copied.
// pckRIFF - receives the RIFF/WAVE chunk info; needed later to ascend out of it.
// pckData - receives the 'data' chunk info; needed later to ascend out of it.
//
// On success the file position is left inside the 'data' chunk so audio bytes can be
// appended directly. Returns S_OK, or E_FAIL after printing which step failed.
HRESULT WriteWaveHeader(HMMIO hFile, LPCWAVEFORMATEX pwfx, MMCKINFO* pckRIFF, MMCKINFO* pckData)
{
    MMRESULT result;

    // make a RIFF/WAVE chunk
    pckRIFF->ckid = MAKEFOURCC('R', 'I', 'F', 'F');
    pckRIFF->fccType = MAKEFOURCC('W', 'A', 'V', 'E');

    result = mmioCreateChunk(hFile, pckRIFF, MMIO_CREATERIFF);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioCreateChunk(\"RIFF/WAVE\") failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    // make a 'fmt ' chunk (within the RIFF/WAVE chunk)
    MMCKINFO chunk = { 0 }; // zero-initialised so no indeterminate fields reach mmioCreateChunk
    chunk.ckid = MAKEFOURCC('f', 'm', 't', ' ');
    result = mmioCreateChunk(hFile, &chunk, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioCreateChunk(\"fmt \") failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    // write the WAVEFORMATEX data to it (including the cbSize extra bytes that follow it)
    LONG lBytesInWfx = sizeof(WAVEFORMATEX) + pwfx->cbSize;
    LONG lBytesWritten =
        mmioWrite(
            hFile,
            reinterpret_cast<PCHAR>(const_cast<LPWAVEFORMATEX>(pwfx)),
            lBytesInWfx
        );
    if (lBytesWritten != lBytesInWfx) {
        wprintf(L"mmioWrite(fmt data) wrote %ld bytes; expected %ld bytes", lBytesWritten, lBytesInWfx);
        return E_FAIL;
    }

    // ascend from the 'fmt ' chunk
    result = mmioAscend(hFile, &chunk, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioAscend(\"fmt \" failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    // make a 'fact' chunk whose data is (DWORD)0
    chunk.ckid = MAKEFOURCC('f', 'a', 'c', 't');
    result = mmioCreateChunk(hFile, &chunk, 0);
    if (MMSYSERR_NOERROR != result) {
        // This message previously named the "fmt " chunk, which hid which step failed.
        wprintf(L"mmioCreateChunk(\"fact\") failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    // write (DWORD)0 to it as a placeholder
    // NOTE(review): the original comment says "this is cleaned up later", but FinishWaveFile
    // as shown only ascends the chunks and never rewrites this value -- confirm.
    DWORD frames = 0;
    lBytesWritten = mmioWrite(hFile, reinterpret_cast<PCHAR>(&frames), sizeof(frames));
    if (lBytesWritten != static_cast<LONG>(sizeof(frames))) {
        wprintf(L"mmioWrite(fact data) wrote %ld bytes; expected %u bytes", lBytesWritten, (UINT32)sizeof(frames));
        return E_FAIL;
    }

    // ascend from the 'fact' chunk
    result = mmioAscend(hFile, &chunk, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioAscend(\"fact\" failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    // make a 'data' chunk and leave the data pointer there
    pckData->ckid = MAKEFOURCC('d', 'a', 't', 'a');
    result = mmioCreateChunk(hFile, pckData, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioCreateChunk(\"data\") failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    return S_OK;
}

// Closes the two chunks left open by WriteWaveHeader.
//
// hFile   - MMIO handle the audio was written to.
// pckRIFF - chunk info of the outer RIFF/WAVE chunk.
// pckData - chunk info of the 'data' chunk.
//
// The 'data' chunk is ascended first and the RIFF/WAVE chunk second, so the arguments must
// be passed in the declared order (RIFF, then data).
// Returns S_OK, or E_FAIL after printing which ascend failed.
HRESULT FinishWaveFile(HMMIO hFile, MMCKINFO* pckRIFF, MMCKINFO* pckData)
{
    MMRESULT result;

    result = mmioAscend(hFile, pckData, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioAscend(\"data\" failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    result = mmioAscend(hFile, pckRIFF, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioAscend(\"RIFF/WAVE\" failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    return S_OK;
}
在 pAudioClient->Start() 之前调用 WriteWaveHeader，在 pAudioClient->Stop() 之后调用 FinishWaveFile。

这样，程序会录制约 10 秒钟在 Windows 上播放的音频。

更新 #1：

#include <Windows.h>
#include <mmsystem.h>
#include <mmdeviceapi.h>
#include <audioclient.h>
#include <time.h>
#include <iostream>

int main()

    clock();

    HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);

    // Create file
    MMIOINFO mi =  0 ;
    hFile = mmioOpen(
        // some flags cause mmioOpen write to this buffer
        // but not any that we're using
        (LPWSTR)fileName,
        &mi,
        MMIO_WRITE | MMIO_CREATE
    );

    if (NULL == hFile) 
        wprintf(L"mmioOpen(\"%ls\", ...) failed. wErrorRet == %u", fileName, GetLastError());
        return E_FAIL;
    

    MyAudioSink AudioSink;
    RecordAudioStream(&AudioSink);

    mmioClose(hFile, 0);

    CoUninitialize();
    return 0;

编译命令：

cl -DUNICODE loopbackCapture.cpp /link winmm.lib user32.lib Kernel32.lib Ole32.lib

更新 #2：

    #include <Windows.h>
    #include <mmsystem.h>
    #include <mmdeviceapi.h>
    #include <audioclient.h>
    #include <time.h>
    #include <iostream>
    
    using namespace std;

    // Link against the multimedia library that provides the mmio* functions.
    #pragma comment(lib, "Winmm.lib")

    // Output file name, the stop flag polled by the capture loop, and the MMIO handle of the
    // output file (shared between main and RecordAudioStream).
    WCHAR fileName[] = L"loopback-capture.wav";
    BOOL bDone = FALSE;
    HMMIO hFile = NULL;

    // REFERENCE_TIME time units per second and per millisecond
    #define REFTIMES_PER_SEC  10000000
    #define REFTIMES_PER_MILLISEC  10000

    // Both macros are written to be used WITHOUT a trailing semicolon, and EXIT_ON_ERROR needs
    // an `Exit:` label in the enclosing function. The braces keep each macro a single statement,
    // so SAFE_RELEASE only resets the pointer as part of the guarded branch.
    #define EXIT_ON_ERROR(hres)  \
                  if (FAILED(hres)) { goto Exit; }
    #define SAFE_RELEASE(punk)  \
                  if ((punk) != NULL)  \
                    { (punk)->Release(); (punk) = NULL; }

    // Class and interface identifiers used by RecordAudioStream.
    const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
    const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator);
    const IID IID_IAudioClient = __uuidof(IAudioClient);
    const IID IID_IAudioCaptureClient = __uuidof(IAudioCaptureClient);
    
    class MyAudioSink
    
    public:
        HRESULT CopyData(BYTE* pData, UINT32 NumFrames, BOOL* pDone, WAVEFORMATEX* pwfx, HMMIO hFile);
    ;
    
    // Forward declarations of the helpers that are defined after main().
    // WriteWaveHeader creates the RIFF/WAVE, 'fmt ', 'fact' and 'data' chunks.
    HRESULT WriteWaveHeader(HMMIO hFile, LPCWAVEFORMATEX pwfx, MMCKINFO* pckRIFF, MMCKINFO* pckData);
    // FinishWaveFile ascends out of the 'data' chunk and then the RIFF/WAVE chunk.
    HRESULT FinishWaveFile(HMMIO hFile, MMCKINFO* pckRIFF, MMCKINFO* pckData);
    // RecordAudioStream runs the loopback capture and feeds every packet to pMySink.
    HRESULT RecordAudioStream(MyAudioSink* pMySink);
    
    int main()
    
        clock();
    
        HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);
    
        // Create file
        MMIOINFO mi =  0 ;
        hFile = mmioOpen(
            // some flags cause mmioOpen write to this buffer
            // but not any that we're using
            (LPWSTR)fileName,
            &mi,
            MMIO_WRITE | MMIO_CREATE
        );
    
        if (NULL == hFile) 
            wprintf(L"mmioOpen(\"%ls\", ...) failed. wErrorRet == %u", fileName, GetLastError());
            return E_FAIL;
        
    
        MyAudioSink AudioSink;
        RecordAudioStream(&AudioSink);
    
        mmioClose(hFile, 0);
    
        CoUninitialize();
        return 0;
    
    
    
    // Appends one captured packet to the open WAVE file and decides when recording should stop.
    //
    // pData     - packet bytes obtained from IAudioCaptureClient::GetBuffer, or NULL when the
    //             caller wants silence written for this packet.
    // NumFrames - number of audio frames in the packet; 0 is treated as an error.
    // pDone     - set to TRUE once clock() reports more than 10 seconds.
    // pwfx      - stream format; nBlockAlign is used as the byte size of one frame.
    // hFile     - MMIO handle the audio bytes are written to.
    //
    // Returns S_OK on success, E_UNEXPECTED for an empty packet or a short write.
    HRESULT MyAudioSink::CopyData(BYTE* pData, UINT32 NumFrames, BOOL* pDone, WAVEFORMATEX* pwfx, HMMIO hFile)
    {
        if (0 == NumFrames) {
            wprintf(L"IAudioCaptureClient::GetBuffer said to read 0 frames\n");
            return E_UNEXPECTED;
        }

        LONG lBytesToWrite = NumFrames * pwfx->nBlockAlign;
        LONG lBytesWritten = 0;

        if (NULL == pData) {
            // No sample data was supplied: emit zero bytes instead of handing mmioWrite a NULL pointer.
            // NOTE(review): all-zero bytes are silence for signed PCM and float formats; 8-bit
            // unsigned PCM would need 0x80 -- confirm against the format in pwfx.
            static const BYTE silence[4096] = { 0 };
            while (lBytesWritten < lBytesToWrite) {
                LONG lChunk = lBytesToWrite - lBytesWritten;
                if (lChunk > static_cast<LONG>(sizeof(silence))) {
                    lChunk = static_cast<LONG>(sizeof(silence));
                }
                LONG lResult = mmioWrite(hFile, reinterpret_cast<const char*>(silence), lChunk);
                if (lResult != lChunk) {
                    break; // reported by the size check below
                }
                lBytesWritten += lResult;
            }
        } else {
    #pragma prefast(suppress: __WARNING_INCORRECT_ANNOTATION, "IAudioCaptureClient::GetBuffer SAL annotation implies a 1-byte buffer")
            lBytesWritten = mmioWrite(hFile, reinterpret_cast<PCHAR>(pData), lBytesToWrite);
        }

        if (lBytesToWrite != lBytesWritten) {
            // LONG is signed, so print with %ld (mmioWrite signals failure with -1).
            wprintf(L"mmioWrite wrote %ld bytes : expected %ld bytes", lBytesWritten, lBytesToWrite);
            return E_UNEXPECTED;
        }

        static int CallCount = 0;
        cout << "CallCount = " << CallCount++ << "NumFrames: " << NumFrames << endl ;

        if (clock() > 10 * CLOCKS_PER_SEC) { //Record 10 seconds. From the first time call clock() at the beginning of the main().
            *pDone = TRUE;
        }

        return S_OK;
    }
    
    // Captures what the default render endpoint is playing (WASAPI loopback) and hands every
    // packet to pMySink->CopyData until the global bDone flag becomes TRUE.
    //
    // pMySink - sink that receives each captured packet.
    //
    // Uses the global hFile as the destination: the WAVE header is written before the stream
    // is started and the chunks are closed after it is stopped.
    // Returns the first failing HRESULT, or the result of FinishWaveFile on the normal path.
    // Every exit goes through the Exit label so the COM objects and the mix format are released.
    HRESULT RecordAudioStream(MyAudioSink* pMySink)
    {
        HRESULT hr;
        REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
        REFERENCE_TIME hnsActualDuration;
        UINT32 bufferFrameCount;
        UINT32 numFramesAvailable;
        IMMDeviceEnumerator* pEnumerator = NULL;
        IMMDevice* pDevice = NULL;
        IAudioClient* pAudioClient = NULL;
        IAudioCaptureClient* pCaptureClient = NULL;
        WAVEFORMATEX* pwfx = NULL;
        UINT32 packetLength = 0;

        BYTE* pData;
        DWORD flags;

        MMCKINFO ckRIFF = { 0 };
        MMCKINFO ckData = { 0 };

        hr = CoCreateInstance(
            CLSID_MMDeviceEnumerator, NULL,
            CLSCTX_ALL, IID_IMMDeviceEnumerator,
            (void**)& pEnumerator);
        EXIT_ON_ERROR(hr)

        // eRender (not eCapture): loopback capture is opened on a rendering endpoint.
        hr = pEnumerator->GetDefaultAudioEndpoint(
            eRender, eConsole, &pDevice);
        EXIT_ON_ERROR(hr)

        hr = pDevice->Activate(
            IID_IAudioClient, CLSCTX_ALL,
            NULL, (void**)& pAudioClient);
        EXIT_ON_ERROR(hr)

        hr = pAudioClient->GetMixFormat(&pwfx);
        EXIT_ON_ERROR(hr)

        hr = pAudioClient->Initialize(
            AUDCLNT_SHAREMODE_SHARED,
            AUDCLNT_STREAMFLAGS_LOOPBACK,
            hnsRequestedDuration,
            0,
            pwfx,
            NULL);
        EXIT_ON_ERROR(hr)

        // Get the size of the allocated buffer.
        hr = pAudioClient->GetBufferSize(&bufferFrameCount);
        EXIT_ON_ERROR(hr)

        hr = pAudioClient->GetService(
            IID_IAudioCaptureClient,
            (void**)& pCaptureClient);
        EXIT_ON_ERROR(hr)

        // WriteWaveHeader does its own logging. Leave through Exit rather than returning
        // directly, so the interfaces and pwfx acquired above are released.
        hr = WriteWaveHeader((HMMIO)hFile, pwfx, &ckRIFF, &ckData);
        EXIT_ON_ERROR(hr)

        // Calculate the actual duration of the allocated buffer.
        hnsActualDuration = static_cast<REFERENCE_TIME>(
            (double)REFTIMES_PER_SEC * bufferFrameCount / pwfx->nSamplesPerSec);

        hr = pAudioClient->Start();  // Start recording.
        EXIT_ON_ERROR(hr)

        // Each loop fills about half of the shared buffer.
        while (bDone == FALSE)
        {
            // Sleep for half the buffer duration.
            Sleep(static_cast<DWORD>(hnsActualDuration / REFTIMES_PER_MILLISEC / 2));

            hr = pCaptureClient->GetNextPacketSize(&packetLength);
            EXIT_ON_ERROR(hr)

            while (packetLength != 0)
            {
                // Get the available data in the shared buffer.
                hr = pCaptureClient->GetBuffer(
                    &pData,
                    &numFramesAvailable,
                    &flags, NULL, NULL);
                EXIT_ON_ERROR(hr)

                if (flags & AUDCLNT_BUFFERFLAGS_SILENT)
                {
                    pData = NULL;  // Tell CopyData to write silence.
                }

                // Copy the available capture data to the audio sink.
                hr = pMySink->CopyData(
                    pData, numFramesAvailable, &bDone, pwfx, (HMMIO)hFile);
                EXIT_ON_ERROR(hr)

                hr = pCaptureClient->ReleaseBuffer(numFramesAvailable);
                EXIT_ON_ERROR(hr)

                hr = pCaptureClient->GetNextPacketSize(&packetLength);
                EXIT_ON_ERROR(hr)
            }
        }

        hr = pAudioClient->Stop();  // Stop recording.
        EXIT_ON_ERROR(hr)

        // Arguments follow the declared order (RIFF chunk first, data chunk second).
        // FinishWaveFile does its own logging; its result is returned after the cleanup below.
        hr = FinishWaveFile((HMMIO)hFile, &ckRIFF, &ckData);

    Exit:
        CoTaskMemFree(pwfx);
        SAFE_RELEASE(pEnumerator)
        SAFE_RELEASE(pDevice)
        SAFE_RELEASE(pAudioClient)
        SAFE_RELEASE(pCaptureClient)

        return hr;
    }

// Writes the start of a WAVE file: a RIFF/WAVE chunk containing a 'fmt ' chunk, a 'fact'
// chunk and an opened (still empty) 'data' chunk.
//
// hFile   - MMIO handle opened for writing.
// pwfx    - format written into 'fmt '; sizeof(WAVEFORMATEX) + cbSize bytes are copied.
// pckRIFF - receives the RIFF/WAVE chunk info; needed later to ascend out of it.
// pckData - receives the 'data' chunk info; needed later to ascend out of it.
//
// On success the file position is left inside the 'data' chunk so audio bytes can be
// appended directly. Returns S_OK, or E_FAIL after printing which step failed.
HRESULT WriteWaveHeader(HMMIO hFile, LPCWAVEFORMATEX pwfx, MMCKINFO* pckRIFF, MMCKINFO* pckData)
{
    MMRESULT result;

    // make a RIFF/WAVE chunk
    pckRIFF->ckid = MAKEFOURCC('R', 'I', 'F', 'F');
    pckRIFF->fccType = MAKEFOURCC('W', 'A', 'V', 'E');

    result = mmioCreateChunk(hFile, pckRIFF, MMIO_CREATERIFF);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioCreateChunk(\"RIFF/WAVE\") failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    // make a 'fmt ' chunk (within the RIFF/WAVE chunk)
    MMCKINFO chunk = { 0 }; // zero-initialised so no indeterminate fields reach mmioCreateChunk
    chunk.ckid = MAKEFOURCC('f', 'm', 't', ' ');
    result = mmioCreateChunk(hFile, &chunk, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioCreateChunk(\"fmt \") failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    // write the WAVEFORMATEX data to it (including the cbSize extra bytes that follow it)
    LONG lBytesInWfx = sizeof(WAVEFORMATEX) + pwfx->cbSize;
    LONG lBytesWritten =
        mmioWrite(
            hFile,
            reinterpret_cast<PCHAR>(const_cast<LPWAVEFORMATEX>(pwfx)),
            lBytesInWfx
        );
    if (lBytesWritten != lBytesInWfx) {
        wprintf(L"mmioWrite(fmt data) wrote %ld bytes; expected %ld bytes", lBytesWritten, lBytesInWfx);
        return E_FAIL;
    }

    // ascend from the 'fmt ' chunk
    result = mmioAscend(hFile, &chunk, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioAscend(\"fmt \" failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    // make a 'fact' chunk whose data is (DWORD)0
    chunk.ckid = MAKEFOURCC('f', 'a', 'c', 't');
    result = mmioCreateChunk(hFile, &chunk, 0);
    if (MMSYSERR_NOERROR != result) {
        // This message previously named the "fmt " chunk, which hid which step failed.
        wprintf(L"mmioCreateChunk(\"fact\") failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    // write (DWORD)0 to it as a placeholder
    // NOTE(review): the original comment says "this is cleaned up later", but FinishWaveFile
    // as shown only ascends the chunks and never rewrites this value -- confirm.
    DWORD frames = 0;
    lBytesWritten = mmioWrite(hFile, reinterpret_cast<PCHAR>(&frames), sizeof(frames));
    if (lBytesWritten != static_cast<LONG>(sizeof(frames))) {
        wprintf(L"mmioWrite(fact data) wrote %ld bytes; expected %u bytes", lBytesWritten, (UINT32)sizeof(frames));
        return E_FAIL;
    }

    // ascend from the 'fact' chunk
    result = mmioAscend(hFile, &chunk, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioAscend(\"fact\" failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    // make a 'data' chunk and leave the data pointer there
    pckData->ckid = MAKEFOURCC('d', 'a', 't', 'a');
    result = mmioCreateChunk(hFile, pckData, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioCreateChunk(\"data\") failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    return S_OK;
}

// Closes the two chunks left open by WriteWaveHeader.
//
// hFile   - MMIO handle the audio was written to.
// pckRIFF - chunk info of the outer RIFF/WAVE chunk.
// pckData - chunk info of the 'data' chunk.
//
// The 'data' chunk is ascended first and the RIFF/WAVE chunk second, so the arguments must
// be passed in the declared order (RIFF, then data).
// Returns S_OK, or E_FAIL after printing which ascend failed.
HRESULT FinishWaveFile(HMMIO hFile, MMCKINFO* pckRIFF, MMCKINFO* pckData)
{
    MMRESULT result;

    result = mmioAscend(hFile, pckData, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioAscend(\"data\" failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    result = mmioAscend(hFile, pckRIFF, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioAscend(\"RIFF/WAVE\" failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    return S_OK;
}
【讨论】：

非常感谢！我会试试这个。你能否把相关的头文件包含（#include）和 main() 也加上，以便代码可以复现？另外，像 cl test.cpp /link winmm.lib user32.lib Kernel32.lib /subsystem:windows /entry:wmainCRTStartup 这样的编译命令，对以后查阅的读者会非常有用。非常感谢 @RitaHanMSFT 的更新，我们离解决方案更近了！还有一些错误：MyAudioSink 未定义为类等。您能否包含完整的 .cpp 代码？（或者暂时将其粘贴到 paste.ee 或类似的地方），我一直没能编译成功。PS：在 paste.ee/p/MA69P 中，我尝试组合了 1）来自 docs.microsoft.com/en-us/windows/win32/coreaudio/… 的 C++ 代码；2）你的第二个代码块；3）你的第三个代码块（“更新”）。@Basj 请查看我的更新 #2 以获取完整的 .cpp 代码。它仅供参考，尚不适合用于生产环境。非常感谢 @RitaHanMSFT，我会尽快尝试！谢谢，录音可以工作了 @RitaHanMSFT！（我仍然无法打开 .wav，可能文件头中有一些错误，但这部分我可以自己检查）。还有一个小问题：当电脑上没有声音播放时，不会录制静音帧，你知道该如何解决吗？即使声卡没有在播放声音，也应该录制静音。

以上是关于使用 WASAPI 录制音频流的主要内容，如果未能解决你的问题，请参考以下文章