如何在 Windows 10 上使用 C++ 将连续的原始音频数据记录到循环缓冲区中？

Posted 2023-02-25

技术标签:

【中文标题】如何在 Windows 10 上使用 C++ 将连续的原始音频数据记录到循环缓冲区中？【英文标题】：How to record continuous raw audio data into a circular buffer with C++ on Windows 10? 【发布时间】：2020-08-17 17:59:01 【问题描述】：

由于 Windows Multimedia 被证明完全无法录制连续音频（见我之前的另一个问题），有人建议我改用 Windows Core Audio。这里有一份类似手册的文档，但我不清楚如何编写让录音正常工作所需的大量样板代码。谁能提供一个完整的、最小的、将连续音频录制到循环缓冲区的实现？

到目前为止，我卡在下面的代码上：程序无法越过 pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice); 这一行，因为 pEnumerator 仍然是 nullptr。

#define VC_EXTRALEAN
#define _USE_MATH_DEFINES
#include <Windows.h>
#include <Audioclient.h>
#include <Mmdeviceapi.h>

#define REFTIMES_PER_SEC  10000000
#define REFTIMES_PER_MILLISEC  10000

int main() 
    REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
    UINT32 bufferFrameCount;
    UINT32 numFramesAvailable;

    IMMDeviceEnumerator* pEnumerator = NULL;
    IMMDevice* pDevice = NULL;
    IAudioClient* pAudioClient = NULL;
    IAudioCaptureClient* pCaptureClient = NULL;
    WAVEFORMATEX* pwfx = NULL;
    UINT32 packetLength = 0;
    BYTE* pData;
    DWORD flags;

    CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), (void**)&pEnumerator);
    pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);
    pDevice->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL, (void**)&pAudioClient);
    pAudioClient->GetMixFormat(&pwfx);
    pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, hnsRequestedDuration, 0, pwfx, NULL);
    pAudioClient->GetBufferSize(&bufferFrameCount); // Get the size of the allocated buffer.
    pAudioClient->GetService(__uuidof(IAudioCaptureClient), (void**)&pCaptureClient);

    // Calculate the actual duration of the allocated buffer.
    REFERENCE_TIME hnsActualDuration = (double)REFTIMES_PER_SEC* bufferFrameCount / pwfx->nSamplesPerSec;

    pAudioClient->Start();  // Start recording.

    // Each loop fills about half of the shared buffer.
    while(true) 
        // Sleep for half the buffer duration.
        Sleep(hnsActualDuration/REFTIMES_PER_MILLISEC/2);
        pCaptureClient->GetNextPacketSize(&packetLength);
        while(packetLength != 0) 
            // Get the available data in the shared buffer.
            pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL);
            if(flags&AUDCLNT_BUFFERFLAGS_SILENT) 
                pData = NULL;  // Tell CopyData to write silence.
            

            // Copy the available capture data to the audio sink.
            //hr = pMySink->CopyData(pData, numFramesAvailable, &bDone);

            pCaptureClient->ReleaseBuffer(numFramesAvailable);
            pCaptureClient->GetNextPacketSize(&packetLength);
        
    
    pAudioClient->Stop();
    return 0;

编辑（24.07.2021）：

这是我的故障排除代码更新：

#define VC_EXTRALEAN
#define _USE_MATH_DEFINES
#include <Windows.h>
#include <Audioclient.h>
#include <Mmdeviceapi.h>

#include <chrono>
// Minimal stopwatch: start() records the current time and stop() returns the
// number of seconds elapsed since the most recent start() (or construction).
class Clock {
private:
    // steady_clock is monotonic, so the measured interval can never be negative
    // even if the system time is adjusted while the stopwatch is running.
    using clock_type = std::chrono::steady_clock;
    std::chrono::time_point<clock_type> t;
public:
    // Starts timing immediately on construction.
    Clock() { start(); }
    // Restarts the stopwatch from the current instant.
    void start() { t = clock_type::now(); }
    // Returns the elapsed time in seconds; the stopwatch keeps running.
    double stop() const { return std::chrono::duration_cast<std::chrono::duration<double>>(clock_type::now() - t).count(); }
};

// Capture configuration. Plain `unsigned int` is used because `uint` is not a
// standard C++ type and is not provided by every toolchain.
const unsigned int base        =   4096;
const unsigned int sample_rate =  48000; // must be supported by microphone
const unsigned int sample_size = 1*base; // must be a power of 2
const unsigned int bandwidth   =   5000; // must be <= sample_rate/2

// History buffer of mono samples, newest sample at index 0. Zero-initialised.
float* wave = new float[sample_size](); // circular buffer

// Pushes `offset` new stereo frames into the history buffer `wave`.
//
// wave   - buffer of `sample_size` floats; existing content is shifted towards
//          higher indices by the number of inserted frames and the oldest
//          samples fall off the end.
// buffer - interleaved left/right float samples, `offset` frames long. A null
//          pointer is treated as silence (zeros are inserted).
// offset - number of frames in `buffer`; values <= 0 leave `wave` untouched.
//          If it exceeds `sample_size`, only the newest `sample_size` frames
//          are kept.
//
// Each inserted sample is the average of the left and right channel, and the
// newest frame ends up at wave[0].
void fill(float* const wave, const float* const buffer, int offset) {
    const int capacity = static_cast<int>(sample_size);
    if(offset <= 0) {
        return;
    }
    // Never insert more frames than the history buffer can hold.
    const int fresh = offset < capacity ? offset : capacity;

    // Shift from the back so no sample is overwritten before it is moved.
    // The last valid index is capacity-1; starting at capacity would write
    // one element past the end of the buffer.
    for(int i = capacity - 1; i >= fresh; i--) {
        wave[i] = wave[i - fresh];
    }
    for(int i = 0; i < fresh; i++) {
        const int p = offset - 1 - i; // newest frame first
        wave[i] = buffer != nullptr ? 0.5f*(buffer[2*p] + buffer[2*p+1]) : 0.0f; // left and right channels
    }
}

int main() 
    for(uint i=0; i<sample_size; i++) wave[i] = 0.0f;
    
    Clock clock;

    #define REFTIMES_PER_SEC  10000000
    #define REFTIMES_PER_MILLISEC  10000

    REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
    UINT32 bufferFrameCount;
    UINT32 numFramesAvailable;

    IMMDeviceEnumerator* pEnumerator = NULL;
    IMMDevice* pDevice = NULL;
    IAudioClient* pAudioClient = NULL;
    IAudioCaptureClient* pCaptureClient = NULL;
    WAVEFORMATEX* pwfx = NULL;
    UINT32 packetLength = 0;
    BYTE* pData;
    DWORD flags;

    CoInitializeEx(NULL, COINIT_MULTITHREADED);
    CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), (void**)&pEnumerator);
    pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);
    pDevice->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL, (void**)&pAudioClient);
    pAudioClient->GetMixFormat(&pwfx);
    
    println(pwfx->wFormatTag);// 65534
    println(WAVE_FORMAT_PCM);// 1
    println(pwfx->nChannels);// 2
    println((uint)pwfx->nSamplesPerSec);// 48000
    println(pwfx->wBitsPerSample);// 32
    println(pwfx->nBlockAlign);// 8
    println(pwfx->wBitsPerSample*pwfx->nChannels/8);// 8
    println((uint)pwfx->nAvgBytesPerSec);// 384000
    println((uint)(pwfx->nBlockAlign*pwfx->nSamplesPerSec*pwfx->nChannels));// 768000
    println(pwfx->cbSize);// 22

    pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, hnsRequestedDuration, 0, pwfx, NULL);
    pAudioClient->GetBufferSize(&bufferFrameCount); // Get the size of the allocated buffer.
    pAudioClient->GetService(__uuidof(IAudioCaptureClient), (void**)&pCaptureClient);

    // Calculate the actual duration of the allocated buffer.
    //REFERENCE_TIME hnsActualDuration = (double)REFTIMES_PER_SEC* bufferFrameCount / pwfx->nSamplesPerSec;

    pAudioClient->Start();  // Start recording.
    
    while(running) 

        pCaptureClient->GetNextPacketSize(&packetLength); // packetLength and numFramesAvailable are either 0 or 480
        pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL);

        const int offset = (uint)numFramesAvailable;
        if(offset>0) 
            fill(wave, (float*)pData, offset); // here I add pData to the circular buffer "wave"
        

        while(packetLength != 0) 
            pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL); // Get the available data in the shared buffer.
            if(flags&AUDCLNT_BUFFERFLAGS_SILENT) 
                pData = NULL;  // Tell CopyData to write silence.
            
            pCaptureClient->ReleaseBuffer(numFramesAvailable);
            pCaptureClient->GetNextPacketSize(&packetLength);
        

        sleep(1.0/120.0-clock.stop());
        clock.start();
    
    pAudioClient->Stop();

【问题讨论】：

您在另一个问题中调用 Windows Multimedia 的方式仍然有严重错误，这就是您没有获得连续音频的原因。 【参考方案1】：

你没有调用CoInitializeEx，所以所有的 COM 调用都会失败。

您还应该测试所有调用以查看它们是否返回错误。

针对评论（comments）中提出的问题：

我相信如果你想在共享模式下操作端点，那么你必须使用 GetMixFormat 返回的参数。这意味着：

您仅限于一个采样率（除非您编写代码来执行转换，这是一项不平凡的任务）

如果您希望样本为浮点数，则必须自己转换它们

要编写可在所有机器上运行的代码，您必须能够处理 GetMixFormat 返回的任何格式。这可能是：

16 位整数

24 位整数 (nBlockAlign = 3)

32 位容器中的 24 位整数 (nBlockAlign = 4)

32 位整数

32 位浮点数（罕见）

64 位浮点（以我的经验闻所未闻）

样本将按照运行代码的机器的本机字节顺序排列，并且是交错的。

所以，请根据 pwfx 中的各个参数进行分支判断，并为您想要支持的每种采样格式编写相应的代码。

假设您希望把 float 样本归一化到 -1 .. +1，并且输入数据为 2 通道，那么对于 16 位整数可以这样做，例如：

// Convert interleaved 16-bit integer samples to floats in roughly -1 .. +1.
const int16_t *inbuf = (const int16_t *) pData;
float *outbuf = ...;

// Two channels per frame, hence numFramesAvailable * 2 samples in total.
for (UINT32 i = 0; i < numFramesAvailable * 2; ++i)
{
    const int16_t sample = *inbuf++;
    // Multiply by the reciprocal instead of dividing by 32767.
    *outbuf++ = (float) (sample * (1.0 / 32767));
}

请注意，我通过乘以倒数来避免（慢）浮点除法（编译器将预先计算 1.0 / 32767）。

剩下的交给你。

【讨论】：

谢谢。现在它至少可以无错误地运行了。但是我无法更改 pwfx 的属性（只要我更改任何属性，pCaptureClient 就仍然是 nullptr），所以我只能使用 pAudioClient->GetMixFormat(&pwfx); 返回的音频格式。我也弄不清楚 pData 中的字节顺序是怎样的。这就是我想要的：一个浮点数组，保存 1 秒内每个时间点的幅度（即 48k 个元素）。我想连续地（每秒约 60 次）、没有间断地向这个数组送入新的音频数据（FIFO）。您能否提供一个最小的可运行示例？更新了我的答案。再次感谢您。我发现不知出于什么原因，pData 缓冲区里的数据已经是 float 格式，所以强制转换会把数据弄乱。但更重要的是，和我在上面问题中链接的另一个问题类似，音频帧是不连续的。有没有把多个录音缓冲区衔接起来的办法？pData 返回之后，音频是如何处理的？也许那部分代码中存在错误。您不需要做任何特别的事情来避免音频不连续。我还有一些想法。如果您的缓冲区大小为 480 个样本且采样率为 48000 Hz，那么每个缓冲区填满需要 10 毫秒（因此您应该休眠 5 毫秒）。但这小于 Sleep 的粒度，因此它通常会休眠更长时间，这可能导致您丢失一部分传入的数据。所以请尝试增大缓冲区大小（传递给 pAudioClient->Initialize 的参数），看看是否能解决问题。【参考方案2】：

您可以改用此音频库。它比尝试与平台特定的 SDK 交互更容易启动和运行：

http://www.music.mcgill.ca/~gary/rtaudio/recording.html

此外，虽然在您的示例中移除睡眠可能无济于事，但您不应该在音频处理期间调用睡眠、锁定互斥体或分配内存。与较短的缓冲时间相比，这些引入的延迟是完全任意的，因此总会给您带来问题。

【讨论】：

(或者看这个同时做输入输出的例子：music.mcgill.ca/~gary/rtaudio/duplex.html) 微软提倡调用 Sleep。

以上是关于如何在 Windows 10 上使用 C++ 将连续的原始音频数据记录到循环缓冲区中？的主要内容，如果未能解决你的问题，请参考以下文章