我想从麦克风录制音频并将数据保存为原始格式,我发现 WASAPICaptureSharedEventDriven,此示例应用程序使用核心音频 API 从用户指定的输入设备捕获音频数据并将其写入 wav 文件。这是代码:

//  WAV file writer.
//  This is a VERY simple .WAV file writer.

//  A wave file consists of:
//  RIFF header:    8 bytes consisting of the signature "RIFF" followed by a 4 byte file length.
//  WAVE header:    4 bytes consisting of the signature "WAVE".
//  fmt header:     8 bytes consisting of the signature "fmt " followed by a 4 byte format length.
//  WAVEFORMAT:     <n> bytes containing a waveformat structure.
//  DATA header:    8 bytes consisting of the signature "data" followed by a 4 byte data length.
//  wave data:      <m> bytes containing wave data.
//  Header for a WAV file - we define a structure describing the first few fields in the header for convenience.
//  The five 32-bit fields overlay the 20-byte WaveHeader template byte for byte.
struct WAVEHEADER
{
    DWORD   dwRiff;                     // "RIFF"
    DWORD   dwSize;                     // Size (total file size minus the 8-byte RIFF header)
    DWORD   dwWave;                     // "WAVE"
    DWORD   dwFmt;                      // "fmt "
    DWORD   dwFmtSize;                  // Wave Format Size
};

//  Static RIFF header, we'll append the format to it.
//  The two zeroed 4-byte groups are placeholders for the RIFF size and the fmt chunk size.
const BYTE WaveHeader[] =
{
    'R',   'I',   'F',   'F',  0x00,  0x00,  0x00,  0x00, 'W',   'A',   'V',   'E',   'f',   'm',   't',   ' ', 0x00, 0x00, 0x00, 0x00
};

//  Static wave DATA tag.
const BYTE WaveData[] = { 'd', 'a', 't', 'a' };

//  Write the contents of a WAV file.  We take as input the data to write and the format of that data.
//
//  FileHandle - handle the finished file image is written to with a single WriteFile call.
//  Buffer     - audio bytes that become the payload of the "data" chunk.
//  BufferSize - number of bytes in Buffer.
//  WaveFormat - format description; sizeof(WAVEFORMATEX) + cbSize bytes are read through this
//               pointer, so cbSize must match the storage the caller really provides.
//
//  Returns true when the whole image was written; on failure prints a message and returns false.
bool WriteWaveFile(HANDLE FileHandle, const BYTE *Buffer, const size_t BufferSize, const WAVEFORMATEX *WaveFormat)
{
    const size_t formatSize = sizeof(WAVEFORMATEX) + WaveFormat->cbSize;
    const size_t headerSize = sizeof(WAVEHEADER) + formatSize + sizeof(WaveData) + sizeof(DWORD);

    //  Every length in the file is stored in a 32-bit field; refuse data that would silently wrap.
    if (BufferSize > MAXDWORD - headerSize)
    {
        printf("Wave data is too large to store in a WAV file\n");
        return false;
    }

    const DWORD waveFileSize = static_cast<DWORD>(headerSize + BufferSize);
    BYTE *waveFileData = new (std::nothrow) BYTE[waveFileSize];
    if (waveFileData == NULL)
    {
        printf("Unable to allocate %lu bytes to hold output wave data\n", waveFileSize);
        return false;
    }
    BYTE *waveFilePointer = waveFileData;

    //  Copy in the wave header template.
    CopyMemory(waveFilePointer, WaveHeader, sizeof(WaveHeader));
    waveFilePointer += sizeof(WaveHeader);

    //  Patch the two length placeholders through the struct view of the same bytes.
    WAVEHEADER *waveHeader = reinterpret_cast<WAVEHEADER *>(waveFileData);
    waveHeader->dwSize = static_cast<DWORD>(waveFileSize - (2 * sizeof(DWORD)));
    waveHeader->dwFmtSize = static_cast<DWORD>(formatSize);

    //  Next copy in the WaveFormatex structure.
    CopyMemory(waveFilePointer, WaveFormat, formatSize);
    waveFilePointer += formatSize;

    //  Then the data header.  The length is copied byte-wise because this offset is not
    //  guaranteed to be DWORD-aligned.
    CopyMemory(waveFilePointer, WaveData, sizeof(WaveData));
    waveFilePointer += sizeof(WaveData);
    const DWORD dataSize = static_cast<DWORD>(BufferSize);
    CopyMemory(waveFilePointer, &dataSize, sizeof(dataSize));
    waveFilePointer += sizeof(dataSize);

    //  And finally copy in the audio data.
    CopyMemory(waveFilePointer, Buffer, BufferSize);

    //  Last but not least, write the data to the file.
    bool succeeded = true;
    DWORD bytesWritten = 0;
    if (!WriteFile(FileHandle, waveFileData, waveFileSize, &bytesWritten, NULL))
    {
        printf("Unable to write wave file: %lu\n", GetLastError());
        succeeded = false;
    }
    else if (bytesWritten != waveFileSize)
    {
        printf("Failed to write entire wave file\n");
        succeeded = false;
    }

    //  Single release point so no exit path can leak the staging buffer.
    delete []waveFileData;
    return succeeded;
}

//  Write the captured wave data to an output file so that it can be examined later.
//
//  The file is named "WASAPICaptureEventDriven-<GUID>.WAV", using a freshly created GUID.
//  CaptureBuffer / BufferSize / WaveFormat are passed straight through to WriteWaveFile.
//  Nothing is returned; success and failure to open or write are reported with printf.
void SaveWaveData(BYTE *CaptureBuffer, size_t BufferSize, const WAVEFORMATEX *WaveFormat)
{
    wchar_t waveFileName[MAX_PATH];
    HRESULT hr = StringCbCopy(waveFileName, sizeof(waveFileName), L"WASAPICaptureEventDriven-");
    if (SUCCEEDED(hr))
    {
        GUID testGuid;
        if (SUCCEEDED(CoCreateGuid(&testGuid)))
        {
            wchar_t *guidString;
            if (SUCCEEDED(StringFromCLSID(testGuid, &guidString)))
            {
                hr = StringCbCat(waveFileName, sizeof(waveFileName), guidString);
                if (SUCCEEDED(hr))
                {
                    hr = StringCbCat(waveFileName, sizeof(waveFileName), L".WAV");
                    if (SUCCEEDED(hr))
                    {
                        HANDLE waveHandle = CreateFile(waveFileName, GENERIC_WRITE, FILE_SHARE_READ, NULL, CREATE_ALWAYS,
                            FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN,
                            NULL);
                        if (waveHandle != INVALID_HANDLE_VALUE)
                        {
                            if (WriteWaveFile(waveHandle, CaptureBuffer, BufferSize, WaveFormat))
                            {
                                printf("Successfully wrote WAVE data to %S\n", waveFileName);
                            }
                            else
                            {
                                printf("Unable to write wave file\n");
                            }
                            //  Close the handle so the data is committed and the file is released.
                            CloseHandle(waveHandle);
                        }
                        else
                        {
                            printf("Unable to open output WAV file %S: %lu\n", waveFileName, GetLastError());
                        }
                    }
                }
                //  StringFromCLSID allocates the string with the COM task allocator.
                CoTaskMemFree(guidString);
            }
        }
    }
}


FILE* _file;
int16_t* _data;
_data = (int16_t*)Buffer;
_file = fopen("utterance", "wb +");
fwrite(_data, 1,BufferSize, _file);


import numpy as np
import matplotlib.pyplot as plt
# Load a headerless audio file and interpret every two bytes as one int16 sample.
# NOTE(review): assumes test.raw really holds 16-bit integer samples - confirm against the
# capture format; data stored in another sample type would decode to meaningless values.
with open ('test.raw', 'rb') as f:
    buf = f.read ()
    data = np.frombuffer (buf, dtype = 'int16')
    # Even- and odd-indexed samples; presumably the left/right channels of interleaved
    # stereo - verify. Neither slice is used below.
    L = data [:: 2]
    R = data [1 :: 2]

newdata = np.squeeze(data) # squeeze only drops length-1 axes; data comes from frombuffer as a flat array
plt.plot(newdata) # draws every sample of the flat array as one trace




请帮帮我! “原始格式”是什么意思?你是想得到 wav 文件吗? BTW,输出是什么?波形吗? @StriveSun-MSFT RAW Audio 格式(或简称 RAW Audio)是一种以原始形式存储未压缩音频的文件格式。与大小相近的 WAV 或 AIFF 不同,RAW 音频文件不包含任何头信息(采样率、位深度、字节序或通道数)。这是原始音频的示例:cpham.perso.univ-pau.fr/SmartSantanderSample/test.raw 您已经拥有原始音频数据,因此只需把它写入一个 .raw 文件即可。我不确定数据是否是二进制的。参见 raw file format C++ @StriveSun-MSFT 我的数据是一个字节数组,它可以是浮点数或整数,因为它是每个样本 16 位 【参考方案1】:

经过检查,原始音频样本的属性没有问题。请尝试我的代码示例并添加 wav 标头以重新编码。

#include <Windows.h>
#include <stdio.h>
#include <MMDeviceAPI.h>
#include <AudioClient.h>
#include <assert.h>
#include <avrt.h>
#include <strsafe.h>
#include <fstream> 

using namespace std;

#pragma warning(disable:4996)


//  Leading fields of a WAV file; the five 32-bit fields overlay the 20-byte WaveHeader template.
struct WAVEHEADER
{
    DWORD   dwRiff;                     // "RIFF"
    DWORD   dwSize;                     // Size (total file size minus the 8-byte RIFF header)
    DWORD   dwWave;                     // "WAVE"
    DWORD   dwFmt;                      // "fmt "
    DWORD   dwFmtSize;                  // Wave Format Size
};

//  Static RIFF header template; the two zeroed 4-byte groups are placeholders for the
//  RIFF size and the fmt chunk size.
const BYTE WaveHeader[] =
{
    'R',   'I',   'F',   'F',  0x00,  0x00,  0x00,  0x00, 'W',   'A',   'V',   'E',   'f',   'm',   't',   ' ', 0x00, 0x00, 0x00, 0x00
};

//  Static wave DATA tag.
const BYTE WaveData[] = { 'd', 'a', 't', 'a' };

//  Write the contents of a WAV file.  We take as input the data to write and the format of that data.
//
//  FileHandle - handle the finished file image is written to with a single WriteFile call.
//  Buffer     - audio bytes that become the payload of the "data" chunk.
//  BufferSize - number of bytes in Buffer.
//  WaveFormat - format description; sizeof(WAVEFORMATEX) + cbSize bytes are read through this
//               pointer, so cbSize must match the storage the caller really provides.
//
//  Returns true when the whole image was written; on failure prints a message and returns false.
bool WriteWaveFile(HANDLE FileHandle, const BYTE* Buffer, const size_t BufferSize, WAVEFORMATEX* WaveFormat)
{
    const size_t formatSize = sizeof(WAVEFORMATEX) + WaveFormat->cbSize;
    const size_t headerSize = sizeof(WAVEHEADER) + formatSize + sizeof(WaveData) + sizeof(DWORD);

    //  Every length in the file is stored in a 32-bit field; refuse data that would silently wrap.
    if (BufferSize > MAXDWORD - headerSize)
    {
        printf("Wave data is too large to store in a WAV file\n");
        return false;
    }

    const DWORD waveFileSize = static_cast<DWORD>(headerSize + BufferSize);
    BYTE* waveFileData = new (std::nothrow) BYTE[waveFileSize];
    if (waveFileData == NULL)
    {
        printf("Unable to allocate %lu bytes to hold output wave data\n", waveFileSize);
        return false;
    }
    BYTE* waveFilePointer = waveFileData;

    //  Copy in the wave header template.
    CopyMemory(waveFilePointer, WaveHeader, sizeof(WaveHeader));
    waveFilePointer += sizeof(WaveHeader);

    //  Patch the two length placeholders through the struct view of the same bytes.
    WAVEHEADER* waveHeader = reinterpret_cast<WAVEHEADER*>(waveFileData);
    waveHeader->dwSize = static_cast<DWORD>(waveFileSize - (2 * sizeof(DWORD)));
    waveHeader->dwFmtSize = static_cast<DWORD>(formatSize);

    //  Next copy in the WaveFormatex structure.
    CopyMemory(waveFilePointer, WaveFormat, formatSize);
    waveFilePointer += formatSize;

    //  Then the data header.  The length is copied byte-wise because this offset is not
    //  guaranteed to be DWORD-aligned.
    CopyMemory(waveFilePointer, WaveData, sizeof(WaveData));
    waveFilePointer += sizeof(WaveData);
    const DWORD dataSize = static_cast<DWORD>(BufferSize);
    CopyMemory(waveFilePointer, &dataSize, sizeof(dataSize));
    waveFilePointer += sizeof(dataSize);

    //  And finally copy in the audio data.
    CopyMemory(waveFilePointer, Buffer, BufferSize);

    //  Last but not least, write the data to the file.
    bool succeeded = true;
    DWORD bytesWritten = 0;
    if (!WriteFile(FileHandle, waveFileData, waveFileSize, &bytesWritten, NULL))
    {
        printf("Unable to write wave file: %lu\n", GetLastError());
        succeeded = false;
    }
    else if (bytesWritten != waveFileSize)
    {
        printf("Failed to write entire wave file\n");
        succeeded = false;
    }

    //  Single release point: the staging buffer is freed on success and on every failure.
    delete[] waveFileData;
    return succeeded;
}

//  Write the captured wave data to an output file so that it can be examined later.
//
//  The file is named "WASAPICaptureEventDriven-<GUID>.WAV", using a freshly created GUID.
//  CaptureBuffer / BufferSize / WaveFormat are passed straight through to WriteWaveFile.
//  Nothing is returned; success and failure to open or write are reported with printf.
void SaveWaveData(BYTE* CaptureBuffer, size_t BufferSize, WAVEFORMATEX* WaveFormat)
{
    wchar_t waveFileName[MAX_PATH];
    HRESULT hr = StringCbCopy(waveFileName, sizeof(waveFileName), L"WASAPICaptureEventDriven-");
    if (SUCCEEDED(hr))
    {
        GUID testGuid;
        if (SUCCEEDED(CoCreateGuid(&testGuid)))
        {
            wchar_t* guidString;
            if (SUCCEEDED(StringFromCLSID(testGuid, &guidString)))
            {
                hr = StringCbCat(waveFileName, sizeof(waveFileName), guidString);
                if (SUCCEEDED(hr))
                {
                    hr = StringCbCat(waveFileName, sizeof(waveFileName), L".WAV");
                    if (SUCCEEDED(hr))
                    {
                        HANDLE waveHandle = CreateFile(waveFileName, GENERIC_WRITE, FILE_SHARE_READ, NULL, CREATE_ALWAYS,
                            FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN,
                            NULL);
                        if (waveHandle != INVALID_HANDLE_VALUE)
                        {
                            if (WriteWaveFile(waveHandle, CaptureBuffer, BufferSize, WaveFormat))
                            {
                                printf("Successfully wrote WAVE data to %S\n", waveFileName);
                            }
                            else
                            {
                                printf("Unable to write wave file\n");
                            }
                            //  Close the handle so the data is committed and the file is released.
                            CloseHandle(waveHandle);
                        }
                        else
                        {
                            printf("Unable to open output WAV file %S: %lu\n", waveFileName, GetLastError());
                        }
                    }
                }
                //  StringFromCLSID allocates the string with the COM task allocator.
                CoTaskMemFree(guidString);
            }
        }
    }
}

int main()

    long buffersize = 3528000;// For 10s audio sample, we can set the value of buffersize to 3528000
    BYTE* captureBuffer = new (std::nothrow) BYTE[buffersize]; 
    FILE* _file;
    _file = fopen("utterance.raw", "rb"); //raw audio path
    fread(captureBuffer, 1, buffersize, _file);

    WAVEFORMATEX wavformat;
    wavformat.wFormatTag = WAVE_FORMAT_IEEE_FLOAT;
    wavformat.nChannels = 2;
    wavformat.nSamplesPerSec = 44100;
    wavformat.nAvgBytesPerSec = 352800;
    wavformat.nBlockAlign = 8;
    wavformat.wBitsPerSample = 32;
    wavformat.cbSize = 22;
    SaveWaveData(captureBuffer, buffersize, &wavformat);

    return 0;

您也可以使用ofstream 来编写原始音频文件。


//  Raw-output replacement for the sample's WAV writer, added in the WASAPICaptureSharedEventDriven sample.
//  Writes the BufferSize bytes at Buffer, unchanged and without any header, to "file.raw".
//  FileHandle and WaveFormat are accepted only to keep the original signature; they are not used.
//  Returns true when the file was opened and every byte was written, false otherwise.
//  NOTE(review): the conversion code elsewhere on this page reads "utterance.raw" - make sure
//  the two file names agree before chaining the steps.
bool WriteWaveFile(HANDLE FileHandle, const BYTE *Buffer, const size_t BufferSize, const WAVEFORMATEX *WaveFormat)
{
    (void)FileHandle;
    (void)WaveFormat;

    std::ofstream binaryFile("file.raw", std::ios::out | std::ios::binary);
    if (!binaryFile)
    {
        printf("Unable to open file.raw for writing\n");
        return false;
    }

    binaryFile.write(reinterpret_cast<const char*>(Buffer), static_cast<std::streamsize>(BufferSize));
    //  Close explicitly so a failed flush is reported instead of being lost in the destructor.
    binaryFile.close();
    if (binaryFile.fail())
    {
        printf("Failed to write entire raw file\n");
        return false;
    }
    return true;
}



ifstream infile("utterance.raw", std::ifstream::binary);
 // get size of file
infile.seekg(0, infile.end);
long size = infile.tellg();

BYTE* captureBuffer = new (std::nothrow) BYTE[size];

infile.read((char*)captureBuffer, size);

wavformat.wFormatTag = WAVE_FORMAT_IEEE_FLOAT;
wavformat.nChannels = 2;
wavformat.nSamplesPerSec = 44100;
wavformat.nAvgBytesPerSec = 352800;
wavformat.nBlockAlign = 8;
wavformat.wBitsPerSample = 32;
wavformat.cbSize = 22;
SaveWaveData(captureBuffer, size, &wavformat);


感谢您的回答。但是用 ofstream 保存的原始文件噪音太大。 @Tuấn ifstream 和 ofstream 应该配合使用,我这边工作正常。如果仍有噪音,请尝试第一种方法,使用 fread。请参阅答案中代码的第一部分。 BTW,不要使用 python 示例,请使用我提供的示例进行测试。 @StriveSun-MSFT 感谢您的帮助。很好的答案。非常感谢。如果您有此 question 的示例代码,我将不胜感激

