如何在 WASAPICaptureSharedEventDriven 中保存原始格式的音频文件

Posted

技术标签:

【中文标题】如何在 WASAPICaptureSharedEventDriven 中保存原始格式的音频文件【英文标题】:How to save audio file with raw format in WASAPICaptureSharedEventDriven 【发布时间】:2020-10-24 14:34:09 【问题描述】:

我想从麦克风录制音频并将数据保存为原始格式。我找到了 WASAPICaptureSharedEventDriven 示例,该示例应用程序使用 Core Audio API 从用户指定的输入设备捕获音频数据并将其写入 wav 文件。这是代码:

//  WAV file writer.
//
//  This is a VERY simple .WAV file writer.
//

//
//  A wave file consists of:
//
//  RIFF header:    8 bytes consisting of the signature "RIFF" followed by a 4 byte length (the file size minus these 8 bytes).
//  WAVE header:    4 bytes consisting of the signature "WAVE".
//  fmt header:     8 bytes consisting of the signature "fmt " followed by a 4 byte length of the WAVEFORMATEX that follows.
//  WAVEFORMAT:     <n> bytes containing a waveformat structure.
//  DATA header:    8 bytes consisting of the signature "data" followed by a 4 byte length of the wave data.
//  wave data:      <m> bytes containing wave data.
//
//
//  Header for a WAV file - we define a structure describing the first few fields in the header for convenience.
//
struct WAVEHEADER
{
    DWORD   dwRiff;                     // "RIFF" signature
    DWORD   dwSize;                     // Size of everything that follows this field (file size - 8)
    DWORD   dwWave;                     // "WAVE" signature
    DWORD   dwFmt;                      // "fmt " signature
    DWORD   dwFmtSize;                  // Size in bytes of the wave format structure that follows
};

//  Static RIFF header, we'll append the format to it.
//  The two zero-filled 4 byte fields are placeholders for the RIFF size and the
//  format size; WriteWaveFile fills them in once the sizes are known.
const BYTE WaveHeader[] =
{
    'R',   'I',   'F',   'F',  0x00,  0x00,  0x00,  0x00,   // "RIFF" + size placeholder
    'W',   'A',   'V',   'E',                                // "WAVE"
    'f',   'm',   't',   ' ',  0x00,  0x00,  0x00,  0x00    // "fmt " + format size placeholder
};

//  Static wave DATA tag (the 4 byte signature that starts the data chunk).
const BYTE WaveData[] = { 'd', 'a', 't', 'a' };

//
//  Write the contents of a WAV file.  We take as input the data to write and the format of that data.
//
//  FileHandle - handle passed to WriteFile; the whole file image is written with one call.
//  Buffer     - audio data stored after the "data" tag; may be NULL only when BufferSize is 0.
//  BufferSize - size of Buffer in bytes.
//  WaveFormat - format of the data; WaveFormat->cbSize extra bytes are copied from the memory
//               directly after the structure, so they must really be there.
//
//  Returns true when the complete file image was written, false otherwise (a message is printed).
//
bool WriteWaveFile(HANDLE FileHandle, const BYTE *Buffer, const size_t BufferSize, const WAVEFORMATEX *WaveFormat)
{
    if (WaveFormat == NULL || (Buffer == NULL && BufferSize != 0))
    {
        printf("Invalid arguments passed to WriteWaveFile\n");
        return false;
    }

    const size_t formatSize = sizeof(WAVEFORMATEX) + WaveFormat->cbSize;
    const size_t headerSize = sizeof(WAVEHEADER) + formatSize + sizeof(WaveData) + sizeof(DWORD);

    //
    //  Every size stored in the file is a DWORD, so refuse data that cannot be described by one.
    //
    if (BufferSize > MAXDWORD - headerSize)
    {
        printf("Capture buffer is too large to store in a wave file\n");
        return false;
    }

    const DWORD waveFileSize = static_cast<DWORD>(headerSize + BufferSize);
    BYTE *waveFileData = new (std::nothrow) BYTE[waveFileSize];
    if (waveFileData == NULL)
    {
        printf("Unable to allocate %lu bytes to hold output wave data\n", waveFileSize);
        return false;
    }

    //
    //  Build the fixed header in a properly aligned local, then copy it into the image.
    //
    WAVEHEADER header;
    CopyMemory(&header, WaveHeader, sizeof(header));
    header.dwSize = static_cast<DWORD>(waveFileSize - (2 * sizeof(DWORD)));    // everything after "RIFF" + size
    header.dwFmtSize = static_cast<DWORD>(formatSize);

    BYTE *cursor = waveFileData;
    CopyMemory(cursor, &header, sizeof(header));
    cursor += sizeof(header);

    //
    //  Next copy in the WaveFormatex structure (including its cbSize trailing bytes).
    //
    CopyMemory(cursor, WaveFormat, formatSize);
    cursor += formatSize;

    //
    //  Then the data header.  The length is copied byte-wise because this offset is not
    //  guaranteed to be DWORD aligned.
    //
    CopyMemory(cursor, WaveData, sizeof(WaveData));
    cursor += sizeof(WaveData);
    const DWORD dataSize = static_cast<DWORD>(BufferSize);
    CopyMemory(cursor, &dataSize, sizeof(dataSize));
    cursor += sizeof(dataSize);

    //
    //  And finally copy in the audio data.
    //
    if (BufferSize != 0)
    {
        CopyMemory(cursor, Buffer, BufferSize);
    }

    //
    //  Last but not least, write the data to the file.
    //
    bool succeeded = true;
    DWORD bytesWritten = 0;
    if (!WriteFile(FileHandle, waveFileData, waveFileSize, &bytesWritten, NULL))
    {
        printf("Unable to write wave file: %lu\n", GetLastError());
        succeeded = false;
    }
    else if (bytesWritten != waveFileSize)
    {
        printf("Failed to write entire wave file\n");
        succeeded = false;
    }

    delete[] waveFileData;
    return succeeded;
}

//
//  Write the captured wave data to an output file so that it can be examined later.
//
//  The file is created in the current directory and is named
//  "WASAPICaptureEventDriven-<GUID>.WAV", using a freshly generated GUID.
//
//  CaptureBuffer - captured audio data.
//  BufferSize    - size of CaptureBuffer in bytes.
//  WaveFormat    - format of the captured data, passed through to WriteWaveFile.
//
//  Failures are reported with printf; nothing is returned to the caller.
//
void SaveWaveData(BYTE *CaptureBuffer, size_t BufferSize, const WAVEFORMATEX *WaveFormat)
{
    GUID fileGuid;
    if (FAILED(CoCreateGuid(&fileGuid)))
    {
        printf("Unable to create a GUID for the output WAV file name\n");
        return;
    }

    wchar_t *guidString = NULL;
    if (FAILED(StringFromCLSID(fileGuid, &guidString)))
    {
        printf("Unable to convert the GUID for the output WAV file name\n");
        return;
    }

    //
    //  Build "<prefix><guid>.WAV"; each step only runs if the previous one succeeded.
    //
    wchar_t waveFileName[MAX_PATH];
    HRESULT hr = StringCbCopy(waveFileName, sizeof(waveFileName), L"WASAPICaptureEventDriven-");
    if (SUCCEEDED(hr))
    {
        hr = StringCbCat(waveFileName, sizeof(waveFileName), guidString);
    }
    if (SUCCEEDED(hr))
    {
        hr = StringCbCat(waveFileName, sizeof(waveFileName), L".WAV");
    }
    CoTaskMemFree(guidString);      // allocated by StringFromCLSID, no longer needed

    if (FAILED(hr))
    {
        printf("Unable to build the output WAV file name: 0x%08lx\n", static_cast<unsigned long>(hr));
        return;
    }

    HANDLE waveHandle = CreateFile(waveFileName, GENERIC_WRITE, FILE_SHARE_READ, NULL, CREATE_ALWAYS,
        FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN,
        NULL);
    if (waveHandle == INVALID_HANDLE_VALUE)
    {
        const DWORD openError = GetLastError();
        printf("Unable to open output WAV file %S: %lu\n", waveFileName, openError);
        return;
    }

    if (WriteWaveFile(waveHandle, CaptureBuffer, BufferSize, WaveFormat))
    {
        printf("Successfully wrote WAVE data to %S\n", waveFileName);
    }
    else
    {
        printf("Unable to write wave file\n");
    }
    CloseHandle(waveHandle);
}
我尝试从缓冲区保存音频:

FILE* _file;
int16_t* _data;
_data = (int16_t*)Buffer;
_file = fopen("utterance", "wb +");
fwrite(_data, 1,BufferSize, _file);
fclose(_file);

输出音频太糟糕了,要查看它,我使用此代码来绘制原始文件:

import numpy as np
import matplotlib.pyplot as plt

# Layout of the raw samples. These must match the format the audio was captured
# with; a headerless file cannot tell us. The values below describe 32-bit
# little-endian float, 2 interleaved channels. For 16-bit PCM use '<i2'.
SAMPLE_DTYPE = '<f4'
NUM_CHANNELS = 2

with open('test.raw', 'rb') as f:
    buf = f.read()

# Ignore a trailing partial frame so the buffer reshapes cleanly.
frame_bytes = np.dtype(SAMPLE_DTYPE).itemsize * NUM_CHANNELS
usable = len(buf) - len(buf) % frame_bytes

# One row per frame, one column per channel (de-interleaved view).
data = np.frombuffer(buf[:usable], dtype=SAMPLE_DTYPE).reshape(-1, NUM_CHANNELS)

plt.plot(data)  # a 2-D array is plotted column by column: one line per channel
plt.show()

输出如下图所示,可以看到波形是一个矩形,声音非常糟糕:

我哪里错了?请帮帮我!

【问题讨论】:

请帮帮我! 原始格式是什么意思?你想得到 wav 文件吗? 顺便问一下,输出是什么?波形吗? @StriveSun-MSFT RAW Audio 格式(或简称 RAW Audio)是一种音频文件格式,用于以原始形式存储未压缩的音频。与大小相近的 WAV 或 AIFF 不同,RAW 音频文件不包含任何文件头信息(采样率、位深度、字节序或通道数)。这是原始音频的示例:cpham.perso.univ-pau.fr/SmartSantanderSample/test.raw 您已经拥有原始音频数据,因此只需将其写入一个 .raw 格式的文件。我不确定数据是否是二进制的。参见 raw file format C++ @StriveSun-MSFT 我的数据是一个字节数组,它可以是浮点数或整数,因为它是每个样本 16 位 【参考方案1】:

经过检查,原始音频样本的属性没有问题。请尝试我的代码示例并添加 wav 标头以重新编码。

#include <Windows.h>
#include <stdio.h>
#include <MMDeviceAPI.h>
#include <AudioClient.h>
#include <assert.h>
#include <avrt.h>
#include <strsafe.h>
#include <fstream> 

using namespace std;

#pragma warning(disable:4996)

//  Fixed-size start of a WAV file: RIFF signature and size, "WAVE" signature,
//  and the "fmt " signature with the size of the format structure that follows.
struct WAVEHEADER
{
    DWORD   dwRiff;                     // "RIFF" signature
    DWORD   dwSize;                     // Size of everything that follows this field (file size - 8)
    DWORD   dwWave;                     // "WAVE" signature
    DWORD   dwFmt;                      // "fmt " signature
    DWORD   dwFmtSize;                  // Size in bytes of the wave format structure that follows
};

//  Static RIFF header; the format structure is appended after it.
//  The two zero-filled 4 byte fields are placeholders for the RIFF size and the
//  format size; WriteWaveFile fills them in once the sizes are known.
const BYTE WaveHeader[] =
{
    'R',   'I',   'F',   'F',  0x00,  0x00,  0x00,  0x00,   // "RIFF" + size placeholder
    'W',   'A',   'V',   'E',                                // "WAVE"
    'f',   'm',   't',   ' ',  0x00,  0x00,  0x00,  0x00    // "fmt " + format size placeholder
};

//  Static wave DATA tag (the 4 byte signature that starts the data chunk).
const BYTE WaveData[] = { 'd', 'a', 't', 'a' };

//
//  Write the contents of a WAV file: header, format structure, "data" tag, data size and audio data.
//
//  FileHandle - handle passed to WriteFile; the whole file image is written with one call.
//  Buffer     - audio data stored after the "data" tag; may be NULL only when BufferSize is 0.
//  BufferSize - size of Buffer in bytes.
//  WaveFormat - format of the data; WaveFormat->cbSize extra bytes are copied from the memory
//               directly after the structure, so they must really be there.
//
//  Returns true when the complete file image was written, false otherwise (a message is printed).
//
bool WriteWaveFile(HANDLE FileHandle, const BYTE* Buffer, const size_t BufferSize, WAVEFORMATEX* WaveFormat)
{
    if (WaveFormat == NULL || (Buffer == NULL && BufferSize != 0))
    {
        printf("Invalid arguments passed to WriteWaveFile\n");
        return false;
    }

    const size_t formatSize = sizeof(WAVEFORMATEX) + WaveFormat->cbSize;
    const size_t headerSize = sizeof(WAVEHEADER) + formatSize + sizeof(WaveData) + sizeof(DWORD);

    //
    //  Every size stored in the file is a DWORD, so refuse data that cannot be described by one.
    //
    if (BufferSize > MAXDWORD - headerSize)
    {
        printf("Capture buffer is too large to store in a wave file\n");
        return false;
    }

    const DWORD waveFileSize = static_cast<DWORD>(headerSize + BufferSize);
    BYTE* waveFileData = new (std::nothrow) BYTE[waveFileSize];
    if (waveFileData == NULL)
    {
        printf("Unable to allocate %lu bytes to hold output wave data\n", waveFileSize);
        return false;
    }

    //
    //  Build the fixed header in a properly aligned local, then copy it into the image.
    //
    WAVEHEADER header;
    CopyMemory(&header, WaveHeader, sizeof(header));
    header.dwSize = static_cast<DWORD>(waveFileSize - (2 * sizeof(DWORD)));    // everything after "RIFF" + size
    header.dwFmtSize = static_cast<DWORD>(formatSize);

    BYTE* cursor = waveFileData;
    CopyMemory(cursor, &header, sizeof(header));
    cursor += sizeof(header);

    //
    //  Next copy in the WaveFormatex structure (including its cbSize trailing bytes).
    //
    CopyMemory(cursor, WaveFormat, formatSize);
    cursor += formatSize;

    //
    //  Then the data header.  The length is copied byte-wise because this offset is not
    //  guaranteed to be DWORD aligned.
    //
    CopyMemory(cursor, WaveData, sizeof(WaveData));
    cursor += sizeof(WaveData);
    const DWORD dataSize = static_cast<DWORD>(BufferSize);
    CopyMemory(cursor, &dataSize, sizeof(dataSize));
    cursor += sizeof(dataSize);

    //
    //  And finally copy in the audio data.
    //
    if (BufferSize != 0)
    {
        CopyMemory(cursor, Buffer, BufferSize);
    }

    //
    //  Last but not least, write the data to the file.
    //
    bool succeeded = true;
    DWORD bytesWritten = 0;
    if (!WriteFile(FileHandle, waveFileData, waveFileSize, &bytesWritten, NULL))
    {
        printf("Unable to write wave file: %lu\n", GetLastError());
        succeeded = false;
    }
    else if (bytesWritten != waveFileSize)
    {
        printf("Failed to write entire wave file\n");
        succeeded = false;
    }

    delete[] waveFileData;
    return succeeded;
}

//
//  Write the captured wave data to an output file so that it can be examined later.
//
//  The file is created in the current directory and is named
//  "WASAPICaptureEventDriven-<GUID>.WAV", using a freshly generated GUID.
//
//  CaptureBuffer - captured audio data.
//  BufferSize    - size of CaptureBuffer in bytes.
//  WaveFormat    - format of the captured data, passed through to WriteWaveFile.
//
//  Failures are reported with printf; nothing is returned to the caller.
//
void SaveWaveData(BYTE* CaptureBuffer, size_t BufferSize, WAVEFORMATEX* WaveFormat)
{
    GUID fileGuid;
    if (FAILED(CoCreateGuid(&fileGuid)))
    {
        printf("Unable to create a GUID for the output WAV file name\n");
        return;
    }

    wchar_t* guidString = NULL;
    if (FAILED(StringFromCLSID(fileGuid, &guidString)))
    {
        printf("Unable to convert the GUID for the output WAV file name\n");
        return;
    }

    //
    //  Build "<prefix><guid>.WAV"; each step only runs if the previous one succeeded.
    //
    wchar_t waveFileName[MAX_PATH];
    HRESULT hr = StringCbCopy(waveFileName, sizeof(waveFileName), L"WASAPICaptureEventDriven-");
    if (SUCCEEDED(hr))
    {
        hr = StringCbCat(waveFileName, sizeof(waveFileName), guidString);
    }
    if (SUCCEEDED(hr))
    {
        hr = StringCbCat(waveFileName, sizeof(waveFileName), L".WAV");
    }
    CoTaskMemFree(guidString);      // allocated by StringFromCLSID, no longer needed

    if (FAILED(hr))
    {
        printf("Unable to build the output WAV file name: 0x%08lx\n", static_cast<unsigned long>(hr));
        return;
    }

    HANDLE waveHandle = CreateFile(waveFileName, GENERIC_WRITE, FILE_SHARE_READ, NULL, CREATE_ALWAYS,
        FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN,
        NULL);
    if (waveHandle == INVALID_HANDLE_VALUE)
    {
        const DWORD openError = GetLastError();
        printf("Unable to open output WAV file %S: %lu\n", waveFileName, openError);
        return;
    }

    if (WriteWaveFile(waveHandle, CaptureBuffer, BufferSize, WaveFormat))
    {
        printf("Successfully wrote WAVE data to %S\n", waveFileName);
    }
    else
    {
        printf("Unable to write wave file\n");
    }
    CloseHandle(waveHandle);
}

int main()

    long buffersize = 3528000;// For 10s audio sample, we can set the value of buffersize to 3528000
    BYTE* captureBuffer = new (std::nothrow) BYTE[buffersize]; 
    
    FILE* _file;
    _file = fopen("utterance.raw", "rb"); //raw audio path
    fread(captureBuffer, 1, buffersize, _file);
    fclose(_file);

    WAVEFORMATEX wavformat;
    wavformat.wFormatTag = WAVE_FORMAT_IEEE_FLOAT;
    wavformat.nChannels = 2;
    wavformat.nSamplesPerSec = 44100;
    wavformat.nAvgBytesPerSec = 352800;
    wavformat.nBlockAlign = 8;
    wavformat.wBitsPerSample = 32;
    wavformat.cbSize = 22;
    SaveWaveData(captureBuffer, buffersize, &wavformat);

    return 0;

您也可以使用 ofstream 来写入原始音频文件。

一些代码:

//  Write the contents of a WAV file.  We take as input the data to write and the format of that data.
//  Added in the WASAPICaptureSharedEventDriven sample: at the start of the function the
//  captured bytes are also dumped, unchanged and without any header, to "file.raw".
bool WriteWaveFile(HANDLE FileHandle, const BYTE *Buffer, const size_t BufferSize, const WAVEFORMATEX *WaveFormat)

    // Open file.raw for output in binary mode.
    ofstream binaryFile("file.raw", ios::out | ios::binary);
    // Write BufferSize raw bytes starting at Buffer.
    binaryFile.write((char*)Buffer, BufferSize);
    binaryFile.close();

    // The remainder of the function is elided in this excerpt.
    ...

获取原始文件后,使用ifstream打开文件并添加wav头。

ifstream infile("utterance.raw", std::ifstream::binary);
 // get size of file
infile.seekg(0, infile.end);
long size = infile.tellg();
infile.seekg(0);

BYTE* captureBuffer = new (std::nothrow) BYTE[size];

infile.read((char*)captureBuffer, size);
infile.close();

WAVEFORMATEX wavformat;
wavformat.wFormatTag = WAVE_FORMAT_IEEE_FLOAT;
wavformat.nChannels = 2;
wavformat.nSamplesPerSec = 44100;
wavformat.nAvgBytesPerSec = 352800;
wavformat.nBlockAlign = 8;
wavformat.wBitsPerSample = 32;
wavformat.cbSize = 22;
SaveWaveData(captureBuffer, size, &wavformat);

【讨论】:

感谢您的回答。但是用 ofstream 保存的原始文件噪音太大。 @Tuấn ifstream 和 ofstream 应该配合使用,我这边工作正常。如果仍有噪音,请尝试第一种方法,使用 fread。请参阅答案中代码的第一部分。 顺便说一下,不要使用 python 示例,请使用我提供的示例进行测试。 @Strive Sun - MSFT 感谢您的帮助。很好的答案。非常感谢。如果您有此 question 的示例代码,我将不胜感激

以上是关于如何在 WASAPICaptureSharedEventDriven 中保存原始格式的音频文件的主要内容,如果未能解决你的问题,请参考以下文章

如何在表单提交后保留文本(如何在提交后不删除自身?)

如何在异步任务中调用意图?或者如何在 onPostExecute 中开始新的活动?

在 Avkit 中如何使用这三行代码,以及如何将音乐静音”

如何在 JDBC 中启动事务?

如何在 Fragment 中调用 OnActivityResult 以及它是如何工作的?

如何使用 Firebase 在 Web 上托管 Flutter?它的效果如何?