从视频文件中提取音频并在 OpenAL 中播放
【中文标题】从视频文件中提取音频并在 OpenAL 中播放【英文标题】:Extracting audio from video file and play it in OpenAL 【发布时间】:2013-12-01 19:11:34 【问题描述】:我希望能够从视频文件中提取音频并将其加载到 OpenAL 播放的缓冲区中,但我不知道从哪里开始。
AVFoundation 似乎是最简单的方法(与 FFMPEG 相比,不是吗?),但我找不到使用 OpenAL 获得可播放缓冲区的方法。我在 MacOSX 上使用ObjectAL,效果很好。
您有兴趣处理什么样的文件?只是 MP4?也许还有 AVI、WMV 或其他格式? 能够读取所有文件是最好的... 那工作量会很大,因为每种格式都各不相同。 【参考方案1】:对于可能感兴趣的人,我是这样做的。它支持 AVFoundation 能够读取的所有格式。
步骤如下:1. 将我的文件(视频或音频)加载到 AVAsset 中; 2. 获取音轨(AVAssetTrack); 3. 将音轨解码为 PCM 数据并存入 NSData; 4. 添加 WAV 标头(可选 -> ALBuffer 需要的是不带标头的数据); 5. 用这些数据填充一个 ALBuffer(来自 ObjectAL)。下面是构建 PCM 数据的代码(您会注意到我构建了 2 个缓冲区,因为我还需要倒放音频文件)。
// ---- Create the forward and backward WAV buffers
// Feeds the bufferDictionary with NSData objects (one for each buffer)
// Returns YES if the task is completed
- (BOOL) wavDataBuffersWithAsset:(AVURLAsset *)asset assetTrack:(AVAssetTrack *)audioTrack
// ---- We get the file format description to feed our data array
NSArray* formatDesc = [audioTrack formatDescriptions];
CMAudioFormatDescriptionRef item = (CMAudioFormatDescriptionRef)[formatDesc objectAtIndex:0];
const AudiostreamBasicDescription* fileDescription = CMAudioFormatDescriptionGetStreamBasicDescription (item);
// ---- Sometimes (on movie files, stereo) we can see that the "bits per channel" item is set to 0
// We initialize it by default to 16.
uint32_t sampleRate = fileDescription->mSampleRate;
uint16_t bitDepth = fileDescription->mBitsPerChannel == 0 ? 16 : fileDescription->mBitsPerChannel;
uint16_t channels = fileDescription->mChannelsPerFrame;
// uint32_t byteRate = bitDepth * sampleRate * channels / 8; // -> used only by the WAV header creation method
ALenum audioFormat;
// ---- We get the format of the files to build ObjectAL buffers later
// Default is 16
switch (bitDepth)
case 8:
if (channels == 1)
audioFormat = AL_FORMAT_MONO8;
else if (channels == 2)
audioFormat = AL_FORMAT_STEREO8;
if (channels == 1)
audioFormat = AL_FORMAT_MONO16;
else if (channels == 2)
audioFormat = AL_FORMAT_STEREO16;
if (channels == 1)
audioFormat = AL_FORMAT_MONO16;
else if (channels == 2)
audioFormat = AL_FORMAT_STEREO16;
if ([self isCancelled])
return NO;
// ---- We initialize a reader, in order to be able to feed our NSData
AVAssetReader* reader = [[AVAssetReader alloc] initWithAsset:asset error:nil];
NSDictionary *settings = [NSDictionary dictionaryWithObjectsAndKeys:
[NSNumber numberWithInt:kAudioFormatLinearPCM], AVFormatIDKey,
[NSNumber numberWithFloat:(float)sampleRate], AVSampleRateKey,
[NSNumber numberWithInt:bitDepth], AVLinearPCMBitDepthKey,
[NSNumber numberWithBool:NO], AVLinearPCMIsNonInterleaved,
[NSNumber numberWithBool:NO], AVLinearPCMIsFloatKey,
[NSNumber numberWithBool:NO], AVLinearPCMIsBigEndianKey, nil];
AVAssetReaderTrackOutput* readerOutput = [AVAssetReaderTrackOutput assetReaderTrackOutputWithTrack:audioTrack outputSettings:settings];
[reader addOutput:readerOutput];
[reader startReading];
// ---- We create a WAV buffer
// Header + raw PCM
NSMutableData *audioData = [[[NSMutableData alloc] init] autorelease];
NSMutableData *reverseData = [[[NSMutableData alloc] init] autorelease];
// ---- We create an array to receive the data chunks, so we can reverse it later
NSMutableArray *reversedDataArray = [[[NSMutableArray alloc] init] autorelease];
if ([self isCancelled])
return NO;
// ---- Reads the samples from the AVAsset and append them subsequently
while ([reader status] != AVAssetReaderStatusCompleted)
CMSampleBufferRef buffer = [readerOutput copyNextSampleBuffer];
if (buffer == NULL) continue;
CMBlockBufferRef blockBuffer = CMSampleBufferGetDataBuffer(buffer);
size_t size = CMBlockBufferGetDataLength(blockBuffer);
uint8_t *outBytes = malloc(size);
CMBlockBufferCopyDataBytes(blockBuffer, 0, size, outBytes);
[audioData appendBytes:outBytes length:size];
// ---- We add the reversed data at the beginning of our array
[reversedDataArray insertObject:[NSData dataWithBytes:outBytes length:size] atIndex:0];
if ([self isCancelled])
return NO;
// ---- We append the reversed data to our NSMutableData object
for (NSData *data in reversedDataArray)
[reverseData appendData:data];
// ---- NO WAV header with OpenAL
NSMutableData *headerData = [self wavHeaderWithDataLength:[audioData length] channels:channels bitDepth:bitDepth sampleRate:sampleRate byteRate:byteRate];
NSMutableData *headerReverseData = [[headerData mutableCopy] autorelease];
[headerData appendData:audioData];
[headerReverseData appendData:reverseData];
[bufferDictionary setObject:audioData forKey:@"forward"];
[bufferDictionary setObject:reverseData forKey:@"backward"];
[bufferDictionary setObject:[NSNumber numberWithInteger:audioFormat] forKey:@"audioFormat"];
[bufferDictionary setObject:[NSNumber numberWithInt:sampleRate] forKey:@"sampleRate"];
return YES;
如果需要,还有 WAV 标头:
// ---- Creates the WAV data header and returns it
- (NSMutableData *) wavHeaderWithDataLength:(NSUInteger)length channels:(int)channels bitDepth:(int)bitDepth sampleRate:(long)sampleRate byteRate:(long)byteRate
// ---- The WAV header is 44 bytes long
long totalAudioLen = length;
long totalDataLen = totalAudioLen + 44;
// ---- The WAV header
Byte *header = (Byte*)malloc(44);
header[0] = 'R';
header[1] = 'I';
header[2] = 'F';
header[3] = 'F';
header[4] = (Byte) (totalDataLen & 0xff);
header[5] = (Byte) ((totalDataLen >> 8) & 0xff);
header[6] = (Byte) ((totalDataLen >> 16) & 0xff);
header[7] = (Byte) ((totalDataLen >> 24) & 0xff);
header[8] = 'W';
header[9] = 'A';
header[10] = 'V';
header[11] = 'E';
header[12] = 'f';
header[13] = 'm';
header[14] = 't';
header[15] = ' ';
header[16] = bitDepth; // 16; // 4 bytes: size of 'fmt ' chunk
header[17] = 0;
header[18] = 0;
header[19] = 0;
header[20] = 1; // format = 1
header[21] = 0;
header[22] = (Byte) channels;
header[23] = 0;
header[24] = (Byte) (sampleRate & 0xff);
header[25] = (Byte) ((sampleRate >> 8) & 0xff);
header[26] = (Byte) ((sampleRate >> 16) & 0xff);
header[27] = (Byte) ((sampleRate >> 24) & 0xff);
header[28] = (Byte) (byteRate & 0xff);
header[29] = (Byte) ((byteRate >> 8) & 0xff);
header[30] = (Byte) ((byteRate >> 16) & 0xff);
header[31] = (Byte) ((byteRate >> 24) & 0xff);
header[32] = (Byte) (2 * 8 / 8); // block align
header[33] = 0;
header[34] = bitDepth; // 16; // bits per sample
header[35] = 0;
header[36] = 'd';
header[37] = 'a';
header[38] = 't';
header[39] = 'a';
header[40] = (Byte) (totalAudioLen & 0xff);
header[41] = (Byte) ((totalAudioLen >> 8) & 0xff);
header[42] = (Byte) ((totalAudioLen >> 16) & 0xff);
header[43] = (Byte) ((totalAudioLen >> 24) & 0xff);
// ---- The header NSData
NSMutableData *headerData = [NSMutableData dataWithBytes:header length:44];
return headerData;
