改变音调
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了改变音调相关的知识,希望对你有一定的参考价值。
改变声音的音调,有时候很好玩,像Talking Tom里面的那只猫一样,尖起声音学人说话很逗人。
这里小程介绍如何用soundtouch来做出这种效果。
SoundTouch是一个开源的音效处理项目(用C++编写),可以更改音频的音调、播放速率、节拍等特征。
本文介绍如何控制音频的音调、节拍或播放速率。
大概的思路是这样的,先解码音频,得到pcm数据,再通过soundtouch来修改pcm数据,最后压缩为常见格式的音频。
小程上传了一个变调后的音频文件,读者可以关注“广州小程”微信公众号,再来试听下面的音频文件(如果博客没有上传音频的功能,那就提供不了文件了)。
然后,小程介绍下如何写代码来实现这个变调的功能。
先使用FFmpeg来解码,得到pcm后调用soundtouch来处理,最后使用FFmpeg命令把pcm编码成mp3。
演示demo的目录结构是这样的:
小程先贴上代码(change_pcm_pitch.cpp的内容),之后再简单介绍soundtouch的调用:
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern "C" {
#include "ffmpeg/include/libavcodec/avcodec.h"
#include "ffmpeg/include/libavformat/avformat.h"
#include "ffmpeg/include/libswresample/swresample.h"
#include "ffmpeg/include/libavutil/samplefmt.h"
}
#include "SoundTouch.h"
using namespace soundtouch;
// 解码并变调
void change_pcm_pitch(const char* filepath) {
av_register_all();
av_log_set_level(AV_LOG_DEBUG);
AVFormatContext* formatContext = avformat_alloc_context();
AVCodecContext* codecContext = NULL;
int status = 0;
bool success = false;
int audioindex = -1;
status = avformat_open_input(&formatContext, filepath, NULL, NULL);
if (status == 0) {
status = avformat_find_stream_info(formatContext, NULL);
if (status >= 0) {
for (int i = 0; i < formatContext->nb_streams; i ++) {
if (formatContext->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
audioindex = i;
break;
}
}
if (audioindex > -1) {
codecContext = formatContext->streams[audioindex]->codec;
AVCodec* codec = avcodec_find_decoder(codecContext->codec_id);
if (codec) {
status = avcodec_open2(codecContext, codec, NULL);
if (status == 0) {
success = true;
}
}
}
}
}
if (success) {
av_dump_format(formatContext, 0, filepath, false);
av_log(NULL, AV_LOG_DEBUG, "format and decoder sucessful, and now in decoding each frame\n");
printf("sample_rate=%d, channels=%d\n", codecContext->sample_rate, codecContext->channels);
SoundTouch* soundtouch = new SoundTouch();
printf("soundtouch version=%s\n", soundtouch->getVersionString());
soundtouch->setSampleRate(codecContext->sample_rate);
soundtouch->setChannels(codecContext->channels);
soundtouch->setTempo(0.5); // tempo,播放节奏,1.0为正常节奏,大于1.0加快,小于1.0变慢,pcm的体积随之变化
soundtouch->setRate(3.0); // rate,播放速率,1.0为正常速度;单设置这个时,除了影响播放速度,还会影响到音调
soundtouch->setPitch(0.5); // pitch,音调,1.0为正常音调;这个设置并不会影响到时长
AVFrame* frame = av_frame_alloc();
SwrContext* swr = NULL;
int gotframe = 0;
char outfile[512] = {0};
strcpy(outfile, filepath);
strcat(outfile + strlen(outfile), ".pcm");
FILE* file = fopen(outfile, "wb");
if (file) {
while (true) {
AVPacket packet;
av_init_packet(&packet);
status = av_read_frame(formatContext, &packet);
if (status < 0) {
if (status == AVERROR_EOF) {
av_log(NULL, AV_LOG_DEBUG, "read end for file\n");
break;
}
else {
av_packet_unref(&packet);
}
}
else {
if (packet.stream_index == audioindex) {
int srcCount = packet.size;
while (srcCount > 0) {
int decodedcount = avcodec_decode_audio4(codecContext, frame, &gotframe, &packet);
if (decodedcount < 0) {
av_log(NULL, AV_LOG_DEBUG, "decode failed, perhaps not enough data\n");
break;
}
if (gotframe > 0) {
// resample
int targetchannel = 2;
int targetsrate = 44100;
int targetfmt = AV_SAMPLE_FMT_S16;
bool needresample = false;
if (av_frame_get_channels(frame) != targetchannel || frame->sample_rate != targetsrate || frame->format != targetfmt) {
needresample = true;
}
if (needresample) {
if (swr == NULL) {
uint64_t in_channel_layout = av_get_default_channel_layout(av_frame_get_channels(frame));
uint64_t out_channel_layout = av_get_default_channel_layout(targetchannel);
int inSamplerate = frame->sample_rate;
swr = swr_alloc_set_opts(NULL,
out_channel_layout, (enum AVSampleFormat )AV_SAMPLE_FMT_S16, targetsrate,
in_channel_layout, (enum AVSampleFormat)frame->format, inSamplerate, 0, NULL);
int ret = swr_init(swr);
if (ret != 0) {
printf("swr_init failed: ret=%d\n", ret);
}
}
if (swr) {
if (frame->extended_data && frame->data[0] && frame->linesize[0] > 0) {
int out_size = av_samples_get_buffer_size(NULL, targetchannel, frame->nb_samples, (enum AVSampleFormat)targetfmt, 0);
void* out_buffer = av_malloc(out_size);
if (out_buffer) {
int convertSamples = swr_convert(swr, (uint8_t**)(&out_buffer), frame->nb_samples,
(const uint8_t**)frame->extended_data, frame->nb_samples);
int len = convertSamples * targetchannel * av_get_bytes_per_sample((enum AVSampleFormat)targetfmt);
int samplecount = convertSamples;
soundtouch->putSamples((SAMPLETYPE*)out_buffer, samplecount);
int bufsize = samplecount * frame->channels * sizeof(short);
unsigned char* buf = (unsigned char*)malloc(bufsize);
int gotsamplecount = soundtouch->receiveSamples((SAMPLETYPE*)buf, samplecount);
printf("soundtouch receiveSamples after resample:gotsamplecount=%d bufsize=%d sizeof(SAMPLETYPE)=%lu\n", gotsamplecount, bufsize, sizeof(SAMPLETYPE));
if (gotsamplecount) {
fwrite(buf, gotsamplecount * frame->channels * sizeof(short), 1, file);
}
free(buf);
av_free(out_buffer);
}
}
}
}
else {
int samplecount = frame->nb_samples;
soundtouch->putSamples((SAMPLETYPE*)frame->data[0], samplecount);
int bufsize = samplecount * frame->channels * sizeof(short);
unsigned char* buf = (unsigned char*)malloc(bufsize);
int gotsamplecount = soundtouch->receiveSamples((SAMPLETYPE*)buf, samplecount);
printf("soundtouch receiveSamples:gotsamplecount=%d bufsize=%d sizeof(SAMPLETYPE)=%lu\n", gotsamplecount, bufsize, sizeof(SAMPLETYPE));
if (gotsamplecount) {
fwrite(buf, gotsamplecount * frame->channels * sizeof(short), 1, file);
}
free(buf);
}
}
srcCount -= decodedcount;
}
}
}
av_packet_unref(&packet);
}
fclose(file);
}
av_frame_free(&frame);
delete soundtouch;
if (swr) {
swr_free(&swr);
}
}
avformat_free_context(formatContext);
}
// Compress to MP3. The FFmpeg build only needs MP3 encoding support (via LAME);
// other output formats could be used as well.
// The author has several FFmpeg builds with different features installed; this
// path pins one that can encode MP3.
const char* FFMPEGEXE = "/usr/local/Cellar/ffmpeg/2.6.2/bin/ffmpeg";
// Passed to ffmpeg as -ar for both the raw PCM input and the encoded output.
const int SAMPLE_RATE = 44100;
// Passed to ffmpeg as -ac for both the raw PCM input and the encoded output.
const int CHANNELS = 2;
// Output audio bitrate; encode() formats it as "-b:a <BITRATE>K".
const int BITRATE = 128;
// Size in bytes of the command-line buffer used by encode().
const int BUF_LEN = 1024;
// Encode a raw PCM file by running the ffmpeg command-line tool.
//
// srcfile: path of the input, read as signed 16-bit little-endian PCM with
//          SAMPLE_RATE / CHANNELS.
// outfile: path of the encoded output; an existing file is overwritten (-y).
//
// Failures (command too long for the buffer, non-zero status from system())
// are reported on stderr; nothing is returned.
// NOTE(review): both paths are pasted into a shell command unquoted, so paths
// containing spaces or shell metacharacters will not work and must not come
// from untrusted input.
void encode(const char* srcfile, const char* outfile) {
    if (srcfile == NULL || outfile == NULL) {
        fprintf(stderr, "encode: NULL path\n");
        return;
    }
    char buf[BUF_LEN] = {0};
    // snprintf bounds the write; the original sprintf overflowed buf on long paths.
    int written = snprintf(buf, sizeof(buf), "%s -ar %d -ac %d -f s16le -i %s -ar %d -ac %d -b:a %dK -y %s", FFMPEGEXE, SAMPLE_RATE, CHANNELS, srcfile, SAMPLE_RATE, CHANNELS, BITRATE, outfile);
    if (written < 0 || (size_t)written >= sizeof(buf)) {
        fprintf(stderr, "encode: command line does not fit in %d bytes\n", BUF_LEN);
        return;
    }
    int rc = system(buf);
    if (rc != 0) {
        fprintf(stderr, "encode: command failed (status=%d): %s\n", rc, buf);
    }
}
// Entry point: pitch-shift an audio file and encode the result as out.mp3.
//
// argv[1] (optional) is the input audio file; without it "test2.mp3" is used.
// Returns 0 after running both steps, 1 if the input path is too long.
int main(int argc, const char *argv[])
{
    const char* filepath = (argc > 1) ? argv[1] : "test2.mp3";

    // change_pcm_pitch writes its output to "<filepath>.pcm" using a 512-byte
    // path buffer; build the same name here and reject inputs that cannot fit.
    char pcmpath[512] = {0};
    int len = snprintf(pcmpath, sizeof(pcmpath), "%s.pcm", filepath);
    if (len < 0 || (size_t)len >= sizeof(pcmpath)) {
        fprintf(stderr, "input path too long: %s\n", filepath);
        return 1;
    }

    change_pcm_pitch(filepath); // creates <filepath>.pcm
    encode(pcmpath, "out.mp3");
    return 0;
}
上面的demo直接使用了soundtouch的源码来实现音效变化。
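soundtouch初始化:创建SoundTouch对象后,先调用setSampleRate、setChannels设置采样率与声道数,再用setTempo、setRate、setPitch分别设置节拍、播放速率与音调(见上面代码)。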
soundtouch作用于pcm:用putSamples送入解码(并重采样)后的pcm数据,再用receiveSamples取回处理后的数据并写入文件(见上面代码)。
需要注意,soundtouch并没有解码功能,它假设调用层已经有pcm数据。
小程这里没有考虑音效处理的性能,实际上对于在线实时的播放,音效的性能是要关注的因素,需要避免音效过于耗时而导致播放卡顿,这是另外一个话题了。
总结一下,本文介绍了soundtouch的调用,实现了音频变调等效果。soundtouch的接口调用比较简单,解码与重采样的处理工作反而更多一些。
以上是关于改变音调的主要内容,如果未能解决你的问题,请参考以下文章