speexdsp库实现音频3A算法,speexdsp库编译,C/C++
Posted 架相
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了speexdsp库实现音频3A算法,speexdsp库编译,C/C++相关的知识,希望对你有一定的参考价值。
目录
speex是音频编解码库,speexdsp是附加的音频DSP库,是音频降噪库,也有回声抑制和自动增益控制功能,即通常说的音频3A算法。
现在音频编解码大部分都是使用opus库,很少使用speex进行音频编解码,但还是会使用speexdsp库的3A算法对音频数据进行处理。
本例是在ubuntu环境下,C/C++语言,使用Qt进行测试。
speexdsp库编译
源码下载:https://github.com/xiph/speexdsp/tags
我下载的版本:speexdsp-SpeexDSP-1.2.1.zip,解压缩。
编译步骤:
cd speexdsp-SpeexDSP-1.2.1/
./autogen.sh
#./autogen.sh: 11: autoreconf: not found报错,安装依赖
sudo apt-get install -y autoconf automake libtool
./configure --prefix=$BUILD_LIBS --disable-oggtest
make
make install
在$BUILD_LIBS环境变量指向的目录下(本例为家目录下的build_libs/文件夹,执行configure之前需先设置该变量)生成include头文件、libspeexdsp.a静态库和libspeexdsp.so动态库。
使用speexdsp做音频去噪声demo
在Qt环境下测试,使用wav文件作为音频输入,把处理后的音频数据输出到本地wav文件,回声消除使用在语音通话场景,本例只给出接口样例。
把编译好的include和lib文件夹拷贝到工程目录下,pro文件添加如下内容:
INCLUDEPATH += $$PWD/include
LIBS += -L$$PWD/lib -lspeexdsp -ldl
chwRtcAec.h
#ifndef chwAUDIOPROC_chwRtcAec_H_
#define chwAUDIOPROC_chwRtcAec_H_
#include <stdint.h>
#include <stdlib.h>

/*
 * Function-pointer facade over the speexdsp based echo canceller / preprocessor.
 * `session` is an opaque pointer that must be passed back as the first argument
 * of every callback; chw_create_rtcaec() allocates it and fills in the callbacks.
 */
typedef struct
{
    void *session;  /* opaque per-instance state */
    void (*init)(void* session, int32_t sampleRate, int32_t pchannel,int32_t pframeSize,int32_t echopath);
    void (*closeAec)(void* session);
    void (*echoCapture)(void* session,short *rec, short *out);
    void (*preprocessRun)(void* session,short *pcm);
    void (*echoStateReset)(void* session);
    void (*echoPlayback)(void* session,short *play);
    void (*echoCancellation)(void* session,const short *rec, const short *play,
                             short *out);
} chwRtcAec;

/*
 * Release helpers that also reset the pointer to NULL.
 * Wrapped in do { } while (0) so each expands to exactly one statement and is
 * safe inside an unbraced if/else; callers terminate the macro with ';'.
 * chw_delete / chw_deleteA use C++ delete and are therefore C++ only.
 */
#define chw_delete(a)  do { if ((a)) delete (a);   (a) = NULL; } while (0)
#define chw_deleteA(a) do { if ((a)) delete[] (a); (a) = NULL; } while (0)
#define chw_free(a)    do { if ((a)) free((a));    (a) = NULL; } while (0)

#ifdef __cplusplus
extern "C" {
#endif
/* Configure the session: sample rate, channel count, samples per frame, echo-path multiplier. */
void chw_rtcaec_init(void* context, int32_t sampleRate, int32_t pchannel,int32_t pframeSize,int32_t echopath);
/* Run the preprocessor in place on one frame of 16-bit PCM. */
void chw_rtcaec_preprocess_run(void* context,short *pcm);
/* Allocate aec->session and populate the callback table. */
void chw_create_rtcaec(chwRtcAec* aec);
/* Tear down the session created by chw_create_rtcaec(). */
void chw_destroy_rtcaec(chwRtcAec* aec);
#ifdef __cplusplus
}
#endif
#endif /* chwAUDIOPROC_chwRtcAec_H_ */
chwRtcAec.c
#include "chwRtcAec.h"
#include <speex/speex_echo.h>
#include <speex/speex_preprocess.h>
#include <stdio.h>
#include <string.h>
/*
 * Private per-session state; this is what the opaque `session` / `context`
 * pointers handed to the chw_rtcaec_* functions actually point to.
 */
typedef struct chwRtcAec
{
    int32_t channel;     /* channel count, used for both microphone and speaker sides */
    int32_t frameSize;   /* samples processed per call */
    int32_t sampleRate;  /* sampling rate passed to speex */
    int32_t echoPath;    /* multiplier: echo filter length = frameSize * echoPath */
    SpeexEchoState *state;              /* echo canceller, NULL until init */
    SpeexPreprocessState *preprocState; /* denoise/AGC preprocessor, NULL until init */
} chwRtcAecContext;
///m_aec->session,16000,1,320,10
void chw_rtcaec_init(void* context, int32_t sampleRate, int32_t pchannel,int32_t pframeSize,int32_t echopath)
if(context==NULL) return;
chwRtcAecContext* aec=(chwRtcAecContext*)context;
aec->frameSize = pframeSize;
aec->sampleRate = sampleRate;
aec->channel = pchannel;
aec->echoPath=echopath;
int32_t frame_size = aec->frameSize;//(aec->frameSizeInMS * aec->sampleRate * 1.0) / 1000;
int32_t filter_length = aec->frameSize * aec->echoPath;//20;//30;//aec->echoPath;//(aec->frameSizeInMS * aec->sampleRate * 1.0) / 1000;
/** 创建一个新的多通道回波取消器状态
* @param frame_size 一次要处理的样本数(应对应10-20 ms)
* @param filter_length 要取消的回波样本数(一般应对应100-500 ms)
* @param aec->channel 麦克风通道数
* @param aec->channel 扬声器通道数
* @return 新创建的回波取消器状态
*/
if(!aec->state) aec->state = speex_echo_state_init_mc(frame_size, filter_length, aec->channel,aec->channel);
//frame_size *= 2; // length read each time
/** 将创建一个新的预处理状态。您必须为每个已处理的通道创建一个状态。
* @param frame_size 一次要处理的样品数(应对应10-20 ms)。必须为:与用于剩余回声消除的回波消除器的值相同。
* @param sampling_rate 用于输入的采样率。
* @return 返回新创建的预处理器状态
*/
if(!aec->preprocState) aec->preprocState = speex_preprocess_state_init(frame_size, aec->sampleRate);
speex_echo_ctl(aec->state, SPEEX_ECHO_SET_SAMPLING_RATE, &aec->sampleRate);//设置采样率
speex_preprocess_ctl(aec->preprocState, SPEEX_PREPROCESS_SET_ECHO_STATE,aec->state);//设置相应的回波消除器状态,以便可进行残余回波抑制(无残余回波抑制时为NULL)
int32_t i = 1;
//speex_preprocess_ctl(aec->preprocState, SPEEX_PREPROCESS_SET_VAD, &i);
int32_t noiseSuppress = -25;
i = 1;
speex_preprocess_ctl(aec->preprocState, SPEEX_PREPROCESS_SET_DENOISE, &i);//设置预处理器去噪状态
speex_preprocess_ctl(aec->preprocState, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &noiseSuppress);//设置噪声的最大衰减,单位为dB(负数)
i=0;
speex_preprocess_ctl(aec->preprocState, SPEEX_PREPROCESS_SET_AGC, &i);//设置预处理器自动增益控制状态
i=sampleRate;
speex_preprocess_ctl(aec->preprocState, SPEEX_PREPROCESS_SET_AGC_LEVEL, &i);//设置预处理器自动增益控制级别(浮动)
i=0;
speex_preprocess_ctl(aec->preprocState, SPEEX_PREPROCESS_SET_DEREVERB, &i);//设置预处理器dereverb状态
float f=.0;
speex_preprocess_ctl(aec->preprocState, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &f);//设置预处理器dereverb decay
f=.0;
speex_preprocess_ctl(aec->preprocState, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &f);//设置EverB级别的预处理器
void chw_rtcaec_close(void* context)
if(context==NULL) return;
chwRtcAecContext* aec=(chwRtcAecContext*)context;
if(aec->state) speex_echo_state_destroy(aec->state);//销毁回声消除状态
if(aec->preprocState) speex_preprocess_state_destroy(aec->preprocState);//销毁预处理器状态
aec->state = NULL;
aec->preprocState = NULL;
void chw_rtcaec_echo_capture(void* context,short *rec, short *out)
if(context==NULL) return;
chwRtcAecContext* aec=(chwRtcAecContext*)context;
/** 使用内部回放缓冲区执行回声取消,这会延迟两帧
* 来解释大多数声卡引入的延迟(但它可能是关闭的!)
* @param aec->state 回声消除器状态
* @param rec 麦克风信号(近端+远端回声)
* @param out 返回去除回声的近端信号
*/
if(aec->state) speex_echo_capture(aec->state, rec, out);
void chw_rtcaec_preprocess_run(void* context,short *pcm)
if(context==NULL) return;
chwRtcAecContext* aec=(chwRtcAecContext*)context;
/** 对帧进行预处理
* @param aec->preprocState 预处理器状态
* @param Pcm 音频样本向量(进和出)。必须与在speex_preprocess_state_init()中指定的大小相同。
* @return 语音业务的Bool值(1表示语音,0表示噪音/静音),仅当VAD打开时使用。
*/
if(aec->preprocState) speex_preprocess_run(aec->preprocState, pcm);
void chw_rtcaec_echo_state_reset(void* context)
if(context==NULL) return;
chwRtcAecContext* aec=(chwRtcAecContext*)context;
/** 将回声取消器重置到其原始状态
* @param aec->state 回声消除状态
*/
if(aec->state) speex_echo_state_reset(aec->state);
void chw_rtcaec_echo_playback(void* context,short *play)
if(context==NULL) return;
chwRtcAecContext* aec=(chwRtcAecContext*)context;
/** 让回声取消器知道一个帧刚刚排队到声卡上
* @param aec->state 回声消除状态
* @param play 信号播放到扬声器(从远端接收)
*/
if(aec->state) speex_echo_playback(aec->state, play);
void chw_rtcaec_echo_cancellation(void* context,const short *rec, const short *play,
short *out)
if(context==NULL) return;
chwRtcAecContext* aec=(chwRtcAecContext*)context;
/** 根据发送到扬声器的音频执行回声取消帧(没有添加延迟以此形式播放)
* @param aec->state 回声消除状态
* @param rec 来至麦克风的信号(近端+远端回声)
* @param play 信号播放到扬声器(从远端接收)
* @param out 返回消除回声的近端信号
*/
if(aec->state) speex_echo_cancellation(aec->state, rec, play, out);
/*
 * Allocate a zero-initialised session and wire every callback of `aec` to the
 * speex-backed implementation in this file. A NULL `aec` is ignored.
 * If the allocation fails, aec->session is NULL; every callback ignores a NULL
 * session, so the object is inert rather than unsafe.
 */
void chw_create_rtcaec(chwRtcAec* aec)
{
    if(aec==NULL) return;
    /* calloc(count, size): one zeroed context, so state/preprocState start as NULL. */
    aec->session=(chwRtcAecContext*)calloc(1, sizeof(chwRtcAecContext));
    aec->init=chw_rtcaec_init;
    aec->closeAec=chw_rtcaec_close;
    aec->echoCancellation=chw_rtcaec_echo_cancellation;
    aec->echoCapture=chw_rtcaec_echo_capture;
    aec->echoPlayback=chw_rtcaec_echo_playback;
    aec->echoStateReset=chw_rtcaec_echo_state_reset;
    aec->preprocessRun=chw_rtcaec_preprocess_run;
}
/*
 * Destroy the speex states held by aec->session, then free the session and set
 * aec->session to NULL. The chwRtcAec struct itself stays owned by the caller.
 * A NULL `aec` is ignored.
 */
void chw_destroy_rtcaec(chwRtcAec* aec)
{
    if(aec==NULL) return;
    chw_rtcaec_close((chwRtcAecContext*)aec->session);
    chw_free(aec->session);
}
qt调用示例:
#include "spxmainwindow.h"
#include "ui_spxmainwindow.h"
#include <QFile>
// Number of header bytes dealAudio() copies verbatim from the input WAV to the output WAV.
#define HEADLEN 44
// NOTE(review): only referenced by FRAME_SIZE below; the constructor passes the
// literal 44100 to chw_rtcaec_init, not this value - confirm which rate is intended.
#define SAMPLE_RATE (48000)
// Per-frame unit used by dealAudio() for its file I/O; equals the frame-size
// literal (1024) passed to chw_rtcaec_init in the constructor.
#define SAMPLES_PER_FRAME (1024)
// Frame duration in milliseconds, computed with integer arithmetic.
#define FRAME_SIZE (SAMPLES_PER_FRAME * 1000/ SAMPLE_RATE)
// Defined equal to SAMPLES_PER_FRAME.
// NOTE(review): for 16-bit PCM a frame occupies twice this many bytes - confirm intent.
#define FRAME_BYTES (SAMPLES_PER_FRAME)
// Builds the UI, allocates the scratch PCM buffer and the AEC wrapper,
// initialises the speex session and immediately runs the WAV denoise demo.
spxMainWindow::spxMainWindow(QWidget *parent)
    : QMainWindow(parent)
    , ui(new Ui::spxMainWindow)
{
    ui->setupUi(this);
    pcm = new short[4096/2];

    // calloc(count, size): zeroed wrapper so every callback pointer starts as NULL.
    m_aec = (chwRtcAec*)calloc(1, sizeof(chwRtcAec));
    if (m_aec == NULL) return;   // nothing to initialise or run without the wrapper

    chw_create_rtcaec(m_aec);
    // Sample rate, channel count and frame size must match the audio being processed.
    // NOTE(review): SAMPLE_RATE is defined as 48000 in this file but 44100 is passed
    // here - confirm against the actual input.
    chw_rtcaec_init(m_aec->session,44100,1,1024,10);
    dealAudio();
}
// Releases the AEC session, the wrapper struct, the scratch PCM buffer and the UI.
spxMainWindow::~spxMainWindow()
{
    chw_destroy_rtcaec(m_aec);   // ignores NULL; frees m_aec->session
    chw_free(m_aec);             // wrapper was allocated with calloc
    chw_deleteA(pcm);            // buffer was allocated with new[]
    delete ui;
}
//消除噪声处理,以wav文件为例
void spxMainWindow::dealAudio()
size_t len = 0;
QFile inFile, outFile;
inFile.setFileName("./test.wav");
outFile.setFileName("./outtest.wav");
char headBuf[HEADLEN];
char dataBuf[FRAME_BYTES * 2];
QByteArray readBuf;
if(inFile.open(QIODevice::ReadOnly))
inFile.read(headBuf,HEADLEN);//wav头部
if(outFile.open(QIODevice::WriteOnly))
outFile.write(headBuf,HEADLEN);
while(inFile.atEnd() == false)
len = inFile.read(dataBuf,SAMPLES_PER_FRAME);
if(len == 0) break;
chw_rtcaec_preprocess_run(m_aec->session,(short*)dataBuf);
outFile.write(dataBuf,SAMPLES_PER_FRAME);
inFile.flush();
inFile.close();
outFile.flush();
outFile.close();
//回声消除,播放对方音频后调用
void spxMainWindow::dealEchoPlay(short* pbuf,int32_t plen)
if(m_aec) m_aec->echoPlayback(m_aec->session,pbuf);
//回声采样和去噪,采集本端音频后,编码前调用
void spxMainWindow::putEchoBuffer( uint8_t *pBuffer,int32_t plen)
if (m_aec)
m_aec->echoCapture(m_aec->session, (short*) pBuffer, pcm);
m_aec->preprocessRun(m_aec->session, pcm);
m_audioFrame.data = (uint8_t*)pcm;
m_audioFrame.len = plen;
// m_resample.resample(m_resample.context,&m_audioFrame);
报错记录
使用回声消除时报如下几个警告:
warning: No playback frame available (your application is buggy and/or got xruns)
warning: Had to discard a playback frame (your application is buggy and/or got xruns)
warning: Auto-filling the buffer (your application is buggy and/or got xruns)
基本不影响业务,看着不顺眼的可以修改libspeexdsp源码里的os_support.h文件,在里面添加下面定义,不打印WARNINGS信息,重新编译即可。
#define DISABLE_WARNINGS
用于服务器端估计/识别音频文件相似性的框架/算法/库? [关闭]
【中文标题】用于服务器端估计/识别音频文件相似性的框架/算法/库? [关闭]【英文标题】:Framework/Algorithms/Library for server side estimation/recognition of audio file similarity? [closed] 【发布时间】:2010-04-14 07:08:27 【问题描述】:我想实现一个 android 应用程序,它将录制的音频文件与我们的声音数据库进行比较并报告相似度指标。该应用程序将以某种方式像 Shazam 一样工作,但与我们的数据库音频文件相匹配。谁能指出一个开源项目,它实现了足够的算法来在我们的服务器上进行声音识别/比较?谢谢
【问题讨论】:
【参考方案1】:音频散列用于进行此类比较。这是一篇解决音频散列挑战的技术论文:http://www.eurasip.org/Proceedings/Eusipco/Eusipco2004/defevent/papers/cr1091.pdf
有些技术可以从像http://www.gracenote.com/这样的公司获得许可
【讨论】:
谢谢,我会查看论文的!真的没有开源实现吗?我要谷歌它... 你希望这可以作为记录器应用程序还是系统范围的工作?以上是关于speexdsp库实现音频3A算法,speexdsp库编译,C/C++的主要内容,如果未能解决你的问题,请参考以下文章