Speech recognition and sound comparison with musicg
Posted: 2012-09-30 23:04:59

【Question】: I am trying to build an Android app with speech recognition, but unfortunately Google does not support my language (Macedonian), so instead I am trying to compare two recorded voice clips with each other.
I am using http://code.google.com/p/musicg/ to record and compare the voice clips, and I am initialising the settings it uses for detection. Could someone tell me how to rewrite this initialisation so that it works for speech detection? This is very important to me. Any other ideas on how to do it are also welcome.
Here is the initialisation for whistle detection:
// settings for detecting a whistle
minFrequency = 600.0f;
maxFrequency = Double.MAX_VALUE;
minIntensity = 100.0f;
maxIntensity = 100000.0f;
minStandardDeviation = 0.1f;
maxStandardDeviation = 1.0f;
highPass = 500;
lowPass = 10000;
minNumZeroCross = 50;
maxNumZeroCross = 200;
numRobust = 10;
【Question Comments】:
Did you ever get this working? I am trying to do something similar.

I will post some code, but it does not really do what I hoped: I cannot get a reliable match every time. That is not down to the API's algorithm, though, but to differing background noise and to how fast the word is spoken in each recording.

【Answer 1】: My understanding is that the current musicg DetectionApi only analyses a single block of sound and tells you whether it contains a particular kind of sound, as in the bundled whistle and clap API examples: is it a clap, is it a whistle.
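To make the "single block" point concrete, here is a rough sketch of how the bundled detector is usually driven, based on my memory of the musicg whistle demo — the exact WhistleApi/WaveHeader constructor and setter names should be checked against the library source:

import com.musicg.api.WhistleApi;
import com.musicg.wave.WaveHeader;

// Rough sketch only: one short frame of audio in, a yes/no answer out.
// There is no notion of words or language here.
public class WhistleCheck {
    private final WhistleApi whistleApi;

    public WhistleCheck() {
        WaveHeader header = new WaveHeader();  // describes the incoming PCM frames
        header.setChannels(1);
        header.setSampleRate(44100);
        header.setBitsPerSample(16);
        whistleApi = new WhistleApi(header);
    }

    public boolean isWhistleFrame(byte[] audioFrame) {
        // audioFrame is one buffer read from AudioRecord
        return whistleApi.isWhistle(audioFrame);
    }
}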
The best you could probably do with musicg is detect whether a sound is a voice at all, and even that is likely beyond what the DetectionApi offers.
Since you say the Google API does not support Macedonian, perhaps you could try Pocketsphinx, which is mentioned in this Stack Overflow post.
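If you go the Pocketsphinx route, the pocketsphinx-android wrapper exposes a keyword-spotting style recognizer. Below is a rough sketch of the usual setup; note that Pocketsphinx ships no ready-made Macedonian model, so you would have to train or adapt an acoustic model and dictionary yourself — the model file names and the keyphrase here are only placeholders:

import java.io.File;
import java.io.IOException;

import android.content.Context;

import edu.cmu.pocketsphinx.Assets;
import edu.cmu.pocketsphinx.RecognitionListener;
import edu.cmu.pocketsphinx.SpeechRecognizer;
import edu.cmu.pocketsphinx.SpeechRecognizerSetup;

public class SphinxSketch {
    // Model and dictionary names are placeholders; supply your own Macedonian files.
    public static SpeechRecognizer setup(Context context, RecognitionListener listener)
            throws IOException {
        Assets assets = new Assets(context);
        File assetsDir = assets.syncAssets();  // copies the bundled model files to storage

        SpeechRecognizer recognizer = SpeechRecognizerSetup.defaultSetup()
                .setAcousticModel(new File(assetsDir, "mk-acoustic-model")) // placeholder
                .setDictionary(new File(assetsDir, "mk.dict"))              // placeholder
                .getRecognizer();

        recognizer.addListener(listener);
        recognizer.addKeyphraseSearch("command", "zdravo"); // spot a single keyphrase
        return recognizer;  // call recognizer.startListening("command") to begin
    }
}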
【Comments】:
【Answer 2】: First of all, you need to save the recorded sound as a WAV file; after that it is easy to use the fingerprint classes from their API: https://code.google.com/p/musicg/source/browse/#git%2Fsrc%2Fcom%2Fmusicg%2Ffingerprint
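In essence the whole comparison boils down to loading two Wave objects and asking for their fingerprint similarity. A minimal sketch of just that step (the package paths are taken from the link above; the file paths are only examples — verify both against the musicg version you use):

import com.musicg.fingerprint.FingerprintSimilarity;
import com.musicg.wave.Wave;

public class QuickCompare {
    // Compare one freshly recorded clip against one stored reference clip.
    public static float compare(String recordedPath, String referencePath) {
        Wave recorded = new Wave(recordedPath);
        Wave reference = new Wave(referencePath);
        FingerprintSimilarity similarity = recorded.getFingerprintSimilarity(reference);
        return similarity.getSimilarity(); // roughly 0.0 (no match) up to 1.0 (identical clip)
    }
}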
Here is how I do the comparison: one temporarily recorded WAV clip against all the WAV files stored in my database.
// Baza, Protocol and TransferClass are my own database/DTO classes.
// Compares a temporarily recorded WAV clip against every WAV stored in the
// database, saves the similarity score for each entry, and returns a cursor
// over the entries ordered by similarity.
public Cursor FP(String recordedClip, Context context) {
    Baza baza = new Baza(context);
    Cursor allSound = baza.getAllProtocolsForSoundCheck();
    List<Protocol> protocols = new ArrayList<Protocol>();
    Log.d("broj", allSound.getCount() + "");

    // Collect the stored sound paths and reset every similarity value
    for (int i = 0; i < allSound.getCount(); i++) {
        Protocol protocol = new Protocol();
        allSound.moveToNext();
        protocol.setSoundPath(allSound.getString(4));
        protocol.setId(Integer.parseInt(allSound.getString(1)));
        protocols.add(protocol);
        Log.d("brojProtocol", allSound.getString(2) + " ");
        baza.updateProtocolsSoundSimilarity(protocol.getId(), (float) -1);
    }

    // Load the recorded clip and every stored clip as musicg Wave objects
    Wave record = new Wave(recordedClip);
    List<Wave> waves = new ArrayList<Wave>();
    for (int i = 0; i < protocols.size(); i++) {
        waves.add(new Wave(protocols.get(i).getSoundPath()));
    }

    // Fingerprint similarity between the recording and each stored clip
    for (int i = 0; i < waves.size(); i++) {
        float similarity = record.getFingerprintSimilarity(waves.get(i)).getSimilarity();
        Log.d("similarity", similarity + "");
        baza.updateProtocolsSoundSimilarity(protocols.get(i).getId(), similarity);
    }

    Cursor similarCursor = baza.getSimilarProtocols();
    similarCursor.moveToFirst();

    TransferClass protocolForTransfer = new TransferClass();
    protocolForTransfer.setId(Integer.parseInt(similarCursor.getString(1)));
    protocolForTransfer.setName(similarCursor.getString(2));
    Log.d("passobj", protocolForTransfer.getName() + " " + protocolForTransfer.getId());
    // return protocolForTransfer;

    return similarCursor;
}
【Comments】:
【Answer 3】: Here is how I save the temporarily recorded sound as a WAV file:
// Records raw PCM audio with AudioRecord, draws a simple FFT visualisation while
// recording, and wraps the temporary raw file in a WAV header when recording stops.
// Assumed imports below: RealDoubleFFT is the fftpack port commonly used in Android
// FFT tutorials; EndPointDetection, Baza etc. are my own classes.
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;

import android.content.Context;
import android.graphics.Bitmap;
import android.graphics.Canvas;
import android.graphics.Color;
import android.graphics.Paint;
import android.media.AudioFormat;
import android.media.AudioRecord;
import android.media.MediaRecorder;
import android.os.AsyncTask;
import android.os.Environment;
import android.widget.ImageView;

import ca.uol.aig.fftpack.RealDoubleFFT;

public class RecorderActivity {

    private static final int RECORDER_BPP = 16;
    private static final String AUDIO_RECORDER_FILE_EXT_WAV = ".wav";
    private static final String AUDIO_RECORDER_FOLDER = "HQProtocol/sound";
    private static final String AUDIO_RECORDER_TEMP_FILE = "record_temp.raw";
    private String AUDIO_RECORDER_FILE = "";

    private static final int RECORDER_SAMPLERATE = 8000;
    private static final int RECORDER_CHANNELS = AudioFormat.CHANNEL_IN_MONO;
    private static final int RECORDER_AUDIO_ENCODING = AudioFormat.ENCODING_PCM_16BIT;

    private RealDoubleFFT transformer;
    EndPointDetection endpoint;  // my own endpoint-detection class (not used below)
    int blockSize = 256;

    private AudioRecord recorder = null;
    private int bufferSize = 0;
    private RecorderAsynctask recordingThread = null;
    private boolean isRecording = false;

    float tempFloatBuffer[] = new float[3];
    int tempIndex = 0;
    int totalReadBytes = 0;

    ImageView imageView;
    Bitmap bitmap;
    Canvas canvas;
    Paint paint;
    Context con;

    RecorderActivity(String file, Context con, ImageView image) {
        AUDIO_RECORDER_FILE = file;
        this.con = con;
        this.imageView = image;

        // Bitmap used to draw the live spectrum while recording
        bitmap = Bitmap.createBitmap(256, 100, Bitmap.Config.ARGB_8888);
        canvas = new Canvas(bitmap);
        paint = new Paint();
        paint.setStrokeWidth(5);
        paint.setColor(Color.BLUE);
        imageView.setImageBitmap(bitmap);

        transformer = new RealDoubleFFT(256);
        bufferSize = AudioRecord.getMinBufferSize(RECORDER_SAMPLERATE,
                RECORDER_CHANNELS, RECORDER_AUDIO_ENCODING);
    }

    public String getFilename() {
        String filepath = Environment.getExternalStorageDirectory().getPath();
        File file = new File(filepath, AUDIO_RECORDER_FOLDER);
        if (!file.exists()) {
            file.mkdirs();
        }
        return (file.getAbsolutePath() + "/" + AUDIO_RECORDER_FILE + AUDIO_RECORDER_FILE_EXT_WAV);
    }

    private String getTempFilename() {
        String filepath = Environment.getExternalStorageDirectory().getPath();
        File file = new File(filepath, AUDIO_RECORDER_FOLDER);
        if (!file.exists()) {
            file.mkdirs();
        }
        File tempFile = new File(filepath, AUDIO_RECORDER_TEMP_FILE);
        if (tempFile.exists()) {
            tempFile.delete();
        }
        return (file.getAbsolutePath() + "/" + AUDIO_RECORDER_TEMP_FILE);
    }

    public void startRecording() {
        recorder = new AudioRecord(MediaRecorder.AudioSource.MIC,
                RECORDER_SAMPLERATE, RECORDER_CHANNELS,
                RECORDER_AUDIO_ENCODING, bufferSize);
        recorder.startRecording();
        isRecording = true;

        recordingThread = new RecorderAsynctask();
        recordingThread.execute(this);
    }

    class RecorderAsynctask extends AsyncTask<RecorderActivity, double[], Void> {

        public void shareLockedfuntionProgreesUpdate(double[] fttrezult) {
            publishProgress(fttrezult);
        }

        @Override
        protected Void doInBackground(RecorderActivity... params) {
            // Read PCM data from the AudioRecord, FFT each block for the visualisation,
            // and append the raw bytes to a temporary file.
            byte data[] = new byte[bufferSize];
            String filename = getTempFilename();
            FileOutputStream os = null;
            try {
                os = new FileOutputStream(filename);
            } catch (FileNotFoundException e) {
                e.printStackTrace();
            }

            double[] toTransform = new double[blockSize];

            if (null != os) {
                while (isRecording) {
                    int bufferReadResult = recorder.read(data, 0, blockSize);

                    // Normalise signed 16-bit samples to [-1, 1] for the FFT
                    for (int i = 0; i < blockSize && i < bufferReadResult; i++) {
                        toTransform[i] = (double) data[i] / 32768.0;
                    }
                    transformer.ft(toTransform);
                    publishProgress(toTransform);

                    // Skip the write if the read failed
                    if (AudioRecord.ERROR_INVALID_OPERATION != bufferReadResult) {
                        try {
                            os.write(data);
                            tempIndex++;
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                    }
                }

                try {
                    os.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            return null;
        }

        @Override
        protected void onProgressUpdate(double[]... toTransform) {
            // Draw one vertical line per FFT bin on the preview bitmap
            canvas.drawColor(Color.GRAY);
            Paint p = new Paint();
            for (int i = 0; i < toTransform[0].length; i++) {
                int x = i;
                int downy = (int) (100 - (toTransform[0][i] * 10));
                int upy = 100;
                p.setColor(Color.rgb(downy % 256, i % 256, upy % 256));
                canvas.drawLine(x, upy, x, downy, p);
            }
            imageView.invalidate();
        }
    }

    public void writeAudioDataToFile(RecorderAsynctask asyntask) {
        // Alternative write loop that pushes FFT progress through the supplied task
        byte data[] = new byte[bufferSize];
        String filename = getTempFilename();
        FileOutputStream os = null;
        try {
            os = new FileOutputStream(filename);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }

        double[] toTransform = new double[256];

        if (null != os) {
            while (isRecording) {
                int bufferReadResult = recorder.read(data, 0, 256);

                for (int i = 0; i < 256 && i < bufferReadResult; i++) {
                    toTransform[i] = (double) data[i] / 32768.0; // signed 16 bit
                }
                transformer.ft(toTransform);
                asyntask.shareLockedfuntionProgreesUpdate(toTransform);

                // Skip the write if the read failed
                if (AudioRecord.ERROR_INVALID_OPERATION != bufferReadResult) {
                    try {
                        os.write(data);
                        tempIndex++;
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }

            try {
                os.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    public void stopRecording() {
        if (null != recorder) {
            isRecording = false;
            recorder.stop();
            recorder.release();
            recorder = null;
            recordingThread = null;
        }
        // Wrap the raw PCM data in a WAV header and remove the temp file
        copyWaveFile(getTempFilename(), getFilename());
        deleteTempFile();
    }

    private void deleteTempFile() {
        File file = new File(getTempFilename());
        file.delete();
    }

    private void copyWaveFile(String inFilename, String outFilename) {
        FileInputStream in = null;
        FileOutputStream out = null;
        long totalAudioLen = 0;
        long totalDataLen = totalAudioLen + 36;
        long longSampleRate = RECORDER_SAMPLERATE;
        int channels = 1;
        long byteRate = RECORDER_BPP * RECORDER_SAMPLERATE * channels / 8;

        byte[] data = new byte[bufferSize];

        try {
            in = new FileInputStream(inFilename);
            out = new FileOutputStream(outFilename);
            totalAudioLen = in.getChannel().size();
            totalDataLen = totalAudioLen + 36;

            WriteWaveFileHeader(out, totalAudioLen, totalDataLen,
                    longSampleRate, channels, byteRate);

            while (in.read(data) != -1) {
                out.write(data);
            }

            in.close();
            out.close();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    private void WriteWaveFileHeader(FileOutputStream out, long totalAudioLen,
            long totalDataLen, long longSampleRate, int channels, long byteRate)
            throws IOException {

        byte[] header = new byte[44];

        header[0] = 'R'; // RIFF/WAVE header
        header[1] = 'I';
        header[2] = 'F';
        header[3] = 'F';
        header[4] = (byte) (totalDataLen & 0xff);
        header[5] = (byte) ((totalDataLen >> 8) & 0xff);
        header[6] = (byte) ((totalDataLen >> 16) & 0xff);
        header[7] = (byte) ((totalDataLen >> 24) & 0xff);
        header[8] = 'W';
        header[9] = 'A';
        header[10] = 'V';
        header[11] = 'E';
        header[12] = 'f'; // 'fmt ' chunk
        header[13] = 'm';
        header[14] = 't';
        header[15] = ' ';
        header[16] = 16; // 4 bytes: size of 'fmt ' chunk
        header[17] = 0;
        header[18] = 0;
        header[19] = 0;
        header[20] = 1; // format = 1 (PCM)
        header[21] = 0;
        header[22] = (byte) channels;
        header[23] = 0;
        header[24] = (byte) (longSampleRate & 0xff);
        header[25] = (byte) ((longSampleRate >> 8) & 0xff);
        header[26] = (byte) ((longSampleRate >> 16) & 0xff);
        header[27] = (byte) ((longSampleRate >> 24) & 0xff);
        header[28] = (byte) (byteRate & 0xff);
        header[29] = (byte) ((byteRate >> 8) & 0xff);
        header[30] = (byte) ((byteRate >> 16) & 0xff);
        header[31] = (byte) ((byteRate >> 24) & 0xff);
        header[32] = (byte) (2 * 16 / 8); // block align
        header[33] = 0;
        header[34] = RECORDER_BPP; // bits per sample
        header[35] = 0;
        header[36] = 'd';
        header[37] = 'a';
        header[38] = 't';
        header[39] = 'a';
        header[40] = (byte) (totalAudioLen & 0xff);
        header[41] = (byte) ((totalAudioLen >> 8) & 0xff);
        header[42] = (byte) ((totalAudioLen >> 16) & 0xff);
        header[43] = (byte) ((totalAudioLen >> 24) & 0xff);

        out.write(header, 0, 44);
    }

    public void closeThreadIfisnot() {
        recordingThread.cancel(true);
    }
}
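Putting the two answers together, a typical call sequence might look like the following. This is illustrative only: "soundMatcher" stands for whatever class holds the FP(...) method from the previous answer, and spectrumImageView is just the ImageView used for the FFT preview.

// Record a clip, stop, then compare it against the stored WAVs.
RecorderActivity recorder = new RecorderActivity("my_clip", context, spectrumImageView);

recorder.startRecording();   // e.g. when the user presses a record button
// ... the user speaks the word ...
recorder.stopRecording();    // wraps the raw temp file into my_clip.wav

// The fingerprint comparison does file I/O and DSP, so run it off the UI thread.
String recordedPath = recorder.getFilename();
Cursor bestMatches = soundMatcher.FP(recordedPath, context);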
【Comments】: