调用百度语音AI实现语音的识别和合成
Posted yejifeng
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了调用百度语音AI实现语音的识别和合成相关的知识,希望对你有一定的参考价值。
#coding:utf-8
## 先去ffmpeg官网下载(https://ffmpeg.zeranoe.com/builds/),好了之后解压缩,配一下环境变量
## 打开cmd,运行命令,安装如下的包
## pip install baidu-aip
## pip install pydub
## pip install PyAudio
## pip install Wave
""" 调用百度语音api """
from aip import AipSpeech
# Credentials passed to the Baidu speech client. The single-space values look
# like placeholders -- NOTE(review): presumably they must be replaced with the
# real APP_ID / API_KEY / SECRET_KEY of a Baidu AI application; confirm.
APP_ID = " "
API_KEY = " "
SECRET_KEY = " "
# Module-level client shared by speech_synthesis() and speech_recognition().
client = AipSpeech(APP_ID,API_KEY,SECRET_KEY)
def speech_synthesis(text, filepath):
    """Speech synthesis: convert text to speech and save it to a file.

    Sends *text* to the module-level Baidu ``client`` and, when the reply is
    not a dict, writes the returned bytes to *filepath*.

    Args:
        text: The text to synthesize.
        filepath: Path of the file that receives the audio data.

    Returns:
        None. When the service replies with a dict nothing is written.
    """
    # The option values must be passed as one dict argument.
    # NOTE(review): presumably vol/spd/pit are volume, speed and pitch and
    # 'per' selects the voice -- confirm against the Baidu TTS documentation.
    options = {
        'vol': 5,
        'spd': 5,
        'pit': 5,
        'per': 0,
    }
    result = client.synthesis(text, 'zh', 1, options)

    # A dict reply is treated as "no audio" (presumably an error description),
    # anything else is written out verbatim as the audio payload.
    if not isinstance(result, dict):
        with open(filepath, 'wb') as audio_file:
            audio_file.write(result)
def play_speech(filepath):
    """Play an audio file with the external ``ffplay`` program.

    Blocks until ``ffplay`` exits.

    Args:
        filepath: Path of the audio file to play.

    Returns:
        None. If ``ffplay`` cannot be started a message is printed to stderr
        instead of raising.
    """
    import subprocess
    import sys

    # Pass the path as its own argv entry instead of formatting it into a
    # shell command line, so paths containing spaces or shell metacharacters
    # are played correctly and cannot inject extra commands.
    try:
        subprocess.call(["ffplay", str(filepath)])
    except OSError as exc:
        # Running through a shell never raised when ffplay was missing; keep
        # that non-raising contract but report the problem explicitly.
        print("ffplay could not be started: %s" % exc, file=sys.stderr)
# def play_speech(filepath):
# """ 播放语音 """
# import pyaudio
# import wave
# wf = wave.open(filepath, 'rb') #二进制只读方式打开wav文件
# p = pyaudio.PyAudio()
# stream=p.open(format=p.get_format_from_width(wf.getsampwidth()),channels=wf.getnchannels(),rate=wf.getframerate(),output=True)
# stream = p.open(format=pyaudio.paInt16,
# channels=1,
# rate=16000,
# output=True) #打开数据流
# data = wf.readframes(1024) #读取数据
# while data != b'': #播放
# stream.write(data)
# data = wf.readframes(1024)
# stream.stop_stream()
# stream.close()
# p.terminate()
# def Conversion_sampling_rate(filepath, newfilepath):
# """ 转换采样率 """
# from pydub import AudioSegment
# setframefp = AudioSegment.from_file(filepath)
# setframefp.set_frame_rate(16000)
# setframefp.export(newfilepath, format='wav')
def wav_to_pcm(wav_file):
    """Convert a wav file to a 16 kHz mono signed 16-bit PCM file via ffmpeg.

    Args:
        wav_file: Path of the input audio file.

    Returns:
        The path of the PCM file: *wav_file* with its final extension replaced
        by ``.pcm``. The path is returned even when ffmpeg could not be run or
        reported failure; in that case a message is printed to stderr and the
        file may not exist.
    """
    import os
    import subprocess
    import sys

    wav_file = str(wav_file)
    # splitext strips only the final extension, so dots elsewhere in the path
    # ("./rec.wav", "my.dir/rec.wav", "a.b.wav") no longer truncate the name.
    pcm_file = os.path.splitext(wav_file)[0] + ".pcm"

    # An argv list (no shell) keeps paths with spaces or shell metacharacters
    # intact and prevents command injection through the file name.
    command = [
        "ffmpeg", "-y",
        "-i", wav_file,
        "-acodec", "pcm_s16le",
        "-f", "s16le",
        "-ac", "1",
        "-ar", "16000",
        pcm_file,
    ]
    try:
        status = subprocess.call(command)
    except OSError as exc:
        print("ffmpeg could not be started: %s" % exc, file=sys.stderr)
    else:
        if status != 0:
            print("ffmpeg exited with status %s while converting %s"
                  % (status, wav_file), file=sys.stderr)
    return pcm_file
def sound_record(file_name, record_seconds=3):
    """Record audio from the microphone and save it as a wav file.

    Opens a PyAudio input stream (1 channel, 16000 Hz, paInt16), reads it in
    fixed-size chunks for roughly *record_seconds* seconds and writes the
    collected frames to *file_name*.

    Args:
        file_name: Path of the wav file to create.
        record_seconds: Recording length in seconds. Defaults to 3. The number
            of chunks is truncated to an integer, so the actual recording can
            be slightly shorter than requested.
    """
    import pyaudio
    import wave

    CHUNK = 1024  # frames fetched per stream.read() call
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000

    p = pyaudio.PyAudio()
    try:
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            print("开始录音,请说话......")
            chunk_count = int(RATE / CHUNK * record_seconds)
            frames = [stream.read(CHUNK) for _ in range(chunk_count)]
            print("录音结束!")
            stream.stop_stream()
        finally:
            # Release the device even if reading from the stream failed.
            stream.close()
    finally:
        p.terminate()

    # The context manager closes the file even if writing the frames fails.
    with wave.open(file_name, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
def speech_recognition(filepath):
    """Speech recognition: convert a 16 kHz PCM audio file to text.

    Reads *filepath* as bytes, submits it to the module-level Baidu ``client``
    and prints the outcome.

    Args:
        filepath: Path of the PCM audio file to recognize.

    Returns:
        The first entry of the response's ``"result"`` list, or the string
        ``"error"`` when the response carries no usable result.

    Raises:
        OSError: If *filepath* cannot be opened or read.
    """
    with open(filepath, 'rb') as fp:
        speechfile = fp.read()

    # The options must be passed as one dict argument.
    # NOTE(review): dev_pid presumably selects the recognition model/language
    # (1536 looks like the Mandarin model) -- confirm against the Baidu docs.
    result = client.asr(speechfile, 'pcm', 16000, {
        'dev_pid': 1536,
    })

    try:
        res_str = result.get("result")[0]
    except (AttributeError, TypeError, IndexError, KeyError):
        # No "result" key (None is not subscriptable), an empty list, or a
        # reply that is not a dict at all: report failure via the sentinel.
        res_str = "error"
        print("识别没有成功")
    else:
        print(res_str)
    return res_str
# 测试
# text = "世界很复杂百度更懂你"
# synthesisfilepath = "synthesisspeech.pcm"
# synthesisfilepath = "16k.pcm"
# speech_synthesis(text, synthesisfilepath)
# wav_file = pcm_to_wav(synthesisfilepath)
# play_speech(wav_file)
# recordfilepath = "recordspeech.wav"
# sound_record(recordfilepath)
# pcm_file = wav_to_pcm(recordfilepath)
# speech_recognition(pcm_file)
""" 控制面板 """
from tkinter import *
from tkinter import ttk
from tkinter import messagebox
import os
class App:
    """Tk control panel: a record button plus a box showing recognized text."""

    def __init__(self, master):
        """Configure the window and build its widgets.

        Args:
            master: The Tk window that hosts the widgets.
        """
        self.master = master
        self.master.title("调用百度AI识别语音")
        self.master.geometry("500x400")
        # The button image is loaded from the directory containing this file
        # and stored on the instance so it stays referenced while the button
        # uses it.
        image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                  'luyin - small.gif')
        self.buttonimg = PhotoImage(file=image_path)
        self.initWidgets()

    def initWidgets(self):
        """Create the record button, the caption label and the result box."""
        self.button = Button(self.master, text='开始录音',
                             image=self.buttonimg,
                             command=self.open_sound_record,
                             height=100, width=100)
        self.button.pack(ipadx=5, ipady=5, pady=20)

        # Each widget is laid out with pack() only: a widget is handled by one
        # geometry manager at a time and the last one called wins, so calling
        # place() right before pack() would have no effect.
        self.label = Label(self.master, text="语音识别结果:")
        self.label.pack()

        self.text = Text(self.master, height=3, width=200)
        self.text.pack()

    def open_sound_record(self):
        """Record speech, recognize it and display the outcome.

        Runs record -> wav-to-pcm -> recognition synchronously inside the
        button callback, so the window does not process events until the
        whole pipeline has finished.
        """
        recordfilepath = "recordspeech.wav"
        sound_record(recordfilepath)
        pcm_file = wav_to_pcm(recordfilepath)
        res_str = speech_recognition(pcm_file)

        if res_str == "error":
            # showinfo's return value is not needed, so it is not printed.
            messagebox.showinfo("出错", "没有成功识别语音!")
        else:
            # Inserted at the current cursor position of the result box.
            self.text.insert("insert", res_str)
        # text = "语音识别的结果是"+res_str
        # synthesisfilepath = "synthesisspeech.pcm"
        # speech_synthesis(text, synthesisfilepath)
        # play_speech(synthesisfilepath)
# Script entry: create the main window, attach the control panel to it and
# enter the Tk event loop (blocks until the window is closed).
# NOTE(review): these statements run at import time as well, since there is
# no `if __name__ == "__main__":` guard.
root = Tk()
App(root)
root.mainloop()
以上是关于调用百度语音AI实现语音的识别和合成的主要内容,如果未能解决你的问题,请参考以下文章