如何在 C# 中使用 SpInprocRecoContext 识别语音事件?
Posted
技术标签:
【中文标题】如何在 C# 中使用 SpInprocRecoContext 识别语音事件?【英文标题】:How to recognize voice events using SpInprocRecoContext in C#? 【发布时间】:2018-04-13 20:37:12 【问题描述】:我即将完成一个通过 C#(SAPI 5.4)修改 Windows 语音词典的个人项目。我正在研究的最后一点是如何为给定的单词设置 SAPI 音素(phones)。我已经找到一种方法:通过 C# 窗体,并借助 SpSharedRecoContext 获得的语音识别来实现。但是,我现在想用语音文件(*.wav)作为识别的输入。我知道这需要通过 SpInprocRecoContext 来完成。
我从 Microsoft 找到的关于 SAPI 5.4 识别的每个示例(例如 VB 的 this one)都是针对 SpSharedRecoContext 而不是 SpInprocRecoContext 的(我记得看到过一些评论,指出其中部分示例缺少细节)。此外,我在这个论坛上发现了多个主题(主要由 Eric Brown 回答,参见 topic 1、topic 2、topic 3)提到使用 SpInprocRecoContext 需要比 SpSharedRecoContext 更多的设置,但我还没有找到在 C# 中使用 SpInprocRecoContext 时如何捕获语音识别事件的明确答案。
我该如何继续?
这是我目前的代码(为更好的组织而编辑):
using SpeechLib;
using System;
// Console experiment from the question: voices MyText into a wav file with SpVoice, then tries to feed
// that file to an in-process recognizer (SpInProcRecoContext) and print the phonemes of the result.
// NOTE(review): the brace-only lines of this listing are missing, so scope boundaries below are implied
// by the method signatures rather than shown.
namespace SpeechTest
class Program
// Entry point: creates the context/recognizer, hooks the Recognition event, activates a dictation
// grammar, selects an audio input, generates the wav file and finally assigns it as the input stream.
static void Main(string[] args)
string MyText = "dolphins"; // Text string of interest
string WaveFile = @"C:\Reco\MYWAVE.wav"; // Path to wav file used for voice recognition
// Declare important recognition objects
SpInprocRecognizer Recognizer;
SpInProcRecoContext RecoContext;
ISpeechRecoGrammar grammar;
ISpeechFileStream MyFileStream;
// NOTE(review): MyVoice is declared but never assigned or read in this listing.
ISpeechVoice MyVoice;
// Create recognizer and recognition context
RecoContext = new SpInProcRecoContext();
Recognizer = (SpInprocRecognizer)RecoContext.Recognizer;
// Set up recognition event handling
RecoContext.Recognition += new _ISpeechRecoContextEvents_RecognitionEventHandler(RecoContext_Recognition);
// Set up the grammar
// NOTE(review): the dictation grammar is activated here, before any audio input has been assigned to
// the recognizer — presumably part of the ordering problem; confirm against the SAPI documentation.
grammar = RecoContext.CreateGrammar(); // Initialize the grammar
grammar.DictationLoad("", SpeechLoadOption.SLOStatic); // Set up dictation grammar
grammar.DictationSetState(SpeechRuleState.SGDSActive); // Activate the grammar
// Set up audio input for SpInProcRecoContext
// Uses the default token of the audio-input category as the recognizer's AudioInput.
SpObjectTokenCategory Category = new SpObjectTokenCategory();
Category.SetId(SpeechStringConstants.SpeechCategoryAudioIn);
SpObjectToken AudioToken = new SpObjectToken();
AudioToken.SetId(Category.Default);
Recognizer.AudioInput = AudioToken;
// The two commented-out groups below re-point the same Category object at the recognizer and profile
// categories; the author reports a runtime error from each SetId call.
//Category.SetId(SpeechStringConstants.SpeechCategoryRecognizers); // <== generates a runtime error!!!
//SpObjectToken EngineToken = new SpObjectToken();
//EngineToken.SetId(Category.Default);
//Recognizer.Recognizer = EngineToken;
//Category.SetId(SpeechStringConstants.SpeechCategoryRecoProfiles); // <== generates a runtime error!!!
//SpObjectToken ProfileToken = new SpObjectToken();
//ProfileToken.SetId(Category.Default);
//Recognizer.Profile = ProfileToken;
// Create an audio file stream from MyText
MyFileStream = new SpFileStream(); // Create new SpFileStream instance
TextToWave(MyText, WaveFile); // Call function to create a wav file that voices MyText
MyFileStream.Open(WaveFile, SpeechStreamFileMode.SSFMOpenForRead, true);
// Activate the recognizer and input the audio file stream into the recognizer
// NOTE(review): the stream is assigned only after the grammar, the context and the recognizer state have
// all been activated and after AudioInput was already set above — presumably the input must be set
// before activation; confirm.
RecoContext.State = SpeechRecoContextState.SRCS_Enabled;
Recognizer.State = SpeechRecognizerState.SRSActive;
Recognizer.AudioInputStream = MyFileStream; // <== generates a runtime error!!!
// Output info and cleanup
// NOTE(review): this reads the static SAPIPhonemes field immediately; nothing visible here waits for the
// Recognition handler, and the handler below assigns a local of the same name instead of the field.
Console.WriteLine(MyText + " = " + SAPIPhonemes);
MyFileStream.Close();
Console.ReadLine();
// Speaks `text` with a default SpVoice into the wav file at `file` (created for writing), then closes it.
static void TextToWave(string text, string file)
SpFileStream fileStream = new SpFileStream();
SpVoice voice = new SpVoice();
fileStream.Open(file, SpeechStreamFileMode.SSFMCreateForWrite, true);
voice.AudioOutputStream = fileStream;
voice.Speak(text);
fileStream.Close();
// Intended to carry the phoneme string from the Recognition handler back to Main.
public static string SAPIPhonemes = null;
// Recognition event handler: prints the result object and concatenates the phonemes of every phrase
// element, converted with an SpPhoneConverter whose LanguageId is 1033.
public static void RecoContext_Recognition(int StreamNumber, object StreamPosition, SpeechRecognitionType RecognitionType, ISpeechRecoResult Result)
// This event is recognized and all the below code works fine when using SpSharedRecoContext
Console.WriteLine(Result.ToString());
// NOTE(review): this local has the same name as the static field above, so the field is never written.
string SAPIPhonemes = null;
SpPhoneConverter MyPhoneConverter = new SpPhoneConverter();
MyPhoneConverter.LanguageId = 1033;
foreach (ISpeechPhraseElement MyPhrase in Result.PhraseInfo.Elements)
SAPIPhonemes += " " + MyPhoneConverter.IdToPhone(MyPhrase.Pronunciation);
这里可以参考基于表单的 SpSharedRecoContext 代码:
using SpeechLib;
using System;
using System.Windows.Forms;
// Windows Forms reference sample from the question: a button toggles dictation through the shared
// recognizer (SpSharedRecoContext) and the Recognition handler appends text and phonemes to text boxes.
// NOTE(review): the brace-only lines of this listing are missing, so the exact extent of each if/try
// body cannot be read from the text.
namespace RecoForm
public partial class Form1 : Form
// Speech Recognition Object
SpSharedRecoContext listener;
// Grammar object
ISpeechRecoGrammar grammar;
public Form1()
InitializeComponent();
private void Form1_Load(object sender, EventArgs e)
// nothing
// Pause/resume marker compared against "0" and "1" below; it has no initializer.
public string ps;
// Toggles listening based on the button caption. The "Start Listening" branch creates a new shared
// context, hooks listener_Reco and activates a dictation grammar; the "Stop Listening" branch pauses
// the context.
private void button1_Click(object sender, EventArgs e)
if (btnListen.Text == "Start Listening")
// textBox1.Clear();
try
// NOTE(review): a new SpSharedRecoContext appears to be created on every start, replacing the one
// that was paused by the stop branch — confirm whether that is intended.
listener = new SpSharedRecoContext();
listener.Recognition += new _ISpeechRecoContextEvents_RecognitionEventHandler(listener_Reco);
grammar = listener.CreateGrammar(0);
grammar.DictationLoad("", SpeechLoadOption.SLOStatic);
grammar.DictationSetState(SpeechRuleState.SGDSActive);
btnListen.Text = "Stop Listening";
// NOTE(review): ps is only ever assigned under the ps == "1" / ps == "0" checks, presumably inside
// their bodies; if so it never leaves its initial value — verify against the original listing.
if (ps == "1")
listener.Resume();
ps = "0";
catch (Exception ex)
MessageBox.Show(ex.Message);
else if (btnListen.Text == "Stop Listening")
listener.Pause();
btnListen.Text = "Start Listening";
if (ps == "0")
ps = "1";
// Recognition handler: appends the recognized text to textBox1 and the phonemes of each phrase element
// (converted with LanguageId 1033) to textBox2.
public void listener_Reco(int StreamNumber, object StreamPosition, SpeechRecognitionType RecognitionType, ISpeechRecoResult Result)
string heard = Result.PhraseInfo.GetText(0, -1, true);
textBox1.Text += " " + heard;
SpPhoneConverter MyPhoneConverter = new SpPhoneConverter();
MyPhoneConverter.LanguageId = 1033;
foreach (ISpeechPhraseElement MyPhrase in Result.PhraseInfo.Elements)
textBox2.Text += " " + MyPhoneConverter.IdToPhone(MyPhrase.Pronunciation);
// https://***.com/questions/11935533/c-sharp-sapi-5-4-languages
这是另一个示例(VB),它结合了 Microsoft 的两个示例(here 和 here),但仍然不起作用(请参阅 Command1_Click 中的注释,查看我遇到运行时错误的位置)。
Imports SpeechLib
' VB form from the question: Form1_Load builds an in-process recognition context with an active dictation
' grammar and a default audio-input token; Command1_Click voices TextBox1 into a wav file and tries to use
' that file as the recognizer's input stream.
Public Class Form1
Const WaveFile = "C:\Reco\MYWAVE.wav"
Dim WithEvents RC As SpInProcRecoContext
Dim Recognizer As SpInprocRecognizer
Dim myGrammar As ISpeechRecoGrammar
Dim MyFileStream As SpeechLib.SpFileStream
Dim MyVoice As SpeechLib.SpVoice
' NOTE(review): MyText is declared but not used anywhere in this listing.
Dim MyText As String
' Creates the context, recognizer, grammar and voice, then sets the recognizer's AudioInput to the default
' token of the audio-input category. Any error jumps to EH and is reported through ShowErrMsg.
Private Sub Form1_Load(sender As Object, e As EventArgs) Handles MyBase.Load
On Error GoTo EH
RC = New SpInProcRecoContext
Recognizer = RC.Recognizer
myGrammar = RC.CreateGrammar
' NOTE(review): dictation is activated here, before any audio input is assigned below — presumably
' related to the runtime error reported in Command1_Click; confirm.
myGrammar.DictationSetState(SpeechRuleState.SGDSActive)
' NOTE(review): MyVoice is configured here, but MakeWAVFileFromText creates and uses its own SpVoice.
MyVoice = New SpVoice
MyVoice.Voice = MyVoice.GetVoices("gender=male").Item(0)
Dim Category As SpObjectTokenCategory
Category = New SpObjectTokenCategory
Category.SetId(SpeechStringConstants.SpeechCategoryAudioIn)
Dim Token As SpObjectToken
Token = New SpObjectToken
Token.SetId(Category.Default)
Recognizer.AudioInput = Token
TextBox1.Text = "play the eight of clubs"
' Reached on both the normal and the error path; ShowErrMsg is called only when Err.Number is non-zero.
EH:
If Err.Number Then ShowErrMsg()
End Sub
' Generates the wav file from TextBox1, reopens it and assigns it as the recognizer's input stream.
Private Sub Command1_Click(sender As Object, e As EventArgs) Handles Command1.Click
MyFileStream = MakeWAVFileFromText(TextBox1.Text, WaveFile)
MyFileStream.Open(WaveFile)
Recognizer.AudioInputStream = MyFileStream ' ==> produces a runtime error!!!
End Sub
' Recognition handler: shows the recognized text in TextBox2.
' NOTE(review): unlike the two handlers above, this Sub has no Handles clause; presumably it needs
' "Handles RC.Recognition" to be invoked in VB.NET — confirm.
Private Sub RC_Recognition(ByVal StreamNumber As Long, ByVal StreamPosition As Object, ByVal RecognitionType As SpeechLib.SpeechRecognitionType, ByVal Result As SpeechLib.ISpeechRecoResult)
On Error GoTo EH
TextBox2.Text = Result.PhraseInfo.GetText
EH:
If Err.Number Then ShowErrMsg()
End Sub
' Shows the current Err description and number in a message box, then ends the program.
Private Sub ShowErrMsg()
' Declare identifiers:
Const NL = vbNewLine
Dim T As String
T = "Desc: " & Err.Description & NL
T = T & "Err #: " & Err.Number
MsgBox(T, vbExclamation, "Run-Time Error")
' The End statement stops execution immediately after the message box is dismissed.
End
End Sub
' Speaks strText (passed with the SVSFIsXML flag) into the wav file strFName and returns the SpFileStream
' that was used for writing; the stream has already been closed when it is returned.
Private Function MakeWAVFileFromText(ByVal strText As String, ByVal strFName As String) As SpFileStream
On Error GoTo EH
' Declare identifiers:
Dim FileStream As SpFileStream
Dim Voice As SpVoice
' Instantiate Voice and FileStream objects:
Voice = New SpVoice
FileStream = New SpFileStream
' Open specified .wav file, set voice output
' to file, and speak synchronously:
FileStream.Open(strFName, SpeechStreamFileMode.SSFMCreateForWrite, True)
Voice.AudioOutputStream = FileStream
Voice.Speak(strText, SpeechVoiceSpeakFlags.SVSFIsXML)
' Close file and return reference to FileStream object:
FileStream.Close()
MakeWAVFileFromText = FileStream
EH:
If Err.Number Then ShowErrMsg()
End Function
End Class
' https://msdn.microsoft.com/en-us/library/ee125184%28v=vs.85%29.aspx
' https://msdn.microsoft.com/en-us/library/ee125344(v=vs.85).aspx
更新:这样可以工作了,但是流结束(EndStream)事件不会触发,导致 Application.Run 无法返回。作为一种变通办法,我可以用 StopWatch 计时之类的小技巧来关闭所有内容,但显然这并不理想。请记住,我对 C# 还很陌生,所以我的注释可能不是 100% 准确。
任何想法如何让结束流事件触发?
using SpeechLib;
using System;
using System.Windows.Forms;
namespace SAPITextFromVoice
{
    /// <summary>
    /// Console experiment: voices a phrase into a wav file with text-to-speech, feeds that file to an
    /// in-process SAPI recognizer, and prints the recognized text, its confidence and its SAPI phonemes.
    /// </summary>
    class Program
    {
        // State shared between Main and the recognition event handlers.
        static ISpeechRecoGrammar grammar;   // Grammar attached to the recognition context
        static SpFileStream FileStream;      // Wav file stream used as the recognizer's input
        static string AudioPath = null;      // Path to the wav file
        static string GrammarPath = null;    // Path to an XML grammar file (only used by the commented-out lines)

        /// <summary>
        /// Creates the wav file, wires the recognizer to it, and pumps messages until the
        /// end-of-stream handler stops the loop.
        /// </summary>
        static void Main(string[] args)
        {
            // Text that is voiced and then recognized
            string MyText = "the rain in spain";

            // Store path to speech grammar XML file
            //GrammarPath = @"C:\Reco\MyGrammar.xml";

            // Store path to voice recognition input wav file
            AudioPath = @"C:\Reco\MyAudio.wav";

            // Without a wav file there is nothing to recognize, so stop here instead of failing later.
            if (!TextToWav(AudioPath, MyText))
            {
                return;
            }

            // Declared outside the try block so both objects can be kept reachable while the
            // message loop below is delivering their events.
            SpInprocRecognizer MyRecognizer = null;
            SpInProcRecoContext RecoContext = null;

            try
            {
                // Open the created wav for reading
                FileStream = new SpFileStream();
                FileStream.Open(AudioPath, SpeechStreamFileMode.SSFMOpenForRead, true);

                // Create the in-process recognizer and its recognition context
                MyRecognizer = new SpInprocRecognizer();
                RecoContext = (SpInProcRecoContext)MyRecognizer.CreateRecoContext();

                // The input stream is assigned before any grammar is activated.
                MyRecognizer.AudioInputStream = FileStream;

                // Register for recognition events
                RecoContext.Recognition += new _ISpeechRecoContextEvents_RecognitionEventHandler(RecoContext_Recognition);
                RecoContext.FalseRecognition += new _ISpeechRecoContextEvents_FalseRecognitionEventHandler(RecoContext_FalseRecognition);
                RecoContext.Hypothesis += new _ISpeechRecoContextEvents_HypothesisEventHandler(RecoContext_Hypothesis);
                RecoContext.EndStream += new _ISpeechRecoContextEvents_EndStreamEventHandler(RecoContext_EndStream);

                // Set up the grammar
                grammar = RecoContext.CreateGrammar();
                //grammar.CmdLoadFromFile(GrammarPath, SpeechLoadOption.SLODynamic); // Load custom XML grammar file
                //grammar.CmdSetRuleIdState(0, SpeechRuleState.SGDSActive); // Activate the loaded grammar
                grammar.DictationLoad("", SpeechLoadOption.SLOStatic);
                grammar.DictationSetState(SpeechRuleState.SGDSActive);
            }
            catch (Exception ex)
            {
                Console.WriteLine("Error during voice recognition setup");
                Console.Error.WriteLine(ex); // Keep the cause visible instead of discarding it
                return;
            }

            // Standard message loop; RecoContext_EndStream ends it with Application.ExitThread.
            Application.Run();

            // Guard against the recognizer/context wrappers being collected while the loop is running.
            GC.KeepAlive(RecoContext);
            GC.KeepAlive(MyRecognizer);

            Console.WriteLine("done");
            Console.ReadLine();
        }

        /// <summary>
        /// Voices <paramref name="text"/> with text-to-speech into the wav file at <paramref name="FilePath"/>,
        /// replacing any existing file.
        /// </summary>
        /// <returns>true when the file was written and closed; false when any step failed.</returns>
        public static bool TextToWav(string FilePath, string text)
        {
            SpFileStream stream = null;
            bool streamOpen = false;

            try
            {
                if (System.IO.File.Exists(FilePath))
                {
                    System.IO.File.Delete(FilePath);
                }

                stream = new SpFileStream();
                stream.Format.Type = SpeechAudioFormatType.SAFT48kHz16BitStereo;
                stream.Open(FilePath, SpeechStreamFileMode.SSFMCreateForWrite, true);
                streamOpen = true;

                SpVoice voice = new SpVoice();
                voice.Volume = 100;
                voice.Rate = -2;

                // Use the named voice when it is installed; otherwise keep the default voice
                // rather than failing on Item(0) of an empty token list.
                string NameAttribute = "Name = " + "Microsoft Anna";
                ISpeechObjectTokens matchingVoices = voice.GetVoices(NameAttribute);
                if (matchingVoices.Count > 0)
                {
                    voice.Voice = matchingVoices.Item(0);
                }

                //voice.Speak(text);
                voice.AudioOutputStream = stream;
                voice.Speak(text, SpeechVoiceSpeakFlags.SVSFDefault);

                // Closing is part of the success path: a failure here still reports false.
                stream.Close();
                streamOpen = false;
                return true;
            }
            catch (Exception ex)
            {
                Console.WriteLine("Error during wav file creation");
                Console.Error.WriteLine(ex);
                return false;
            }
            finally
            {
                // Best-effort close when a failure happened after the file was opened.
                if (streamOpen)
                {
                    try
                    {
                        stream.Close();
                    }
                    catch (Exception ex)
                    {
                        Console.Error.WriteLine(ex);
                    }
                }
            }
        }

        /// <summary>Handles successful (higher confidence) recognition events.</summary>
        public static void RecoContext_Recognition(int StreamNumber, object StreamPosition, SpeechRecognitionType RecognitionType, ISpeechRecoResult Result)
        {
            RecognitionProcessing(Result, true);
        }

        /// <summary>Handles false (low confidence) recognition events.</summary>
        public static void RecoContext_FalseRecognition(int StreamNumber, object StreamPosition, ISpeechRecoResult Result)
        {
            RecognitionProcessing(Result, false);
        }

        /// <summary>Prints each recognition hypothesis with the engine confidence of its first element.</summary>
        public static void RecoContext_Hypothesis(int StreamNumber, object StreamPosition, ISpeechRecoResult Result)
        {
            // Guarded like the other handlers so a malformed result cannot throw out of the event callback.
            try
            {
                float confidence = Result.PhraseInfo.Elements.Item(0).EngineConfidence;
                Console.WriteLine("Hypothesis = " + Result.PhraseInfo.GetText() + " (" + RoundConfidence(confidence) + ")");
            }
            catch (Exception ex)
            {
                Console.WriteLine("Error during processing of recognition hypothesis");
                Console.Error.WriteLine(ex);
            }
        }

        /// <summary>
        /// Handles the end of the audio input stream: deactivates the grammar, closes the input
        /// file and stops the message loop started by Main.
        /// </summary>
        public static void RecoContext_EndStream(int StreamNumber, object StreamPosition, bool StreamReleased)
        {
            Console.WriteLine("--- END OF STREAM ---");
            try
            {
                //grammar.CmdSetRuleIdState(0, SpeechRuleState.SGDSInactive); // Deactivate the loaded grammar
                grammar.DictationSetState(SpeechRuleState.SGDSInactive);
                FileStream.Close();
            }
            catch (Exception ex)
            {
                Console.WriteLine("Error during cleanup process");
                Console.Error.WriteLine(ex);
            }
            finally
            {
                // Always stop the loop, even when cleanup failed; otherwise Main never returns.
                Application.ExitThread();
            }
        }

        /// <summary>
        /// Prints the recognized text with its rounded confidence, then prints its phonemes.
        /// </summary>
        /// <param name="Result">Recognition result supplied by the event.</param>
        /// <param name="RecoType">true for a successful recognition, false for a false recognition.</param>
        public static void RecognitionProcessing(ISpeechRecoResult Result, bool RecoType)
        {
            try
            {
                string RecognizedText = Result.PhraseInfo.GetText().Trim();
                float confidence = Result.PhraseInfo.Elements.Item(0).EngineConfidence;
                decimal RecognitionConfidence = RoundConfidence(confidence);
                Console.WriteLine((RecoType == false ? "false " : "") + "recognition = " + RecognizedText + " (" + RecognitionConfidence + ")");
                GetPhonemes(Result);
            }
            catch (Exception ex)
            {
                Console.WriteLine("Error during processing of recognition result");
                Console.Error.WriteLine(ex);
            }
        }

        /// <summary>
        /// Prints the SAPI phonemes of every phrase element in the result, with words separated by " - ".
        /// </summary>
        public static void GetPhonemes(ISpeechRecoResult Result)
        {
            try
            {
                SpPhoneConverter MyPhoneConverter = new SpPhoneConverter();
                MyPhoneConverter.LanguageId = 1033; // Language id used for phoneme conversion (English = 1033)

                // One phoneme string per recognized element; joining an empty list yields "",
                // so a result without elements prints an empty phoneme list instead of failing on null.
                System.Collections.Generic.List<string> wordPhonemes = new System.Collections.Generic.List<string>();
                foreach (ISpeechPhraseElement MyPhrase in Result.PhraseInfo.Elements)
                {
                    wordPhonemes.Add(MyPhoneConverter.IdToPhone(MyPhrase.Pronunciation));
                }

                Console.WriteLine("Phonemes = " + string.Join(" - ", wordPhonemes.ToArray()).Trim());
            }
            catch (Exception ex)
            {
                Console.WriteLine("Error during phoneme extraction");
                Console.Error.WriteLine(ex);
            }
        }

        // Rounds an engine confidence to 3 decimals when it is above 0.01 and to 4 decimals otherwise.
        private static decimal RoundConfidence(float confidence)
        {
            return Decimal.Round(Convert.ToDecimal(confidence), (confidence > 0.01 ? 3 : 4));
        }
    }
}
【问题讨论】:
感谢@halfer 的编辑,感谢您关于避免无关内容的建议。 不用担心 Exergist,不客气。如果您有兴趣,可以在 Meta 上进行一些参考讨论,例如here 和 here。总之,看起来是个不错的Q! 另外,你遇到了什么运行时错误? 不是很明显这是如何发生的,System.Speech 命名空间中 SAPI 的 .NET 包装器非常好。上手容易多了。并获得帮助。 基本上我有一个语音识别应用程序,可以将 C# 代码作为宏的一部分运行。我创建了一组可以修改 Windows 语音识别字典(应用程序可以利用它来改进识别和文本到语音)的 C# 函数集合,据我所知,唯一的方法是直接通过 SAPI .我项目的最后一部分涉及识别口语短语并提取 SAPI 音素,然后将其用作字典的输入。我知道我可以使用 System.Speech 获得 IPA 发音,但不确定 SAPI。简而言之就是这个故事。 【参考方案1】:抱歉花了这么长时间,但查看您的代码我发现了几个可能的问题。
-
在激活识别器之前,您需要在识别器上设置输入流。一旦识别器激活,它将立即开始读取。更改活动识别器上的输入流将导致错误。
在将识别器设置为活动状态之前,您确实需要设置识别配置文件(reco profile)和识别引擎(reco engine)。我会为每种类型分别创建单独的 SpObjectTokenCategory 对象。
【讨论】:
感谢 Eric 的评论,我希望您能找到我的帖子 :) 我编辑的代码与您的上述答案相比如何? 再次感谢您的帮助埃里克!您似乎对语音识别和 SAPI 了解很多,我希望您可以快速查看我的相关问题 here。这是我项目的最后一部分,非常感谢您的反馈!【参考方案2】:我回来提供完整的解决方案:针对给定的单词,通过文本转语音生成朗读该单词的音频文件流,然后提取该单词的 SAPI 音素。其中也包含了我最初问题的答案。此外,using SpeechLib 指的是 Interop.SpeechLib.dll,即 (COM) Microsoft Speech Object Library v5.4。
请记住,此代码在另一个名为 VoiceAttack 的父应用程序中用作“内联函数”,因此代码的格式与您在 Visual Studio 中的预期略有不同。从这种格式转换到 Visual Studio 并不困难,希望其他人可以将此作为未来工作的跳板。
请注意,我只是 C# 爱好者。该代码在功能和速度方面完全满足我的需要,但它可能不像某些人希望的那样“优化”,而且说明性注释也受限于我所掌握的知识。我非常乐意接受有关如何改进它的建议。
非常感谢 Eric Brown 的反馈!
using SpeechLib;
using System;
using System.IO;
using System.Threading;
using System.Windows.Forms;
/// <summary>
/// VoiceAttack inline function: optionally voices "~~ProcessText" into a wav file, runs that file through
/// an in-process SAPI recognizer, and reports the recognized text, confidence and SAPI phonemes back
/// through VoiceAttack text variables. Failures are reported in "~~RecognitionError".
/// </summary>
class VAInline
{
    // State shared between main() and the recognition event handlers.
    ISpeechRecoGrammar grammar;          // Grammar attached to the recognition context
    SpFileStream FileStream;             // Wav file stream used as the recognizer's input
    string AudioPath = null;             // Path to the wav file
    string GrammarPath = null;           // Path to the XML grammar file
    string RecognitionFlag = "";         // "*" once a false (low confidence) recognition has been seen
    string RecognitionConfidence = "";   // Formatted confidence text sent back to VoiceAttack
    bool UseDictation;                   // true to use the pronunciation dictation grammar instead of the XML grammar

    /// <summary>
    /// Entry point called by VoiceAttack. Reads the input variables, prepares the wav file, wires the
    /// recognizer and pumps messages until the end-of-stream handler stops the loop.
    /// </summary>
    public void main()
    {
        // Reset relevant VoiceAttack text variables
        VA.SetText("~~RecognitionError", null);
        VA.SetText("~~RecognizedText", null);
        VA.SetText("~~SAPIPhonemes", null);
        VA.SetText("~~SAPIPhonemesRaw", null);
        //VA.SetText("~~FalseRecognitionFlag", null);

        // Retrieve the text of interest; it is required.
        string ProcessText = VA.GetText("~~ProcessText");
        if (ProcessText == null)
        {
            VA.SetText("~~RecognitionError", "Error in input text string (SAPI)");
            return;
        }

        // Retrieve the grammar and wav file paths from VoiceAttack
        GrammarPath = VA.GetText("~~GrammarFilePath");
        AudioPath = VA.GetText("~~AudioFilePath");

        // When the user's own voice is not the input, the wav file is produced by text-to-speech.
        if (VA.GetBoolean("~~UserVoiceInput") == false)
        {
            //VA.WriteToLog("creating wav file");
            if (TextToWav(AudioPath, ProcessText) == false)
            {
                return;
            }
        }

        // Create speech recognizer and associated in-process recognition context
        SpInprocRecognizer MyRecognizer = new SpInprocRecognizer();
        SpInProcRecoContext RecoContext = (SpInProcRecoContext)MyRecognizer.CreateRecoContext();

        // Remembers whether the Hypothesis handler was attached, so cleanup mirrors setup exactly
        // instead of re-reading the VoiceAttack variable.
        bool hypothesisHooked = false;

        try
        {
            // Open the wav file for reading
            FileStream = new SpFileStream();
            FileStream.Open(AudioPath, SpeechStreamFileMode.SSFMOpenForRead, true);

            // The input stream is assigned before any grammar is activated.
            MyRecognizer.AudioInputStream = FileStream;

            // Register for recognition events
            RecoContext.Recognition += new _ISpeechRecoContextEvents_RecognitionEventHandler(RecoContext_Recognition);
            RecoContext.FalseRecognition += new _ISpeechRecoContextEvents_FalseRecognitionEventHandler(RecoContext_FalseRecognition);
            if (VA.GetBoolean("~~ShowRecognitionHypothesis") == true)
            {
                RecoContext.Hypothesis += new _ISpeechRecoContextEvents_HypothesisEventHandler(RecoContext_Hypothesis);
                hypothesisHooked = true;
            }
            RecoContext.EndStream += new _ISpeechRecoContextEvents_EndStreamEventHandler(RecoContext_EndStream);

            // Set up the grammar
            grammar = RecoContext.CreateGrammar();
            UseDictation = (bool?)VA.GetBoolean("~~UseDictation") ?? false;
            if (UseDictation == true)
            {
                //grammar.DictationLoad("", SpeechLoadOption.SLOStatic); // Load blank dictation topic into the grammar
                // The "Pronunciation" topic is loaded so that the raw (unfiltered) phonemes may be retrieved.
                grammar.DictationLoad("Pronunciation", SpeechLoadOption.SLOStatic);
                grammar.DictationSetState(SpeechRuleState.SGDSActive);
            }
            else
            {
                grammar.CmdLoadFromFile(GrammarPath, SpeechLoadOption.SLODynamic);
                grammar.CmdSetRuleIdState(0, SpeechRuleState.SGDSActive);
            }

            // Standard message loop; RecoContext_EndStream ends it with Application.ExitThread.
            Application.Run();
        }
        catch (Exception ex)
        {
            VA.WriteToLog(ex.ToString());
            VA.SetText("~~RecognitionError", "Error during voice recognition setup (SAPI)");
            return;
        }
        finally
        {
            // Detach the handlers first so a failing Close below cannot leave them subscribed.
            RecoContext.Recognition -= new _ISpeechRecoContextEvents_RecognitionEventHandler(RecoContext_Recognition);
            RecoContext.FalseRecognition -= new _ISpeechRecoContextEvents_FalseRecognitionEventHandler(RecoContext_FalseRecognition);
            if (hypothesisHooked)
            {
                RecoContext.Hypothesis -= new _ISpeechRecoContextEvents_HypothesisEventHandler(RecoContext_Hypothesis);
            }
            RecoContext.EndStream -= new _ISpeechRecoContextEvents_EndStreamEventHandler(RecoContext_EndStream);

            // Close the input stream without letting a close failure (for example when the file
            // never opened) escape from main().
            if (FileStream != null)
            {
                try
                {
                    FileStream.Close();
                }
                catch (Exception ex)
                {
                    VA.WriteToLog(ex.ToString());
                }
                FileStream = null;
            }
            //VA.WriteToLog("voice recognition complete"); // Output info to event log
        }
    }

    /// <summary>
    /// Voices <paramref name="text"/> with the voice named in "~~TextToSpeechVoice" into the wav file at
    /// <paramref name="FilePath"/>, replacing any existing file.
    /// </summary>
    /// <returns>true when the file was written and closed; false after reporting "~~RecognitionError".</returns>
    public bool TextToWav(string FilePath, string text)
    {
        //VA.WriteToLog("creating wav file"); // Output info to event log
        SpFileStream stream = new SpFileStream();
        bool streamOpen = false;
        bool succeeded = false;

        try
        {
            if (System.IO.File.Exists(FilePath) == true)
            {
                System.IO.File.Delete(FilePath);
            }

            stream.Format.Type = SpeechAudioFormatType.SAFT48kHz16BitStereo;
            stream.Open(FilePath, SpeechStreamFileMode.SSFMCreateForWrite, true);
            streamOpen = true;

            SpVoice voice = new SpVoice();
            voice.Volume = 100;
            voice.Rate = -2;
            string NameAttribute = "Name = " + VA.GetText("~~TextToSpeechVoice");
            voice.Voice = voice.GetVoices(NameAttribute).Item(0);
            //voice.Speak(text);
            voice.AudioOutputStream = stream;
            voice.Speak(text, SpeechVoiceSpeakFlags.SVSFDefault);
            succeeded = true;
        }
        catch (Exception ex)
        {
            VA.WriteToLog(ex.ToString());
            VA.SetText("~~RecognitionError", "Error during wav file creation (SAPI)");
        }
        finally
        {
            // Only close a stream that was actually opened, and treat a close failure as a failed
            // creation instead of letting it escape and break the bool contract.
            if (streamOpen)
            {
                try
                {
                    stream.Close();
                }
                catch (Exception ex)
                {
                    VA.WriteToLog(ex.ToString());
                    VA.SetText("~~RecognitionError", "Error during wav file creation (SAPI)");
                    succeeded = false;
                }
            }
        }

        return succeeded;
    }

    /// <summary>Handles successful (higher confidence) recognition events.</summary>
    public void RecoContext_Recognition(int StreamNumber, object StreamPosition, SpeechRecognitionType RecognitionType, ISpeechRecoResult Result)
    {
        //VA.WriteToLog("Recognition successful"); // Output info to event log
        //VA.SetText("~~FalseRecognitionFlag", ""); // Send blank recognition flag ("") back to VoiceAttack as text variable
        // NOTE(review): with the reset below commented out, a "*" set by an earlier false recognition
        // stays attached to later successful results — confirm whether that is intended.
        //RecognitionFlag = ""; // Set the RecognitionFlag as blank
        RecognitionProcessing(Result);
        //if (UseDictation == false) // Check if pronunciation dictation grammar should NOT be used with speech recognition
        GetPhonemes(Result);
    }

    /// <summary>Handles unsuccessful (low confidence) recognition events and marks the result with "*".</summary>
    public void RecoContext_FalseRecognition(int StreamNumber, object StreamPosition, ISpeechRecoResult Result)
    {
        //VA.WriteToLog("Low confidence recognition"); // Output info to event log
        //VA.WriteToLog(Result.PhraseInfo.GetText());
        //VA.SetText("~~FalseRecognitionFlag", "*"); // Send unsuccessful recognition flag (text character) back to VoiceAttack as text variable
        RecognitionFlag = "*";
        RecognitionProcessing(Result);
        GetPhonemes(Result);
    }

    /// <summary>Logs each recognition hypothesis with the engine confidence of its first element.</summary>
    public void RecoContext_Hypothesis(int StreamNumber, object StreamPosition, ISpeechRecoResult Result)
    {
        //VA.WriteToLog("Recognition hypothesis"); // Output info to event log
        // Guarded like the other handlers so a malformed result cannot throw out of the event callback.
        try
        {
            float confidence = Result.PhraseInfo.Elements.Item(0).EngineConfidence;
            VA.WriteToLog("Hypothesis = " + Result.PhraseInfo.GetText() + " (" + Decimal.Round(Convert.ToDecimal(confidence), (confidence > 0.01 ? 3 : 4)) + ")");
        }
        catch (Exception ex)
        {
            VA.WriteToLog(ex.ToString());
        }
    }

    /// <summary>
    /// Handles the end of the audio input stream: deactivates whichever grammar was activated and
    /// stops the message loop started by main().
    /// </summary>
    public void RecoContext_EndStream(int StreamNumber, object StreamPosition, bool StreamReleased)
    {
        // VA.WriteToLog("End of stream, cleaning up now"); // Output info to event log
        try
        {
            if (UseDictation == true)
            {
                grammar.DictationSetState(SpeechRuleState.SGDSInactive);
            }
            else
            {
                grammar.CmdSetRuleIdState(0, SpeechRuleState.SGDSInactive);
            }
        }
        catch
        {
            VA.SetText("~~RecognitionError", "Error during cleanup process (SAPI)");
        }
        finally
        {
            // Always stop the loop so main() can return and run its own cleanup.
            Application.ExitThread();
        }
    }

    /// <summary>
    /// Sends the recognized text and formatted confidence back to VoiceAttack; with the pronunciation
    /// dictation grammar the recognized text itself is also sent as "~~SAPIPhonemes".
    /// </summary>
    public void RecognitionProcessing(ISpeechRecoResult Result)
    {
        //VA.WriteToLog("Processing recognition result"); // Output info to event log
        try
        {
            string RecognizedText = Result.PhraseInfo.GetText().Trim();
            float confidence = Result.PhraseInfo.Elements.Item(0).EngineConfidence;
            decimal RecognitionConfidenceScore = Decimal.Round(Convert.ToDecimal(confidence), (confidence > 0.01 ? 3 : 4));
            // Strips the "SEC" prefix and "Confidence" suffix from the confidence enum name.
            string RecognitionConfidenceLevel = Result.PhraseInfo.Elements.Item(0).ActualConfidence.ToString().Replace("SEC", "").Replace("Confidence", "");
            VA.SetText("~~RecognizedText", RecognizedText);
            //VA.SetText("~~RecognitionConfidenceLevel", RecognitionConfidenceLevel); // Send speech recognition confidence level back to VoiceAttack as text variable
            //VA.SetDecimal("~~RecognitionConfidence", RecognitionConfidenceScore); // Send recognized confidence back to VoiceAttack as decimal variable
            // NOTE(review): the extent of this if-body is not visible in the extracted listing; only the
            // assignment is treated as conditional here, so the variable is always sent back — confirm.
            if (VA.GetBoolean("~~ShowConfidence") == true)
            {
                RecognitionConfidence = "(" + RecognitionConfidenceLevel + " @ " + RecognitionConfidenceScore.ToString() + ")" + RecognitionFlag;
            }
            //VA.SetText("~~RecognitionConfidence", RecognitionConfidenceLevel + " @ " + RecognitionConfidenceScore.ToString()); // Send speech recognition confidence data back to VoiceAttack as text variable
            VA.SetText("~~RecognitionConfidence", RecognitionConfidence);
            if (UseDictation == true)
            {
                RecognizedText = RecognizedText.Replace("hh", "h"); // Replace any instances of "hh" in recognized phonemes with "h"
                VA.SetText("~~SAPIPhonemes", RecognizedText);
            }
        }
        catch (Exception e)
        {
            VA.WriteToLog(e.ToString());
            VA.SetText("~~RecognitionError", "Error during processing of recognition result (SAPI)");
        }
    }

    /// <summary>
    /// Extracts the SAPI phonemes of every non-blank phrase element and sends them back to VoiceAttack,
    /// both space-separated ("~~SAPIPhonemesRaw") and word-delimited ("~~SAPIPhonemes").
    /// </summary>
    public void GetPhonemes(ISpeechRecoResult Result)
    {
        //VA.WriteToLog("Extracting phonemes from voice recognition result"); // Output info to event log
        try
        {
            SpPhoneConverter MyPhoneConverter = new SpPhoneConverter();
            MyPhoneConverter.LanguageId = 1033; // Language id used for phoneme conversion (English = 1033)

            // Words are separated by " - " when the user asked for it, otherwise by a single space.
            string WordSeparator = " ";
            if (VA.GetBoolean("~~SeparatePhonemes") == true)
            {
                WordSeparator = " - ";
            }

            // One phoneme string per recognized element; each pronunciation is converted once.
            System.Collections.Generic.List<string> wordPhonemes = new System.Collections.Generic.List<string>();
            foreach (ISpeechPhraseElement MyPhrase in Result.PhraseInfo.Elements)
            {
                if (MyPhrase.DisplayText != " ")
                {
                    wordPhonemes.Add(MyPhoneConverter.IdToPhone(MyPhrase.Pronunciation));
                }
            }

            // A result without usable elements is reported explicitly as an extraction error.
            if (wordPhonemes.Count == 0)
            {
                VA.SetText("~~RecognitionError", "Error during phoneme extraction");
                return;
            }

            string[] phonemeArray = wordPhonemes.ToArray();
            VA.SetText("~~SAPIPhonemesRaw", string.Join(" ", phonemeArray).Trim());
            VA.SetText("~~SAPIPhonemes", string.Join(WordSeparator, phonemeArray).Trim());
        }
        catch (Exception ex)
        {
            VA.WriteToLog(ex.ToString());
            VA.SetText("~~RecognitionError", "Error during phoneme extraction");
        }
    }
}
// References:
// https://github.com/rti7743/rtilabs/blob/master/files/asobiba/DictationFilter/DictationFilter/SpeechRecognitionRegexp.cs
// https://***.com/questions/6193874/help-with-sapi-v5-1-speechrecognitionengine-always-gives-same-wrong-result-with/6203533#6203533
// http://www.drdobbs.com/com-objects-c-and-the-microsoft-speech-a/184416575
// http://vbcity.com/forums/t/125150.aspx
// https://people.kth.se/~maguire/DEGREE-PROJECT-REPORTS/050702-Johan_Sverin-with-cover.pdf
// https://msdn.microsoft.com/en-us/library/ee125471(v=vs.85).aspx
// https://***.com/questions/20770593/speech-to-phoneme-in-net
【讨论】:
以上是关于如何在 C# 中使用 SpInprocRecoContext 识别语音事件?的主要内容,如果未能解决你的问题,请参考以下文章