Speech Recognition Code Based on the Windows Vista Speech API 5.3 and WPF

Posted by wicnwicnwh


 

Trying my hand at this, I put together an experimental speech recognition program using C# (.NET 3.0) and WPF.
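
The post includes only the code-behind, which assumes a Window1.xaml declaring the controls it wires up. The markup below is a minimal sketch reconstructed from those control names; the layout and captions are my assumptions, not the original file.

Window1.xaml (sketch)

<Window x:Class="speechReco.Window1"
    xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
    xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
    Title="speechReco" Height="500" Width="360">
    <StackPanel>
        <!-- hypothetical layout: only the element names come from the code-behind -->
        <Button Name="btnSharedColor">Shared recognizer: color grammar</Button>
        <Button Name="btnInProcColor">In-process recognizer: color grammar</Button>
        <Button Name="btnTapDictation">Hold to dictate</Button>
        <CheckBox Name="cbSpelling">Spelling dictation</CheckBox>
        <ComboBox Name="cbRules"/>
        <Button Name="btnSrgs">Listen with selected SRGS rule</Button>
        <Button Name="btnAdvGrammarBuilder">Load pizza grammar</Button>
        <Button Name="btnWavFile">Recognize spoken.wav</Button>
        <TextBox Name="txtSynthTxt"/>
        <Button Name="btnSynthPhonemes">Synthesize, then recognize phonemes</Button>
        <Button Name="btnEnable">Enable shared recognizer</Button>
        <Button Name="btnDisable">Disable shared recognizer</Button>
        <Button Name="btnUnload">Unload all grammars</Button>
        <Button Name="btnEmulate">Emulate saying "green"</Button>
        <TextBox Name="txtReco"/>
        <TextBox Name="txtRecoPho"/>
    </StackPanel>
</Window>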

 

 

Window1.xaml.cs

 

using System;
using System.Collections.Generic;
using System.Text;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Shapes;

using System.Reflection;
using System.Windows.Threading;
using System.IO;
using System.Xml;
using System.Collections.ObjectModel;
using System.ComponentModel;

using System.Speech.Recognition;
using System.Speech.Recognition.SrgsGrammar;
using System.Speech.Synthesis;

namespace speechReco
{
    /// <summary>
    /// Interaction logic for Window1.xaml
    /// </summary>

    public partial class Window1 : System.Windows.Window
    {
        private SpeechRecognizer sharedRecognizer;
        private SpeechRecognitionEngine appRecognizer;
        private SrgsDocument sdCmnrules;

        public Window1()
        {
            InitializeComponent();

            sharedRecognizer = new SpeechRecognizer();
            sharedRecognizer.AudioLevelUpdated += new EventHandler<AudioLevelUpdatedEventArgs>(sharedRecognizer_AudioLevelUpdated);
            sharedRecognizer.AudioSignalProblemOccurred += new EventHandler<AudioSignalProblemOccurredEventArgs>(sharedRecognizer_AudioSignalProblemOccurred);
            sharedRecognizer.AudioStateChanged += new EventHandler<AudioStateChangedEventArgs>(sharedRecognizer_AudioStateChanged);
            sharedRecognizer.EmulateRecognizeCompleted += new EventHandler<EmulateRecognizeCompletedEventArgs>(sharedRecognizer_EmulateRecognizeCompleted);
            sharedRecognizer.LoadGrammarCompleted += new EventHandler<LoadGrammarCompletedEventArgs>(sharedRecognizer_LoadGrammarCompleted);
            sharedRecognizer.RecognizerUpdateReached += new EventHandler<RecognizerUpdateReachedEventArgs>(sharedRecognizer_RecognizerUpdateReached);
            sharedRecognizer.SpeechDetected += new EventHandler<SpeechDetectedEventArgs>(sharedRecognizer_SpeechDetected);
            sharedRecognizer.SpeechHypothesized += new EventHandler<SpeechHypothesizedEventArgs>(sharedRecognizer_SpeechHypothesized);
            sharedRecognizer.SpeechRecognitionRejected += new EventHandler<SpeechRecognitionRejectedEventArgs>(sharedRecognizer_SpeechRecognitionRejected);
            sharedRecognizer.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(sharedRecognizer_SpeechRecognized);
            sharedRecognizer.StateChanged += new EventHandler<System.Speech.Recognition.StateChangedEventArgs>(sharedRecognizer_StateChanged);

            //load the SRGS rule library (cmnrules.xml, embedded as a project
            //resource; it defines reusable public rules such as "integer")
            byte[] ba = speechReco.Properties.Resources.cmnrules;
            MemoryStream ms = new MemoryStream(ba);
            ms.Position = 0;
            XmlReader xr = XmlReader.Create(ms);
            sdCmnrules = new SrgsDocument(xr);
            //populate ComboBox
            foreach(SrgsRule rule in sdCmnrules.Rules)
            {
                if (rule.Scope == SrgsRuleScope.Public)
                {
                    cbRules.Items.Add(rule.Id);
                }
            }
            //default to integer rule
            cbRules.SelectedValue = "integer";
            cbRules.SelectionChanged += new SelectionChangedEventHandler(cbRules_SelectionChanged);

            this.btnSharedColor.Click += new RoutedEventHandler(btnSharedColor_Click);
            this.btnInProcColor.Click += new RoutedEventHandler(btnInProcColor_Click);
            this.btnTapDictation.PreviewMouseLeftButtonDown += new MouseButtonEventHandler(btnTapDictation_PreviewMouseLeftButtonDown);
            this.btnTapDictation.PreviewMouseLeftButtonUp += new MouseButtonEventHandler(btnTapDictation_PreviewMouseLeftButtonUp);
            this.btnSrgs.Click += new RoutedEventHandler(btnSrgs_Click);
            this.btnAdvGrammarBuilder.Click += new RoutedEventHandler(btnAdvGrammarBuilder_Click);
            this.btnWavFile.Click += new RoutedEventHandler(btnWavFile_Click);
            this.btnSynthPhonemes.Click += new RoutedEventHandler(btnSynthPhonemes_Click);
            this.btnEnable.Click += new RoutedEventHandler(btnEnable_Click);
            this.btnDisable.Click += new RoutedEventHandler(btnDisable_Click);
            this.btnUnload.Click += new RoutedEventHandler(btnUnload_Click);
            this.btnEmulate.Click += new RoutedEventHandler(btnEmulate_Click);
        }

        void btnEmulate_Click(object sender, RoutedEventArgs e)
        {
            //the synchronous EmulateRecognize call blocks until recognition
            //completes; the async variant returns immediately and raises
            //EmulateRecognizeCompleted (handled below)
            //sharedRecognizer.EmulateRecognize("green");
            sharedRecognizer.EmulateRecognizeAsync("green");
            //sharedRecognizer.EmulateRecognize("stop listening");
        }

        void btnUnload_Click(object sender, RoutedEventArgs e)
        {
            sharedRecognizer.UnloadAllGrammars();
        }

        void btnDisable_Click(object sender, RoutedEventArgs e)
        {
            sharedRecognizer.Enabled = false;
        }

        void btnEnable_Click(object sender, RoutedEventArgs e)
        {
            sharedRecognizer.Enabled = true;
        }

        string recoPhonemes;
        void btnSynthPhonemes_Click(object sender, RoutedEventArgs e)
        {
            //this is a trick to figure out the phonemes the synthesis engine uses:
            //speak the text to an in-memory wav, then recognize that wav against a
            //grammar containing only the same text, and read the Pronunciation
            //property of the recognized words
           
            //txt to wav
            MemoryStream audioStream = new MemoryStream();
            SpeechSynthesizer synth = new SpeechSynthesizer();
            synth.SetOutputToWaveStream(audioStream);
            PromptBuilder pb = new PromptBuilder();
            pb.AppendBreak(PromptBreak.ExtraSmall); //'e' won't be recognized if this break is too large, or missing?
            synth.Speak(pb);
            string textToSpeak = this.txtSynthTxt.Text.Trim();
            synth.Speak(textToSpeak);
            //synth.Speak(pb);
            synth.SetOutputToNull();
            audioStream.Position = 0;

            //now wav to txt (for reco phonemes)
            recoPhonemes = String.Empty;
            GrammarBuilder gb = new GrammarBuilder(textToSpeak);
            Grammar g = new Grammar(gb); //TODO the hard letters to recognize are 'g' and 'e'
            SpeechRecognitionEngine reco = new SpeechRecognitionEngine();
            reco.SpeechHypothesized += new EventHandler<SpeechHypothesizedEventArgs>(reco_SpeechHypothesized);
            reco.SpeechRecognitionRejected += new EventHandler<SpeechRecognitionRejectedEventArgs>(reco_SpeechRecognitionRejected);
            reco.UnloadAllGrammars(); //only use the one word grammar
            reco.LoadGrammar(g);
            reco.SetInputToWaveStream(audioStream);
            RecognitionResult rr = reco.Recognize();
            reco.SetInputToNull();
            if (rr != null)
            {
                recoPhonemes = StringFromWordArray(rr.Words, WordType.Pronunciation);
            }
            txtRecoPho.Text = recoPhonemes;
        }

        void reco_SpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
        {
            recoPhonemes = StringFromWordArray(e.Result.Words, WordType.Pronunciation);
        }

        void reco_SpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)
        {
            recoPhonemes = StringFromWordArray(e.Result.Words, WordType.Pronunciation);
        }

        void btnWavFile_Click(object sender, RoutedEventArgs e)
        {
            sharedRecognizer.Enabled = false;

            appRecognizer = new SpeechRecognitionEngine();
            appRecognizer.SetInputToWaveFile("spoken.wav");
            appRecognizer.LoadGrammar(new DictationGrammar());
            RecognitionResult rr = appRecognizer.Recognize();
            appRecognizer.SetInputToNull();
            if (rr == null)
            {
                MessageBox.Show("null result?");
            }
            else
            {
                //NOTE the in-process recognizer cannot send feedback to the microphone bar
                //SpeechUI.SendTextFeedback(rr, rr.Text, true);

                //show phoneme result
                string phonemes = StringFromWordArray(rr.Words, WordType.Pronunciation);
                txtRecoPho.Text = phonemes;

                //show text result
                MessageBox.Show(rr.Text);               
            }
            appRecognizer.Dispose();
        }
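
        //btnWavFile_Click expects a "spoken.wav" file in the working directory;
        //a test file can be generated with the synthesizer. This helper is a
        //sketch (not in the original post) and the spoken text is arbitrary:
        void MakeTestWav()
        {
            using (SpeechSynthesizer synth = new SpeechSynthesizer())
            {
                synth.SetOutputToWaveFile("spoken.wav");
                synth.Speak("hello world");
            }
        }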

        public enum WordType
        {
            Text,
            Normalized = Text,
            Lexical,
            Pronunciation
        }

        public static string StringFromWordArray(ReadOnlyCollection<RecognizedWordUnit> words, WordType type)
        {
            string text = "";
            foreach (RecognizedWordUnit word in words)
            {
                string wordText = "";
                if (type == WordType.Text || type == WordType.Normalized)
                {
                    wordText = word.Text;
                }
                else if (type == WordType.Lexical)
                {
                    wordText = word.LexicalForm;
                }
                else if (type == WordType.Pronunciation)
                {
                    wordText = word.Pronunciation;
                }
                else
                {
                    throw new InvalidEnumArgumentException(String.Format("{0} is not a valid input", type));
                }
                //Use display attribute

                if ((word.DisplayAttributes & DisplayAttributes.OneTrailingSpace) != 0)
                {
                    wordText += " ";
                }
                if ((word.DisplayAttributes & DisplayAttributes.TwoTrailingSpaces) != 0)
                {
                    wordText += "  ";
                }
                if ((word.DisplayAttributes & DisplayAttributes.ConsumeLeadingSpaces) != 0)
                {
                    wordText = wordText.TrimStart();
                }
                if ((word.DisplayAttributes & DisplayAttributes.ZeroTrailingSpaces) != 0)
                {
                    wordText = wordText.TrimEnd();
                }

                text += wordText;

            }
            return text;
        }

        void btnAdvGrammarBuilder_Click(object sender, RoutedEventArgs e)
        {
            sharedRecognizer.Enabled = true;
            sharedRecognizer.UnloadAllGrammars();

            //from http://msdn.microsoft.com/msdnmag/issues/06/01/speechinWindowsVista/#S5
            //[I'd like] a [<size>] [<crust>] [<topping>] pizza [please]

            //build the core set of choices
            Choices sizes = new Choices("small", "regular", "large");
            Choices crusts = new Choices("thin crust", "thick crust");
            Choices toppings = new Choices("vegetarian", "pepperoni", "cheese");

            SemanticResultKey srkSize = new SemanticResultKey("size", sizes.ToGrammarBuilder());
            SemanticResultKey srkCrust = new SemanticResultKey("crust", crusts.ToGrammarBuilder());
            SemanticResultKey srkTopping = new SemanticResultKey("topping", toppings.ToGrammarBuilder());
            SemanticResultValue srvSize = new SemanticResultValue(srkSize, "regular");
            SemanticResultValue srvCrust = new SemanticResultValue(srkCrust, "thick crust");

            //build the permutations of choices...
            //choose all three
            GrammarBuilder sizeCrustTopping = new GrammarBuilder();
            //sizeCrustTopping.AppendChoices(sizes, "size");
            //sizeCrustTopping.AppendChoices(crusts, "crust");
            //sizeCrustTopping.AppendChoices(toppings, "topping");
            sizeCrustTopping.Append(srkSize);
            sizeCrustTopping.Append(srkCrust);
            sizeCrustTopping.Append(srkTopping);

            //choose size and topping, and assume thick crust
            GrammarBuilder sizeAndTopping = new GrammarBuilder();
            //sizeAndTopping.AppendChoices(sizes, "size");
            //sizeAndTopping.AppendChoices(toppings, "topping");
            //sizeAndTopping.AppendResultKeyValue("crust", "thick crust");
            sizeAndTopping.Append(srkSize);
            sizeAndTopping.Append(srkTopping);
            //TODO how to set default semantic value for "crust"?
            //sizeAndTopping.Append(srvCrust);
            //sizeAndTopping.Append(new SemanticResultValue(crusts.ToGrammarBuilder(), "thick crust"));
            //sizeAndTopping.Append(new SemanticResultValue("crust", "thick crust"));
            //sizeAndTopping.Append(new SemanticResultValue("thick crust"));
            //sizeAndTopping.Append(new SemanticResultKey("crust", "thick crust"));

            //choose topping only, and assume the rest
            GrammarBuilder toppingOnly = new GrammarBuilder();
            //toppingOnly.AppendChoices(toppings, "topping");
            //toppingOnly.AppendResultKeyValue("size", "regular");
            //toppingOnly.AppendResultKeyValue("crust", "thick crust");
            toppingOnly.Append(srkTopping);
            //TODO how to set default semantic value for "size" and "crust"?
            //toppingOnly.Append(srvSize);
            //toppingOnly.Append(srvCrust);
            //toppingOnly.Append(new SemanticResultKey("size", "regular"));
            //toppingOnly.Append(new SemanticResultKey("crust", "thick crust"));

            //assemble the permutations
            Choices permutations = new Choices();
            permutations.Add(sizeCrustTopping);
            permutations.Add(sizeAndTopping);
            permutations.Add(toppingOnly);

            //now build the complete pattern...
            GrammarBuilder pizzaRequest = new GrammarBuilder();
            //pre-amble "[I‘d like] a"
            pizzaRequest.Append(new Choices("I'd like a", "a"));
            //permutations "[<size>] [<crust>] [<topping>]"
            pizzaRequest.Append(permutations);
            //post-amble "pizza [please]"
            pizzaRequest.Append(new Choices("pizza", "pizza please"));
            string debug = pizzaRequest.DebugShowPhrases;

            //create the pizza grammar
            Grammar pizzaGrammar = new Grammar(pizzaRequest);

            //attach the event handler
            pizzaGrammar.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(pizzaGrammar_SpeechRecognized);

            //load the grammar into the recognizer
            sharedRecognizer.LoadGrammar(pizzaGrammar);

        }

        void pizzaGrammar_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
        {
            //the sizeAndTopping and toppingOnly permutations above omit the
            //"size"/"crust" keys, so guard with ContainsKey and fall back to
            //the assumed defaults instead of throwing KeyNotFoundException
            SemanticValue semantics = e.Result.Semantics;
            string size = semantics.ContainsKey("size") ? semantics["size"].Value.ToString() : "regular";
            string crust = semantics.ContainsKey("crust") ? semantics["crust"].Value.ToString() : "thick crust";

            StringBuilder resultString = new StringBuilder();
            resultString.Append("Raw text result: ");
            resultString.AppendLine(e.Result.Text);
            resultString.Append("Size: ");
            resultString.AppendLine(size);
            resultString.Append("Crust: ");
            resultString.AppendLine(crust);
            resultString.Append("Topping: ");
            resultString.AppendLine(semantics["topping"].Value.ToString());
            MessageBox.Show(resultString.ToString());
        }
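
        //the pizza grammar can be exercised without a microphone via text
        //emulation; this helper is a sketch, and the phrases are examples that
        //must match the grammar exactly (pre-amble, permutation, post-amble):
        void TestPizzaGrammar()
        {
            sharedRecognizer.EmulateRecognizeAsync("I'd like a large thin crust pepperoni pizza please");
            //only one async emulation may be pending at a time; this variant
            //exercises the toppingOnly permutation with defaulted size/crust:
            //sharedRecognizer.EmulateRecognizeAsync("a cheese pizza");
        }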

        void cbRules_SelectionChanged(object sender, SelectionChangedEventArgs e)
        {
            //TODO
        }

        void btnSrgs_Click(object sender, RoutedEventArgs e)
        {
            sharedRecognizer.Enabled = true;
            sharedRecognizer.UnloadAllGrammars();

            string ruleName = (string) cbRules.SelectedValue;
            //SrgsRule rule = sdCmnrules.Rules[ruleName];

            Grammar grammarSrgs = new Grammar(sdCmnrules, ruleName);
            grammarSrgs.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(grammarSrgs_SpeechRecognized);

            sharedRecognizer.LoadGrammar(grammarSrgs);
            MessageBox.Show("listening for user input based on the selected rule : " + ruleName);
        }

        void grammarSrgs_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
        {
            //send text to microphone bar
            SpeechUI.SendTextFeedback(e.Result, e.Result.Text, true);
            //send actual numeric value to TextBox on form
            if (e.Result.Semantics.Value != null)
            {
                this.Dispatcher.Invoke(DispatcherPriority.Render, new UpdateTxtRecoDelegate(UpdateTextReco), e.Result.Semantics.Value.ToString());
            }
        }

        void btnTapDictation_PreviewMouseLeftButtonDown(object sender, MouseButtonEventArgs e)
        {
            sharedRecognizer.Enabled = false;

            dictationResult = String.Empty;
            appRecognizer = new SpeechRecognitionEngine();
            appRecognizer.SetInputToDefaultAudioDevice();
            appRecognizer.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(appRecognizer_SpeechRecognized);
            DictationGrammar dg;
            if (cbSpelling.IsChecked == false)
            {
                dg = new DictationGrammar();
            }
            else
            {
                dg = new DictationGrammar("grammar:dictation#spelling");
            }
            appRecognizer.LoadGrammar(dg);
            appRecognizer.RecognizeAsync(RecognizeMode.Multiple);
        }

        string dictationResult;
        void appRecognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
        {
            //on UI thread
            dictationResult += e.Result.Text;
            txtReco.Text = dictationResult;
        }

        void btnTapDictation_PreviewMouseLeftButtonUp(object sender, MouseButtonEventArgs e)
        {
            //guard against a mouse-up arriving without a matching mouse-down
            if (appRecognizer == null)
                return;
            appRecognizer.RecognizeAsyncStop();
            appRecognizer.Dispose();
            appRecognizer = null;
        }

        void btnInProcColor_Click(object sender, RoutedEventArgs e)
        {
            sharedRecognizer.Enabled = false;

            Choices cColor = GetColorChoices();

            GrammarBuilder gb = new GrammarBuilder(cColor);
            Grammar grammarColors = new Grammar(gb);
            grammarColors.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(grammarColors_SpeechRecognized);

            appRecognizer = new SpeechRecognitionEngine();
            appRecognizer.SetInputToDefaultAudioDevice();
            appRecognizer.LoadGrammar(grammarColors);
            appRecognizer.LoadGrammar(new DictationGrammar());
            appRecognizer.RecognizeAsync(RecognizeMode.Multiple);

            MessageBox.Show("listening for you to say a color (e.g. Green)");
        }

        private Choices GetColorChoices()
        {
            //build a grammar list of colors
            Choices cColor = new Choices();

            Type t = typeof(Colors);
            MemberInfo[] mia = t.GetMembers(BindingFlags.Public | BindingFlags.Static);
            foreach (MemberInfo mi in mia)
            {
                //skip property getter methods (e.g. "get_Red"); keep the property names
                if (mi.Name.StartsWith("get_"))
                    continue;
                cColor.Add(mi.Name);
            }

            return cColor;
        }

        void btnSharedColor_Click(object sender, RoutedEventArgs e)
        {
            sharedRecognizer.Enabled = true;
            sharedRecognizer.UnloadAllGrammars();

            Choices cColor = GetColorChoices();

            GrammarBuilder gb = new GrammarBuilder(cColor);
            Grammar grammarColors = new Grammar(gb);
            grammarColors.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(grammarColors_SpeechRecognized);

            sharedRecognizer.LoadGrammar(grammarColors);
            MessageBox.Show("listening for you to say a color (e.g. Green)");
        }

        void grammarColors_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
        {
            //not on UI thread
            //txtReco.Text = e.Result.Text;
            //need to use Dispatcher to get back on UI thread

            //NOTE an anonymous method has no intrinsic delegate type, so the
            //compiler cannot convert it to System.Delegate; it must be cast to
            //a concrete delegate type first (see the MethodInvoker cast below)
            //this.Dispatcher.Invoke(DispatcherPriority.Render,
            //    delegate()
            //    {
            //        txtReco.Text = e.Result.Text;
            //    });

            //http://romanski.livejournal.com/1761.html
            //any concrete parameterless delegate type will do; MethodInvoker is
            //borrowed from System.Windows.Forms for convenience
            this.Dispatcher.Invoke(DispatcherPriority.Render,
            (System.Windows.Forms.MethodInvoker)delegate
            {
                txtReco.Text = e.Result.Text;
            });
           
            //this.Dispatcher.Invoke(DispatcherPriority.Render, new UpdateTxtRecoDelegate(UpdateTextReco), e.Result.Text);
        }
       
        delegate void UpdateTxtRecoDelegate(string arg);
        public void UpdateTextReco(string arg)
        {
            txtReco.Text = arg;
        }


        #region SHARED_RECOGNIZER_EVENTS
        void sharedRecognizer_StateChanged(object sender, System.Speech.Recognition.StateChangedEventArgs e)
        {
            System.Console.WriteLine("StateChanged : " + e.RecognizerState.ToString());
        }

        void sharedRecognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
        {
            //on UI thread
            System.Console.WriteLine("SpeechRecognized : " + e.Result.Text);
            //txtReco.Text = e.Result.Text;
        }

        void sharedRecognizer_SpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
        {
            System.Console.WriteLine("SpeechRecognitionRejected : " + e.Result.Text);
        }

        void sharedRecognizer_SpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)
        {
            System.Console.WriteLine("SpeechHypothesized : " + e.Result.Text);
        }

        void sharedRecognizer_SpeechDetected(object sender, SpeechDetectedEventArgs e)
        {
            System.Console.WriteLine("SpeechDetected : " + e.AudioPosition.TotalMilliseconds.ToString());
        }

        void sharedRecognizer_RecognizerUpdateReached(object sender, RecognizerUpdateReachedEventArgs e)
        {
            System.Console.WriteLine("RecognizerUpdateReached : " + e.AudioPosition.TotalMilliseconds.ToString());
        }

        void sharedRecognizer_LoadGrammarCompleted(object sender, LoadGrammarCompletedEventArgs e)
        {
            System.Console.WriteLine("LoadGrammarCompleted : " + e.Grammar.Name);
        }

        void sharedRecognizer_EmulateRecognizeCompleted(object sender, EmulateRecognizeCompletedEventArgs e)
        {
            if (e.Result != null)
            {
                System.Console.WriteLine("EmulateRecognizeCompleted : " + e.Result.Text);
            }
            else
            {
                System.Console.WriteLine("EmulateRecognizeCompleted : null result");
            }
        }

        void sharedRecognizer_AudioStateChanged(object sender, AudioStateChangedEventArgs e)
        {
            System.Console.WriteLine("AudioStateChanged : " + e.AudioState.ToString());
        }

        void sharedRecognizer_AudioSignalProblemOccurred(object sender, AudioSignalProblemOccurredEventArgs e)
        {
            System.Console.WriteLine("AudioSignalProblemOccurred : " + e.AudioSignalProblem.ToString());
        }

        void sharedRecognizer_AudioLevelUpdated(object sender, AudioLevelUpdatedEventArgs e)
        {
            //System.Console.WriteLine("AudioLevelUpdated : " + e.AudioLevel.ToString());
        }
        #endregion

    }
}

If you'd like the full project, leave your email address and I'll send it to you.

