`

一个基于 Windows Vista Speech API 5.3 以及 WPF 技术的语音识别示例代码

 
阅读更多

本人小试牛刀,用 C#(.NET 3.0)和 WPF 技术开发了一个语音识别程序,完整代码如下。

windows.cs

using System;
using System.Collections.Generic;
using System.Text;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Shapes;

using System.Reflection;
using System.Windows.Threading;
using System.IO;
using System.Xml;
using System.Collections.ObjectModel;
using System.ComponentModel;

using System.Speech.Recognition;
using System.Speech.Recognition.SrgsGrammar;
using System.Speech.Synthesis;

namespace speechReco
{
/// <summary>
/// Interaction logic for Window1.xaml
/// </summary>

public partial class Window1 : System.Windows.Window
{
//recognizer created once in the constructor; the "shared" button handlers load grammars into it
private SpeechRecognizer sharedRecognizer;
//in-process engine; the wav-file, tap-dictation and in-proc color handlers each create a new one
private SpeechRecognitionEngine appRecognizer;
//SRGS rule library parsed from the embedded "cmnrules" resource in the constructor
private SrgsDocument sdCmnrules;

/// <summary>
/// Builds the window: creates the shared recognizer and subscribes its diagnostic
/// events, parses the embedded SRGS rule library, fills the rule ComboBox with the
/// public rules and wires up every button handler.
/// </summary>
public Window1()
{
    InitializeComponent();

    sharedRecognizer = new SpeechRecognizer();
    sharedRecognizer.AudioLevelUpdated += sharedRecognizer_AudioLevelUpdated;
    sharedRecognizer.AudioSignalProblemOccurred += sharedRecognizer_AudioSignalProblemOccurred;
    sharedRecognizer.AudioStateChanged += sharedRecognizer_AudioStateChanged;
    sharedRecognizer.EmulateRecognizeCompleted += sharedRecognizer_EmulateRecognizeCompleted;
    sharedRecognizer.LoadGrammarCompleted += sharedRecognizer_LoadGrammarCompleted;
    sharedRecognizer.RecognizerUpdateReached += sharedRecognizer_RecognizerUpdateReached;
    sharedRecognizer.SpeechDetected += sharedRecognizer_SpeechDetected;
    sharedRecognizer.SpeechHypothesized += sharedRecognizer_SpeechHypothesized;
    sharedRecognizer.SpeechRecognitionRejected += sharedRecognizer_SpeechRecognitionRejected;
    sharedRecognizer.SpeechRecognized += sharedRecognizer_SpeechRecognized;
    sharedRecognizer.StateChanged += sharedRecognizer_StateChanged;

    //load SRGS library; the stream and reader are only needed while the document
    //is being constructed, so they are released as soon as it has been built
    using (MemoryStream ms = new MemoryStream(speechReco.Properties.Resources.cmnrules))
    using (XmlReader xr = XmlReader.Create(ms))
    {
        sdCmnrules = new SrgsDocument(xr);
    }

    //populate ComboBox with the ids of the public rules only
    foreach (SrgsRule rule in sdCmnrules.Rules)
    {
        if (rule.Scope == SrgsRuleScope.Public)
        {
            cbRules.Items.Add(rule.Id);
        }
    }
    //default to integer rule (selected before the handler is attached, so no event is handled for it)
    cbRules.SelectedValue = "integer";
    cbRules.SelectionChanged += cbRules_SelectionChanged;

    this.btnSharedColor.Click += btnSharedColor_Click;
    this.btnInProcColor.Click += btnInProcColor_Click;
    this.btnTapDictation.PreviewMouseLeftButtonDown += btnTapDictation_PreviewMouseLeftButtonDown;
    this.btnTapDictation.PreviewMouseLeftButtonUp += btnTapDictation_PreviewMouseLeftButtonUp;
    this.btnSrgs.Click += btnSrgs_Click;
    this.btnAdvGrammarBuilder.Click += btnAdvGrammarBuilder_Click;
    this.btnWavFile.Click += btnWavFile_Click;
    this.btnSynthPhonemes.Click += btnSynthPhonemes_Click;
    this.btnEnable.Click += btnEnable_Click;
    this.btnDisable.Click += btnDisable_Click;
    this.btnUnload.Click += btnUnload_Click;
    this.btnEmulate.Click += btnEmulate_Click;
}

/// <summary>
/// Asks the shared recognizer to asynchronously emulate hearing the phrase "green".
/// </summary>
void btnEmulate_Click(object sender, RoutedEventArgs e)
{
    //synchronous variants kept for experimentation:
    //  sharedRecognizer.EmulateRecognize("green");
    //  sharedRecognizer.EmulateRecognize("stop listening");
    const string emulatedPhrase = "green";
    this.sharedRecognizer.EmulateRecognizeAsync(emulatedPhrase);
}

/// <summary>
/// Unloads every grammar currently loaded into the shared recognizer.
/// </summary>
void btnUnload_Click(object sender, RoutedEventArgs e)
{
    this.sharedRecognizer.UnloadAllGrammars();
}

/// <summary>
/// Switches the shared recognizer off by clearing its Enabled flag.
/// </summary>
void btnDisable_Click(object sender, RoutedEventArgs e)
{
    const bool enabled = false;
    this.sharedRecognizer.Enabled = enabled;
}

/// <summary>
/// Switches the shared recognizer on by setting its Enabled flag.
/// </summary>
void btnEnable_Click(object sender, RoutedEventArgs e)
{
    const bool enabled = true;
    this.sharedRecognizer.Enabled = enabled;
}

//pronunciation string produced by the most recent synthesize-then-recognize round trip;
//also written by the reco_SpeechHypothesized / reco_SpeechRecognitionRejected handlers
string recoPhonemes;

/// <summary>
/// Synthesizes the text from txtSynthTxt into an in-memory wave stream, runs that audio
/// through an in-process recognizer whose only grammar is the same text, and shows the
/// recognized pronunciation in txtRecoPho. This is a trick to figure out the phonemes
/// used by the synthesis engine.
/// </summary>
void btnSynthPhonemes_Click(object sender, RoutedEventArgs e)
{
    recoPhonemes = String.Empty;

    string textToSpeak = this.txtSynthTxt.Text.Trim();
    if (textToSpeak.Length == 0)
    {
        //nothing to synthesize, and no phrase to build the one-phrase grammar from
        txtRecoPho.Text = recoPhonemes;
        return;
    }

    //the stream, synthesizer and engine are all disposable; release them on every path
    using (MemoryStream audioStream = new MemoryStream())
    {
        //txt to wav
        using (SpeechSynthesizer synth = new SpeechSynthesizer())
        {
            synth.SetOutputToWaveStream(audioStream);
            PromptBuilder pb = new PromptBuilder();
            pb.AppendBreak(PromptBreak.ExtraSmall); //'e' wont be recognized if this is large, or non-existent?
            synth.Speak(pb);
            synth.Speak(textToSpeak);
            //detach from the stream before the synthesizer is disposed
            synth.SetOutputToNull();
        }
        audioStream.Position = 0;

        //now wav to txt (for reco phonemes)
        GrammarBuilder gb = new GrammarBuilder(textToSpeak);
        Grammar g = new Grammar(gb); //TODO the hard letters to recognize are 'g' and 'e'
        using (SpeechRecognitionEngine reco = new SpeechRecognitionEngine())
        {
            //hypotheses and rejections also update recoPhonemes, so a partial
            //pronunciation is still shown when no final result is produced
            reco.SpeechHypothesized += reco_SpeechHypothesized;
            reco.SpeechRecognitionRejected += reco_SpeechRecognitionRejected;
            reco.UnloadAllGrammars(); //only use the one word grammar
            reco.LoadGrammar(g);
            reco.SetInputToWaveStream(audioStream);
            RecognitionResult rr = reco.Recognize();
            reco.SetInputToNull();
            if (rr != null)
            {
                recoPhonemes = StringFromWordArray(rr.Words, WordType.Pronunciation);
            }
        }
    }

    txtRecoPho.Text = recoPhonemes;
}

/// <summary>
/// Stores the pronunciation of a rejected result in recoPhonemes.
/// </summary>
void reco_SpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
{
    ReadOnlyCollection<RecognizedWordUnit> rejectedWords = e.Result.Words;
    this.recoPhonemes = StringFromWordArray(rejectedWords, WordType.Pronunciation);
}

/// <summary>
/// Stores the pronunciation of a hypothesized (tentative) result in recoPhonemes.
/// </summary>
void reco_SpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)
{
    ReadOnlyCollection<RecognizedWordUnit> hypothesizedWords = e.Result.Words;
    this.recoPhonemes = StringFromWordArray(hypothesizedWords, WordType.Pronunciation);
}

/// <summary>
/// Runs dictation recognition over the file "spoken.wav" with an in-process engine,
/// then shows the pronunciation in txtRecoPho and the recognized text in a message box.
/// The shared recognizer is disabled first.
/// </summary>
void btnWavFile_Click(object sender, RoutedEventArgs e)
{
    sharedRecognizer.Enabled = false;

    //release any in-process engine left behind by an earlier handler before replacing it
    if (appRecognizer != null)
    {
        appRecognizer.Dispose();
        appRecognizer = null;
    }

    appRecognizer = new SpeechRecognitionEngine();
    try
    {
        appRecognizer.SetInputToWaveFile("spoken.wav");
        appRecognizer.LoadGrammar(new DictationGrammar());
        RecognitionResult rr = appRecognizer.Recognize();
        appRecognizer.SetInputToNull();
        if (rr == null)
        {
            MessageBox.Show("null result?");
        }
        else
        {
            //NOTE in-process recognizer cannot send feedback to microphone bar
            //SpeechUI.SendTextFeedback(rr, rr.Text, true);

            //show phoneme result
            string phonemes = StringFromWordArray(rr.Words, WordType.Pronunciation);
            txtRecoPho.Text = phonemes;

            //show text result
            MessageBox.Show(rr.Text);
        }
    }
    finally
    {
        //dispose even when the file is missing or recognition throws, and do not
        //leave a disposed engine behind in the field
        appRecognizer.Dispose();
        appRecognizer = null;
    }
}

/// <summary>
/// Selects which textual form of a recognized word StringFromWordArray emits.
/// </summary>
public enum WordType
{
    /// <summary>The word's Text property.</summary>
    Text = 0,
    /// <summary>Alias of <see cref="Text"/>; both names share the same value.</summary>
    Normalized = Text,
    /// <summary>The word's LexicalForm property.</summary>
    Lexical = 1,
    /// <summary>The word's Pronunciation property.</summary>
    Pronunciation = 2
}

/// <summary>
/// Concatenates the chosen form of every recognized word into one string, applying
/// each word's display attributes to decide how much whitespace surrounds it.
/// </summary>
/// <param name="words">The recognized words, in order.</param>
/// <param name="type">Which form of each word to emit (text, lexical form or pronunciation).</param>
/// <returns>The assembled string; empty when <paramref name="words"/> has no elements.</returns>
/// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
/// <exception cref="InvalidEnumArgumentException">
/// <paramref name="type"/> is not a defined <see cref="WordType"/> value (raised when the first word is processed).
/// </exception>
public static string StringFromWordArray(ReadOnlyCollection<RecognizedWordUnit> words, WordType type)
{
    if (words == null)
    {
        throw new ArgumentNullException("words");
    }

    StringBuilder text = new StringBuilder();
    foreach (RecognizedWordUnit word in words)
    {
        string wordText;
        switch (type)
        {
            case WordType.Text: //WordType.Normalized has the same value, so this label covers both
                wordText = word.Text;
                break;
            case WordType.Lexical:
                wordText = word.LexicalForm;
                break;
            case WordType.Pronunciation:
                wordText = word.Pronunciation;
                break;
            default:
                //the placeholder must be "{0}"; the former "[0}" made String.Format itself throw
                throw new InvalidEnumArgumentException(String.Format("{0}: is not a valid input", type));
        }

        //a missing form is treated as empty so the Trim calls below cannot fail on null
        if (wordText == null)
        {
            wordText = String.Empty;
        }

        //Use display attribute
        DisplayAttributes attributes = word.DisplayAttributes;
        if ((attributes & DisplayAttributes.OneTrailingSpace) != 0)
        {
            wordText += " ";
        }
        if ((attributes & DisplayAttributes.TwoTrailingSpaces) != 0)
        {
            wordText += "  "; //two spaces, as the attribute name says
        }
        if ((attributes & DisplayAttributes.ConsumeLeadingSpaces) != 0)
        {
            wordText = wordText.TrimStart();
        }
        if ((attributes & DisplayAttributes.ZeroTrailingSpaces) != 0)
        {
            wordText = wordText.TrimEnd();
        }

        text.Append(wordText);
    }
    return text.ToString();
}

/// <summary>
/// Replaces the shared recognizer's grammars with a pizza-ordering grammar of the form
/// "[I'd like] a [size] [crust] topping pizza [please]" and routes its recognitions to
/// pizzaGrammar_SpeechRecognized.
/// </summary>
void btnAdvGrammarBuilder_Click(object sender, RoutedEventArgs e)
{
    sharedRecognizer.Enabled = true;
    sharedRecognizer.UnloadAllGrammars();

    //from http://msdn.microsoft.com/msdnmag/issues/06/01/speechinWindowsVista/#S5
    //[I'd like] a [<size>] [<crust>] [<topping>] pizza [please]

    //build the core set of choices
    Choices sizes = new Choices("small", "regular", "large");
    Choices crusts = new Choices("thin crust", "thick crust");
    Choices toppings = new Choices("vegetarian", "pepperoni", "cheese");

    //tag each choice list with the semantic key its spoken value is stored under
    SemanticResultKey srkSize = new SemanticResultKey("size", sizes.ToGrammarBuilder());
    SemanticResultKey srkCrust = new SemanticResultKey("crust", crusts.ToGrammarBuilder());
    SemanticResultKey srkTopping = new SemanticResultKey("topping", toppings.ToGrammarBuilder());

    //build the permutations of choices...
    //choose all three
    GrammarBuilder sizeCrustTopping = new GrammarBuilder();
    sizeCrustTopping.Append(srkSize);
    sizeCrustTopping.Append(srkCrust);
    sizeCrustTopping.Append(srkTopping);

    //choose size and topping
    //TODO how to set a default semantic value for "crust" (intended: "thick crust")?
    //     as built here, this permutation carries no "crust" key at all
    GrammarBuilder sizeAndTopping = new GrammarBuilder();
    sizeAndTopping.Append(srkSize);
    sizeAndTopping.Append(srkTopping);

    //choose topping only
    //TODO how to set default semantic values for "size" (intended: "regular") and
    //     "crust" (intended: "thick crust")? this permutation carries only "topping"
    GrammarBuilder toppingOnly = new GrammarBuilder();
    toppingOnly.Append(srkTopping);

    //assemble the permutations
    Choices permutations = new Choices();
    permutations.Add(sizeCrustTopping);
    permutations.Add(sizeAndTopping);
    permutations.Add(toppingOnly);

    //now build the complete pattern...
    GrammarBuilder pizzaRequest = new GrammarBuilder();
    //pre-amble "[I'd like] a"
    pizzaRequest.Append(new Choices("I'd like a", "a"));
    //permutations "[<size>] [<crust>] [<topping>]"
    pizzaRequest.Append(permutations);
    //post-amble "pizza [please]"
    pizzaRequest.Append(new Choices("pizza", "pizza please"));
    //tip: inspect pizzaRequest.DebugShowPhrases in the debugger to see the phrase pattern

    //create the pizza grammar
    Grammar pizzaGrammar = new Grammar(pizzaRequest);

    //attach the event handler
    pizzaGrammar.SpeechRecognized += pizzaGrammar_SpeechRecognized;

    //load the grammar into the recognizer
    sharedRecognizer.LoadGrammar(pizzaGrammar);
}

/// <summary>
/// Shows the raw text plus the size, crust and topping of a recognized pizza order.
/// The shorter phrases of the pizza grammar carry no "size" and/or "crust" semantic
/// key, so those two fall back to "regular" and "thick crust" instead of failing
/// the key lookup.
/// </summary>
void pizzaGrammar_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
    SemanticValue semantics = e.Result.Semantics;

    StringBuilder resultString = new StringBuilder();
    resultString.Append("Raw text result: ");
    resultString.AppendLine(e.Result.Text);
    resultString.Append("Size: ");
    resultString.AppendLine(PizzaSemanticOrDefault(semantics, "size", "regular"));
    resultString.Append("Crust: ");
    resultString.AppendLine(PizzaSemanticOrDefault(semantics, "crust", "thick crust"));
    resultString.Append("Topping: ");
    //every phrase of the pizza grammar includes a topping, so this key is read directly
    resultString.AppendLine(semantics["topping"].Value.ToString());
    MessageBox.Show(resultString.ToString());
}

/// <summary>
/// Returns the text of the semantic value stored under <paramref name="key"/>, or
/// <paramref name="fallback"/> when the key is absent or holds a null value.
/// </summary>
private static string PizzaSemanticOrDefault(SemanticValue semantics, string key, string fallback)
{
    if (semantics == null || !semantics.ContainsKey(key))
    {
        return fallback;
    }

    object value = semantics[key].Value;
    return value == null ? fallback : value.ToString();
}

/// <summary>
/// SelectionChanged handler for the rule ComboBox; currently does nothing.
/// </summary>
void cbRules_SelectionChanged(object sender, SelectionChangedEventArgs e)
{
//TODO not implemented; btnSrgs_Click reads the selected rule when its button is clicked
}

/// <summary>
/// Replaces the shared recognizer's grammars with the SRGS rule currently selected
/// in cbRules and tells the user which rule is now being listened for.
/// </summary>
void btnSrgs_Click(object sender, RoutedEventArgs e)
{
    this.sharedRecognizer.Enabled = true;
    this.sharedRecognizer.UnloadAllGrammars();

    //the ComboBox items are rule ids, so the selected value doubles as the rule name
    //SrgsRule rule = sdCmnrules.Rules[ruleName];
    string selectedRule = (string) this.cbRules.SelectedValue;

    Grammar ruleGrammar = new Grammar(this.sdCmnrules, selectedRule);
    ruleGrammar.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(grammarSrgs_SpeechRecognized);
    this.sharedRecognizer.LoadGrammar(ruleGrammar);

    string notice = "listening for user input based on the selected rule : " + selectedRule;
    MessageBox.Show(notice);
}

/// <summary>
/// Echoes the recognized text to the speech UI and, when the result carries a
/// semantic value, writes that value to txtReco through the dispatcher.
/// </summary>
void grammarSrgs_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
    RecognitionResult result = e.Result;

    //send text to microphone bar
    SpeechUI.SendTextFeedback(result, result.Text, true);

    //send actual numeric value to TextBox on form
    object semanticValue = result.Semantics.Value;
    if (semanticValue == null)
    {
        return;
    }

    UpdateTxtRecoDelegate updater = new UpdateTxtRecoDelegate(UpdateTextReco);
    this.Dispatcher.Invoke(DispatcherPriority.Render, updater, semanticValue.ToString());
}

/// <summary>
/// Starts push-to-talk dictation: disables the shared recognizer, clears the previous
/// dictation text and starts a new in-process engine listening on the default audio
/// device. The spelling grammar is used unless cbSpelling is explicitly unchecked.
/// </summary>
void btnTapDictation_PreviewMouseLeftButtonDown(object sender, MouseButtonEventArgs e)
{
    sharedRecognizer.Enabled = false;

    //release any in-process engine left behind by an earlier handler (for example the
    //in-proc color recognizer) instead of silently replacing it
    if (appRecognizer != null)
    {
        appRecognizer.Dispose();
        appRecognizer = null;
    }

    dictationResult = String.Empty;

    DictationGrammar dg;
    if (cbSpelling.IsChecked == false)
    {
        dg = new DictationGrammar();
    }
    else
    {
        dg = new DictationGrammar("grammar:dictation#spelling");
    }

    SpeechRecognitionEngine engine = new SpeechRecognitionEngine();
    try
    {
        engine.SetInputToDefaultAudioDevice();
        engine.SpeechRecognized += appRecognizer_SpeechRecognized;
        engine.LoadGrammar(dg);
        engine.RecognizeAsync(RecognizeMode.Multiple);
    }
    catch
    {
        //setup failed (e.g. no audio device); do not leak the half-configured engine
        engine.Dispose();
        throw;
    }

    //publish the engine only once it is actually listening
    appRecognizer = engine;
}

//text accumulated so far by the current tap-dictation session
string dictationResult;

/// <summary>
/// Appends each recognized dictation phrase to dictationResult and mirrors the
/// accumulated text into txtReco.
/// </summary>
void appRecognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
    //original author's note: this runs on the UI thread (txtReco is set directly)
    string recognizedText = e.Result.Text;
    this.dictationResult = this.dictationResult + recognizedText;
    this.txtReco.Text = this.dictationResult;
}

/// <summary>
/// Ends push-to-talk dictation: stops and disposes the in-process engine started on
/// mouse-down. Does nothing when no engine is active, for example when the button is
/// released without a matching press.
/// </summary>
void btnTapDictation_PreviewMouseLeftButtonUp(object sender, MouseButtonEventArgs e)
{
    SpeechRecognitionEngine engine = appRecognizer;
    if (engine == null)
    {
        return;
    }

    //clear the field first so no other handler can pick up the disposed engine
    appRecognizer = null;
    engine.RecognizeAsyncStop();
    engine.Dispose();
}

/// <summary>
/// Starts an in-process engine that listens on the default audio device for a color
/// name (plus free dictation) and reports color matches through
/// grammarColors_SpeechRecognized. The shared recognizer is disabled first.
/// </summary>
void btnInProcColor_Click(object sender, RoutedEventArgs e)
{
    sharedRecognizer.Enabled = false;

    //release the previous in-process engine; otherwise every click would leave
    //another engine behind, still listening
    if (appRecognizer != null)
    {
        appRecognizer.Dispose();
        appRecognizer = null;
    }

    Choices cColor = GetColorChoices();

    GrammarBuilder gb = new GrammarBuilder(cColor);
    Grammar grammarColors = new Grammar(gb);
    grammarColors.SpeechRecognized += grammarColors_SpeechRecognized;

    SpeechRecognitionEngine engine = new SpeechRecognitionEngine();
    try
    {
        engine.SetInputToDefaultAudioDevice();
        engine.LoadGrammar(grammarColors);
        engine.LoadGrammar(new DictationGrammar());
        engine.RecognizeAsync(RecognizeMode.Multiple);
    }
    catch
    {
        //setup failed (e.g. no audio device); do not leak the half-configured engine
        engine.Dispose();
        throw;
    }

    //publish the engine only once it is actually listening
    appRecognizer = engine;

    MessageBox.Show("listening for you to say a color (e.g. Green)");
}

/// <summary>
/// Builds a Choices list holding the name of every public static member of the
/// Colors class, leaving out members whose name starts with "get_".
/// </summary>
/// <returns>The list of color names to recognize.</returns>
private Choices GetColorChoices()
{
    Choices colorNames = new Choices();

    MemberInfo[] staticMembers = typeof(Colors).GetMembers(BindingFlags.Public | BindingFlags.Static);
    for (int i = 0; i < staticMembers.Length; i++)
    {
        string memberName = staticMembers[i].Name;
        //skip the "get_" accessor entries so only the plain color names are added
        if (!memberName.StartsWith("get_"))
        {
            colorNames.Add(memberName);
        }
    }

    return colorNames;
}

/// <summary>
/// Replaces the shared recognizer's grammars with a color-name grammar and reports
/// matches through grammarColors_SpeechRecognized.
/// </summary>
void btnSharedColor_Click(object sender, RoutedEventArgs e)
{
    this.sharedRecognizer.Enabled = true;
    this.sharedRecognizer.UnloadAllGrammars();

    //one grammar whose only content is the list of color names
    Grammar colorGrammar = new Grammar(new GrammarBuilder(GetColorChoices()));
    colorGrammar.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(grammarColors_SpeechRecognized);
    this.sharedRecognizer.LoadGrammar(colorGrammar);

    MessageBox.Show("listening for you to say a color (e.g. Green)");
}

/// <summary>
/// Writes the recognized color name to txtReco.
/// </summary>
void grammarColors_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
    //original author's note: this handler is not on the UI thread, so txtReco must be
    //updated through the Dispatcher rather than assigned directly.
    //
    //Dispatcher.Invoke takes a System.Delegate, so an anonymous method cannot be passed
    //without naming a delegate type. Use this class's own UpdateTxtRecoDelegate (as
    //grammarSrgs_SpeechRecognized does) rather than borrowing
    //System.Windows.Forms.MethodInvoker, which ties a WPF window to Windows Forms.
    this.Dispatcher.Invoke(DispatcherPriority.Render, new UpdateTxtRecoDelegate(UpdateTextReco), e.Result.Text);
}

/// <summary>
/// Delegate type used to pass UpdateTextReco to Dispatcher.Invoke.
/// </summary>
delegate void UpdateTxtRecoDelegate(string arg);

/// <summary>
/// Replaces the content of txtReco with the given text.
/// </summary>
/// <param name="arg">The text to display.</param>
public void UpdateTextReco(string arg)
{
    this.txtReco.Text = arg;
}


#region SHARED_RECOGNIZER_EVENTS
/// <summary>Logs the shared recognizer's new state to the console.</summary>
void sharedRecognizer_StateChanged(object sender, System.Speech.Recognition.StateChangedEventArgs e)
{
    string state = e.RecognizerState.ToString();
    System.Console.WriteLine("StateChanged : " + state);
}

/// <summary>Logs the text recognized by the shared recognizer to the console.</summary>
void sharedRecognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
    //original author's note: on UI thread
    //txtReco.Text = e.Result.Text;
    string recognizedText = e.Result.Text;
    System.Console.WriteLine("SpeechRecognized : " + recognizedText);
}

/// <summary>Logs the text of a rejected recognition result to the console.</summary>
void sharedRecognizer_SpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
{
    string rejectedText = e.Result.Text;
    System.Console.WriteLine("SpeechRecognitionRejected : " + rejectedText);
}

/// <summary>Logs the text of a hypothesized (tentative) result to the console.</summary>
void sharedRecognizer_SpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)
{
    string hypothesizedText = e.Result.Text;
    System.Console.WriteLine("SpeechHypothesized : " + hypothesizedText);
}

/// <summary>Logs the audio position, in milliseconds, at which speech was detected.</summary>
void sharedRecognizer_SpeechDetected(object sender, SpeechDetectedEventArgs e)
{
    string positionMs = e.AudioPosition.TotalMilliseconds.ToString();
    System.Console.WriteLine("SpeechDetected : " + positionMs);
}

/// <summary>Logs the audio position, in milliseconds, reported with a recognizer update.</summary>
void sharedRecognizer_RecognizerUpdateReached(object sender, RecognizerUpdateReachedEventArgs e)
{
    string positionMs = e.AudioPosition.TotalMilliseconds.ToString();
    System.Console.WriteLine("RecognizerUpdateReached : " + positionMs);
}

/// <summary>Logs the name of the grammar reported by the LoadGrammarCompleted event.</summary>
void sharedRecognizer_LoadGrammarCompleted(object sender, LoadGrammarCompletedEventArgs e)
{
    string grammarName = e.Grammar.Name;
    System.Console.WriteLine("LoadGrammarCompleted : " + grammarName);
}

/// <summary>
/// Logs the outcome of an emulated recognition: the recognized text, or a note that
/// the result was null.
/// </summary>
void sharedRecognizer_EmulateRecognizeCompleted(object sender, EmulateRecognizeCompletedEventArgs e)
{
    if (e.Result == null)
    {
        System.Console.WriteLine("EmulateRecognizeCompleted : null result");
        return;
    }

    System.Console.WriteLine("EmulateRecognizeCompleted : " + e.Result.Text);
}

/// <summary>Logs the shared recognizer's new audio state to the console.</summary>
void sharedRecognizer_AudioStateChanged(object sender, AudioStateChangedEventArgs e)
{
    string audioState = e.AudioState.ToString();
    System.Console.WriteLine("AudioStateChanged : " + audioState);
}

/// <summary>Logs the kind of audio signal problem reported by the shared recognizer.</summary>
void sharedRecognizer_AudioSignalProblemOccurred(object sender, AudioSignalProblemOccurredEventArgs e)
{
    string problem = e.AudioSignalProblem.ToString();
    System.Console.WriteLine("AudioSignalProblemOccurred : " + problem);
}

/// <summary>
/// AudioLevelUpdated handler for the shared recognizer; intentionally does nothing.
/// </summary>
void sharedRecognizer_AudioLevelUpdated(object sender, AudioLevelUpdatedEventArgs e)
{
//logging of the audio level is left disabled; re-enable the line below to trace it
//System.Console.WriteLine("AudioLevelUpdated : " + e.AudioLevel.ToString());
}
#endregion

}
}

需要完整代码的朋友请留下 Email,我会发给大家。

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics