基於 VC++ 2008 託管程式碼(C++/CLI)開發 Windows Vista 語音識別
分享一下我老師大神的人工智慧教程!零基礎,通俗易懂!http://blog.csdn.net/jiangjunshow
也歡迎大家轉載本篇文章。分享知識,造福人民,實現我們中華民族偉大復興!
廢話少說,直接看程式碼:
#pragma once
#include "Window1.g.h"
#include "Resources.Designer.h"
using namespace System;
using namespace System::Collections::Generic;
using namespace System::Text;
using namespace System::Windows;
using namespace System::Windows::Controls;
using namespace System::Windows::Data;
using namespace System::Windows::Documents;
using namespace System::Windows::Input;
using namespace System::Windows::Media;
using namespace System::Windows::Media::Imaging;
using namespace System::Windows::Shapes;
using namespace System::Reflection;
using namespace System::Windows::Threading;
using namespace System::IO;
using namespace System::Xml;
using namespace System::Collections::ObjectModel;
using namespace System::ComponentModel;
using namespace System::Speech::Recognition;
using namespace System::Speech::Recognition::SrgsGrammar;
using namespace System::Speech::Synthesis;
namespace speechReco
{
/// <summary>
/// Interaction logic for Window1.xaml
/// </summary>
//ORIGINAL LINE: public partial class Window1 : System.Windows.Window
//INSTANT C++ TODO TASK: C++ does not support 'partial' types. You must manually combine the entire Window1 type in one place.
public ref class Window1 : System::Windows::Window
{
private:
SpeechRecognizer ^sharedRecognizer;
SpeechRecognitionEngine ^appRecognizer;
SrgsDocument ^sdCmnrules;
public:
Window1()
{
InitializeComponent();
sharedRecognizer = gcnew SpeechRecognizer();
sharedRecognizer->AudioLevelUpdated += gcnew EventHandler<AudioLevelUpdatedEventArgs^>(this, &Window1::sharedRecognizer_AudioLevelUpdated);
sharedRecognizer->AudioSignalProblemOccurred += gcnew EventHandler<AudioSignalProblemOccurredEventArgs^>(this, &Window1::sharedRecognizer_AudioSignalProblemOccurred);
sharedRecognizer->AudioStateChanged += gcnew EventHandler<AudioStateChangedEventArgs^>(this, &Window1::sharedRecognizer_AudioStateChanged);
sharedRecognizer->EmulateRecognizeCompleted += gcnew EventHandler<EmulateRecognizeCompletedEventArgs^>(this, &Window1::sharedRecognizer_EmulateRecognizeCompleted);
sharedRecognizer->LoadGrammarCompleted += gcnew EventHandler<LoadGrammarCompletedEventArgs^>(this, &Window1::sharedRecognizer_LoadGrammarCompleted);
sharedRecognizer->RecognizerUpdateReached += gcnew EventHandler<RecognizerUpdateReachedEventArgs^>(this, &Window1::sharedRecognizer_RecognizerUpdateReached);
sharedRecognizer->SpeechDetected += gcnew EventHandler<SpeechDetectedEventArgs^>(this, &Window1::sharedRecognizer_SpeechDetected);
sharedRecognizer->SpeechHypothesized += gcnew EventHandler<SpeechHypothesizedEventArgs^>(this, &Window1::sharedRecognizer_SpeechHypothesized);
sharedRecognizer->SpeechRecognitionRejected += gcnew EventHandler<SpeechRecognitionRejectedEventArgs^>(this, &Window1::sharedRecognizer_SpeechRecognitionRejected);
sharedRecognizer->SpeechRecognized += gcnew EventHandler<SpeechRecognizedEventArgs^>(this, &Window1::sharedRecognizer_SpeechRecognized);
sharedRecognizer->StateChanged += gcnew EventHandler<System::Speech::Recognition::StateChangedEventArgs^>(this, &Window1::sharedRecognizer_StateChanged);
//load SRGS library
array<System::Byte> ^ba = speechReco::Properties::Resources::cmnrules;
MemoryStream ^ms = gcnew MemoryStream(ba);
ms->Position = 0;
XmlReader ^xr = XmlReader::Create(ms);
sdCmnrules = gcnew SrgsDocument(xr);
//populate ComboBox
for each(SrgsRule ^rule in sdCmnrules->Rules)
{
if (rule->Scope == SrgsRuleScope::Public)
{
cbRules->Items->Add(rule->Id);
}
}
//default to integer rule
cbRules->SelectedValue = "integer";
cbRules->SelectionChanged += gcnew SelectionChangedEventHandler(this, &Window1::cbRules_SelectionChanged);
this->btnSharedColor->Click += gcnew RoutedEventHandler(this, &Window1::btnSharedColor_Click);
this->btnInProcColor->Click += gcnew RoutedEventHandler(this, &Window1::btnInProcColor_Click);
this->btnTapDictation->PreviewMouseLeftButtonDown += gcnew MouseButtonEventHandler(this, &Window1::btnTapDictation_PreviewMouseLeftButtonDown);
this->btnTapDictation->PreviewMouseLeftButtonUp += gcnew MouseButtonEventHandler(this, &Window1::btnTapDictation_PreviewMouseLeftButtonUp);
this->btnSrgs->Click += gcnew RoutedEventHandler(this, &Window1::btnSrgs_Click);
this->btnAdvGrammarBuilder->Click += gcnew RoutedEventHandler(this, &Window1::btnAdvGrammarBuilder_Click);
this->btnWavFile->Click += gcnew RoutedEventHandler(this, &Window1::btnWavFile_Click);
this->btnSynthPhonemes->Click += gcnew RoutedEventHandler(this, &Window1::btnSynthPhonemes_Click);
this->btnEnable->Click += gcnew RoutedEventHandler(this, &Window1::btnEnable_Click);
this->btnDisable->Click += gcnew RoutedEventHandler(this, &Window1::btnDisable_Click);
this->btnUnload->Click += gcnew RoutedEventHandler(this, &Window1::btnUnload_Click);
this->btnEmulate->Click += gcnew RoutedEventHandler(this, &Window1::btnEmulate_Click);
}
private:
/// <summary>
/// Click handler for btnEmulate: asks the shared recognizer to asynchronously
/// emulate recognition of the phrase "green".
/// </summary>
void btnEmulate_Click(System::Object ^sender, RoutedEventArgs ^e)
{
    System::String ^emulatedPhrase = "green";
    this->sharedRecognizer->EmulateRecognizeAsync(emulatedPhrase);
}
/// <summary>
/// Click handler for btnUnload: removes every grammar currently loaded into
/// the shared recognizer.
/// </summary>
void btnUnload_Click(System::Object ^sender, RoutedEventArgs ^e)
{
    SpeechRecognizer ^recognizer = this->sharedRecognizer;
    recognizer->UnloadAllGrammars();
}
/// <summary>
/// Click handler for btnDisable: sets the shared recognizer's Enabled
/// property to false.
/// </summary>
void btnDisable_Click(System::Object ^sender, RoutedEventArgs ^e)
{
    const bool enabled = false;
    this->sharedRecognizer->Enabled = enabled;
}
/// <summary>
/// Click handler for btnEnable: sets the shared recognizer's Enabled
/// property to true.
/// </summary>
void btnEnable_Click(System::Object ^sender, RoutedEventArgs ^e)
{
    const bool enabled = true;
    this->sharedRecognizer->Enabled = enabled;
}
// Latest pronunciation string produced while recognizing the synthesized
// audio; written by btnSynthPhonemes_Click and the reco_* event handlers.
System::String ^recoPhonemes;
/// <summary>
/// Click handler for btnSynthPhonemes. Round-trips the text in txtSynthTxt
/// through the synthesizer (text to in-memory wave) and then through an
/// in-process recognizer (wave to text) to obtain the recognizer's
/// pronunciation (phoneme) string, which is shown in txtRecoPho.
/// </summary>
void btnSynthPhonemes_Click(System::Object ^sender, RoutedEventArgs ^e)
{
    System::String ^textToSpeak = this->txtSynthTxt->Text->Trim();
    recoPhonemes = System::String::Empty;

    // Nothing to synthesize: avoid building a grammar from an empty phrase.
    if (System::String::IsNullOrEmpty(textToSpeak))
    {
        txtRecoPho->Text = recoPhonemes;
        return;
    }

    MemoryStream ^audioStream = gcnew MemoryStream();
    SpeechSynthesizer ^synth = nullptr;
    SpeechRecognitionEngine ^reco = nullptr;
    try
    {
        // Text -> wave, rendered into the memory stream.
        synth = gcnew SpeechSynthesizer();
        synth->SetOutputToWaveStream(audioStream);
        PromptBuilder ^pb = gcnew PromptBuilder();
        pb->AppendBreak(PromptBreak::ExtraSmall); //'e' wont be recognized if this is large, or non-existent?
        synth->Speak(pb);
        synth->Speak(textToSpeak);
        synth->SetOutputToNull();
        audioStream->Position = 0; // rewind so the recognizer reads from the start

        // Wave -> text, using a grammar that contains only the spoken phrase.
        Grammar ^g = gcnew Grammar(gcnew GrammarBuilder(textToSpeak)); //TODO the hard letters to recognize are 'g' and 'e'
        reco = gcnew SpeechRecognitionEngine();
        reco->SpeechHypothesized += gcnew EventHandler<SpeechHypothesizedEventArgs^>(this, &Window1::reco_SpeechHypothesized);
        reco->SpeechRecognitionRejected += gcnew EventHandler<SpeechRecognitionRejectedEventArgs^>(this, &Window1::reco_SpeechRecognitionRejected);
        reco->LoadGrammar(g);
        reco->SetInputToWaveStream(audioStream);
        RecognitionResult ^rr = reco->Recognize();
        reco->SetInputToNull();

        // A successful recognition overrides whatever the hypothesis/rejection
        // handlers stored in recoPhonemes while Recognize() was running.
        if (rr != nullptr)
        {
            recoPhonemes = StringFromWordArray(rr->Words, WordType::Pronunciation);
        }
    }
    finally
    {
        // Release the speech engines and the audio buffer even on failure.
        if (reco != nullptr)
        {
            delete reco;
        }
        if (synth != nullptr)
        {
            delete synth;
        }
        delete audioStream;
    }

    txtRecoPho->Text = recoPhonemes;
}
/// <summary>
/// Stores the pronunciation string of a rejected recognition result in
/// recoPhonemes.
/// </summary>
void reco_SpeechRecognitionRejected(System::Object ^sender, SpeechRecognitionRejectedEventArgs ^e)
{
    ReadOnlyCollection<RecognizedWordUnit^> ^rejectedWords = e->Result->Words;
    this->recoPhonemes = StringFromWordArray(rejectedWords, WordType::Pronunciation);
}
/// <summary>
/// Stores the pronunciation string of a hypothesized (tentative) recognition
/// result in recoPhonemes.
/// </summary>
void reco_SpeechHypothesized(System::Object ^sender, SpeechHypothesizedEventArgs ^e)
{
    ReadOnlyCollection<RecognizedWordUnit^> ^hypothesizedWords = e->Result->Words;
    this->recoPhonemes = StringFromWordArray(hypothesizedWords, WordType::Pronunciation);
}
/// <summary>
/// Click handler for btnWavFile. Disables the shared recognizer, runs
/// dictation recognition over the file "spoken.wav" with an in-process
/// engine, then shows the pronunciation string in txtRecoPho and the
/// recognized text in a message box.
/// </summary>
void btnWavFile_Click(System::Object ^sender, RoutedEventArgs ^e)
{
    sharedRecognizer->Enabled = false;

    // Keep a local handle so cleanup always targets the engine created here.
    SpeechRecognitionEngine ^engine = gcnew SpeechRecognitionEngine();
    appRecognizer = engine;
    try
    {
        engine->SetInputToWaveFile("spoken.wav");
        engine->LoadGrammar(gcnew DictationGrammar());
        RecognitionResult ^rr = engine->Recognize();
        engine->SetInputToNull();
        if (rr == nullptr)
        {
            MessageBox::Show("null result?");
        }
        else
        {
            //NOTE the in-process recognizer cannot send feedback to the microphone bar
            //show phoneme result
            System::String ^phonemes = StringFromWordArray(rr->Words, WordType::Pronunciation);
            txtRecoPho->Text = phonemes;
            //show text result
            MessageBox::Show(rr->Text);
        }
    }
    finally
    {
        // Dispose the engine even when recognition throws (e.g. the wave file
        // is missing), and do not leave the field pointing at a disposed engine.
        delete engine;
        if (appRecognizer == engine)
        {
            appRecognizer = nullptr;
        }
    }
}
public:
/// <summary>
/// Selects which textual form of each recognized word
/// StringFromWordArray emits.
/// </summary>
enum class WordType
{
// The word's Text property.
Text,
// Alias with the same underlying value as Text.
Normalized = Text,
// The word's LexicalForm property.
Lexical,
// The word's Pronunciation property.
Pronunciation
};
public:
/// <summary>
/// Concatenates the requested form (text, lexical form or pronunciation) of
/// every word in <paramref name="words"/>, applying each word's
/// DisplayAttributes flags to decide the spacing around it.
/// </summary>
/// <param name="words">Recognized words to join, in order.</param>
/// <param name="type">Which form of each word to emit.</param>
/// <returns>The joined string; empty when the collection has no words.</returns>
/// <exception cref="InvalidEnumArgumentException">
/// <paramref name="type"/> is not a defined WordType value.
/// </exception>
static System::String ^StringFromWordArray(ReadOnlyCollection<RecognizedWordUnit^> ^words, WordType type)
{
    StringBuilder ^text = gcnew StringBuilder();
    for each (RecognizedWordUnit ^word in words)
    {
        System::String ^wordText = "";
        // WordType::Normalized has the same value as WordType::Text.
        if (type == WordType::Text)
        {
            wordText = word->Text;
        }
        else if (type == WordType::Lexical)
        {
            wordText = word->LexicalForm;
        }
        else if (type == WordType::Pronunciation)
        {
            wordText = word->Pronunciation;
        }
        else
        {
            throw gcnew InvalidEnumArgumentException(System::String::Format("{0}: is not a valid input", type));
        }

        // Apply the display attributes. Trailing spaces are appended first so
        // that ZeroTrailingSpaces can strip them again if it is also set.
        System::Speech::Recognition::DisplayAttributes attrs = word->DisplayAttributes;
        if ((attrs & DisplayAttributes::OneTrailingSpace) != DisplayAttributes::None)
        {
            wordText += " ";
        }
        if ((attrs & DisplayAttributes::TwoTrailingSpaces) != DisplayAttributes::None)
        {
            wordText += "  ";
        }
        if ((attrs & DisplayAttributes::ConsumeLeadingSpaces) != DisplayAttributes::None)
        {
            wordText = wordText->TrimStart();
        }
        if ((attrs & DisplayAttributes::ZeroTrailingSpaces) != DisplayAttributes::None)
        {
            wordText = wordText->TrimEnd();
        }
        text->Append(wordText);
    }
    return text->ToString();
}
private:
/// <summary>
/// Click handler for btnAdvGrammarBuilder. Enables the shared recognizer,
/// replaces its grammars with a pizza-ordering grammar of the form
///   [I'd like] a [size] [crust] [topping] pizza [please]
/// and routes recognitions to pizzaGrammar_SpeechRecognized.
/// Pattern taken from
/// http://msdn.microsoft.com/msdnmag/issues/06/01/speechinWindowsVista/#S5
/// </summary>
void btnAdvGrammarBuilder_Click(System::Object ^sender, RoutedEventArgs ^e)
{
    sharedRecognizer->Enabled = true;
    sharedRecognizer->UnloadAllGrammars();

    // Core sets of choices.
    Choices ^sizes = gcnew Choices("small", "regular", "large");
    Choices ^crusts = gcnew Choices("thin crust", "thick crust");
    Choices ^toppings = gcnew Choices("vegetarian", "pepperoni", "cheese");

    // Tag each set with the semantic key under which its value is reported.
    SemanticResultKey ^srkSize = gcnew SemanticResultKey("size", sizes->ToGrammarBuilder());
    SemanticResultKey ^srkCrust = gcnew SemanticResultKey("crust", crusts->ToGrammarBuilder());
    SemanticResultKey ^srkTopping = gcnew SemanticResultKey("topping", toppings->ToGrammarBuilder());

    // Permutation 1: size, crust and topping are all spoken.
    GrammarBuilder ^sizeCrustTopping = gcnew GrammarBuilder();
    sizeCrustTopping->Append(srkSize);
    sizeCrustTopping->Append(srkCrust);
    sizeCrustTopping->Append(srkTopping);

    // Permutation 2: size and topping only.
    //TODO how to set default semantic value for "crust"? (intended: "thick crust")
    GrammarBuilder ^sizeAndTopping = gcnew GrammarBuilder();
    sizeAndTopping->Append(srkSize);
    sizeAndTopping->Append(srkTopping);

    // Permutation 3: topping only.
    //TODO how to set default semantic value for "size" and "crust"?
    //     (intended: "regular" and "thick crust")
    GrammarBuilder ^toppingOnly = gcnew GrammarBuilder();
    toppingOnly->Append(srkTopping);

    // Assemble the permutations.
    Choices ^permutations = gcnew Choices();
    permutations->Add(sizeCrustTopping);
    permutations->Add(sizeAndTopping);
    permutations->Add(toppingOnly);

    // Build the complete pattern.
    GrammarBuilder ^pizzaRequest = gcnew GrammarBuilder();
    //pre-amble "[I'd like] a"
    pizzaRequest->Append(gcnew Choices("I'd like a", "a"));
    //permutations "[<size>] [<crust>] [<topping>]"
    pizzaRequest->Append(permutations);
    //post-amble "pizza [please]"
    pizzaRequest->Append(gcnew Choices("pizza", "pizza please"));

    // Create the grammar, attach its handler and load it into the recognizer.
    Grammar ^pizzaGrammar = gcnew Grammar(pizzaRequest);
    pizzaGrammar->SpeechRecognized += gcnew EventHandler<SpeechRecognizedEventArgs^>(this, &Window1::pizzaGrammar_SpeechRecognized);
    sharedRecognizer->LoadGrammar(pizzaGrammar);
}
void pizzaGrammar_SpeechRecognized(System::Object ^sender, SpeechRecognizedEventArgs ^e)
{
StringBuilder ^resultString = gcnew StringBuilder();
resultString->Append("Raw text result: ");
resultString->AppendLine(e->Result->Text);
resultString->Append("Size: ");
resultString->AppendLine(e->Result->Semantics["size"]->Value->ToString());
resultString->Append("Crust: ");
resultString->AppendLine(e->Result->Semantics["crust"]->Value->ToString());
resultString->Append("Topping: ");
resultString->AppendLine(e->Result->Semantics["topping"]->Value->ToString());
MessageBox::Show(resultString->ToString());
}
/// <summary>
/// SelectionChanged handler for the cbRules ComboBox. Currently a stub that
/// does nothing.
/// </summary>
void cbRules_SelectionChanged(System::Object ^sender, SelectionChangedEventArgs ^e)
{
//TODO
}
/// <summary>
/// Click handler for btnSrgs. Loads the SRGS rule selected in cbRules into
/// the shared recognizer (replacing any loaded grammars) and routes
/// recognitions to grammarSrgs_SpeechRecognized.
/// </summary>
void btnSrgs_Click(System::Object ^sender, RoutedEventArgs ^e)
{
    System::String ^ruleName = safe_cast<System::String^>(cbRules->SelectedValue);

    // Validate before touching the recognizer so a missing selection does not
    // unload the grammars that are currently active.
    if (System::String::IsNullOrEmpty(ruleName))
    {
        MessageBox::Show("Please select a rule first.");
        return;
    }

    sharedRecognizer->Enabled = true;
    sharedRecognizer->UnloadAllGrammars();

    Grammar ^grammarSrgs = gcnew Grammar(sdCmnrules, ruleName);
    grammarSrgs->SpeechRecognized += gcnew EventHandler<SpeechRecognizedEventArgs^>(this, &Window1::grammarSrgs_SpeechRecognized);
    sharedRecognizer->LoadGrammar(grammarSrgs);
    MessageBox::Show("listening for user input based on the selected rule : " + ruleName);
}
/// <summary>
/// Handles a recognition from the SRGS grammar: echoes the recognized text to
/// the speech UI and, when the result carries a semantic value, writes that
/// value to txtReco via the window's Dispatcher.
/// </summary>
void grammarSrgs_SpeechRecognized(System::Object ^sender, SpeechRecognizedEventArgs ^e)
{
    //send text to microphone bar
    SpeechUI::SendTextFeedback(e->Result, e->Result->Text, true);

    //send actual numeric value to TextBox on form
    System::Object ^semanticValue = e->Result->Semantics->Value;
    if (semanticValue != nullptr)
    {
        // A C++/CLI delegate bound to an instance method needs both the
        // target object and a pointer-to-member.
        this->Dispatcher->Invoke(DispatcherPriority::Render, gcnew UpdateTxtRecoDelegate(this, &Window1::UpdateTextReco), semanticValue->ToString());
    }
}
/// <summary>
/// Mouse-down handler for btnTapDictation ("push to talk"). Disables the
/// shared recognizer and starts continuous in-process dictation on the
/// default audio device; results arrive in appRecognizer_SpeechRecognized.
/// Uses the spelling dictation grammar unless cbSpelling is explicitly
/// unchecked.
/// </summary>
void btnTapDictation_PreviewMouseLeftButtonDown(System::Object ^sender, MouseButtonEventArgs ^e)
{
    sharedRecognizer->Enabled = false;
    dictationResult = System::String::Empty;

    // Dispose any engine left over from an earlier action (e.g. the in-proc
    // color recognizer) so it does not keep listening alongside this one.
    if (appRecognizer != nullptr)
    {
        delete appRecognizer;
        appRecognizer = nullptr;
    }

    appRecognizer = gcnew SpeechRecognitionEngine();
    appRecognizer->SetInputToDefaultAudioDevice();
    appRecognizer->SpeechRecognized += gcnew EventHandler<SpeechRecognizedEventArgs^>(this, &Window1::appRecognizer_SpeechRecognized);

    DictationGrammar ^dg;
    if (cbSpelling->IsChecked == false)
    {
        dg = gcnew DictationGrammar();
    }
    else
    {
        dg = gcnew DictationGrammar("grammar:dictation#spelling");
    }
    appRecognizer->LoadGrammar(dg);
    appRecognizer->RecognizeAsync(RecognizeMode::Multiple);
}
// Text accumulated during the current tap-to-dictate session; reset by
// btnTapDictation_PreviewMouseLeftButtonDown.
System::String ^dictationResult;
/// <summary>
/// Appends the newly recognized text to dictationResult and mirrors the
/// accumulated text into txtReco.
/// </summary>
void appRecognizer_SpeechRecognized(System::Object ^sender, SpeechRecognizedEventArgs ^e)
{
    //on UI thread
    System::String ^accumulated = System::String::Concat(this->dictationResult, e->Result->Text);
    this->dictationResult = accumulated;
    this->txtReco->Text = accumulated;
}
/// <summary>
/// Mouse-up handler for btnTapDictation: stops the dictation started on
/// mouse-down and disposes the in-process engine.
/// </summary>
void btnTapDictation_PreviewMouseLeftButtonUp(System::Object ^sender, MouseButtonEventArgs ^e)
{
    // Mouse-up can arrive without a matching mouse-down on this button.
    if (appRecognizer == nullptr)
    {
        return;
    }

    appRecognizer->RecognizeAsyncStop();
    delete appRecognizer;
    // Clear the field so nothing later uses the disposed engine.
    appRecognizer = nullptr;
}
/// <summary>
/// Click handler for btnInProcColor. Disables the shared recognizer and
/// starts an in-process engine that listens continuously on the default
/// audio device with a color-name grammar plus a dictation grammar; color
/// recognitions go to grammarColors_SpeechRecognized.
/// </summary>
void btnInProcColor_Click(System::Object ^sender, RoutedEventArgs ^e)
{
    sharedRecognizer->Enabled = false;

    Choices ^cColor = GetColorChoices();
    GrammarBuilder ^gb = gcnew GrammarBuilder(cColor);
    Grammar ^grammarColors = gcnew Grammar(gb);
    grammarColors->SpeechRecognized += gcnew EventHandler<SpeechRecognizedEventArgs^>(this, &Window1::grammarColors_SpeechRecognized);

    // Dispose any previous in-process engine; otherwise each click leaves
    // another engine listening on the microphone.
    if (appRecognizer != nullptr)
    {
        delete appRecognizer;
        appRecognizer = nullptr;
    }

    appRecognizer = gcnew SpeechRecognitionEngine();
    appRecognizer->SetInputToDefaultAudioDevice();
    appRecognizer->LoadGrammar(grammarColors);
    appRecognizer->LoadGrammar(gcnew DictationGrammar());
    appRecognizer->RecognizeAsync(RecognizeMode::Multiple);
    MessageBox::Show("listening for you to say a color (e.g. Green)");
}
/// <summary>
/// Builds a Choices list from the names of the public static members of
/// System::Windows::Media::Colors, skipping the "get_" property accessors.
/// </summary>
/// <returns>Choices containing one entry per remaining member name.</returns>
Choices ^GetColorChoices()
{
    Choices ^cColor = gcnew Choices();
    Type ^t = Colors::typeid;
    array<MemberInfo^> ^mia = t->GetMembers(BindingFlags::Public | BindingFlags::Static);
    for each (MemberInfo ^mi in mia)
    {
        // Member names are identifiers, not linguistic text: compare ordinally
        // so the result does not depend on the current culture.
        if (mi->Name->StartsWith("get_", StringComparison::Ordinal))
        {
            continue;
        }
        cColor->Add(mi->Name);
    }
    return cColor;
}
/// <summary>
/// Click handler for btnSharedColor. Enables the shared recognizer, replaces
/// its grammars with a color-name grammar and routes recognitions to
/// grammarColors_SpeechRecognized.
/// </summary>
void btnSharedColor_Click(System::Object ^sender, RoutedEventArgs ^e)
{
    SpeechRecognizer ^recognizer = this->sharedRecognizer;
    recognizer->Enabled = true;
    recognizer->UnloadAllGrammars();

    // One grammar whose only content is the list of color names.
    Grammar ^colorGrammar = gcnew Grammar(gcnew GrammarBuilder(GetColorChoices()));
    colorGrammar->SpeechRecognized += gcnew EventHandler<SpeechRecognizedEventArgs^>(this, &Window1::grammarColors_SpeechRecognized);
    recognizer->LoadGrammar(colorGrammar);

    MessageBox::Show("listening for you to say a color (e.g. Green)");
}
/// <summary>
/// Handles a recognition from the color grammar by showing the recognized
/// text in txtReco. The original author notes that this handler does not run
/// on the UI thread, so the update is marshalled through the window's
/// Dispatcher.
/// </summary>
void grammarColors_SpeechRecognized(System::Object ^sender, SpeechRecognizedEventArgs ^e)
{
    // C++/CLI has no anonymous delegates, so use the named UpdateTextReco
    // method through UpdateTxtRecoDelegate instead of the C# inline delegate.
    this->Dispatcher->Invoke(DispatcherPriority::Render, gcnew UpdateTxtRecoDelegate(this, &Window1::UpdateTextReco), e->Result->Text);
}
// Delegate shape used to marshal a text update onto the Dispatcher.
delegate void UpdateTxtRecoDelegate(System::String ^arg);
public:
/// <summary>Replaces the content of txtReco with <paramref name="arg"/>.</summary>
void UpdateTextReco(System::String ^arg)
{
    this->txtReco->Text = arg;
}
#pragma region SHARED_RECOGNIZER_EVENTS
private:
/// <summary>Logs the shared recognizer's new state to the console.</summary>
void sharedRecognizer_StateChanged(System::Object ^sender, System::Speech::Recognition::StateChangedEventArgs ^e)
{
    System::String ^stateName = e->RecognizerState.ToString();
    System::Console::WriteLine(System::String::Concat("StateChanged : ", stateName));
}
/// <summary>Logs the text recognized by the shared recognizer to the console.</summary>
void sharedRecognizer_SpeechRecognized(System::Object ^sender, SpeechRecognizedEventArgs ^e)
{
    //on UI thread
    System::String ^recognizedText = e->Result->Text;
    System::Console::WriteLine(System::String::Concat("SpeechRecognized : ", recognizedText));
}
/// <summary>Logs the text of a rejected recognition result to the console.</summary>
void sharedRecognizer_SpeechRecognitionRejected(System::Object ^sender, SpeechRecognitionRejectedEventArgs ^e)
{
    System::String ^rejectedText = e->Result->Text;
    System::Console::WriteLine(System::String::Concat("SpeechRecognitionRejected : ", rejectedText));
}
/// <summary>Logs the text of a hypothesized (tentative) result to the console.</summary>
void sharedRecognizer_SpeechHypothesized(System::Object ^sender, SpeechHypothesizedEventArgs ^e)
{
    System::String ^hypothesisText = e->Result->Text;
    System::Console::WriteLine(System::String::Concat("SpeechHypothesized : ", hypothesisText));
}
/// <summary>
/// Logs the audio position, in total milliseconds, at which speech was
/// detected.
/// </summary>
void sharedRecognizer_SpeechDetected(System::Object ^sender, SpeechDetectedEventArgs ^e)
{
    System::TimeSpan position = e->AudioPosition;
    System::String ^milliseconds = position.TotalMilliseconds.ToString();
    System::Console::WriteLine(System::String::Concat("SpeechDetected : ", milliseconds));
}
/// <summary>
/// Logs the audio position, in total milliseconds, reported with the
/// recognizer-update event.
/// </summary>
void sharedRecognizer_RecognizerUpdateReached(System::Object ^sender, RecognizerUpdateReachedEventArgs ^e)
{
    System::TimeSpan position = e->AudioPosition;
    System::String ^milliseconds = position.TotalMilliseconds.ToString();
    System::Console::WriteLine(System::String::Concat("RecognizerUpdateReached : ", milliseconds));
}
/// <summary>Logs the name of the grammar whose load has completed.</summary>
void sharedRecognizer_LoadGrammarCompleted(System::Object ^sender, LoadGrammarCompletedEventArgs ^e)
{
    Grammar ^loadedGrammar = e->Grammar;
    System::Console::WriteLine(System::String::Concat("LoadGrammarCompleted : ", loadedGrammar->Name));
}
/// <summary>
/// Logs the outcome of an emulated recognition: the recognized text, or a
/// "null result" marker when the event carries no result.
/// </summary>
void sharedRecognizer_EmulateRecognizeCompleted(System::Object ^sender, EmulateRecognizeCompletedEventArgs ^e)
{
    RecognitionResult ^result = e->Result;
    if (result == nullptr)
    {
        System::Console::WriteLine("EmulateRecognizeCompleted : null result");
        return;
    }
    System::Console::WriteLine(System::String::Concat("EmulateRecognizeCompleted : ", result->Text));
}
/// <summary>Logs the shared recognizer's new audio state to the console.</summary>
void sharedRecognizer_AudioStateChanged(System::Object ^sender, AudioStateChangedEventArgs ^e)
{
    System::String ^audioStateName = e->AudioState.ToString();
    System::Console::WriteLine(System::String::Concat("AudioStateChanged : ", audioStateName));
}
/// <summary>Logs the reported audio signal problem to the console.</summary>
void sharedRecognizer_AudioSignalProblemOccurred(System::Object ^sender, AudioSignalProblemOccurredEventArgs ^e)
{
    System::String ^problemName = e->AudioSignalProblem.ToString();
    System::Console::WriteLine(System::String::Concat("AudioSignalProblemOccurred : ", problemName));
}
/// <summary>
/// Handler for the shared recognizer's AudioLevelUpdated event. The body is
/// empty: the only statement (console logging of the level) is commented out.
/// </summary>
void sharedRecognizer_AudioLevelUpdated(System::Object ^sender, AudioLevelUpdatedEventArgs ^e)
{
//System.Console.WriteLine("AudioLevelUpdated : " + e.AudioLevel.ToString());
}
#pragma endregion
};
}
需要原始碼的朋友請留下 Email,我會發給大家。