與眾不同 windows phone (45) - 8.0 語音: TTS, 語音識別, 語音命令
作者:webabcd
介紹
與眾不同 windows phone 8.0 之 語音
- TTS(Text To Speech)
- 語音識別
- 語音命令
示例
1、演示 TTS(Text To Speech)的應用
Speech/TTS.xaml
<phone:PhoneApplicationPage
    x:Class="Demo.Speech.TTS"
    xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
    xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
    xmlns:phone="clr-namespace:Microsoft.Phone.Controls;assembly=Microsoft.Phone"
    xmlns:shell="clr-namespace:Microsoft.Phone.Shell;assembly=Microsoft.Phone"
    xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
    xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
    FontFamily="{StaticResource PhoneFontFamilyNormal}"
    FontSize="{StaticResource PhoneFontSizeNormal}"
    Foreground="{StaticResource PhoneForegroundBrush}"
    SupportedOrientations="Portrait"
    Orientation="Portrait"
    mc:Ignorable="d"
    shell:SystemTray.IsVisible="True">

    <!-- Text-to-speech demo page: one status label followed by one button per scenario. -->
    <Grid Background="Transparent">
        <StackPanel Orientation="Vertical">

            <!-- Status output written by the code-behind (bookmark name and audio position). -->
            <TextBlock Name="lblMsg" />

            <!-- Speak plain text with the default voice. -->
            <Button x:Name="btnTTS_Basic" Content="TTS 基礎" Click="btnTTS_Basic_Click" />

            <!-- Speak plain text with an explicitly selected voice. -->
            <Button x:Name="btnTTS_Select" Content="用指定的語音 TTS" Click="btnTTS_Select_Click" />

            <!-- Speak an SSML document. -->
            <Button x:Name="btnTTS_SSML" Content="朗讀 SSML 文檔" Click="btnTTS_SSML_Click" />

        </StackPanel>
    </Grid>

</phone:PhoneApplicationPage>
Speech/TTS.xaml.cs
/*
 * Demonstrates text-to-speech (TTS) on Windows Phone 8.
 *
 *
 * InstalledVoices - manages the installed voices
 *     All - every installed voice, as a list of VoiceInformation objects
 *     Default - the default voice, as a VoiceInformation object
 *
 * VoiceInformation - describes a voice
 *     Id - identifier
 *     Language - language
 *     DisplayName - name
 *     Description - description
 *     Gender - gender (VoiceGender.Male or VoiceGender.Female)
 *
 * SpeechSynthesizer - the TTS class
 *     SetVoice(VoiceInformation voiceInformation) - sets the voice
 *     GetVoice() - gets the current voice information
 *     SpeakTextAsync(string content, object userState) - speaks the given text; an optional context object can be supplied and read back in SpeechStarted
 *     SpeakSsmlAsync(string content, object userState) - speaks the given SSML document; an optional context object can be supplied and read back in SpeechStarted
 *     SpeakSsmlFromUriAsync(Uri content, object userState) - speaks the SSML document at the given address; an optional context object can be supplied and read back in SpeechStarted
 *     CancelAll() - cancels all speech
 *     SpeechStarted - raised when speech starts
 *     BookmarkReached - raised when a <mark /> element is reached (SSML only)
 *
 *
 * Notes:
 * 1. The manifest must declare <Capability Name="ID_CAP_SPEECH_RECOGNITION" />
 * 2. SSML - Speech Synthesis Markup Language
 * 3. Microsoft's SSML reference: http://msdn.microsoft.com/en-us/library/hh361578
 * 4. W3C's SSML specification: http://www.w3.org/TR/speech-synthesis/
 */

using System;
using System.Collections.Generic;
using System.Linq;
using System.Windows;
using Microsoft.Phone.Controls;
using Windows.Phone.Speech.Synthesis;

namespace Demo.Speech
{
    /// <summary>
    /// Code-behind of the TTS demo page: speaks a fixed sentence as plain text with the
    /// default voice, as plain text with a zh-CN voice, or wrapped in an SSML document.
    /// </summary>
    public partial class TTS : PhoneApplicationPage
    {
        // Language tag used to pick a Chinese voice.
        private const string ChineseLanguageTag = "zh-CN";

        // The sentence to speak. It embeds an SSML <mark /> element so that the SSML handler
        // can demonstrate BookmarkReached.
        // NOTE(review): the two plain-text handlers pass the same string (markup included) to
        // SpeakTextAsync, exactly as before - confirm how the engine treats markup in plain text.
        private readonly string _text = "TTS 是 Text To Speech 的縮寫<mark name=\"xxx\" />,即“從文本到語音”,是人機對話的一部分,讓機器能夠說話。";

        public TTS()
        {
            InitializeComponent();
        }

        /// <summary>Speaks the text with the default voice.</summary>
        private async void btnTTS_Basic_Click(object sender, RoutedEventArgs e)
        {
            SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer();

            try
            {
                await speechSynthesizer.SpeakTextAsync(_text);
            }
            catch (Exception ex)
            {
                // An exception escaping an async void handler would terminate the app;
                // report it on the page instead.
                lblMsg.Text = ex.ToString();
            }
        }

        /// <summary>Speaks the text with the first installed zh-CN voice.</summary>
        private async void btnTTS_Select_Click(object sender, RoutedEventArgs e)
        {
            // The original notes expect two Chinese voices (female first, then male); details
            // are available on the VoiceInformation object. Nothing guarantees that any is
            // installed, so look the voice up defensively instead of indexing element 0.
            VoiceInformation zhVoice = InstalledVoices.All.FirstOrDefault(
                voice => string.Equals(voice.Language, ChineseLanguageTag, StringComparison.OrdinalIgnoreCase));

            if (zhVoice == null)
            {
                lblMsg.Text = "未安裝中文語音";
                return;
            }

            SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer();

            try
            {
                // Select the voice, then speak.
                speechSynthesizer.SetVoice(zhVoice);
                await speechSynthesizer.SpeakTextAsync(_text);
            }
            catch (Exception ex)
            {
                lblMsg.Text = ex.ToString();
            }
        }

        /// <summary>Wraps the text in an SSML document (male voice, slowed down) and speaks it.</summary>
        private async void btnTTS_SSML_Click(object sender, RoutedEventArgs e)
        {
            SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer();

            // Raised when speech starts.
            speechSynthesizer.SpeechStarted += speechSynthesizer_SpeechStarted;
            // Raised when a <mark /> element is reached.
            speechSynthesizer.BookmarkReached += speechSynthesizer_BookmarkReached;

            // Microsoft's SSML reference: http://msdn.microsoft.com/en-us/library/hh361578
            // W3C's SSML specification: http://www.w3.org/TR/speech-synthesis/
            string ssml = "<speak version=\"1.0\" xmlns=\"http://www.w3.org/2001/10/synthesis\" xml:lang=\"zh-CN\">"; // Chinese
            ssml += "<voice gender=\"male\">";   // male voice
            ssml += "<prosody rate=\"-50%\">";   // rate reduced by 50%
            ssml += _text;
            ssml += "</prosody>";
            ssml += "</voice>";
            ssml += "</speak>";

            try
            {
                await speechSynthesizer.SpeakSsmlAsync(ssml);
            }
            catch (Exception ex)
            {
                lblMsg.Text = ex.ToString();
            }
        }

        /// <summary>Shows how to read back the context object passed to the Speak* call.</summary>
        private void speechSynthesizer_SpeechStarted(SpeechSynthesizer sender, SpeechStartedEventArgs args)
        {
            // The context object; this page passes none, and the value is not used further.
            object userState = args.UserState;
        }

        /// <summary>Displays the name of the reached bookmark and the elapsed audio time.</summary>
        private void speechSynthesizer_BookmarkReached(SpeechSynthesizer sender, SpeechBookmarkReachedEventArgs args)
        {
            // The label is updated through the page dispatcher, as in the original code.
            this.Dispatcher.BeginInvoke(delegate()
            {
                // Name of the <mark /> element that raised this event, followed by the
                // audio position at which it was reached.
                lblMsg.Text = "mark name: " + args.Bookmark
                    + Environment.NewLine
                    + "audio position: " + args.AudioPosition.TotalSeconds;
            });
        }
    }
}
2、演示如何通過自定義語法列表做語音識別,以及如何通過 SRGS 自定義語法做語音識別
Speech/SRGSGrammar.xml
<?xml version="1.0" encoding="utf-8"?>
<grammar version="1.0"
         xml:lang="zh-cn"
         root="Main"
         tag-format="semantics/1.0"
         xmlns="http://www.w3.org/2001/06/grammar"
         xmlns:sapi="http://schemas.microsoft.com/Speech/2002/06/SRGSExtensions">

    <!-- Root rule: an optional leading phrase followed by one city name. -->
    <rule id="Main">
        <item repeat="0-1">我想去</item>
        <ruleref uri="#Cities" />
    </rule>

    <!-- The city names that can be recognized. -->
    <rule id="Cities" scope="public">
        <one-of>
            <item>北京</item>
            <item>深圳</item>
            <item>上海</item>
            <item>廣州</item>
        </one-of>
    </rule>

</grammar>

<!--
    This grammar recognizes each of the four city names, either on its own or
    preceded by the optional leading phrase of the Main rule.

    Visual Studio provides an item template for creating SRGSGrammar (SRGS grammar) files.

    Microsoft's SRGS reference: http://msdn.microsoft.com/en-us/library/hh361653
    W3C's SRGS specification: http://www.w3.org/TR/speech-grammar/
-->
Speech/SpeechRecognition.xaml
<phone:PhoneApplicationPage
    x:Class="Demo.Speech.SpeechRecognition"
    xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
    xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
    xmlns:phone="clr-namespace:Microsoft.Phone.Controls;assembly=Microsoft.Phone"
    xmlns:shell="clr-namespace:Microsoft.Phone.Shell;assembly=Microsoft.Phone"
    xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
    xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
    FontFamily="{StaticResource PhoneFontFamilyNormal}"
    FontSize="{StaticResource PhoneFontSizeNormal}"
    Foreground="{StaticResource PhoneForegroundBrush}"
    SupportedOrientations="Portrait"
    Orientation="Portrait"
    mc:Ignorable="d"
    shell:SystemTray.IsVisible="True">

    <!-- Speech recognition demo page: one status label followed by one button per scenario. -->
    <Grid Background="Transparent">
        <StackPanel Orientation="Vertical">

            <!-- Recognition status, result and error output written by the code-behind. -->
            <TextBlock Name="lblMsg" />

            <!-- Recognition against a phrase list, using the built-in recognition UI. -->
            <Button x:Name="btnDemo" Content="通過自定義語法列表做語音識別" Click="btnDemo_Click" />

            <!-- Recognition against the SRGS grammar file, without UI. -->
            <Button x:Name="btnSRGS" Content="通過 SRGS 自定義語法做語音識別" Click="btnSRGS_Click" />

        </StackPanel>
    </Grid>

</phone:PhoneApplicationPage>
Speech/SpeechRecognition.xaml.cs
/*
 * Demonstrates speech recognition with a custom phrase list and with a custom SRGS grammar.
 *
 *
 * Speech recognition: recognizes speech inside the app
 * Voice commands: launch the app from outside by voice
 *
 *
 * Notes:
 * 1. The manifest must declare <Capability Name="ID_CAP_SPEECH_RECOGNITION" /> and <Capability Name="ID_CAP_MICROPHONE" />
 * 2. Install a speech recognizer: Settings -> Speech -> install the required recognizer from the "Speech language" list and enable the speech recognition service
 * 3. SRGS - Speech Recognition Grammar Specification
 * 4. Microsoft's SRGS reference: http://msdn.microsoft.com/en-us/library/hh361653
 * 5. W3C's SRGS specification: http://www.w3.org/TR/speech-grammar/
 */

using System;
using System.Collections.Generic;
using System.Linq;
using System.Windows;
using Microsoft.Phone.Controls;
using Windows.Phone.Speech.Recognition;

namespace Demo.Speech
{
    /// <summary>
    /// Code-behind of the speech recognition demo page: recognizes against a phrase list
    /// with the built-in UI, or against an SRGS grammar file without UI.
    /// </summary>
    public partial class SpeechRecognition : PhoneApplicationPage
    {
        // Language tag of the recognizer both scenarios want.
        private const string ChineseLanguageTag = "zh-CN";

        // HRESULT that the original code reports as "the current recognizer does not
        // support the requested language".
        private const uint UnsupportedLanguageHResult = 0x800455BC;

        public SpeechRecognition()
        {
            InitializeComponent();
        }

        /// <summary>Recognizes one of a fixed list of phrases using the recognizer with UI.</summary>
        private async void btnDemo_Click(object sender, RoutedEventArgs e)
        {
            // Look up the Chinese recognizer once; bail out early when it is not installed.
            SpeechRecognizerInformation zhRecognizer = FindChineseRecognizer();
            if (zhRecognizer == null)
            {
                lblMsg.Text = "未安裝中文語音識別器";
                return;
            }

            // Speech recognizer with UI.
            SpeechRecognizerUI speechRecognizerUI = new SpeechRecognizerUI();

            try
            {
                // Raised when a problem occurs during recognition.
                speechRecognizerUI.Recognizer.AudioProblemOccurred += Recognizer_AudioProblemOccurred;
                // Raised when the audio capture state changes.
                speechRecognizerUI.Recognizer.AudioCaptureStateChanged += Recognizer_AudioCaptureStateChanged;

                // InitialSilenceTimeout - recognition is aborted if only silence is received during this time.
                speechRecognizerUI.Recognizer.Settings.InitialSilenceTimeout = TimeSpan.FromSeconds(5.0);
                // EndSilenceTimeout - once recognition has started, this much silence ends it.
                speechRecognizerUI.Recognizer.Settings.EndSilenceTimeout = TimeSpan.FromSeconds(0.15);
                // BabbleTimeout - recognition is aborted if only noise is received during this time (0 disables it).
                speechRecognizerUI.Recognizer.Settings.BabbleTimeout = TimeSpan.FromSeconds(0.0);

                // Use the Chinese recognizer.
                speechRecognizerUI.Recognizer.SetRecognizer(zhRecognizer);

                // The phrases to recognize.
                string[] phrases = { "xbox", "海賊王", "王磊" };
                speechRecognizerUI.Recognizer.Grammars.AddGrammarFromList("myWord", phrases);

                // Alternatives to the phrase list (left disabled, as in the original sample).
                // The original notes describe Dictation as local and WebSearch as network-based.
                // NOTE(review): verify this against the platform documentation.
                // speechRecognizerUI.Recognizer.Grammars.AddGrammarFromPredefinedType("dictation", SpeechPredefinedGrammar.Dictation);
                // speechRecognizerUI.Recognizer.Grammars.AddGrammarFromPredefinedType("webSearch", SpeechPredefinedGrammar.WebSearch);

                // Preload all grammars. This now runs inside the try block so that a failure
                // here is reported on the page rather than escaping the async void handler.
                await speechRecognizerUI.Recognizer.PreloadGrammarsAsync();

                // Title shown on the listening screen.
                speechRecognizerUI.Settings.ListenText = "監聽中。。。";
                // Example text shown on the listening screen.
                speechRecognizerUI.Settings.ExampleText = "精確識別:xbox, 海賊王, 王磊";
                // Whether the "Did you say" screen (shown when several candidates match) and the
                // "Heard you say" screen read the text aloud via TTS. Per the original notes this
                // only takes effect when "Play audio confirmations" is enabled in the speech settings.
                speechRecognizerUI.Settings.ReadoutEnabled = true;
                // Whether to show the "Heard you say" screen with the final recognized text.
                speechRecognizerUI.Settings.ShowConfirmation = false;

                // Start recognition.
                SpeechRecognitionUIResult result = await speechRecognizerUI.RecognizeWithUIAsync();

                // Always report the status; the recognition result is only read when the
                // session succeeded and a result is present, so a cancelled or interrupted
                // session no longer ends up in the exception path.
                string message = "識別狀態: " + result.ResultStatus.ToString();
                if (result.ResultStatus == SpeechRecognitionUIResultStatus.Succeeded && result.RecognitionResult != null)
                {
                    message += Environment.NewLine;
                    message += "識別結果:" + result.RecognitionResult.Text;
                    message += Environment.NewLine;
                    message += "可信度級別: " + result.RecognitionResult.TextConfidence.ToString(); // Rejected, Low, Medium, High
                }

                lblMsg.Text = message;
            }
            catch (Exception ex)
            {
                lblMsg.Text = DescribeRecognitionError(ex, speechRecognizerUI.Recognizer);
            }
        }

        /// <summary>Reports audio capture state changes: Capturing or Inactive.</summary>
        private void Recognizer_AudioCaptureStateChanged(SpeechRecognizer sender, SpeechRecognizerAudioCaptureStateChangedEventArgs args)
        {
            ShowMessage("AudioCaptureStateChanged: " + args.State.ToString());
        }

        /// <summary>
        /// Reports audio problems: TooLoud, TooQuiet, TooFast, TooSlow, TooNoisy, NoSignal, None.
        /// </summary>
        private void Recognizer_AudioProblemOccurred(SpeechRecognizer sender, SpeechAudioProblemOccurredEventArgs args)
        {
            ShowMessage("AudioProblemOccurred: " + args.Problem.ToString());
        }

        /// <summary>
        /// Recognizes against the SRGS grammar file using the recognizer without UI.
        /// Microsoft's SRGS reference: http://msdn.microsoft.com/en-us/library/hh361653
        /// W3C's SRGS specification: http://www.w3.org/TR/speech-grammar/
        /// </summary>
        private async void btnSRGS_Click(object sender, RoutedEventArgs e)
        {
            // Speech recognizer without UI.
            SpeechRecognizer speechRecognizer = new SpeechRecognizer();

            try
            {
                // The grammar file is written for zh-cn, so prefer the Chinese recognizer when
                // one is installed. Otherwise the default recognizer is used, as before, and a
                // language mismatch is reported by the catch block.
                SpeechRecognizerInformation zhRecognizer = FindChineseRecognizer();
                if (zhRecognizer != null)
                {
                    speechRecognizer.SetRecognizer(zhRecognizer);
                }

                // Load the SRGS grammar.
                Uri mySRGS = new Uri("ms-appx:///Speech/SRGSGrammar.xml", UriKind.Absolute);
                speechRecognizer.Grammars.AddGrammarFromUri("srgs", mySRGS);

                lblMsg.Text = "監聽中。。。";
                lblMsg.Text += Environment.NewLine;

                // Start recognition.
                SpeechRecognitionResult result = await speechRecognizer.RecognizeAsync();

                // Show the result.
                lblMsg.Text += "識別結果:" + result.Text;
                lblMsg.Text += Environment.NewLine;
                lblMsg.Text += "可信度級別: " + result.TextConfidence.ToString(); // Rejected, Low, Medium, High
            }
            catch (Exception ex)
            {
                lblMsg.Text = DescribeRecognitionError(ex, speechRecognizer);
            }
        }

        // Returns the first installed zh-CN recognizer, or null when none is installed.
        private static SpeechRecognizerInformation FindChineseRecognizer()
        {
            return InstalledSpeechRecognizers.All.FirstOrDefault(
                info => string.Equals(info.Language, ChineseLanguageTag, StringComparison.OrdinalIgnoreCase));
        }

        // Builds the text shown for a failed recognition: a dedicated message for the
        // unsupported-language HRESULT, the full exception text for everything else.
        private static string DescribeRecognitionError(Exception ex, SpeechRecognizer recognizer)
        {
            if ((uint)ex.HResult == UnsupportedLanguageHResult)
            {
                return "當前語音識別器不支持所請求的語言: " + recognizer.GetRecognizer().Language;
            }

            return ex.ToString();
        }

        // Writes to the status label from any thread: directly when already on the UI
        // thread, otherwise via the page dispatcher (touching a control from another
        // thread is not allowed).
        private void ShowMessage(string message)
        {
            if (Dispatcher.CheckAccess())
            {
                lblMsg.Text = message;
            }
            else
            {
                Dispatcher.BeginInvoke(() => lblMsg.Text = message);
            }
        }
    }
}
3、演示如何通過語音命令啟動 app,以及 app 啟動后如何獲取啟動此 app 的語音命令的標識和內容
Speech/VoiceCommandDefinition.xml
<?xml version="1.0" encoding="utf-8"?>
<VoiceCommands xmlns="http://schemas.microsoft.com/voicecommands/1.0">
    <CommandSet xml:lang="zh-cn">

        <!-- Command prefix; when omitted, the app name is used as the prefix. -->
        <CommandPrefix>貪吃蛇</CommandPrefix>

        <!-- The listening window randomly shows voice command hints of different apps
             (prefix followed by this example); this one may be shown when it is this app's turn. -->
        <Example>開始</Example>

        <Command Name="PlayGame">
            <!-- Hint text for this command, shown the same way as the command set example. -->
            <Example>開始</Example>
            <!-- Phrases to listen for. -->
            <ListenFor>[馬上] 開始</ListenFor>
            <ListenFor>[馬上] 啟動</ListenFor>
            <!-- Text shown in the listening window while the target app is being launched
                 (used as the TTS text when "Play audio confirmations" is enabled in the speech settings). -->
            <Feedback>准備啟動</Feedback>
            <!-- Page to launch. -->
            <Navigate Target="/Speech/VoiceCommands.xaml" />
        </Command>

        <Command Name="PlayLevel">
            <!-- Hint text for this command, shown the same way as the command set example. -->
            <Example>從等級 2 開始</Example>
            <!-- Phrase to listen for. -->
            <ListenFor>從等級 {number} 開始</ListenFor>
            <!-- Text shown in the listening window while the target app is being launched
                 (used as the TTS text when "Play audio confirmations" is enabled in the speech settings). -->
            <Feedback>正轉到等級 {number}... </Feedback>
            <!-- Page to launch. -->
            <Navigate Target="/Speech/VoiceCommands.xaml" />
        </Command>

        <!-- ListenFor and Feedback refer to this list through {number}. -->
        <PhraseList Label="number">
            <Item>1</Item>
            <Item>2</Item>
            <Item>3</Item>
        </PhraseList>

    </CommandSet>
</VoiceCommands>

<!--
    This definition recognizes the command prefix followed by: either PlayGame phrase
    (each with or without its optional leading word), or the PlayLevel phrase with
    level 1, 2 or 3.

    Visual Studio provides an item template for creating VoiceCommandDefinition files.

    Detailed VoiceCommands documentation: http://msdn.microsoft.com/en-us/library/windowsphone/develop/jj207041
-->
Speech/VoiceCommands.xaml
<phone:PhoneApplicationPage
    x:Class="Demo.Speech.VoiceCommands"
    xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
    xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
    xmlns:phone="clr-namespace:Microsoft.Phone.Controls;assembly=Microsoft.Phone"
    xmlns:shell="clr-namespace:Microsoft.Phone.Shell;assembly=Microsoft.Phone"
    xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
    xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
    FontFamily="{StaticResource PhoneFontFamilyNormal}"
    FontSize="{StaticResource PhoneFontSizeNormal}"
    Foreground="{StaticResource PhoneForegroundBrush}"
    SupportedOrientations="Portrait"
    Orientation="Portrait"
    mc:Ignorable="d"
    shell:SystemTray.IsVisible="True">

    <!-- Voice command demo page: a single label holding the usage hint, which the
         code-behind replaces with the command details after a voice-command launch. -->
    <Grid Background="Transparent">
        <StackPanel Orientation="Vertical">

            <TextBlock
                Name="lblMsg"
                TextWrapping="Wrap"
                Text="返回到開始屏幕,長按 windows 鍵,說出你的語音命令(語音命令的定義參見 VoiceCommandDefinition.xml)" />

        </StackPanel>
    </Grid>

</phone:PhoneApplicationPage>
Speech/VoiceCommands.xaml.cs
/*
 * Demonstrates how to launch the app with a voice command, and how the launched app
 * reads the name and the recognized text of the command that started it.
 *
 *
 * Speech recognition: recognizes speech inside the app
 * Voice commands: launch the app from outside by voice
 *
 *
 * Notes:
 * 1. The manifest must declare <Capability Name="ID_CAP_SPEECH_RECOGNITION" /> and <Capability Name="ID_CAP_MICROPHONE" />
 * 2. Detailed VoiceCommands documentation: http://msdn.microsoft.com/en-us/library/windowsphone/develop/jj207041
 */

using System;
using System.Windows;
using Microsoft.Phone.Controls;
using Windows.Phone.Speech.VoiceCommands;
using System.Windows.Navigation;

namespace Demo.Speech
{
    /// <summary>
    /// Code-behind of the voice command demo page: registers the app's voice command
    /// definition when the page loads, and shows the launching command when the page
    /// was reached through one.
    /// </summary>
    public partial class VoiceCommands : PhoneApplicationPage
    {
        public VoiceCommands()
        {
            InitializeComponent();

            this.Loaded += VoiceCommands_Loaded;
        }

        /// <summary>Registers the app's voice command definition file with the system.</summary>
        private async void VoiceCommands_Loaded(object sender, RoutedEventArgs e)
        {
            try
            {
                await VoiceCommandService.InstallCommandSetsFromFileAsync(new Uri("ms-appx:///Speech/VoiceCommandDefinition.xml"));

                // The installed command sets are available through
                // VoiceCommandService.InstalledCommandSets and can be modified dynamically.
            }
            catch (Exception ex)
            {
                // An exception escaping an async void handler would terminate the app.
                // Append it to the label so that any command details already shown are kept.
                lblMsg.Text += Environment.NewLine + ex.ToString();
            }
        }

        /// <summary>
        /// Shows the name and recognized text of the voice command that launched the page, if any.
        /// </summary>
        protected override void OnNavigatedTo(NavigationEventArgs e)
        {
            // When launched by a voice command the URL looks like:
            // /Speech/VoiceCommands.xaml?voiceCommandName=PlayGame&reco=%E8%B4%AA%E5%90%83%E8%9B%87%20%E5%BC%80%E5%A7%8B
            string commandName;
            if (NavigationContext.QueryString.TryGetValue("voiceCommandName", out commandName))
            {
                // "reco" is read defensively: a missing key yields an empty text instead of
                // a KeyNotFoundException.
                string recognizedText;
                if (!NavigationContext.QueryString.TryGetValue("reco", out recognizedText))
                {
                    recognizedText = string.Empty;
                }

                lblMsg.Text = "語音命令的標識: " + commandName
                    + Environment.NewLine
                    + "語音命令的內容: " + recognizedText;
            }

            base.OnNavigatedTo(e);
        }
    }
}
OK
[源碼下載]
