與眾不同 windows phone (45) - 8.0 語音: TTS, 語音識別, 語音命令

本文轉載自查看原文 2014-01-02 09:43 3314 Windows Phone

作者：webabcd

介紹
與眾不同 windows phone 8.0 之語音

TTS（Text To Speech）
語音識別
語音命令

示例
1、演示 TTS（Text To Speech）的應用
Speech/TTS.xaml

<phone:PhoneApplicationPage
    x:Class="Demo.Speech.TTS"
    xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
    xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
    xmlns:phone="clr-namespace:Microsoft.Phone.Controls;assembly=Microsoft.Phone"
    xmlns:shell="clr-namespace:Microsoft.Phone.Shell;assembly=Microsoft.Phone"
    xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
    xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
    FontFamily="{StaticResource PhoneFontFamilyNormal}"
    FontSize="{StaticResource PhoneFontSizeNormal}"
    Foreground="{StaticResource PhoneForegroundBrush}"
    SupportedOrientations="Portrait" Orientation="Portrait"
    mc:Ignorable="d"
    shell:SystemTray.IsVisible="True">

    <!-- TTS demo page: one output label and three buttons; the Click handlers live in TTS.xaml.cs -->
    <Grid Background="Transparent">
        <StackPanel Orientation="Vertical">

            <!-- Output label written by the code-behind -->
            <TextBlock Name="lblMsg" />
            
            <!-- "TTS basics" -->
            <Button x:Name="btnTTS_Basic" Content="TTS 基礎" Click="btnTTS_Basic_Click" />

            <!-- "TTS with a specified voice" -->
            <Button x:Name="btnTTS_Select" Content="用指定的語音 TTS" Click="btnTTS_Select_Click" />

            <!-- "Read an SSML document" -->
            <Button x:Name="btnTTS_SSML" Content="朗讀 SSML 文檔" Click="btnTTS_SSML_Click" />

        </StackPanel>
    </Grid>

</phone:PhoneApplicationPage>

Speech/TTS.xaml.cs

/*
 * 演示 TTS（Text To Speech）的應用
 * 
 * 
 * InstalledVoices - 管理已安裝的語音
 *     All - 已安裝的全部語音，返回 VoiceInformation 對象列表
 *     Default - 默認語音，返回 VoiceInformation 對象
 *     
 * VoiceInformation - 語音信息
 *     Id - 標識
 *     Language - 語言
 *     DisplayName - 名稱
 *     Description - 描述
 *     Gender - 性別（VoiceGender.Male 或 VoiceGender.Female）
 * 
 * SpeechSynthesizer - TTS 的類
 *     SetVoice(VoiceInformation voiceInformation) - 設置語音
 *     GetVoice() - 獲取語音信息
 *     SpeakTextAsync(string content, object userState) - 朗讀指定的文本。可以設置一個上下文對象，在 SpeechStarted 時取出
 *     SpeakSsmlAsync(string content, object userState) - 朗讀指定的 SSML 文檔。可以設置一個上下文對象，在 SpeechStarted 時取出
 *     SpeakSsmlFromUriAsync(Uri content, object userState) - 朗讀指定地址的 SSML 文檔。可以設置一個上下文對象，在 SpeechStarted 時取出
 *     CancelAll() - 取消全部朗讀
 *     SpeechStarted - 開始朗讀時觸發的事件
 *     BookmarkReached - 朗讀到 <mark /> 標記時觸發的事件（僅針對 SSML 協議）
 * 
 * 
 * 注：
 * 1、需要在 manifest 中增加配置 <Capability Name="ID_CAP_SPEECH_RECOGNITION" />
 * 2、SSML - Speech Synthesis Markup Language
 * 3、微軟關於 ssml 的說明：http://msdn.microsoft.com/en-us/library/hh361578
 * 4、W3C 關於 ssml 的說明：http://www.w3.org/TR/speech-synthesis/
 */

using System;
using System.Collections.Generic;
using System.Linq;
using System.Windows;
using Microsoft.Phone.Controls;
using Windows.Phone.Speech.Synthesis;

namespace Demo.Speech
{
    /// <summary>
    /// Demo page for text-to-speech: speaking with the default voice, with an explicitly
    /// selected zh-CN voice, and from an SSML document (including bookmark events).
    /// Failures are reported in <c>lblMsg</c> instead of escaping the async void handlers.
    /// </summary>
    public partial class TTS : PhoneApplicationPage
    {
        // Sample sentence used by all three demos. It embeds an SSML <mark /> element, which is
        // what raises BookmarkReached in the SSML demo.
        // NOTE(review): the plain-text demos pass the same string, markup included - confirm that is intended.
        private string _text = "TTS 是 Text To Speech 的縮寫<mark name=\"xxx\" />，即“從文本到語音”，是人機對話的一部分，讓機器能夠說話。";

        public TTS()
        {
            InitializeComponent();
        }

        // Speaks the sample text with the default voice.
        private async void btnTTS_Basic_Click(object sender, RoutedEventArgs e)
        {
            try
            {
                SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer();
                await speechSynthesizer.SpeakTextAsync(_text);
            }
            catch (Exception ex)
            {
                // An exception leaving an async void handler is unhandled; show it instead.
                lblMsg.Text = ex.ToString();
            }
        }

        // Speaks the sample text with the first installed zh-CN voice.
        private async void btnTTS_Select_Click(object sender, RoutedEventArgs e)
        {
            // First installed Chinese voice, or null when none is installed.
            // (Per the original author's note there are normally two: female first, then male.)
            VoiceInformation zhVoice = InstalledVoices.All.FirstOrDefault(
                voice => string.Equals(voice.Language, "zh-CN", StringComparison.OrdinalIgnoreCase));

            if (zhVoice == null)
            {
                // Previously ElementAt(0) threw here when no zh-CN voice was installed.
                lblMsg.Text = "未安裝中文語音";
                return;
            }

            try
            {
                SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer();

                // Select the voice, then speak.
                speechSynthesizer.SetVoice(zhVoice);
                await speechSynthesizer.SpeakTextAsync(_text);
            }
            catch (Exception ex)
            {
                lblMsg.Text = ex.ToString();
            }
        }

        // Speaks an SSML document built around the sample text.
        // Microsoft's SSML reference: http://msdn.microsoft.com/en-us/library/hh361578
        // W3C SSML specification:     http://www.w3.org/TR/speech-synthesis/
        private async void btnTTS_SSML_Click(object sender, RoutedEventArgs e)
        {
            try
            {
                SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer();

                // Raised when speaking starts.
                speechSynthesizer.SpeechStarted += speechSynthesizer_SpeechStarted;

                // Raised when a <mark /> element is reached.
                speechSynthesizer.BookmarkReached += speechSynthesizer_BookmarkReached;

                string ssml =
                    "<speak version=\"1.0\" xmlns=\"http://www.w3.org/2001/10/synthesis\" xml:lang=\"zh-CN\">" // Chinese
                    + "<voice gender=\"male\">"   // male voice
                    + "<prosody rate=\"-50%\">"   // speaking rate reduced by 50%
                    + _text
                    + "</prosody>"
                    + "</voice>"
                    + "</speak>";

                await speechSynthesizer.SpeakSsmlAsync(ssml);
            }
            catch (Exception ex)
            {
                lblMsg.Text = ex.ToString();
            }
        }

        void speechSynthesizer_SpeechStarted(SpeechSynthesizer sender, SpeechStartedEventArgs args)
        {
            // Demonstrates how to read back the user-state object; this demo does not use it further.
            object userState = args.UserState;
        }

        void speechSynthesizer_BookmarkReached(SpeechSynthesizer sender, SpeechBookmarkReachedEventArgs args)
        {
            // Update the label through the dispatcher so the write happens on the UI thread.
            this.Dispatcher.BeginInvoke(delegate()
            {
                // Name of the <mark /> that raised the event, then the audio position at which it was reached.
                lblMsg.Text = "mark name: " + args.Bookmark
                    + Environment.NewLine
                    + "audio position: " + args.AudioPosition.TotalSeconds;
            });
        }
    }
}

2、演示如何通過自定義語法列表做語音識別，以及如何通過 SRGS 自定義語法做語音識別
Speech/SRGSGrammar.xml

<?xml version="1.0" encoding="utf-8"?>
<grammar version="1.0" xml:lang="zh-cn" root="Main" tag-format="semantics/1.0"
         xmlns="http://www.w3.org/2001/06/grammar"
         xmlns:sapi="http://schemas.microsoft.com/Speech/2002/06/SRGSExtensions">
  <!-- Root rule: an optional leading "我想去" ("I want to go to") followed by one entry of the Cities rule -->
  <rule id="Main">
    <item repeat="0-1">我想去</item>
    <ruleref uri="#Cities" />
  </rule>
  <!-- City names: Beijing, Shenzhen, Shanghai, Guangzhou -->
  <rule id="Cities" scope="public">
    <one-of>
      <item>北京</item>
      <item>深圳</item>
      <item>上海</item>
      <item>廣州</item>
    </one-of>
  </rule>
</grammar>

<!--
This grammar recognizes: 我想去北京; 我想去深圳; 我想去上海; 我想去廣州; 北京; 深圳; 上海; 廣州

Visual Studio ships an item template for creating SRGSGrammar (SRGS grammar) files
Microsoft's SRGS reference: http://msdn.microsoft.com/en-us/library/hh361653
W3C SRGS specification: http://www.w3.org/TR/speech-grammar/
-->

Speech/SpeechRecognition.xaml

<phone:PhoneApplicationPage
    x:Class="Demo.Speech.SpeechRecognition"
    xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
    xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
    xmlns:phone="clr-namespace:Microsoft.Phone.Controls;assembly=Microsoft.Phone"
    xmlns:shell="clr-namespace:Microsoft.Phone.Shell;assembly=Microsoft.Phone"
    xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
    xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
    FontFamily="{StaticResource PhoneFontFamilyNormal}"
    FontSize="{StaticResource PhoneFontSizeNormal}"
    Foreground="{StaticResource PhoneForegroundBrush}"
    SupportedOrientations="Portrait" Orientation="Portrait"
    mc:Ignorable="d"
    shell:SystemTray.IsVisible="True">

    <!-- Speech recognition demo page: one output label and two buttons; the Click handlers live in SpeechRecognition.xaml.cs -->
    <Grid Background="Transparent">
        <StackPanel Orientation="Vertical">

            <!-- Output label written by the code-behind -->
            <TextBlock Name="lblMsg" />

            <!-- "Recognize speech using a custom phrase list" -->
            <Button x:Name="btnDemo" Content="通過自定義語法列表做語音識別" Click="btnDemo_Click" />

            <!-- "Recognize speech using a custom SRGS grammar" -->
            <Button x:Name="btnSRGS" Content="通過 SRGS 自定義語法做語音識別" Click="btnSRGS_Click" />
            
        </StackPanel>
    </Grid>

</phone:PhoneApplicationPage>

Speech/SpeechRecognition.xaml.cs

/*
 * 演示如何通過自定義語法列表做語音識別，以及如何通過 SRGS 自定義語法做語音識別
 * 
 * 
 * 語音識別：用於在 app 內識別語音
 * 語音命令：用於在 app 外通過語音命令啟動 app
 *  
 * 
 * 注：
 * 1、需要在 manifest 中增加配置 <Capability Name="ID_CAP_SPEECH_RECOGNITION" /> <Capability Name="ID_CAP_MICROPHONE" />
 * 2、安裝語音識別器：設置 -> 語音 -> 在“語音語言”列表中安裝指定的語音識別器，並啟用語音識別服務
 * 3、SRGS - Speech Recognition Grammar Specification
 * 4、微軟關於 SRGS 的說明：http://msdn.microsoft.com/en-us/library/hh361653
 * 5、W3C 關於 SRGS 的說明：http://www.w3.org/TR/speech-grammar/
 */

using System;
using System.Collections.Generic;
using System.Linq;
using System.Windows;
using Microsoft.Phone.Controls;
using Windows.Phone.Speech.Recognition;

namespace Demo.Speech
{
    /// <summary>
    /// Demo page for in-app speech recognition: once with the built-in recognition UI and a
    /// custom phrase list, once without UI using an SRGS grammar file.
    /// Results and errors are written to <c>lblMsg</c>.
    /// </summary>
    public partial class SpeechRecognition : PhoneApplicationPage
    {
        // HRESULT this page reports as "the current recognizer does not support the requested language".
        private const uint UnsupportedLanguageHResult = 0x800455BC;

        public SpeechRecognition()
        {
            InitializeComponent();
        }

        // Recognition with the built-in UI, restricted to a custom phrase list.
        private async void btnDemo_Click(object sender, RoutedEventArgs e)
        {
            // Look up the Chinese recognizer first; without it there is nothing to set up.
            SpeechRecognizerInformation zhRecognizer = InstalledSpeechRecognizers.All.FirstOrDefault(
                recognizerInfo => string.Equals(recognizerInfo.Language, "zh-CN", StringComparison.OrdinalIgnoreCase));

            if (zhRecognizer == null)
            {
                lblMsg.Text = "未安裝中文語音識別器";
                return;
            }

            // Speech recognizer with UI.
            SpeechRecognizerUI speechRecognizerUI = new SpeechRecognizerUI();

            // Raised when an audio problem occurs during recognition.
            speechRecognizerUI.Recognizer.AudioProblemOccurred += Recognizer_AudioProblemOccurred;
            // Raised when the audio capture state changes.
            speechRecognizerUI.Recognizer.AudioCaptureStateChanged += Recognizer_AudioCaptureStateChanged;

            // InitialSilenceTimeout - recognition is aborted if only silence is received within this time.
            speechRecognizerUI.Recognizer.Settings.InitialSilenceTimeout = TimeSpan.FromSeconds(5.0);
            // EndSilenceTimeout - once recognition has started, this much silence ends it.
            speechRecognizerUI.Recognizer.Settings.EndSilenceTimeout = TimeSpan.FromSeconds(0.15);
            // BabbleTimeout - recognition is aborted if only noise is received within this time (0 disables it).
            speechRecognizerUI.Recognizer.Settings.BabbleTimeout = TimeSpan.FromSeconds(0.0);

            try
            {
                // Setup is inside the try so that a failure is reported instead of being unhandled.
                speechRecognizerUI.Recognizer.SetRecognizer(zhRecognizer);

                // Phrase list to recognize.
                string[] phrases = { "xbox", "海賊王", "王磊" };
                speechRecognizerUI.Recognizer.Grammars.AddGrammarFromList("myWord", phrases);
                // Alternatives (whole-sentence dictation):
                //   AddGrammarFromPredefinedType("dictation", SpeechPredefinedGrammar.Dictation)  - local recognition
                //   AddGrammarFromPredefinedType("webSearch", SpeechPredefinedGrammar.WebSearch)  - network-based recognition

                // Preload all grammars.
                await speechRecognizerUI.Recognizer.PreloadGrammarsAsync();

                // Title shown on the listening screen.
                speechRecognizerUI.Settings.ListenText = "監聽中。。。";

                // Example text shown on the listening screen.
                speechRecognizerUI.Settings.ExampleText = "精確識別：xbox, 海賊王, 王磊";

                // Whether the "did you say" screen (listing multiple matches) and the "heard you say" screen
                // read the recognized text aloud; only effective when "play audio confirmations" is enabled
                // in the phone's speech settings.
                speechRecognizerUI.Settings.ReadoutEnabled = true;

                // Whether to show the "heard you say" screen with the final recognized text.
                speechRecognizerUI.Settings.ShowConfirmation = false;

                // Start recognizing.
                SpeechRecognitionUIResult result = await speechRecognizerUI.RecognizeWithUIAsync();

                string message = "識別狀態: " + result.ResultStatus.ToString();

                // NOTE(review): RecognitionResult is presumably unset when the UI flow does not succeed
                // (e.g. cancelled) - guard before dereferencing so only the status is shown in that case.
                SpeechRecognitionResult recognition = result.RecognitionResult;
                if (recognition != null)
                {
                    message += Environment.NewLine;
                    message += "識別結果：" + recognition.Text;
                    message += Environment.NewLine;
                    message += "可信度級別: " + recognition.TextConfidence.ToString(); // Rejected, Low, Medium, High
                }

                lblMsg.Text = message;
            }
            catch (Exception ex)
            {
                lblMsg.Text = DescribeRecognitionError(ex, speechRecognizerUI.Recognizer);
            }
        }

        void Recognizer_AudioCaptureStateChanged(SpeechRecognizer sender, SpeechRecognizerAudioCaptureStateChangedEventArgs args)
        {
            // Capture state changed: Capturing or Inactive.
            ShowMessage("AudioCaptureStateChanged: " + args.State.ToString());
        }

        void Recognizer_AudioProblemOccurred(SpeechRecognizer sender, SpeechAudioProblemOccurredEventArgs args)
        {
            // Audio problem: TooLoud, TooQuiet, TooFast, TooSlow, TooNoisy, NoSignal, None.
            ShowMessage("AudioProblemOccurred: " + args.Problem.ToString());
        }

        // Recognition without UI, using a custom SRGS grammar file.
        // Microsoft's SRGS reference: http://msdn.microsoft.com/en-us/library/hh361653
        // W3C SRGS specification:     http://www.w3.org/TR/speech-grammar/
        private async void btnSRGS_Click(object sender, RoutedEventArgs e)
        {
            // Speech recognizer without UI.
            SpeechRecognizer speechRecognizer = new SpeechRecognizer();

            try
            {
                // Register the SRGS grammar (inside the try so a load failure is reported).
                Uri mySRGS = new Uri("ms-appx:///Speech/SRGSGrammar.xml", UriKind.Absolute);
                speechRecognizer.Grammars.AddGrammarFromUri("srgs", mySRGS);

                lblMsg.Text = "監聽中。。。";
                lblMsg.Text += Environment.NewLine;

                // Start recognizing.
                SpeechRecognitionResult result = await speechRecognizer.RecognizeAsync();

                // Show the result.
                lblMsg.Text += "識別結果：" + result.Text;
                lblMsg.Text += Environment.NewLine;
                lblMsg.Text += "可信度級別: " + result.TextConfidence.ToString(); // Rejected, Low, Medium, High
            }
            catch (Exception ex)
            {
                lblMsg.Text = DescribeRecognitionError(ex, speechRecognizer);
            }
        }

        // Builds the text shown for a recognition failure: a dedicated message for the
        // unsupported-language HRESULT, the full exception text otherwise.
        private static string DescribeRecognitionError(Exception ex, SpeechRecognizer recognizer)
        {
            if ((uint)ex.HResult == UnsupportedLanguageHResult)
            {
                return "當前語音識別器不支持所請求的語言: " + recognizer.GetRecognizer().Language;
            }

            return ex.ToString();
        }

        // Writes a message to lblMsg, marshalling to the UI thread when called from another thread.
        // NOTE(review): which thread the recognizer events are raised on is not visible here; the TTS page
        // dispatches its synthesizer event the same way.
        private void ShowMessage(string message)
        {
            if (Dispatcher.CheckAccess())
            {
                lblMsg.Text = message;
            }
            else
            {
                Dispatcher.BeginInvoke(delegate()
                {
                    lblMsg.Text = message;
                });
            }
        }
    }
}

3、演示如何通過語音命令啟動 app，以及 app 啟動後如何獲取啟動此 app 的語音命令的標識和內容
Speech/VoiceCommandDefinition.xml

<?xml version="1.0" encoding="utf-8"?>
<VoiceCommands xmlns="http://schemas.microsoft.com/voicecommands/1.0">
  <CommandSet xml:lang="zh-cn">

    <!-- Command prefix; when omitted, the app name is used as the prefix -->
    <CommandPrefix>貪吃蛇</CommandPrefix>
    <!-- The voice listening screen shows example phrases from different apps at random ("貪吃蛇 開始"); when it is this app's turn, this one may be shown -->
    <Example>開始</Example>

    <Command Name="PlayGame">
      <!-- The voice listening screen shows example phrases from different apps at random ("貪吃蛇 開始"); when it is this app's turn, this one may be shown -->
      <Example>開始</Example>
      <!-- Phrase to listen for (the bracketed word is optional) -->
      <ListenFor>[馬上] 開始</ListenFor>
      <!-- Phrase to listen for (the bracketed word is optional) -->
      <ListenFor>[馬上] 啟動</ListenFor>
      <!-- Text shown in the listening screen when the target app is about to launch (used as the TTS text when "play audio confirmations" is enabled in the speech settings) -->
      <Feedback>准備啟動</Feedback>
      <!-- Page to launch -->
      <Navigate Target="/Speech/VoiceCommands.xaml" />
    </Command>

    <Command Name="PlayLevel">
      <!-- The voice listening screen shows example phrases from different apps at random ("貪吃蛇 從等級 2 開始"); when it is this app's turn, this one may be shown -->
      <Example>從等級 2 開始</Example>
      <!-- Phrase to listen for -->
      <ListenFor>從等級 {number} 開始</ListenFor>
      <!-- Text shown in the listening screen when the target app is about to launch (used as the TTS text when "play audio confirmations" is enabled in the speech settings) -->
      <Feedback>正轉到等級 {number}... </Feedback>
      <!-- Page to launch -->
      <Navigate Target="/Speech/VoiceCommands.xaml" />
    </Command>

    <!-- ListenFor and Feedback can reference this list via {number} -->
    <PhraseList Label="number">
      <Item>1</Item>
      <Item>2</Item>
      <Item>3</Item>
    </PhraseList>

  </CommandSet>
</VoiceCommands>

<!--
This definition recognizes: 貪吃蛇開始, 貪吃蛇馬上開始, 貪吃蛇啟動, 貪吃蛇馬上啟動, 貪吃蛇從等級 1 開始, 從等級 2 開始, 從等級 3 開始

Visual Studio ships an item template for creating VoiceCommandDefinition files
For details on VoiceCommands see: http://msdn.microsoft.com/en-us/library/windowsphone/develop/jj207041
-->

Speech/VoiceCommands.xaml

<phone:PhoneApplicationPage
    x:Class="Demo.Speech.VoiceCommands"
    xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
    xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
    xmlns:phone="clr-namespace:Microsoft.Phone.Controls;assembly=Microsoft.Phone"
    xmlns:shell="clr-namespace:Microsoft.Phone.Shell;assembly=Microsoft.Phone"
    xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
    xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
    FontFamily="{StaticResource PhoneFontFamilyNormal}"
    FontSize="{StaticResource PhoneFontSizeNormal}"
    Foreground="{StaticResource PhoneForegroundBrush}"
    SupportedOrientations="Portrait" Orientation="Portrait"
    mc:Ignorable="d"
    shell:SystemTray.IsVisible="True">

    <!-- Voice command demo page: a single label, overwritten by VoiceCommands.xaml.cs when the page is opened by a voice command -->
    <Grid Background="Transparent">
        <StackPanel Orientation="Vertical">

            <!-- Initial text: "Return to the Start screen, press and hold the Windows key, and speak your voice command (see VoiceCommandDefinition.xml for the command definitions)" -->
            <TextBlock Name="lblMsg" TextWrapping="Wrap" Text="返回到開始屏幕，長按 windows 鍵，說出你的語音命令（語音命令的定義參見 VoiceCommandDefinition.xml）" />

        </StackPanel>
    </Grid>
    
</phone:PhoneApplicationPage>

Speech/VoiceCommands.xaml.cs

/*
 * 演示如何通過語音命令啟動 app，以及 app 啟動后如何獲取啟動此 app 的語音命令的標識和內容
 * 
 * 
 * 語音識別：用於在 app 內識別語音
 * 語音命令：用於在 app 外通過語音命令啟動 app
 * 
 * 
 * 注：
 * 1、需要在 manifest 中增加配置 <Capability Name="ID_CAP_SPEECH_RECOGNITION" /> <Capability Name="ID_CAP_MICROPHONE" />
 * 2、關於 VoiceCommands 的詳細說明參見：http://msdn.microsoft.com/en-us/library/windowsphone/develop/jj207041
 */

using System;
using System.Windows;
using Microsoft.Phone.Controls;
using Windows.Phone.Speech.VoiceCommands;
using System.Windows.Navigation;

namespace Demo.Speech
{
    /// <summary>
    /// Demo page for voice commands: registers the app's voice command definition when loaded and,
    /// when the page was opened by a voice command, shows the command's name and recognized text.
    /// </summary>
    public partial class VoiceCommands : PhoneApplicationPage
    {
        public VoiceCommands()
        {
            InitializeComponent();

            this.Loaded += VoiceCommands_Loaded;
        }

        // Registers this app's voice command definition with the system.
        private async void VoiceCommands_Loaded(object sender, RoutedEventArgs e)
        {
            try
            {
                Uri definition = new Uri("ms-appx:///Speech/VoiceCommandDefinition.xml", UriKind.Absolute);
                await VoiceCommandService.InstallCommandSetsFromFileAsync(definition);

                // The installed command sets can be inspected and modified at run time via
                // VoiceCommandService.InstalledCommandSets
            }
            catch (Exception ex)
            {
                // An exception leaving an async void handler is unhandled; append it so that
                // any voice command details already shown stay visible.
                lblMsg.Text += Environment.NewLine + ex.ToString();
            }
        }

        // Shows the voice command that launched the page, if any.
        protected override void OnNavigatedTo(NavigationEventArgs e)
        {
            // When launched by a voice command the URL looks like:
            // /Speech/VoiceCommands.xaml?voiceCommandName=PlayGame&reco=%E8%B4%AA%E5%90%83%E8%9B%87%20%E5%BC%80%E5%A7%8B

            string commandName;
            if (NavigationContext.QueryString.TryGetValue("voiceCommandName", out commandName))
            {
                // "reco" is looked up defensively: a missing key yields an empty value instead of KeyNotFoundException.
                string recognizedText;
                NavigationContext.QueryString.TryGetValue("reco", out recognizedText);

                lblMsg.Text = "語音命令的標識: " + commandName
                    + Environment.NewLine
                    + "語音命令的內容: " + recognizedText;
            }

            base.OnNavigatedTo(e);
        }
    }
}

OK
[源碼下載]

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱 yoyou2525@163.com 刪除。

猜您在找：與眾不同 windows phone (48) - 8.0 其它: C# 調用 C++；語音識別-TDNN；Python 語音識別；語音識別 -- 概述；語音識別基礎；基於 android 的語音識別；與眾不同 windows phone (36) - 8.0 新的瓷貼: FlipTile, CycleTile, IconicTile；C# 語音識別；ROS kinetic 語音識別；語音識別文字的軟件