一個超簡單的語音識別編程,聽寫程序


轉載:http://blog.csdn.net/yincheng01/article/details/3584655

 

CSpeechRecognition類封裝了語音識別操作所需調用的幾個接口,使用它進行語音識別編程很方便,也很簡潔。

CSpeechRecognition類的定義如下:

///////////////////////////////////////////////////////////////

// active speech engine

#include <atlbase.h>

extern CComModule _Module;

#include <atlcom.h>

#include <sapi.h>

#include <sphelper.h>

#include <spuihelp.h>

///////////////////////////////////////////////////////////////

// speech message

#define WM_SREVENT   WM_USER+102

 

// Thin wrapper around the SAPI 5.x dictation workflow.  Owns the
// recognizer engine, a recognition context and a dictation grammar,
// and posts WM_SREVENT to a caller-supplied window when speech is
// recognized.  Call Initialize() before any other member.
class CSpeechRecognition 

{

public:

   CSpeechRecognition();

   virtual ~CSpeechRecognition();

 

   // initialize
   // Sets up COM, the recognizer (shared desktop engine when
   // bIsShared is TRUE, in-process otherwise), the recognition
   // context, notifications to hWnd, audio input and the dictation
   // grammar.  Returns FALSE on failure (see GetErrorString()).

   BOOL Initialize(HWND hWnd = NULL, BOOL bIsShared = TRUE);

   void Destroy();

 

   // start and stop
   // Toggle the engine's reco state; IsDictationOn() reports the
   // cached flag, not the engine's live state.

   BOOL Start();

   BOOL Stop();

   BOOL IsDictationOn()

   {

       return m_bOnDictation;

   }

 

   // event handler
   // Drains queued recognition events; *ppszCoMemText receives a
   // CoTaskMemAlloc'd string the caller must CoTaskMemFree.

   void GetText(WCHAR **ppszCoMemText, ULONG ulStart = 0, ULONG nlCount = -1);

 

   // voice training
   // Shows the SAPI user-training wizard (DisplayUI).

   HRESULT VoiceTraining(HWND hWndParent);

 

   // microphone setup
   // Shows the SAPI microphone-setup wizard (DisplayUI).

   HRESULT MicrophoneSetup(HWND hWndParent);

 

   // token list
   // Fills a list box (or combo box) with the installed recognizers.

   HRESULT InitTokenList(HWND hWnd, BOOL bIsComboBox = FALSE);

 

   // error string
   // Last human-readable error recorded by Initialize().

   CString GetErrorString()

   {

       return m_sError;

   }

 

   // interface
   // Public on purpose in the original design so callers can reach
   // the raw SAPI interfaces directly.

     CComPtr<ISpRecognizer> m_cpRecoEngine;  // SR engine

       CComPtr<ISpRecoContext> m_cpRecoCtxt;   // Recognition context for dictation

     CComPtr<ISpRecoGrammar> m_cpDictationGrammar;  // Dictation grammar

 

private:

   CString m_sError;          // last error message for GetErrorString()

    BOOL    m_bOnDictation;   // TRUE while dictation is active

};

其中定義的消息WM_SREVENT用於指示語音識別事件,該消息將通知到函數指定的響應窗口。

類中定義了3個接口指針m_cpRecoEngine、m_cpRecoCtxt和m_cpDictationGrammar,分別用於引用語音識別引擎的3個重要接口ISpRecognizer、ISpRecoContext和ISpRecoGrammar。

函數Initialize完成語音識別引擎基本工作環境的初始化,包括引擎、識別上下文、語法、音頻、事件通知等的創建和設置:

BOOL CSpeechRecognition::Initialize(HWND hWnd, BOOL bIsShared)

{

   // com library

   if (FAILED(CoInitialize(NULL)))

   {

       m_sError=_T("Error intialization COM");

       return FALSE;

   }

 

   // SR engine

    HRESULT hr = S_OK;

    if (bIsShared)

    {

        // Shared reco engine.

        // For a shared reco engine, the audio gets setup automatically

        hr = m_cpRecoEngine.CoCreateInstance( CLSID_SpSharedRecognizer );

    }

   else

   {

       hr = m_cpRecoEngine.CoCreateInstance(CLSID_SpInprocRecognizer);

 

   }

 

   // RecoContext

    if( SUCCEEDED( hr ) )

    {

        hr = m_cpRecoEngine->CreateRecoContext( &m_cpRecoCtxt );

    }

 

    // Set recognition notification for dictation

    if (SUCCEEDED(hr))

    {

  hr = m_cpRecoCtxt->SetNotifyWindowMessage( hWnd, WM_SREVENT, 0, 0 );

    }

   

    if (SUCCEEDED(hr))

    {

        // when the engine has recognized something

        const ULONGLONG ullInterest = SPFEI(SPEI_RECOGNITION);

        hr = m_cpRecoCtxt->SetInterest(ullInterest, ullInterest);

    }

 

    // create default audio object

    CComPtr<ISpAudio> cpAudio;

    hr = SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOIN, &cpAudio);

 

    // set the input for the engine

    hr = m_cpRecoEngine->SetInput(cpAudio, TRUE);

    hr = m_cpRecoEngine->SetRecoState( SPRST_ACTIVE );

 

   // grammar

    if (SUCCEEDED(hr))

    {

        // Specifies that the grammar we want is a dictation grammar.

        // Initializes the grammar (m_cpDictationGrammar)

        hr = m_cpRecoCtxt->CreateGrammar( 0, &m_cpDictationGrammar );

    }

    if  (SUCCEEDED(hr))

    {hr = m_cpDictationGrammar->LoadDictation(NULL, SPLO_STATIC);

    }

    if (SUCCEEDED(hr))

    {

        hr = m_cpDictationGrammar->SetDictationState( SPRS_ACTIVE );

    }

    if (FAILED(hr))

    {

        m_cpDictationGrammar.Release();

    }

 

    return (hr == S_OK);

}

 

釋放函數Destroy被類的析構函數調用,釋放了類所引用的所有接口:

void CSpeechRecognition::Destroy()

{

   if (m_cpDictationGrammar)

       m_cpDictationGrammar.Release();

   if (m_cpRecoCtxt)

       m_cpRecoCtxt.Release();

   if (m_cpRecoEngine)

       m_cpRecoEngine.Release();

   CoUninitialize();

}

函數Start和Stop用來控制開始和停止接受及識別語音,它們通過調用引擎接口的SetRecoState方法來實現:

// Starts accepting and recognizing speech by activating the engine.
// Returns TRUE when dictation is running (or already was), FALSE if
// the engine refused to change state.
BOOL CSpeechRecognition::Start()
{
    if (m_bOnDictation)
        return TRUE;   // already listening - nothing to do

    if (FAILED(m_cpRecoEngine->SetRecoState( SPRST_ACTIVE )))
        return FALSE;

    m_bOnDictation = TRUE;
    return TRUE;
}

 

// Stops accepting speech by deactivating the engine.  Returns TRUE
// when dictation is stopped (or already was), FALSE on engine failure.
BOOL CSpeechRecognition::Stop()
{
    if (! m_bOnDictation)
        return TRUE;   // already stopped - nothing to do

    if (FAILED(m_cpRecoEngine->SetRecoState( SPRST_INACTIVE )))
        return FALSE;

    m_bOnDictation = FALSE;
    return TRUE;
}

函數GetText是獲取從語音中已識別出的文字的關鍵,應該在響應識別事件/消息的響應函數中調用,其代碼如下所示。

void CSpeechRecognition::GetText(WCHAR **ppszCoMemText, ULONG ulStart, ULONG nlCount)

{

    USES_CONVERSION;

    CSpEvent event;

 

    // Process all of the recognition events

    while (event.GetFrom(m_cpRecoCtxt) == S_OK)

    {

        switch (event.eEventId)

        {

            case SPEI_RECOGNITION:

       // There may be multiple recognition results, so get all of them

                {

                 HRESULT hr = S_OK;

                 if (nlCount == -1)

              event.RecoResult()->GetText(SP_GETWHOLEPHRASE,

SP_GETWHOLEPHRASE, TRUE, ppszCoMemText, NULL);

                 else

                 {

                 ASSERT(nlCount > 0);

                 event.RecoResult()->GetText(ulStart, nlCount, FALSE,

                        ppszCoMemText, NULL);

                 }

                }

                break;

        }

    }

}

函數InitTokenList調用SpInitTokenComboBox或SpInitTokenListBox函數,把已安裝的語音識別引擎在列表框或組合框中列表顯示,供用戶查看和選擇:

// Fills the given list-box (or combo-box) control with the speech
// recognizer tokens installed under SPCAT_RECOGNIZERS, so the user
// can view and pick an engine.
HRESULT CSpeechRecognition::InitTokenList(HWND hWnd, BOOL bIsComboBox)
{
    return bIsComboBox
        ? SpInitTokenComboBox(hWnd, SPCAT_RECOGNIZERS)
        : SpInitTokenListBox(hWnd, SPCAT_RECOGNIZERS);
}

語音識別涉及語音的輸入,通常用話筒來輸入語音。進行語音識別前,需要判斷話筒的位置和設置是否合理,以保證語音識別引擎能獲得有效的語音輸入。函數MicrophoneSetup調用語音識別引擎接口的DisplayUI方法來顯示一個設置話筒的向導,如圖11-4所示。示例代碼如下所示:

// Shows SAPI's standard microphone-setup wizard as a child of
// hWndParent, via the engine's DisplayUI method.
HRESULT CSpeechRecognition::MicrophoneSetup(HWND hWndParent)
{
    const HRESULT hr =
        m_cpRecoEngine->DisplayUI(hWndParent, NULL, SPDUI_MicTraining, NULL, 0);
    return hr;
}

 

語音訓練是語音識別的重要基礎,為了獲得期望的識別效果,必須進行語音訓練,以讓語音識別引擎熟悉說話者的口音。函數VoiceTraining調用語音識別引擎接口的DisplayUI方法來顯示一個語音訓練向導,如圖11-5所示。示例代碼如下所示:

// Shows SAPI's user voice-training wizard as a child of hWndParent,
// via the engine's DisplayUI method.
HRESULT CSpeechRecognition::VoiceTraining(HWND hWndParent)
{
    const HRESULT hr =
        m_cpRecoEngine->DisplayUI(hWndParent, NULL, SPDUI_UserTraining, NULL, 0);
    return hr;
}

 

CText2Speech類似,CSpeechRecognition類也提供錯誤處理機制,由GetErrorString函數可以獲得錯誤信息。

11.3.2  示例:用CSpeechRecognition類編制聽寫程序

使用CSpeechRecognition類來編寫語音識別程序很簡單,下面讓我們實現一個聽寫程序Stenotypist,其界面如圖11-6所示。

 

VisualC++編制Stenotypist的步驟和要點如下:

1)使用AppWizard生成一個基於對話框的項目Stenotypist

2)將SpeechRecognition.H和SpeechRecognition.CPP增加到Stenotypist項目中;

3)在資源編輯器中編輯好響應的控件;

4)用ClassWizard為控件在CStenotypistDlg 類中生成相應的成員;

5)修改StenotypistDlg.h文件,為類CStenotypistDlg增加相應的變量和函數;

6)用ClassWizardCStenotypistDlg 類添加對控件和消息的響應函數。StenotypistDlg.h的代碼如下。

#include "SpeechRecognition.h"

 

////////////////////////////////////////////////////////////////////

// CStenotypistDlg dialog

 

// Main dialog of the Stenotypist sample: owns a CSpeechRecognition
// object, toggles dictation from a button and appends recognized text
// (delivered via WM_SREVENT) to m_strText.
class CStenotypistDlg : public CDialog

{

// Construction

public:

   CStenotypistDlg(CWnd* pParent = NULL); // standard constructor

 

// Dialog Data

   //{{AFX_DATA(CStenotypistDlg)

   enum { IDD = IDD_STENOTYPIST_DIALOG };

   CButton    m_btDictation;   // dictate/stop toggle button

   CString    m_strText;       // accumulated dictated text (DDX-bound)

   //}}AFX_DATA

 

   // ClassWizard generated virtual function overrides

   //{{AFX_VIRTUAL(CStenotypistDlg)

   protected:

   virtual void DoDataExchange(CDataExchange* pDX); // DDX/DDV support

   //}}AFX_VIRTUAL

 

   // Speech recognition wrapper; initialized in OnInitDialog().

   CSpeechRecognition   m_SpeechRecognition;

 

// Implementation

protected:

   HICON m_hIcon;

 

   // Generated message map functions

   //{{AFX_MSG(CStenotypistDlg)

   virtual BOOL OnInitDialog();

   afx_msg void OnSysCommand(UINT nID, LPARAM lParam);

   afx_msg void OnPaint();

   afx_msg HCURSOR OnQueryDragIcon();

   afx_msg void OnButtonVt();

   afx_msg void OnButtonMs();

   afx_msg void OnButtonDictate();

   //}}AFX_MSG

   // Handler for the user-defined WM_SREVENT recognition notification.

   afx_msg LRESULT OnSREvent(WPARAM, LPARAM);

   DECLARE_MESSAGE_MAP()

};

注意,在CStenotypistDlg類中定義了一個CSpeechRecognition類的對象。

OnInitDialog函數中調用CSpeechRecognition函數和設置語音語言列表:

BOOL CStenotypistDlg::OnInitDialog()

{

   CDialog::OnInitDialog();

 

   // Add "About..." menu item to system menu.

 

   // IDM_ABOUTBOX must be in the system command range.

   ASSERT((IDM_ABOUTBOX & 0xFFF0) == IDM_ABOUTBOX);

   ASSERT(IDM_ABOUTBOX < 0xF000);

 

   CMenu* pSysMenu = GetSystemMenu(FALSE);

   if (pSysMenu != NULL)

   {

       CString strAboutMenu;

       strAboutMenu.LoadString(IDS_ABOUTBOX);

       if (!strAboutMenu.IsEmpty())

       {

          pSysMenu->AppendMenu(MF_SEPARATOR);

          pSysMenu->AppendMenu(MF_STRING, IDM_ABOUTBOX, strAboutMenu);

       }

   }

 

    // Set the icon for this dialog.  The framework does this automatically

   //  when the application's main window is not a dialog

   SetIcon(m_hIcon, TRUE);         // Set big icon

   SetIcon(m_hIcon, FALSE);    // Set small icon

  

   // TODO: Add extra initialization here

   if (! m_SpeechRecognition.Initialize(m_hWnd))

AfxMessageBox(m_SpeechRecognition.GetErrorString());

m_SpeechRecognition.InitTokenList(GetDlgItem(IDC_LIST1)->m_hWnd);

 

   m_SpeechRecognition.Stop();

  

   return TRUE;  // return TRUE  unless you set the focus to a control

}

開始聽寫和停止聽寫的實現較簡單,只需調用CSpeechRecognition類的響應函數就能實現,其代碼如下所示。注意,停止和開始是互相切換的。

void CStenotypistDlg::OnButtonDictate()

{

   if (m_SpeechRecognition.IsDictationOn())

   {

       m_SpeechRecognition.Stop();

       m_btDictation.SetWindowText("聽寫(&D)");

 

       SetWindowText("聽寫者 - 請按<聽寫>按鈕開始聽寫!");

   }

   else

   {

       m_SpeechRecognition.Start();

       m_btDictation.SetWindowText("停止(&S)");

 

       SetWindowText("聽寫者 - 正在記錄,請口述...");

   }

}

設置話筒和語音訓練也通過直接調用CSpeechRecognition類的成員函數來實現:

void CStenotypistDlg::OnButtonVt()

{  m_SpeechRecognition.VoiceTraining(m_hWnd);

}

 void CStenotypistDlg::OnButtonMs()

{  m_SpeechRecognition.MicrophoneSetup(m_hWnd);

}

為了響應消息WM_SREVENT,需要添加相應的消息響應函數:

// Message map: routes the three button clicks and the user-defined
// WM_SREVENT recognition notification to their handlers.
BEGIN_MESSAGE_MAP(CStenotypistDlg, CDialog)

   //{{AFX_MSG_MAP(CStenotypistDlg)

   ON_WM_SYSCOMMAND()

   ON_WM_PAINT()

   ON_WM_QUERYDRAGICON()

   ON_BN_CLICKED(IDC_BUTTON_VT, OnButtonVt)

   ON_BN_CLICKED(IDC_BUTTON_MS, OnButtonMs)

   ON_BN_CLICKED(IDC_BUTTON_DICTATE, OnButtonDictate)

   //}}AFX_MSG_MAP

   // WM_SREVENT is a user-defined message, so its entry lives outside
   // the ClassWizard-maintained block above.

   ON_MESSAGE(WM_SREVENT, OnSREvent)

END_MESSAGE_MAP()

 

// Handles WM_SREVENT: fetches the newly recognized text from the
// recognition context and appends it to the dictation buffer shown in
// the dialog.
LRESULT CStenotypistDlg::OnSREvent(WPARAM, LPARAM)
{
    // BUGFIX: initialize the pointer; GetText() may leave it untouched
    // when no recognition event is actually pending, and the old code
    // then constructed a CString from an uninitialized pointer.
    WCHAR *pwzText = NULL;
    m_SpeechRecognition.GetText(&pwzText);

    if (pwzText != NULL)
    {
        m_strText += CString(pwzText);
        UpdateData(FALSE);   // push m_strText into the edit control

        // The string was allocated by SAPI with CoTaskMemAlloc.
        // BUGFIX: free it, otherwise every recognition event leaked it.
        CoTaskMemFree(pwzText);
    }

    return 0L;
}

7)為了調用Speech引擎,應該在Microsoft Visual C++編程環境中設置好相應的includelib設置:

 設置include路徑

    通過Project→Settings菜單項打開Project Settings對話框;

    點擊C/C++項;

    在Category下拉列表中選取Preprocessor

    在“Additional include directories”編輯框中輸入安裝Speech SDK的include的路徑,默認的路徑是C:/Program Files/Microsoft Speech SDK 5.1/Include

 設置lib信息

    通過Project→Settings菜單項打開Project Settings對話框;

    選擇Link項;

    在Category下拉列表中選取Input

    在“Additional library path”編輯框中輸入安裝Speech SDK的lib的路徑,默認的路徑是C:/Program Files/Microsoft Speech SDK 5.1/ Lib/i386

    將“sapi.lib”輸入“Object/library modules”所標識的編輯框中。

8)編譯連接該項目,就可讓聽寫者開始聽寫了。

Stenotypist項目的所有源代碼都存放在附盤的/Source/Stenotypist目錄下。


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM