廢話就不多說了,開始。。。
最近在做一個文本轉語音TTS(Text to Speech)的第三方軟件封裝,使用的是國內語音技術龍頭安徽科大訊飛公司提供的離線引擎AiSound5.0,主要用於汽車導航。科大訊飛還提供了用於語音識別的AiTalk、用於手寫識別的AiWrite等服務。另外還有針對6種平台的SDK和開發示例。
一、科大訊飛語音平台
科大訊飛目前有不少產品應用在移動終端上了,比如說用在手機上的訊飛語點,可以方便地通過語音撥打電話、發送短信,查詢天氣、股票等信息。
在訊飛語音雲這個網站可以找到科大訊飛針對6種平台的SDK:http://open.voicecloud.cn/download.php ,包含Android、iPhone、Windows、Linux、Java、Flash這些平台的語音合成、語音識別和語音聽寫的開發文檔和SDK下載。不過使用之前需要申請AppID(每一個語音應用程序需要一個AppID來唯一標識,您需要通過注冊帳號來獲得自己應用的AppID,未注冊的語音應用將無法正常獲取語音服務),通過審核后就可以下載相應的開發文檔和SDK並用於自己的軟件系統中了。
二 、科大訊飛語音合成、識別-在線文檔和應用程序示例網址
從這里可以查看在線的開發文檔:http://open.voicecloud.cn/developer.php?column=aW50ZV9zZGs%3D
Android平台的開發文檔和應用程序示例:http://open.voicecloud.cn/developer.php?category=YW5kcm9pZA%3D%3D&column=ZG9jdW1lbnQ%3D&type=d2lkZ2V0#a38
Windows和Linux平台的開發文檔和程序示例:http://open.voicecloud.cn/developer.php?category=b3RoZXI%3D&column=ZG9jdW1lbnQ%3D&type=YXBp
三、科大訊飛語音產品的移動應用
這里有一個科大訊飛的在線語音合成系統演示程序:ViViVoice 2.1在線演示系統
http://www.iflytek.com/TtsDemo/viviVoiceShow.aspx
1、訊飛語點的應用,官網下載網址如下:http://yudian.voicecloud.cn/yudian.htm,支持Android和Iphone兩大平台。
2、訊飛輸入法應用:http://ime.voicecloud.cn/index.html
3、訊飛語音輸入法應用:http://kouxun.voicecloud.cn/index.html
4、開發者社區:http://open.voicecloud.cn/index.php
四、官網的Windows平台-語音合成和識別示例代碼
網址如下:http://open.voicecloud.cn/developer.php?category=b3RoZXI%3D&column=c2FtcGxl&type=YXBp
以Windows下的開發為例,官網給出了語音合成、語音識別和語音聽寫的編程示例。
1、語音合成開發例程
#include <stdio.h>
#include <string.h>
#include "qtts.h"
/*
 * END_SYNTH( reason ): error-path teardown for the TTS demo.
 *
 * Ends the current session, passing the stringified `reason` tokens to
 * QTTSSessionEnd, then calls QTTSFini. A non-zero return from either call
 * is printed. The macro expects `session_id` and `ret` to be in scope at
 * the call site and overwrites `ret`.
 *
 * The body is wrapped in do { } while( 0 ) so that `END_SYNTH( x );` is
 * exactly one statement and stays correct inside an unbraced if/else.
 */
#define END_SYNTH( reason ) \
    do \
    { \
        ret = QTTSSessionEnd( session_id, #reason ); \
        if( 0 != ret ) \
        { \
            printf("QTTSSessionEnd failed, error code is %d", ret ); \
        } \
        \
        ret = QTTSFini(); \
        if( 0 != ret ) \
        { \
            printf("QTTSFini failed, error code is %d", ret ); \
        } \
    } while( 0 )
/*
 * TTS demo entry point.
 *
 * Sequence: QTTSInit -> QTTSSessionBegin -> QTTSTextPut -> QTTSAudioGet in a
 * loop until the reported status equals TTS_FLAG_DATA_END, writing every
 * returned chunk to "synth_speech.pcm" -> QTTSSessionEnd -> QTTSFini.
 *
 * Returns 0 when every step succeeded and -1 as soon as any step failed.
 */
int main()
{
    const char* configs = NULL;
    const char* session_id = NULL;
    const char* synth_params = NULL;
    const char* synth_text = NULL;
    unsigned int text_len = 0;
    const char* synth_speech = NULL;
    unsigned int synth_speech_len = 0;
    FILE* f_speech = NULL;
    int synth_status = 0;
    int ret = 0;
    int exit_code = 0;

    printf( "===================================================================\n"
            " Mobile Speech Platform 2.0 Client SDK Demo for TTS \n"
            "===================================================================\n" );

    /* Initialise the library. */
    configs = "server_url=dev.voicecloud.cn/index.htm, timeout=10000, coding_libs=speex.dll";
    ret = QTTSInit( configs );
    if( 0 != ret )
    {
        printf( "QTTSInit failed, error code is %d", ret );
        return -1;
    }

    /* Begin one session, using session mode. */
    synth_params = "ssm=1, auf=audio/L16;rate=16000, aue=speex-wb;7, ent=intp65";
    session_id = QTTSSessionBegin( synth_params, &ret );
    if( 0 != ret )
    {
        printf( "QTTSSessionBegin failed, error code is %d", ret );
        /* QTTSInit succeeded above, so balance it before bailing out. */
        ret = QTTSFini();
        if( 0 != ret )
        {
            printf( "QTTSFini failed, error code is %d", ret );
        }
        return -1;
    }

    /* Submit the text to synthesise. */
    synth_text = "訊飛語音雲為您提供了最新最好的語音技術休會,"
                 "我們在互聯網上開放科大訊飛最新研發的各種語音技術,"
                 "包含世界領先的語音合成技術、語音識別技術、聲紋識別技術等。";
    text_len = (unsigned int)strlen( synth_text );
    ret = QTTSTextPut( session_id, synth_text, text_len, NULL );
    if( 0 != ret )
    {
        printf( "QTTSTextPut failed, error code is %d", ret );
        END_SYNTH( QTTSTextPut failed! );
        return -1;
    }

    /* Fetch the synthesised audio and store it in a file. */
    f_speech = fopen( "synth_speech.pcm", "wb" );
    if( NULL == f_speech )
    {
        printf( "Can not open file \"synth_speech.pcm\"" );
        END_SYNTH( open file );
        return -1;
    }

    while( TTS_FLAG_DATA_END != synth_status )
    {
        synth_speech = QTTSAudioGet( session_id, &synth_speech_len, &synth_status, &ret );
        if( 0 != ret )
        {
            printf( "QTTSAudioGet failed, error code is: %d", ret );
            exit_code = -1;
            break;
        }
        printf( "QTTSAudioGet ok, speech length = %u\n", synth_speech_len );

        if( NULL != synth_speech && 0 != synth_speech_len )
        {
            /* A short write means the output file is incomplete; stop. */
            if( fwrite( synth_speech, 1, synth_speech_len, f_speech ) != synth_speech_len )
            {
                printf( "Can not write to file \"synth_speech.pcm\"\n" );
                exit_code = -1;
                break;
            }
        }
    }

    /* fclose flushes buffered audio, so its result matters for a write stream. */
    if( 0 != fclose( f_speech ) )
    {
        printf( "Can not close file \"synth_speech.pcm\"\n" );
        exit_code = -1;
    }
    f_speech = NULL;

    /* End the session and release resources. */
    ret = QTTSSessionEnd( session_id, "normal end" );
    if( 0 != ret )
    {
        printf( "QTTSSessionEnd failed, error code is %d", ret );
        exit_code = -1;
    }
    session_id = NULL;

    ret = QTTSFini();
    if( 0 != ret )
    {
        printf( "QTTSFini failed, error code is %d", ret );
        exit_code = -1;
    }

    return exit_code;
}
2、語音識別開發例程
#include <stdio.h>
#include <string.h>
#include <Windows.h>
#include "qisr.h"
/*
 * END_RECOG( reason ): error-path teardown for the recognition demo.
 *
 * Ends the current session, passing the stringified `reason` tokens to
 * QISRSessionEnd, then calls QISRFini. A non-zero return from either call
 * is printed. The macro expects `session_id` and `ret` to be in scope at
 * the call site and overwrites `ret`.
 *
 * Every continuation backslash is the last character of its line, and the
 * body is wrapped in do { } while( 0 ) so that `END_RECOG( x );` is exactly
 * one statement and stays correct inside an unbraced if/else.
 */
#define END_RECOG( reason ) \
    do \
    { \
        ret = QISRSessionEnd( session_id, #reason ); \
        if( 0 != ret ) \
        { \
            printf("QISRSessionEnd failed, error code is %d", ret ); \
        } \
        \
        ret = QISRFini(); \
        if( 0 != ret ) \
        { \
            printf("QISRFini failed, error code is %d", ret ); \
        } \
    } while( 0 )
/* Size in bytes of one audio chunk; parenthesized so it expands safely inside any expression. */
#define BLOCK_LEN ( 5 * 1024 )
/*
 * Grammar-based recognition demo entry point.
 *
 * Sequence: QISRInit -> QISRSessionBegin (with a built-in grammar) -> read
 * "sxk_16k.pcm" in BLOCK_LEN-byte chunks and pass each one to
 * QISRAudioWrite, fetching results with QISRGetResult whenever the write
 * reports ISR_REC_STATUS_SUCCESS -> drain the remaining results ->
 * QISRSessionEnd -> QISRFini.
 *
 * Returns 0 when every step succeeded and -1 as soon as any step failed.
 */
int main()
{
    const char* configs = NULL;
    const char* session_id = NULL;
    const char* recog_grammar = NULL;
    const char* recog_params = NULL;
    char recog_audio[ BLOCK_LEN ];
    FILE* f_speech = NULL;
    int audio_status = 0;
    int ep_status = 0;
    int rslt_status = 0;
    const char* rec_result = NULL;
    unsigned int audio_len = 0;
    int ret = 0;
    int exit_code = 0;

    printf( "===================================================================\n"
            " Mobile Speech Platform 2.0 Client SDK Demo for IAT \n"
            "===================================================================\n" );

    /* Initialise the library. */
    configs = "server_url=dev.voicecloud.cn/index.htm, coding_libs=speex.dll, vad_enable=true";
    ret = QISRInit( configs );
    if( 0 != ret )
    {
        printf( "QISRInit failed, error code is %d\n", ret );
        return -1;
    }

    /* Begin one session in session mode, recognising with the engine's built-in grammar. */
    recog_grammar = "builtin:grammar/../search/location.abnf?language=zh-cn";
    recog_params = "ssm=1, aue=speex-wb;7, auf=audio/L16;rate=16000, "
                   "ent=map, vad_speech_tail=900";
    session_id = QISRSessionBegin( recog_grammar, recog_params, &ret );
    if( 0 != ret )
    {
        printf( "QISRSessionBegin failed, error code is %d\n", ret );
        /* QISRInit succeeded above, so balance it before bailing out. */
        ret = QISRFini();
        if( 0 != ret )
        {
            printf( "QISRFini failed, error code is %d\n", ret );
        }
        return -1;
    }

    /*
     * Open the speech file to recognise. Audio could come from another
     * source instead, for example live capture.
     */
    f_speech = fopen( "sxk_16k.pcm", "rb" );
    if( NULL == f_speech )
    {
        printf( "Can not open file \"sxk_16k.pcm\"\n" );
        END_RECOG( open file );
        return -1;
    }

    /* Send the audio data and collect results as they become available. */
    while( ISR_AUDIO_SAMPLE_LAST != audio_status )
    {
        audio_len = (unsigned int)fread( recog_audio, 1, BLOCK_LEN, f_speech );
        /* A short read marks this chunk as the last one. */
        audio_status = ( audio_len == BLOCK_LEN ) ?
            ISR_AUDIO_SAMPLE_CONTINUE : ISR_AUDIO_SAMPLE_LAST;

        ret = QISRAudioWrite( session_id, recog_audio, audio_len,
                              audio_status, &ep_status, &rslt_status );
        if( 0 != ret )
        {
            printf( "QISRAudioWrite failed, error code is %d\n", ret );
            /* Mark results as complete so the drain loop below is skipped. */
            rslt_status = ISR_REC_STATUS_SPEECH_COMPLETE;
            exit_code = -1;
            break;
        }
        printf( "write audio data ok! len=%u, status=%d\n", audio_len, audio_status );

        /* A result is already buffered in MSC and can be fetched now. */
        if( ISR_REC_STATUS_SUCCESS == rslt_status )
        {
            rec_result = QISRGetResult( session_id, &rslt_status, 5000, &ret );
            if( 0 != ret )
            {
                printf( "QISRGetResult failed, error code is %d\n", ret );
                rslt_status = ISR_REC_STATUS_SPEECH_COMPLETE;
                exit_code = -1;
                break;
            }
            if( NULL != rec_result )
            {
                printf( "got a result: %s\n", rec_result );
            }
            /* All results have been retrieved. */
            if( ISR_REC_STATUS_SPEECH_COMPLETE == rslt_status )
            {
                printf( "the result has been got completely!\n" );
                break;
            }
        }

        /* The end point of speech was detected; stop sending audio. */
        if( ISR_EP_AFTER_SPEECH == ep_status )
        {
            printf( "end point of speech has been detected!\n" );
            break;
        }

        Sleep( 160 );
    }
    fclose( f_speech );
    f_speech = NULL;

    /* Fetch the remaining recognition results. */
    while( ISR_REC_STATUS_SPEECH_COMPLETE != rslt_status )
    {
        rec_result = QISRGetResult( session_id, &rslt_status, 5000, &ret );
        if( 0 != ret )
        {
            printf( "QISRGetResult failed, error code is: %d\n", ret );
            exit_code = -1;
            break;
        }
        if( NULL != rec_result )
        {
            printf( "got a result: %s\n", rec_result );
        }
        /* Sleeping here avoids burning CPU while MSC has no buffered result. */
        Sleep( 200 );
    }

    /* End the session and release resources. */
    ret = QISRSessionEnd( session_id, "normal end" );
    if( 0 != ret )
    {
        printf( "QISRSessionEnd failed, error code is %d\n", ret );
        exit_code = -1;
    }
    session_id = NULL;

    ret = QISRFini();
    if( 0 != ret )
    {
        printf( "QISRFini failed, error code is %d\n", ret );
        exit_code = -1;
    }

    return exit_code;
}
3、語音聽寫開發例程
#include <stdio.h>
#include <string.h>
#include <Windows.h>
#include "qisr.h"
/*
 * END_RECOG( reason ): error-path teardown for the dictation demo.
 *
 * Ends the current session, passing the stringified `reason` tokens to
 * QISRSessionEnd, then calls QISRFini. A non-zero return from either call
 * is printed. The macro expects `session_id` and `ret` to be in scope at
 * the call site and overwrites `ret`.
 *
 * The body is wrapped in do { } while( 0 ) so that `END_RECOG( x );` is
 * exactly one statement and stays correct inside an unbraced if/else.
 */
#define END_RECOG( reason ) \
    do \
    { \
        ret = QISRSessionEnd( session_id, #reason ); \
        if( 0 != ret ) \
        { \
            printf("QISRSessionEnd failed, error code is %d", ret ); \
        } \
        \
        ret = QISRFini(); \
        if( 0 != ret ) \
        { \
            printf("QISRFini failed, error code is %d", ret ); \
        } \
    } while( 0 )
/* Size in bytes of one audio chunk; parenthesized so it expands safely inside any expression. */
#define BLOCK_LEN ( 5 * 1024 )
/*
 * Dictation demo entry point.
 *
 * Sequence: QISRInit -> QISRSessionBegin (no grammar, "sub=iat" parameters)
 * -> read "IAT_16KPCM_10s_0.pcm" in BLOCK_LEN-byte chunks and pass each one
 * to QISRAudioWrite, fetching results with QISRGetResult whenever the write
 * reports ISR_REC_STATUS_SUCCESS -> drain the remaining results ->
 * QISRSessionEnd -> QISRFini.
 *
 * Returns 0 when every step succeeded and -1 as soon as any step failed.
 */
int main()
{
    const char* configs = NULL;
    const char* session_id = NULL;
    const char* recog_params = NULL;
    char recog_audio[ BLOCK_LEN ];
    FILE* f_speech = NULL;
    int audio_status = 0;
    int ep_status = 0;
    int rslt_status = 0;
    const char* rec_result = NULL;
    unsigned int audio_len = 0;
    int ret = 0;
    int exit_code = 0;

    printf( "===================================================================\n"
            " Mobile Speech Platform 2.0 Client SDK Demo for IAT \n"
            "===================================================================\n" );

    /* Initialise the library. */
    configs = "server_url=dev.voicecloud.cn/index.htm, coding_libs=speex.dll, vad_enable=true";
    ret = QISRInit( configs );
    if( 0 != ret )
    {
        printf( "QISRInit failed, error code is %d\n", ret );
        return -1;
    }

    /* Begin one session. */
    recog_params = "ssm=1, sub=iat, aue=speex-wb;7, auf=audio/L16;rate=16000, "
                   "ent=sms16k, rst=plain, vad_speech_tail=1500";
    session_id = QISRSessionBegin( NULL, recog_params, &ret );
    if( 0 != ret )
    {
        printf( "QISRSessionBegin failed, error code is %d\n", ret );
        /* QISRInit succeeded above, so balance it before bailing out. */
        ret = QISRFini();
        if( 0 != ret )
        {
            printf( "QISRFini failed, error code is %d\n", ret );
        }
        return -1;
    }

    /*
     * Open the speech file to recognise. Audio could come from another
     * source instead, for example live capture.
     */
    f_speech = fopen( "IAT_16KPCM_10s_0.pcm", "rb" );
    if( NULL == f_speech )
    {
        printf( "Can not open file \"IAT_16KPCM_10s_0.pcm\"\n" );
        END_RECOG( open file );
        return -1;
    }

    /* Send the audio data and collect dictation results as they become available. */
    while( ISR_AUDIO_SAMPLE_LAST != audio_status )
    {
        audio_len = (unsigned int)fread( recog_audio, 1, BLOCK_LEN, f_speech );
        /* A short read marks this chunk as the last one. */
        audio_status = ( audio_len == BLOCK_LEN ) ?
            ISR_AUDIO_SAMPLE_CONTINUE : ISR_AUDIO_SAMPLE_LAST;

        ret = QISRAudioWrite( session_id, recog_audio, audio_len,
                              audio_status, &ep_status, &rslt_status );
        if( 0 != ret )
        {
            printf( "QISRAudioWrite failed, error code is %d\n", ret );
            /* Mark results as complete so the drain loop below is skipped. */
            rslt_status = ISR_REC_STATUS_SPEECH_COMPLETE;
            exit_code = -1;
            break;
        }
        printf( "write audio data ok! len=%u, status=%d\n", audio_len, audio_status );

        /* A result is already buffered in MSC and can be fetched now. */
        if( ISR_REC_STATUS_SUCCESS == rslt_status )
        {
            rec_result = QISRGetResult( session_id, &rslt_status, 5000, &ret );
            if( 0 != ret )
            {
                printf( "QISRGetResult failed, error code is %d\n", ret );
                rslt_status = ISR_REC_STATUS_SPEECH_COMPLETE;
                exit_code = -1;
                break;
            }
            if( NULL != rec_result )
            {
                printf( "got a result: %s\n", rec_result );
            }
            /* All results have been retrieved. */
            if( ISR_REC_STATUS_SPEECH_COMPLETE == rslt_status )
            {
                printf( "the result has been got completely!\n" );
                break;
            }
        }

        /* The end point of speech was detected; stop sending audio. */
        if( ISR_EP_AFTER_SPEECH == ep_status )
        {
            printf( "end point of speech has been detected!\n" );
            break;
        }

        Sleep( 160 );
    }
    fclose( f_speech );
    f_speech = NULL;

    /* Fetch the remaining recognition results. */
    while( ISR_REC_STATUS_SPEECH_COMPLETE != rslt_status )
    {
        rec_result = QISRGetResult( session_id, &rslt_status, 5000, &ret );
        if( 0 != ret )
        {
            printf( "QISRGetResult failed, error code is: %d\n", ret );
            exit_code = -1;
            break;
        }
        if( NULL != rec_result )
        {
            printf( "got a result: %s\n", rec_result );
        }
        /* Sleeping here avoids burning CPU while MSC has no buffered result. */
        Sleep( 200 );
    }

    /* End the session and release resources. */
    ret = QISRSessionEnd( session_id, "normal end" );
    if( 0 != ret )
    {
        printf( "QISRSessionEnd failed, error code is %d\n", ret );
        exit_code = -1;
    }
    session_id = NULL;

    ret = QISRFini();
    if( 0 != ret )
    {
        printf( "QISRFini failed, error code is %d\n", ret );
        exit_code = -1;
    }

    return exit_code;
}
五、Tizen(泰澤)提供的TTS和STT語音合成、語音識別接口
泰澤是三星和英特爾合作開發的一款操作系統。
1、Tizen SDK官網:https://developer.tizen.org/downloads/tizen-sdk
2、Tizen項目:https://review.tizen.org/git/
3、tts-api:https://review.tizen.org/git/?p=framework/api/tts-api.git;a=summary

其對應的tts.h頭文件api接口在線網址為:https://review.tizen.org/git/?p=framework/api/tts-api.git;a=blob;f=include/tts.h;h=636470d923555a30d164cc09ceff841b72187e20;hb=98aee0bab00a418af162a0314ef931f8fd620892
4、stt-api:https://review.tizen.org/git/?p=framework/api/stt-api.git;a=summary
文章結束給大家分享下程序員的一些笑話語錄: 問路
有一個駕駛熱氣球的人發現他迷路了。他降低了飛行的高度,並認出了地面上的一個人。他繼續下降高度並對着那個人大叫,“打擾一下,你能告訴我我在哪嗎?”
下面那個人說:“是的。你在熱氣球里啊,盤旋在 30 英尺的空中”。
熱氣球上的人說:“你一定是在 IT 部門做技術工作”。
“沒錯”,地面上的人說到,“你是怎么知道的?”
“呵呵”,熱氣球上的人說,“你告訴我的每件事在技術上都是對的,但都沒有用”。
地面上的人說,“你一定是管理層的人”。
“沒錯”,熱氣球上的人說,“可是你是怎么知道的?”
“呵呵”,地面上的那人說到,“你不知道你在哪里,你也不知道你要去哪,你總希望我能幫你。你現在和我們剛見面時還在原來那個地方,但現在卻是我錯了”。
--------------------------------- 原創文章 By
語音和語音合成
---------------------------------
