科大訊飛 文字轉語音


1、wav音頻文件的格式

wav文件由文件頭和采樣數據2部分組成。

文件頭又分為RIFF(Resource Interchange File Format)/WAVE文件標識段 和 聲音數據格式說明段兩部分。

各段的起始地址分別由RIFF標識符、WAVE標識符、以及波形格式標識符(FMT)標定。

(1)文件頭格式

注意:下面的地址是連續的

雖然上圖給出的數據標識符起始地址剛好是文件頭的末地址+1,但並不代表總是這樣。

因此,我們在讀取數據時最好是找到數據標識符,該標識符的4個字節剛好是'd'、'a'、't'、'a'。

(2)數據格式

 

 

wav結構體定義:

/*
 * Header of a PCM WAV file: RIFF/WAVE identification, the format
 * description, and the tag/size pair that introduces the sample data.
 * The struct is written to disk as-is, so the field order is the file
 * layout (44 bytes when int is 4 bytes and short is 2 bytes, no padding).
 */
typedef struct _wave_pcm_hdr
{
    /* --- RIFF / WAVE identification --- */
    char  riff[4];            /* resource interchange file tag */
    int   size_8;             /* number of bytes from the next field to the end of the file */
    char  wave[4];            /* WAVE file tag */

    /* --- sound format description --- */
    char  fmt[4];             /* waveform format tag */
    int   fmt_size;           /* usually 0x00000010; presumably the byte count of the format fields below */
    short formate_tag;        /* format category; 1 means linear PCM */
    short channels;           /* channel count: 1 = mono, 2 = stereo */
    int   samples_per_sec;    /* sampling frequency */
    int   avg_bytes_per_sec;  /* data rate: bytes per second = sampling frequency x bytes per sample */
    short block_align;        /* block alignment in bytes = channels * bits_per_sample / 8 */
    short bits_per_sample;    /* bits per sample (quantization depth) */

    /* --- sample data --- */
    char  data[4];            /* data tag */
    int   data_size;          /* byte count of the audio data that follows the header */
} _wave_pcm_hdr;
/*
 * Header template for 16 kHz, mono, 16-bit PCM audio.
 * Both size fields start at 0; the program fills them in after it knows
 * how many audio bytes were produced.
 */
_wave_pcm_hdr default_wave_hdr = {
    { 'R', 'I', 'F', 'F' },   /* riff */
    0,                        /* size_8 */
    { 'W', 'A', 'V', 'E' },   /* wave */
    { 'f', 'm', 't', ' ' },   /* fmt */
    16,                       /* fmt_size */
    1,                        /* formate_tag: PCM */
    1,                        /* channels: mono */
    16000,                    /* samples_per_sec */
    32000,                    /* avg_bytes_per_sec */
    2,                        /* block_align */
    16,                       /* bits_per_sample */
    { 'd', 'a', 't', 'a' },   /* data */
    0                         /* data_size */
};

 

代碼:

#include "stdafx.h"
#include "msp_cmn.h"
#include "msp_errors.h"
#include "qtts.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <Windows.h>
#ifdef _WIN64
#pragma comment (lib,"msc_x64.lib")
#else
#pragma comment (lib,"msc.lib")
#endif // _WIN64
 

#pragma comment(lib,"WinMM.lib")
/*
 * Header of a PCM WAV file: RIFF/WAVE identification, the format
 * description, and the tag/size pair that introduces the sample data.
 * The struct is written to disk as-is, so the field order is the file
 * layout (44 bytes when int is 4 bytes and short is 2 bytes, no padding).
 */
typedef struct _wave_pcm_hdr
{
    /* --- RIFF / WAVE identification --- */
    char  riff[4];            /* resource interchange file tag */
    int   size_8;             /* number of bytes from the next field to the end of the file */
    char  wave[4];            /* WAVE file tag */

    /* --- sound format description --- */
    char  fmt[4];             /* waveform format tag */
    int   fmt_size;           /* usually 0x00000010; presumably the byte count of the format fields below */
    short formate_tag;        /* format category; 1 means linear PCM */
    short channels;           /* channel count: 1 = mono, 2 = stereo */
    int   samples_per_sec;    /* sampling frequency */
    int   avg_bytes_per_sec;  /* data rate: bytes per second = sampling frequency x bytes per sample */
    short block_align;        /* block alignment in bytes = channels * bits_per_sample / 8 */
    short bits_per_sample;    /* bits per sample (quantization depth) */

    /* --- sample data --- */
    char  data[4];            /* data tag */
    int   data_size;          /* byte count of the audio data that follows the header */
} _wave_pcm_hdr;
/*
 * Header template for 16 kHz, mono, 16-bit PCM audio.
 * Both size fields start at 0; the program fills them in after it knows
 * how many audio bytes were produced.
 */
_wave_pcm_hdr default_wave_hdr = {
    { 'R', 'I', 'F', 'F' },   /* riff */
    0,                        /* size_8 */
    { 'W', 'A', 'V', 'E' },   /* wave */
    { 'f', 'm', 't', ' ' },   /* fmt */
    16,                       /* fmt_size */
    1,                        /* formate_tag: PCM */
    1,                        /* channels: mono */
    16000,                    /* samples_per_sec */
    32000,                    /* avg_bytes_per_sec */
    2,                        /* block_align */
    16,                       /* bits_per_sample */
    { 'd', 'a', 't', 'a' },   /* data */
    0                         /* data_size */
};

int _tmain(int argc, _TCHAR* argv[])
{
    const char* usr = NULL;
    const char* pwd = NULL;
    const char* lgi_param = "appid = 58610d7f";
    int ret = MSPLogin(usr, pwd, lgi_param);
    if (MSP_SUCCESS != ret)
    {
        printf("MSPLogin failed, error code is: %d", ret);
    }


    const char * ssb_param = "voice_name = xiaoyan, aue = speex-wb;7, sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 2";
     ret = -1;
    const char * sessionID = QTTSSessionBegin(ssb_param, &ret);
    if (MSP_SUCCESS != ret)
    {
        printf("QTTSSessionBegin failed, error code is : %d", ret);
    }
    const char* src_text;
    char ch[1000];
    src_text=gets(ch);

   
    unsigned int text_len = strlen(src_text); //textLen參數為合成文本所占字節數
     ret = QTTSTextPut(sessionID, src_text, text_len, NULL);
    if (MSP_SUCCESS != ret)
    {
        printf("QTTSTextPut failed, error code is : %d", ret);
    }



    FILE* fp = fopen("112.wav", "wb");
    fwrite(&default_wave_hdr, sizeof(default_wave_hdr),1,fp);
    unsigned int audio_len = 0;
    int synth_status = 0;
    while (1)
    {
        const void * data = QTTSAudioGet(sessionID, &audio_len, &synth_status, &ret);
        if (NULL != data)
        {
            fwrite(data, audio_len, 1, fp);
            default_wave_hdr.data_size += audio_len;
        }
        if (MSP_TTS_FLAG_DATA_END == synth_status || MSP_SUCCESS != ret)
        {
            break;
        }
    }
    fclose(fp);
    default_wave_hdr.size_8 += default_wave_hdr.data_size + (sizeof(default_wave_hdr) - 8);
    fseek(fp, 4, 0);
    fwrite(&default_wave_hdr.size_8, sizeof(default_wave_hdr.size_8), 1, fp);
    fseek(fp, 40, 0);
    fwrite(&default_wave_hdr.data_size, sizeof(default_wave_hdr.data_size), 1, fp);
    fclose(fp);

    ret = QTTSSessionEnd(sessionID, "normal end");
    if (MSP_SUCCESS != ret)
    {
        printf("QTTSSessionEnd failed, error code is : %d", ret);
    }

    PlaySoundA("qweqwr.wav", NULL, SND_ALIAS);

    ret = MSPLogout();
    if (MSP_SUCCESS != ret)
    {
        printf("MSPLogout failed, error code is: %d", ret);
    }
    system("pause");
    return 0;
}

代碼

 

 

 

 

 

 

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM