1、wav音頻文件的格式
wav文件由文件頭和采樣數據2部分組成。
文件頭又由RIFF(Resource Interchange File Format)、WAVE文件標識段和聲音數據格式說明段組成。
各段的起始地址分別由RIFF標識符、WAVE標識符、以及波形格式標識符(FMT)標定。
(1)文件頭格式
注意:下面的地址是連續的
雖然上圖給出的數據標識符起始地址剛好是文件頭的末地址+1,但並不代表總是這樣。
因此,我們在讀取數據時最好是先找到數據標識符,該標識符的4個字節剛好是'd'、'a'、't'、'a'。
(2)數據格式
wav結構體定義:
/*
 * Header written at the start of a PCM WAV file: the RIFF/WAVE identifiers,
 * the "fmt " format description and the "data" chunk header.
 * With a 4-byte int and a 2-byte short the fields pack to 44 bytes with no
 * padding, so the struct can be written to the file as-is.
 */
typedef struct _wave_pcm_hdr {
    char riff[4];               // "RIFF": resource interchange file marker
    int size_8;                 // number of bytes from the next address to the end of the file
    char wave[4];               // "WAVE": wave file identifier
    char fmt[4];                // "fmt ": waveform format identifier
    int fmt_size;               // size of the format description that follows (usually 00000010H = 16)
    short int formate_tag;      // format category; 1 means linear PCM encoding
    short int channels;         // channel count: 1 = mono, 2 = stereo
    int samples_per_sec;        // sampling frequency
    int avg_bytes_per_sec;      // data rate (bytes per second = sampling frequency x bytes per sample)
    short int block_align;      // block alignment in bytes = channels * bits_per_sample / 8
    short int bits_per_sample;  // bits per sample (quantization depth)
    char data[4];               // "data": marks the start of the sample data
    int data_size;              // size of the sample data in bytes
} _wave_pcm_hdr;

/*
 * Default header: mono, 16 kHz, 16-bit PCM. size_8 and data_size start at 0
 * and must be filled in once the amount of audio data is known.
 */
_wave_pcm_hdr default_wave_hdr = {
    { 'R', 'I', 'F', 'F' },
    0,                      // size_8: unknown until the data has been written
    { 'W', 'A', 'V', 'E' },
    { 'f', 'm', 't', ' ' },
    16,                     // fmt_size
    1,                      // formate_tag: PCM
    1,                      // channels: mono
    16000,                  // samples_per_sec
    32000,                  // avg_bytes_per_sec = 16000 * 2
    2,                      // block_align = 1 * 16 / 8
    16,                     // bits_per_sample
    { 'd', 'a', 't', 'a' },
    0                       // data_size: unknown until the data has been written
};
代碼:
#include "stdafx.h"
#include "msp_cmn.h"
#include "msp_errors.h"
#include "qtts.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <Windows.h>

#ifdef _WIN64
#pragma comment (lib,"msc_x64.lib")
#else
#pragma comment (lib,"msc.lib")
#endif // _WIN64
#pragma comment(lib,"WinMM.lib")

/*
 * Header written at the start of the output WAV file: the RIFF/WAVE
 * identifiers, the "fmt " format description and the "data" chunk header.
 */
typedef struct _wave_pcm_hdr {
    char riff[4];               // "RIFF": resource interchange file marker
    int size_8;                 // number of bytes from the next address to the end of the file
    char wave[4];               // "WAVE": wave file identifier
    char fmt[4];                // "fmt ": waveform format identifier
    int fmt_size;               // size of the format description that follows (usually 00000010H = 16)
    short int formate_tag;      // format category; 1 means linear PCM encoding
    short int channels;         // channel count: 1 = mono, 2 = stereo
    int samples_per_sec;        // sampling frequency
    int avg_bytes_per_sec;      // data rate (bytes per second = sampling frequency x bytes per sample)
    short int block_align;      // block alignment in bytes = channels * bits_per_sample / 8
    short int bits_per_sample;  // bits per sample (quantization depth)
    char data[4];               // "data": marks the start of the sample data
    int data_size;              // size of the sample data in bytes
} _wave_pcm_hdr;

/* Default header: mono, 16 kHz, 16-bit PCM; the two size fields are filled in after synthesis. */
_wave_pcm_hdr default_wave_hdr = {
    { 'R', 'I', 'F', 'F' },
    0,
    { 'W', 'A', 'V', 'E' },
    { 'f', 'm', 't', ' ' },
    16,
    1,
    1,
    16000,
    32000,
    2,
    16,
    { 'd', 'a', 't', 'a' },
    0
};

/* File that receives the synthesized audio and is played back afterwards. */
static const char output_wav_path[] = "112.wav";

/*
 * Synthesizes `text` through a QTTS session and stores the audio in `path`
 * as a WAV file.
 *
 * A placeholder header is written first, the audio chunks returned by
 * QTTSAudioGet are appended, and the header is then rewritten with the final
 * sizes while the file is still open.
 *
 * Returns 0 when the complete file was written, -1 on any failure.
 */
static int synthesize_to_wav(const char *text, const char *path)
{
    const char *ssb_param = "voice_name = xiaoyan, aue = speex-wb;7, sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 2";
    /* Work on a copy so the shared default header keeps its zeroed size fields. */
    _wave_pcm_hdr hdr = default_wave_hdr;
    const char *sessionID = NULL;
    FILE *fp = NULL;
    unsigned int audio_len = 0;
    int synth_status = 0;
    int ret = -1;
    int ok = 0;

    sessionID = QTTSSessionBegin(ssb_param, &ret);
    if (MSP_SUCCESS != ret) {
        printf("QTTSSessionBegin failed, error code is : %d", ret);
        return -1;
    }

    /* The length argument is the number of bytes occupied by the text. */
    ret = QTTSTextPut(sessionID, text, (unsigned int)strlen(text), NULL);
    if (MSP_SUCCESS != ret) {
        printf("QTTSTextPut failed, error code is : %d", ret);
        goto end_session;
    }

    fp = fopen(path, "wb");
    if (NULL == fp) {
        printf("failed to open %s for writing\n", path);
        goto end_session;
    }

    /* Placeholder header; rewritten below once the data size is known. */
    if (fwrite(&hdr, sizeof(hdr), 1, fp) != 1) {
        printf("failed to write the wav header to %s\n", path);
        goto close_file;
    }

    ok = 1;
    for (;;) {
        const void *data = QTTSAudioGet(sessionID, &audio_len, &synth_status, &ret);
        if (NULL != data) {
            if (fwrite(data, 1, audio_len, fp) != audio_len) {
                printf("failed to write audio data to %s\n", path);
                ok = 0;
                break;
            }
            hdr.data_size += (int)audio_len;
        }
        if (MSP_SUCCESS != ret) {
            printf("QTTSAudioGet failed, error code is : %d", ret);
            ok = 0;
            break;
        }
        if (MSP_TTS_FLAG_DATA_END == synth_status) {
            break;
        }
    }

    /*
     * Patch the header before the file is closed. The sizes are written even
     * after a failed synthesis so that the header matches the audio that did
     * reach the file.
     */
    hdr.size_8 = hdr.data_size + (int)(sizeof(hdr) - 8);
    if (fseek(fp, 0, SEEK_SET) != 0 || fwrite(&hdr, sizeof(hdr), 1, fp) != 1) {
        printf("failed to update the wav header of %s\n", path);
        ok = 0;
    }

close_file:
    if (fclose(fp) != 0) {
        printf("failed to close %s\n", path);
        ok = 0;
    }

end_session:
    ret = QTTSSessionEnd(sessionID, ok ? "normal end" : "error end");
    if (MSP_SUCCESS != ret) {
        printf("QTTSSessionEnd failed, error code is : %d", ret);
    }
    return ok ? 0 : -1;
}

/*
 * Logs in to the speech service, reads one line of text from standard input,
 * synthesizes it into 112.wav, plays the file and logs out.
 *
 * Returns 0 when the audio was synthesized, 1 otherwise.
 */
int _tmain(int argc, _TCHAR* argv[])
{
    const char *usr = NULL;
    const char *pwd = NULL;
    const char *lgi_param = "appid = 58610d7f";
    char ch[1000];
    int exit_code = 1;
    int ret;

    (void)argc;
    (void)argv;

    ret = MSPLogin(usr, pwd, lgi_param);
    if (MSP_SUCCESS != ret) {
        printf("MSPLogin failed, error code is: %d", ret);
        system("pause");
        return exit_code;
    }

    /* fgets bounds the read to the buffer; longer input is truncated. */
    if (NULL == fgets(ch, sizeof(ch), stdin)) {
        printf("failed to read the text to synthesize\n");
    } else {
        /* fgets keeps the line terminator; strip it so it is not synthesized. */
        ch[strcspn(ch, "\r\n")] = '\0';
        if (0 == synthesize_to_wav(ch, output_wav_path)) {
            /* Play the file that was just written, blocking until it finishes. */
            PlaySoundA(output_wav_path, NULL, SND_FILENAME | SND_SYNC);
            exit_code = 0;
        }
    }

    ret = MSPLogout();
    if (MSP_SUCCESS != ret) {
        printf("MSPLogout failed, error code is: %d", ret);
    }

    system("pause");
    return exit_code;
}