C#文字轉語音


  之前的項目中需要把用戶輸入的文字轉換為語音文件再播放出來,當時一頭霧水,沒搞明白怎麼實現。查了好多資料,有的方案是使用在線服務合成語音文件,這就有局限性了:不能離線合成,所以就放棄了。後來發現訊飛是有離線語音合成包的,但是官網沒有提供C#調用的離線包,Windows平台只有C++寫的包,這個就比較坑了。後來想到用C#調用C++ DLL的方式看能不能實現,參考了訊飛官網和論壇中大神們寫的代碼,最終實現了C#文字轉換語音的功能。現在把代碼貼出來,小伙伴們可以直接使用,不過需要先去訊飛官網購買離線包。

public class iFlyTTS
    {
        /// <summary>
        /// P/Invoke declarations for the functions this class imports from the native msc.dll.
        /// The int return values / errorCode outputs are treated as "0 means success" by the callers below.
        /// </summary>
        private class TTSDll
        {
            #region TTS dll import
            [DllImport("msc.dll", CallingConvention = CallingConvention.StdCall)]
            public static extern int MSPLogin(string one, string two, string configs);
            [DllImport("msc.dll", CallingConvention = CallingConvention.StdCall)]
            public static extern void MSPLogout();

            [DllImport("msc.dll", CallingConvention = CallingConvention.StdCall)]
            public static extern IntPtr QTTSSessionBegin(string _params, ref int errorCode);

            [DllImport("msc.dll", CallingConvention = CallingConvention.StdCall)]
            public static extern int QTTSTextPut(string sessionID, string textString, uint textLen, string _params);

            [DllImport("msc.dll", CallingConvention = CallingConvention.StdCall)]
            public static extern IntPtr QTTSAudioGet(string sessionID, ref int audioLen, ref SynthStatus synthStatus, ref int errorCode);

            [DllImport("msc.dll", CallingConvention = CallingConvention.StdCall)]
            public static extern IntPtr QTTSAudioInfo(string sessionID);

            [DllImport("msc.dll", CallingConvention = CallingConvention.StdCall)]
            public static extern int QTTSSessionEnd(string sessionID, string hints);

            // NOTE(review): this is the only import declared Cdecl; every other one is StdCall.
            // It is not called anywhere in this class - confirm the convention against the SDK header before using it.
            [DllImport("msc.dll", CallingConvention = CallingConvention.Cdecl)]
            public static extern int QTTSGetParam(string sessionID, string paramName, string paramValue, ref uint valueLen);
            #endregion
        }

        /// <summary>Size in bytes of the WAV header produced by <see cref="getWave_Header"/>.</summary>
        private const int WaveHeaderSize = 44;

        /// <summary>Sample rate written to the WAV header; must agree with "sample_rate" in the session parameters.</summary>
        private const int SampleRate = 8000;

        /// <summary>Number of channels written to the WAV header (mono).</summary>
        private const ushort ChannelCount = 1;

        /// <summary>Bits per sample written to the WAV header.</summary>
        private const ushort SampleBits = 16;

        /// <summary>Pause between polls when the engine returned no audio but has not finished yet.</summary>
        private const int PollDelayMilliseconds = 50;

        // Login parameters are kept so the instance can log in again after MultiSpeek has logged out.
        private readonly string loginConfigs;
        private bool loggedIn;

        /// <summary>
        /// Logs in to the TTS engine with the given configuration string.
        /// </summary>
        /// <param name="configs">Login parameters passed to MSPLogin (e.g. "appid = ..., work_dir = .").</param>
        /// <exception cref="InvalidOperationException">MSPLogin returned a non-zero error code.</exception>
        public iFlyTTS(string configs)
        {
            loginConfigs = configs;
            Login();
        }

        /// <summary>
        /// Synthesizes <paramref name="SpeekText"/> into 16-bit mono PCM WAV data and, when
        /// <paramref name="outWaveFlie"/> is not null, writes the result to that file.
        /// The engine is logged out when the call finishes (as before); a later call logs in again automatically.
        /// </summary>
        /// <param name="SpeekText">Text to synthesize.</param>
        /// <param name="outWaveFlie">Path of the WAV file to create, or null to skip writing a file.</param>
        /// <exception cref="ArgumentNullException"><paramref name="SpeekText"/> is null.</exception>
        /// <exception cref="InvalidOperationException">A native TTS call reported an error.</exception>
        public void MultiSpeek(string SpeekText, string outWaveFlie = null)
        {
            if (SpeekText == null)
            {
                throw new ArgumentNullException("SpeekText");
            }

            try
            {
                if (!loggedIn)
                {
                    Login();
                }

                using (MemoryStream mStream = new MemoryStream())
                {
                    // Reserve room for the header first, so that writing it later
                    // does not overwrite the beginning of the audio data.
                    mStream.Write(new byte[WaveHeaderSize], 0, WaveHeaderSize);

                    speek(SpeekText, mStream);

                    // Now that the audio length is known, go back and fill in the real header.
                    WAVE_Header header = getWave_Header((int)mStream.Length - WaveHeaderSize);
                    byte[] headerByte = StructToBytes(header);
                    mStream.Position = 0;
                    mStream.Write(headerByte, 0, headerByte.Length);

                    if (outWaveFlie != null)
                    {
                        using (FileStream ofs = new FileStream(outWaveFlie, FileMode.Create))
                        {
                            mStream.WriteTo(ofs);
                        }
                    }
                }
            }
            finally
            {
                if (loggedIn)
                {
                    TTSDll.MSPLogout();
                    loggedIn = false;
                }
            }
        }

        /// <summary>
        /// Calls MSPLogin with the stored configuration and records the logged-in state.
        /// </summary>
        private void Login()
        {
            int ret = TTSDll.MSPLogin(null, null, loginConfigs);
            if (ret != 0)
            {
                throw new InvalidOperationException("初始化TTS引擎錯誤,錯誤代碼:" + ret);
            }
            loggedIn = true;
        }

        /// <summary>
        /// Converts text to audio and appends the audio bytes to the given memory stream.
        /// </summary>
        /// <param name="SpeekText">Text to synthesize.</param>
        /// <param name="mStream">Stream that receives the synthesized audio bytes.</param>
        private void speek(string SpeekText, MemoryStream mStream)
        {
            // NOTE(review): "aue=speex-wb;7" requests a Speex encoding while the WAV header written by
            // MultiSpeek declares raw 16-bit PCM - presumably the local engine ignores aue; confirm with the SDK docs.
            string szParams = "engine_type = local, voice_name = xiaoyan, text_encoding = GB2312, tts_res_path = fo|res\\tts\\xiaoyan.jet;fo|res\\tts\\common.jet, sample_rate = 8000,aue=speex-wb;7, speed = 50, volume = 50, pitch = 50, rdn = 2";
            int ret = 0;

            // Check the error code before touching the returned pointer: on failure it may be null.
            IntPtr sessionPtr = TTSDll.QTTSSessionBegin(szParams, ref ret);
            if (ret != 0 || sessionPtr == IntPtr.Zero)
            {
                throw new InvalidOperationException("初始化TTS引會話錯誤,錯誤代碼:" + ret);
            }
            string sessionID = Ptr2Str(sessionPtr);

            try
            {
                // The byte count uses Encoding.Default, which matches the default ANSI string marshaling of the
                // P/Invoke call. NOTE(review): this assumes the system ANSI code page is compatible with the
                // "text_encoding = GB2312" session parameter - verify on non-Chinese locales.
                ret = TTSDll.QTTSTextPut(sessionID, SpeekText, (uint)Encoding.Default.GetByteCount(SpeekText), string.Empty);
                if (ret != 0)
                {
                    throw new InvalidOperationException("向服務器發送數據,錯誤代碼:" + ret);
                }

                SynthStatus synth_status = SynthStatus.TTS_FLAG_STILL_HAVE_DATA;
                while (synth_status != SynthStatus.TTS_FLAG_DATA_END)
                {
                    int audio_len = 0;
                    IntPtr audio_data = TTSDll.QTTSAudioGet(sessionID, ref audio_len, ref synth_status, ref ret);
                    if (ret != 0)
                    {
                        // Fail loudly instead of silently producing a truncated file.
                        throw new InvalidOperationException("獲取合成音頻錯誤,錯誤代碼:" + ret);
                    }

                    if (audio_len > 0 && audio_data != IntPtr.Zero)
                    {
                        byte[] data = new byte[audio_len];
                        Marshal.Copy(audio_data, data, 0, audio_len);
                        mStream.Write(data, 0, data.Length);
                    }
                    else if (synth_status != SynthStatus.TTS_FLAG_DATA_END)
                    {
                        // Nothing available yet: yield briefly rather than spinning on the native call.
                        Thread.Sleep(PollDelayMilliseconds);
                    }
                }
            }
            catch
            {
                // Best-effort cleanup; the original failure is the one worth reporting, so the
                // result of ending the session is deliberately ignored here.
                TTSDll.QTTSSessionEnd(sessionID, "");
                throw;
            }

            ret = TTSDll.QTTSSessionEnd(sessionID, "");
            if (ret != 0)
            {
                throw new InvalidOperationException("結束TTS會話錯誤,錯誤代碼:" + ret);
            }
        }

        /// <summary>
        /// In-memory image of the 44-byte WAV (RIFF) header. Fields are laid out sequentially without
        /// padding so that <see cref="StructToBytes"/> yields the exact on-disk byte sequence.
        /// </summary>
        [StructLayout(LayoutKind.Sequential, Pack = 1)]
        private struct WAVE_Header
        {
            public int RIFF_ID;            // 4 bytes, 'RIFF'
            public int File_Size;          // 4 bytes, set to data length + 36 by getWave_Header
            public int RIFF_Type;          // 4 bytes, 'WAVE'

            public int FMT_ID;             // 4 bytes, 'fmt '
            public int FMT_Size;           // 4 bytes, 16 or 18; 18 means extra information follows
            public short FMT_Tag;          // 2 bytes, encoding, normally 0x0001
            public ushort FMT_Channel;     // 2 bytes, channel count: 1 = mono, 2 = stereo
            public int FMT_SamplesPerSec;  // 4 bytes, sample rate
            public int AvgBytesPerSec;     // 4 bytes, bytes of audio data per second
            public ushort BlockAlign;      // 2 bytes, block alignment (bytes per sample frame)
            public ushort BitsPerSample;   // 2 bytes, bits per sample

            public int DATA_ID;            // 4 bytes, 'data'
            public int DATA_Size;          // 4 bytes, length of the audio data
        }

        /// <summary>
        /// Builds the WAV header for an audio payload of the given length.
        /// </summary>
        /// <param name="data_len">Length of the audio data in bytes.</param>
        /// <returns>The populated WAV header structure.</returns>
        WAVE_Header getWave_Header(int data_len)
        {
            ushort blockAlign = (ushort)(ChannelCount * SampleBits / 8);   // 2 bytes per frame

            WAVE_Header wav_Header = new WAVE_Header();
            wav_Header.RIFF_ID = 0x46464952;        // "RIFF"
            wav_Header.File_Size = data_len + 36;   // data length plus the header bytes that follow this field
            wav_Header.RIFF_Type = 0x45564157;      // "WAVE"

            wav_Header.FMT_ID = 0x20746D66;         // "fmt "
            wav_Header.FMT_Size = 16;
            wav_Header.FMT_Tag = 0x0001;
            wav_Header.FMT_Channel = ChannelCount;
            wav_Header.FMT_SamplesPerSec = SampleRate;
            wav_Header.AvgBytesPerSec = SampleRate * blockAlign;   // 16000
            wav_Header.BlockAlign = blockAlign;
            wav_Header.BitsPerSample = SampleBits;

            wav_Header.DATA_ID = 0x61746164;        // "data"
            wav_Header.DATA_Size = data_len;

            return wav_Header;
        }

        /// <summary>
        /// Converts a structure into its raw byte sequence.
        /// </summary>
        /// <param name="structure">Structure to convert.</param>
        /// <returns>The bytes of the marshaled structure.</returns>
        Byte[] StructToBytes(Object structure)
        {
            Int32 size = Marshal.SizeOf(structure);
            IntPtr buffer = Marshal.AllocHGlobal(size);
            try
            {
                Marshal.StructureToPtr(structure, buffer, false);
                Byte[] bytes = new Byte[size];
                Marshal.Copy(buffer, bytes, 0, size);
                return bytes;
            }
            finally
            {
                // Always release the unmanaged scratch buffer, even if marshaling throws.
                Marshal.FreeHGlobal(buffer);
            }
        }

        /// <summary>
        /// Reads a zero-terminated byte string from unmanaged memory and decodes it with <see cref="Encoding.Default"/>.
        /// </summary>
        /// <param name="p">Pointer to the unmanaged string.</param>
        /// <returns>The decoded string, or an empty string when <paramref name="p"/> is <see cref="IntPtr.Zero"/>.</returns>
        public static string Ptr2Str(IntPtr p)
        {
            if (p == IntPtr.Zero)
            {
                return string.Empty;
            }

            List<byte> lb = new List<byte>();
            int offset = 0;
            byte current;
            while ((current = Marshal.ReadByte(p, offset)) != 0)
            {
                lb.Add(current);
                offset++;
            }
            return Encoding.Default.GetString(lb.ToArray());
        }
    }

  使用的時候直接調用下面的方法就行。如果要改變音色或者播放速度,都可以通過合成參數來配置,這個小伙伴們自己研究下;我調試了好幾種,感覺現在這組參數的效果挺好。

 /// <summary>
 /// Synthesizes the given text to a WAV file whose name is the text itself plus ".wav".
 /// Failures are reported through <see cref="System.Diagnostics.Trace"/> instead of being silently dropped;
 /// they are still not propagated to the caller.
 /// </summary>
 /// <param name="sourcewav">Text to synthesize; null or blank input is ignored.</param>
 private void 文字轉語音(String sourcewav)
        {
            // Nothing to synthesize - avoid logging in to the engine and creating a file for blank input.
            if (String.IsNullOrWhiteSpace(sourcewav))
            {
                return;
            }

            try
            {
                // The appid is the one issued when the offline package is purchased from the iFlytek website.
                string login_params = "appid = ******, work_dir = .";
                iFlyTTS tts = new iFlyTTS(login_params);
                // Fill in the directory the file should be saved to.
                // NOTE(review): the text is used verbatim as the file name; characters that are
                // invalid in a path will make this throw - consider sanitizing.
                string strPath = System.IO.Path.Combine(***,  sourcewav + ".wav");
                tts.MultiSpeek(sourcewav.Trim(), strPath);
            }
            catch (Exception e)
            {
                // Keep the best-effort contract (no exception escapes), but leave a trace of what went wrong.
                System.Diagnostics.Trace.TraceError("Text-to-speech failed: " + e);
            }
        }

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM