1. 將百度語音識別 demo 下載下來,並用 Maven 封裝成一個 jar 包,核心代碼如下:
package com.baidu.speech.restapi.asrdemo; import com.alibaba.fastjson.JSONObject; import com.baidu.speech.restapi.asrdemo.common.ConnUtil; import com.baidu.speech.restapi.asrdemo.common.DemoException; import com.baidu.speech.restapi.asrdemo.common.TokenHolder; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.net.HttpURLConnection; import java.net.URL; public class AsrUtil { private final boolean METHOD_RAW = false; // 默認以json方式上傳音頻文件 // 填寫網頁上申請的appkey 如 $apiKey="g8eBUMSokVB1BHGmgxxxxxx" // private final String APP_KEY = "kVcnfD9iW2XVZSMaLMrtLYIz"; // my private static final String APP_KEY = "XXXXXXXXXXXXXX"; // 填寫網頁上申請的APP SECRET 如 $SECRET_KEY="94dc99566550d87f8fa8ece112xxxxx" // private final String SECRET_KEY = "O9o1O213UgG5LFn0bDGNtoRN3VWl2du6"; // my private static final String SECRET_KEY = "XXXXXXXXXXXX"; // 需要識別的文件 private static final String FILENAME = "16k.pcm"; // 文件格式, 支持pcm/wav/amr 格式,極速版額外支持m4a 格式 private static final String FORMAT = FILENAME.substring(FILENAME.length() - 3); private static String CUID = "1234567JAVA"; // 采樣率固定值 private static final int RATE = 16000; private static String URL; private static int DEV_PID; //private int LM_ID;//測試自訓練平台需要打開此注釋 private static String SCOPE; // 普通版 參數 // { // URL = "http://vop.baidu.com/server_api"; // 可以改為https // // 1537 表示識別普通話,使用輸入法模型。 其它語種參見文檔 // DEV_PID = 1537; // SCOPE = "audio_voice_assistant_get"; // } // 自訓練平台 參數 /*{ //自訓練平台模型上線后,您會看見 第二步:“”獲取專屬模型參數pid:8001,modelid:1234”,按照這個信息獲取 dev_pid=8001,lm_id=1234 DEV_PID = 8001; LM_ID = 1234; }*/ // 極速版 參數 static { URL = "http://vop.baidu.com/pro_api"; // 可以改為https DEV_PID = 80001; SCOPE = "brain_enhanced_asr"; } /* 忽略scope檢查,非常舊的應用可能沒有 { SCOPE = null; } */ public static String obtainAsrResult(byte[] bytes) throws IOException, DemoException { String resultJson = execute(bytes); System.out.println("識別結束:結果是:"); System.out.println(resultJson); return resultJson; } public static String execute(byte[] bytes) throws 
IOException, DemoException { TokenHolder holder = new TokenHolder(APP_KEY, SECRET_KEY, SCOPE); holder.resfresh(); String token = holder.getToken(); String result = null; result = runJsonPostMethod(token, bytes); return result; } private String runRawPostMethod(String token) throws IOException, DemoException { String url2 = URL + "?cuid=" + ConnUtil.urlEncode(CUID) + "&dev_pid=" + DEV_PID + "&token=" + token; //測試自訓練平台需要打開以下信息 //String url2 = URL + "?cuid=" + ConnUtil.urlEncode(CUID) + "&dev_pid=" + DEV_PID + "&lm_id="+ LM_ID + "&token=" + token; String contentTypeStr = "audio/" + FORMAT + "; rate=" + RATE; //System.out.println(url2); byte[] content = getFileContent(FILENAME); HttpURLConnection conn = (HttpURLConnection) new URL(url2).openConnection(); conn.setConnectTimeout(5000); conn.setRequestProperty("Content-Type", contentTypeStr); conn.setRequestMethod("POST"); conn.setDoOutput(true); conn.getOutputStream().write(content); conn.getOutputStream().close(); System.out.println("url is " + url2); System.out.println("header is " + "Content-Type :" + contentTypeStr); String result = ConnUtil.getResponseString(conn); return result; } public static String runJsonPostMethod(String token,byte[] bytes) throws DemoException, IOException { // byte[] content = getFileContent(FILENAME); String speech = base64Encode(bytes); JSONObject params = new JSONObject(); params.put("dev_pid", DEV_PID); //params.put("lm_id",LM_ID);//測試自訓練平台需要打開注釋 params.put("format", "wav"); params.put("rate", RATE); params.put("token", token); params.put("cuid", CUID); params.put("channel", "1"); params.put("len", bytes.length); params.put("speech", speech); // System.out.println(params.toString()); HttpURLConnection conn = (HttpURLConnection) new URL(URL).openConnection(); conn.setConnectTimeout(5000); conn.setRequestMethod("POST"); conn.setRequestProperty("Content-Type", "application/json; charset=utf-8"); conn.setDoOutput(true); conn.getOutputStream().write(params.toString().getBytes()); 
conn.getOutputStream().close(); String result = ConnUtil.getResponseString(conn); params.put("speech", "base64Encode(getFileContent(FILENAME))"); System.out.println("url is : " + URL); System.out.println("params is :" + params.toString()); return result; } private byte[] getFileContent(String filename) throws DemoException, IOException { File file = new File(filename); if (!file.canRead()) { System.err.println("文件不存在或者不可讀: " + file.getAbsolutePath()); throw new DemoException("file cannot read: " + file.getAbsolutePath()); } FileInputStream is = null; try { is = new FileInputStream(file); return ConnUtil.getInputStreamContent(is); } finally { if (is != null) { try { is.close(); } catch (IOException e) { e.printStackTrace(); } } } } private static String base64Encode(byte[] content) { /** Base64.Encoder encoder = Base64.getEncoder(); // JDK 1.8 推薦方法 String str = encoder.encodeToString(content); **/ char[] chars = Base64Util.encode(content); // 1.7 及以下,不推薦,請自行跟換相關庫 String str = new String(chars); return str; } }
調用 obtainAsrResult 方法即可獲得識別后字符串
2.使用 ffmpeg 對音頻進行轉碼
下載 ffmpeg,並將其 bin 目錄加入 PATH 環境變量即可。
代碼如下:
@Override public String aiAsrTest(MultipartFile file) { String r = null; try { byte[] bytes = file.getBytes(); // 對上傳文件進行轉碼處理 String path = "D:\\bwbd\\temp\\"; File dir = new File(path); if (dir == null || !dir.exists()) { dir.mkdirs(); } File file1 = new File(path + "16k1.wav"); file.transferTo(file1); File file2 = new File(path + "16k2.wav"); try { logger.info("========音頻格式轉換======"); Runtime runtime = Runtime.getRuntime(); String cutCmd = "ffmpeg -y -i " + file1 + " -acodec pcm_s16le -f s16le -ac 1 -ar 16000 " + file2; Process proce = runtime.exec(cutCmd); InputStream erro = proce.getErrorStream(); byte[] a = new byte[1024]; int j = 0; while ((j = erro.read(a)) > -1) { // logger.info(new String(a)); } } catch (Exception e) { e.printStackTrace(); logger.info("=========文件 "+ file + " 正在轉換出現異常"); } byte[] fileByteArray = FileUtil.getFileByteArray(file2); String result = AsrUtil.obtainAsrResult(fileByteArray); log.info("===ai接口返回:" + result); JSONObject jsonObject = JSONObject.parseObject(result); Object result1 = jsonObject.get("result"); if (null != result1) { List<String> strings = JSONArray.parseArray(result1.toString(), String.class); r = strings.get(0); r = new String(r.getBytes(),"utf-8"); } } catch (Exception e) { e.printStackTrace(); } return r; }
最后,附上用 Vue 實現錄音功能、上傳到后台並獲得識別后字符串的代碼。
1.methods域內定義以下方法
initAudio(){ // this.$nextTick(() => { // try { // // <!-- 檢查是否能夠調用麥克風 --> // window.AudioContext = window.AudioContext || window.webkitAudioContext; // navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia; // window.URL = window.URL || window.webkitURL; // // audio_context = new AudioContext; // console.log('navigator.getUserMedia ' + (navigator.getUserMedia ? 'available.' : 'not present!')); // } catch (e) { // alert('No web audio support in this browser!'); // } var _this = this; navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia; navigator.getUserMedia({audio: true}, function (stream) { let recorder = new HZRecorder(stream); _this.recorder = recorder; console.log('初始化完成'); }, function(e) { console.log('No live audio input: ' + e); }); // }) }, readyOriginal () { if (!this.isVoice) { // <!-- 開啟錄音 --> this.recorder && this.recorder.start(); this.isVoice = true } else { this.isVoice = false // 結束錄音 this.recorder && this.recorder.stop(); setTimeout(()=> { // <!-- 錄音上傳 --> https://localhost/bwbd/fg/aiAsrTest var mp3Blob = this.recorder.upload(); var fd = new FormData(); fd.append('file', mp3Blob); // this.$axios.post('https://localhost/api/webUser/insertUserLog',allJoin).then( // res=>{ // if(res.data.data===true){ // return // } // } // ) this.$axios.post('http://localhost/bwbd/fg/aiAsrTest',fd).then((res) => { // 這里做登錄攔截 if (res.data.status === 200) { console.log('保存成功'); console.log(res.data.data) } else { this.returnmsg = '上傳失敗' } }) },1000) } },
在 html 元素上調用 readyOriginal 方法,並在 mounted 鉤子內調用 initAudio 方法。
記得引入js文件
import { HZRecorder} from '../utils/HZRecorder.js';
/**
 * Records mono audio from a MediaStream and encodes it as a PCM WAV Blob.
 *
 * @param {MediaStream} stream  microphone stream to record from
 * @param {Object} [config]
 * @param {number} [config.sampleBits=16]    output bits per sample, 8 or 16
 * @param {number} [config.sampleRate=16000] requested output sample rate in Hz
 */
function HZRecorder(stream, config) {
    config = config || {};
    config.sampleBits = config.sampleBits || 16;      // bits per sample: 8 or 16
    config.sampleRate = config.sampleRate || 16000;   // sample rate: 16 kHz

    // Prefer the standard constructor; fall back to the prefixed one.
    var context = new (window.AudioContext || window.webkitAudioContext)();
    var audioInput = context.createMediaStreamSource(stream);
    var createScript = context.createScriptProcessor || context.createJavaScriptNode;
    var recorder = createScript.apply(context, [4096, 1, 1]);

    var audioData = {
        size: 0,                                  // number of samples recorded so far
        buffer: [],                               // recorded chunks
        inputSampleRate: context.sampleRate,      // input sample rate
        inputSampleBits: 16,                      // input bits per sample: 8 or 16
        outputSampleRate: config.sampleRate,      // output sample rate
        outputSampleBits: config.sampleBits,      // output bits per sample: 8 or 16

        // Appends a copy of one chunk of samples.
        input: function (data) {
            this.buffer.push(new Float32Array(data));
            this.size += data.length;
        },

        // Drops everything recorded so far.
        clear: function () {
            this.buffer = [];
            this.size = 0;
        },

        // Merges the recorded chunks and reduces them to the output sample rate.
        compress: function () {
            // Merge
            var data = new Float32Array(this.size);
            var offset = 0;
            for (var i = 0; i < this.buffer.length; i++) {
                data.set(this.buffer[i], offset);
                offset += this.buffer[i].length;
            }
            // Downsample by picking the nearest earlier input sample. The ratio is kept
            // fractional so that e.g. 44100 Hz input really yields the rate written to the
            // WAV header, and it is clamped to 1 so a lower input rate is passed through
            // unchanged (encodeWAV then reports the input rate). No low-pass filter is applied.
            var ratio = Math.max(1, this.inputSampleRate / this.outputSampleRate);
            var length = Math.floor(data.length / ratio);
            var result = new Float32Array(length);
            for (var index = 0; index < length; index++) {
                var source = Math.min(data.length - 1, Math.floor(index * ratio));
                result[index] = data[source];
            }
            return result;
        },

        // Builds a 44-byte RIFF/WAVE header followed by the PCM samples.
        encodeWAV: function () {
            var sampleRate = Math.min(this.inputSampleRate, this.outputSampleRate);
            var sampleBits = Math.min(this.inputSampleBits, this.outputSampleBits);
            var bytes = this.compress();
            var dataLength = bytes.length * (sampleBits / 8);
            var buffer = new ArrayBuffer(44 + dataLength);
            var data = new DataView(buffer);

            var channelCount = 1; // mono
            var offset = 0;

            var writeString = function (str) {
                for (var i = 0; i < str.length; i++) {
                    data.setUint8(offset + i, str.charCodeAt(i));
                }
            };

            // RIFF chunk identifier
            writeString('RIFF'); offset += 4;
            // Bytes from the next field to the end of the file, i.e. file size - 8
            data.setUint32(offset, 36 + dataLength, true); offset += 4;
            // WAVE format tag
            writeString('WAVE'); offset += 4;
            // Format chunk identifier
            writeString('fmt '); offset += 4;
            // Format chunk size, normally 0x10 = 16
            data.setUint32(offset, 16, true); offset += 4;
            // Audio format (1 = PCM)
            data.setUint16(offset, 1, true); offset += 2;
            // Channel count
            data.setUint16(offset, channelCount, true); offset += 2;
            // Sample rate: samples per second per channel
            data.setUint32(offset, sampleRate, true); offset += 4;
            // Byte rate: channels * sample rate * bits per sample / 8
            data.setUint32(offset, channelCount * sampleRate * (sampleBits / 8), true); offset += 4;
            // Block align: channels * bits per sample / 8
            data.setUint16(offset, channelCount * (sampleBits / 8), true); offset += 2;
            // Bits per sample
            data.setUint16(offset, sampleBits, true); offset += 2;
            // Data chunk identifier
            writeString('data'); offset += 4;
            // Size of the sample data, i.e. total size - 44
            data.setUint32(offset, dataLength, true); offset += 4;

            // Sample data
            var s;
            if (sampleBits === 8) {
                for (var i = 0; i < bytes.length; i++, offset++) {
                    s = Math.max(-1, Math.min(1, bytes[i]));
                    var val = s < 0 ? s * 0x8000 : s * 0x7FFF;
                    // Map the signed 16-bit range onto 0..255.
                    val = parseInt(255 / (65535 / (val + 32768)));
                    data.setInt8(offset, val);
                }
            } else {
                for (var k = 0; k < bytes.length; k++, offset += 2) {
                    s = Math.max(-1, Math.min(1, bytes[k]));
                    data.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
                }
            }

            return new Blob([data], { type: 'audio/wav' });
        }
    };

    // Start recording
    this.start = function () {
        audioInput.connect(recorder);
        recorder.connect(context.destination);
    };

    // Stop recording
    this.stop = function () {
        recorder.disconnect();
    };

    // Discard everything recorded so far, so the next recording starts empty
    this.clear = function () {
        audioData.clear();
    };

    // Stop and return the recording as a WAV Blob
    this.getBlob = function () {
        this.stop();
        return audioData.encodeWAV();
    };

    // Play the recording back through the given audio element
    this.play = function (audio) {
        audio.src = window.URL.createObjectURL(this.getBlob());
    };

    // Return the Blob to upload
    this.upload = function () {
        return this.getBlob();
    };

    // Collect audio as it arrives
    recorder.onaudioprocess = function (e) {
        audioData.input(e.inputBuffer.getChannelData(0));
    };

    return this;
}

export { HZRecorder }