使用python實現AI機器人聊天工具其實還是蠻簡單的,大致原理如下:
1、使用python的SpeechRecognition模塊進行語音錄音,代碼如下:
import speech_recognition as sr


# Use SpeechRecognition to record audio from the microphone.
def my_record(rate=16000):
    """Record one utterance from the default microphone into ``temp.wav``.

    Args:
        rate: Sample rate passed to ``sr.Microphone`` (16000 by default).
    """
    recognizer = sr.Recognizer()
    with sr.Microphone(sample_rate=rate) as microphone:
        print("請講話(正在傾聽中......):")
        recording = recognizer.listen(microphone)
        print("回答思考中.....")
    wav_bytes = recording.get_wav_data()
    with open("temp.wav", "wb") as wav_file:
        wav_file.write(wav_bytes)
2、使用百度的AI開發平台進行語音識別,具體教程可百度,地址:https://console.bce.baidu.com/ai/?_=1602817929103&fromai=1#/ai/speech/overview/index
需要注冊一個應用,千萬要記住,一定要點擊免費領取,不然調用的時候會報錯。
python使用百度AI智能平台的代碼如下(將之前錄音好的文件傳輸給API平台,返回識別后的文本):
from aip import AipSpeech

# NOTE: demo credentials are hard-coded here; replace them with the values of
# your own Baidu application and keep real keys out of version control.
APP_ID = "22817840"  # APP_ID of the Baidu application
API_KEY = "ndQCCddCNisGHe87G5agXsGm"  # key
SECRET_KEY = "o0pLLR6DIPEnBXtOvpWXK3QxruFn1G3N"  # secret_key
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
path = 'temp.wav'


# Speech-to-text (STT).
def listen():
    """Send the recorded file at ``path`` to the speech API and return the text.

    Returns:
        The first recognised sentence, or ``None`` when the response carries
        no recognition result (the response is printed in that case).
    """
    # Read the recording produced by the recording step.
    with open(path, 'rb') as fp:
        voices = fp.read()

    # dev_pid: 1536 Mandarin (simple English supported), 1537 Mandarin
    # (Chinese only), 1737 English, 1637 Cantonese, 1837 Sichuanese,
    # 1936 far-field Mandarin.
    result = client.asr(voices, 'wav', 16000, {'dev_pid': 1537})

    try:
        result_text = result["result"][0]
    except (KeyError, IndexError):
        # A missing or empty "result" means nothing was recognised; show the
        # whole response so the cause is visible to the user.
        print("KeyError: no recognition result in response: %s" % (result,))
        return None

    print("you said: " + result_text)
    return result_text
3、將文本內容發送給圖靈機器人平台,進行對話,圖靈機器人平台會返回對話的文本,如何使用圖靈機器人,請百度,地址:http://www.tuling123.com/member/robot/2304329/center/frame.jhtml?page=1&child=0
對應的python代碼如下:
import requests

# Turing robot API key and endpoint.
# NOTE: replace the key with the apiKey of your own Turing robot.
turing_api_key = "c81a2d3f03c6400f968787653fa42f68"
api_url = "http://openapi.tuling123.com/openapi/api/v2"  # Turing robot API URL
headers = {'Content-Type': 'application/json;charset=UTF-8'}


# Ask the Turing robot for a reply.
def Turing(text_words=""):
    """Send *text_words* to the Turing robot API and return its text reply.

    Args:
        text_words: The user's sentence.

    Returns:
        The ``text`` value of the first entry in the response's ``results``.

    Raises:
        KeyError, IndexError: If the response has no such entry.
        requests.RequestException: If the HTTP request fails or times out.
    """
    req = {
        "reqType": 0,
        "perception": {
            "inputText": {
                "text": text_words,
            },
            "selfInfo": {
                "location": {
                    "city": "北京",
                    "province": "北京",
                    "street": "車公庄西大街",
                },
            },
        },
        "userInfo": {
            "apiKey": turing_api_key,  # the Turing robot apiKey defined above
            "userId": "Nieson",  # unique user id (any value, not a secret)
        },
    }
    # A timeout keeps the chat loop from hanging forever on a dead connection.
    response = requests.post(api_url, json=req, headers=headers, timeout=10)
    response_dict = response.json()
    return response_dict["results"][0]["values"]["text"]
4、將圖靈機器人返回的文本轉換為語音,然后通過python的playsound模塊播放出來。使用第三方的pyttsx3模塊(需要通過pip另行安裝,並非python自帶)也可以輕松實現,但是效果不好,這里我使用的是阿里智能語音平台,當然也可以用百度的,主要是阿里的語音種類多一點。具體的python代碼如下:
import base64
import hashlib
import hmac
import http.client
import json
import time
import uuid
from urllib import parse


class AccessToken:
    """Obtain a speech-service access token through a signed CreateToken call."""

    @staticmethod
    def _encode_text(text):
        """Percent-encode *text*; spaces become ``%20`` and ``~`` stays bare.

        Same output as ``quote_plus`` followed by the ``+`` -> ``%20``,
        ``*`` -> ``%2A`` and ``%7E`` -> ``~`` replacements.
        """
        return parse.quote(text, safe='~')

    @staticmethod
    def _encode_dict(dic):
        """Return *dic* as a key-sorted, percent-encoded query string."""
        ordered = sorted(dic.items())
        return parse.urlencode(ordered, quote_via=parse.quote, safe='~')

    @staticmethod
    def create_token(access_key_id, access_key_secret):
        """Request a token and return ``(token, expire_time)``.

        Returns ``(None, None)`` when the HTTP request is not OK or the
        response contains no ``Token`` object.
        """
        # Third-party dependency; only needed when a token is really requested.
        import requests

        parameters = {
            'AccessKeyId': access_key_id,
            'Action': 'CreateToken',
            'Format': 'JSON',
            'RegionId': 'cn-shanghai',
            'SignatureMethod': 'HMAC-SHA1',
            'SignatureNonce': str(uuid.uuid1()),
            'SignatureVersion': '1.0',
            'Timestamp': time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
            'Version': '2019-02-28',
        }
        # Build the canonicalized query string.
        query_string = AccessToken._encode_dict(parameters)
        # Build the string to sign: METHOD & encoded path & encoded query.
        string_to_sign = '&'.join([
            'GET',
            AccessToken._encode_text('/'),
            AccessToken._encode_text(query_string),
        ])
        # Compute the signature; the signing key is the secret followed by '&'.
        digest = hmac.new((access_key_secret + '&').encode('utf-8'),
                          string_to_sign.encode('utf-8'),
                          hashlib.sha1).digest()
        signature = base64.b64encode(digest).decode('ascii')
        # URL-encode the signature before placing it in the query.
        signature = AccessToken._encode_text(signature)
        full_url = 'http://nls-meta.cn-shanghai.aliyuncs.com/?Signature=%s&%s' % (signature, query_string)
        # Submit the HTTP GET request; the timeout avoids hanging forever.
        response = requests.get(full_url, timeout=10)
        if response.ok:
            token_info = response.json().get('Token')
            if token_info is not None:
                return token_info['Id'], token_info['ExpireTime']
        return None, None


def processPOSTRequest(appKey, token, text, audioSaveFile, format, sampleRate):
    """POST a text-to-speech request and save the returned audio.

    Args:
        appKey: Value sent as ``appkey`` in the request body.
        token: Access token sent as ``token``.
        text: Text to synthesize.
        audioSaveFile: Path the audio is written to on success.
        format: Audio format sent as ``format`` (e.g. ``'mp3'``).
        sampleRate: Value sent as ``sample_rate``.

    Returns:
        ``True`` when the response was ``audio/mpeg`` and the file was written;
        ``False`` otherwise (the response body is printed).
    """
    host = 'nls-gateway.cn-shanghai.aliyuncs.com'
    url = 'https://' + host + '/stream/v1/tts'
    # HTTPS headers.
    httpHeaders = {'Content-Type': 'application/json'}
    # HTTPS body.
    payload = json.dumps({
        'appkey': appKey,
        'token': token,
        'text': text,
        'format': format,
        'sample_rate': sampleRate,
    })
    conn = http.client.HTTPSConnection(host)
    try:
        conn.request(method='POST', url=url, body=payload, headers=httpHeaders)
        # Handle the server response.
        response = conn.getresponse()
        contentType = response.getheader('Content-Type')
        data = response.read()
    finally:
        # Close the connection even when the request itself raises.
        conn.close()

    if contentType == 'audio/mpeg':
        with open(audioSaveFile, mode='wb') as f:
            f.write(data)
        return True
    print('The POST request failed: ' + str(data))
    return False


def textConverToVidio(appKey, token, text, audioSaveFile):
    """Synthesize *text* to a 16 kHz MP3 file at *audioSaveFile*.

    The text is sent inside a JSON body, so it is passed on unencoded.

    Returns:
        The result of ``processPOSTRequest`` (``True`` on success).
    """
    audio_format = 'mp3'
    sampleRate = 16000
    return processPOSTRequest(appKey, token, text, audioSaveFile, audio_format, sampleRate)
最終就完成了一個簡單的機器人對話,由於時間倉促,代碼沒有優化,全部代碼如下:

import base64
import hashlib
import hmac
import http.client
import json
import os
import time
import urllib.parse
import uuid
from urllib import parse

import requests
import speech_recognition as sr
from aip import AipSpeech
from playsound import playsound


# Use SpeechRecognition to record audio from the microphone.
def my_record(rate=16000):
    """Record one utterance from the default microphone into ``temp.wav``."""
    r = sr.Recognizer()
    with sr.Microphone(sample_rate=rate) as source:
        print("請講話(正在傾聽中......):")
        audio = r.listen(source)
        print("回答思考中.....")
    with open("temp.wav", "wb") as f:
        f.write(audio.get_wav_data())


# NOTE: demo credentials are hard-coded in this script; replace them with your
# own and keep real keys out of version control.
APP_ID = "22817840"
API_KEY = "ndQCCddCNisGHe87G5agXsGm"
SECRET_KEY = "o0pLLR6DIPEnBXtOvpWXK3QxruFn1G3N"
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
path = 'temp.wav'


# Speech-to-text (STT).
def listen():
    """Send the recorded file at ``path`` to the speech API and return the text.

    Returns:
        The first recognised sentence, or ``None`` when the response carries
        no recognition result (the response is printed in that case).
    """
    # Read the recording.
    with open(path, 'rb') as fp:
        voices = fp.read()

    # dev_pid: 1536 Mandarin (simple English supported), 1537 Mandarin
    # (Chinese only), 1737 English, 1637 Cantonese, 1837 Sichuanese,
    # 1936 far-field Mandarin.
    result = client.asr(voices, 'wav', 16000, {'dev_pid': 1537})

    try:
        result_text = result["result"][0]
    except (KeyError, IndexError):
        # Show the whole response so the reason for the failure is visible.
        print("KeyError: no recognition result in response: %s" % (result,))
        return None

    print("you said: " + result_text)
    return result_text


# Turing robot API key and endpoint.
turing_api_key = "c81a2d3f03c6400f968787653fa42f68"  # your Turing robot apiKey
api_url = "http://openapi.tuling123.com/openapi/api/v2"  # Turing robot API URL
headers = {'Content-Type': 'application/json;charset=UTF-8'}


# Ask the Turing robot for a reply.
def Turing(text_words=""):
    """Send *text_words* to the Turing robot API and return its text reply.

    Raises:
        KeyError, IndexError: If the response has no ``results[0].values.text``.
        requests.RequestException: If the HTTP request fails or times out.
    """
    req = {
        "reqType": 0,
        "perception": {
            "inputText": {
                "text": text_words,
            },
            "selfInfo": {
                "location": {
                    "city": "北京",
                    "province": "北京",
                    "street": "車公庄西大街",
                },
            },
        },
        "userInfo": {
            "apiKey": turing_api_key,
            "userId": "Nieson",  # unique user id (any value, not a secret)
        },
    }
    # A timeout keeps the chat loop from hanging forever on a dead connection.
    response = requests.post(api_url, json=req, headers=headers, timeout=10)
    response_dict = response.json()
    return response_dict["results"][0]["values"]["text"]


class AccessToken:
    """Obtain a speech-service access token through a signed CreateToken call."""

    @staticmethod
    def _encode_text(text):
        """Percent-encode *text*; spaces become ``%20`` and ``~`` stays bare."""
        return parse.quote(text, safe='~')

    @staticmethod
    def _encode_dict(dic):
        """Return *dic* as a key-sorted, percent-encoded query string."""
        return parse.urlencode(sorted(dic.items()), quote_via=parse.quote, safe='~')

    @staticmethod
    def create_token(access_key_id, access_key_secret):
        """Request a token and return ``(token, expire_time)``.

        Returns ``(None, None)`` when the HTTP request is not OK or the
        response contains no ``Token`` object.
        """
        parameters = {
            'AccessKeyId': access_key_id,
            'Action': 'CreateToken',
            'Format': 'JSON',
            'RegionId': 'cn-shanghai',
            'SignatureMethod': 'HMAC-SHA1',
            'SignatureNonce': str(uuid.uuid1()),
            'SignatureVersion': '1.0',
            'Timestamp': time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
            'Version': '2019-02-28',
        }
        # Build the canonicalized query string.
        query_string = AccessToken._encode_dict(parameters)
        # Build the string to sign: METHOD & encoded path & encoded query.
        string_to_sign = '&'.join([
            'GET',
            AccessToken._encode_text('/'),
            AccessToken._encode_text(query_string),
        ])
        # Compute the signature; the signing key is the secret followed by '&'.
        digest = hmac.new((access_key_secret + '&').encode('utf-8'),
                          string_to_sign.encode('utf-8'),
                          hashlib.sha1).digest()
        signature = base64.b64encode(digest).decode('ascii')
        # URL-encode the signature before placing it in the query.
        signature = AccessToken._encode_text(signature)
        full_url = 'http://nls-meta.cn-shanghai.aliyuncs.com/?Signature=%s&%s' % (signature, query_string)
        # Submit the HTTP GET request; the timeout avoids hanging forever.
        response = requests.get(full_url, timeout=10)
        if response.ok:
            token_info = response.json().get('Token')
            if token_info is not None:
                return token_info['Id'], token_info['ExpireTime']
        return None, None


def processPOSTRequest(appKey, token, text, audioSaveFile, format, sampleRate):
    """POST a text-to-speech request and save the returned audio.

    Returns:
        ``True`` when the response was ``audio/mpeg`` and the file was written;
        ``False`` otherwise (the response body is printed).
    """
    host = 'nls-gateway.cn-shanghai.aliyuncs.com'
    url = 'https://' + host + '/stream/v1/tts'
    # HTTPS headers.
    httpHeaders = {'Content-Type': 'application/json'}
    # HTTPS body.
    payload = json.dumps({
        'appkey': appKey,
        'token': token,
        'text': text,
        'format': format,
        'sample_rate': sampleRate,
    })
    conn = http.client.HTTPSConnection(host)
    try:
        conn.request(method='POST', url=url, body=payload, headers=httpHeaders)
        # Handle the server response.
        response = conn.getresponse()
        contentType = response.getheader('Content-Type')
        data = response.read()
    finally:
        # Close the connection even when the request itself raises.
        conn.close()

    if contentType == 'audio/mpeg':
        with open(audioSaveFile, mode='wb') as f:
            f.write(data)
        return True
    print('The POST request failed: ' + str(data))
    return False


def textConverToVidio(appKey, token, text, audioSaveFile):
    """Synthesize *text* to a 16 kHz MP3 file at *audioSaveFile*.

    The text is sent inside a JSON body, so it is passed on unencoded.

    Returns:
        The result of ``processPOSTRequest`` (``True`` on success).
    """
    audio_format = 'mp3'
    sampleRate = 16000
    return processPOSTRequest(appKey, token, text, audioSaveFile, audio_format, sampleRate)


def getToken():
    """Request a fresh access token, print it, and return ``(token, expire_time)``.

    Both values are ``None`` when the token service does not issue a token.
    """
    access_key_id = 'LTAI4G1gXMUogGkDacvKmJYu'
    access_key_secret = 'lw7dNXyVY55hzvYJjolayE8PccsNEZ'
    token, expire_time = AccessToken.create_token(access_key_id, access_key_secret)
    print('token: %s, expire time(s): %s' % (token, expire_time))
    if expire_time:
        print('token有效期的北京時間:%s' % (time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(expire_time))))
    return token, expire_time


def main():
    """Run the chat loop: record, recognise, ask the bot, speak the reply."""
    print("開始聊天")
    # Ask for a fresh token instead of relying on a pasted one; fall back to
    # the previously hard-coded token only if the service issues none.
    token, _ = getToken()
    if token is None:
        token = "13d737af7cd74961bfcacf97e79e821e"

    while True:
        my_record()
        you_say = listen()
        # you_say = input("you say:")
        if not you_say:
            # Nothing was recognised; listen again rather than sending None.
            continue
        robot_say = Turing(you_say)
        spoken = textConverToVidio("E6IPoVHkVnPT7OYH", token, robot_say, "robot.mp3")
        print("小玥: " + robot_say)
        if not spoken:
            # Synthesis failed, so there is no audio file to play or remove.
            continue
        try:
            playsound("robot.mp3")
        finally:
            os.remove("robot.mp3")


if __name__ == "__main__":
    main()