想要爬取網站內容,一般要先打開網站,按 F12 開啟瀏覽器的開發者工具,從中找出請求地址(URL)以及請求參數(data),具體代碼如下:
# Command-line helper that sends a word or sentence to the Youdao web
# translation endpoint and prints the translated text.
import json
import urllib.parse
import urllib.request

# NOTE: the "_o" has to be removed from the URL, otherwise the server answers
# with an error_code:50 failure.
TRANSLATE_URL = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'


def build_payload(content):
    """Return the URL-encoded, UTF-8 POST body for translating *content*.

    The form fields mirror what the translation page sends (visible via
    "inspect element" / F12 in the browser). Only the ``i`` and ``doctype``
    keys are required; the others can be removed without affecting the
    translation.
    """
    data = {
        'i': content,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        'salt': '15601659811655',
        'sign': '78817b046452f9663a2b36604f220360',
        'doctype': 'json',
        'version': '2.1',
        'keyfrom': 'fanyi.web',
        'action': 'FY_BY_REALTTIME',
    }
    return urllib.parse.urlencode(data).encode('utf-8')


def parse_translation(target):
    """Extract the translated text from the decoded JSON response *target*.

    ``translateResult`` is read as a list of paragraphs, each of which is a
    list of segments carrying the translated text under ``'tgt'``. Segments of
    one paragraph are concatenated and paragraphs are joined with newlines, so
    multi-sentence input is not cut off after its first segment. For a
    single-segment response this is exactly ``translateResult[0][0]['tgt']``.

    Raises:
        ValueError: if the response carries no ``translateResult`` (for
            example when the server reports an error instead).
    """
    paragraphs = target.get('translateResult')
    if not paragraphs:
        # Show the whole response so a server-side error is visible to the
        # user instead of an unexplained KeyError.
        raise ValueError('response contains no translateResult: %r' % (target,))
    return '\n'.join(
        ''.join(segment['tgt'] for segment in paragraph)
        for paragraph in paragraphs
    )


def translate(content):
    """POST *content* to the translation endpoint and return the translation."""
    payload = build_payload(content)
    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(TRANSLATE_URL, payload) as response:
        html = response.read().decode('utf-8')
    return parse_translation(json.loads(html))


def main():
    """Prompt for the text to translate and print the result."""
    content = input('Enter the word that needs translated:')
    print('result:%s' % translate(content))


if __name__ == '__main__':
    main()