# 爬取有道翻譯


'''
j---Request URL:http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule
jo--Request URL:http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule
job-Request URL:http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule

三次的請求接口是一樣的,由此推斷
請求接口:http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule

通過form表單
i:j   需要翻譯的內容
from:AUTO  源語言
to:AUTO   目的語言  就是把哪一門語言翻譯為哪一門語言
smartresult:dict
client:fanyideskweb
salt:1520339426459
sign:6edf4011d6b587550ef418fc9ba09b5e
doctype:json
version:2.1
keyfrom:fanyi.web
action:FY_BY_REALTIME
typoResult:false

i:jo
from:AUTO
to:AUTO
smartresult:dict
client:fanyideskweb
salt:1520339739042
sign:90d73568704068c16c27f32f4f99a8a5
doctype:json
version:2.1
keyfrom:fanyi.web
action:FY_BY_REALTIME
typoResult:false

要想在靜態頁面生成東西,可用js來進行
JS:將靜態網頁變成動態加載的過程
'''
from urllib import request,parse
import time,random
import hashlib #hashlib 提供md5加密的一個包
import json
def getMD5(value):
    """Return the hexadecimal MD5 digest of ``value``.

    Args:
        value: Text to hash; it is encoded as UTF-8 before hashing.

    Returns:
        The 32-character lowercase hex digest string.
    """
    # Hash the UTF-8 bytes in one step instead of create-then-update.
    raw = bytes(value, 'utf-8')
    return hashlib.md5(raw).hexdigest()

def fanyi(key):
    """Translate ``key`` through the Youdao web endpoint and print the result.

    Builds the same form the web page posts (including the ``salt``/``sign``
    pair), sends it, prints the raw response body, and then prints every
    non-empty entry found under ``smartResult.entries`` of the JSON reply.
    Replies without a ``smartResult`` section print nothing beyond the raw
    body instead of raising ``KeyError``.

    Args:
        key: Text to translate.

    Returns:
        None. All output is written to stdout.

    Raises:
        urllib.error.URLError: If the HTTP request fails.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    base_url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'

    # Salt mirrors the page's JS:
    #   i = "" + ((new Date).getTime() + parseInt(10 * Math.random(), 10))
    # parseInt(10 * Math.random(), 10) yields 0..9, hence randint(0, 9).
    salt = str(int(time.time() * 1000) + random.randint(0, 9))

    # Sign mirrors the page's JS:
    #   o = n.md5("fanyideskweb" + t + i + "ebSeFb%=XZ%T[KZ)c(sy!")
    sign = getMD5("fanyideskweb" + key + salt + "ebSeFb%=XZ%T[KZ)c(sy!")

    form = {
        "i": key,
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": salt,
        "sign": sign,
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTIME",
        "typoResult": "false",
    }
    # URL-encode the form and convert it to bytes once, so the declared
    # Content-Length is the length of what is actually sent.
    body = parse.urlencode(form).encode('utf-8')

    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        # Accept-Encoding (gzip, deflate) is intentionally not sent; presumably
        # so the body arrives uncompressed, since it is decoded directly below.
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Content-Length": str(len(body)),
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Cookie": "OUTFOX_SEARCH_USER_ID_NCOO=1135910979.4269547; OUTFOX_SEARCH_USER_ID=-1364254390@10.168.1.8; fanyi-ad-id=40789; fanyi-ad-closed=1; JSESSIONID=aaafp6BJjIzwC4k7Mb5hw; ___rl__test__cookies=1520316265914",
        "Host": "fanyi.youdao.com",
        "Origin": "http://fanyi.youdao.com",
        "Referer": "http://fanyi.youdao.com/",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest",
    }

    req = request.Request(base_url, body, headers=headers)
    # The context manager closes the connection even if read/decode fails.
    with request.urlopen(req) as response:
        content = response.read().decode('utf-8')
    print(content)

    # Post-process: print the non-empty dictionary entries, if any.
    json_data = json.loads(content)
    if not isinstance(json_data, dict):
        return
    smart_result = json_data.get('smartResult') or {}
    for entry in smart_result.get('entries') or []:
        if entry:
            print(entry.strip())

if __name__ == '__main__':
    # Interactive loop: translate each line the user types until 'q' is entered.
    while True:
        text = input('請輸入翻譯內容:')
        # Check the quit command before translating, so 'q' exits immediately
        # instead of first being sent to the server as text to translate.
        if text == 'q':
            break
        fanyi(text)

 


# 免責聲明!
#
# 本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
# 粵ICP備18138465號   © 2018-2025 CODEPRJ.COM