python使用chrome抓取頁面中ajax請求返回的數據


#-*-coding:utf-8-*-

from time import sleep
from selenium import webdriver
import json
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# 過濾出數據請求中的headers
def getHttpInfo(browser):
    for responseReceived in browser.get_log('performance'):
        try:
            response = json.loads(responseReceived[u'message'])[u'message'][u'params'][u'response']
            if 'ajaxUrl' in response['url']:
                # print(response)
                # print(response['url'])
                # print(response['headers'])
                # print(response['headersText'])
                return response['requestHeaders']
        except:
            pass
    return None

# 請求頁面 並設置headers到文件中
def setHeaders():
    d = DesiredCapabilities.CHROME
    d['loggingPrefs'] = { 'performance':'ALL' }
    options=webdriver.ChromeOptions()
    options.set_headless()
    options.add_argument('--disable-gpu')

    driver=webdriver.Chrome(desired_capabilities=d,options=options)
    driver.get('http://www.baidu.com')
    sleep(20)
    headers = getHttpInfo(driver)
    driver.quit()
    # write header
    hand = open('header.txt', 'w')
    hand.write(json.dumps(headers))
    hand.close()

if __name__ == '__main__':
    setHeaders()


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM