油猴腳本爬蟲


腳本內容

// ==UserScript==
// @name         大眾點評評論爬蟲
// @namespace    http://tampermonkey.net/
// @version      0.1
// @description  crawl is great
// @author       陳祥安
// @include      http://www.dianping.com/shop*
// @match        http://www.dianping.com/ajax/json/shopDynamic/allReview*
// @require      http://cdn.bootcss.com/jquery/1.11.2/jquery.js
// @grant        GM_xmlhttpRequest


// ==/UserScript==

(function() {
    /**
     * Evaluate an XPath expression and collect every node it yields.
     *
     * @param {string} xpath - XPath expression to evaluate.
     * @param {Node} [context] - Node to evaluate against. When omitted (or when it
     *     has no ownerDocument) the expression runs against window.document.
     * @returns {Array} The nodes returned by the result iterator, in iteration order.
     */
    const $x = function (xpath, context) {
        const ownerDoc = context && context.ownerDocument;
        const doc = ownerDoc || window.document;
        const iterator = doc.evaluate(xpath, context || doc, null, XPathResult.ANY_TYPE, null);
        const matches = [];
        // Any error raised by evaluate()/iterateNext() propagates to the caller unchanged.
        for (let current = iterator.iterateNext(); current; current = iterator.iterateNext()) {
            matches.push(current);
        }
        return matches;
    };

    // Local endpoint that receives the scraped comments.
    // NOTE(review): Tampermonkey may prompt for cross-origin permission unless the
    // metadata header declares `@connect 127.0.0.1` — confirm against your setup.
    var server_url = 'http://127.0.0.1:9090/comment/';

    /**
     * After the page has loaded: dismiss the bonus popup if it is present, collect
     * the text of every comment description on the page, and POST the collected
     * list as JSON to server_url.
     */
    window.addEventListener('load', () => {
        // Close the popup. A jQuery object is always truthy, so test its length
        // to find out whether the selector matched anything.
        const close_btn = $(".J-bonus-close");
        console.log("准備關閉", close_btn);
        if (close_btn.length > 0) {
            close_btn.click();
        }

        const li_item_list = $x("//ul[@class='comment-list J-list']/li[@class='comment-item']/div[@class='content']//p[@class='desc']");
        const dataList = li_item_list.map((v) => {
            console.log(v);
            return {"data": v.innerText};
        });

        GM_xmlhttpRequest({
            method: "POST",
            url: server_url,
            // Declare the payload type explicitly; the body is a JSON document.
            headers: {"Content-Type": "application/json"},
            data: JSON.stringify({'name': "爬蟲", "dataList": dataList}),
            onload: function (response) {
                // Handle the server response here.
                if (response.status < 200 || response.status >= 300) {
                    console.error("comment upload rejected", response.status, response);
                    return;
                }
                console.log(response);
                console.log("dataList", dataList);
            },
            // Without this handler a stopped or unreachable server fails silently.
            onerror: function (response) {
                console.error("comment upload failed", response);
            }
        });
    });
})();

python代碼

# @Author : cxa
# @File : server.py
# @Software: PyCharm
import json

from flask import Flask, request, render_template

app = Flask(__name__)


@app.route('/')
def index():
    """Serve the root URL with a static HTML heading."""
    heading = "<h1>大眾點評API</h1>"
    return heading


@app.route('/comment/', methods=['GET', 'POST'])
def login():
    """Receive scraped comments as a JSON request body and echo them back.

    POST: the raw body is decoded as UTF-8 and parsed as JSON; the parsed value
    is printed and returned as a JSON response. A body that is not valid UTF-8
    JSON yields a 400 response with an ``error`` message.

    GET: there is no payload to echo, so a short JSON hint is returned instead
    of failing on the unassigned ``result`` variable.
    """
    # Imported locally: the module header does not import jsonify.
    from flask import jsonify

    if request.method != 'POST':
        return jsonify({'message': 'POST a JSON body to this endpoint'})

    try:
        result = json.loads(request.get_data().decode("utf-8"))
    except ValueError as exc:
        # UnicodeDecodeError and json.JSONDecodeError are both ValueError subclasses.
        return jsonify({'error': 'request body is not valid UTF-8 JSON: {}'.format(exc)}), 400

    print(result)
    # jsonify also serialises lists and scalars, which are not valid view
    # return values on older Flask releases.
    return jsonify(result)


@app.errorhandler(404)
def miss(e):
    """Handle 404 errors by rendering the '404.html' template with status 404."""
    status = 404
    body = render_template('404.html')
    return body, status


@app.errorhandler(500)
def error(e):
    """Handle 500 errors by rendering the '500.html' template with status 500."""
    status = 500
    body = render_template('500.html')
    return body, status


if __name__ == '__main__':
    # Bind to loopback only: debug=True enables the interactive debugger, which
    # allows arbitrary code execution and must not be reachable from other hosts.
    # The userscript in this document posts to http://127.0.0.1:9090/comment/,
    # so loopback is sufficient for it.
    app.run(host='127.0.0.1', port=9090, debug=True)


免責聲明!

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱 yoyou2525@163.com 刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM