之前寫的關於chromedp的文章被別人轉到CSDN,很受鼓勵,再來一篇golang爬蟲實例


示例說明:用chromedp操作chrome,導航到baidu,然後輸入“美女”,然後再翻2頁,在此過程中保存cookie和所有img標籤內容,並保存第一頁的baidu logo為png

註釋已經比較詳細了,上代碼:

package main

import (
    "bufio"
    "context"
    "fmt"

    "io/ioutil"
    "log"
    "os"

    "github.com/chromedp/cdproto/page"

    "time"

    "github.com/chromedp/cdproto/network"

    "github.com/chromedp/cdproto/cdp"

    "github.com/chromedp/chromedp"
)

// Package-level buffers that the chromedp tasks in main write into.
var (
    // res is meant to receive the page's HTML source; the OuterHTML task that
    // would fill it is commented out in main, so it stays empty.
    res string
    // nodes receives the <img> nodes selected from the current page.
    nodes []*cdp.Node
    // buf receives the bytes captured by the Screenshot task.
    buf []byte
)

// main drives Chrome through chromedp: it opens Baidu, types the search
// keyword, submits the search and then follows the pager link twice.
//
// Along the way it appends the session cookies and the <img> nodes of every
// visited page to 1.txt (through WirteTXT) and writes a screenshot of the
// #result_logo element to contact-form.png. Any failure aborts the program
// through log.Fatal.
func main() {
    // create context
    ctxt, cancel := context.WithCancel(context.Background())
    defer cancel()

    // create chrome instance
    c, err := chromedp.New(ctxt, chromedp.WithLog(log.Printf))
    if err != nil {
        log.Fatal(err)
    }

    // Downloads are directed to the current working directory, so a failure
    // to resolve it must not be ignored.
    wd, err := os.Getwd()
    if err != nil {
        log.Fatal(err)
    }

    // run task list
    err = c.Run(ctxt, chromedp.Tasks{
        page.SetDownloadBehavior(page.SetDownloadBehaviorBehaviorAllow).WithDownloadPath(wd),
        chromedp.Navigate(`https://www.baidu.com/`),     // open the Baidu home page
        chromedp.WaitVisible(`#kw`, chromedp.ByQuery),   // the search box being visible means the page has loaded
        chromedp.SendKeys(`#kw`, `美女`, chromedp.ByID), // type the keyword
        chromedp.Click("#su", chromedp.ByID),            // submit the search
        chromedp.Sleep(1 * time.Second),                 // give the result page time to render
        //chromedp.OuterHTML("html", &res, chromedp.ByQuery), // fetch the HTML source
        chromedp.Nodes("img", &nodes, chromedp.ByQueryAll), // collect the <img> nodes of the current page
        chromedp.Screenshot("#result_logo", &buf, chromedp.ByID),
        // Persist the cookies, the collected <img> nodes and the screenshot.
        chromedp.ActionFunc(func(ctx context.Context, h cdp.Executor) error {
            cookies, err := network.GetAllCookies().Do(ctx, h)
            if err != nil {
                // Check before touching the result so that no half-empty
                // output is written when the cookie lookup fails.
                return err
            }
            // Join the cookies the way they appear in a Cookie request header.
            var coo string
            for _, v := range cookies {
                coo = coo + v.Name + "=" + v.Value + ";"
            }
            WirteTXT(coo)                              // save the cookies
            WirteTXT(fmt.Sprintf("\r\n\r\n%s", nodes)) // save the <img> nodes
            return ioutil.WriteFile("contact-form.png", buf, 0644)
        }),
        // Follow the last link of the pager twice, saving the <img> nodes of
        // every page that is reached.
        chromedp.ActionFunc(func(ctx context.Context, h cdp.Executor) error {
            for i := 1; i < 3; i++ {
                err := c.Run(ctx, chromedp.Tasks{
                    // The selector is a descendant query, not a bare id, so
                    // it is matched with ByQuery.
                    chromedp.Click(`#page a:nth-last-child(1)`, chromedp.ByQuery), // go to the next page
                    chromedp.Sleep(1 * time.Second),                               // give the page time to render
                    chromedp.Nodes("img", &nodes, chromedp.ByQueryAll),            // collect the <img> nodes
                })
                if err != nil {
                    // Propagate the failure instead of saving stale nodes
                    // and carrying on.
                    return err
                }
                WirteTXT(fmt.Sprintf("\r\n\r\n%s", nodes)) // save the <img> nodes
            }
            return nil
        }),
    })
    if err != nil {
        log.Fatal(err)
    }
}

// WirteTXT appends txt followed by a newline to the file "1.txt" in the
// current working directory, creating the file if it does not exist yet.
//
// The function has no return value, so a failure to open, write, flush or
// close the file is reported on standard output and the call returns.
func WirteTXT(txt string) {
    // The file is only ever appended to, so write-only access is enough, and
    // a plain text file has no need for execute permission.
    f, err := os.OpenFile("1.txt", os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
    if err != nil {
        fmt.Println("os Create error: ", err)
        return
    }
    defer func() {
        // Close can surface a delayed write error, so it is reported too.
        if cerr := f.Close(); cerr != nil {
            fmt.Println("os Close error: ", cerr)
        }
    }()

    bw := bufio.NewWriter(f)
    if _, err := bw.WriteString(txt + "\n"); err != nil {
        fmt.Println("os Write error: ", err)
        return
    }
    if err := bw.Flush(); err != nil {
        fmt.Println("os Write error: ", err)
    }
}

 

參考:

https://godoc.org/github.com/chromedp/chromedp#Selector.Do

https://www.jianshu.com/p/d282b4a57596

https://juejin.im/entry/5aac8374518825556a722de3

https://blog.csdn.net/yang731227/article/details/89202458

https://www.cnblogs.com/midnight/p/10384627.html

https://www.cnblogs.com/midnight/p/10384699.html

https://crieit.net/posts/chromedp-Node-HTML

https://qiita.com/yoheimuta/items/bbbe84d2a7fe673720b3

https://segmentfault.com/a/1190000019705499?utm_source=tag-newest

https://stackoverflow.com/search?q=chromedp

https://cloud.tencent.com/developer/ask/173850

https://www.ribice.ba/golang-chrome-automation/

https://gitee.com/-/ide/project/kwff/chromedp/edit/master/-/errors.go

https://www.cnblogs.com/apocelipes/archive/2018/07/04/9264673.html

如果在windows安裝chromedp,還可參考我之前寫的

https://www.cnblogs.com/pu369/p/10315988.html

https://www.cnblogs.com/pu369/p/10345483.html

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM