示例說明:用 chromedp 操作 Chrome,導航到百度首頁,輸入「美女」並點擊搜索,然後再翻 2 頁;在此過程中保存 cookie 和所有 img 標籤的內容,並把第一頁的百度 logo 保存為 PNG 圖片。
注釋已經比較詳細了,上代碼:
// Command main drives Chrome through chromedp: it opens baidu.com, searches
// for a keyword, turns two result pages, and along the way appends the
// cookies and the collected <img> nodes to 1.txt and writes a screenshot of
// the #result_logo element to contact-form.png.
package main

import (
	"bufio"
	"context"
	"fmt"
	"io/ioutil"
	"log"
	"os"
	"time"

	"github.com/chromedp/cdproto/cdp"
	"github.com/chromedp/cdproto/network"
	"github.com/chromedp/cdproto/page"
	"github.com/chromedp/chromedp"
)

var (
	// res was meant to hold the page's outer HTML; the action that fills it
	// is commented out in the task list, so it is currently unused.
	res string
	// nodes holds the <img> nodes collected from the current page.
	nodes []*cdp.Node
	// buf holds the PNG bytes produced by the Screenshot action.
	buf []byte
)

// main runs the crawl and exits with a non-zero status on failure. The work
// lives in run so that its deferred cancel executes before the process exits
// (log.Fatal would skip deferred calls made in the same function).
func main() {
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// run starts Chrome, performs the search, saves cookies / img nodes / the
// screenshot, and turns two result pages. It returns the first error
// reported by any step.
func run() error {
	// Create the root context; cancelling it ends the session.
	ctxt, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Create the Chrome instance.
	c, err := chromedp.New(ctxt, chromedp.WithLog(log.Printf))
	if err != nil {
		return err
	}

	// Downloads are directed to the current working directory.
	wd, err := os.Getwd()
	if err != nil {
		return fmt.Errorf("getting working directory: %v", err)
	}

	// Run the task list.
	return c.Run(ctxt, chromedp.Tasks{
		page.SetDownloadBehavior(page.SetDownloadBehaviorBehaviorAllow).WithDownloadPath(wd),
		chromedp.Navigate(`https://www.baidu.com/`),
		// Wait until the search box is visible, i.e. the expected page has rendered.
		chromedp.WaitVisible(`#kw`, chromedp.ByQuery),
		chromedp.SendKeys(`#kw`, `美女`, chromedp.ByID), // type the keyword
		chromedp.Click("#su", chromedp.ByID),          // click the search button
		chromedp.Sleep(1 * time.Second),               // give the results time to load
		//chromedp.OuterHTML("html", &res, chromedp.ByQuery), // grab the HTML source
		chromedp.Nodes("img", &nodes, chromedp.ByQueryAll), // collect the <img> nodes of the current page
		chromedp.Screenshot("#result_logo", &buf, chromedp.ByID),
		// Persist the cookies, the img nodes and the screenshot.
		chromedp.ActionFunc(func(ctx context.Context, h cdp.Executor) error {
			cookies, err := network.GetAllCookies().Do(ctx, h)
			if err != nil {
				// Bail out before writing anything derived from a failed call.
				return fmt.Errorf("getting cookies: %v", err)
			}

			// Join the cookies in the form used by the Cookie request header.
			var coo string
			for _, v := range cookies {
				coo += v.Name + "=" + v.Value + ";"
			}
			WirteTXT(coo) // save the cookies
			// NOTE(review): %s is applied to []*cdp.Node as in the original;
			// confirm the resulting text is the desired representation.
			WirteTXT(fmt.Sprintf("\r\n\r\n%s", nodes)) // save the img nodes

			if err := ioutil.WriteFile("contact-form.png", buf, 0644); err != nil {
				return fmt.Errorf("writing screenshot: %v", err)
			}
			return nil
		}),
		// Turn two pages, saving the img nodes of each one.
		chromedp.ActionFunc(func(ctx context.Context, h cdp.Executor) error {
			for i := 1; i < 3; i++ {
				err := c.Run(ctx, chromedp.Tasks{
					// The selector is a compound CSS query, not a bare id,
					// so it is matched with ByQuery.
					chromedp.Click(`#page a:nth-last-child(1)`, chromedp.ByQuery), // next page
					chromedp.Sleep(1 * time.Second),                               // give the page time to load
					chromedp.Nodes("img", &nodes, chromedp.ByQueryAll),            // collect the <img> nodes
				})
				if err != nil {
					return fmt.Errorf("turning page %d: %v", i, err)
				}
				WirteTXT(fmt.Sprintf("\r\n\r\n%s", nodes)) // save the img nodes
			}
			return nil
		}),
	})
}

// WirteTXT appends txt followed by a newline to the file 1.txt in the
// current directory, creating the file if it does not exist. Failures are
// reported on standard output and otherwise ignored (best effort), so the
// function has no return value.
func WirteTXT(txt string) {
	// Append-only text file: write access and non-executable permissions suffice.
	f, err := os.OpenFile("1.txt", os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
	if err != nil {
		fmt.Println("os Create error: ", err)
		return
	}

	bw := bufio.NewWriter(f)
	_, err = bw.WriteString(txt + "\n")
	if err == nil {
		err = bw.Flush()
	}
	// Close explicitly so a failed close (which can lose data) is reported too.
	if cerr := f.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		fmt.Println("write 1.txt error: ", err)
	}
}
參考:
https://godoc.org/github.com/chromedp/chromedp#Selector.Do
https://www.jianshu.com/p/d282b4a57596
https://juejin.im/entry/5aac8374518825556a722de3
https://blog.csdn.net/yang731227/article/details/89202458
https://www.cnblogs.com/midnight/p/10384627.html
https://www.cnblogs.com/midnight/p/10384699.html
https://crieit.net/posts/chromedp-Node-HTML
https://qiita.com/yoheimuta/items/bbbe84d2a7fe673720b3
https://segmentfault.com/a/1190000019705499?utm_source=tag-newest
https://stackoverflow.com/search?q=chromedp
https://cloud.tencent.com/developer/ask/173850
https://www.ribice.ba/golang-chrome-automation/
https://gitee.com/-/ide/project/kwff/chromedp/edit/master/-/errors.go
https://www.cnblogs.com/apocelipes/archive/2018/07/04/9264673.html
如果要在 Windows 上安裝 chromedp,還可參考我之前寫的兩篇文章:
https://www.cnblogs.com/pu369/p/10315988.html
https://www.cnblogs.com/pu369/p/10345483.html
