因為在起點的目錄頁中找不到各個章節的 URL,因此只能從第一章開始,一章一章地往下爬
先分析一下起點章節頁面的 HTML 結構
首先導入相關jar包 (我用的是gradle)
上代碼
url="https://read.qidian.com/chapter/6xbxCkvMZqw1/OCcwrQf_B4Qex0RJOkJclQ2"; //偽裝瀏覽器 Document document = Jsoup.connect(url).userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36").timeout(500000).get(); //獲得書名 String bookName = document.getElementsByClass("book-cover-wrap").select("h1").text(); int i = 1; //一直循環,直到找不到下一章url while (true) { //獲取章節名 String name = document.select("h3[class = j_chapterName]").text(); //獲取下一章的element Element element = document.getElementById("j_chapterNext"); String nexturl; //獲取內容 String fiction = document.select("div[class = read-content j_readContent]").select("p").toString(); //正則替換<p></p> String s = fiction.replaceAll("<\\/?p>", ""); if (element == null) { break; } nexturl = "https:" + element.attr("href"); //is表示免費章節數(vip章節需要登錄付費)loginUrl 表示vip章節的url if(i==is){ i++; nexturl = loginUrl; } if (i >= is) { try { //獲取vip章節,並帶頭文件,可能報400,500 document = Jsoup.connect(nexturl).header("", "").header("","").header("", "").header("", "").timeout(60000).get(); } catch (Exception e) { System.out.println("ss"+e.getMessage()); document = Jsoup.connect(nexturl).header("", "").header("", "").header("", "").header("", "").userAgent("").timeout(60000).get(); } } else { i++; try { document = Jsoup.connect(nexturl).timeout(60000).get(); } catch (Exception e) { System.out.println("----------------------"); } } }