// Module imports
const http = require('http')
const fs = require('fs')
const cheerio = require('cheerio')
const iconv = require('iconv-lite')

// URL of the first chapter to fetch
const url = 'http://www.81zw.com/book/8634/745331.html'
// Number of the chapter currently being processed (starts at 1)
let i = 1
// Maximum number of chapters to fetch
let num = 100

/**
 * Entry point of the crawl: fetches the chapter at `url`; each fetched
 * chapter then schedules the next one until `num` chapters are saved.
 *
 * @param {string} url - URL of the chapter page to fetch.
 */
function main(url) {
  startRequest(url)
}

/**
 * Downloads one chapter page, extracts book name, title and paragraphs,
 * saves them through `saveContent`, and schedules the next chapter.
 *
 * @param {string} url - URL of the chapter page to fetch.
 */
function startRequest(url) {
  const request = http.get(url, res => {
    // Raw Buffer chunks of the response body
    const chunks = []
    res.on('data', chunk => {
      chunks.push(chunk)
    })
    res.on('end', () => {
      // The body is decoded as GBK via iconv-lite before parsing
      const page = iconv.decode(Buffer.concat(chunks), 'gbk')
      // cheerio exposes a jQuery-like API over the parsed HTML
      const $ = cheerio.load(page, {decodeEntities: false})

      const title = $('.bookname h1').text()
      const content = $('#content').html()
      // .html() yields null when the page has no #content element
      // (error page, end of book, ...); stop instead of crashing on split
      if (content == null) {
        console.error(`No #content found at ${url} (HTTP ${res.statusCode}), stopping`)
        return
      }
      // Paragraphs are separated by a double <br> in the page markup
      const paragraphs = content.split('<br><br>').map(part => trim(part))
      const bookName = $('.con_top a').eq(2).text()

      // Record that gets serialized to disk for this chapter
      const obj = {
        id: i,
        err: 0,
        bookName: bookName,
        title: title,
        content: paragraphs
      }

      // Second-to-last path segment of the current URL (the book id in
      // the URL layout used here)
      const segments = url.split('/')
      const bookId = segments[segments.length - 2]
      // href of the third link in the bottom navigation bar, used as the
      // next chapter's file name
      const link = $('.bottem2 a').eq(2).attr('href')
      const nextLink = `http://www.81zw.com/book/${bookId}/${link}`

      saveContent(obj, nextLink)

      // Without a next-chapter link there is nothing left to crawl
      if (!link) {
        console.error(`No next-chapter link found at ${url}, stopping`)
        return
      }

      console.log(`第${i + 1}章:${nextLink}`)
      i++
      if (i <= num) {
        // Small delay between requests before fetching the next chapter
        setTimeout(() => {
          main(nextLink)
        }, 100)
      }
    })
  })
  // Without this listener a network failure would be an unhandled
  // 'error' event and terminate the process with a raw stack trace
  request.on('error', err => {
    console.error(`Request for ${url} failed: ${err.message}`)
  })
}

/**
 * Writes one chapter to `./data/<bookName>/chapter<i>.json`, creating the
 * directory (and its `data` parent) when missing.
 *
 * @param {{bookName: string, title: string}} obj - Chapter record; the
 *   whole object is serialized as JSON.
 * @param {string} nextLink - URL of the following chapter (currently unused).
 */
function saveContent(obj, nextLink) {
  console.log(`${i}--${obj.title}`)
  // recursive: true also creates `data/` itself and is a no-op when the
  // directory already exists
  fs.mkdirSync(`data/${obj.bookName}`, {recursive: true})
  // Write the chapter as a JSON file; a write failure is rethrown from
  // the callback and therefore aborts the process
  fs.writeFile(`./data/${obj.bookName}/chapter${i}.json`, JSON.stringify(obj), 'utf-8', err => {
    if (err) throw err
  })
}

/**
 * Strips leading/trailing whitespace, then removes every space,
 * non-breaking space and literal `&nbsp;` entity from the string.
 *
 * NOTE(review): the pattern as found only listed a plain space; the
 * `&nbsp;` entity appears to have been lost when this code was copied
 * from a rendered page - confirm against the site's markup.
 *
 * @param {string} str - Raw HTML fragment of one paragraph.
 * @returns {string} The cleaned paragraph text.
 */
function trim(str) {
  return str.replace(/(^\s*)|(\s*$)/g, '').replace(/&nbsp;|\u00a0| /g, '')
}

main(url)
生成文件