'use strict'; let request = require('request') let cheerio = require('cheerio'); //爬蟲 let iconv = require('iconv-lite'); //處理gbk編碼的網頁 let Entities = require('html-entities').XmlEntities; let entities = new Entities(); const fs = require('fs') const path = require('path') const host = 'http://www.quanshuwang.com/shuku/' const db = uniCloud.database() const collection = db.collection('repiles-book') const dbCom = db.command exports.main = async (event, context) => { // 開始抓取首頁鏈接 let indexArr = [] //發送請求獲取頁面內容 var body = await requestFn() var $ = cheerio.load(body); //兼容網頁編碼格式 if ($('meta[charset]').attr('charset') == 'utf-8') { //如果網頁是utf-8的編碼 } else { //如果網頁是gbk的編碼 body = iconv.decode(body, 'gbk'); //轉換gbk編碼的頁面內容 $ = cheerio.load(body); } //處理網頁數據 獲取排行列表 let list = $('.yd-book-content .tab-item').find('.yd-book-item') for (var i = 0; i < list.length; i++) { let href = list.eq(i).find('a').attr("href") //獲取書的id let index = href.indexOf('_') + 1 let index2 = href.lastIndexOf('.') let bookId = href.slice(index, index2) //書ID //獲取書的封面 let bookImageSrc = list.eq(i).find('img').attr("src") //獲取書的標題 注意使用html-entities解碼 let bookName = entities.decode(list.eq(i).find('h2').html()) //獲取書的作者 注意使用html-entities解碼 let bookAuthor = entities.decode(list.eq(i).find('.dl-horizontal-inline p').html()) console.log('書的封面:' + bookImageSrc); console.log('書的標題:' + bookName); console.log('書的作者:' + bookAuthor); console.log('書的id:' + bookId); let obj={ bookImageSrc, bookName, bookAuthor, bookId, } saveContent(obj) } console.log('新增文章數量:', indexArr.length); // 循環抓取每個新文章詳情頁 // if (indexArr.length > 0) { // for (let i = 0; i < indexArr.length; i++) { // let href = list.eq(indexArr[i]).attr("href") // let imgSrc = list.eq(indexArr[i]).find('img').attr('src') // let title = list.eq(indexArr[i]).find('.title').text() // await saveArticle(href, title, imgSrc) // } // } //返回數據給客戶端 return event }; function saveContent(obj) { //獲取要寫入文件的文件夾路徑(以書名當文件夾) let pathBook=path.join(__dirname, `../../bookData/${obj.bookName}`) //判斷書名文件夾是否存在,不存在則創建 if (!fs.existsSync(pathBook)) { fs.mkdirSync(pathBook) } //寫入json文件 fs.writeFile(`${pathBook}/chapter1.json`, JSON.stringify(obj), 'utf-8', err => { if (err) throw err }) } function requestFn() { return new Promise((resolve, reject) => { request({ url: host, encoding: null //設置抓取頁面時不要對數據做任何轉換 }, function(err, res, body) { if (err) { reject(err) } else { resolve(body) } }); }) }