cheerio 解析新浪部落格目錄列表
阿新 • • 發佈:2022-02-15
正序目錄
- 解析手機版新浪部落格網頁的文章目錄,擷取每篇文章的標題與連結,然後以 Markdown 連結格式(`[標題](連結)`)存成文字檔 `./html/目錄/新浪部落格目錄.txt`
// Builds a Markdown link catalogue from saved mobile Sina blog pages.
//
// Every `.txt` file directly inside ./html is parsed as HTML; for each
// `#pl-home-bloglist > article > ul > li` entry the <h2> inner HTML becomes
// the link text and the <a data-link="..."> attribute becomes the target.
// All entries are written, in page order, to ./html/目錄/新浪部落格目錄.txt.
const fs = require('fs');
const nodePath = require('path');
const cheerio = require('cheerio');

const htmlDir = `./html`;
const outputDir = nodePath.join(htmlDir, '目錄');
const outputFile = nodePath.join(outputDir, '新浪部落格目錄.txt');

/**
 * Extracts the catalogue entries of one saved page.
 *
 * @param {string} html - Raw HTML of a saved blog list page.
 * @returns {string[]} One `[title](url)` chunk per entry, each followed by a
 *   blank line, in document order.
 */
function collectEntries(html) {
  const $ = cheerio.load(html);
  const entries = [];

  $('#pl-home-bloglist > article > ul>li').each((index, ele) => {
    const title = $(ele).find('h2').html();
    const url = $(ele).find('a').attr('data-link');

    // `.html()` yields null and `.attr()` yields undefined when the node is
    // missing; writing either would throw, so skip the entry instead.
    if (title == null || url == null) {
      console.warn(`Skipping entry ${index}: missing <h2> or data-link`);
      return;
    }

    entries.push(`[${title}](${url})\n\n`);
    console.log(title);
    console.log(url);
  });

  return entries;
}

/**
 * Reads every `.txt` page in the input directory and writes the combined
 * catalogue once, so several pages no longer overwrite each other.
 */
function buildCatalogue() {
  const entries = [];
  let pageCount = 0;

  fs.readdirSync(htmlDir).forEach((file) => {
    console.log(file);

    // extname looks at the last dot only, so `a.b.txt` is accepted and
    // `x.txt.bak` is rejected.
    if (nodePath.extname(file) !== '.txt') {
      return;
    }

    const html = fs.readFileSync(nodePath.join(htmlDir, file), 'utf-8');
    entries.push(...collectEntries(html));
    pageCount += 1;
  });

  // Leave any existing catalogue untouched when there was nothing to parse.
  if (pageCount === 0) {
    return;
  }

  fs.mkdirSync(outputDir, { recursive: true });
  fs.writeFileSync(outputFile, entries.join(''), 'utf-8');
}

try {
  buildCatalogue();
} catch (err) {
  console.error(`Failed to build the blog catalogue: ${err.message}`);
  process.exitCode = 1;
}
倒序目錄
- 和上面的效果一樣,只不過每個頁面內的文章條目改為倒序輸出
// Builds a Markdown link catalogue from saved mobile Sina blog pages, with
// the entries of each page emitted in reverse document order.
//
// Every `.txt` file directly inside ./html is parsed as HTML; for each
// `#pl-home-bloglist > article > ul > li` entry the <h2> inner HTML becomes
// the link text and the <a data-link="..."> attribute becomes the target.
// The result is written to ./html/目錄/新浪部落格目錄.txt.
const fs = require('fs');
const nodePath = require('path');
const cheerio = require('cheerio');

const htmlDir = `./html`;
const outputDir = nodePath.join(htmlDir, '目錄');
const outputFile = nodePath.join(outputDir, '新浪部落格目錄.txt');

/**
 * Extracts the catalogue entries of one saved page, last entry first.
 *
 * @param {string} html - Raw HTML of a saved blog list page.
 * @returns {string[]} One `[title](url)` chunk per entry, each followed by a
 *   blank line, in reverse document order.
 */
function collectEntriesReversed(html) {
  const $ = cheerio.load(html);
  const entries = [];

  $('#pl-home-bloglist > article > ul>li')
    .toArray()
    .reverse()
    .forEach((ele, index) => {
      const title = $(ele).find('h2').html();
      const url = $(ele).find('a').attr('data-link');

      // `.html()` yields null and `.attr()` yields undefined when the node
      // is missing; writing either would throw, so skip the entry instead.
      if (title == null || url == null) {
        console.warn(`Skipping entry ${index}: missing <h2> or data-link`);
        return;
      }

      entries.push(`[${title}](${url})\n\n`);
      console.log(title);
      console.log(url);
    });

  return entries;
}

/**
 * Reads every `.txt` page in the input directory and writes the combined
 * catalogue once, so several pages no longer overwrite each other.
 */
function buildReversedCatalogue() {
  const entries = [];
  let pageCount = 0;

  fs.readdirSync(htmlDir).forEach((file) => {
    console.log(file);

    // extname looks at the last dot only, so `a.b.txt` is accepted and
    // `x.txt.bak` is rejected.
    if (nodePath.extname(file) !== '.txt') {
      return;
    }

    const html = fs.readFileSync(nodePath.join(htmlDir, file), 'utf-8');
    entries.push(...collectEntriesReversed(html));
    pageCount += 1;
  });

  // Leave any existing catalogue untouched when there was nothing to parse.
  if (pageCount === 0) {
    return;
  }

  fs.mkdirSync(outputDir, { recursive: true });
  fs.writeFileSync(outputFile, entries.join(''), 'utf-8');
}

try {
  buildReversedCatalogue();
} catch (err) {
  console.error(`Failed to build the blog catalogue: ${err.message}`);
  process.exitCode = 1;
}