利用NodeJS抓取某商品資訊
阿新 • • 發佈:2019-02-05
利用 Node.js 作為後端伺服器抓取某商品資訊並加以解析,再以 React + webpack + antd 建構的前端介面展示出來。
抓取的商品資訊展示在前端介面如下:
後臺程式碼如下:
// Dependencies
const express = require('express');
const utility = require('utility');
const eventproxy = require('eventproxy');
const superagent = require('superagent');
const cheerio = require('cheerio');
const url = require('url');

// Create the express instance
const app = express();

// Example of the product-page URL shape the scraper expects ("…dp/<ASIN>…").
// It is no longer overwritten per request: doing so shared state between
// concurrent requests.
const Url = 'https://www.example.cn/dp/sr=8-1&keywords=3%AB&th=1';

/**
 * Extracts the "dimensionToAsinMap" object from the text of the twister
 * initialisation script embedded in the product page.
 *
 * @param {string} scriptText - Inline JavaScript source of the script tag.
 * @returns {Object|null} The parsed map, or null when the entry is missing
 *   or is not valid JSON.
 */
function extractAsinMap(scriptText) {
  const stripped = scriptText
    .replace("P.register('twister-js-init-dpx-data', function()", '')
    .replace('var dataToReturn =', '');

  // The script body is split on ",\n"; the last piece mentioning the map wins.
  let rawMap = '';
  stripped.split(',\n').forEach(function (piece) {
    if (piece.indexOf('dimensionToAsinMap') !== -1) {
      rawMap = piece.trim().replace('"dimensionToAsinMap" : ', '');
    }
  });

  if (rawMap === '') {
    return null;
  }
  try {
    return JSON.parse(rawMap);
  } catch (parseErr) {
    console.error(parseErr);
    return null;
  }
}

/**
 * Builds one detail-page URL per entry of the ASIN map by swapping the
 * product id in the original page URL.
 *
 * @param {string} pageUrl - URL of the page that was scraped.
 * @param {Object} asinMap - Values are the product ids to substitute.
 * @returns {string[]|null} The variant URLs, or null when pageUrl contains
 *   neither "dp/" nor "d/".
 */
function buildVariantUrls(pageUrl, asinMap) {
  let separator = '';
  if (pageUrl.indexOf('dp/') !== -1) {
    separator = 'dp/';
  } else if (pageUrl.indexOf('d/') !== -1) {
    separator = 'd/';
  }
  if (separator === '') {
    return null;
  }

  const parts = pageUrl.split(separator);
  const prefix = parts[0];
  // Drop the first 10 characters after the separator (the original product id).
  const suffix = parts[1].slice(10);

  return Object.keys(asinMap).map(function (key) {
    const tempurl = prefix + separator + asinMap[key] + suffix + '&th=1&psc=1';
    console.log(`5.每個商品的跳轉url:${tempurl}\n`);
    return tempurl;
  });
}

/**
 * Parses one product detail page into the record returned to the frontend.
 * All values are locals, so concurrent detail fetches cannot overwrite each
 * other's fields.
 *
 * @param {string} html - Raw HTML of the detail page.
 * @returns {{price: string, name: string, size: string, color: string,
 *   discount: string, overseapurchas: string, fee: string, prime: string}}
 */
function parseProductDetail(html) {
  const $ = cheerio.load(html);

  const price = $('#priceblock_ourprice').text().trim();
  const name = $('#productTitle').text().trim();
  const size = $('#dropdown_selected_size_name>span>span').text().trim();
  const color = $('#variation_color_name>div>span').text().trim();
  // Discount info. The previous selector used XPath-style "span[3]" / "a[2]",
  // which CSS treats as attribute selectors and therefore never matched.
  // NOTE(review): :nth-of-type is the CSS equivalent — confirm against a live page.
  const discount = $('#applicable_promotion_list_sec>table>tr>td>span:nth-of-type(3)>span>a:nth-of-type(2)>span>span>span').text().trim();
  // Shipping and import fee
  const fee = $('#ags_shipping_import_fee').text().trim();

  // Overseas-purchase badge: attr() yields undefined when the element is
  // missing, so test truthiness instead of comparing against ''.
  const badgeSrc = $('#agsBadge').attr('src');
  const overseapurchas = badgeSrc ? 'haitao' : '';

  // The free-shipping marker sits at a different path on overseas listings.
  const primeSelector = overseapurchas === ''
    ? '#price-shipping-message>i>i>span'
    : '#price-shipping-message>div>i>i>span';
  const prime = $(primeSelector).text().trim().length > 0 ? 'prime' : '';

  const object = {};
  object.price = price;
  object.name = name;
  object.size = size;
  object.color = color;
  object.discount = discount;
  object.overseapurchas = overseapurchas;
  object.fee = fee;
  object.prime = prime;
  return object;
}

/**
 * GET /queryAmazonData?url=<product page URL>
 *
 * Fetches the product page, reads the variant id map from it, fetches every
 * variant's detail page and responds with an array of { topicData } records.
 * Every failure path now sends a response instead of leaving the request open.
 *
 * SECURITY: the server fetches a caller-supplied URL (SSRF risk). Restrict
 * the accepted hosts before exposing this endpoint beyond local use.
 */
app.get('/queryAmazonData', function (req, res, next) {
  const targetUrl = req.query.url;
  if (typeof targetUrl !== 'string' || targetUrl === '') {
    return res.status(400).send({ error: 'query parameter "url" is required' });
  }
  console.log(targetUrl);

  superagent.get(targetUrl).end(function (err, response) {
    if (err || !response) {
      console.error(err);
      return res.status(502).send({ error: 'failed to fetch the product page' });
    }

    const $ = cheerio.load(response.text);

    // Use the first script that actually carries the map, so the response
    // is sent exactly once.
    let asinMap = null;
    $('#twisterJsInitializer_feature_div>script').each(function () {
      const parsed = extractAsinMap($(this).html() || '');
      if (parsed) {
        asinMap = parsed;
        return false; // stop iterating
      }
    });
    if (!asinMap) {
      return res.status(404).send({ error: 'dimensionToAsinMap not found on the page' });
    }

    const urlArray = buildVariantUrls(targetUrl, asinMap);
    if (!urlArray) {
      return res.status(400).send({ error: 'url must contain "dp/" or "d/"' });
    }
    if (urlArray.length === 0) {
      return res.send([]);
    }

    const ep = new eventproxy();

    // Once every detail request has reported back, return the records.
    ep.after('topic_html', urlArray.length, function (topics) {
      topics = topics
        .filter(function (topicPair) {
          return topicPair[1] !== null; // skip variants whose fetch failed
        })
        .map(function (topicPair) {
          return { topicData: topicPair[1] };
        });
      console.log('final:');
      console.log(topics);
      res.send(topics);
    });

    urlArray.forEach(function (topicUrl) {
      superagent.get(topicUrl).end(function (detailErr, detailRes) {
        if (detailErr || !detailRes) {
          console.error(detailErr);
          // Still emit so the counter in ep.after() reaches its target.
          return ep.emit('topic_html', [topicUrl, null]);
        }
        ep.emit('topic_html', [topicUrl, parseProductDetail(detailRes.text)]);
      });
    });
  });
});

// NOTE: no CORS headers are set here; a frontend served from another origin
// needs a proxy or CORS middleware in front of these routes.

app.get('/index.html', function (req, res) {
  res.sendFile(__dirname + "/" + "index.html");
});

/**
 * GET /process_get?first_name=…&last_name=…
 * Echoes the two query parameters back as JSON.
 */
app.get('/process_get', function (req, res) {
  const response = {
    "first_name": req.query.first_name,
    "last_name": req.query.last_name
  };
  console.log(response);
  // Previously nothing was sent, so the client request never completed.
  res.end(JSON.stringify(response));
});

app.listen(3000, function () {
  console.log('app is running at port 3000');
});
前端部分程式碼:
query(params) { // 請求nodejs後端介面拿到抓取到的商品資訊 // const current = (params && params.current) || 1;// 頁數,預設1,可傳入指定(點選分頁時會傳入) // this.state.pagination.current = current;// 點選查詢後 返回第一頁 // const pageSize = (params && params.pageSize) || this.state.pagination.pageSize;// 條數,一頁 const a = { ...params }; // a.pageSize = pageSize; // a.pageNo = current; this.setState({ ruleLoading: true }); queryData(a).then((data) => { if (data) { let array = []; for (let i = 0; i < data.length; i++) { const temp = data[i].topicData; array.push(temp); } this.setState({ list: array, ruleLoading: false }); } this.setState({ ruleLoading: false }); }); // console.log(data); }
render() {//UI展示 const { modalVisible, ruleLoading, description } = this.state; const pagination = { total: parseInt(this.state.pagination.total, 100), showTotal: total => `共 ${total} 條`, current: this.state.pagination.current, // showSizeChanger: true, showQuickJumper: true, onShowSizeChange: (current, pageSize) => { this.state.pagination.pageSize = pageSize; }, onChange: (current) => { const form = this.props.form; const a = form.getFieldValue('businessTypeName'); const b = form.getFieldValue('status'); const params = {}; params.businessTypeName = a; params.status = b; params.current = current; this.query(params); }, }; const columns = [ { title: '商品名稱', dataIndex: 'name', width: '25%', }, { title: '價格', dataIndex: 'price', }, { title: '運費+稅費', dataIndex: 'fee', width: '20%', }, { title: '折扣', dataIndex: 'discount', }, { title: '海外', dataIndex: 'overseapurchas', }, { title: 'prime', dataIndex: 'prime', }, { title: '尺寸', dataIndex: 'size:', }, { title: '顏色', dataIndex: 'color', }, ]; return ( <PageHeaderLayout title="資料查詢" > <Card> <div className={styles.tableList}> <div className={styles.tableListForm}> {this.renderForm()} {/* {this.renderAdvancedForm()} */} </div> <Table loading={ruleLoading} dataSource={this.state.list} columns={columns} pagination={pagination} onChange={this.handleTableChange} /> </div> </Card> </PageHeaderLayout> ); } }