1. 程式人生 > 其它 >Koa2 + Puppeteer打造『爬蟲系統』7

Koa2 + Puppeteer打造『爬蟲系統』7

技術標籤:實戰專案、node.js、爬蟲、javascript

十九.建立課程資料模型以及資料入表操作

1.建立course.js的模型

const seq = require('../connection/mysql_connect'),
   { STRING,INT } = require('../../config/db_type_config');
/**
 * Builds the attribute options shared by every mandatory column of this model.
 * @param {*} type - column type imported from db_type_config
 * @param {string} comment - column comment
 * @param {object} [extra] - additional attribute options (e.g. unique, defaultValue)
 * @returns {object} attribute definition with allowNull set to false
 */
function requiredColumn(type, comment, extra) {
    return Object.assign({ comment, type, allowNull: false }, extra);
}

// 'course' model definition: every column is mandatory, `cid` is unique and
// `status` defaults to 1.
const Course = seq.define('course', {
    cid: requiredColumn(INT, 'Course ID', { unique: true }),
    href: requiredColumn(STRING, 'Course detail page link'),
    posterUrl: requiredColumn(STRING, 'Course img url'),
    courseName: requiredColumn(STRING, 'Course name'),
    price: requiredColumn(STRING, 'Course price'),
    description: requiredColumn(STRING, 'Course description'),
    studentCount: requiredColumn(INT, 'Course count'),
    field: requiredColumn(INT, 'the course the tab'),
    posterKey: requiredColumn(STRING, 'qiniu course image name'),
    status: requiredColumn(INT, 'course status', { defaultValue: 1 })
});
module.exports = Course;

2.入口匯入

const Slider = require('./slider'),
      RecomCourse = require('./recomCourse'),
      AgencyInfo = require('./agencyinfo'),
      Collection = require('./collection'),
      Teacher =  require('./teacher'),
      Student = require('./student'),
      CourseTab = require('./courseTab')
      Course = require('./course')

module.exports={
    Slider,AgencyInfo,RecomCourse,Collection,Teacher,Student,CourseTab,Course
}

執行 node do/sync.js 同步表

3.在service中寫入course.js

const CourseModel = require('../do/models/course');
/**
 * Persistence helpers for crawled course records.
 */
class CourseService{
    /**
     * Stores a course: updates the existing row when one with the same cid is
     * found, otherwise creates a new row.
     * @param {object} data - course fields; data.cid is the lookup key
     * @returns {Promise<*>} the value resolved by CourseModel.update or CourseModel.create
     */
    async addCourse(data){
        const { cid } = data;
        const existing = await CourseModel.findOne({ where:{ cid } });
        if(!existing){
            return await CourseModel.create(data);
        }
        return await CourseModel.update(data, { where:{ cid } });
    }
}
module.exports = new CourseService();

4.完善控制器

 // Import at the top of the controller. The `const` is required: a statement
 // that starts with `{` is parsed as a block, so a bare `{addCourse} = ...`
 // is a syntax error.
const {addCourse} = require('../service/course')
// Inside the controller's handler: persist one crawled course and log the outcome.
// `item` comes from the surrounding controller code, which is not shown here.
const result = await addCourse(item);
if(result){
    console.log('Data create Ok')
}else{
    console.log('Data create failed')
}

二十.爬取關於我們資料、建立表模型、資料入表

1.配置config.js

module.exports={
    qiniu:{
        keys:{
            ak:'qMK7okFyL1xX2o8gjFzy1PrI_jXR_yf58naIcIs8',
            sk:'j6Ngxxy6xSzXi9czKXun0Uju_5hDahAla9THml9C'
        },
        bucket:{
            tximg:{
                bucket_name:'crawler-txclass',
                domain:'http://qexgb3yqx.hn-bkt.clouddn.com'
            }
        }
    },
    crawler:{
        url:{
            main:'https://msiwei.ke.qq.com/#tab=0&category=-1',
            course:'https://msiwei.ke.qq.com/#tab=1&category=-1',
            teacher:'https://msiwei.ke.qq.com/#tab=2&category=-1',
            aboutus:'https://msiwei.ke.qq.com/#category=-1&tab=3'
        }
    }
}

2.建立aboutus.js爬蟲檔案

const Crawler = require('../lib/crawler'),
      {crawler} = require('../config/config');
// Crawls the "about us" tab and returns a single record describing the agency.
Crawler({
    url:crawler.url.aboutus,
    // NOTE(review): this reads window.$, so it is presumably evaluated inside the
    // crawled page; keep it self-contained (no outer-scope helpers). Confirm
    // against lib/crawler.
    callback(){
        const $ = window.$,
              $wrapper = $('.agency-about');
        // The banner is a CSS background. Guard against a missing element
        // (css() returns undefined), a value of 'none', and url(...) values
        // that are unquoted or single-quoted, so a layout change yields an
        // empty posterUrl instead of a TypeError.
        const bgImage = $wrapper.find('.about-banner-pic0').css('background-image') || '';
        const urlMatch = bgImage.match(/url\(\s*["']?(.+?)["']?\s*\)/);
        return {
            aid:1,
            posterUrl:urlMatch ? urlMatch[1] : '',
            title:$wrapper.find('.about-agency-propagate').text(),
            name:$wrapper.find('.about-agency-name').text(),
            intro:$wrapper.find('.about-agency-intr').text(),
            // Left empty here; the controller sets it after uploading the poster.
            posterKey:''
        }
    }
})

3.編輯路由

// GET /crawl_aboutus is handled by crawlerController.crawlAboutus.
router.get('/crawl_aboutus',crawlerController.crawlAboutus )

4.編寫控制器

crawlAboutus(){
        startProcess({
            path:'../crawler/aboutus',
            async message(data){
                if(data.posterUrl && !data.posterKey){
                    try {
                        const posterData = await qiniuUpload({
                            url:data.posterUrl,
                            bucket:qiniu.bucket.tximg.bucket_name,
                            ext:'.jpg'
                        })
                        if(posterData.key){
                            data.posterKey = posterData.key
                        }
                    } catch (error) {
                        console.log(error)
                    }
                }
            },
            async exit(data){
                console.log(data);
            },
            async error(data){
                console.log(data);
            }
        })
    }

5.建立表模型

const seq = require('../connection/mysql_connect'),
   { STRING,INT ,TEXT} = require('../../config/db_type_config');
/**
 * Builds the attribute options shared by every mandatory column of this model.
 * @param {*} type - column type imported from db_type_config
 * @param {string} comment - column comment
 * @param {object} [extra] - additional attribute options (e.g. unique)
 * @returns {object} attribute definition with allowNull set to false
 */
function requiredColumn(type, comment, extra) {
    return Object.assign({ comment, type, allowNull: false }, extra);
}

// 'aboutus' model definition: every column is mandatory and `aid` is unique.
const Aboutus = seq.define('aboutus', {
    aid: requiredColumn(INT, 'aboutus Id', { unique: true }),
    posterUrl: requiredColumn(STRING, 'poster'),
    title: requiredColumn(STRING, 'title'),
    name: requiredColumn(STRING, 'NAME'),
    intro: requiredColumn(TEXT, 'introduction'),
    posterKey: requiredColumn(STRING, 'qiniu poster image name')
});
module.exports = Aboutus;

6.入口檔案匯入

const Slider = require('./slider'),
      RecomCourse = require('./recomCourse'),
      AgencyInfo = require('./agencyinfo'),
      Collection = require('./collection'),
      Teacher =  require('./teacher'),
      Student = require('./student'),
      CourseTab = require('./courseTab')
      Course = require('./course'),
      Aboutus =  require('./aboutus'),

module.exports={
    Slider,AgencyInfo,RecomCourse,Collection,Teacher,Student,CourseTab,Course,Aboutus
}

7.在service中建立aboutus.js

const AboutusModel = require('../do/models/aboutus');
/**
 * Persistence helpers for the crawled "about us" record.
 */
class AboutusService {
    /**
     * Stores the about-us record: updates the existing row when one with the
     * same aid is found, otherwise creates a new row.
     *
     * The lookup filters on the `aid` column. Filtering on `id` would compare
     * the crawler-assigned aid against a different column and only match by
     * coincidence.
     * @param {object} data - about-us fields; data.aid is the lookup key
     * @returns {Promise<*>} the value resolved by AboutusModel.update or AboutusModel.create
     */
    async addAboutus(data){
        const aid = data.aid;
        const result = await AboutusModel.findOne({
            where:{aid}
        });
        if(result){
            return await AboutusModel.update(data,{
                where:{aid}
            })
        }else{
            return await AboutusModel.create(data);
        }
    }
}
module.exports=new AboutusService();

8.控制器完善

 // Goes inside the crawlAboutus `message(data)` handler, after the poster upload.
 // That handler receives the crawled record as `data`; no `item` is in scope there.
 const result = await addAboutus(data);
 if(result){
     console.log('Data create Ok')
 }else{
     console.log('Data create failed')
 }

訪問 /crawl_aboutus 路由,即可將爬取的資料寫入資料表

二十一.統一同步表模型以及資料入表操作

node do/sync.js

依次訪問各個爬蟲路由,將資料寫入對應的表