1. 程式人生 > 實用技巧 >從零開始--03-資料儲存

從零開始--03-資料儲存

資料儲存

前面已經實現了資料的下載和解析。接下來,我們需要把資料儲存到資料庫中,將來寫介面的時候,就可以直接從自己的資料庫讀取資料,順便練習一下 Python 的資料庫操作。

準備工作

  1. 資料庫,這裡我們選擇使用 MySQL
  2. 資料庫管理工具,我們選擇免費的DBeaver
  3. PyMySQL

資料處理

我們精簡一下資料,修改後的實體類如下

class VideoBean:
    """Trimmed-down video record.

    Each attribute is copied unchanged from the same-named key of the
    mapping passed to the constructor; a missing key raises ``KeyError``.
    """

    # Keys read from the source mapping, listed in assignment order.
    _FIELDS = (
        "id",
        "title",
        "poster",
        "source_name",
        "play_url",
        "playcnt",
        "mthid",
        "mthpic",
        "threadId",
        "duration",
        "comment_id",
        "publish_time",
        "new_cate_v2",
        "like",
        "fmlike",
        "comment",
        "fmcomment",
        "fmplaycnt",
        "fmplaycnt_2",
        "outstand_tag",
    )

    def __init__(self, data):
        """Copy every key named in ``_FIELDS`` from ``data`` onto the instance."""
        for field in self._FIELDS:
            setattr(self, field, data[field])

    def __repr__(self):
        """Return a short tag containing only the video id."""
        return "<Video>[%s]" % self.id

資料獲取

資料獲取部分,我們依舊使用之前資料下載章節的程式碼即可

儲存

需要先建立一個資料庫

-- Create the `video` database with utf8mb4 as its default character set and utf8mb4_general_ci as its collation.
CREATE DATABASE `video` DEFAULT CHARSET utf8mb4 COLLATE utf8mb4_general_ci;

獲取資料庫連線

# Open the connection with utf8mb4 so the session character set matches the
# utf8mb4 default of the `video` database; MySQL's plain "utf8" is the 3-byte
# utf8mb3 and cannot carry 4-byte characters such as emoji.
# NOTE(review): host and credentials are hard-coded for the tutorial; load them
# from configuration before using this outside a local test setup.
db = pymysql.connect(host='192.168.3.30', port=3306, user='root', password='123456', database="video",
                     charset="utf8mb4")
# Single cursor used for all statements issued on this connection.
cursor = db.cursor()

建立表

# DDL for the `video` table: an auto-increment surrogate key `_id` followed by
# twenty NOT NULL VARCHAR columns, one per VideoBean attribute. `like` is
# back-quoted in the statement. IF NOT EXISTS lets the statement be re-run
# when the table is already present.
video_sql = """
CREATE TABLE IF NOT EXISTS video(_id INTEGER PRIMARY KEY AUTO_INCREMENT,id VARCHAR(30) NOT NULL,title VARCHAR(300) NOT 
NULL,poster VARCHAR(300) NOT NULL,source_name VARCHAR(300) NOT NULL,play_url VARCHAR(300) NOT NULL,playcnt VARCHAR(40) NOT 
NULL,mthid VARCHAR(30) NOT NULL,mthpic VARCHAR(300) NOT NULL,threadId VARCHAR(30) NOT NULL,duration VARCHAR(30) NOT NULL,
comment_id VARCHAR(30) NOT NULL,publish_time VARCHAR(30) NOT NULL,new_cate_v2 VARCHAR(30) NOT NULL,`like` VARCHAR (40) 
NOT NULL,fmlike VARCHAR(30) NOT NULL,comment VARCHAR(30) NOT NULL,fmcomment VARCHAR(30) NOT NULL,fmplaycnt VARCHAR(30) 
NOT NULL,fmplaycnt_2 VARCHAR(30) NOT NULL,outstand_tag VARCHAR(30) NOT NULL)"""
# Execute the DDL on the open cursor.
cursor.execute(video_sql)

抓取資料並存儲

我們希望抓取更多分類的資料,所以會準備一個視訊分類的list,之後遍歷去查詢獲取。

# Category names to crawl; each one is handed to GetVideoData.get_video_data().
video_categories = [
    "recommend", "yingshi", "yinyue", "vlog", "youxi",
    "gaoxiao", "zongyi", "yule", "dongman", "shenghuo",
    "guangchangwu", "meishi", "chongwu", "sannong", "junshi",
    "shehui", "tiyu", "keji", "shishang", "qiche",
    "qinzi", "jiaoyu", "wenhua", "lvyou", "miaodong",
]
# Ten full passes over the category list: fetch, store, report, then pause
# one second before the next category.
for _ in range(10):
    for category in video_categories:
        save(GetVideoData.get_video_data(category), cursor, db)
        print("{} 已下載".format(category))
        time.sleep(1)

完整的程式碼

import pymysql
import GetVideoData
import time


def save(videos, cursor, db):
    """Insert the videos that are not yet present in the ``video`` table.

    For every item a parameterized ``SELECT COUNT(*)`` checks whether a row
    with the same ``id`` exists. Existing videos are printed and skipped;
    new ones are inserted and committed one by one.

    All SQL values are passed as query parameters instead of being formatted
    into the statement text, so quotes or other special characters in titles
    and URLs can neither break the statement nor inject SQL. Values are
    converted with ``str()`` because every data column of the table is a
    VARCHAR; this is the same text the previous ``str.format`` based
    statement embedded.

    Args:
        videos: Sized iterable of objects exposing the attributes listed in
            ``columns`` below (for example ``VideoBean`` instances).
        cursor: DB-API cursor using the ``%s`` parameter style (PyMySQL).
        db: Connection that owns ``cursor``; ``commit()`` is called after
            each insert.

    Returns:
        None. When an argument is ``None`` or ``videos`` is empty, a message
        is printed and nothing is written.
    """
    if videos is None:
        print("videos 為null")
        return
    if len(videos) == 0:
        print("videos 長度為0")
        return
    if cursor is None:
        print("cursor 為null")
        return
    if db is None:
        print("db 為null")
        return

    # Column names double as the attribute names read from each video.
    columns = (
        "id", "title", "poster", "source_name", "play_url",
        "playcnt", "mthid", "mthpic", "threadId", "duration",
        "comment_id", "publish_time", "new_cate_v2", "like", "fmlike",
        "comment", "fmcomment", "fmplaycnt", "fmplaycnt_2", "outstand_tag",
    )
    insert_sql = "INSERT INTO video({}) VALUES ({})".format(
        ",".join("`{}`".format(name) for name in columns),
        ",".join(["%s"] * len(columns)),
    )
    exists_sql = "SELECT COUNT(*) FROM video WHERE `id` = %s"

    for video in videos:
        # Compare as text: `id` is a VARCHAR column, and an unquoted numeric
        # literal would force a lossy numeric comparison for long ids.
        cursor.execute(exists_sql, (str(video.id),))
        row = cursor.fetchone()
        if row[0] > 0:
            # Already stored: report it and move on.
            print(video)
            continue
        cursor.execute(insert_sql, tuple(str(getattr(video, name)) for name in columns))
        db.commit()


# Open the connection with utf8mb4 so the session character set can carry
# 4-byte characters (e.g. emoji in titles); MySQL's plain "utf8" is the 3-byte
# utf8mb3.
# NOTE(review): host and credentials are hard-coded for the tutorial; load them
# from configuration before using this outside a local test setup.
db = pymysql.connect(host='192.168.3.30', port=3306, user='root', password='123456', database="video",
                     charset="utf8mb4")
try:
    cursor = db.cursor()
    # Round-trip to the server; the result is not read.
    cursor.execute("SELECT VERSION()")
    # DDL for the `video` table: an auto-increment surrogate key `_id` plus
    # twenty NOT NULL VARCHAR columns matching the fields written by save().
    video_sql = """
CREATE TABLE IF NOT EXISTS video(_id INTEGER PRIMARY KEY AUTO_INCREMENT,id VARCHAR(30) NOT 
NULL,title VARCHAR(300) NOT NULL,poster VARCHAR(300) NOT NULL,source_name VARCHAR(300) NOT NULL,play_url VARCHAR(300) NOT 
NULL,playcnt VARCHAR(40) NOT NULL,mthid VARCHAR(30) NOT NULL,mthpic VARCHAR(300) NOT NULL,threadId VARCHAR(30) NOT NULL,
duration VARCHAR(30) NOT NULL,comment_id VARCHAR(30) NOT NULL,publish_time VARCHAR(30) NOT NULL,new_cate_v2 VARCHAR(30) 
NOT NULL,`like` VARCHAR (40) NOT NULL,fmlike VARCHAR(30) NOT NULL,comment VARCHAR(30) NOT NULL,fmcomment VARCHAR(30) 
NOT NULL,fmplaycnt VARCHAR(30) NOT NULL,fmplaycnt_2 VARCHAR(30) NOT NULL,outstand_tag VARCHAR(30) NOT NULL)
"""
    # Create the table (not the database) if it does not exist yet.
    cursor.execute(video_sql)

    # Fetch and store the data: ten passes over every category, pausing one
    # second after each category.
    video_categories = ["recommend", "yingshi", "yinyue", "vlog", "youxi", "gaoxiao", "zongyi", "yule", "dongman",
                        "shenghuo", "guangchangwu", "meishi", "chongwu", "sannong", "junshi", "shehui", "tiyu", "keji",
                        "shishang", "qiche", "qinzi", "jiaoyu", "wenhua", "lvyou", "miaodong"]
    for i in range(10):
        for item in video_categories:
            videos = GetVideoData.get_video_data(item)
            save(videos, cursor, db)
            print("{} 已下載".format(item))
            time.sleep(1)
finally:
    # Always release the connection, even when a download or insert fails.
    db.close()

總結

這一節,完成了資料的抓取並存儲到資料庫

下一節,我們整理一下當前的程式碼,並處理視訊分類資料。