從零開始--03-資料儲存
阿新 • • 發佈:2020-12-26
資料儲存
前面已經實現了資料的下載和解析,接下來,我們需要把資料儲存到資料庫中,將來寫介面的時候,可以直接讀取我們自己的資料庫獲取資料,順便練習一下python的資料庫操作。
準備工作
- 資料庫,這裡我們選擇使用mysql
- 資料庫管理工具,我們選擇免費的DBeaver
- PyMySQL
資料處理
我們精簡一下資料,修改後的實體類如下
class VideoBean:
    """Video data.

    A trimmed-down record holding only the keys listed in ``FIELDS``; every
    other key of the source mapping is ignored.
    """

    # Keys copied verbatim from the source mapping onto the instance.
    FIELDS = (
        "id",
        "title",
        "poster",
        "source_name",
        "play_url",
        "playcnt",
        "mthid",
        "mthpic",
        "threadId",
        "duration",
        "comment_id",
        "publish_time",
        "new_cate_v2",
        "like",
        "fmlike",
        "comment",
        "fmcomment",
        "fmplaycnt",
        "fmplaycnt_2",
        "outstand_tag",
    )

    def __init__(self, data):
        """Copy each key named in ``FIELDS`` from ``data`` to an attribute.

        Args:
            data: Mapping indexed by the field names (``data[name]``).

        Raises:
            KeyError: If ``data`` is a dict lacking one of the keys.
        """
        for name in self.FIELDS:
            setattr(self, name, data[name])

    def __repr__(self):
        return "<Video>[%s]" % self.id
資料獲取
資料獲取部分,我們依舊使用之前資料下載章節的程式碼即可
儲存
需要先建立一個資料庫
-- Create the `video` database with utf8mb4 as its default character set.
CREATE DATABASE `video` DEFAULT CHARSET utf8mb4 COLLATE utf8mb4_general_ci;
獲取資料庫連線
# Connect with utf8mb4 so the connection charset matches the utf8mb4 charset
# the `video` database is created with; MySQL's "utf8" is the 3-byte utf8mb3
# and cannot carry 4-byte characters such as emoji.
# NOTE(review): host and credentials are hard-coded for the tutorial; move
# them to configuration before using this anywhere real.
db = pymysql.connect(host='192.168.3.30', port=3306, user='root', password='123456', database="video",
                     charset="utf8mb4")
cursor = db.cursor()
建立表
# DDL for the `video` table: an auto-increment surrogate key `_id` plus one
# VARCHAR column per stored video field. The pieces below concatenate to a
# single statement string.
video_sql = (
    " CREATE TABLE IF NOT EXISTS video("
    "_id INTEGER PRIMARY KEY AUTO_INCREMENT,"
    "id VARCHAR(30) NOT NULL,"
    "title VARCHAR(300) NOT NULL,"
    "poster VARCHAR(300) NOT NULL,"
    "source_name VARCHAR(300) NOT NULL,"
    "play_url VARCHAR(300) NOT NULL,"
    "playcnt VARCHAR(40) NOT NULL,"
    "mthid VARCHAR(30) NOT NULL,"
    "mthpic VARCHAR(300) NOT NULL,"
    "threadId VARCHAR(30) NOT NULL,"
    "duration VARCHAR(30) NOT NULL,"
    " comment_id VARCHAR(30) NOT NULL,"
    "publish_time VARCHAR(30) NOT NULL,"
    "new_cate_v2 VARCHAR(30) NOT NULL,"
    "`like` VARCHAR (40) NOT NULL,"
    "fmlike VARCHAR(30) NOT NULL,"
    "comment VARCHAR(30) NOT NULL,"
    "fmcomment VARCHAR(30) NOT NULL,"
    "fmplaycnt VARCHAR(30) NOT NULL,"
    "fmplaycnt_2 VARCHAR(30) NOT NULL,"
    "outstand_tag VARCHAR(30) NOT NULL)"
)
# Create the table on first run; IF NOT EXISTS makes later runs a no-op.
cursor.execute(video_sql)
抓取資料並存儲
我們希望抓取更多分類的資料,所以會準備一個視訊分類的list,之後遍歷去查詢獲取。
# Category names passed one by one to GetVideoData.get_video_data().
video_categories = [
    "recommend",
    "yingshi",
    "yinyue",
    "vlog",
    "youxi",
    "gaoxiao",
    "zongyi",
    "yule",
    "dongman",
    "shenghuo",
    "guangchangwu",
    "meishi",
    "chongwu",
    "sannong",
    "junshi",
    "shehui",
    "tiyu",
    "keji",
    "shishang",
    "qiche",
    "qinzi",
    "jiaoyu",
    "wenhua",
    "lvyou",
    "miaodong",
]

# Ten passes over the whole category list; each pass fetches one batch per
# category, stores it, and then pauses for a second before the next request.
for i in range(10):
    for item in video_categories:
        videos = GetVideoData.get_video_data(item)
        save(videos, cursor, db)
        print("{} 已下載".format(item))
        time.sleep(1)
完整的程式碼
import pymysql
import GetVideoData
import time
def save(videos, cursor, db):
    """Insert the videos that are not yet stored into the ``video`` table.

    Each video is looked up by its ``id``; a video that is already present is
    printed and skipped, any other is inserted. Every value is handed to the
    driver as a query parameter instead of being formatted into the SQL text,
    so quotes or other special characters in a title cannot break or alter
    the statement.

    Args:
        videos: Sized iterable of objects exposing one attribute per stored
            column (``id``, ``title``, ... ``outstand_tag``).
        cursor: DB-API cursor using the ``%s`` parameter style (as PyMySQL
            does).
        db: Connection that owns ``cursor``; committed once after the batch.

    Returns:
        None. When an argument is ``None`` or ``videos`` is empty, a message
        is printed and nothing is written.
    """
    if videos is None:
        print("videos 為null")
        return
    if len(videos) == 0:
        print("videos 長度為0")
        return
    if cursor is None:
        print("cursor 為null")
        return
    if db is None:
        print("db 為null")
        return

    # Column names double as the attribute names read from each video.
    columns = (
        "id", "title", "poster", "source_name", "play_url",
        "playcnt", "mthid", "mthpic", "threadId", "duration",
        "comment_id", "publish_time", "new_cate_v2",
        "like", "fmlike", "comment", "fmcomment",
        "fmplaycnt", "fmplaycnt_2", "outstand_tag",
    )
    insert_sql = "INSERT INTO video({}) VALUES ({})".format(
        ", ".join("`{}`".format(name) for name in columns),
        ", ".join(["%s"] * len(columns)),
    )
    exists_sql = "SELECT COUNT(*) FROM video WHERE `id` = %s"

    for video in videos:
        # Check for an existing row first: insert when absent, skip otherwise.
        cursor.execute(exists_sql, (video.id,))
        res = cursor.fetchone()
        if res[0] > 0:
            print(video)
            continue
        cursor.execute(insert_sql, tuple(getattr(video, name) for name in columns))

    # One commit for the whole batch.
    db.commit()
# DDL for the `video` table: an auto-increment surrogate key `_id` plus one
# VARCHAR column per stored video field.
video_sql = """
CREATE TABLE IF NOT EXISTS video(_id INTEGER PRIMARY KEY AUTO_INCREMENT,id VARCHAR(30) NOT NULL,title VARCHAR(300) NOT
NULL,poster VARCHAR(300) NOT NULL,source_name VARCHAR(300) NOT NULL,play_url VARCHAR(300) NOT NULL,playcnt VARCHAR(40) NOT
NULL,mthid VARCHAR(30) NOT NULL,mthpic VARCHAR(300) NOT NULL,threadId VARCHAR(30) NOT NULL,duration VARCHAR(30) NOT NULL,
comment_id VARCHAR(30) NOT NULL,publish_time VARCHAR(30) NOT NULL,new_cate_v2 VARCHAR(30) NOT NULL,`like` VARCHAR (40)
NOT NULL,fmlike VARCHAR(30) NOT NULL,comment VARCHAR(30) NOT NULL,fmcomment VARCHAR(30) NOT NULL,fmplaycnt VARCHAR(30)
NOT NULL,fmplaycnt_2 VARCHAR(30) NOT NULL,outstand_tag VARCHAR(30) NOT NULL)
"""

# Category names passed one by one to GetVideoData.get_video_data().
video_categories = ["recommend", "yingshi", "yinyue", "vlog", "youxi", "gaoxiao", "zongyi", "yule", "dongman",
                    "shenghuo", "guangchangwu", "meishi", "chongwu", "sannong", "junshi", "shehui", "tiyu", "keji",
                    "shishang", "qiche", "qinzi", "jiaoyu", "wenhua", "lvyou", "miaodong"]

# Connect with utf8mb4 so the connection charset matches the utf8mb4 charset
# the `video` database is created with; MySQL's "utf8" is the 3-byte utf8mb3
# and cannot carry 4-byte characters such as emoji.
# NOTE(review): host and credentials are hard-coded for the tutorial; move
# them to configuration before using this anywhere real.
db = pymysql.connect(host='192.168.3.30', port=3306, user='root', password='123456', database="video",
                     charset="utf8mb4")
try:
    cursor = db.cursor()
    # Simple round trip to the server; the result is not read.
    cursor.execute("SELECT VERSION()")
    # Create the table (IF NOT EXISTS makes this a no-op on later runs).
    cursor.execute(video_sql)
    # Fetch and store: ten passes over every category, pausing one second
    # after each request.
    for i in range(10):
        for item in video_categories:
            videos = GetVideoData.get_video_data(item)
            save(videos, cursor, db)
            print("{} 已下載".format(item))
            time.sleep(1)
finally:
    # Release the connection even when a download or a query raises.
    db.close()
總結
這一節,完成了資料的抓取並存儲到資料庫
下一節,我們整理一下當前的程式碼,並處理一下視訊分類資料。