Python資料分析——誰才是《三體》的主角?
阿新 • • 發佈:2018-12-16
準備工作
matplotlib庫
三體.txt(utf-8編碼)
三體主要人物.txt(utf-8編碼)
大綱
匯入matplotlib.pyplot模組,建立1個Novel類,包含2個屬性和4個方法(不含__init__)。
import matplotlib.pyplot as plt


class Novel():
    """A class representing a novel."""

    def __init__(self, novel_name, roles_name):
        """Initialise the attributes of the class."""

    def one_role_nums(self, role_name):
        """Count how many times one character's name appears."""

    def main_roles_nums(self):
        """Count how many times the main characters' names appear."""

    def show_datas(self):
        """Print the characters' names and their numbers of occurrences."""

    def draw_picture(self, title = ""):
        """Visualise the data."""
初始化
類包含兩個屬性,小說名字和主要人物名字。
主要人物來源於《三體》百度百科,以每行一個名字的格式儲存於txt文件,如下圖。
注:艾aa的名字採用aa,因小說中多稱呼aa。
def __init__(self, novel_name, roles_name): """初始化類的屬性""" self.novel_name = novel_name self.roles_name = roles_name
one_role_nums方法
預設名字為2-4個字,否則不計數。
def one_role_nums(self, role_name): """統計一個人物名字出現的次數""" with open(self.novel_name, encoding = 'utf-8') as f: content= f.read() count = 0 if len(role_name) < 2 or len(role_name) > 4: return 0 if len(role_name) == 2: for i in range(len(content) - 1): if content[i] + content[i + 1] == role_name: count += 1 if len(role_name) == 3: for i in range(len(content) - 2): if content[i] + content[i + 1] + content[i + 2]== role_name: count += 1 if len(role_name) == 4: for i in range(len(content) - 3): if content[i] + content[i + 1] + content[i + 2] + content[i + 3] == role_name: count += 1 return count
main_roles_nums方法
返回一個列表,元素為(名字, 出現次數)元組,按出現次數降序排列;出現次數為0的名字不會列入。
def main_roles_nums(self): """統計主要人物名字出現的次數""" with open(self.roles_name, encoding = 'utf-8') as f: names = [line.strip() for line in f.readlines()] dic = {} for name in names: num = self.one_role_nums(name) if num: dic[name] = num dic = sorted(dic.items(), key = lambda k : k[1], reverse = True) return dic
show_datas方法
def show_datas(self): """輸出人物名字及出現次數""" dic = self.main_roles_nums() for x in dic: print(x[0], x[1])
draw_picture方法
運用matplotlib庫將所得資料視覺化。
呼叫方法時可賦予一個標題,預設為空。
def draw_picture(self, title=""):
    """Show a bar chart of how often each main character's name appears.

    The bars follow the order returned by ``main_roles_nums`` (highest
    count first) and are labelled with the characters' names.

    Args:
        title: Optional chart title; defaults to an empty string.
    """
    ranking = self.main_roles_nums()
    names = [name for name, _ in ranking]
    nums = [num for _, num in ranking]
    positions = list(range(len(names)))

    plt.figure(figsize=(10, 6))
    plt.bar(positions, nums, alpha=0.5)
    # plt.bar centres each bar on its x position by default, so the axis
    # has to start half a step before 0; a lower limit of 0 would clip the
    # left half of the first bar.
    plt.xlim((-0.5, len(names) - 0.5))
    # The "SimHei" font is requested for the Chinese labels and title; it
    # must be installed on the machine for them to render.
    plt.xticks(positions, names, rotation=30, fontproperties="SimHei", fontsize=24)
    plt.title(title, fontproperties="SimHei", fontsize=40)
    plt.show()
建立物件並呼叫方法
建立一個物件,賦予兩個屬性。
# Create the Novel object from the novel text file and the character-name list file.
novel = Novel("三體.txt", "三體主要人物.txt")
呼叫方法show_datas()
# Print every counted name together with its number of occurrences.
novel.show_datas()
呼叫方法draw_picture()並賦予一個標題
# Draw the bar chart, passing the chart title as the argument.
novel.draw_picture("三體主要人物名字出現次數")
結果符合預期:汪淼、羅輯和程心是當之無愧的三大主角,且三人名字的出現次數隨《三體I》《三體II》《三體III》篇幅的增加而依次增多。
其他人物名字出現的次數基本與觀感相符合。
全部程式碼
import matplotlib.pyplot as plt


class Novel():
    """A novel whose main characters' name frequencies are counted and plotted.

    Attributes are plain file names: ``novel_name`` is the novel text file
    and ``roles_name`` is a file with one character name per line. Both
    files are read as UTF-8.
    """

    def __init__(self, novel_name, roles_name):
        """Store the two file names the analysis works from.

        Args:
            novel_name: Path of the novel text file.
            roles_name: Path of the file listing the main characters' names.
        """
        self.novel_name = novel_name
        self.roles_name = roles_name

    def one_role_nums(self, role_name):
        """Count how many times one character's name appears in the novel.

        Only names of 2 to 4 characters are counted. Occurrences may
        overlap (for example "aa" is found twice in "aaa"), exactly as a
        position-by-position scan of the text would report.

        Args:
            role_name: The character's name to search for.

        Returns:
            The number of positions in the novel text at which
            ``role_name`` starts, or 0 when the name is shorter than 2 or
            longer than 4 characters (the novel file is then not opened).
        """
        # Reject unsupported lengths first so no file I/O is wasted on them.
        if not 2 <= len(role_name) <= 4:
            return 0

        with open(self.novel_name, encoding='utf-8') as f:
            content = f.read()

        count = 0
        # Restart the search one character after each hit (not after the
        # whole match) so overlapping occurrences are kept; str.count would
        # skip them.
        pos = content.find(role_name)
        while pos != -1:
            count += 1
            pos = content.find(role_name, pos + 1)
        return count

    def main_roles_nums(self):
        """Count the occurrences of every main character's name.

        Reads one name per line from the roles file, counts each name with
        ``one_role_nums`` and leaves out names whose count is 0.

        Returns:
            A list of ``(name, count)`` tuples sorted by count, highest
            first.
        """
        with open(self.roles_name, encoding='utf-8') as f:
            names = [line.strip() for line in f]

        counts = {}
        for name in names:
            num = self.one_role_nums(name)
            if num:
                counts[name] = num
        return sorted(counts.items(), key=lambda item: item[1], reverse=True)

    def show_datas(self):
        """Print each main character's name and occurrence count, one per line."""
        for name, num in self.main_roles_nums():
            print(name, num)

    def draw_picture(self, title=""):
        """Show a bar chart of how often each main character's name appears.

        The bars follow the order returned by ``main_roles_nums`` (highest
        count first) and are labelled with the characters' names.

        Args:
            title: Optional chart title; defaults to an empty string.
        """
        ranking = self.main_roles_nums()
        names = [name for name, _ in ranking]
        nums = [num for _, num in ranking]
        positions = list(range(len(names)))

        plt.figure(figsize=(12, 9))
        plt.bar(positions, nums, alpha=0.5)
        # plt.bar centres each bar on its x position by default, so the
        # axis has to start half a step before 0; a lower limit of 0 would
        # clip the left half of the first bar.
        plt.xlim((-0.5, len(names) - 0.5))
        # The "SimHei" font is requested for the Chinese labels and title;
        # it must be installed on the machine for them to render.
        plt.xticks(positions, names, rotation=30, fontproperties="SimHei", fontsize=24)
        plt.title(title, fontproperties="SimHei", fontsize=40)
        plt.show()


# Create the Novel object, print the counts, then draw the titled bar chart.
novel = Novel("三體.txt", "三體主要人物.txt")
novel.show_datas()
novel.draw_picture("三體主要人物名字出現次數")