1. 程式人生 > 實用技巧 > python分析拉勾網職位資訊-python職位資訊

python分析拉勾網職位資訊-python職位資訊

import pandas as pd
import matplotlib.pyplot as plt
import jieba,wordcloud
from PIL import Image
import numpy as np

# Data cleaning: load the scraped job postings and derive numeric columns.
df = pd.read_excel(r'C:\Users\xxx\Desktop\out/lagou_1.xlsx')

# Salary cells are expected to look like "10k-20K": drop the unit letter,
# split on the dash and average the two bounds. A local frame is used for the
# bounds so no temporary columns have to be added to (and deleted from) df.
salary_bounds = (
    df['工資']
    .str.replace('[kK]', '', regex=True)
    .str.split('-', expand=True)
)
df['工資平均'] = (salary_bounds[1].astype(int) + salary_bounds[0].astype(int)) / 2

# Per-experience-label aggregation; not used by the rest of the visible
# script, kept unchanged in case later code reads it.
df_1 = df.groupby(by='經驗').agg({'經驗': ['nunique']})

# Representative number of years for each experience label. The replacements
# are applied in dict order as literal (non-regex) substring substitutions.
EXPERIENCE_YEARS = {
    '1-3年': '2',
    '10年以上': '10',
    '1年以下': '1',
    '3-5年': '4',
    '5-10年': '8',
    '不限': '0',
    '應屆畢業生': '0',
}
experience = df['經驗']
for label, years in EXPERIENCE_YEARS.items():
    experience = experience.str.replace(label, years, regex=False)
# Any label missing from the table stays non-numeric and makes astype(int) raise.
df['平均經驗'] = experience.astype(int)

# Strip list-formatting noise from the free-text columns (presumably the cells
# hold a stringified Python list - confirm against the scraper output).
# The escaped newline marker (a backslash followed by "n") is removed as a
# unit; removing every bare "n" would corrupt words such as "python".
# regex=True is explicit because the default differs between pandas versions.
df['職位描述'] = df['職位描述'].str.replace(r"\\n|[\[\]' \\,#]", '', regex=True)
# The work location additionally drops "-" separators.
df['工作地點'] = df['工作地點'].str.replace(r"\\n|[\[\]' \\,#-]", '', regex=True)
print(df['工作地點'])
# To cache the cleaned frame, write it out with
# df.to_excel(r'C:\Users\xxx\Desktop\out/lagou_2.xlsx') and reload it with pd.read_excel.
print(df)

# Use a CJK-capable font so Chinese labels render, and keep the minus sign
# displayable with that font.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Visualization: histograms
def _save_histogram(values, xlabel, title, path):
    """Plot a frequency histogram of *values*, save it to *path* and show it.

    Each call starts a new figure so that charts never pile up on the same
    axes, regardless of whether the backend closes figures in ``plt.show()``.
    """
    plt.figure()
    plt.hist(values, alpha=0.8, color='steelblue')
    plt.xlabel(xlabel)
    plt.ylabel('頻數')
    plt.title(title)
    # Save before showing: the figure may be released once the window closes.
    plt.savefig(path)
    plt.show()


_save_histogram(df['工資平均'], '工資/千元', "python平均工資直方圖", 'lagou-gongzi.jpg')
_save_histogram(df['平均經驗'], '經驗/年', "python平均經驗直方圖", 'lagou-jingyan.jpg')

# Visualization: pie chart of the education requirement distribution
count = df['學歷'].value_counts()
# Start a fresh figure so the pie is not drawn on top of a previous chart
# that is still the current figure.
plt.figure()
plt.pie(count, labels=count.index, shadow=True, autopct='%2.2f%%')
# Save before showing: the figure may be released once the window closes.
plt.savefig('lagou_xueli.jpg')
plt.show()

# Visualization: word cloud of the benefits column
# Load the mask image into an array inside a context manager so the file
# handle is closed as soon as the pixel data has been read.
with Image.open('桌布.jpg') as pic:
    mang_mask = np.array(pic)

# Concatenate every benefits cell into one string; missing cells are skipped
# because they are not strings and cannot be concatenated.
strs = ''.join(df['福利'].dropna().astype(str))
print(strs)

# jieba segments the Chinese text; WordCloud expects space-separated tokens.
cut_strs = ' '.join(jieba.cut(strs))
word_cloud = wordcloud.WordCloud(
    font_path='/home/shen/Downloads/fonts/msyh.ttc',
    mask=mang_mask,
    background_color='white',
).generate(cut_strs)
word_cloud.to_file('lagou_wordcloud.jpg')

# Draw on a fresh figure so the image is not layered over an earlier chart.
plt.figure()
plt.imshow(word_cloud)
plt.show()