Python大數據:信用卡逾期分析
阿新 • • 發佈:2018-03-20
OS odi tuple his cat 歷史 true .py ade
# -*- coding:utf-8 -*-
# Data integration: load the three training tables and join them side by side.
import csv

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


def _read_table(path):
    """Read one CSV table, using its first column as the row index.

    Stands in for ``pd.DataFrame.from_csv``, which was deprecated in
    pandas 0.21 and removed in 1.0.  The keyword arguments reproduce the
    options the original calls passed explicitly; ``tupleize_cols`` and
    ``infer_datetime_format`` are dropped because newer pandas releases
    no longer accept or need them.
    """
    return pd.read_csv(path, header=0, sep=',', index_col=0, parse_dates=True)


# Customer information
basicInfo = _read_table('datas/basicInfo_train.csv')

# Historical repayment records
historyInfo = _read_table('datas/history_train.csv')

# Historical overdue (default) status
defaultInfo = _read_table('datas/default_train.csv')

# Column-wise concatenation; rows are matched on each table's index
# (the first CSV column -- presumably a customer id; verify against the data).
combineInfo = pd.concat([basicInfo, historyInfo, defaultInfo], axis=1)
# Preview the first 10 rows of the combined table.
# NOTE(review): as a bare expression this only renders output in a
# notebook/REPL; wrap it in print() if the file is run as a plain script.
combineInfo[:10]
# Gender analysis: mean of the Default column for each SEX value.
gender = combineInfo.groupby('SEX')['Default'].mean().reset_index()

# NOTE(review): the tick positions assume SEX is coded 0/1; if the data
# uses another coding (e.g. 1/2) the labels will not line up with the
# bars -- confirm against the CSV.
plt.xticks((0, 1), (u"Male", u"Female"))
plt.xlabel(u"Gender")
# The bars show a per-group mean, not a count, so label the axis accordingly.
plt.ylabel(u"Mean Default")
plt.bar(gender.SEX, gender.Default, 0.1, color='green')
plt.show()
# Relationship between education level and the Default value:
# mean of Default for each EDUCATION value, drawn as a line plot
# (the Series index, i.e. the EDUCATION values, becomes the x axis).
edu = combineInfo.groupby('EDUCATION')['Default'].mean()
plt.plot(edu)
plt.show()
# Marital status analysis: mean of Default for each MARRIAGE value,
# drawn as a bar chart with bar width 0.5.
marriage = combineInfo.groupby('MARRIAGE')['Default'].mean().reset_index()
plt.bar(marriage.MARRIAGE, marriage.Default, 0.5, color='green')
plt.show()
Python大數據:信用卡逾期分析