1. 程式人生 > Python大數據:信用卡逾期分析

Python大數據:信用卡逾期分析

OS odi tuple his cat 歷史 true .py ade

# -*- coding:utf-8 -*-
# 數據集成

import csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  

def _load_csv(path):
    """Read one CSV file into a DataFrame indexed by its first column.

    ``DataFrame.from_csv`` was removed in pandas 1.0. This ``read_csv`` call
    is the replacement given in its deprecation notice; the dropped keyword
    arguments (``encoding=None``, ``tupleize_cols=False``,
    ``infer_datetime_format=False``) were either defaults or have since been
    removed from pandas.
    """
    return pd.read_csv(path, header=0, sep=",", index_col=0, parse_dates=True)


# Customer information
basicInfo = _load_csv("datas/basicInfo_train.csv")
# Historical repayment records
historyInfo = _load_csv("datas/history_train.csv")
# Historical default (overdue) status
defaultInfo = _load_csv("datas/default_train.csv")

# Put the three tables side by side; axis=1 concatenation aligns rows on the index.
combineInfo = pd.concat([basicInfo, historyInfo, defaultInfo], axis=1)

# Show the first 10 rows (a bare expression prints nothing when run as a script).
print(combineInfo[:10])

# Gender analysis: mean of Default for each SEX value.
gender = combineInfo.groupby("SEX")["Default"].mean().reset_index()
# NOTE(review): the tick positions assume SEX is coded 0/1 — confirm against the data.
plt.xticks((0, 1), (u"Male", u"Female"))
plt.xlabel(u"Gender")
# NOTE(review): the bars show the mean of Default, not a count — consider relabeling.
plt.ylabel(u"Counts")
plt.bar(gender.SEX, gender.Default, 0.1, color="green")
plt.show()

# Relationship between education level and Default: mean of Default per EDUCATION value.
edu = combineInfo.groupby("EDUCATION")["Default"].mean()
plt.plot(edu)
plt.show()

# Marital status analysis: mean of Default for each MARRIAGE value.
marriage = combineInfo.groupby("MARRIAGE")["Default"].mean().reset_index()
plt.bar(marriage.MARRIAGE, marriage.Default, 0.5, color="green")
plt.show()

Python大數據:信用卡逾期分析