pandas學習與使用2
阿新 • • 發佈:2018-12-12
繼續學習pandas庫。上一節主要介紹了Series,這一節主要介紹DataFrame結構的用法。執行環境:Python 2.7。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pandas tutorial: working with DataFrame.
#
# Walks through building DataFrames, inspecting their attributes, sorting
# them, and applying if-then style conditional updates and selections.
# The script runs unchanged on Python 2.7 and Python 3.
import numpy as np
import pandas as pd


def _show(label, value):
    """Print *label* immediately followed by the text form of *value*.

    Every label used below ends with a newline, so the value starts on its
    own line.  A single pre-formatted argument is passed to ``print`` so the
    call is valid both as a Python 2 print statement and as a Python 3
    function call, and no separator space is inserted after the label.
    """
    print("{}{}".format(label, value))


dates = pd.date_range('20171231', periods=6)
_show("dates:\n", dates)

# Sample output (the values differ on every run because the data is random):
#                    a         b         c         d
# 2017-12-31  0.544078  0.389521  0.097052  0.942329
# 2018-01-01  0.474514  0.456605  0.750682  0.683513
# 2018-01-02  0.537973  0.230534  0.216569  0.015208
# 2018-01-03  0.320855  0.295421  0.342874  0.808681
# 2018-01-04  0.649339  0.678842  0.390282  0.692622
# 2018-01-05  0.041877  0.197155  0.384499  0.301309
# `index` labels the rows (vertical axis); `columns` labels the horizontal axis.
df = pd.DataFrame(np.random.rand(6, 4), index=dates,
                  columns=['a', 'b', 'c', 'd'])
_show("df:\n", df)
_show("dtypes:\n", df.dtypes)
_show("columns:\n", df.columns)
_show("values:\n", df.values)

# describe() summarises each column: count, mean, std, min, quartiles, max.
_show("describe:\n", df.describe())
# Sample output:
#               a         b         c         d
# count  6.000000  6.000000  6.000000  6.000000
# mean   0.428106  0.374680  0.363660  0.573944
# std    0.217850  0.177776  0.220942  0.347394
# min    0.041877  0.197155  0.097052  0.015208
# 25%    0.359270  0.246756  0.248145  0.396860
# 50%    0.506243  0.342471  0.363687  0.688067
# 75%    0.542552  0.439834  0.388836  0.779666
# max    0.649339  0.678842  0.750682  0.942329

_show("轉置:\n", df.T)
# Sort by axis labels: axis=1 sorts the column labels, and ascending=False
# puts them in descending order.
_show("按照索引排序:\n", df.sort_index(axis=1, ascending=False))
# Sort the rows by the values in column 'a'.
_show("按照值排序:\n", df.sort_values(by='a'))
print("______________________")

df1 = pd.DataFrame(np.arange(12).reshape((3, 4)))
_show("df1:\n", df1)
_show("df1.dtypes:\n", df1.dtypes)

# The scalar 'A' and the Timestamp 'B' are repeated for every row of the
# index supplied by the Series 'C'.
df2 = pd.DataFrame({
    'A': 1,
    'B': pd.Timestamp('20180101'),
    'C': pd.Series(1, index=list(range(4)), dtype=float),
})
_show("df2:\n", df2)

# Constructing a DataFrame.
# pd.DataFrame() accepts, among others:
#   1. a two-dimensional array;
#   2. a list of Series;
#   3. a dict whose values are Series.

# 1. Two-dimensional array: each inner array becomes one row.
s1 = np.array([1, 2, 3, 4])
s2 = np.array([5, 6, 7, 8])
dataframe1 = pd.DataFrame([s1, s2])
_show("dataframe1:\n", dataframe1)

# 2. List of Series: each Series becomes one row.
s1 = pd.Series(np.array([1, 2, 3, 4]))
s2 = pd.Series(np.array([5, 6, 7, 8]))
dataframe2 = pd.DataFrame([s1, s2])
_show("dataframe2:\n", dataframe2)

# 3. Dict of Series: each key becomes a column name.
s1 = pd.Series(np.array([1, 2, 3, 4]))
s2 = pd.Series(np.array([5, 6, 7, 8]))
dataframe3 = pd.DataFrame({"a": s1, "b": s2})
_show("dataframe3:\n", dataframe3)

# DataFrame attributes.
_show("dataframe3的columns的值:\n", dataframe3.columns)
_show("dataframe3的形狀:\n", dataframe3.shape)
_show("dataframe3的index的值:\n", list(dataframe3.index))
_show("dataframe3的value的值:\n", dataframe3.values)

# If-then operations on a DataFrame.
# Pattern: df.loc[condition, target columns] = value
# The original tutorial used df.ix here; .ix was deprecated in pandas 0.20
# and removed in pandas 1.0.  .loc performs the same boolean-mask/label
# assignment and is available in old and new pandas alike.
df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8], "C": [1, 1, 1, 1]})
_show("修改前:\n", df)
# if df.A > 2 then B = -1
df.loc[df.A > 2, 'B'] = -1
_show("修改後:\n", df)

# Using numpy.where: np.where(condition, value_if_true, value_if_false)
df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8], "C": [1, 1, 1, 1]})
df["then"] = np.where(df.A < 3, 1, 0)
_show("修改後:\n", df)

# Selecting rows directly with a boolean mask: df[mask]
df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8], "C": [1, 1, 1, 1]})
df = df[df.A >= 2]
_show("df[df.A >= 2]:\n", df)

# Selecting rows with .loc[mask]
df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8], "C": [1, 1, 1, 1]})
df = df.loc[df.A > 2]
_show("df.loc[df.A > 2]:\n", df)

df = pd.DataFrame({
    'animal': 'cat dog cat fish dog cat cat'.split(),
    'size': list('SSMMMLL'),
    'weight': [8, 10, 11, 1, 20, 12, 12],
    'adult': [False] * 5 + [True] * 2,
})
_show("df:\n", df)