Numpy pandas學習

def test3():#numpy的nan值替換為列平均值
    t1=np.arange(12).reshape((3,4)).astype("float")
    t1[1,2:]=np.nan
    print("t1:")
    print(t1)
    for i in range(t1.shape[1]):#遍歷列數
        temp_col=t1[:,i]#取一列
        temp_col_not_nan=temp_col[temp_col==temp_col]#當前一列不為nan,使用布林索引
        print(temp_col==temp_col)
         
print("temp_col_not_nan")
        print(temp_col_not_nan)
        temp_col[np.isnan(temp_col)]=temp_col_not_nan.mean()#將均值賦值給nan
        print("temp_col")
        print(temp_col)
        print("*"*100)

def test4():
    """Demonstrate stacking, row/column swapping and array constructors.

    Creates two 3x4 integer arrays, prints them together with their vertical
    and horizontal concatenations, then swaps two rows and two columns of the
    first array in place and prints it after each swap. The remaining calls
    (argmax/argmin, zeros, ones, eye) are evaluated for illustration only;
    their results are discarded. Returns nothing.
    """
    t1 = np.arange(12).reshape((3, 4))
    t2 = np.arange(12, 24).reshape((3, 4))
    print("t1")
    print(t1)
    print("t2")
    print(t2)
    print(np.vstack((t1, t2)))  # vertical stacking (adds rows)
    print(np.hstack((t1, t2)))  # horizontal stacking (adds columns)
    print("*" * 100)
    # Indexing with a list produces a copy on the right-hand side, so the
    # swap does not read rows that have already been overwritten.
    t1[[1, 2], :] = t1[[2, 1], :]  # swap rows 1 and 2
    print(t1)
    t1[:, [0, 2]] = t1[:, [2, 0]]  # swap columns 0 and 2
    print(t1)
    # Positions of the maximum / minimum along an axis (results unused).
    np.argmax(t1, axis=0)
    np.argmin(t1, axis=1)
    # Array of all zeros.
    np.zeros((3, 4))
    # Array of all ones.
    np.ones((3, 4))
    # Square array with ones on the diagonal.
    np.eye(3)
    # t1 = t2.copy() would make an independent copy; the two arrays would
    # then not affect each other.

def test5():
    """Tour of the ``np.random`` module-level helpers.

    Each sampling call is evaluated and its result discarded. The global
    generator is seeded last, so the seed only influences draws made after
    this function returns.
    """
    low, high = 10, 20
    shape = (3, 4)
    np.random.rand(2, 3)  # uniform samples, 2 rows x 3 columns
    np.random.randn(2, 3)  # standard normal samples, 2 rows x 3 columns
    np.random.randint(low, high, size=shape)  # random integers in [10, 20)
    np.random.uniform(low, high, size=shape)  # uniform floats between 10 and 20
    np.random.seed(10)  # set the random seed of the global generator


def test1_pandas():
    """Walk through pandas basics: building Series and DataFrame objects.

    Prints several Series and DataFrames built from lists, dicts and a NumPy
    array, then calls a few summary helpers on a labelled DataFrame. Only
    ``info()`` writes output by itself; the other helper results are
    discarded. Returns nothing.
    """
    # A Series is a one-dimensional array that carries labels.
    plain_series = pd.Series(list(range(1, 8)))
    print(plain_series)

    # Same idea, with explicit labels.
    labelled_series = pd.Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"])
    print(labelled_series)

    # Built from a dict.
    person = dict(name="xiaohong", age=30, tel=10086)
    dict_series = pd.Series(person)
    print(dict_series)
    dict_series.index  # the labels
    dict_series.values  # the values

    # DataFrame
    grid = np.arange(12).reshape((3, 4))
    print(pd.DataFrame(grid))
    # A DataFrame has both a row index and a column index.
    # Row index: labels the rows, called ``index``, axis 0.
    # Column index: labels the columns, called ``columns``, axis 1.
    # ``ndim`` is the number-of-dimensions attribute.
    row_labels = ["a", "b", "c"]
    column_labels = ["W", "X", "Y", "Z"]
    labelled_frame = pd.DataFrame(grid, index=row_labels, columns=column_labels)

    # Built from a dict of columns.
    columns_dict = dict(
        name=["xiaoming", "xioahong"],
        age=[10, 20],
        tel=[100086, 10085],
    )
    print(pd.DataFrame(columns_dict))

    # Built from a list of row dicts (result discarded).
    row_dicts = [
        dict(name="xiaohong", age=10, tel=10086),
        dict(name="xiaownag", age=10, tel=10085),
    ]
    pd.DataFrame(row_dicts)

    labelled_frame.head(3)  # first rows
    labelled_frame.tail(3)  # last rows
    labelled_frame.info()  # overview of the frame, written to stdout
    labelled_frame.describe()  # quick statistics: count, mean, std, min, ...

def test_pandas2():
    """Demonstrate pandas indexing and missing-value handling.

    Builds a labelled 3x4 integer DataFrame, prints several ``loc`` /
    ``iloc`` / boolean selections, then introduces NaN values and prints the
    null masks and a NaN-filtered view. The ``dropna`` and ``fillna`` calls
    are evaluated for illustration only; their results are discarded.
    Returns nothing.
    """
    # loc selects by label.
    t4 = pd.DataFrame(np.arange(12).reshape(3, 4), index=list("abc"), columns=list("WXYZ"))
    print(t4)
    print(t4.loc["a", "Z"])
    print(t4.loc["a", :])
    print(t4.loc[:, "Y"])
    print(t4.loc[["a", "c"], :])
    print(t4.loc[["a", "b"], ["W", "Z"]])
    # iloc selects by position.
    print(t4.iloc[1, :])
    # Boolean indexing; combine conditions with & and |.
    print(t4[t4["W"] > 2])
    # Missing-value handling.
    # Integer columns cannot hold NaN. Cast to float explicitly instead of
    # relying on implicit upcasting during assignment, which pandas has
    # deprecated for setitem-like operations.
    t4 = t4.astype(float)
    t4.iloc[1] = np.nan
    t4.iloc[1, 2] = 2.0
    print(t4)
    print(pd.isnull(t4))  # True where the value is missing
    print(pd.notnull(t4))  # True where the value is present
    # Dropping: keep only the rows whose first column is not NaN.
    print(t4[pd.notnull(t4.iloc[:, 0])])
    # how="all" drops a row only if every value is NaN, how="any" drops it
    # if at least one is; inplace controls whether t4 itself is modified.
    t4.dropna(axis=0, how="all", inplace=False)
    # Filling: replace NaN with the column means (result discarded).
    t4.fillna(t4.mean())

Numpy pandas學習

Pandas學習筆記 01 python和NumPy基礎

淺談python已知元素,獲取元素索引(numpy,pandas)

Pandas 學習手冊中文第二版·翻譯完成

Python Pandas學習

NumPy 陣列學習手冊·翻譯完成

python pandas學習記錄二

numpy的學習筆記

pandas學習筆記

Numpy(Pandas)刪除全為零的列的方法

pandas學習之 - excel篇

Python 更換資料來源；Python安裝numpy,pandas慢，超時報錯，下載不了的解決方法（更新，親測可行）

numpy pandas 使用筆記

pandas 學習第14篇：索引和選擇資料

pandas學習筆記(一)

pandas學習筆記(二)

【Pandas學習筆記Task03】：索引

tensorflow+numpy 深度學習相關函式（持續更新）

pandas學習筆記（1）——series和dataframe

pandas學習筆記(四)

Numpy pandas學習

相關推薦