Pandas資料結構
import numpy as np
import pandas as pd
# Common ndarray constructors: np.arange(10), np.random.rand, np.array([]),
# zeros, ones, empty, identity.
arr = np.arange(10)
print(arr)
print(type(arr))

# A Series is a one-dimensional, array-like object.
# 1. Build a Series from a sequence.
# It pairs an index (printed on the left) with the data (printed on the right).
ser_obj = pd.Series(range(10, 20))
print(ser_obj)

# The index; RangeIndex is the type of this index.
print(ser_obj.index)

# The underlying values.
print(ser_obj.values)

# head() shows the first 5 rows by default; pass a count to change that.
print(ser_obj.head())

# Look up a single value by its index label.
print(ser_obj[5])

# Element-wise arithmetic.
print(ser_obj ** 2)

# An element-wise comparison yields a boolean mask ...
above_15 = ser_obj > 15
print(above_15)

# ... and indexing with that mask keeps only the matching rows.
print(ser_obj[above_15])
[0 1 2 3 4 5 6 7 8 9] <class 'numpy.ndarray'> 0 10 1 11 2 12 3 13 4 14 5 15 6 16 7 17 8 18 9 19 dtype: int64 RangeIndex(start=0, stop=10, step=1) [10 11 12 13 14 15 16 17 18 19] 0 10 1 11 2 12 3 13 4 14 dtype: int64 15 0 100 1 121 2 144 3 169 4 196 5 225 6 256 7 289 8 324 9 361 dtype: int64 0 False 1 False 2 False 3 False 4 False 5 False 6 True 7 True 8 True 9 True dtype: bool 6 16 7 17 8 18 9 19 dtype: int64
# 2. Build a Series from a dict: the keys become the index labels.
student = {2001: 15.5, 2005: 20.4, 2008: 26.5}
ser_obj = pd.Series(student, name='GDP')  # name of the Series itself
ser_obj.index.name = 'year'  # name of the index
print(ser_obj)
print(ser_obj.index)
year 2001 15.5 2005 20.4 2008 26.5 Name: GDP, dtype: float64 Int64Index([2001, 2005, 2008], dtype='int64', name='year')
# 3. Specify the index and the data type explicitly.
# The first argument is the data; index= takes the list of labels.
labels = ['a', 'b', 'c']
ser_obj = pd.Series(['張三', '李四', '王五'], index=labels)
print(ser_obj)

# dtype= sets the element type of the resulting Series (float32 here).
ser_obj = pd.Series(['10', '20', '30'], index=labels, dtype='float32')
print(ser_obj)
a 張三 b 李四 c 王五 dtype: object a 10.0 b 20.0 c 30.0 dtype: float32
# DataFrame類
# A DataFrame resembles a multi-dimensional array / a table of data.
# It has both a row index and a column index.
# Each column may hold a different data type.

# Build a 2-D array; randn() produces both positive and negative values.
arr = np.random.randn(3, 4)
print(arr)
print(type(arr))

# Build a DataFrame from the ndarray.
# Each inner array of the 2-D array becomes one row; the values at the
# same position across the inner arrays form one column.
# index= sets the row labels, columns= sets the column labels.
row_labels = ['a', 'b', 'c']
column_labels = ['name', 'age', 'phone', 'sex']
df_obj = pd.DataFrame(arr, index=row_labels, columns=column_labels)
print(df_obj)
[[ 0.46598321 0.34886397 -1.76062942 -0.24668652] [ 2.09997402 1.47819057 0.49940195 -2.01004336] [-0.3740503 1.08099591 -0.32428196 -0.4199838 ]] <class 'numpy.ndarray'> name age phone sex a 0.465983 0.348864 -1.760629 -0.246687 b 2.099974 1.478191 0.499402 -2.010043
c -0.374050 1.080996 -0.324282 -0.419984
# Build a DataFrame from a dict.
# At least one value must carry a row index (here the Series under 'B');
# the scalar entries are then repeated for every row of that index.
# Each key becomes a column label and each value supplies one column of data.
dict_data = {
    'A': 1,
    'B': pd.Series(range(4), index=list(range(4)), dtype='int32'),
    'C': np.array(range(4, 8), dtype='int32'),
    'D': 4,
    'E': 'hello',
    'F': 6,
}
df_obj = pd.DataFrame(dict_data)
print(df_obj)
A B C D E F 0 1 0 4 4 hello 6 1 1 1 5 4 hello 6 2 1 2 6 4 hello 6 3 1 3 7 4 hello 6
# Select one column by its label; the selected column is a Series.
column_c = df_obj['C']
print(column_c)
print(type(column_c))
# Attribute access (df_obj.B) can also select a column, but it fails when the
# column name contains a space, so that form is not recommended.
0 4 1 5 2 6 3 7 Name: C, dtype: int32 <class 'pandas.core.series.Series'>
# Overwrite an existing column.
df_obj['C'] = pd.Series('world', index=list(range(4)))
print(df_obj)

# Add a new column computed from an existing one.
df_obj['G'] = df_obj['B'] * 2
print(df_obj)

# Add a new column from a scalar; the value is repeated for every row.
df_obj['H'] = '你好'
print(df_obj)

# Delete a column.
del df_obj['H']
print(df_obj)
A B C D E F 0 1 0 world 4 hello 6 1 1 1 world 4 hello 6 2 1 2 world 4 hello 6 3 1 3 world 4 hello 6 A B C D E F G 0 1 0 world 4 hello 6 0 1 1 1 world 4 hello 6 2 2 1 2 world 4 hello 6 4 3 1 3 world 4 hello 6 6 A B C D E F G H 0 1 0 world 4 hello 6 0 你好 1 1 1 world 4 hello 6 2 你好 2 1 2 world 4 hello 6 4 你好 3 1 3 world 4 hello 6 6 你好 A B C D E F G 0 1 0 world 4 hello 6 0 1 1 1 world 4 hello 6 2 2 1 2 world 4 hello 6 4 3 1 3 world 4 hello 6 6
# Inspecting the index.
# Both Series and DataFrame expose their index as an Index object.
# Index objects are immutable, which keeps the data's labels safe.
# Index types: Index, Int64Index, RangeIndex (the default), DatetimeIndex
# (timestamps), MultiIndex (hierarchical).
ser_obj = pd.Series(range(5), index=list(range(5)))
row_index = df_obj.index
print(type(ser_obj.index))
print(type(row_index))

# Read one index value by position.
print(row_index[2])

# Assigning to an index element is not allowed and raises
# "TypeError: Index does not support mutable operations".
row_index[0] = 2
<class 'pandas.core.indexes.numeric.Int64Index'> <class 'pandas.core.indexes.numeric.Int64Index'> 2
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-51-a3f4da10c213> in <module>() 9 print(df_obj.index[2]) 10 # 修改索引值 索引不可修改 TypeError: Index does not support mutable operations\n", ---> 11df_obj.index[0] = 2 D:\anaconda3\lib\site-packages\pandas\core\indexes\base.py in __setitem__(self, key, value) 1722 1723 def __setitem__(self, key, value): -> 1724raise TypeError("Index does not support mutable operations") 1725 1726 def __getitem__(self, key): TypeError: Index does not support mutable operations