python的pandas學習筆記
阿新 • • 發佈:2019-01-24
import pandas as pd
import numpy as np
from pandas import Series,DataFrame
obj = Series(range(5),index=['a','a','b','b','c'])
obj
a 0
a 1
b 2
b 3
c 4
dtype: int64
obj.index.is_unique #判斷索引值是否唯一
False
obj['a'] #返回多個索引值
a 0
a 1
dtype: int64
對於DataFrame也如此
df = DataFrame(np.random.randn(4,3),index=list('aabb'))
df
0 | 1 | 2 | |
---|---|---|---|
a | 0.599982 | 2.421799 | 0.081475 |
a | 0.420616 | 2.265408 | 1.196068 |
b | -1.153728 | -0.173130 | -0.098733 |
b | 0.540624 | -0.286814 | 0.287023 |
df.ix['b']
0 | 1 | 2 | |
---|---|---|---|
b | -1.153728 | -0.173130 | -0.098733 |
b | 0.540624 | -0.286814 | 0.287023 |
df = DataFrame([[1.4,np.nan],[7.1,-4.5],[np.nan,np.nan],[0.75,-1.3]],index=list('abcd'),columns=['one','two'])
df
one | two | |
---|---|---|
a | 1.40 | NaN |
b | 7.10 | -4.5 |
c | NaN | NaN |
d | 0.75 | -1.3 |
sum()預設對列進行求和
df.sum()
one 9.25
two -5.80
dtype: float64
df.sum(axis = 1) #對行
a 1.40
b 2.60
c 0.00
d -0.55
dtype: float64
NA值會自動排除,通過skipna選項可以禁止
df.mean(axis=1,skipna=False)
a NaN
b 1.300
c NaN
d -0.275
dtype: float64
df.idxmax() #返回最大值的索引值
one b
two d
dtype: object
df.cumsum() #累計型
one | two | |
---|---|---|
a | 1.40 | NaN |
b | 8.50 | -4.5 |
c | NaN | NaN |
d | 9.25 | -5.8 |
彙總統計描述describe
df.describe()
one | two | |
---|---|---|
count | 3.000000 | 2.000000 |
mean | 3.083333 | -2.900000 |
std | 3.493685 | 2.262742 |
min | 0.750000 | -4.500000 |
25% | 1.075000 | -3.700000 |
50% | 1.400000 | -2.900000 |
75% | 4.250000 | -2.100000 |
max | 7.100000 | -1.300000 |
對於非數值型,describe會產生另外一種彙總統計
obj = Series(['a','a','b','c']*4)
obj
0 a
1 a
2 b
3 c
4 a
5 a
6 b
7 c
8 a
9 a
10 b
11 c
12 a
13 a
14 b
15 c
dtype: object
obj.describe()
count 16
unique 3
top a
freq 8
dtype: object
- 相關係數與協方差 唯一值、值計數以及成員資格
obj = Series(['c','a','d','a','a','b','b','c','c'])
uniques = obj.unique()
uniques
array(['c', 'a', 'd', 'b'], dtype=object)
返回的唯一值是未排序的,如果需要則可以再次進行排序(uniques.sort())
obj.value_counts()
c 3
a 3
b 2
d 1
dtype: int64
Series值頻統計是按降序排列。value_counts還是一個頂級的pandas方法,可以用於任何陣列和序列
pd.value_counts(obj.values,sort=False)
a 3
c 3
b 2
d 1
dtype: int64
isin,它用於判斷向量化的成員資格,可用於選取Series中或DataFrame列中資料的子集
mask = obj.isin(['b','c'])
mask
0 True
1 False
2 False
3 False
4 False
5 True
6 True
7 True
8 True
dtype: bool
obj[mask]
0 c
5 b
6 b
7 c
8 c
dtype: object
data = DataFrame({
'Qu1':[1,3,4,3,4],
'Qu2':[2,3,1,2,3],
'Qu3':[1,5,2,4,4]
})
data
Qu1 | Qu2 | Qu3 | |
---|---|---|---|
0 | 1 | 2 | 1 |
1 | 3 | 3 | 5 |
2 | 4 | 1 | 2 |
3 | 3 | 2 | 4 |
4 | 4 | 3 | 4 |
#統計DataFrame的每一列中元素1,2,3,4,5出現的頻率,缺失值用0填
result = data.apply(pd.value_counts).fillna(0)
result
Qu1 | Qu2 | Qu3 | |
---|---|---|---|
1 | 1.0 | 1.0 | 1.0 |
2 | 0.0 | 2.0 | 1.0 |
3 | 2.0 | 2.0 | 0.0 |
4 | 2.0 | 0.0 | 2.0 |
5 | 0.0 | 0.0 | 1.0 |
string_data = Series(['aardvark','artichoke',np.nan,'avocado'])
string_data
0 aardvark
1 artichoke
2 NaN
3 avocado
dtype: object
string_data.isnull()
0 False
1 False
2 True
3 False
dtype: bool
python內建的None值也會被當做NA處理
string_data[0] = None
string_data
0 None
1 artichoke
2 NaN
3 avocado
dtype: object
string_data.isnull()
0 True
1 False
2 True
3 False
dtype: bool
NA處理方法
dropna 根據各標籤的值中是否存在缺失資料對軸標籤進行過濾,可通過閾值調節對缺失值的容忍度
fillna 用指定值或插值方法(如ffill或bfill)填充缺失資料
isnull 返回一個含有布林值的物件,這些布林值表示哪些是缺失
notnull isnull的否定式
# #濾除缺失值
from numpy import nan as NA
data = Series([1,NA,3.5,NA,7])
data.dropna()
0 1.0
2 3.5
4 7.0
dtype: float64
#也可以通過布林值索引達到目的
data[data.notnull()]
0 1.0
2 3.5
4 7.0
dtype: float64
#dropna預設丟棄任何含有缺失值的行
data = DataFrame([[1,6.5,3],[1,NA,NA],[NA,NA,NA],[NA,6.5,3]])
cleaned = data.dropna()
cleaned
0 | 1 | 2 | |
---|---|---|---|
0 | 1.0 | 6.5 | 3.0 |
#傳入how = 'all'將只丟棄全為NA的那些行
data.dropna(how = 'all')
0 | 1 | 2 | |
---|---|---|---|
0 | 1.0 | 6.5 | 3.0 |
1 | 1.0 | NaN | NaN |
3 | NaN | 6.5 | 3.0 |
data[4] = NA
data
0 | 1 | 2 | 4 | |
---|---|---|---|---|
0 | 1.0 | 6.5 | 3.0 | NaN |
1 | 1.0 | NaN | NaN | NaN |
2 | NaN | NaN | NaN | NaN |
3 | NaN | 6.5 | 3.0 | NaN |
#要用這種方法丟棄列,只需傳入axis=1即可
data.dropna(axis=1,how='all')
0 | 1 | 2 | |
---|---|---|---|
0 | 1.0 | 6.5 | 3.0 |
1 | 1.0 | NaN | NaN |
2 | NaN | NaN | NaN |
3 | NaN | 6.5 | 3.0 |
df = DataFrame(np.random.randn(7,3));df
0 | 1 | 2 | |
---|---|---|---|
0 | -1.051300 | -0.526329 | -0.204891 |
1 | -0.977547 | -1.706029 | 0.946824 |
2 | 0.540648 | -1.228170 | -1.180031 |
3 | -0.320932 | -0.667305 | 0.239980 |
4 | -0.303641 | -1.096918 | 0.355744 |
5 | -0.424176 | 1.880769 | -0.013825 |
6 | 0.643725 | 0.301759 | -1.520921 |
df.ix[:4,1] = NA;df
0 | 1 | 2 | |
---|---|---|---|
0 | -1.051300 | NaN | -0.204891 |
1 | -0.977547 | NaN | 0.946824 |
2 | 0.540648 | NaN | -1.180031 |
3 | -0.320932 | NaN | 0.239980 |
4 | -0.303641 | NaN | 0.355744 |
5 | -0.424176 | 1.880769 | -0.013825 |
6 | 0.643725 | 0.301759 | -1.520921 |
df.ix[:2,2] = NA;df
0 | 1 | 2 | |
---|---|---|---|
0 | -1.051300 | NaN | NaN |
1 | -0.977547 | NaN | NaN |
2 | 0.540648 | NaN | NaN |
3 | -0.320932 | NaN | 0.239980 |
4 | -0.303641 | NaN | 0.355744 |
5 | -0.424176 | 1.880769 | -0.013825 |
6 | 0.643725 | 0.301759 | -1.520921 |
df
0 | 1 | 2 | |
---|---|---|---|
0 | -1.051300 | NaN | NaN |
1 | -0.977547 | NaN | NaN |
2 | 0.540648 | NaN | NaN |
3 | -0.320932 | NaN | 0.239980 |
4 | -0.303641 | NaN | 0.355744 |
5 | -0.424176 | 1.880769 | -0.013825 |
6 | 0.643725 | 0.301759 | -1.520921 |
df.dropna(thresh=2)
0 | 1 | 2 | |
---|---|---|---|
3 | -0.320932 | NaN | 0.239980 |
4 | -0.303641 | NaN | 0.355744 |
5 | -0.424176 | 1.880769 | -0.013825 |
6 | 0.643725 | 0.301759 | -1.520921 |
help(df.dropna)
Help on method dropna in module pandas.core.frame:
dropna(self, axis=0, how='any', thresh=None, subset=None, inplace=False) method of pandas.core.frame.DataFrame instance
Return object with labels on given axis omitted where alternately any
or all of the data are missing
Parameters
----------
axis : {0 or 'index', 1 or 'columns'}, or tuple/list thereof
Pass tuple or list to drop on multiple axes
how : {'any', 'all'}
* any : if any NA values are present, drop that label
* all : if all values are NA, drop that label
thresh : int, default None
int value : require that many non-NA values
subset : array-like
Labels along other axis to consider, e.g. if you are dropping rows
these would be a list of columns to include
inplace : boolean, default False
If True, do operation inplace and return None.
Returns
-------
dropped : DataFrame
df.fillna(0)
0 | 1 | 2 | |
---|---|---|---|
0 | -1.051300 | 0.000000 | 0.000000 |
1 | -0.977547 | 0.000000 | 0.000000 |
2 | 0.540648 | 0.000000 | 0.000000 |
3 | -0.320932 | 0.000000 | 0.239980 |
4 | -0.303641 | 0.000000 | 0.355744 |
5 | -0.424176 | 1.880769 | -0.013825 |
6 | 0.643725 | 0.301759 | -1.520921 |
#若通過一個字典呼叫fillna,就可以實現對不同的列填充不同的值
df.fillna({1:0.5,3:-1})
0 | 1 | 2 | |
---|---|---|---|
0 | -1.051300 | 0.500000 | NaN |
1 | -0.977547 | 0.500000 | NaN |
2 | 0.540648 | 0.500000 | NaN |
3 | -0.320932 | 0.500000 | 0.239980 |
4 | -0.303641 | 0.500000 | 0.355744 |
5 | -0.424176 | 1.880769 | -0.013825 |
6 | 0.643725 | 0.301759 | -1.520921 |
fillna預設會返回新物件,但也可以對現有物件進行就地修改
_ = df.fillna(0,inplace=True)
df
0 | 1 | 2 | |
---|---|---|---|
0 | -1.051300 | 0.000000 | 0.000000 |
1 | -0.977547 | 0.000000 | 0.000000 |
2 | 0.540648 | 0.000000 | 0.000000 |
3 | -0.320932 | 0.000000 | 0.239980 |
4 | -0.303641 | 0.000000 | 0.355744 |
5 | -0.424176 | 1.880769 | -0.013825 |
6 | 0.643725 | 0.301759 | -1.520921 |
對reindex有效的那些插值方法也可以用於fillna
df = DataFrame(np.random.randn(6,3))
df
0 | 1 | 2 | |
---|---|---|---|
0 | 0.936874 | 0.226055 | -0.008118 |
1 | -1.885668 | 0.947839 | -0.344767 |
2 | -1.620408 | -0.895714 | 1.133733 |
3 | 1.442455 | 0.959708 | 0.107022 |
4 | -1.455846 | 0.572486 | 1.087657 |
5 | 1.189054 | -1.623793 | -0.334216 |
df.ix[2:,1] = NA; df.ix[4:,2] = NA
df
0 | 1 | 2 | |
---|---|---|---|
0 | 0.936874 | 0.226055 | -0.008118 |
1 | -1.885668 | 0.947839 | -0.344767 |
2 | -1.620408 | NaN | 1.133733 |
3 | 1.442455 | NaN | 0.107022 |
4 | -1.455846 | NaN | NaN |
5 | 1.189054 | NaN | NaN |
# ffill :將有效的觀察傳播到下一個有效的觀察
df.fillna(method='ffill')
0 | 1 | 2 | |
---|---|---|---|
0 | 0.936874 | 0.226055 | -0.008118 |
1 | -1.885668 | 0.947839 | -0.344767 |
2 | -1.620408 | 0.947839 | 1.133733 |
3 | 1.442455 | 0.947839 | 0.107022 |
4 | -1.455846 | 0.947839 | 0.107022 |
5 | 1.189054 | 0.947839 | 0.107022 |
help(df.fillna)
Help on method fillna in module pandas.core.frame:
fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None, **kwargs) method of pandas.core.frame.DataFrame instance
Fill NA/NaN values using the specified method
Parameters
----------
value : scalar, dict, Series, or DataFrame
Value to use to fill holes (e.g. 0), alternately a
dict/Series/DataFrame of values specifying which value to use for
each index (for a Series) or column (for a DataFrame). (values not
in the dict/Series/DataFrame will not be filled). This value cannot
be a list.
method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
Method to use for filling holes in reindexed Series
pad / ffill: propagate last valid observation forward to next valid
backfill / bfill: use NEXT valid observation to fill gap
axis : {0, 1, 'index', 'columns'}
inplace : boolean, default False
If True, fill in place. Note: this will modify any
other views on this object, (e.g. a no-copy slice for a column in a
DataFrame).
limit : int, default None
If method is specified, this is the maximum number of consecutive
NaN values to forward/backward fill. In other words, if there is
a gap with more than this number of consecutive NaNs, it will only
be partially filled. If method is not specified, this is the
maximum number of entries along the entire axis where NaNs will be
filled.
downcast : dict, default is None
a dict of item->dtype of what to downcast if possible,
or the string 'infer' which will try to downcast to an appropriate
equal type (e.g. float64 to int64 if possible)
See Also
--------
reindex, asfreq
Returns
-------
filled : DataFrame
df.fillna(method='ffill',limit=2)
0 | 1 | 2 | |
---|---|---|---|
0 | 0.936874 | 0.226055 | -0.008118 |
1 | -1.885668 | 0.947839 | -0.344767 |
2 | -1.620408 | 0.947839 | 1.133733 |
3 | 1.442455 | 0.947839 | 0.107022 |
4 | -1.455846 | NaN | 0.107022 |
5 | 1.189054 | NaN | 0.107022 |
data = Series([1,NA,3.5,NA,7])
#使用平均值填充
data.fillna(data.mean())
0 1.000000
1 3.833333
2 3.500000
3 3.833333
4 7.000000
dtype: float64
- 層次化索引
data = Series(np.random.randn(10),
index=[['a','a','a','b','b','b','c','c','d','d'],[1,2,3,1,2,3,1,2,2,3]])
data
a 1 -0.520847
2 0.858349
3 -1.048257
b 1 0.281738
2 0.757592
3 0.032117
c 1 0.526343
2 -2.281655
d 2 -0.017352
3 0.047178
dtype: float64
data.index
MultiIndex(levels=[[u'a', u'b', u'c', u'd'], [1, 2, 3]],
labels=[[0, 0, 0, 1, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 1, 2, 0, 1, 1, 2]])
#對一個層次索引
data['b']
1 0.281738
2 0.757592
3 0.032117
dtype: float64
data['b':'c']
b 1 0.281738
2 0.757592
3 0.032117
c 1 0.526343
2 -2.281655
dtype: float64
data.ix[['b','d']]
b 1 0.281738
2 0.757592
3 0.032117
d 2 -0.017352
3 0.047178
dtype: float64
data[:,2]
a 0.858349
b 0.757592
c -2.281655
d -0.017352
dtype: float64
data
a 1 -0.520847
2 0.858349
3 -1.048257
b 1 0.281738
2 0.757592
3 0.032117
c 1 0.526343
2 -2.281655
d 2 -0.017352
3 0.047178
dtype: float64
層次化索引在資料重塑和基於分組的操作中扮演著重要的角色。可以使用unstack方法被重新安排到一個DataFrame中
data.unstack()
1 | 2 | 3 | |
---|---|---|---|
a | -0.520847 | 0.858349 | -1.048257 |
b | 0.281738 | 0.757592 | 0.032117 |
c | 0.526343 | -2.281655 | NaN |
d | NaN | -0.017352 | 0.047178 |
#unstack的逆運算是stack
data.unstack().stack()
a 1 -0.520847
2 0.858349
3 -1.048257
b 1 0.281738
2 0.757592
3 0.032117
c 1 0.526343
2 -2.281655
d 2 -0.017352
3 0.047178
dtype: float64
frame = DataFrame(np.arange(12).reshape(4,3),index=[['a','a','b','b'],[1,2,1,2]],
columns=[['Ohio','Ohio','Colorado'],['Green','Red','Green']])
frame
Ohio | Colorado | |||
---|---|---|---|---|
Green | Red | Green | ||
a | 1 | 0 | 1 | 2 |
2 | 3 | 4 | 5 | |
b | 1 | 6 | 7 | 8 |
2 | 9 | 10 | 11 |
frame.index.names = ['key1','key2']
frame.columns.names = ['state','color']
frame
state | Ohio | Colorado | ||
---|---|---|---|---|
color | Green | Red | Green | |
key1 | key2 | |||
a | 1 | 0 | 1 | 2 |
2 | 3 | 4 | 5 | |
b | 1 | 6 | 7 | 8 |
2 | 9 | 10 | 11 |
frame['Ohio']
color | Green | Red | |
---|---|---|---|
key1 | key2 | ||
a | 1 | 0 | 1 |
2 | 3 | 4 | |
b | 1 | 6 | 7 |
2 | 9 | 10 |
swaplevel接受兩個級別編號或名稱並返回一個互換了級別的新物件(但資料不會發生變化)
frame.swaplevel('key1','key2')
state | Ohio | Colorado | ||
---|---|---|---|---|
color | Green | Red | Green | |
key2 | key1 | |||
1 | a | 0 | 1 | 2 |
2 | a | 3 | 4 | 5 |
1 | b | 6 | 7 | 8 |
2 | b | 9 | 10 | 11 |
frame
state | Ohio | Colorado | ||
---|---|---|---|---|
color | Green | Red | Green | |
key1 | key2 | |||
a | 1 | 0 | 1 | 2 |
2 | 3 | 4 | 5 | |
b | 1 | 6 | 7 | 8 |
2 | 9 | 10 | 11 |
sortlevel則根據單個級別中的值對資料進行排序(穩定的)
#兩級分層,取0,1,分別表示第一層,第二層
frame.sortlevel(1)
state | Ohio | Colorado | ||
---|---|---|---|---|
color | Green | Red | Green | |
key1 | key2 | |||
a | 1 | 0 | 1 | 2 |
b | 1 | 6 | 7 | 8 |
a | 2 | 3 | 4 | 5 |
b | 2 | 9 | 10 | 11 |
frame
state | Ohio | Colorado | ||
---|---|---|---|---|
color | Green | Red | Green | |
key1 | key2 | |||
a | 1 | 0 | 1 | 2 |
2 | 3 | 4 | 5 | |
b | 1 | 6 | 7 | 8 |
2 | 9 | 10 | 11 |
frame.swaplevel(0,1)
state | Ohio | Colorado | ||
---|---|---|---|---|
color | Green | Red | Green | |
key2 | key1 | |||
1 | a | 0 | 1 | 2 |
2 | a | 3 | 4 | 5 |
1 | b | 6 | 7 | 8 |
2 | b | 9 | 10 | 11 |
frame.swaplevel(0,1).sortlevel(0)
state | Ohio | Colorado | ||
---|---|---|---|---|
color | Green | Red | Green | |
key2 | key1 | |||
1 | a | 0 | 1 | 2 |
b | 6 | 7 | 8 | |
2 | a | 3 | 4 | 5 |
b | 9 | 10 | 11 |
frame
state | Ohio | Colorado | ||
---|---|---|---|---|
color | Green | Red | Green | |
key1 | key2 | |||
a | 1 | 0 | 1 | 2 |
2 | 3 | 4 | 5 | |
b | 1 | 6 | 7 | 8 |
2 | 9 | 10 | 11 |
frame.sum(level='key2')
state | Ohio | Colorado | |
---|---|---|---|
color | Green | Red | Green |
key2 | |||
1 | 6 | 8 | 10 |
2 | 12 | 14 | 16 |
frame.sum(level='color',axis=1)
color | Green | Red | |
---|---|---|---|
key1 | key2 | ||
a | 1 | 2 | 1 |
2 | 8 | 4 | |
b | 1 | 14 | 7 |
2 | 20 | 10 |
frame = DataFrame({
'a':range(7),
'b':range(7,0,-1),
'c':['one','one','one','two','two','two','two'],
'd':[0,1,2,0,1,2,3]
})
frame
a | b | c | d | |
---|---|---|---|---|
0 | 0 | 7 | one | 0 |
1 | 1 | 6 | one | 1 |
2 | 2 | 5 | one | 2 |
3 | 3 | 4 | two | 0 |
4 | 4 | 3 | two | 1 |
5 | 5 | 2 | two | 2 |
6 | 6 | 1 | two | 3 |
DataFrame的set_index函式會將其中一個或多個列轉換為行索引,並建立一個新的DataFrame
frame2 = frame.set_index(['c','d'])
frame2
a | b | ||
---|---|---|---|
c | d | ||
one | 0 | 0 | 7 |
1 | 1 | 6 | |
2 | 2 | 5 | |
two | 0 | 3 | 4 |
1 | 4 | 3 | |
2 | 5 | 2 | |
3 | 6 | 1 |
預設情況下,那些列會從DataFrame中移除,但也可以將其保留下來
frame.set_index(['c','d'],drop=False)
a | b | c | d | ||
---|---|---|---|---|---|
c | d | ||||
one | 0 | 0 | 7 | one | 0 |
1 | 1 | 6 | one | 1 | |
2 | 2 | 5 | one | 2 | |
two | 0 | 3 | 4 | two | 0 |
1 | 4 | 3 | two | 1 | |
2 | 5 | 2 | two | 2 | |
3 | 6 | 1 | two | 3 |
reset_index的功能跟set_index剛好相反,層次化索引的級別會被轉移到列裡
frame2.reset_index()
c | d | a | b | |
---|---|---|---|---|
0 | one | 0 | 0 | 7 |
1 | one | 1 | 1 | 6 |
2 | one | 2 | 2 | 5 |
3 | two | 0 | 3 | 4 |
4 | two | 1 | 4 | 3 |
5 | two | 2 | 5 | 2 |
6 | two | 3 | 6 | 1 |
整數索引
ser = Series(np.arange(3.))
#可能會以為ser[-1]是取倒數第一個元素,但實際上會報錯:整數索引按標籤查詢,而標籤只有0,1,2
ser[-1]
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
in ()
1 #會以為是倒數第一的索引,其實報錯,整數的索引值為0,1,2
----> 2 ser[-1]
C:\Anaconda\Anaconda2\lib\site-packages\pandas\core\series.pyc in __getitem__(self, key)
558 def __getitem__(self, key):
559 try:
--> 560 result = self.index.get_value(self, key)
561
562 if not lib.isscalar(result):
C:\Anaconda\Anaconda2\lib\site-packages\pandas\indexes\base.pyc in get_value(self, series, key)
1909 try:
1910 return self._engine.get_value(s, k,
-> 1911 tz=getattr(series.dtype, 'tz', None))
1912 except KeyError as e1:
1913 if len(self) > 0 and self.inferred_type in ['integer', 'boolean']:
pandas\index.pyx in pandas.index.IndexEngine.get_value (pandas\index.c:3234)()
pandas\index.pyx in pandas.index.IndexEngine.get_value (pandas\index.c:2931)()
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3891)()
pandas\hashtable.pyx in pandas.hashtable.Int64HashTable.get_item (pandas\hashtable.c:6527)()
pandas\hashtable.pyx in pandas.hashtable.Int64HashTable.get_item (pandas\hashtable.c:6465)()
KeyError: -1L
相反,對於一個非整數索引,就沒有這樣的歧義
ser2 = Series(np.arange(3.),index=['a','b','c'])
ser2[-1]
2.0
ser.ix[:1]
0 0.0
1 1.0
dtype: float64
如果需要可靠的,不考慮索引型別的,基於位置的索引,可以使用Series的iget_value方法和DataFrame的irow和icol方法(注意:如下方FutureWarning所示,這些方法已被棄用,建議改用.iloc[i]或.iat[i])
ser3 = Series(range(3),index=[-5,1,3])
ser3
-5 0
1 1
3 2
dtype: int64
ser3.iget_value(2)
C:\Anaconda\Anaconda2\lib\site-packages\ipykernel\__main__.py:1: FutureWarning: iget_value(i) is deprecated. Please use .iloc[i] or .iat[i]
if __name__ == '__main__':
2
frame = DataFrame(np.arange(6).reshape(3,2),index=[2,0,1])
frame
0 | 1 | |
---|---|---|
2 | 0 | 1 |
0 | 2 | 3 |
1 | 4 | 5 |
frame.irow(1)
C:\Anaconda\Anaconda2\lib\site-packages\ipykernel\__main__.py:1: FutureWarning: irow(i) is deprecated. Please use .iloc[i]
if __name__ == '__main__':
0 2
1 3
Name: 0, dtype: int32