1. 程式人生 > >pandas之DataFrame資料框

pandas之DataFrame資料框

DataFrame資料框

1.建立資料框

 

df = DataFrame({
        'age':[21,22,23],
        'name':['zhangYafei','LiuGeliang','KangYue']
            },index=['first','second','third'])

columns={}
columns['id'] = 1
columns['title'] = 'python'
columns['url'] = 'http://www.baidu.com'


data1 = DataFrame.from_dict(columns,orient='index').T

result = {'city':'city','weather':'weather','date':'date','max':'max','min':'min'}

data3 = pd.DataFrame.from_dict(result,orient='index').T

 

 


2.資料框的增刪改查
#增加行 注意:這種方法,效率非常低,不應該用於遍歷中
df.loc[len(df)]=[24,'scrapy']
#增加列
df['sex'] = [1,1,1,0]

#根據行索引剔除
df = df.drop(4,axis=0)
#根據列名剔除 df.drop('age2',axis=1) #第二種剔除列的方法 del df['age2']
df.drop(3,axis=0)
Out[148]: 
        age        name  sex
first    21  zhangYafei    1
second   22  LiuGeliang    1
third    23     KangYue    1

df.drop('age',axis=1)
Out[149]: 
              name  sex
first   zhangYafei    1
second  LiuGeliang    
1 third KangYue 1 3 scrapy 0 df['sex2'] = [12,23,53,12] df Out[151]: age name sex sex2 first 21 zhangYafei 1 12 second 22 LiuGeliang 1 23 third 23 KangYue 1 53 3 24 scrapy 0 12 del df['sex2'] df Out[153]: age name sex first 21 zhangYafei 1 second 22 LiuGeliang 1 third 23 KangYue 1 3 24 scrapy 0

#修改列名
df.columns
df.columns = ['age2','name2']

#修改行索引
df.index
df.index = range(1,4)
df.index

#按列名訪問
df['age']
df[['age','name']]
#按行訪問
df[1:2]
ix可以用數字索引,也可以用index和column索引
df.ix[0]
d = df.ix[1:2,['age','name']]#取第0、1行列
#按行列號訪問
df.iloc[0:1,0:1]
df.iloc[1]
#loc只能通過index和columns來取,不能用數字
df.loc['first'])
df.loc['first',['age','name']]
#按行索引,列名訪問
df.at[0,'name']
df.at['first','name']
根據條件邏輯值取值
df = pd.DataFrame({'BoolCol': [1, 2, 3, 3, 4],'attr': [22, 33, 22, 44, 66]},
index=[10,20,30,40,50])
print(df)
value= df[(df.BoolCol==3)&(df.attr==22)].values.tolist()[0]
type(value)
print(" ".join(str(id) for id in value))
index = df[(df.BoolCol==3)&(df.attr==22)].index.tolist()
print(index)

3.資料框的遍歷

#遍歷列名 
for r in df:
  print(r)
#遍歷列
for cName in df:
  print('df的列:\n',cName)
  print('df的值:\n',df[cName])
  print("-"*10)

遍歷行

第一種:apply方式 推薦

def new_data(row):
  """增加別名列"""
  drug_name = row['藥品名稱']
  try:  
  row['別名'] = drug_name.rsplit('(',1)[1].strip(')')
  row['藥品名稱'] = drug_name.rsplit('(',1)[0]
  except IndexError as e:
  row['別名'] = np.NAN
  return row

 new_drug = data.apply(new_data,axis=1)

 

第二種:dataframe.iterrows

for row in data[10:13].iterrows():
  drug_name = row['藥品名稱'].values
  drug_alias = drug_name.rsplit('(',1)[1].strip(')')
  print(drug_name)
  print(drug_alias)

第三種:index方式

resoved_drug_list = []
for row in data.index:
  drug_name = '{}[{}]'.format(data.iloc[row]['藥品名稱'],data.iloc[row]['藥品ID'])
  resoved_drug_list.append(drug_name)

第四種:values方式

for r in df.values:
  print(r)
  print(r[0])
  print(r[1])
  print('-'*10)

第五種:while遍歷DataFrame
df = DataFrame({
'age':Series([21,22,23]),
'name':Series(['zhang','liu','kang'])
})

rowCount = len(df)
i = 0
while i<rowCount:  
  print(df.iloc[i])
  i+=1

補充:

#遍歷字串
for letter in 'python':
print('現在是:',letter)

#遍歷陣列
fruits = ['banana','apple','mango']
for fruit in fruits:
print('現在是:',fruit)
#遍歷序列
x = Series(['a',True,1],index=['first','second','third'])
x[0]
x['second']
x[2]

for v in x:
print('x中的值:',v)
for index in x.index:
print('X中的索引:',index)
print('x中的值:',x[index])
print('*'*10)

4.讀取檔案和資料框寫入檔案

讀取檔案
data = pandas.read_csv('drug_name.csv',encoding='utf-8')
寫入檔案 
new_dataframe.to_csv('unique_chinese_name.csv',mode='w',encoding='utf_8_sig',index=False)