def函式變數變化
阿新 • • 發佈:2018-11-01
介紹下本人。中山大學,醫學生+計科學生的集合體,機器學習愛好者。
一、我們容易搞混但是可以理解的def和return變數變化
#!/usr/bin/env python
# -*- coding:utf8 -*-
# @TIME :2018/10/3 16:39
# @Author:Yolanda
# @File :ceshi1.py
# Fight back, girl!
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from tqdm import tqdm

# Directory that holds train.txt and test.txt.
path = '/home/lab/cyy/ceshi35/kdxf/data'
train = pd.read_table(path + '/train.txt')
test = pd.read_table(path + '/test.txt')
# Stack train and test row-wise and renumber the index from 0; every
# experiment below starts from this `data` frame.
data = pd.concat([train, test], axis=0, ignore_index=True)
# print(data)
# Experiment: the function returns the merged frame and the caller stores it
# under a new name, so `full` gains the count columns and `data` is unchanged.
# Count-based statistical features.
count_feature_list = []


def feature_count(full):
    """Add a ``cnt_<column>`` frequency feature for every column with more than 3 distinct values.

    Args:
        full: Source DataFrame. The caller's object is not modified; ``merge``
            returns a new frame and only the local name is rebound.

    Returns:
        The merged DataFrame, where each ``cnt_<column>`` holds the number
        of rows sharing that row's value in ``<column>``.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints intermediate results.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps the feature name valid for non-string column labels.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # print(temp)
        # Join explicitly on the grouped column instead of "all shared columns".
        full = full.merge(temp, how='left', on=i)
        # print(new_feature)
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
    return full


full = feature_count(data)
print(full)
print(data)
print(data.keys())
exit(0)
# Experiment: the function has no return statement, so `full` is None after
# the call and `data` is unchanged.
# Count-based statistical features.
count_feature_list = []


def feature_count(full):
    """Compute ``cnt_<column>`` frequency features for columns with more than 3 distinct values.

    Args:
        full: Source DataFrame. The caller's object is not modified; ``merge``
            returns a new frame and only the local name is rebound.

    Returns:
        None. There is no ``return`` statement, so the merged frame is only
        printed and then discarded.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints intermediate results.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps the feature name valid for non-string column labels.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # print(temp)
        # Join explicitly on the grouped column instead of "all shared columns".
        full = full.merge(temp, how='left', on=i)
        # print(new_feature)
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())


full = feature_count(data)
print(full)  # None
print(data)
print(data.keys())
exit(0)
# Experiment: the function has no return statement and the caller below does
# `data = feature_count(data)`, so `data` is rebound to None.
# Count-based statistical features.
count_feature_list = []


def feature_count(full):
    """Compute ``cnt_<column>`` frequency features for columns with more than 3 distinct values.

    Args:
        full: Source DataFrame. The caller's object is not modified; ``merge``
            returns a new frame and only the local name is rebound.

    Returns:
        None. There is no ``return`` statement, so the merged frame is only
        printed and then discarded.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints intermediate results.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps the feature name valid for non-string column labels.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # print(temp)
        # Join explicitly on the grouped column instead of "all shared columns".
        full = full.merge(temp, how='left', on=i)
        # print(new_feature)
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
data=feature_count(data)# nothing is returned, so this replaces the original data with None
print(data)
print(data)
print(data.keys())# NoneType has no keys()
exit(0)
# Experiment: no return statement and the caller below stores the result in
# `full`, so `full` is None and `data` is unchanged.
# Count-based statistical features.
count_feature_list = []


def feature_count(full):
    """Compute ``cnt_<column>`` frequency features for columns with more than 3 distinct values.

    Args:
        full: Source DataFrame. The caller's object is not modified; ``merge``
            returns a new frame and only the local name is rebound.

    Returns:
        None. There is no ``return`` statement, so the merged frame is only
        printed and then discarded.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints intermediate results.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps the feature name valid for non-string column labels.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # print(temp)
        # Join explicitly on the grouped column instead of "all shared columns".
        full = full.merge(temp, how='left', on=i)
        # print(new_feature)
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
full=feature_count(data)
print(full)#none
print(data)
print(data.keys())
exit(0)
# Experiment: the function returns the module-level `data` instead of its
# local result, so after `data = feature_count(data)` below `data` is unchanged.
# Count-based statistical features.
count_feature_list = []


def feature_count(full):
    """Compute ``cnt_<column>`` frequency features, then return the global ``data``.

    Args:
        full: Source DataFrame. The caller's object is not modified; ``merge``
            returns a new frame and only the local name is rebound.

    Returns:
        The module-level ``data`` object, NOT the merged frame built here.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints intermediate results.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps the feature name valid for non-string column labels.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # print(temp)
        # Join explicitly on the grouped column instead of "all shared columns".
        full = full.merge(temp, how='left', on=i)
        # print(new_feature)
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
    # NOTE(review): kept on purpose for the demonstration. `data` is looked up
    # in module scope, so the merged `full` is thrown away; real code should
    # `return full`.
    return data
data=feature_count(data)# confusing on purpose: the function should return the name defined inside the def (full), not the global data
print(data)
print(data.keys())
exit(0)
# Experiment: the function returns the merged frame but the caller below
# discards the result, so `data` is unchanged.
# Count-based statistical features.
count_feature_list = []


def feature_count(full):
    """Add a ``cnt_<column>`` frequency feature for every column with more than 3 distinct values.

    Args:
        full: Source DataFrame. The caller's object is not modified; ``merge``
            returns a new frame and only the local name is rebound.

    Returns:
        The merged DataFrame, where each ``cnt_<column>`` holds the number
        of rows sharing that row's value in ``<column>``.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints intermediate results.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps the feature name valid for non-string column labels.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # print(temp)
        # Join explicitly on the grouped column instead of "all shared columns".
        full = full.merge(temp, how='left', on=i)
        # print(new_feature)
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
    return full
feature_count(data)
print(data)
print(data.keys())
exit(0)
# Experiment: no return statement and the caller below ignores the result,
# so `data` is unchanged.
# Count-based statistical features.
count_feature_list = []


def feature_count(full):
    """Compute ``cnt_<column>`` frequency features for columns with more than 3 distinct values.

    Args:
        full: Source DataFrame. The caller's object is not modified; ``merge``
            returns a new frame and only the local name is rebound.

    Returns:
        None. There is no ``return`` statement, so the merged frame is only
        printed and then discarded.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints intermediate results.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps the feature name valid for non-string column labels.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # print(temp)
        # Join explicitly on the grouped column instead of "all shared columns".
        full = full.merge(temp, how='left', on=i)
        # print(new_feature)
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
feature_count(data)
print(data)
print(data.keys())
exit(0)
# Experiment: `data` changes, but only because the caller below reassigns it
# with `data = feature_count(data)`.
# Count-based statistical features.
count_feature_list = []


def feature_count(data):
    """Add a ``cnt_<column>`` frequency feature for every column with more than 3 distinct values.

    Args:
        data: Source DataFrame. This parameter shadows the module-level
            ``data``; the caller's object is not modified because ``merge``
            returns a new frame and only the local name is rebound.

    Returns:
        The merged DataFrame, where each ``cnt_<column>`` holds the number
        of rows sharing that row's value in ``<column>``.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints intermediate results.
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps the feature name valid for non-string column labels.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index(name=new_feature)
        # print(temp)
        # Join explicitly on the grouped column instead of "all shared columns".
        data = data.merge(temp, how='left', on=i)
        # print(new_feature)
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
    return data
data=feature_count(data)
print(data)
print(data.keys())
exit(0)
# Experiment: no return statement and the caller below stores the result in
# `full`, so `full` is None and `data` is unchanged.
# Count-based statistical features.
count_feature_list = []


def feature_count(data):
    """Compute ``cnt_<column>`` frequency features for columns with more than 3 distinct values.

    Args:
        data: Source DataFrame. This parameter shadows the module-level
            ``data``; the caller's object is not modified because ``merge``
            returns a new frame and only the local name is rebound.

    Returns:
        None. There is no ``return`` statement, so the merged frame is only
        printed and then discarded.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints intermediate results.
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps the feature name valid for non-string column labels.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index(name=new_feature)
        # print(temp)
        # Join explicitly on the grouped column instead of "all shared columns".
        data = data.merge(temp, how='left', on=i)
        # print(new_feature)
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
full=feature_count(data)
print(full)
print(data)
print(data.keys())
exit(0)
# Experiment: one might expect `data` to change because the parameter has the
# same name, but it does not: the caller below discards the returned frame.
# Count-based statistical features.
count_feature_list = []


def feature_count(data):
    """Add a ``cnt_<column>`` frequency feature for every column with more than 3 distinct values.

    Args:
        data: Source DataFrame. This parameter shadows the module-level
            ``data``; the caller's object is not modified because ``merge``
            returns a new frame and only the local name is rebound.

    Returns:
        The merged DataFrame, where each ``cnt_<column>`` holds the number
        of rows sharing that row's value in ``<column>``.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints intermediate results.
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps the feature name valid for non-string column labels.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index(name=new_feature)
        # print(temp)
        # Join explicitly on the grouped column instead of "all shared columns".
        data = data.merge(temp, how='left', on=i)
        # print(new_feature)
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
    return data
feature_count(data)
print(data)
print(data.keys())
exit(0)
# Experiment: no return statement and the caller below ignores the result,
# so `data` is unchanged even though the parameter shares its name.
# Count-based statistical features.
count_feature_list = []


def feature_count(data):
    """Compute ``cnt_<column>`` frequency features for columns with more than 3 distinct values.

    Args:
        data: Source DataFrame. This parameter shadows the module-level
            ``data``; the caller's object is not modified because ``merge``
            returns a new frame and only the local name is rebound.

    Returns:
        None. There is no ``return`` statement, so the merged frame is only
        printed and then discarded.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints intermediate results.
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps the feature name valid for non-string column labels.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index(name=new_feature)
        # print(temp)
        # Join explicitly on the grouped column instead of "all shared columns".
        data = data.merge(temp, how='left', on=i)
        # print(new_feature)
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
feature_count(data)
print(data)
print(data.keys())
exit(0)
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
# Experiment: a copy of `data` is passed in and the result is stored in
# `full`, so `full` gains the count columns and `data` is unchanged.
# Count-based statistical features.
count_feature_list = []


def feature_count(full):
    """Add a ``cnt_<column>`` frequency feature for every column with more than 3 distinct values.

    Args:
        full: Source DataFrame. The caller's object is not modified; ``merge``
            returns a new frame and only the local name is rebound.

    Returns:
        The merged DataFrame, where each ``cnt_<column>`` holds the number
        of rows sharing that row's value in ``<column>``.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints intermediate results.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps the feature name valid for non-string column labels.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # print(temp)
        # Join explicitly on the grouped column instead of "all shared columns".
        full = full.merge(temp, how='left', on=i)
        # print(new_feature)
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
    return full
full=feature_count(data.copy())
print(full)
print(data)
print(data.keys())
exit(0)
# Experiment: a copy of `data` is passed in but nothing is returned, so
# `full` is None and `data` is unchanged.
# Count-based statistical features.
count_feature_list = []


def feature_count(full):
    """Compute ``cnt_<column>`` frequency features for columns with more than 3 distinct values.

    Args:
        full: Source DataFrame. The caller's object is not modified; ``merge``
            returns a new frame and only the local name is rebound.

    Returns:
        None. There is no ``return`` statement, so the merged frame is only
        printed and then discarded.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints intermediate results.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps the feature name valid for non-string column labels.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # print(temp)
        # Join explicitly on the grouped column instead of "all shared columns".
        full = full.merge(temp, how='left', on=i)
        # print(new_feature)
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
full=feature_count(data.copy())
print(full)
print(data)
print(data.keys())
exit(0)
# Experiment: a copy of `data` is passed in and the returned frame is
# discarded by the caller below, so `data` is unchanged.
# Count-based statistical features.
count_feature_list = []


def feature_count(full):
    """Add a ``cnt_<column>`` frequency feature for every column with more than 3 distinct values.

    Args:
        full: Source DataFrame. The caller's object is not modified; ``merge``
            returns a new frame and only the local name is rebound.

    Returns:
        The merged DataFrame, where each ``cnt_<column>`` holds the number
        of rows sharing that row's value in ``<column>``.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints intermediate results.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps the feature name valid for non-string column labels.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # print(temp)
        # Join explicitly on the grouped column instead of "all shared columns".
        full = full.merge(temp, how='left', on=i)
        # print(new_feature)
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
    return full
feature_count(data.copy())
print(data)
print(data.keys())
exit(0)
# Experiment: a copy of `data` is passed in, nothing is returned and the
# caller below ignores the result, so `data` is unchanged.
# Count-based statistical features.
count_feature_list = []


def feature_count(full):
    """Compute ``cnt_<column>`` frequency features for columns with more than 3 distinct values.

    Args:
        full: Source DataFrame. The caller's object is not modified; ``merge``
            returns a new frame and only the local name is rebound.

    Returns:
        None. There is no ``return`` statement, so the merged frame is only
        printed and then discarded.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints intermediate results.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps the feature name valid for non-string column labels.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # print(temp)
        # Join explicitly on the grouped column instead of "all shared columns".
        full = full.merge(temp, how='left', on=i)
        # print(new_feature)
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
feature_count(data.copy())
print(data)
print(data.keys())
exit(0)
# Experiment: `data` changes because the caller below reassigns it with
# `data = feature_count(data.copy())`.
# Count-based statistical features.
count_feature_list = []


def feature_count(data):
    """Add a ``cnt_<column>`` frequency feature for every column with more than 3 distinct values.

    Args:
        data: Source DataFrame. This parameter shadows the module-level
            ``data``; the caller's object is not modified because ``merge``
            returns a new frame and only the local name is rebound.

    Returns:
        The merged DataFrame, where each ``cnt_<column>`` holds the number
        of rows sharing that row's value in ``<column>``.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints intermediate results.
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps the feature name valid for non-string column labels.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index(name=new_feature)
        # print(temp)
        # Join explicitly on the grouped column instead of "all shared columns".
        data = data.merge(temp, how='left', on=i)
        # print(new_feature)
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
    return data
data=feature_count(data.copy())
print(data)
print(data.keys())
exit(0)
# Experiment: a copy of `data` is passed in but nothing is returned, so
# `full` is None and `data` is unchanged.
# Count-based statistical features.
count_feature_list = []


def feature_count(data):
    """Compute ``cnt_<column>`` frequency features for columns with more than 3 distinct values.

    Args:
        data: Source DataFrame. This parameter shadows the module-level
            ``data``; the caller's object is not modified because ``merge``
            returns a new frame and only the local name is rebound.

    Returns:
        None. There is no ``return`` statement, so the merged frame is only
        printed and then discarded.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints intermediate results.
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps the feature name valid for non-string column labels.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index(name=new_feature)
        # print(temp)
        # Join explicitly on the grouped column instead of "all shared columns".
        data = data.merge(temp, how='left', on=i)
        # print(new_feature)
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
full=feature_count(data.copy())
print(full)
print(data)
print(data.keys())
exit(0)
# Experiment: a copy of `data` is passed in and the returned frame is
# discarded by the caller below, so `data` is unchanged.
# Count-based statistical features.
count_feature_list = []


def feature_count(data):
    """Add a ``cnt_<column>`` frequency feature for every column with more than 3 distinct values.

    Args:
        data: Source DataFrame. This parameter shadows the module-level
            ``data``; the caller's object is not modified because ``merge``
            returns a new frame and only the local name is rebound.

    Returns:
        The merged DataFrame, where each ``cnt_<column>`` holds the number
        of rows sharing that row's value in ``<column>``.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints intermediate results.
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps the feature name valid for non-string column labels.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index(name=new_feature)
        # print(temp)
        # Join explicitly on the grouped column instead of "all shared columns".
        data = data.merge(temp, how='left', on=i)
        # print(new_feature)
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
    return data
feature_count(data.copy())
print(data)
print(data.keys())
exit(0)
# Experiment: a copy of `data` is passed in, nothing is returned and the
# caller below ignores the result, so `data` is unchanged.
# Count-based statistical features.
count_feature_list = []


def feature_count(data):
    """Compute ``cnt_<column>`` frequency features for columns with more than 3 distinct values.

    Args:
        data: Source DataFrame. This parameter shadows the module-level
            ``data``; the caller's object is not modified because ``merge``
            returns a new frame and only the local name is rebound.

    Returns:
        None. There is no ``return`` statement, so the merged frame is only
        printed and then discarded.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints intermediate results.
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps the feature name valid for non-string column labels.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index(name=new_feature)
        # print(temp)
        # Join explicitly on the grouped column instead of "all shared columns".
        data = data.merge(temp, how='left', on=i)
        # print(new_feature)
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
feature_count(data.copy())
print(data)
print(data.keys())
exit(0)
二、讓我們看一個有趣的事情。我們容易搞混但是難以理解的def和return變數變化
#!/usr/bin/env python
# -*- coding:utf8 -*-
# @TIME :2018/10/3 8:20
# @Author:Yolanda
# @File :ceshi.py
#反擊啊!少女
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from tqdm import tqdm
# Directory that holds train.txt and test.txt.
path = '/home/lab/cyy/ceshi35/kdxf/data'
train = pd.read_table(path + '/train.txt')
test = pd.read_table(path + '/test.txt')
# Stack train and test row-wise and renumber the index from 0.
data = pd.concat([train, test], axis=0, ignore_index=True)
# print(data)
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
# Result: changed -- `full` and `data` are the same object, so both show the fill.
def temp(full):
    """Replace missing ``user_tags`` values with the string ``'-1'`` in place.

    Args:
        full: DataFrame with a ``user_tags`` column. The column assignment
            writes into this very object, so the change is visible through
            every name bound to it.

    Returns:
        The same DataFrame object that was passed in.
    """
    full['user_tags'] = full['user_tags'].fillna(str(-1))
    print(full)
    return full
full=temp(data)
print(full)
print(data)
exit(0)
# Result: `data` is changed in place; `full` is None because nothing is returned.
def temp(full):
    """Replace missing ``user_tags`` values with the string ``'-1'`` in place.

    Args:
        full: DataFrame with a ``user_tags`` column. The column assignment
            writes into this very object, so the caller's frame is modified.

    Returns:
        None. There is no ``return`` statement.
    """
    full['user_tags'] = full['user_tags'].fillna(str(-1))
    print(full)
full=temp(data)
print(full)
print(data)
exit(0)
# Result: changed -- the fill happens in place even though the caller ignores the return value.
def temp(full):
    """Replace missing ``user_tags`` values with the string ``'-1'`` in place.

    Args:
        full: DataFrame with a ``user_tags`` column. The column assignment
            writes into this very object, so the caller's frame is modified.

    Returns:
        The same DataFrame object that was passed in.
    """
    full['user_tags'] = full['user_tags'].fillna(str(-1))
    print(full)
    return full
temp(data)
print(data)
exit(0)
# Result: changed -- the fill happens in place, no return value is needed.
def temp(full):
    """Replace missing ``user_tags`` values with the string ``'-1'`` in place.

    Args:
        full: DataFrame with a ``user_tags`` column. The column assignment
            writes into this very object, so the caller's frame is modified.

    Returns:
        None. There is no ``return`` statement.
    """
    full['user_tags'] = full['user_tags'].fillna(str(-1))
    print(full)
temp(data)
print(data)
exit(0)
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
# Result: changed -- the frame is modified in place and then reassigned to `data`.
def temp(data):
    """Replace missing ``user_tags`` values with the string ``'-1'`` in place.

    Args:
        data: DataFrame with a ``user_tags`` column (shadows the module-level
            ``data``). The column assignment writes into this very object,
            so the caller's frame is modified.

    Returns:
        The same DataFrame object that was passed in.
    """
    data['user_tags'] = data['user_tags'].fillna(str(-1))
    print(data)
    return data
data=temp(data)
print(data)
exit(0)
# Result: the print inside the function shows the filled frame; the print
# after `data = temp(data)` shows None because nothing is returned.
def temp(data):
    """Replace missing ``user_tags`` values with the string ``'-1'`` in place.

    Args:
        data: DataFrame with a ``user_tags`` column (shadows the module-level
            ``data``). The column assignment writes into this very object,
            so the caller's frame is modified.

    Returns:
        None. There is no ``return`` statement.
    """
    data['user_tags'] = data['user_tags'].fillna(str(-1))
    print(data)
data=temp(data)
print(data)
exit(0)
# Result: changed -- the fill happens in place even though the caller ignores the return value.
def temp(data):
    """Replace missing ``user_tags`` values with the string ``'-1'`` in place.

    Args:
        data: DataFrame with a ``user_tags`` column (shadows the module-level
            ``data``). The column assignment writes into this very object,
            so the caller's frame is modified.

    Returns:
        The same DataFrame object that was passed in.
    """
    data['user_tags'] = data['user_tags'].fillna(str(-1))
    print(data)
    return data
temp(data)
print(data)
exit(0)
# Result: changed -- the fill happens in place, no return value is needed.
def temp(data):
    """Replace missing ``user_tags`` values with the string ``'-1'`` in place.

    Args:
        data: DataFrame with a ``user_tags`` column (shadows the module-level
            ``data``). The column assignment writes into this very object,
            so the caller's frame is modified.

    Returns:
        None. There is no ``return`` statement.
    """
    data['user_tags'] = data['user_tags'].fillna(str(-1))
    print(data)
temp(data)
print(data)
exit(0)
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
# Result: `data` unchanged -- the caller passes data.copy(), so only the copy
# (returned as `full`) is modified.
def temp(full):
    """Replace missing ``user_tags`` values with the string ``'-1'`` in place.

    Args:
        full: DataFrame with a ``user_tags`` column. The column assignment
            writes into whichever object is passed in.

    Returns:
        The same DataFrame object that was passed in.
    """
    full['user_tags'] = full['user_tags'].fillna(str(-1))
    print(full)
    return full
full=temp(data.copy())
print(full)
print(data)
exit(0)
# Result: `data` unchanged -- only the passed copy is modified, and `full` is
# None because nothing is returned.
def temp(full):
    """Replace missing ``user_tags`` values with the string ``'-1'`` in place.

    Args:
        full: DataFrame with a ``user_tags`` column. The column assignment
            writes into whichever object is passed in.

    Returns:
        None. There is no ``return`` statement.
    """
    full['user_tags'] = full['user_tags'].fillna(str(-1))
    print(full)
full=temp(data.copy())
print(full)
print(data)
exit(0)
# Result: `data` unchanged -- only the passed copy is modified and the
# returned frame is discarded by the caller.
def temp(full):
    """Replace missing ``user_tags`` values with the string ``'-1'`` in place.

    Args:
        full: DataFrame with a ``user_tags`` column. The column assignment
            writes into whichever object is passed in.

    Returns:
        The same DataFrame object that was passed in.
    """
    full['user_tags'] = full['user_tags'].fillna(str(-1))
    print(full)
    return full
temp(data.copy())
print(data)
exit(0)
# Result: `data` unchanged -- only the passed copy is modified and nothing is returned.
def temp(full):
    """Replace missing ``user_tags`` values with the string ``'-1'`` in place.

    Args:
        full: DataFrame with a ``user_tags`` column. The column assignment
            writes into whichever object is passed in.

    Returns:
        None. There is no ``return`` statement.
    """
    full['user_tags'] = full['user_tags'].fillna(str(-1))
    print(full)
temp(data.copy())
print(data)
exit(0)
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
# Result: changed -- the modified copy is returned and reassigned to `data`.
def temp(data):
    """Replace missing ``user_tags`` values with the string ``'-1'`` in place.

    Args:
        data: DataFrame with a ``user_tags`` column (shadows the module-level
            ``data``). The column assignment writes into whichever object is
            passed in.

    Returns:
        The same DataFrame object that was passed in.
    """
    data['user_tags'] = data['user_tags'].fillna(str(-1))
    print(data)
    return data
data=temp(data.copy())
print(data)
exit(0)
# Result: the print inside the function shows the filled copy; the print
# after `data = temp(data.copy())` shows None because nothing is returned.
def temp(data):
    """Replace missing ``user_tags`` values with the string ``'-1'`` in place.

    Args:
        data: DataFrame with a ``user_tags`` column (shadows the module-level
            ``data``). The column assignment writes into whichever object is
            passed in.

    Returns:
        None. There is no ``return`` statement.
    """
    data['user_tags'] = data['user_tags'].fillna(str(-1))
    print(data)
data=temp(data.copy())
print(data)
exit(0)
# Result: `data` unchanged -- only the passed copy is modified and the
# returned frame is discarded by the caller.
def temp(data):
    """Replace missing ``user_tags`` values with the string ``'-1'`` in place.

    Args:
        data: DataFrame with a ``user_tags`` column (shadows the module-level
            ``data``). The column assignment writes into whichever object is
            passed in.

    Returns:
        The same DataFrame object that was passed in.
    """
    data['user_tags'] = data['user_tags'].fillna(str(-1))
    print(data)
    return data
temp(data.copy())
print(data)
exit(0)
# Result: `data` unchanged -- only the passed copy is modified and nothing is returned.
def temp(data):
    """Replace missing ``user_tags`` values with the string ``'-1'`` in place.

    Args:
        data: DataFrame with a ``user_tags`` column (shadows the module-level
            ``data``). The column assignment writes into whichever object is
            passed in.

    Returns:
        None. There is no ``return`` statement.
    """
    data['user_tags'] = data['user_tags'].fillna(str(-1))
    print(data)
temp(data.copy())
print(data)
exit(0)
三、讓我們接著看一個更有趣的事情。我們容易搞混但是更加無法理解的def和return變數變化
#!/usr/bin/env python
# -*- coding:utf8 -*-
# @TIME :2018/10/3 8:20
# @Author:Yolanda
# @File :ceshi.py
#反擊啊!少女
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from tqdm import tqdm
# Directory that holds train.txt and test.txt.
path = '/home/lab/cyy/ceshi35/kdxf/data'
train = pd.read_table(path + '/train.txt')
test = pd.read_table(path + '/test.txt')
# Stack train and test row-wise and renumber the index from 0.
data = pd.concat([train, test], axis=0, ignore_index=True)
# print(data)
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
# Result: `full` changes, `data` does not (same pattern as the count-feature
# experiments): fillna() builds a new frame and only the local name is rebound.
def temp(full):
    """Return a copy of ``full`` with every missing value replaced by ``'-1'``.

    Args:
        full: Source DataFrame. It is not modified; ``fillna`` returns a new
            frame and the assignment only rebinds the local name.

    Returns:
        The new, filled DataFrame.
    """
    full = full.fillna(str(-1))
    print(full)
    return full
full=temp(data)
print(full)
print(data)
exit(0)
# Result: both `full` and `data` change. Assigning to full['user_tags'] mutates
# the object that was passed in (it is not a rebinding of the local name), and
# `data` refers to that same object. Pass data.copy() to keep `data` intact.
def temp(full):
    """Replace missing ``user_tags`` values with the string ``'-1'`` in place.

    Args:
        full: DataFrame with a ``user_tags`` column. The column assignment
            writes into this very object, so the change is visible through
            every name bound to it.

    Returns:
        The same DataFrame object that was passed in.
    """
    full['user_tags'] = full['user_tags'].fillna(str(-1))
    print(full)
    return full
full=temp(data)
print(full)
print(data)
exit(0)
# Result: `full` becomes just the filled user_tags column, `data` is unchanged
# (same pattern as the count-feature experiments): the local name is rebound
# to a new Series.
def temp(full):
    """Return the ``user_tags`` column with missing values replaced by ``'-1'``.

    Args:
        full: DataFrame with a ``user_tags`` column. It is not modified;
            ``fillna`` returns a new Series and only the local name is rebound.

    Returns:
        The filled ``user_tags`` Series (not a DataFrame).
    """
    full = full['user_tags'].fillna(str(-1))
    print(full)
    return full
full=temp(data)
print(full)
print(data)
exit(0)
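我猜可能涉及到pandas底層,有懂的大佬來指點小白一下~~補充說明:其實關鍵不在pandas底層,而在Python的名稱繫結規則。`full = full.fillna(...)`、`full = full.merge(...)` 這類寫法只是讓函式內的區域變數 full 指向一個新物件,外部的 data 仍然指向原本的物件,所以 data 不變;而 `full['user_tags'] = ...` 是對傳入的同一個物件做原地修改,所以 data 也跟著變,除非傳入的是 data.copy()。總結到此,理解基礎上多加練習。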