1. 程式人生 > >def函式變數變化

def函式變數變化

介紹下本人。中山大學,醫學生+計科學生的集合體,機器學習愛好者。
一、我們容易搞混但是可以理解的def和return變數變化

#!/usr/bin/env python
# -*- coding:utf8 -*-
# @TIME  :2018/10/3 16:39
# @Author:Yolanda
# @File  :ceshi1.py

#反擊啊!少女
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from tqdm import tqdm

path = '/home/lab/cyy/ceshi35/kdxf/data'
train = pd.read_table(path + '/train.txt')
test = pd.read_table(path + '/test.txt')
data = pd.concat([train, test], axis=0, ignore_index=True)
# print(data)
# Scenario: full receives the enriched frame, data is unchanged
# Count (frequency) features
count_feature_list = []
def feature_count(full):
    """Add a ``cnt_<col>`` frequency column per column with > 3 distinct values.

    Every added column name is appended to the module-level
    ``count_feature_list``. ``merge`` builds a new frame, so ``full = ...``
    only rebinds the local name and the caller's DataFrame is left untouched.

    Args:
        full: pandas DataFrame to derive the count features from.

    Returns:
        The enriched DataFrame (``full`` itself when no column qualifies).
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps non-string column labels from raising TypeError, and the
        # same name is used for the merged column and the recorded feature.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # Left join on the columns both frames share (normally just i).
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
    return full
full=feature_count(data)
print(full)
print(data)
print(data.keys())
exit(0)
# Scenario: full ends up as None (nothing is returned), data is unchanged
# Count (frequency) features
count_feature_list = []
def feature_count(full):
    """Print count features for ``full``; deliberately returns nothing.

    For each column with more than 3 distinct values a ``cnt_<col>`` column is
    left-merged into a new frame and its name is appended to the module-level
    ``count_feature_list``. The enriched frame is only printed, so a caller
    that assigns the result receives ``None``.

    Args:
        full: pandas DataFrame to derive the count features from.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # One str()-built name serves both the merged column and the recorded
        # feature, so non-string column labels no longer raise TypeError.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # Rebinds the local name only; the caller's frame is not modified.
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
full=feature_count(data)
print(full)#none
print(data)
print(data.keys())
exit(0)
# Scenario: the function returns nothing, so `data = feature_count(data)`
# rebinds data to None
# Count (frequency) features
count_feature_list = []
def feature_count(full):
    """Print count features for ``full``; deliberately returns nothing.

    Columns with more than 3 distinct values each get a ``cnt_<col>``
    frequency column in a new, merged frame; the column names are appended to
    the module-level ``count_feature_list``. Because there is no ``return``,
    the call evaluates to ``None``.

    Args:
        full: pandas DataFrame to derive the count features from.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string labels work and the
        # merged column always matches the recorded feature name.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # merge returns a new frame; only the local name is rebound.
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
data=feature_count(data)# nothing is returned, so this replaces the original data with None
print(data)
print(data)
print(data.keys())# fails: NoneType has no keys()
exit(0)
# Scenario: full ends up as None (nothing is returned), data is unchanged
# Count (frequency) features
count_feature_list = []
def feature_count(full):
    """Print count features for ``full`` without returning them.

    A ``cnt_<col>`` frequency column is merged into a new frame for every
    column with more than 3 distinct values, and each name is appended to the
    module-level ``count_feature_list``. The result is printed only.

    Args:
        full: pandas DataFrame to derive the count features from.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() avoids a TypeError for non-string column labels; the same name
        # is used for the merged column and the recorded feature.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # Only the local name is rebound; the caller's frame stays as it was.
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
full=feature_count(data)
print(full)#none
print(data)
print(data.keys())
exit(0)
# Scenario: data is unchanged
# Count (frequency) features
count_feature_list = []
def feature_count(full):
    """Print count features for ``full`` but return the module-level ``data``.

    The enriched frame built from ``full`` is printed and then discarded; the
    function returns the global ``data`` instead. This is the confusing
    pattern the example demonstrates. Added column names are appended to the
    module-level ``count_feature_list``.

    Args:
        full: pandas DataFrame to derive the count features from.

    Returns:
        The module-level ``data`` object, unmodified by this function.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string labels work and the
        # merged column always matches the recorded feature name.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # merge returns a new frame; only the local name is rebound.
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
    # Intentionally the global, not the local `full` (kept for the demo).
    return data
data=feature_count(data)# confusing style: the function should return the name bound inside the def (full), not the global data
print(data)
print(data.keys())
exit(0)
# Scenario: data is unchanged (the caller discards the returned frame)
# Count (frequency) features
count_feature_list = []
def feature_count(full):
    """Return ``full`` plus a ``cnt_<col>`` column per high-cardinality column.

    "High-cardinality" here means more than 3 distinct values. Each added
    column name is appended to the module-level ``count_feature_list``. The
    input frame is never modified because ``merge`` creates a new frame.

    Args:
        full: pandas DataFrame to derive the count features from.

    Returns:
        The enriched DataFrame (``full`` itself when no column qualifies).
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps non-string column labels from raising TypeError, and the
        # same name is used for the merged column and the recorded feature.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # Left join on the columns both frames share (normally just i).
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
    return full
feature_count(data)
print(data)
print(data.keys())
exit(0)
# Scenario: data is unchanged (nothing is returned and nothing is assigned)
# Count (frequency) features
count_feature_list = []
def feature_count(full):
    """Print count features for ``full``; deliberately returns nothing.

    Each column with more than 3 distinct values gets a ``cnt_<col>``
    frequency column in a new, merged frame; the names are appended to the
    module-level ``count_feature_list``. The merged frame is printed only.

    Args:
        full: pandas DataFrame to derive the count features from.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # One str()-built name serves both the merged column and the recorded
        # feature, so non-string column labels no longer raise TypeError.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # Rebinds the local name only; the caller's frame is not modified.
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
feature_count(data)
print(data)
print(data.keys())
exit(0)
# Scenario: data changes (because the caller reassigns data = feature_count(data))
# Count (frequency) features
count_feature_list = []
def feature_count(data):
    """Return ``data`` plus a ``cnt_<col>`` column per high-cardinality column.

    The parameter shadows the module-level ``data``; rebinding it inside the
    function does not touch the global. Columns with more than 3 distinct
    values get a frequency column, and each new name is appended to the
    module-level ``count_feature_list``.

    Args:
        data: pandas DataFrame to derive the count features from.

    Returns:
        The enriched DataFrame (``data`` itself when no column qualifies).
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps non-string column labels from raising TypeError, and the
        # same name is used for the merged column and the recorded feature.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index(name=new_feature)
        # merge returns a new frame; only the local name is rebound.
        data = data.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
    return data
data=feature_count(data)
print(data)
print(data.keys())
exit(0)
# Scenario: full ends up as None (nothing is returned), data is unchanged
# Count (frequency) features
count_feature_list = []
def feature_count(data):
    """Print count features for ``data``; deliberately returns nothing.

    The parameter shadows the module-level ``data``; rebinding it here leaves
    the global untouched. Columns with more than 3 distinct values get a
    ``cnt_<col>`` frequency column in a new frame, and each name is appended
    to the module-level ``count_feature_list``.

    Args:
        data: pandas DataFrame to derive the count features from.
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # One str()-built name serves both the merged column and the recorded
        # feature, so non-string column labels no longer raise TypeError.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index(name=new_feature)
        # Rebinds the local name only; the caller's frame is not modified.
        data = data.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
full=feature_count(data)
print(full)
print(data)
print(data.keys())
exit(0)
# Scenario: data is NOT changed -- the returned frame is discarded by the
# caller, even though the parameter is also called data
# Count (frequency) features
count_feature_list = []
def feature_count(data):
    """Return ``data`` plus a ``cnt_<col>`` column per high-cardinality column.

    Sharing a name with the module-level ``data`` does not link the two: the
    ``data = data.merge(...)`` below rebinds only the local parameter. Each
    added column name is appended to the module-level ``count_feature_list``.

    Args:
        data: pandas DataFrame to derive the count features from.

    Returns:
        The enriched DataFrame (``data`` itself when no column qualifies).
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string labels work and the
        # merged column always matches the recorded feature name.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index(name=new_feature)
        # Left join on the columns both frames share (normally just i).
        data = data.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
    return data
feature_count(data)
print(data)
print(data.keys())
exit(0)
# Scenario: data is unchanged (nothing is returned and nothing is assigned)
# Count (frequency) features
count_feature_list = []
def feature_count(data):
    """Print count features for ``data`` without returning them.

    The local ``data`` is rebound to each merged frame, which has no effect on
    the module-level ``data``. Names of the added ``cnt_<col>`` columns (one
    per column with more than 3 distinct values) are appended to the
    module-level ``count_feature_list``.

    Args:
        data: pandas DataFrame to derive the count features from.
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() avoids a TypeError for non-string column labels; the same name
        # is used for the merged column and the recorded feature.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index(name=new_feature)
        # Only the local name is rebound; the caller's frame stays as it was.
        data = data.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
feature_count(data)
print(data)
print(data.keys())
exit(0)
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
# Scenario: full receives the enriched frame, data is unchanged
# (the caller passes data.copy())
# Count (frequency) features
count_feature_list = []
def feature_count(full):
    """Add a ``cnt_<col>`` frequency column per column with > 3 distinct values.

    Each added column name is appended to the module-level
    ``count_feature_list``. The argument itself is never modified: ``merge``
    returns a new frame and ``full`` is merely rebound to it.

    Args:
        full: pandas DataFrame to derive the count features from.

    Returns:
        The enriched DataFrame (``full`` itself when no column qualifies).
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps non-string column labels from raising TypeError, and the
        # same name is used for the merged column and the recorded feature.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # Left join on the columns both frames share (normally just i).
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
    return full
full=feature_count(data.copy())
print(full)
print(data)
print(data.keys())
exit(0)
# Scenario: full ends up as None (nothing is returned), data is unchanged
# (the caller passes data.copy())
# Count (frequency) features
count_feature_list = []
def feature_count(full):
    """Print count features for ``full``; deliberately returns nothing.

    For each column with more than 3 distinct values a ``cnt_<col>`` column is
    left-merged into a new frame and its name is appended to the module-level
    ``count_feature_list``. The enriched frame is printed, not returned.

    Args:
        full: pandas DataFrame to derive the count features from.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # One str()-built name serves both the merged column and the recorded
        # feature, so non-string column labels no longer raise TypeError.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # Rebinds the local name only; the caller's frame is not modified.
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
full=feature_count(data.copy())
print(full)
print(data)
print(data.keys())
exit(0)
# Scenario: data is unchanged (a copy is passed and the result is discarded)
# Count (frequency) features
count_feature_list = []
def feature_count(full):
    """Return ``full`` plus a ``cnt_<col>`` column per high-cardinality column.

    "High-cardinality" here means more than 3 distinct values. Each added
    column name is appended to the module-level ``count_feature_list``.
    ``merge`` creates a new frame, so the argument is not modified.

    Args:
        full: pandas DataFrame to derive the count features from.

    Returns:
        The enriched DataFrame (``full`` itself when no column qualifies).
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string labels work and the
        # merged column always matches the recorded feature name.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # Left join on the columns both frames share (normally just i).
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
    return full
feature_count(data.copy())
print(data)
print(data.keys())
exit(0)
# Scenario: data is unchanged (a copy is passed, nothing is returned)
# Count (frequency) features
count_feature_list = []
def feature_count(full):
    """Print count features for ``full`` without returning them.

    A ``cnt_<col>`` frequency column is merged into a new frame for every
    column with more than 3 distinct values, and each name is appended to the
    module-level ``count_feature_list``. The result is printed only.

    Args:
        full: pandas DataFrame to derive the count features from.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() avoids a TypeError for non-string column labels; the same name
        # is used for the merged column and the recorded feature.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index(name=new_feature)
        # Only the local name is rebound; the caller's frame stays as it was.
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
feature_count(data.copy())
print(data)
print(data.keys())
exit(0)
# Scenario: data changes (the caller reassigns data = feature_count(data.copy()))
# Count (frequency) features
count_feature_list = []
def feature_count(data):
    """Return ``data`` plus a ``cnt_<col>`` column per high-cardinality column.

    The parameter shadows the module-level ``data``; the global only changes
    if the caller assigns the return value back to it. Each added column name
    is appended to the module-level ``count_feature_list``.

    Args:
        data: pandas DataFrame to derive the count features from.

    Returns:
        The enriched DataFrame (``data`` itself when no column qualifies).
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() keeps non-string column labels from raising TypeError, and the
        # same name is used for the merged column and the recorded feature.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index(name=new_feature)
        # merge returns a new frame; only the local name is rebound.
        data = data.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
    return data
data=feature_count(data.copy())
print(data)
print(data.keys())
exit(0)

# Scenario: full ends up as None (nothing is returned), data is unchanged
# Count (frequency) features
count_feature_list = []
def feature_count(data):
    """Print count features for ``data``; deliberately returns nothing.

    The local ``data`` is rebound to each merged frame, which leaves the
    module-level ``data`` alone. Columns with more than 3 distinct values get
    a ``cnt_<col>`` column, and each name is appended to the module-level
    ``count_feature_list``.

    Args:
        data: pandas DataFrame to derive the count features from.
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # One str()-built name serves both the merged column and the recorded
        # feature, so non-string column labels no longer raise TypeError.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index(name=new_feature)
        # Rebinds the local name only; the caller's frame is not modified.
        data = data.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
full=feature_count(data.copy())
print(full)
print(data)
print(data.keys())
exit(0)
# Scenario: data is unchanged (a copy is passed and the result is discarded)
# Count (frequency) features
count_feature_list = []
def feature_count(data):
    """Return ``data`` plus a ``cnt_<col>`` column per high-cardinality column.

    Only the local parameter is rebound by ``data = data.merge(...)``; the
    module-level ``data`` is unaffected. Each added column name is appended
    to the module-level ``count_feature_list``.

    Args:
        data: pandas DataFrame to derive the count features from.

    Returns:
        The enriched DataFrame (``data`` itself when no column qualifies).
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string labels work and the
        # merged column always matches the recorded feature name.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index(name=new_feature)
        # Left join on the columns both frames share (normally just i).
        data = data.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
    return data
feature_count(data.copy())
print(data)
print(data.keys())
exit(0)
# Scenario: data is unchanged (a copy is passed, nothing is returned)
# Count (frequency) features
count_feature_list = []
def feature_count(data):
    """Print count features for ``data`` without returning them.

    Each column with more than 3 distinct values gets a ``cnt_<col>``
    frequency column in a new, merged frame; the names are appended to the
    module-level ``count_feature_list``. The merged frame is printed only.

    Args:
        data: pandas DataFrame to derive the count features from.
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # str() avoids a TypeError for non-string column labels; the same name
        # is used for the merged column and the recorded feature.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index(name=new_feature)
        # Only the local name is rebound; the caller's frame stays as it was.
        data = data.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
feature_count(data.copy())
print(data)
print(data.keys())
exit(0)

二、讓我們看一個有趣的事情。我們容易搞混但是難以理解的def和return變數變化

#!/usr/bin/env python
# -*- coding:utf8 -*-
# @TIME  :2018/10/3 8:20
# @Author:Yolanda
# @File  :ceshi.py

#反擊啊!少女
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from tqdm import tqdm

path = '/home/lab/cyy/ceshi35/kdxf/data'
train = pd.read_table(path + '/train.txt')
test = pd.read_table(path + '/test.txt')
data = pd.concat([train, test], axis=0, ignore_index=True)
# print(data)
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
# Scenario: data changes (the caller passes data itself)
def temp(full):
    """Fill missing ``user_tags`` values with the string '-1', in place.

    The column is assigned back onto the passed-in frame, so the caller's
    object is modified. Prints the frame and returns that same object.
    """
    placeholder = str(-1)
    filled_tags = full['user_tags'].fillna(placeholder)
    full['user_tags'] = filled_tags
    print(full)
    return full
full=temp(data)
print(full)
print(data)
exit(0)
# Scenario: data changes (the caller passes data itself; full becomes None)
def temp(full):
    """Fill missing ``user_tags`` values with the string '-1', in place.

    Modifies the passed-in frame, prints it, and returns nothing.
    """
    placeholder = str(-1)
    filled_tags = full['user_tags'].fillna(placeholder)
    full['user_tags'] = filled_tags
    print(full)
full=temp(data)
print(full)
print(data)
exit(0)
# Scenario: data changes (the return value is discarded, but the argument
# was modified in place)
def temp(full):
    """Replace missing ``user_tags`` entries with '-1' on the given frame.

    The assignment writes into the caller's object. The frame is printed and
    the same object is returned.
    """
    fill_value = str(-1)
    tags = full['user_tags'].fillna(fill_value)
    full['user_tags'] = tags
    print(full)
    return full
temp(data)
print(data)
exit(0)
# Scenario: data changes (modified in place; nothing returned or assigned)
def temp(full):
    """Replace missing ``user_tags`` entries with '-1' on the given frame.

    Writes into the caller's object, prints it, and returns nothing.
    """
    fill_value = str(-1)
    tags = full['user_tags'].fillna(fill_value)
    full['user_tags'] = tags
    print(full)
temp(data)
print(data)
exit(0)
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
# Scenario: data changes
def temp(data):
    """Fill missing ``user_tags`` values with the string '-1', in place.

    The parameter refers to the caller's frame, so the column assignment
    modifies it. Prints the frame and returns that same object.
    """
    placeholder = str(-1)
    filled_tags = data['user_tags'].fillna(placeholder)
    data['user_tags'] = filled_tags
    print(data)
    return data
data=temp(data)
print(data)
exit(0)
# Scenario: the first print (inside temp) shows the change; the second prints
# None because the caller rebinds data to the missing return value
def temp(data):
    """Fill missing ``user_tags`` values with the string '-1', in place.

    Modifies the passed-in frame, prints it, and returns nothing.
    """
    placeholder = str(-1)
    filled_tags = data['user_tags'].fillna(placeholder)
    data['user_tags'] = filled_tags
    print(data)
data=temp(data)
print(data)
exit(0)
# Scenario: data changes (modified in place; the return value is discarded)
def temp(data):
    """Replace missing ``user_tags`` entries with '-1' on the given frame.

    The assignment writes into the caller's object. The frame is printed and
    the same object is returned.
    """
    fill_value = str(-1)
    tags = data['user_tags'].fillna(fill_value)
    data['user_tags'] = tags
    print(data)
    return data
temp(data)
print(data)
exit(0)
# Scenario: data changes (modified in place; nothing returned or assigned)
def temp(data):
    """Replace missing ``user_tags`` entries with '-1' on the given frame.

    Writes into the caller's object, prints it, and returns nothing.
    """
    fill_value = str(-1)
    tags = data['user_tags'].fillna(fill_value)
    data['user_tags'] = tags
    print(data)
temp(data)
print(data)
exit(0)
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
# Scenario: data is unchanged (the caller passes data.copy())
def temp(full):
    """Fill missing ``user_tags`` values with the string '-1', in place.

    Whatever frame is passed in gets modified; the caller protects the
    original by handing over a copy. Prints and returns the same object.
    """
    placeholder = str(-1)
    filled_tags = full['user_tags'].fillna(placeholder)
    full['user_tags'] = filled_tags
    print(full)
    return full
full=temp(data.copy())
print(full)
print(data)
exit(0)
# Scenario: data is unchanged (the caller passes data.copy(); full is None)
def temp(full):
    """Fill missing ``user_tags`` values with the string '-1', in place.

    Modifies the passed-in frame, prints it, and returns nothing.
    """
    placeholder = str(-1)
    filled_tags = full['user_tags'].fillna(placeholder)
    full['user_tags'] = filled_tags
    print(full)
full=temp(data.copy())
print(full)
print(data)
exit(0)
# Scenario: data is unchanged (a copy is passed and the result is discarded)
def temp(full):
    """Replace missing ``user_tags`` entries with '-1' on the given frame.

    The assignment writes into the object that was passed in. The frame is
    printed and the same object is returned.
    """
    fill_value = str(-1)
    tags = full['user_tags'].fillna(fill_value)
    full['user_tags'] = tags
    print(full)
    return full
temp(data.copy())
print(data)
exit(0)
# Scenario: data is unchanged (a copy is passed; nothing is returned)
def temp(full):
    """Replace missing ``user_tags`` entries with '-1' on the given frame.

    Writes into the object that was passed in, prints it, returns nothing.
    """
    fill_value = str(-1)
    tags = full['user_tags'].fillna(fill_value)
    full['user_tags'] = tags
    print(full)
temp(data.copy())
print(data)
exit(0)
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
# Scenario: data changes (the caller rebinds data to the modified copy)
def temp(data):
    """Fill missing ``user_tags`` values with the string '-1', in place.

    Modifies the frame that was passed in, prints it, and returns that same
    object.
    """
    placeholder = str(-1)
    filled_tags = data['user_tags'].fillna(placeholder)
    data['user_tags'] = filled_tags
    print(data)
    return data
data=temp(data.copy())
print(data)
exit(0)
# Scenario: the first print (inside temp) shows the change; the second prints
# None because the caller rebinds data to the missing return value
def temp(data):
    """Fill missing ``user_tags`` values with the string '-1', in place.

    Modifies the passed-in frame, prints it, and returns nothing.
    """
    placeholder = str(-1)
    filled_tags = data['user_tags'].fillna(placeholder)
    data['user_tags'] = filled_tags
    print(data)
data=temp(data.copy())
print(data)
exit(0)
# Scenario: data is unchanged (a copy is passed and the result is discarded)
def temp(data):
    """Replace missing ``user_tags`` entries with '-1' on the given frame.

    The assignment writes into the object that was passed in. The frame is
    printed and the same object is returned.
    """
    fill_value = str(-1)
    tags = data['user_tags'].fillna(fill_value)
    data['user_tags'] = tags
    print(data)
    return data
temp(data.copy())
print(data)
exit(0)
# Scenario: data is unchanged (a copy is passed; nothing is returned)
def temp(data):
    """Replace missing ``user_tags`` entries with '-1' on the given frame.

    Writes into the object that was passed in, prints it, returns nothing.
    """
    fill_value = str(-1)
    tags = data['user_tags'].fillna(fill_value)
    data['user_tags'] = tags
    print(data)
temp(data.copy())
print(data)
exit(0)

三、讓我們接著看一個更有趣的事情。我們容易搞混但是更加無法理解的def和return變數變化

#!/usr/bin/env python
# -*- coding:utf8 -*-
# @TIME  :2018/10/3 8:20
# @Author:Yolanda
# @File  :ceshi.py

#反擊啊!少女
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from tqdm import tqdm

path = '/home/lab/cyy/ceshi35/kdxf/data'
train = pd.read_table(path + '/train.txt')
test = pd.read_table(path + '/test.txt')
data = pd.concat([train, test], axis=0, ignore_index=True)
# print(data)
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
# Scenario: full changes, data is unchanged (same as the count example)
def temp(full):
    """Return a copy of ``full`` with every missing value replaced by '-1'.

    ``fillna`` without ``inplace`` produces a new frame, so the argument is
    left as it was. The new frame is printed and returned.
    """
    placeholder = str(-1)
    filled = full.fillna(placeholder)
    print(filled)
    return filled
full=temp(data)
print(full)
print(data)
exit(0)
# Scenario: full and data both change. The column assignment modifies the
# very object that was passed in, so pass data.copy() to keep data unchanged.
def temp(full):
    """Fill missing ``user_tags`` values with the string '-1', in place.

    The column is assigned back onto the passed-in frame, so the caller's
    object is modified. Prints the frame and returns that same object.
    """
    placeholder = str(-1)
    filled_tags = full['user_tags'].fillna(placeholder)
    full['user_tags'] = filled_tags
    print(full)
    return full
full=temp(data)
print(full)
print(data)
exit(0)
# Scenario: full changes (it becomes just the user_tags column), data is
# unchanged (same as the count example)
def temp(full):
    """Return the ``user_tags`` column with missing values replaced by '-1'.

    Only the local name is rebound to the filled Series; the frame passed in
    is not modified. The Series is printed and returned.
    """
    placeholder = str(-1)
    filled_tags = full['user_tags'].fillna(placeholder)
    print(filled_tags)
    return filled_tags
full=temp(data)
print(full)
print(data)
exit(0)

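我猜可能涉及到pandas底層,有懂的大佬來指點小白一下~~補充說明:其實這是Python的引數傳遞機制,而不是pandas底層的問題。函式引數拿到的是同一個物件的引用:`full = full.merge(...)`、`full = full.fillna(...)` 這類寫法會產生新物件,並把區域性變數名重新繫結到新物件上,外部的data不受影響;而 `full['user_tags'] = ...` 是對傳入的同一個DataFrame物件就地修改,所以外部的data也會跟著變,想保留原資料就傳入 `data.copy()`。另外,函式沒有return時呼叫結果就是None。總結到此,理解基礎上多加練習。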