特徵選擇之卡方統計 Chi-Square
阿新 • • 發佈:2019-01-09
%%% dataset 的最後一欄(最後一個 column)為分類類別標籤,其餘各欄為特徵;k 為要選擇的特徵個數
function result=chiAttributeEva(dataset,k)
% CHIATTRIBUTEEVA Rank features by their chi-square statistic against the class label.
%
%   result = chiAttributeEva(dataset, k)
%
%   dataset : numeric matrix, one sample per row. The last column holds the
%             class label; every other column is treated as a discrete
%             (categorical) feature.
%   k       : number of features to select. Values larger than the number of
%             feature columns are clamped to that number.
%
%   result  : min(k, numFeatures)-by-2 matrix. Column 1 is the feature's
%             column index in dataset, column 2 is its chi-square score.
%             Rows are sorted by score in descending order, so the features
%             most dependent on the class label come first.
%
%   For each feature the statistic is
%       sum over (value j, class c) of (O_jc - E_jc)^2 / E_jc
%   with E_jc = (samples having value j) * (samples in class c) / (total samples).

labels = dataset(:,end);
classes = unique(labels);
numClasses = numel(classes);
numSamples = size(dataset,1);
numFeatures = max(size(dataset,2)-1, 0);

% Per-class totals do not depend on the feature, so compute them once.
classCounts = zeros(numClasses,1);
for m = 1:numClasses
    classCounts(m) = sum(labels == classes(m));
end

% Preallocate so the result is well defined even when there are no features.
character_order = zeros(numFeatures,2);
for i = 1:numFeatures
    character = dataset(:,i);
    character_value = unique(character);
    chi_i = 0;
    for j = 1:numel(character_value)
        hasValue = (character == character_value(j));
        o_j = sum(hasValue);                  % samples with this feature value
        for m = 1:numClasses                  % iterate over ALL classes
            o_jk = sum(hasValue & (labels == classes(m)));  % observed count
            e_jk = o_j * classCounts(m) / numSamples;        % expected count
            % e_jk is zero only for degenerate cells (e.g. NaN values that
            % never compare equal); skip them instead of producing NaN.
            if e_jk > 0
                chi_i = chi_i + (o_jk - e_jk)^2 / e_jk;
            end
        end
    end
    character_order(i,:) = [i, chi_i];
end

% Highest chi-square first: those are the most class-dependent features.
character_order = sortrows(character_order,-2);
k = max(0, min(k, numFeatures));
result = character_order(1:k,:);
end