1. 程式人生 > 其它 > 基於COIL20資料集進行K-means聚類並計算評價指標NMI、ACC

基於COIL20資料集進行K-means聚類並計算評價指標NMI、ACC

  • 1.先對圖片集進行處理
% Load every PNG in the source folder, flatten each image into one row of
% doubles (COIL) and parse its class label from the file name (Label).
% Both matrices are then written to COIL.mat / Label.mat.
clear
clc
% Named src_dir rather than "path" so the built-in path function is not shadowed.
src_dir = 'F:\Matlab\bin\kmeans_coil20\coil-20-proc\';     % source image folder
save_path = 'F:\Matlab\bin\kmeans_coil20\';     % intended output folder
% NOTE(review): save_path is not used below; the save commands at the end
% write into the current folder. Confirm which location is wanted.
file = dir([src_dir,'*.png']);     % metadata (name, date, bytes, ...) of every .png file

num_files = length(file);
disp(num_files);

% Collect rows/labels in cells and concatenate once after the loop instead
% of re-allocating COIL and Label on every iteration.
image_rows = cell(num_files,1);
label_vals = cell(num_files,1);
for i = 1:num_files
    str = file(i).name;     % file name, expected to look like obj<class>__<angle>.png

    %% Image data: read, convert to double, flatten to a single row
    image1 = im2double(imread([src_dir,str]));
    image_rows{i} = reshape(image1,1,[]);
    fprintf('%d th starting...\n',i);       % progress trace

    %% Class label: the digits between "obj" and the first underscore
    tok = regexp(str,'^obj(\d+)_','tokens','once');
    if isempty(tok)
        % Fail loudly instead of storing a NaN label for an unexpected file.
        error('Unexpected file name "%s": expected obj<class>__<angle>.png',str);
    end
    label_vals{i} = str2double(tok{1});
end
COIL = vertcat(image_rows{:});      % one image per row, in directory-listing order
Label = vertcat(label_vals{:});     % column vector of class labels, same order

save COIL COIL
save Label Label
%% k = 20: draw twenty random samples as the initial mean vectors
n=1440;     % number of samples (rows of COIL) used by the clustering
m=16384;    % number of feature columns per sample
k=20;       % number of clusters
% flag holds the k distinct row indices picked at random
flag=randperm(n,k);
% Tag the chosen samples with their cluster id in column m+1
COIL(flag,m+1)=(1:k).';
% The initial centroids are the feature columns of the chosen samples
mu=COIL(flag,1:m);
%% Assignment step: Euclidean distance to every centroid, keep the nearest
% dis(i,j) is the distance from sample i to centroid j; the index of the
% nearest centroid is stored in column m+1 of COIL.
dis=zeros(n,k);
for i=1:n
    sample = COIL(i,1:m);
    % Start from inf instead of a magic finite bound, and do not use a
    % variable called "min", which would shadow the built-in function.
    best_dist = inf;
    category = 1;
    for j=1:k
        delta = sample - mu(j,1:m);
        dis(i,j) = sqrt(sum(delta.^2));
        % Strict "<" keeps the lowest-numbered centroid on ties.
        if dis(i,j) < best_dist
            best_dist = dis(i,j);
            category = j;
        end
    end
    COIL(i,m+1)=category;
end
%% Iterate: recompute the centroids, stop when they no longer change,
%% otherwise reassign every sample to its nearest centroid
for iter=2:10
    % ---- Recompute mu -------------------------------------------------
    assigned = COIL(1:n,m+1);   % current cluster id of every sample
    sumn=zeros(k,m);            % per-cluster sum of the feature rows
    num=zeros(1,k);             % per-cluster sample count
    for c=1:k
        members = (assigned==c);
        num(1,c) = nnz(members);
        sumn(c,:) = sum(COIL(members,1:m),1);
    end

    % An empty cluster keeps its previous centroid; dividing by a zero
    % count would turn it into NaN and it could never attract a sample again.
    new_mu = mu;
    filled = num > 0;
    new_mu(filled,:) = bsxfun(@rdivide, sumn(filled,:), num(filled).');

    % Converged when no centroid coordinate changed. (The comparison has to
    % use the accumulated sums in sumn; calling the built-in sum(i,j) here
    % just returns i, which made the test true on practically every pass.)
    isUpdate = ~isequal(new_mu, mu);
    if ~isUpdate
        % No update: report how many passes were needed and stop.
        disp(iter-1);
        break;
    end
    mu = new_mu;

    % ---- Reassign every sample to the nearest centroid -----------------
    for i=1:n
        sample = COIL(i,1:m);
        best_dist = inf;    % avoids shadowing the built-in min
        category = 1;
        for j=1:k
            delta = sample - mu(j,1:m);
            dis(i,j) = sqrt(sum(delta.^2));
            % Strict "<" keeps the lowest-numbered centroid on ties.
            if dis(i,j) < best_dist
                best_dist = dis(i,j);
                category = j;
            end
        end
        COIL(i,m+1)=category;
    end
end
%% Compute NMI
% Y holds the cluster id assigned to each sample, Label the true class.
Y=COIL(:,m+1);
if length(Y) ~= length(Label)
    error ( 'length( Label ) must == length( Y)' );
end
% Everything below works on row vectors.
if iscolumn(Label), Label=Label.'; end
if iscolumn(Y), Y=Y.'; end

t = length(Label);              % number of samples
true_ids = unique(Label);       % distinct true classes
pred_ids = unique(Y);           % distinct cluster ids

% Indicator matrices: entry (c,s) is 1 when sample s carries id number c.
idAOccur = double(bsxfun(@eq, Label, true_ids.'));
idBOccur = double(bsxfun(@eq, Y, pred_ids.'));

% Marginal and joint distributions
Px = sum(idAOccur, 2).' / t;
Py = sum(idBOccur, 2).' / t;
Pxy = (idAOccur * idBOccur.') / t;

% Mutual information; eps keeps log2 finite where a joint cell is empty
MImatrix = Pxy .* log2(Pxy ./ (Px.' * Py) + eps);
MI = sum(MImatrix(:));

% Entropies of the two labelings
Hx = -sum(Px .* log2(Px + eps), 2);
Hy = -sum(Py .* log2(Py + eps), 2);

% Normalized mutual information
nmi = 2 * MI / (Hx + Hy);
fprintf('聚類的標準互資訊Nmi為:%f\n',nmi); % print the result
%% Compute ACC
% BestMapping is defined outside this file.
% NOTE(review): presumably it relabels one argument so that its ids line up
% with the other; confirm that (Y,Label) is the argument order it expects.
[NewLabel]=BestMapping(Y,Label);
% Compare as columns: Label may be a row here while the orientation of
% NewLabel depends on BestMapping. A row/column mix would make "==" expand
% to an n-by-n matrix (or raise an error on releases without implicit expansion).
T = Label(:)==NewLabel(:);
acc=sum(T)/length(NewLabel);    % fraction of samples whose labels agree
fprintf('聚類的準確度acc為:%f\n',acc); % print the result
  • 4.結果
    迭代2次

迭代10次

ljm要加油