matlab nan變成0_基於MATLAB詳解腦電Decoding的Clusterbased統計分析
技術標籤:matlab nan變成0、matlab中nan、matlab如何strcat、strcat matlab、蒙特卡洛模擬matlab
前面曾經寫過一個基於MATLAB進行腦電Decoding的教程:
基於MATLAB原始碼逐行超詳解ERP Decoding
進行了對訊號的解碼之後,如何進行統計分析呢,論文中的cluster-based test是怎麼做的?蒙特卡洛模擬又是怎麼做的?今天還是用開原始碼來詳解腦電Decoding中的cluster-based test和隨機模擬的實現。
希望自己完成的這個艱鉅的工作能對大家起到幫助!
原始碼來自Bae&Luck於2018年發表在Journal of Neuroscience上的一篇文章《Dissociable Decoding of Spatial Attention and Working Memory from EEG Oscillations and Sustained Potentials》。
% Gi-Yeul Bae 2017-10-3
% From Bae & Luck (2018), Journal of Neuroscience.
% Computes mean decoding accuracy across subjects, runs a cluster-based
% Monte Carlo permutation analysis against chance (1/16), and plots the
% result with significant clusters shaded.
% NOTE: the original extraction had collapsed this script onto a few lines,
% which broke it (comments swallowed the following code); line structure
% has been restored here.

% Subject IDs
subList = [201 202 203 204 205 206 207 208 209 210 212 213 215 216 217 218];
Nsub   = length(subList);   % number of subjects
Nblock = 3;                 % cross-validation folds
Nitr   = 10;                % decoding iterations
Ntp    = 100;               % number of time points (one per 20 ms after 5-sample averaging)
NBins  = 16;                % number of location bins (chance = 1/16)
tm     = -500:20:1496;      % time axis, -500 ms to 1496 ms

% subjects x time points
AverageAccuracy = nan(Nsub,Ntp);

for sub = 1:Nsub
    % time points x blocks x iterations
    DecodingAccuracy = nan(Ntp,Nblock,Nitr);

    % Load previously saved decoding results (one .mat per subject).
    fileLocation = pwd;
    readThis = strcat(fileLocation,'/Location_Results_Alphabased_',num2str(subList(sub)),'.mat');
    % readThis = strcat(fileLocation,'/Orientation_Results_Alphabased_',num2str(subList(sub)),'.mat');
    % readThis = strcat(fileLocation,'/Location_Results_ERPbased_',num2str(subList(sub)),'.mat');
    % readThis = strcat(fileLocation,'/Orientation_Results_ERPbased_',num2str(subList(sub)),'.mat');
    load(readThis)

    svmPrediction = squeeze(svmECOC.modelPredict);  % predicted labels
    tstTargets    = squeeze(svmECOC.targets);       % true labels
    clear svmECOC

    % Average decoding accuracy per time point / block / iteration.
    for block = 1:Nblock
        for itr = 1:Nitr
            for tp = 1:Ntp
                prediction = squeeze(svmPrediction(itr,tp,block,:)); % predicted labels for 16 bins
                TrueAnswer = squeeze(tstTargets(itr,tp,block,:));    % true labels for 16 bins
                Err = TrueAnswer - prediction;
                ACC = mean(Err==0);                                  % fraction correct
                DecodingAccuracy(tp,block,itr) = ACC;
            end
        end
    end

    % Average over blocks and iterations.
    grandAvg = squeeze(mean(mean(DecodingAccuracy,2),3));

    % Temporal smoothing over five points (t-2..t+2), truncated at the edges.
    smoothed = nan(1,Ntp);
    for tAvg = 1:Ntp
        if tAvg == 1
            smoothed(tAvg) = mean(grandAvg((tAvg):(tAvg+2)));
        elseif tAvg == 2
            smoothed(tAvg) = mean(grandAvg((tAvg-1):(tAvg+2)));
        elseif tAvg == (Ntp-1)
            smoothed(tAvg) = mean(grandAvg((tAvg-2):(tAvg+1)));
        elseif tAvg == Ntp
            smoothed(tAvg) = mean(grandAvg((tAvg-2):(tAvg)));
        else
            smoothed(tAvg) = mean(grandAvg((tAvg-2):(tAvg+2)));
        end
    end

    AverageAccuracy(sub,:) = smoothed;  % smoothed accuracy for this subject
end

% Grand average and standard error across subjects.
subAverage = squeeze(mean(AverageAccuracy,1));
seAverage  = squeeze(std(AverageAccuracy,1))/sqrt(Nsub);

%% Cluster-mass analysis over 220-1496 ms
releventTime = 37:100;

% Right-tailed one-sample t-test of accuracy vs chance (1/16 = 0.0625) at
% each relevant time point. Row 1 of Ps holds t-statistics, row 2 p-values.
Ps = nan(2,length(releventTime));
for i = 1:length(releventTime)
    tp = releventTime(i);
    [H,P,CI,STATS] = ttest(AverageAccuracy(:,tp), 0.0625, 'tail', 'right');
    Ps(1,i) = STATS.tstat;
    Ps(2,i) = P;
end

% Significant points, then remove orphans (0-1-0 patterns).
candid = Ps(2,:) <= .05;
candid_woOrphan = candid;
candid_woOrphan(1,1) = candid(1,1);
for i = 2:(length(releventTime)-1)
    if candid(1,i-1) == 0 && candid(1,i) == 1 && candid(1,i+1) == 0
        candid_woOrphan(1,i) = 0;
    else
        candid_woOrphan(1,i) = candid(1,i);
    end
end

% Map significance and t-values onto the full time axis.
clusters = zeros(length(tm),1);
clusterT = zeros(length(tm),1);
clusters(releventTime,1) = candid_woOrphan;
clusterT(releventTime,1) = Ps(1,:);
clusterTsum = sum(Ps(1,logical(candid_woOrphan)));  % total t over significant points (unused below)

% Identify contiguous clusters; tmp(c,:) stores the member time indices of
% cluster c. A lone significant point (already orphan-removed) cannot start
% a cluster, so only runs of length >= 2 are recorded.
tmp = zeros(10,300);
cl = 0;       % cluster counter
member = 0;   % index within the current cluster
for i = 2:length(clusters)-1
    if clusters(i-1) == 0 && clusters(i) == 1 && clusters(i+1) == 1
        % cluster onset
        cl = cl + 1;
        member = member + 1;
        tmp(cl,member) = i;
    elseif clusters(i-1) == 1 && clusters(i) == 1 && clusters(i+1) == 0
        % cluster offset
        member = member + 1;
        tmp(cl,member) = i;
        member = 0;
    elseif clusters(i-1) == 1 && clusters(i) == 1 && clusters(i+1) == 1
        % cluster interior
        member = member + 1;
        tmp(cl,member) = i;
    end
end

HowManyClusters = cl;
a = tmp(1:cl,:);                             % valid cluster rows
eachCluster = a(:,logical(sum(a,1)~=0));     % drop all-zero columns

% Summed t-value per observed cluster (the cluster-mass statistic).
dat_clusterSumT = nan(HowManyClusters,1);
for c = 1:HowManyClusters
    dat_clusterSumT(c,1) = sum(clusterT(eachCluster(c,eachCluster(c,:) ~= 0)));
end

%% Monte Carlo simulation (NOTE: this takes a long time)
iteration = 10000;
SimclusterTvalue = nan(1,iteration);

for itr = 1:iteration
    Ps = nan(2,length(releventTime));

    % Generate null data: random decoding labels per trial (sampling with
    % replacement), giving chance-level accuracies.
    simacc = nan(Nitr,Nblock);
    simaccSub = nan(Nsub,Ntp);
    for sub = 1:Nsub
        for t = 1:Ntp
            for it = 1:Nitr
                for blk = 1:Nblock
                    simaccTest = (1:NBins) - randi(NBins,1,NBins); % sample with replacement
                    simacc(it,blk) = sum(simaccTest==0)/NBins;
                end
            end
            simaccSub(sub,t) = mean(mean(simacc(:,:),1),2);
        end
    end

    % Smooth the null data exactly as the real data was smoothed.
    smtFake = nan(Nsub,Ntp);
    for tAvg = 1:Ntp
        if tAvg == 1
            smtFake(:,tAvg) = mean(simaccSub(:,(tAvg):(tAvg+2)),2);
        elseif tAvg == 2
            smtFake(:,tAvg) = mean(simaccSub(:,(tAvg-1):(tAvg+2)),2);
        elseif tAvg == (Ntp-1)
            smtFake(:,tAvg) = mean(simaccSub(:,(tAvg-2):(tAvg+1)),2);
        elseif tAvg == Ntp
            smtFake(:,tAvg) = mean(simaccSub(:,(tAvg-2):(tAvg)),2);
        else
            smtFake(:,tAvg) = mean(simaccSub(:,(tAvg-2):(tAvg+2)),2);
        end
    end

    % Same t-test as for the real data.
    % FIX: test the relevant time points smtFake(:,tp); the original used
    % smtFake(:,i) and left tp unused (statistically equivalent for i.i.d.
    % null data, but inconsistent with the real-data analysis above).
    for i = 1:length(releventTime)
        tp = releventTime(i);
        [H,P,CI,STATS] = ttest(smtFake(:,tp), 0.0625, 'tail', 'right');
        Ps(1,i) = STATS.tstat;
        Ps(2,i) = P;
    end

    % Significant points; remove orphans as before.
    candid = Ps(2,:) <= .05;
    candid_woOrphan = zeros(1,length(candid));
    candid_woOrphan(1,1) = candid(1,1);
    candid_woOrphan(1,length(candid)) = candid(1,length(candid));
    for i = 2:(length(releventTime)-1)
        if candid(1,i-1) == 0 && candid(1,i) == 1 && candid(1,i+1) == 0
            candid_woOrphan(1,i) = 0;
        else
            candid_woOrphan(1,i) = candid(1,i);
        end
    end

    % Map onto the full time axis.
    clusters = zeros(length(tm),1);
    clusterT = zeros(length(tm),1);
    clusters(releventTime,1) = candid_woOrphan;
    clusterT(releventTime,1) = Ps(1,:);

    % Find null clusters (same state machine as above).
    tmp = zeros(50,300);
    cl = 0;
    member = 0;
    for i = 2:length(clusters)-1
        if clusters(i-1) == 0 && clusters(i) == 1 && clusters(i+1) == 1
            cl = cl + 1;
            member = member + 1;
            tmp(cl,member) = i;
        elseif clusters(i-1) == 1 && clusters(i) == 1 && clusters(i+1) == 0
            member = member + 1;
            tmp(cl,member) = i;
            member = 0;
        elseif clusters(i-1) == 1 && clusters(i) == 1 && clusters(i+1) == 1
            member = member + 1;
            tmp(cl,member) = i;
        end
    end

    HowManyClusters = cl;
    if HowManyClusters > 0
        a = tmp(1:cl,:);
        sim_eachCluster = a(:,logical(sum(a,1)~=0));
        % Summed t per null cluster.
        sim_clusterSumT = zeros(HowManyClusters,1);
        for c = 1:HowManyClusters
            sim_clusterSumT(c,1) = sum(clusterT(sim_eachCluster(c,sim_eachCluster(c,:) ~= 0)));
        end
        % Keep the cluster mass with the largest absolute value.
        % FIX: use max's index output; the original logical mask
        % (abs(x)==max(abs(x))) selects multiple clusters on ties and the
        % scalar assignment below would then error.
        [~,record] = max(abs(sim_clusterSumT));
        SimclusterTvalue(1,itr) = sim_clusterSumT(record);
    else
        % No significant null cluster in this iteration.
        SimclusterTvalue(1,itr) = 0;
    end
end % end of simulation

% Sort the null distribution and take the 95th-percentile critical value.
sortedTvalues = sort(SimclusterTvalue,2);
cutOff = iteration - iteration * 0.05;
critT = sortedTvalues(cutOff);

% Observed clusters whose mass exceeds the null criterion are significant.
sigCluster = dat_clusterSumT > critT;

%% Plotting
figure(1)
cl = colormap(parula(50));
accEst = squeeze(subAverage);

% Shade the significant time points.
draw = eachCluster(sigCluster,:);
draw = sort(reshape(draw,1,size(draw,1)*size(draw,2)));
draw = draw(draw>0);
w = zeros(Ntp,1);
w(draw) = 1;
a = area(1:length(tm), accEst.*w');
a.EdgeColor = 'none';
a.FaceColor = [0.8,0.8,0.8];
child = get(a,'Children');
set(child,'FaceAlpha',0.9)
hold on

% Mean accuracy with +/- SE band (boundedline is a File Exchange function).
mEI = boundedline(1:length(tm),subAverage,seAverage,'cmap',cl(42,:),'alpha','transparency',0.35);
xlabel('Time (ms)');
ylabel('Decoding Accuracy')
ax = gca;
ax.YLim  = [0.05, 0.15];
ax.YTick = [0,0.02,0.04,0.06,0.08,0.10,0.12,0.14];
ax.XTick = [1 26 51 76 100];
ax.XTickLabel = {'-500','0','500','1000','1500'};

% Chance-level reference line at 1/16.
h = line(1:length(tm),0.0625*ones(1,Ntp));
h.LineStyle = '--';
h.Color = [0.1,0.1,0.1];
hold off
總的來說,標準的流程就是:
先將decoding的準確率在時間序列上和隨機水平進行t檢驗,然後排除掉單個時間點的顯著點,再對每一個連續顯著的cluster計算對應cluster的t值的和;
然後使用蒙特卡洛隨機模擬10000次每個腦電試次的label,得到虛假decoding結果,同樣進行時間序列上的逐點t檢驗並排除單個時間點的顯著點,再計算出每一次迭代中各cluster的t值和裡絕對值最大的那個值,對10000次迭代得到的最大值進行升序排序,取第95%位的值作為臨界值。
用上述這個隨機模擬得到的值和真實decoding得到的每個cluster的t加和值進行比較,保留下cluster的t加和值大於隨機模擬的t值的clusters作為最終顯著時間段。
原版原始碼及資料可見:
https://osf.io/bpexa/
路同學修改並新增超詳細的中文註釋的版本可見GitHub:
https://github.com/ZitongLu1996/ERP_Decoding_withChineseAnnotations
記得點關注、點贊、點在看哦!
也歡迎給作者打賞!
愛胡思亂想
渴望成為優秀學生的
路同學願和你談談心。
路同學
zitonglu1996.github.io