以logistic Regression為例實現多類別分類及Python實現
阿新 • • 發佈:2019-02-11
這種方法簡單有效。而且在使用 logistic 迴歸這類輸出概率值、可以比較大小的分類器時,類邊界其實是一個有範圍的值,可以藉此提高正確率。此外,當 K(類別數量)很大時,投票的方式也解決了一部分不平衡性問題。
可知,最佳情況下準確率有所提高:
參考:http://blog.sina.com.cn/s/blog_5eef0840010147pa.html
# -*- coding: utf-8 -*-
# One-vs-all multiclass logistic regression on the Iris data set.
#
# One binary classifier is trained per class (that class against all the
# others).  A test sample is assigned to the class whose classifier gives it
# the highest score.
#
# NOTE(review): logisticRegression() and sigmoid() are not defined here; they
# presumably come from the project-local logisticRegression module -- confirm.
from logisticRegression import *
from numpy import *
import operator


def loadDataSet(filename):
    """Read a comma-separated data file into feature rows and class labels.

    Every non-blank line is expected to hold the numeric features followed by
    the class name in the last column.  The number of features is taken from
    the first non-blank line.

    Args:
        filename: path of the text file to read.

    Returns:
        (dataMat, labelMat): dataMat is a list of rows, each starting with a
        constant 1 (the x0 bias term) followed by the features as floats;
        labelMat is the list of class-name strings, one per row.

    Raises:
        ValueError: if a feature field cannot be parsed as a float.
        IndexError: if a line has fewer fields than the first data line.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # "with" closes the file; the file used to be opened twice and never closed.
    with open(filename) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # Skip blank lines (the stock Iris file ends with one) instead
                # of crashing on float('').
                continue
            curLine = stripped.split(',')
            if numFeat is None:
                numFeat = len(curLine) - 1  # last column is the label
            lineArr = [float(curLine[i]) for i in range(numFeat)]
            dataMat.append([1] + lineArr)  # prepend x0 = 1
            labelMat.append(curLine[-1])
    return dataMat, labelMat


voteResult = [0, 0, 0]
# Class labels; the position in this list is the numeric class index.
categorylabels = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
opts = {'alpha': 0.01, 'maxIter': 100, 'optimizeType': 'smoothStocGradDescent'}

# ---- Training: one "this class vs. the rest" classifier per class ----
dataMat, labelMat = loadDataSet('train.txt')
dataMat = mat(dataMat)  # converted once, outside the per-class loop
weight1 = []
for className in categorylabels:
    # Binary targets as a column matrix: 1 for the current class, 0 otherwise.
    labelMat1 = mat([1 if label == className else 0 for label in labelMat]).T
    weight1.append(logisticRegression(dataMat, labelMat1, opts))

# ---- Testing ----
dataMat, labelMat = loadDataSet('test.txt')
dataMat = mat(dataMat)
h = []  # predicted class index for every test sample
for j in range(len(labelMat)):
    voteResult = [0] * len(categorylabels)
    for i in range(len(categorylabels)):
        prob = float(sigmoid(dataMat[j] * weight1[i]))
        # The vote is +1 or -1 plus the probability itself, so the score grows
        # with the probability and two classes cannot tie on vote count alone.
        if 0.5 < prob <= 1:
            voteResult[i] = voteResult[i] + 1 + prob
        elif 0 <= prob <= 0.5:
            voteResult[i] = voteResult[i] - 1 + prob
        else:
            # Reached only when prob is outside [0, 1] (e.g. NaN).
            print('Properbility wrong!')
    h.append(voteResult.index(max(voteResult)))
print(h)

# Map the true class names to indices.  An unknown label becomes -1 so that it
# is counted as an error and the list stays aligned with h (it used to be
# skipped, which shifted every later comparison).
labelIndex = dict((name, idx) for idx, name in enumerate(categorylabels))
labelMat2 = [labelIndex.get(label, -1) for label in labelMat]

# ---- Accuracy ----
# Kept as a float so the division below is a true division on Python 2 too.
error = float(sum(1 for pred, truth in zip(h, labelMat2) if pred != truth))
pro = 1 - error / len(labelMat)  # fraction of correctly classified samples
print(pro)
</pre><pre class="python" name="code">
<span style="white-space:pre"> </span><span style="font-family:SimSun;font-size:24px;">沒有優化的情況下的準確率:<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAnQAAACNCAYAAAA+a+c3AAAe8ElEQVR4Ae3dTWgjaX7H8V/NyylLsifJSE0gs9tZSEurjSPDviSE0G5iLRbZzTS5uBMH7MNm9iD1wTO0ySGwwQPjgO3LnuyEBhvysoeADD2DuwnJLCFEGk800qW3oQcCVreU6UtmBjYnhadKZZflemRJlt0q+9vQSHpUep7/83meUv1V9chyfvf3/6At/iGAAAIIIIAAAghEVuC1/2k1Ixs8gSOAAAIIIIAAAghIr732ioMDAggggAACCCCAQIQFXolw7ISOAAIIIIAAAgggIImEjmmAAAIIIIAAAghEXICELuIDSPgIIIAAAggggAAJHXMAAQQQQAABBBCIuAAJXcQH8GWH324nNHvvnmYT/i1/BedljwntI4AAAldR4Kofj0aS0LUTs7p3b1aJ9ugP5u12Vgtra1pbW1D2HOrvNekva7969Xng55KTSrf2VJJ3u9s4+7em29kFrY1oPl3E/EnM3tP6+vpLmaPB8RrWbZj4Pdez7ZPDtBvsr39/VPX49YXd9uqv+z6xtubOgYWs9x7Ya/uw+gctO4/6r+r8GdTetj3z0CZzgeXncDy6wOjP3NRrYTWYBCrteAfmdm1bd7cqYZsdlR3sq6Z5TSZLajSOirMLa7qTPjrA17YL2qocPT7a0n7PcSraLEqL6xn7Rn0+Y954l5duKt7pmx+Pyerzy0uajnf63Hyo1ZWSDiz96rO5npuNsl89GzrnJ6cmpfubZU3l8+6tHEfd497XHOrE6Y7FrZTqe5tqdMbpLF24COfG7rsqlLIjmaPD9vUsbsPF31CzPqHA7j5w6MO1e7KZUdVzsuZgSXh/Xff5m2rtFLVSntLi+qKy5U2VFb59sMaz3R9t/f78+e//S+ntjWk1H76nd3cbh/tyr314OP+zxz9cu2dTN692rYLHi84x8mLiCXe7LPPwrKMTdjw6a51Ren3oGbrN4o5q7Zq2CwUVqxndm0307JPjNLRfk9KTyWPblTeP6im891CxucW+zrIlZmf72u5YY6c8cCd8TrpfLKpYLKqwXVPqMJ6EVFv1ygvb2tkrucmErV+nNGV9+jz6ZW3sgp6o7O66Vv6taTY47sZ6pznR/9nbqZxu6pEelIfvwGV0PlVjBG62NsI8zb6xu+WNve11l6nc3t+E4rG6qmXJfHjYuruliuPIvv3gKhfi35k/f/8326q124qlJ5VoJzQRMwlMTTubw++QFxL/4KxDvyKZn1c6cLyoxwZ4fxu6Ve+F9nl1SebhGX3845B/e8bqIvfy0ITuWC/KVbXcnbv3pYQDL6Pr/8B9rJGjB+ZSQu5m/KhgZPcSiqfiOkxNy1XVO3WbN+LdXe9cQzI/IQXeu8a/XyMDOoeKprSwtiCVvATZNNDrUpF/lqDeSaiHCej85s8w0VzMa0bhZov0KnraLF5G+UX4h86fmHmvNEnC2Xp9EfGfLcLBXm2sJtNSbf/AfaE5djyo6ei4Mlh1kdn6so1jZOAHDDT0kuuJOtydW+7lFe9TaMV8HD22mdPY1V5rTbmpko5foU3pzsaG7kgylzjLmtLi2px7Sdc83tSiNu6k3U+B29vSn/5p2q03vbGhOfPJsGguX3j/JvLLWp+OH14OMKX+ZdSYWnq0uqLSgbnsMaeU6nr0KKbpw+3Np+ejuJP5W4o9uu9+mu5U7yYbk9rXbqBvYf0ylxTnUnXV6yml044bz0op4bZrLlVfbL96X/Sy+xivPcWXPCvjbM4sdG8//Jo4M+7euFZ9YPlnMY7GIfCU1DlLsGoG3JF7uScqzsf6EXhg8zTrbd6e9j64dM+XndWmbi3dVEx1bRf+S9/a
uBMynwPjHnBL5O9p6abUasXc19eVUqy2enj5zHiasdbiujuH/XEPhHx416ypMvum+Xdsf5w62me7X+/31yxrCF6m6+7voEsv/KBs9fvPd9/a2g0uC2h3lliYS/xh5QeW/gbrPg+fs/if/n4YPn88P++9LZNvKlavq57ySv33vfOYPyZhMO/b/lIffxzds4PmGJDMHy6XCc4rf7vu2+DYmP3Ln29h42sb9/AlHybJbalq8rnOIbDRuUrRHUPw8SjiufTzMAimwd//u9+Luqq7Eg/7S+j6pChX65rLTEmV4Jo7c1DykgWzB5iE0F0TtzzhXlZznC1tT8yq0TmLU6l4a5GqnQTDTRzdk4MppbWqQsEkTjllSybRSyo/n1ZttaiS8lqez2t/pdRZczd3YnuTsJh/Zuea1333IHesa1MT0n4nmwg80d0vc0kxsz4nVYsqPDDtTipZ6rT7EvsVCNm9637y9n1MorucV9L1aSi/nNPzg4ZUf6QHm94ZtLDtEytHZ9e66+/92Ix7VZk+1z4enSU4WjsXFWebg83zQFPK3Wxpu7DifcBZX9RUcVMFdw3WnObmm1otFr01hK9IlaLjHey65r+bgJv1n4E1h+3Snuo3b6l5f0fxpVtqru5IS539xZ233lpUz7b3ulSnsqVC+eT+GCwP9t3rb3A9WUbZdtntY3d/zTozf38M1tHrvq1+Wz3eWYXjzt76tillUsH3JfeThvuBLqzc1t/gmqng+5Vte1v85kNuqM9Q/oO9H7oxBeaPOt9razZbSqfNqaialPI+eATnTPC+bcyCDqf5HJ4oCKms7b7Pn5xX5z3uIaEMVcQ87O+43I07zPt/dx1X7fHpl1yTE4q1mv0tfjaXMVPmTbzzrmDR9E5TpzWfT8p8Cs087ydpqGuv5J3mPqrW+7T03BQfPFercybRez5sey/rD03mJE1l4nLrOmrAuxfaL2/dTLDdl9mv7pC9x8Ynpum3N7RhzvJ0fMw6jNKKSbbmpQdB+/Dtw+vup7TsrikqT02dOif8s3Mn185FwdlmEe7pfqjZkXvm2h2XYy+vayc0iQ6fz+Furc489m+PNXCOD0x/w9aTVbRp7e8g4YTXb6vB5uyWF838N99MNn9yx3u/spXb6h+8PDx+W5zD1d8Z8z7fD8Pnj/TcLKGJmUuLzwcPY+SvCHezNWPztI2vrTy8/oaarZgmAsvF24lEz6VG5xtPeJS9S8M9bXH2rivsWVP/gPMwrBq3bFTv/9YGLtUTpyZ0SW/BQF/fODQTolpPyZykO+3fQem+aukluSe6/GuqXS9qZ7OnJAKBnauPxNOcmctUi4dn5oKLdc2nqEzn0kJXGO5ZxZP96uzUXe2+jH51x3v02Pi09PC9gvuFj+LqAzcxdz+VL2dULd6X5oNfVAnf/qi+we+5bWV6fxvy6CxBMLn024qCsx9r9224p3t5aU7ul47Ml4Va3S/r83Fvt1MqmcrIMt2tL+xvfzza//39a1T9NYs+mq2T9dsCtrVryvP5hrbu3lWxaJYdePuArdxW/+Dl4fHb4uyuvz//8P2luy7zuOf8MQmhenwgOJf5ExalKQt3s21t87SNr608rH7zYTj4BUC3rVyi5/HxPOMJi/H0snBPW5zd9Y16HnbXf/xx+Hzu5zh7vJ6r8ejVWCz2V91dXVxfUtqJ65u5nL775T/rL//h8eEm7qCv5/Tq+/uhk/jg1Wv6s8yren9fyi//SN/5SlzpyddV+/CxPg+uTXM+1+NfXtPk/zzQPz754rB+cyc5+UP94HZOM84n+tv9ZGdtRVzpay/0/qsZzWfSSs9c04v3/1X/8vSrur30lt783lf0Hz/dUuVzbw2diT+4feoPf0u//kZCv5nJaWZmxv3/3V95qg8+/EUnroQmZybUPKVfH3xs1p6Ybb+mN66/qdt/+DV9+s8b+rdn3uVc54L7ZRw+e/CKvr/+/RNj4sbS8bmdy2nmVz/VP7meP9K3v6zpgw8dfevN
aU27lvt65nyhx13b/+zjZ8fGpp8HU4tLuhX35k8u9z39Ruvf5dcTOn+m/kRvfa2m+//gj4Xfyng5v7DMDfNB4a9//F3FO/vMb7/2iX7+i88V5m8c3IPCi0m98868Zr7+pT79MqWb05P6/g9P1uF5mbVFx+ezO+6fXdcPutzMN/D++GtvyLnmKB1PH97GX6/p508eq/n6jP7ix7c147RUj6U0fe2Fasn50Pj9UQjuj3/38TP3rPrGO7cO+5v87IE+bpjlFN4+PX9n3t2/vv70Z+6+Hdbf6d97Xc9+7U0th7j57Xbf2uq3+ze03+Vs2v3k3x4rcfttvXU7p1zum4rV92T6Zf594/bSiXJ3LVtIf931fD/5I73RNe627W3x23zc90w5x94PT/Xv8/3wtPkTf/0TPf20qV9845Y7n8x76QcfV851/nSPt//Y5nbe4+633337v4+f6qudeTIz42hvxfu293nHY5tXkZ+HluPXsMfZ7vG6So+dGzdu9L4+OqCGdwAya3dWdNqC+uzCgrQ5+FqaAUMayebBfnlfvDBnuMJjj1K/BsW5fv26njx5MujLrNu7ZwmWlxTfKx4uXPY39syvprNvYLvt5WZ7TRTLRz3fomhwHjFflfkzKjvm4agk+6+H9//+rfwtR57Q+RXbbr1BOvqWq//tI9v241juf1Mq+O24y9CvcbMeJ+fg+Aad/G/h2RZoB7flPgIIIIBAfwLj9P7fX8Qvf6sLT+hefpeJAAEEEEAAAQQQuFwCp34p4nJ1l94ggAACCCCAAAKXT4CE7vKNKT1CAAEEEEAAgSsmQEJ3xQac7iKAAAIIIIDA5RMgobt8Y0qPEEAAAQQQQOCKCZDQXbEBp7sIIIAAAgggcPkESOgu35jSIwQQQAABBBC4YgKv9erv4d+BaTf1qI8/FNyrLp5DAAEEEEAAgasp4P6ixdJNxR1H7dq27m5VribEOfa659+hy87OqlEK+33Nc4yIqhFAAAEEEEDgUgq4v1KST2h3l4Ru1APc8wxdd2P+z8VMxx0FfyWhe7vTHlNPbyF8LsbHb2VUP9VGPb5o+C0+4S5+6bj4BM+kmNhq24UTP8vnx9zP7Vn7Nap4xq0e3+6sPuNWz6ic/X5x27/AQAmd+eFv85ubxYrj/kD3cj6pd3fNj9UP9o96envhczE+7o9d30nL/fmu3k32fJZ6evK47xUbOFuRxmn+uB8mc9L9YlENc2ksu6D1uUVly+G/W23tlDSScR9VPONWj3Ebp3EfVTyjcu41r3jOLtD3lyLMQE2mpaafv5WraqUnlWi37bWHPEM9ISiBInwCGCF3R+VjqnYqWyoUtlUPaWeQIurprYVPlHwSiqfiSvghl6tD7x+jGfdRxTNu9VzW959ROfsTkNtBBPpO6KSE4rGWnh8Eqo8FdvxAce+71INPR+Clzp/eo8CzCFxFAcepaOvuliqO43Y/mb+l2KMHh48v2mRU8YxbPRfteFHtjcr5ouK9bO0MkNBdtq7THwQQQAABm0Bi9p7mdX+oZTW2Os9SPqp4xq2es5iM82tH5TzOfRy32AZI6BpqtmKaSAa60GrKvwIbKD3lLvX0BsLnYnx6t8KzCFxlAfMnq8YpmRtVPONWz2WdY6Nyvqw+59WvvhM6x2lovybF/cUVUxnFavvewtl2VgtrC8r2sZ6OenoPJT4X49OrlfYA85l6egn0fg7n8fQxZ1Yy1eLhmbnE7Kz73v6yxmtU8YxbPbbRf1nOo4rH5myrn/LRCQz0LdeD0n01l9e1fsf/syUHUmethQY4W0c9vQcQn4vx8b9lZlpLb2woE/zzDAPMZ+rpPV74RMfHJBO5mzGlnQ2t3/Hidv9EVanThwveL0YVz7jVYzQv435xqnPvXYFnzyjQ+w8LL6zpTtpRu8cvRZhvHeaX56X7K9pteAtpw2K6fv26njx5EvaUW0Y9+DB/Tu4e7BfsF+wX7BfdAlE7npr4g3+fjl+K6B7R0TzumdCNpglq
QQABBBBAAAEEEDhPgb7X0J1nENSNAAIIIIAAAgggMLwACd3wdrwSAQQQQAABBBAYCwESurEYBoJAAAEEEEAAAQSGFyChG96OVyKAAAIIIIAAAmMhQEI3FsNAEAgggAACCCCAwPACJHTD2/FKBBBAAAEEEEBgLARI6MZiGAgCAQQQQAABBBAYXoCEbng7XokAAggggAACCIyFAAndWAwDQSCAAAIIIIAAAsMLhP6Wq/dzQ0uajvu/2VpSw//N1pC2gr9JZ56udX4Tk/KCtir2n0MLoaQIAQQQQAABBBAYWCA0oUvm5xXfK6pYcdwfEF7OJ/XubiO0cvNjvItzMT18r+D+lqtJ4tbnFvU7//lf+tatpt4rbLrJoPc7blevPFveVKVHMhyKSiECCCCAAAIIIDCAQGhCN5mWmvudWspVtZYnlSgdWM7SNdRsBVosV1W/NaFnTkUfvetIfjKTiCtWr+qjVz7SR+/qypSTzAXmBncRQAABBBBA4FwEQhO6eKyl6oFJujptxuJKSAo7R+c4DZVW9rS4vq51x1G7XdNO0TsrZ17tnsFbn1OqvqO7W5XDTly18sOOcwcBBBBAAAEEEBixgHPjxo12d50LawuqFr1LhV7ilTl83L2teWwusy5q010vlpi9p3ndP3GJ1r0Um6keS+r8165fofIwP8oQQAABBBBAAIGzCIR+y7XZimkiGai21Qw9O2e28NbQSdWyt/1BaU+tmzll2115orkUm8pQHmDlLgIIIIAAAgggMAqB0IRuvybFzTVW828qo1ht37J+rrONUspMde4nJxTrJHoL92aV8BO7qYxSraYO2lldpfKwy9S+GrcIIIAAAggggMAoBEIvudr+bInt8qv3Ddabirtr6Jp6tLpy+I3XjTtpN05/bZ35kkDwz5lchfJRDBR1IIAAAggggAACNoHQhM62MeUIIIAAAggggAAC4ycQesl1/MIkIgQQQAABBBBAAAGbAAmdTYZyBBBAAAEEEEAgIgIkdBEZKMJEAAEEEEAAAQRsAiR0NhnKEUAAAQQQQACBiAiQ0EVkoAgTAQQQQAABBBCwCZDQ2WQoRwABBBBAAAEEIiJAQheRgSJMBBBAAAEEEEDAJkBCZ5OhHAEEEEAAAQQQiIgACV1EBoowEUAAAQQQQAABmwAJnU2GcgQQQAABBBBAICICJHQRGSjCRAABBBBAAAEEbAIkdDYZyhFAAAEEEEAAgYgIkNBFZKAIEwEEEEAAAQQQsAmQ0NlkKEcAAQQQQAABBCIiQEIXkYEiTAQQQAABBBBAwCZAQmeToRwBBBBAAAEEEIiIAAldRAaKMBFAAAEEEEAAAZsACZ1NhnIEEEAAAQQQQCAiAiR0ERkowkQAAQQQQAABBGwCJHQ2GcoRQAABBBBAAIGICJDQRWSgCBMBBBBAAAEEELAJkNDZZChHAAEEEEAAAQQiIkBCF5GBIkwEEEAAAQQQQMAmQEJnk6EcAQQQQAABBBCIiAAJXUQGijARQAABBBBAAAGbAAmdTYZyBBBAAAEEEEAgIgIkdBEZKMJEAAEEEEAAAQRsAiR0NhnKEUAAAQQQQACBiAhc6oSunZjVvbU1ra+vayHbdoek3c5qYW1B2bb3+LRx6nd7b7s1rQ1Qt992u53Q7L3hXntUR3/9Okucflv93Pbr1k9dg24zCs9B22R7BBBAAAEEXqZAaEKXXfCSIJMIBZOhlxnooG2bg3p+/qZaO0UVCtvS3GIniWuoWW+q0XeF/W3vOBVtFndU77veow0dp6HSynCvParl/OM8aqufe/3F009Ng25zVk8/ITRzf+3erBJ9Jv+Dxsn2CCCAAAIIjEogNKErb+6o1q5pu1BQ4b2Hih0mQ6Nq9iLqSSgeq6talkyytXV3SxXHkTnY727tquE4fQUx6PZ9VXoOG/WKMzE72/cZyVGFNm7xDNKvZH5e8b2iisWiintxzeeTg7ycbRFAAAEEELhwgdCE7sKjoMFzEzCXPnM34+dW
/6AVj1s83fGbs3OTaanpn8ItV9VKT3KWrhuKxwgggAACYyXwmj2alO5sbOiOpNp2QeWpRW3cSavdrmlntalbSzcVU107xU33zJdZr7bslrX0aHVFuw3vDJhfHncctWvburtVcZv0y2M6vr253Hsn3Xlt86FWV0ru2TRbeVj8idl7envaS2LSGxuaMzEXN4/3wY+7ndXi+pxSquvRo5imp+NqPnxP7+421M4uHPW5s73fXlg8B50nJ/LLWg/UY4pt/T1so9lUSy2/+hO3JhEycapeVzqdVq1WUyola7/cNjvxm/vHHDq1h8V5ouFOQXf8pYOpjpsZvz3FlzxDm7Opxu+ruR+Mx5w5tf3z2+13/vhttLs8/Xq659vJds2Z3ZaqZjD9sGJxJaQBLtOfrJUSBBBAAAEEzlOgR0JX13bBS9bMkc3RlgplL6mYm29qtVj0LluaRM1dr5ZWbbWokvJans9rf6WkAyUP17GtlE0CkFG2XVbZLe9sbxKD5bwS7vZTyqSC7ZqDqqk/G1pug2nsvqtCycSaUdVPxMzl1orfh8zhS721b3KTk7RWVSgktLieU7a0qUrI9uaF1njc71mk1F2Pvb9J5W/F9PC9gue2ZD+TZuKs1m9p4sGmqlqUHlRVfVD2LiNb4gz2N+ggS5y2xOrY+HbGK7lS0maxofxyTs8PGlL9kR5sesl3sN1DaDOUgTiPxRPcKHDfa9dbB2mdP93zLcQzLH4z3/q97B4IibsIIIAAAgiMpUCPhM4Wb107Jw6GgbMaeq5WLOOe0ThQ9zq2ipugqW3KY0q/vaFpN0GqqWrOgLhfLDDJ1brmAmfujpKu4+W2CIcrr2uvZE7LmHMxvf/1jiesnvD+ej6ds0HGTfaEzkRUrra0PJlXPJXqnJ0zCwR7x2p/NixO29bh8TfcL3NUtbg+r+bqyjkkSL3mT8Dt2HwLlB96WuIP7W5DzdYtTZhlc/5l19YgX6IJrZRCBBBAAAEEzlVgiIQuLJ7AQVATih0eAE35nDJTUqUimcX5iVJJZZnylpr3vUuz7URC5vhpznzl8w1t3b3r3l9cX1TWXCrVVGi57YxSWISjLLPHaWslvL8mYzhMHhIZpSQ3sbXVIrOey1x2ffRIrfRRvmHdvuuJdjarqbI5Qzrov/D43TNfy+Ys6H1NLC8qu+Kf0e2vfj8e+ziadm3zp5N0nZhvnfJjnuHxh0VpvsyxX5Ny/jXWqYxitQfnkKyGtU4ZAggggAACwwm8GovF/qr7pfnlJX3nK3GlJ19X7cPH+txc9jTrsd65pbgT1zdzOf32a5/o57/43H2p43yux0+/qttLb+nN731F//HTLX30hflG6ed6/Mtrmr8zr5mZGX396c/0j0++8Mo729/O5TTzq5/qZx8/c+v6xu0lvXU7p1zum4rV9/R3p5R3x24eu+ulfvJHeqMr1u4+JD97oP3OWrC0E1f62gu9/2pG85m00jPX9Nlnk3on0Gez/cedtYHdcf7tftK7bBtSz4v3/1X/EtJfz+cb+vFfzGvGaakeS2n62gt98LF/auh470yy8eq1a2r+/Yd6+stnevzsC3eDsH75cZoNkpM/1A9u5zTjfKLece6HJi7B8fXH65/c/v5I3/6ypg8+dPStN6c1PXNNL97f14FZb2lx647HH9/jPfUe9TN/Ts63MM/Hh/PTj9+fb2Ht/u/jp0r++U/ceTgTr+mnWxV3HwjbljIEEEAAAQTGQcC5ceNGf39hdxyi7RHD9evX9eTJkx5bRPOpl9Uv2o3mfCFqBBBAAIGrKXBpErqrOXz0GgEEEEAAAQQQkPg7dMwCBBBAAAEEEEAg4gIkdBEfQMJHAAEEEEAAAQRI6JgDCCCAAAIIIIBAxAVI6CI+gISPAAIIIIAAAgiQ0DEHEEAAAQQQQACBiAuQ0EV8AAkfAQQQQAABBBAgoWMOIIAAAggggAACERcgoYv4ABI+AggggAACCCBAQsccQAABBBBAAAEEIi5AQhfx
ASR8BBBAAAEEEECAhI45gAACCCCAAAIIRFyAhC7iA0j4CCCAAAIIIIAACR1zAAEEEEAAAQQQiLgACV3EB5DwEUAAAQQQQAABEjrmAAIIIIAAAgggEHEBErqIDyDhI4AAAggggAACJHTMAQQQQAABBBBAIOICJHQRH0DCRwABBBBAAAEESOiYAwgggAACCCCAQMQFSOgiPoCEjwACCCCAAAIIkNAxBxBAAAEEEEAAgYgLkNBFfAAJHwEEEEAAAQQQIKFjDiCAAAIIIIAAAhEXIKGL+AASPgIIIIAAAgggQELHHEAAAQQQQAABBCIuQEIX8QEkfAQQQAABBBBAgISOOYAAAggggAACCERcgIQu4gNI+AgggAACCCCAAAkdcwABBBBAAAEEEIi4AAldxAeQ8BFAAAEEEEAAARI65gACCCCAAAIIIBBxARK6iA8g4SOAAAIIIIAAAiR0zAEEEEAAAQQQQCDiAiR0ER9AwkcAAQQQQAABBEjomAMIIIAAAggggEDEBUjoIj6AhI8AAggggAACCJDQMQcQQAABBBBAAIGIC5DQRXwACR8BBBBAAAEEECChYw4ggAACCCCAAAIRFyChi/gAEj4CCCCAAAIIIEBCxxxAAAEEEEAAAQQiLkBCF/EBJHwEEEAAAQQQQICEjjmAAAIIIIAAAghEXICELuIDSPgIIIAAAggggAAJHXMAAQQQQAABBBCIuAAJXcQHkPARQAABBBBAAIH/B6G5yRNZU8E/AAAAAElFTkSuQmCC" alt="" /></span>
2.第二種多分類方法為所有對所有(All-versus-all,AVA),也就是每次對一對類別學習一個分類器(one vs one at a time)。假定有 M 類,那麼要構建 M(M-1)/2 個二元分類器。每一個分類器都使用它應該區分的兩個類的元組來訓練。為了對未知元組分類,所有的分類器投票表決,該元組被指派到得票數最多的類。一般來說,「所有對所有」優於「一對所有」:它解決了不平衡性,但是會佔用更大的空間。下面的程式主要修改了訓練過程:
# -*- coding: utf-8 -*-
# All-versus-all (one-vs-one) multiclass logistic regression on the Iris data.
#
# One binary classifier is trained for every pair of classes, using only the
# training rows of those two classes.  A test sample is run through every
# pairwise classifier and assigned to the class that collects the most votes.
#
# NOTE(review): logisticRegression() and sigmoid() are not defined here; they
# presumably come from the project-local logisticRegression module -- confirm.
from logisticRegression import *
from numpy import *
import operator


def loadDataSet(filename):
    """Read a comma-separated data file into feature rows and class labels.

    Every non-blank line is expected to hold the numeric features followed by
    the class name in the last column.  The number of features is taken from
    the first non-blank line.

    Args:
        filename: path of the text file to read.

    Returns:
        (dataMat, labelMat): dataMat is a list of rows, each starting with a
        constant 1 (the x0 bias term) followed by the features as floats;
        labelMat is the list of class-name strings, one per row.

    Raises:
        ValueError: if a feature field cannot be parsed as a float.
        IndexError: if a line has fewer fields than the first data line.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # "with" closes the file; the file used to be opened twice and never closed.
    with open(filename) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # Skip blank lines (the stock Iris file ends with one) instead
                # of crashing on float('').
                continue
            curLine = stripped.split(',')
            if numFeat is None:
                numFeat = len(curLine) - 1  # last column is the label
            lineArr = [float(curLine[i]) for i in range(numFeat)]
            dataMat.append([1] + lineArr)  # prepend x0 = 1
            labelMat.append(curLine[-1])
    return dataMat, labelMat


voteResult = [0, 0, 0]
# Class labels; the position in this list is the numeric class index.
categorylabels = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
opts = {'alpha': 0.01, 'maxIter': 50, 'optimizeType': 'smoothStocGradDescent'}

# One (positive class, negative class) index pair per pairwise classifier:
# classifier k outputs the probability of classPairs[k][0] against
# classPairs[k][1].
classPairs = [(0, 1), (1, 2), (2, 0)]

# ---- Training: one classifier per class pair ----
dataMat, labelMat = loadDataSet('train.txt')
weight1 = []
for posIdx, negIdx in classPairs:
    posLabel = categorylabels[posIdx]
    negLabel = categorylabels[negIdx]
    # Pick the rows of the two classes by label, in file order.  For a file
    # sorted by class with 40 rows each this selects exactly the rows the old
    # hard-coded slices (0:80, 40:120, 0:40 + 80:120) selected, but it no
    # longer depends on that layout.
    pairRows = []
    pairTargets = []
    for row, label in zip(dataMat, labelMat):
        if label == posLabel:
            pairRows.append(row)
            pairTargets.append(1)
        elif label == negLabel:
            pairRows.append(row)
            pairTargets.append(0)
    # Targets are passed as an (n, 1) column matrix, the same form the
    # one-vs-all script uses.
    # NOTE(review): they were a plain list here before -- confirm that
    # logisticRegression() expects the column-matrix form.
    weight1.append(logisticRegression(mat(pairRows), mat(pairTargets).T, opts))

# ---- Testing ----
dataMat, labelMat = loadDataSet('test.txt')
dataMat = mat(dataMat)
h = []  # predicted class index for every test sample
for j in range(len(labelMat)):
    voteResult = [0] * len(categorylabels)
    for k, (posIdx, negIdx) in enumerate(classPairs):
        prob = float(sigmoid(dataMat[j] * weight1[k]))
        # Each vote counts 1 plus the winner's confidence (0.5 to 1).  A class
        # with two votes scores at least 3 and a class with one vote at most
        # 2, so the confidence never overrides the vote count; it only decides
        # the 1-1-1 tie, which used to go to class 0 every time.
        if 0.5 < prob <= 1:
            voteResult[posIdx] = voteResult[posIdx] + 1 + prob
        elif 0 <= prob <= 0.5:
            voteResult[negIdx] = voteResult[negIdx] + 1 + (1 - prob)
        else:
            # Reached only when prob is outside [0, 1] (e.g. NaN).
            print('Properbility wrong!')
    h.append(voteResult.index(max(voteResult)))
print(h)

# Map the true class names to indices.  An unknown label becomes -1 so that it
# is counted as an error and the list stays aligned with h (it used to be
# skipped, which shifted every later comparison).
labelIndex = dict((name, idx) for idx, name in enumerate(categorylabels))
labelMat2 = [labelIndex.get(label, -1) for label in labelMat]

# ---- Accuracy ----
# Kept as a float so the division below is a true division on Python 2 too.
error = float(sum(1 for pred, truth in zip(h, labelMat2) if pred != truth))
pro = 1 - error / len(labelMat)  # fraction of correctly classified samples
print(pro)
可知,最佳情況下準確率有所提高:
參考:http://blog.sina.com.cn/s/blog_5eef0840010147pa.html