Python中關於SVM物種分佈模型程式碼報錯
阿新 • • 發佈:2021-02-10
Python中關於SVM物種分佈模型程式碼報錯
最近看了這篇文章:https://blog.csdn.net/wong2016/article/details/83151122
裡面用sklearn的資料集結合SVM畫了物種分佈圖,遂想要試試看,手頭有字典形式的TXT資料集,但並不是特別會寫,遂參考連結文中的程式碼進行修改和複製(非商用)。
想發個帖子求助一下……哪位前輩給康康哪裡錯了……腦闊痛=。=
**from __future__ import print_function from time import time import numpy as np import matplotlib.pyplot as plt from sklearn.datasets.base import Bunch #from sklearn.datasets import fetch_species_distributions from sklearn.datasets.species_distributions import construct_grids from sklearn import svm, metrics # if basemap is available, we'll use it. # otherwise, we'll improvise later... try: from mpl_toolkits.basemap import Basemap basemap = True except ImportError: basemap = False print(__doc__) def create_species_bunch(species_name, train, test, coverages, xgrid, ygrid): """Create a bunch with information about a particular organism This will use the test/train record arrays to extract the data specific to the given species name. """ bunch = Bunch(name=' '.join(species_name.split("_")[:2])) species_name = species_name.encode('ascii') points = dict(test=test, train=train) for label, pts in points.items(): # choose points associated with the desired species pts = pts[pts['species'] == species_name] bunch['pts_%s' % label] = pts # determine coverage values for each of the training & testing points ix = np.searchsorted(xgrid, pts['dd long']) iy = np.searchsorted(ygrid, pts['dd lat']) bunch['cov_%s' % label] = coverages[:, -iy, ix].T return bunch def plot_species_distribution(species=("pacific_saury")): """ Plot the species distribution. """ t0 = time() # Load the compressed data input_file = ('C:/Users/Administrator/Desktop/2021.2.9.txt') data = input_file # Set up the data grid xgrid, ygrid = construct_grids(data) # The grid in x,y coordinates X, Y = np.meshgrid(xgrid, ygrid[::-1]) # create a bunch for each species PS_bunch = create_species_bunch(species[0], data.train, data.test, data.coverages, xgrid, ygrid) # background points (grid coordinates) for evaluation np.random.seed(13) background_points = np.c_[np.random.randint(low=0, high=data.Ny, size=10000), np.random.randint(low=0, high=data.Nx, size=10000)].T # We'll make use of the fact that coverages[6] has measurements at all # land points. This will help us decide between land and water. land_reference = data.coverages[6] # Fit, predict, and plot for each species. for i, species in enumerate([PS_bunch]): print("_" * 80) print("Modeling distribution of species '%s'" % species.name) # Standardize features mean = species.cov_train.mean(axis=0) std = species.cov_train.std(axis=0) train_cover_std = (species.cov_train - mean) / std # Fit OneClassSVM print(" - fit OneClassSVM ... ", end='') clf = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.5) clf.fit(train_cover_std) print("done.") # Plot map of South America plt.subplot(1, 2, i + 1) if basemap: print(" - plot coastlines using basemap") m = Basemap(projection='cyl', llcrnrlat=Y.min(), urcrnrlat=Y.max(), llcrnrlon=X.min(), urcrnrlon=X.max(), resolution='c') m.drawcoastlines() m.drawcountries() else: print(" - plot coastlines from coverage") plt.contour(X, Y, land_reference, levels=[-9998], colors="k", linestyles="solid") plt.xticks([]) plt.yticks([]) print(" - predict species distribution") # Predict species distribution using the training data Z = np.ones((data.Ny, data.Nx), dtype=np.float64) # We'll predict only for the land points. idx = np.where(land_reference > -9999) coverages_land = data.coverages[:, idx[0], idx[1]].T pred = clf.decision_function((coverages_land - mean) / std) Z *= pred.min() Z[idx[0], idx[1]] = pred levels = np.linspace(Z.min(), Z.max(), 25) Z[land_reference == -9999] = -9999 # plot contours of the prediction plt.contourf(X, Y, Z, levels=levels, cmap=plt.cm.Reds) plt.colorbar(format='%.2f') # scatter training/testing points plt.scatter(species.pts_train['dd long'], species.pts_train['dd lat'], s=2 ** 2, c='black', marker='^', label='train') plt.scatter(species.pts_test['dd long'], species.pts_test['dd lat'], s=2 ** 2, c='black', marker='x', label='test') plt.legend() plt.title(species.name) plt.axis('equal') # Compute AUC with regards to background points pred_background = Z[background_points[0], background_points[1]] pred_test = clf.decision_function((species.cov_test - mean) / std) scores = np.r_[pred_test, pred_background] y = np.r_[np.ones(pred_test.shape), np.zeros(pred_background.shape)] fpr, tpr, thresholds = metrics.roc_curve(y, scores) roc_auc = metrics.auc(fpr, tpr) plt.text(-35, -70, "AUC: %.3f" % roc_auc, ha="right") print("\n Area under the ROC curve : %f" % roc_auc) print("\ntime elapsed: %.2fs" % (time() - t0)) plot_species_distribution() plt.show()**
用的是Spyder,程式碼本身無報錯,但輸出仍舊是:
**runfile('C:/Users/Administrator/Desktop/untitled1.py', wdir='C:/Users/Administrator/Desktop') Built-in functions, exceptions, and other objects. Noteworthy: None is the `nil' object; Ellipsis represents `...' in slices. Traceback (most recent call last): File "C:\Users\Administrator\Desktop\untitled1.py", line 154, in <module> plot_species_distribution() File "C:\Users\Administrator\Desktop\untitled1.py", line 57, in plot_species_distribution xgrid, ygrid = construct_grids(data) File "E:\ANACONDA\lib\site-packages\sklearn\datasets\_species_distributions.py", line 128, in construct_grids xmin = batch.x_left_lower_corner + batch.grid_size AttributeError: 'str' object has no attribute 'x_left_lower_corner'**