scikit-learn 線性迴歸擬合正弦函式,預測房價
阿新 • 發佈:2018-12-09
隨書程式碼,閱讀筆記。
- 線性迴歸擬合正弦函式
# - Fit a sine function with linear regression on polynomial features.
# %matplotlib inline   <- Jupyter magic; keep commented when running as a plain script.
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.figure import SubplotParams
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

n_dots = 200

# Noisy sine samples: sin(x) plus uniform noise in [-0.1, 0.1).
X = np.linspace(-2 * np.pi, 2 * np.pi, n_dots)
Y = np.sin(X) + 0.2 * np.random.rand(n_dots) - 0.1
X = X.reshape(-1, 1)  # scikit-learn estimators expect 2-D feature matrices
Y = Y.reshape(-1, 1)


def polynomial_model(degree=1):
    """Return a pipeline: polynomial features -> scaling -> linear regression.

    NOTE: the book's code used LinearRegression(normalize=True); that
    parameter was deprecated in scikit-learn 1.0 and removed in 1.2.
    A StandardScaler step in the pipeline is the documented replacement.
    """
    return Pipeline([
        ("polynomial_features",
         PolynomialFeatures(degree=degree, include_bias=False)),
        ("scaler", StandardScaler()),
        ("linear_regression", LinearRegression()),
    ])


# Fit models of increasing polynomial degree and record train metrics.
degrees = [2, 3, 5, 10]
results = []
for d in degrees:
    model = polynomial_model(degree=d)
    model.fit(X, Y)
    train_score = model.score(X, Y)  # R^2 on the training set
    mse = mean_squared_error(Y, model.predict(X))
    results.append({"model": model, "degree": d,
                    "score": train_score, "mse": mse})

for r in results:
    print("degree: {}; train score: {}; mean squared error: {}".format(
        r["degree"], r["score"], r["mse"]))

# Plot the four fits in a 2x2 grid: scatter of samples + predicted curve.
plt.figure(figsize=(12, 6), dpi=200, subplotpars=SubplotParams(hspace=0.3))
for i, r in enumerate(results):
    fig = plt.subplot(2, 2, i + 1)
    plt.xlim(-8, 8)
    plt.title("LinearRegression degree={}".format(r["degree"]))
    plt.scatter(X, Y, s=5, c='b', alpha=0.5)
    plt.plot(X, r["model"].predict(X), 'r-')
- 預測房價
# - Predict house prices (Boston housing dataset).
# %matplotlib inline   <- Jupyter magic; keep commented when running as a plain script.
import time

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import ShuffleSplit, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

from common.utils import plot_learning_curve  # project-local plotting helper

# load_boston() was deprecated in scikit-learn 1.0 and removed in 1.2;
# the documented replacement is fetching the same dataset from OpenML.
boston = fetch_openml(name="boston", version=1, as_frame=False)
X = boston.data
y = boston.target.astype(float)
print(X.shape)
print(boston.feature_names)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=3)

# Baseline: plain linear regression on the raw features.
model = LinearRegression()
# time.clock() was removed in Python 3.8; time.perf_counter() replaces it.
start = time.perf_counter()
model.fit(X_train, y_train)
train_score = model.score(X_train, y_train)  # R^2 on training data
cv_score = model.score(X_test, y_test)       # R^2 on the held-out split
print('elapsed: {0:.6f}; train_score: {1:0.6f}; cv_score: {2:.6f}'.format(
    time.perf_counter() - start, train_score, cv_score))


def polynomial_model(degree=1):
    """Return a pipeline: polynomial features -> scaling -> linear regression.

    NOTE: LinearRegression(normalize=True) was removed in scikit-learn 1.2;
    a StandardScaler pipeline step is the documented replacement.  The book
    noted normalization speeds up convergence but does not improve accuracy.
    """
    return Pipeline([
        ("polynomial_features",
         PolynomialFeatures(degree=degree, include_bias=False)),
        ("scaler", StandardScaler()),
        ("linear_regression", LinearRegression()),
    ])


# A degree-2 polynomial model fits markedly better than the linear baseline.
model = polynomial_model(degree=2)
start = time.perf_counter()
model.fit(X_train, y_train)
train_score = model.score(X_train, y_train)
cv_score = model.score(X_test, y_test)
print('elapsed: {0:.6f}; train_score: {1:0.6f}; cv_score: {2:.6f}'.format(
    time.perf_counter() - start, train_score, cv_score))
# Reference output from the book:
#   train_score: 0.930547; cv_score: 0.860465

# Draw learning curves for polynomial degrees 1-3 to visualise
# under-fitting vs. over-fitting.
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
title = 'Learning Curves (degree={0})'
degrees = [1, 2, 3]
start = time.perf_counter()
plt.figure(figsize=(18, 4), dpi=200)
for i in range(len(degrees)):
    plt.subplot(1, 3, i + 1)
    plot_learning_curve(plt, polynomial_model(degrees[i]),
                        title.format(degrees[i]), X, y,
                        ylim=(0.01, 1.01), cv=cv)
print('elapsed: {0:.6f}'.format(time.perf_counter() - start))
多項式的階數對訓練模型效能影響很大,階數低,容易欠擬合,階數高,容易過擬合。