学习资料
Linear model:$y = b + \sum_i w_i x_i$,其中 $x_i$ 叫作 feature,$w_i$ 叫作 weight,$b$ 叫作 bias。用 Loss function $L$ 来定义一个 function 的好坏。
import numpy as np
import matplotlib.pyplot as plt

# Training set: ten (x, y) observations.
# x_data / y_data hold the raw values as plain lists; x_d / y_d are the same
# values as NumPy arrays so they can be used in vectorized arithmetic.
x_data = [338., 333., 328., 207., 226., 25., 179., 60., 208., 606.]
y_data = [640., 633., 619., 393., 428., 27., 193., 66., 226., 1591.]
x_d = np.asarray(x_data)
y_d = np.asarray(y_data)
def _loss_grid(b_values, w_values, x_obs, y_obs):
    """Return the mean squared error of ``y = b + w * x`` over a (w, b) grid.

    Args:
        b_values: 1-D sequence of candidate biases (grid columns).
        w_values: 1-D sequence of candidate weights (grid rows).
        x_obs: 1-D sequence of observed inputs.
        y_obs: 1-D sequence of observed targets, same length as ``x_obs``.

    Returns:
        Array of shape ``(len(w_values), len(b_values))`` whose ``[j, i]``
        entry is the mean of ``(y - b_values[i] - w_values[j] * x) ** 2``
        over all observations.
    """
    x_obs = np.asarray(x_obs, dtype=float)
    y_obs = np.asarray(y_obs, dtype=float)
    b_grid, w_grid = np.meshgrid(b_values, w_values)
    # Add a trailing axis so every grid cell broadcasts against every sample.
    residual = (
        y_obs
        - b_grid[..., np.newaxis]
        - w_grid[..., np.newaxis] * x_obs
    )
    return np.mean(residual ** 2, axis=-1)


x = np.arange(-200, -100, 1)  # candidate bias values
y = np.arange(-5, 5, 0.1)  # candidate weight values
# meshgrid output: y indexes the rows, x indexes the columns
X, Y = np.meshgrid(x, y)

# Loss surface, shaped (len(y), len(x)) so that Z[j][i] matches X[j][i] and
# Y[j][i]. The grid is allocated by (rows=y, cols=x) rather than relying on
# both axes happening to have the same number of points.
Z = _loss_grid(x, y, x_data, y_data)
import time

# Starting point and Adagrad settings.
# An earlier configuration (lr = 0.0000001, iteration = 100000) was assigned
# and then immediately overwritten by these values, so it has been dropped.
b = -120
w = -4
lr = 3
iteration = 10000

# Parameter trajectory (starts with the initial point) and per-step loss.
b_history = [b]
w_history = [w]
loss_history = []

# Adagrad accumulators: running sums of the squared gradients of b and w.
lr_b = 0
lr_w = 0

# Number of samples; loop-invariant, so computed once up front.
m = float(len(x_d))

start = time.perf_counter()
for i in range(iteration):
    # Forward pass and mean squared error for the current (w, b).
    y_hat = w * x_d + b
    residual = y_d - y_hat
    loss = np.dot(residual, residual) / m

    # Gradients of the mean squared error with respect to b and w.
    grad_b = -2.0 * np.sum(residual) / m
    grad_w = -2.0 * np.dot(residual, x_d) / m

    lr_b = lr_b + grad_b ** 2
    lr_w = lr_w + grad_w ** 2

    # Update parameters with the per-parameter scaled step.
    # NOTE: there is no epsilon in the denominator; a gradient that is exactly
    # zero on the very first step would produce 0/0 here.
    b -= lr / np.sqrt(lr_b) * grad_b
    w -= lr / np.sqrt(lr_w) * grad_w

    b_history.append(b)
    w_history.append(w)
    loss_history.append(loss)

    if i % 1000 == 0:
        # w and b are the values after this step's update; loss was measured
        # before the update.
        print("Step %i, w: %0.4f, b: %.4f, Loss: %.4f" % (i, w, b, loss))
end = time.perf_counter()
print("大约需要时间:", end - start)
# Plot the loss surface together with the path taken by the optimizer.
# Filled contour plot of the loss Z over bias (x axis) and weight (y axis).
plt.contourf(x, y, Z, 50, alpha=0.5, cmap=plt.get_cmap('jet'))
# NOTE(review): (-188.4, 2.67) is presumably the known optimum (b, w) - confirm.
plt.plot([-188.4], [2.67], 'x', ms=12, mew=3, color="orange")
# Trajectory of (b, w) recorded during training.
plt.plot(b_history, w_history, 'o-', ms=3, lw=1.5, color='black')
plt.xlim(-200, -100)
plt.ylim(-5, 5)
plt.xlabel(r'$b$')
plt.ylabel(r'$w$')
plt.title("linear regression")
plt.show()
import numpy as np
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

# Generate example data.
# NOTE: X and y are drawn independently and without a seed, so the reported
# R2 changes from run to run even though the split itself uses random_state=42.
X = np.random.rand(100, 10)  # feature matrix with 10 features
y = np.random.rand(100)  # target variable

# Split the data set into a training set and a test set (80% / 20%).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Reduce dimensionality with PCA, keeping the first 5 principal components.
# The projection is fitted on the training set only and then applied to the
# test set.
pca = PCA(n_components=5)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Train a linear regression model on the reduced features and predict.
regression = LinearRegression()
regression.fit(X_train_pca, y_train)
y_pred = regression.predict(X_test_pca)

# Compute the R2 score (goodness of fit) on the held-out test set.
r2 = r2_score(y_test, y_pred)
print("R2 score:", r2)