线性模型-回归
最后发布时间 : 2023-10-11 22:12:04
浏览量 :
学习资料
线性回归
- 数据:工资(特征)
- 目标:预测银行会贷款给我多少钱(标签)
工资 | 额度 |
---|---|
4000 | 20000 |
5000 | 30000 |
6000 | 40000 |
Linear model:$y = b + \sum_i w_i x_i$,其中 $x_i$ 叫作 feature,$w_i$ 叫作 weight,$b$ 叫作 bias
用 Loss function $L$ 来衡量一个 function(即一组参数 $w$、$b$)的好坏:$L$ 越小,function 越好
回归演示
import numpy as np
import matplotlib.pyplot as plt
# Training data: ten (x, y) sample pairs for a one-feature linear model
# y = b + w * x.
x_data = [338., 333., 328., 207., 226., 25., 179., 60., 208., 606.]
y_data = [640., 633., 619., 393., 428., 27., 193., 66., 226., 1591.]
x_d = np.asarray(x_data)
y_d = np.asarray(y_data)

# Parameter grid for the loss surface: x holds the candidate biases b and
# y holds the candidate weights w.
x = np.arange(-200, -100, 1)
y = np.arange(-5, 5, 0.1)

# meshgrid puts y along the rows and x along the columns, so
# X[j, i] == x[i] (bias) and Y[j, i] == y[j] (weight).
X, Y = np.meshgrid(x, y)

# loss
# Z[j, i] is the mean squared error of the model with b = x[i], w = y[j],
# averaged over all samples. A trailing axis is added to the grids so the
# sample vectors broadcast against every (b, w) pair at once; the result has
# shape (len(y), len(x)), matching the row/column layout of the meshgrid.
Z = np.mean(
    (y_d - X[..., np.newaxis] - Y[..., np.newaxis] * x_d) ** 2,
    axis=-1,
)
# Starting point of the search in parameter space.
b = -120
w = -4

# Base step size and number of update steps.
# NOTE(review): the original assigned lr = 0.0000001 and iteration = 100000
# immediately before these values and then overwrote them; presumably those
# were the settings for plain (unscaled) gradient descent -- confirm.
lr = 3
iteration = 10000

# Trajectory of the parameters, seeded with the starting point.
b_history = [b]
w_history = [w]

# Running sums of squared gradients for b and w; the update step divides lr by
# their square roots (Adagrad-style per-parameter scaling).
lr_b = 0
lr_w = 0

# Loss value recorded at every step.
loss_history = []
import time
# Fit y = w * x + b by gradient descent with Adagrad-style step scaling:
# each parameter's step is lr divided by the square root of the running sum of
# its squared gradients. Progress is printed every 1000 steps and the elapsed
# time is printed at the end.
start = time.perf_counter()  # monotonic clock, suited to measuring durations
m = float(len(x_d))  # sample count; constant, so computed once outside the loop
for i in range(iteration):
    y_hat = w * x_d + b
    residual = y_d - y_hat

    # Mean squared error and its partial derivatives with respect to b and w.
    loss = np.dot(residual, residual) / m
    grad_b = -2.0 * np.sum(residual) / m
    grad_w = -2.0 * np.dot(residual, x_d) / m

    # Accumulate squared gradients for the per-parameter step size.
    lr_b = lr_b + grad_b ** 2
    lr_w = lr_w + grad_w ** 2

    # update param
    # NOTE(review): there is no epsilon in the denominator, so a gradient that
    # is exactly zero on the very first step would divide by zero.
    b -= lr / np.sqrt(lr_b) * grad_b
    w -= lr / np.sqrt(lr_w) * grad_w

    b_history.append(b)
    w_history.append(w)
    loss_history.append(loss)

    if i % 1000 == 0:
        print("Step %i, w: %0.4f, b: %.4f, Loss: %.4f" % (i, w, b, loss))
end = time.perf_counter()
print("大约需要时间:", end - start)
# plot the figure
# Filled contour map of the loss surface Z over the (b, w) grid, 50 levels.
plt.contourf(x, y, Z, levels=50, alpha=0.5, cmap='jet')

# Orange cross at (b, w) = (-188.4, 2.67).
# NOTE(review): presumably the location of the loss minimum -- confirm.
plt.plot([-188.4], [2.67], 'x', markersize=12, markeredgewidth=3, color="orange")

# Path followed by the parameters during training.
plt.plot(b_history, w_history, 'o-', markersize=3, linewidth=1.5, color='black')

# Labels and title, then clip the view to the range covered by the grid.
plt.xlabel(r'$b$')
plt.ylabel(r'$w$')
plt.title("linear regression")
plt.xlim(-200, -100)
plt.ylim(-5, 5)

plt.show()
主成分回归
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import numpy as np
# Principal component regression demo: project the features onto a few
# principal components, then fit an ordinary linear regression on them.

# Generate example data: 100 samples with 10 features and one target value
# each. Features and target are drawn independently at random, so there is no
# real relationship for the model to learn.
X = np.random.rand(100, 10)
y = np.random.rand(100)

# Split the data set into a training set (80%) and a test set (20%).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Reduce dimensionality with PCA, keeping the first 5 principal components.
# The projection is fitted on the training set only and then applied
# unchanged to the test set.
pca = PCA(n_components=5)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Train a linear regression model on the projected features and predict the
# held-out targets.
regression = LinearRegression()
regression.fit(X_train_pca, y_train)
y_pred = regression.predict(X_test_pca)

# Compute the R2 score (goodness of fit) on the test set.
r2 = r2_score(y_test, y_pred)
print("R2 score:", r2)