# https://zhuanlan.zhihu.com/p/700555198
library(tidyverse)
library(plyr)
# Load the Titanic training data shipped with the `titanic` package.
data(titanic_train, package = "titanic")
# Variables kept for modelling:
#   Survived: survival indicator (1 = survived, 0 = died)
#   Pclass:   passenger class (first, second or third)
#   Sex:      sex
#   FamSize:  total number of relatives aboard (SibSp + Parch)
#   Fare:     ticket price paid by the passenger
#   Embarked: port of embarkation
#
# Rows 62 and 830 are dropped before anything else happens.
# NOTE(review): presumably these are the passengers with a missing Embarked
# value -- confirm against the raw data.
#
# The dplyr verbs are namespaced explicitly: plyr is attached after the
# tidyverse in this script, so a bare `mutate()` resolves to plyr::mutate(),
# which does not understand across().
data <- titanic_train[-c(62, 830), ] %>%
  dplyr::mutate(
    dplyr::across(dplyr::all_of(c("Sex", "Pclass", "Embarked")), factor),
    FamSize = SibSp + Parch
  ) %>%
  dplyr::select(Survived, Pclass, Sex, FamSize, Fare, Embarked)
colnames(data)

# Split the data: 70% training set, 30% test set (seed fixed for
# reproducibility).
set.seed(111)
index <- sort(sample(nrow(data), nrow(data) * 0.7))
train <- data[index, ] # training set
test <- data[-index, ] # test set

# A few fares are 0, and log2(0) is -Inf, so fares below 1 are raised to 1
# before taking the logarithm. The same transformation is applied to both
# subsets so that models fitted on `train` can be applied to `test` on the
# same scale.
train$Fare <- log2(pmax(train$Fare, 1))
test$Fare <- log2(pmax(test$Fare, 1))
# Univariate analysis: logistic regression of death (Survived == 0) on
# passenger class.
model <- glm(
  formula = Survived == 0 ~ Pclass,
  family = binomial(),
  data = train
)
summary(model)
# Model coefficients (log-odds scale) together with their confidence limits.
coef_ci <- cbind(coef = coef(model), confint(model))
coef_ci
# Odds ratios: the same table on the exponentiated scale.
or_ci <- exp(coef_ci)
colnames(or_ci)[1] <- "OR"
or_ci
# Judging from the results, passengers in second and third class had a higher
# risk of death than those in first class.
# Batch univariate logistic regression.
#
# Fits the logistic model `Survived == 0 ~ <x>` and returns one row per
# non-intercept coefficient.
#
# Arguments:
#   x     Name of a single predictor column, as a character string.
#   data  Data frame holding `Survived` and the predictor. Defaults to the
#         script-level `train` object, so existing one-argument calls keep
#         working.
#
# Returns a data frame with columns:
#   characteristics  the predictor name `x`
#   B                coefficient estimate (3 decimals)
#   SE               standard error (3 decimals)
#   OR               odds ratio, exp(B) (2 decimals)
#   CI               Wald 95% interval for the OR, formatted "lower-upper"
#   Z                z statistic (3 decimals)
#   P                p value (3 decimals)
uni_glm_model <- function(x, data = train) {
  fml <- as.formula(paste0("Survived== 0 ~", x))
  fit <- glm(fml, data = data, family = binomial)

  # Take every statistic from the summary table so all columns have the same
  # length even if glm() drops an aliased coefficient.
  coef_tab <- summary(fit)$coefficients
  est <- coef_tab[, "Estimate"]
  se <- coef_tab[, "Std. Error"]

  # The interval is built from the unrounded estimate and standard error;
  # rounding happens only for display, otherwise the rounding error in SE is
  # amplified by exp().
  ci_low <- round(exp(est - 1.96 * se), 2)
  ci_high <- round(exp(est + 1.96 * se), 2)

  result <- data.frame(
    "characteristics" = x,
    B = round(est, 3),
    SE = round(se, 3),
    OR = round(exp(est), 2),
    CI = paste0(ci_low, "-", ci_high),
    Z = round(coef_tab[, "z value"], 3),
    P = round(coef_tab[, "Pr(>|z|)"], 3)
  )

  # Drop the intercept row; only the predictor's coefficients are reported.
  result[-1, ]
}
# Run the univariate model for every predictor, i.e. every column of `train`
# except the outcome. Selecting by name rather than by position (2:6) keeps
# this correct if columns are added or reordered.
predictors <- setdiff(colnames(train), "Survived")
uni_glm <- lapply(predictors, uni_glm_model)
# Stack the per-predictor result tables into one data frame.
# NOTE(review): ldply() appears to discard the coefficient row names, so the
# levels of a factor predictor all carry the same `characteristics` label in
# the combined table -- confirm whether a term column is wanted.
uni_glm <- ldply(uni_glm, data.frame)
# Multivariable analysis.
# There are only a few variables, so all of them enter the multivariable
# model.
model_m <- glm(
  formula = Survived == 0 ~ Pclass + Sex + Fare + Embarked + FamSize,
  family = binomial(),
  data = train
)
summary(model_m)
# The p values of Fare and Embarked are not significant. FamSize was not
# significant in the univariate analysis but is significant here, which
# suggests that confounding is present.
# Stepwise selection in both directions, starting from the full model.
model_both <- step(model_m, direction = "both")
# Keep the summary of the selected model and print it.
(mul_glm <- summary(model_both))
# https://mp.weixin.qq.com/s?__biz=MzI2OTQyMzc5MA==&mid=2247490396&idx=1&sn=b415ea07c997858b5791f08a6e6bb35b&chksm=eae1de9ddd96578bb77a8b15b13c80e377e9483292ec12a248624d9bb2cf2ec291e8cd19bdd7&scene=21#wechat_redirect
# https://blog.csdn.net/weixin_43843918/article/details/135163071
# https://zhuanlan.zhihu.com/p/660756933
# https://blog.csdn.net/Dr_long1996/article/details/134881348