单因素logistic回归

最后发布时间:2024-06-13 20:20:47 浏览量:

生信小木屋

# https://zhuanlan.zhihu.com/p/700555198
library(tidyverse)
library(plyr)
data(titanic_train,package = "titanic") 

# Variables kept for the analysis:
#   Survived: survival indicator (1 = survived, 0 = died)
#   Pclass:   passenger class (first, second or third)
#   Sex:      passenger sex
#   FamSize:  total number of relatives aboard (SibSp + Parch)
#   Fare:     fare paid by the passenger
#   Embarked: port of embarkation (categorical)

# Build the analysis data set:
#   * rows 62 and 830 are excluded (presumably the records with a missing
#     Embarked value -- TODO confirm);
#   * Sex, Pclass and Embarked are converted to factors;
#   * FamSize is derived as SibSp + Parch;
#   * only the outcome and the five predictors are kept.
# The dplyr verbs are namespace-qualified on purpose: plyr is attached after
# the tidyverse, so a bare `mutate()` resolves to plyr::mutate(), inside which
# `across()` does not work.
data <- titanic_train[-c(62, 830), ] %>%
  dplyr::mutate(
    dplyr::across(dplyr::all_of(c("Sex", "Pclass", "Embarked")), factor),
    FamSize = SibSp + Parch
  ) %>%
  dplyr::select(Survived, Pclass, Sex, FamSize, Fare, Embarked)

colnames(data)


# Split the data: 70% of the rows go to the training set, the rest to the
# test set. The seed makes the random split reproducible.
set.seed(111)
index <- sort(sample.int(nrow(data), size = floor(nrow(data) * 0.7)))
train <- data[index, ]   # training set
test <- data[-index, ]   # test set

# Some fares are 0 and log2(0) is -Inf, so fares below 1 are raised to 1
# before taking the base-2 logarithm.
# NOTE: only train$Fare is transformed here; test$Fare stays on the original
# scale and needs the same transformation before it is used with these models.
train$Fare <- log2(pmax(train$Fare, 1))

# Univariable analysis: logistic regression of death (Survived == 0) on
# passenger class alone.
model <- glm(Survived == 0 ~ Pclass, data = train, family = binomial())
summary(model)

# Confidence intervals are computed once and reused for both tables below
# (confint() uses its default 95% level).
model_ci <- confint(model)

# Model coefficients (log-odds scale) with their confidence limits
cbind(coef = coef(model), model_ci)
# Odds ratios: exponentiated coefficients and confidence limits
exp(cbind(OR = coef(model), model_ci))

# Author's reading of the result: compared with first class, second- and
# third-class passengers have a higher risk of death.

# 批量单因素Logistic回归

#' Fit a univariable logistic regression and tabulate its coefficients
#'
#' Models death (`Survived == 0`) on a single predictor and returns one row
#' per non-intercept coefficient.
#'
#' @param x Name of the predictor column, as a single character string.
#' @param data Data frame containing `Survived` and the predictor. Defaults
#'   to the `train` object of the calling script, so existing one-argument
#'   calls keep working.
#' @return A data frame whose row names are the coefficient names, with
#'   columns `characteristics` (the predictor name), `B` (estimate), `SE`,
#'   `OR` (exp(estimate)), `CI` (Wald-type interval on the odds-ratio scale,
#'   formatted as "low-high"), `Z` and `P`.
uni_glm_model <- function(x, data = train) {
  fml <- as.formula(paste0("Survived== 0 ~", x))
  fit <- glm(fml, data = data, family = binomial)

  # Take estimate and SE from the same table so their lengths always agree.
  coef_table <- summary(fit)$coefficients
  beta <- coef_table[, 1]
  se <- coef_table[, 2]

  # Interval limits use the unrounded SE; rounding happens only for display.
  ci_low <- round(exp(beta - 1.96 * se), 2)
  ci_high <- round(exp(beta + 1.96 * se), 2)

  result <- data.frame(
    "characteristics" = x,
    B = round(beta, 3),
    SE = round(se, 3),
    OR = round(exp(beta), 2),
    CI = paste0(ci_low, "-", ci_high),
    Z = round(coef_table[, 3], 3),
    P = round(coef_table[, 4], 3)
  )

  # Drop the intercept row; keep a data frame even if one row remains.
  result[-1, , drop = FALSE]
}
# Run the univariable model for every predictor (every column except the
# outcome) and stack the per-variable tables into one data frame.
# rbind() keeps the coefficient names as row names, so the individual levels
# of a factor (e.g. Pclass2 vs Pclass3) remain identifiable in the result.
uni_glm <- do.call(
  rbind,
  lapply(setdiff(colnames(train), "Survived"), uni_glm_model)
)


# Multivariable analysis
# There are only a few variables, so all of them are entered into the
# multivariable logistic model of death (Survived == 0).
model_m<-glm(Survived==0 ~ Pclass+Sex+Fare+Embarked+FamSize,
             data=train,
             family = binomial())
summary(model_m)
# Author's reading of the output: the p-values for Fare and Embarked are not
# significant; FamSize was not significant in the univariable analysis but is
# significant in the multivariable model, which suggests confounding.

# Stepwise selection in both directions, starting from the full model; the
# summary of the selected model is stored in mul_glm and printed.
model_both<-step(model_m,direction = "both")
mul_glm<-summary(model_both)
mul_glm

# 参考链接:
# https://mp.weixin.qq.com/s?__biz=MzI2OTQyMzc5MA==&mid=2247490396&idx=1&sn=b415ea07c997858b5791f08a6e6bb35b&chksm=eae1de9ddd96578bb77a8b15b13c80e377e9483292ec12a248624d9bb2cf2ec291e8cd19bdd7&scene=21#wechat_redirect
# https://blog.csdn.net/weixin_43843918/article/details/135163071
#
# https://zhuanlan.zhihu.com/p/660756933
#
# https://blog.csdn.net/Dr_long1996/article/details/134881348

快捷入口
生物统计学 思维导图 浏览PDF 下载PDF
分享到:
标签