- for removing terms;
: for interaction;
* for crossing;
%in% for nesting; and
^ for limiting crossing to the specified degree.
# Set seed
# Simulate three standard-normal vectors of length 5. The seed is fixed so the
# draws are reproducible; the draw order (x, then x2, then y) determines which
# values each vector receives.
set.seed(123)
x <- rnorm(5)
x2 <- rnorm(5)
y <- rnorm(5)

# Model frame for the crossed formula `y ~ x * x2`. The frame lists the
# variables the formula uses (y, x, x2); it carries no separate product column.
model.frame(y ~ x * x2, data = data.frame(x = x, y = y, x2 = x2))
y x x2
1 1.2240818 -0.56047565 1.7150650
2 0.3598138 -0.23017749 0.4609162
3 0.4007715 1.55870831 -1.2650612
4 0.1106827 0.07050839 -0.6868529
5 -0.5558411 0.12928774 -0.4456620
# Same model with the crossing spelled out: both main effects plus the
# `x:x2` interaction term.
model.frame(
  y ~ x + x2 + x:x2,
  data = data.frame(x = x, y = y, x2 = x2)
)
y x x2
1 1.2240818 -0.56047565 1.7150650
2 0.3598138 -0.23017749 0.4609162
3 0.4007715 1.55870831 -1.2650612
4 0.1106827 0.07050839 -0.6868529
5 -0.5558411 0.12928774 -0.4456620
# Inside a formula `^` is the crossing operator, not arithmetic power, so
# `x + x^2` does not add a squared column to the model frame.
model.frame(
  y ~ x + x^2,
  data = data.frame(x = rnorm(5), y = rnorm(5))
)

# Wrapping the expression in I() makes `^` arithmetic again, so the frame
# gains an `I(x^2)` column holding the squared values.
model.frame(
  y ~ x + I(x^2),
  data = data.frame(x = rnorm(5), y = rnorm(5))
)
y x I(x^2)
1 -1.0678237 1.7869131 3.193058....
2 -0.2179749 0.4978505 0.247855....
3 -1.0260044 -1.9666172 3.867583....
4 -0.7288912 0.7013559 0.491900....
5 -0.6250393 -0.4727914 0.223531....
# Interaction model via the crossing operator: `hours * effort` stands for
# both main effects plus the `hours:effort` term.
fit1 <- lm(
  formula = loss ~ hours * effort,
  data = dat
)
summary(fit1)
Equivalent to (等价于):
# The same model with the crossing written out term by term.
fit1 <- lm(
  formula = loss ~ hours + effort + hours:effort,
  data = dat
)
summary(fit1)
Equivalent to (等价于):
# Build the interaction regressor by hand as the elementwise product of the
# two predictors, then enter it as an ordinary column.
dat$hours_effort <- with(dat, hours * effort)
fit1 <- lm(
  formula = loss ~ hours + effort + hours_effort,
  data = dat
)
summary(fit1)
# Make "female" the reference level of gender before fitting.
dat$gender <- relevel(dat$gender, ref = "female")

# Interaction of hours with gender through the crossing operator.
fit2 <- lm(
  formula = loss ~ hours * gender,
  data = dat
)
summary(fit2)

# Hand-built counterpart: hours multiplied by a "male" indicator, so the
# column equals hours for male rows and 0 otherwise.
# NOTE(review): assumes "male" is the only non-reference gender level; confirm.
dat$hours_gender_male <- with(dat, hours * (gender == "male"))
fit2 <- lm(
  formula = loss ~ hours + gender + hours_gender_male,
  data = dat
)
summary(fit2)
# Make "read" the reference level of prog before fitting.
dat$prog <- relevel(dat$prog, ref = "read")

# Interaction of hours with prog through the crossing operator.
fit3 <- lm(
  formula = loss ~ hours * prog,
  data = dat
)
summary(fit3)

# Hand-built counterpart: one hours-by-indicator column per non-reference
# program used here ("jog" and "swim").
# NOTE(review): assumes prog has exactly the levels read/jog/swim; confirm.
dat$hours_prog_jog <- with(dat, hours * (prog == "jog"))
dat$hours_prog_swim <- with(dat, hours * (prog == "swim"))
fit3 <- lm(
  formula = loss ~ hours + prog + hours_prog_jog + hours_prog_swim,
  data = dat
)
summary(fit3)
# Set the reference levels of both factors: "female" and "read".
dat$gender <- relevel(dat$gender, ref = "female")
dat$prog <- relevel(dat$prog, ref = "read")

# Factor-by-factor interaction through the crossing operator.
fit4 <- lm(
  formula = loss ~ gender * prog,
  data = dat
)
summary(fit4)

# Hand-built counterpart: products of the "male" indicator with each
# non-reference program indicator. Products (not `&`) are kept so that
# missing values propagate exactly as in the comparisons themselves.
dat$gender_male_prog_jog <- with(dat, (gender == "male") * (prog == "jog"))
dat$gender_male_prog_swim <- with(dat, (gender == "male") * (prog == "swim"))
fit4 <- lm(
  formula = loss ~ gender + prog + gender_male_prog_jog +
    gender_male_prog_swim,
  data = dat
)
summary(fit4)
# Bin effort into three ordered categories using the cut points 0, 25, 35, Inf,
# then set "low" and "read" as the reference levels.
dat$effort_cat <- cut(
  dat$effort,
  breaks = c(0, 25, 35, Inf),
  labels = c("low", "medium", "high")
)
dat$effort_cat <- relevel(dat$effort_cat, ref = "low")
dat$prog <- relevel(dat$prog, ref = "read")

# Factor-by-factor interaction through the crossing operator.
fit5 <- lm(
  formula = loss ~ effort_cat * prog,
  data = dat
)
summary(fit5)

# Hand-built counterpart: one indicator product per combination of a
# non-reference effort category with a non-reference program.
dat$effort_cat_medium_prog_jog <-
  with(dat, (effort_cat == "medium") * (prog == "jog"))
dat$effort_cat_high_prog_jog <-
  with(dat, (effort_cat == "high") * (prog == "jog"))
dat$effort_cat_medium_prog_swim <-
  with(dat, (effort_cat == "medium") * (prog == "swim"))
dat$effort_cat_high_prog_swim <-
  with(dat, (effort_cat == "high") * (prog == "swim"))
fit5 <- lm(
  formula = loss ~ effort_cat + prog +
    effort_cat_medium_prog_jog + effort_cat_high_prog_jog +
    effort_cat_medium_prog_swim + effort_cat_high_prog_swim,
  data = dat
)
summary(fit5)
https://www.datacamp.com/tutorial/r-formula-tutorial
https://f1000research.com/articles/9-1444
https://zhuanlan.zhihu.com/p/460060330