# Load data
library(tidyverse)
# Read the score table from a local CSV file. The file has a header row and
# is decoded as UTF-8; text columns stay character vectors (no factors).
cjb <- read.csv(
  file = "/home/wy/Downloads/cjb.csv",
  header = TRUE,
  stringsAsFactors = FALSE,
  fileEncoding = "UTF-8"
)
# Interactive 3D scatter plot of the sx, wl and sw columns (axis labels:
# Math, Physics, Biology), drawn as spheres and coloured by the wlfk column.
#
# cjb is read with stringsAsFactors = FALSE, so wlfk is a character vector.
# Indexing the unnamed colour vector directly with those strings would
# return NA for every point; converting to factor codes (1, 2) picks
# "red"/"green" as intended. Which value maps to which colour follows the
# factor level order (locale-dependent sort) -- assumes wlfk has exactly two
# distinct values; TODO confirm.
#
# plot3d() is namespace-qualified because no library(rgl) call is visible
# in this script.
rgl::plot3d(
  x = cjb$sx,
  y = cjb$wl,
  z = cjb$sw,
  xlab = "Math",
  ylab = "Physics",
  zlab = "Biology",
  type = "s",
  size = 0.6,
  col = c("red", "green")[as.integer(factor(cjb$wlfk))]
)
library(aplpack)
# Chernoff faces for a hand-picked set of rows, using three score columns.
# The faces are laid out on a grid of 2 rows by 4 columns (eight faces).
select_cols <- c("wl", "hx", "sw")
select_rows <- c(
  488, 393, 490, 440,
  287, 289, 292, 293
)
faces(
  cjb[select_rows, select_cols],
  nrow.plot = 2,
  ncol.plot = 4,
  face.type = 1
)
# Console output printed by the faces() call above:
# effect of variables:
# modified item Var
# "height of face " "wl"
# "width of face " "hx"
# "structure of face" "sw"
# "height of mouth " "wl"
# "width of mouth " "hx"
# "smiling " "sw"
# "height of eyes " "wl"
# "width of eyes " "hx"
# "height of hair " "sw"
# "width of hair " "wl"
# "style of hair " "hx"
# "height of nose " "sw"
# "width of nose " "wl"
# "width of ear " "hx"
# "height of ear " "sw"
# Select the n highest total scorers of one track.
#
# data  : score table; columns 4:12 are summed into the total score zcj and
#         columns 4:13 are kept (presumably the nine subject scores yw..sw
#         followed by wlfk -- verify against the CSV layout).
# track : value of the wlfk column to keep.
# n     : number of rows to return (default 50).
#
# Returns the kept columns with the score columns yw..sw jittered.
top_scorers_by_track <- function(data, track, n = 50) {
  data %>%
    filter(wlfk == track) %>%
    mutate(zcj = rowSums(.[4:12])) %>%
    # Descending order: the original ascending sort made head() return the
    # *lowest* totals, contradicting the "top" naming.
    arrange(desc(zcj)) %>%
    select(4:13) %>%
    # Jitter is applied before head() so the noise amount is derived from
    # the whole track, as in the original pipeline.
    mutate_at(vars(yw:sw), jitter) %>%
    head(n = n)
}

# Top 50 students of each track, stacked into one table for plotting.
cjb_top_w <- top_scorers_by_track(cjb, "文科")
cjb_top_l <- top_scorers_by_track(cjb, "理科")
cjb_top <- rbind(cjb_top_w, cjb_top_l)
# Parallel-coordinates plot of cjb_top: columns 1-9 are the plotted axes,
# column 10 defines the groups; points are overlaid on the lines.
GGally::ggparcoord(
  data = cjb_top,
  columns = 1:9,
  groupColumn = 10
) +
  geom_point()
# Joint frequency table of the binned wl and sx scores.
# Bins: [0, 50], then eleven cut points from 50 to 100 in steps of 5.
breaks <- c(0, seq(50, 100, length.out = 11))
wl_sx_freq <- cjb %>%
  select(wl, sx) %>%
  mutate_at(
    vars(wl, sx),
    function(x) {
      # include.lowest = TRUE closes the first bin at 0, so a score of
      # exactly 0 is binned instead of silently becoming NA.
      cut(x, breaks = breaks, include.lowest = TRUE)
    }
  ) %>%
  group_by(wl, sx) %>%
  summarise(freq = n()) %>%
  # summarise() leaves the result grouped by wl; drop the grouping so that
  # complete() can expand the full wl x sx grid of factor levels.
  ungroup() %>%
  complete(wl, sx, fill = list(freq = 0L))
wl_sx_freq
# NOTE: the output below was recorded from an earlier run, before the
# ungroup() / include.lowest fixes above. The row count, grouping header and
# first bin label will differ when the code is re-run.
# A tibble: 1,453 x 3
# Groups: wl [12]
# wl sx freq
# <fct> <fct> <dbl>
# 1 (0,50] (0,50] 1
# 2 (0,50] (50,55] 0
# 3 (0,50] (55,60] 0
# 4 (0,50] (60,65] 4
# 5 (0,50] (65,70] 4
# 6 (0,50] (70,75] 2
# 7 (0,50] (75,80] 0
# 8 (0,50] (80,85] 1
# 9 (0,50] (85,90] 0
# 10 (0,50] (90,95] 1
# … with 1,443 more rows
# Heat map of wl_sx_freq: one tile per (wl, sx) bin pair, filled from white
# to red by freq, with the count printed on each tile.
ggplot(wl_sx_freq, aes(x = wl, y = sx, fill = freq)) +
  geom_tile(colour = "white", size = 0.5) +
  geom_text(aes(label = freq), size = 3) +
  scale_fill_gradient(low = "white", high = "red") +
  # Rotate the x-axis *tick labels* (the bin names), not the axis title:
  # the original set axis.title.x, which leaves the bin labels horizontal.
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      vjust = 0.5
    )
  ) +
  # Equal unit length on both axes.
  coord_fixed()
library(clustertend)
# Hopkins statistic on the score columns yw..sw, averaged over 100 repeated
# runs. Each run is called with n equal to 5% of the row count (rounded
# down). The seed is fixed so the repeated runs are reproducible.
set.seed(2012)
scores <- select(cjb, yw:sw)
n <- floor(0.05 * nrow(cjb))
hopkins_stat <- unlist(
  replicate(100, hopkins(scores, n))
)
# The trailing value is the result recorded from an earlier run.
mean(hopkins_stat) # [1] 0.09244976