展开

plink、gemma及可视化

最后发布时间 : 2024-07-21 12:48:25 浏览量 :
docker run --rm -it   -v $PWD:$PWD -w $PWD \
	registry.cn-hangzhou.aliyuncs.com/wybioinfo/tidyverse:4.3 bash 

docker run --user 0:0  --rm -it  -v $PWD:$PWD -w $PWD \
	biocontainers/plink1.9:v1.90b6.6-181012-1-deb_cv1 bash
gemma=/workspace/Foxtail-millet-data-analysis/gemma-0.98.5-linux-static-AMD64
plink=/workspace/Foxtail-millet-data-analysis/plink
name=`head -n 310 OTU.rank.phe.id | awk '{print $1}' | tail -n 1`
# OTU_48
mkdir -p  host_OTU/OTU_48
$gemma --bfile ./millet.snp.gwas -p ./OTU.rank.phe -miss 0.1 -maf 0.01 -gk -outdir host_OTU/OTU_48 -o snp.kinship

$plink --bfile ./millet.snp.gwas --pca 10 --out host_OTU/OTU_48/PC10
cut -d ' ' -f 3- host_OTU/OTU_48/PC10.eigenvec | sed 's/^/1 /' > host_OTU/OTU_48/snp_PC10

$gemma --bfile ./millet.snp.gwas -p ./OTU.rank.phe -miss 0.1 -n 310 -k host_OTU/OTU_48/snp.kinship.cXX.txt -c host_OTU/OTU_48/snp_PC10 -lmm -outdir  host_OTU/OTU_48 -o OTU_48

echo "SNP CHROM BP P" > host_OTU/OTU_48/OTU_48.assoc.for_plot.txt
tail -n +2 host_OTU/OTU_48/OTU_48.assoc.txt | perl -ne '@tmp=split; print "si$tmp[0]:$tmp[2] $tmp[0] $tmp[2] $tmp[11]\n"' >> host_OTU/OTU_48/OTU_48.assoc.for_plot.txt

文章github

> data(pig60K)   #calculated p-values by MLM
> data(cattle50K)   #calculated SNP effects by rrblup
> head(pig60K)

          SNP Chromosome Position    trait1     trait2     trait3
1 ALGA0000009          1    52297 0.7738187 0.51194318 0.51194318
2 ALGA0000014          1    79763 0.7738187 0.51194318 0.51194318
3 ALGA0000021          1   209568 0.7583016 0.98405289 0.98405289
4 ALGA0000022          1   292758 0.7200305 0.48887140 0.48887140
5 ALGA0000046          1   747831 0.9736840 0.22096836 0.22096836
6 ALGA0000047          1   761957 0.9174565 0.05753712 0.05753712

> head(cattle50K)

   SNP chr    pos Somatic cell score  Milk yield Fat percentage
1 SNP1   1  59082        0.000244361 0.000484255    0.001379210
2 SNP2   1 118164        0.000532272 0.000039800    0.000598951
3 SNP3   1 177246        0.001633058 0.000311645    0.000279427
4 SNP4   1 236328        0.001412865 0.000909370    0.001040161
5 SNP5   1 295410        0.000090700 0.002202973    0.000351394
6 SNP6   1 354493        0.000110681 0.000342628    0.000105792

作为样本数据集,前三列分别是 SNPs 的名称、染色体、位置,其余列为 GWAS 的 pvalue 或 GS/GP 对性状的影响,性状的数量是无限的。注意: 如果绘制 SNP _ Missouri,只需要前三列。

library(CMplot)
df <- read.table("~/Foxtail-millet-data-analysis/Figure1_and_Figure5/data/OTU_106.assoc.txt",header = T)
CMplot(head(df,n = 2000), plot.type = "m", LOG10=TRUE, cex=0.4, threshold=2.2E-5,, threshold.lty=2, threshold.col="black",  amplify=TRUE, signal.col="red", signal.cex=1,width=14, bin.size=1e6, chr.den.col=c("darkgreen", "yellow", "red"), verbose=TRUE, height=5, file="pdf")
data(pig60K) 
head(pig60K)
head(df)
CMplot(pig60K,type="p",plot.type="m",LOG10=TRUE,threshold=NULL,file="jpg",file.name="",dpi=300,
       file.output=TRUE,verbose=TRUE,width=14,height=6,chr.labels.angle=45)

https://github.com/genetics-statistics/GEMMA