PDF预览

染色体上 Reads 分布图
- 方式一
- 方式二

染色体上 Reads 分布图

Reads在参考基因组各染色体上的分布情况，一方面有利于了解本次测序结果中reads的覆盖情况，另一方面也能够获得转录活性高低及其分布情况。
具体作图方法为：

设置滑动窗口长度为1000 bp，计算窗口内部比对到各个碱基位置的reads数；
选定窗口内各碱基中reads数的中值；
对各窗口reads数中值以2为底进行对数转换后绘图。

方式一

图中，横坐标为染色体的长度信息，纵坐标为计算得到的reads密度，其中绿色为正链，红色为负链。

 bedtools makewindows -b genome.gtf  -w 1000 > genome.window.bed

genome.gtf

seq_id	source	type	start	end	score	strand	phase	attributes
1	ensembl	gene	36112690	36122387	.	-	.	gene_id "ENSRNOG00000066169"; gene_version "1"; gene_source "ensembl"; gene_biotype "protein_coding";
1	ensembl	transcript	36112690	36122387	.	-	.	gene_id "ENSRNOG00000066169"; gene_version "1"; transcript_id "ENSRNOT00000101581"; transcript_version "1"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; transcript_biotype "protein_coding"; tag "Ensembl_canonical";
1	ensembl	exon	36122324	36122387	.	-	.	gene_id "ENSRNOG00000066169"; gene_version "1"; transcript_id "ENSRNOT00000101581"; transcript_version "1"; exon_number "1"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSRNOE00000618632"; exon_version "1"; tag "Ensembl_canonical";
1	ensembl	CDS	36122324	36122387	.	-	0	gene_id "ENSRNOG00000066169"; gene_version "1"; transcript_id "ENSRNOT00000101581"; transcript_version "1"; exon_number "1"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSRNOP00000083062"; protein_version "1"; tag "Ensembl_canonical";
1	ensembl	exon	36121478	36121512	.	-	.	gene_id "ENSRNOG00000066169"; gene_version "1"; transcript_id "ENSRNOT00000101581"; transcript_version "1"; exon_number "2"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSRNOE00000610554"; exon_version "1"; tag "Ensembl_canonical";

genome.window.bed

chrom	chromStart	chromEnd
1	36113689	36114689
1	36114689	36115689
1	36115689	36116689
1	36116689	36117689
1	36117689	36118689
1	36118689	36119689
1	36119689	36120689
1	36120689	36121689
1	36121689	36122387

awk '{print $1"\t"$2"\t"$3"\t0\t0\t+\n"$1"\t"$2"\t"$3"\t0\t0\t-"}'  genome.window.bed >  genome.window.bed.region


1	36112689	36113689	-
1	36113689	36114689	+
1	36113689	36114689	-
1	36114689	36115689	+
1	36114689	36115689	-
1	36115689	36116689	+
1	36115689	36116689	-
1	36116689	36117689	+
1	36116689	36117689	-

bedtools bamtobed -i hisat2.bam   > hisat2.bed

chr	start	end	QNAME	MAPQ	strand
1	389	539	E100050738L1C006R03102745350/1	60	-
1	1421	1571	E100050738L1C009R04003484420/1	1	+
1	1421	1571	E100050738L1C009R04003484420/1	1	+
1	1472	1622	E100050738L1C039R03004028531/1	1	+
1	1477	1627	E100050738L1C009R04003484420/2	1	-
1	3060	3209	E100050738L1C025R02303789340/1	60	+
1	3060	3209	E100050738L1C025R02303789340/2	60	-
1	4456	4606	E100050738L1C042R02103200309/1	60	+
1	5225	5375	E100050738L1C020R01301096306/1	60	-

bedtools coverage -S -a genome.window.bed.region -b hisat2.bed > genome.window.bed.region.cov

chr	star	end		reds 在char区域次数	匹配的总长度	该区域总长度	比例
1	36112689	36113689	-	4	1000	1000	1.0000000
1	36113689	36114689	+	7	1000	1000	1.0000000
1	36113689	36114689	-	2	1000	1000	1.0000000
1	36114689	36115689	+	7	1000	1000	1.0000000
1	36114689	36115689	-	1	1000	1000	1.0000000
1	36115689	36116689	+	6	1000	1000	1.0000000
1	36115689	36116689	-	2	1000	1000	1.0000000
1	36116689	36117689	+	7	1000	1000	1.0000000
1	36116689	36117689	-	2	1000	1000	1.0000000


data <- read.table("/ssd1/wy/workspace/RNA-seq/workspace/resources/genome.window.bed.region.cov",sep="\t")
library(tidyverse)
data <- data  |>
    mutate(log=case_when(V6=="+"~log2(V7+1),
    V6!="+"~ -log2(V7+1))) 

chr <-c("chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22","chrX","chrY") #因为人类染色体还有很多非常规染色体，如果全部画出来会不好看，所有此处只画出常规染色体。也可以在文件hg19.txt里面只输入常规染色体。data2<- data[data$V1 %in% chr,]
data2<- data[data$V1 %in% c(c(1:20),"X","Y","MT"),]
color <- c("+"="#008B00","-"="#FF8247") # 定义正负链颜色
png(file="a.png")
ggplot(data2)+
    facet_grid(V1~.)+
    geom_point(aes(V2/1000000,log,colour=V6),size=1)+
    theme(
    strip.text.y=element_text(angle=0,face="bold",hjust=0),
    legend.position="none",panel.grid.minor=element_blank(),
    panel.grid.major=element_blank(),
    plot.title=element_text(size=20,face="bold"),
    axis.text.y=element_text(size=10,angle=50,face="bold"),
    strip.background=element_blank(),
    panel.background=element_rect(fill="white"),
    axis.line=element_line(linetype=1))+
    scale_colour_manual(values=color)+
    scale_y_continuous(breaks=c(-15,15))+
    labs(title="Reads Density in Chromosomes")+
    xlab("chromosome position(Mb)")+
    ylab("reads density(log2)")
dev.off()

方式二

https://bioconductor.org/packages/release/bioc/vignettes/trackViewer/inst/doc/trackViewer.html