Reads在参考基因组各染色体上的分布情况,一方面有利于了解本次测序结果中reads的覆盖情况,另一方面也能够获得转录活性高低及其分布情况。
具体作图方法为:
图中,横坐标为染色体的长度信息,纵坐标为计算得到的reads密度,其中绿色为正链,红色为负链。
bedtools makewindows -b genome.gtf -w 1000 > genome.window.bed
genome.gtf
seq_id | source | type | start | end | score | strand | phase | attributes |
---|---|---|---|---|---|---|---|---|
1 | ensembl | gene | 36112690 | 36122387 | . | - | . | gene_id "ENSRNOG00000066169"; gene_version "1"; gene_source "ensembl"; gene_biotype "protein_coding"; |
1 | ensembl | transcript | 36112690 | 36122387 | . | - | . | gene_id "ENSRNOG00000066169"; gene_version "1"; transcript_id "ENSRNOT00000101581"; transcript_version "1"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; transcript_biotype "protein_coding"; tag "Ensembl_canonical"; |
1 | ensembl | exon | 36122324 | 36122387 | . | - | . | gene_id "ENSRNOG00000066169"; gene_version "1"; transcript_id "ENSRNOT00000101581"; transcript_version "1"; exon_number "1"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSRNOE00000618632"; exon_version "1"; tag "Ensembl_canonical"; |
1 | ensembl | CDS | 36122324 | 36122387 | . | - | 0 | gene_id "ENSRNOG00000066169"; gene_version "1"; transcript_id "ENSRNOT00000101581"; transcript_version "1"; exon_number "1"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSRNOP00000083062"; protein_version "1"; tag "Ensembl_canonical"; |
1 | ensembl | exon | 36121478 | 36121512 | . | - | . | gene_id "ENSRNOG00000066169"; gene_version "1"; transcript_id "ENSRNOT00000101581"; transcript_version "1"; exon_number "2"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSRNOE00000610554"; exon_version "1"; tag "Ensembl_canonical"; |
genome.window.bed
chrom | chromStart | chromEnd |
---|---|---|
1 | 36113689 | 36114689 |
1 | 36114689 | 36115689 |
1 | 36115689 | 36116689 |
1 | 36116689 | 36117689 |
1 | 36117689 | 36118689 |
1 | 36118689 | 36119689 |
1 | 36119689 | 36120689 |
1 | 36120689 | 36121689 |
1 | 36121689 | 36122387 |
awk '{print $1"\t"$2"\t"$3"\t0\t0\t+\n"$1"\t"$2"\t"$3"\t0\t0\t-"}' genome.window.bed > genome.window.bed.region
1 | 36112689 | 36113689 | 0 | 0 | - |
1 | 36113689 | 36114689 | 0 | 0 | + |
1 | 36113689 | 36114689 | 0 | 0 | - |
1 | 36114689 | 36115689 | 0 | 0 | + |
1 | 36114689 | 36115689 | 0 | 0 | - |
1 | 36115689 | 36116689 | 0 | 0 | + |
1 | 36115689 | 36116689 | 0 | 0 | - |
1 | 36116689 | 36117689 | 0 | 0 | + |
1 | 36116689 | 36117689 | 0 | 0 | - |
bedtools bamtobed -i hisat2.bam > hisat2.bed
chr | start | end | QNAME | MAPQ | strand |
---|---|---|---|---|---|
1 | 389 | 539 | E100050738L1C006R03102745350/1 | 60 | - |
1 | 1421 | 1571 | E100050738L1C009R04003484420/1 | 1 | + |
1 | 1421 | 1571 | E100050738L1C009R04003484420/1 | 1 | + |
1 | 1472 | 1622 | E100050738L1C039R03004028531/1 | 1 | + |
1 | 1477 | 1627 | E100050738L1C009R04003484420/2 | 1 | - |
1 | 3060 | 3209 | E100050738L1C025R02303789340/1 | 60 | + |
1 | 3060 | 3209 | E100050738L1C025R02303789340/2 | 60 | - |
1 | 4456 | 4606 | E100050738L1C042R02103200309/1 | 60 | + |
1 | 5225 | 5375 | E100050738L1C020R01301096306/1 | 60 | - |
bedtools coverage -S -a genome.window.bed.region -b hisat2.bed > genome.window.bed.region.cov
chr | star | end | reds 在char区域次数 | 匹配的总长度 | 该区域总长度 | 比例 | |||
---|---|---|---|---|---|---|---|---|---|
1 | 36112689 | 36113689 | 0 | 0 | - | 4 | 1000 | 1000 | 1.0000000 |
1 | 36113689 | 36114689 | 0 | 0 | + | 7 | 1000 | 1000 | 1.0000000 |
1 | 36113689 | 36114689 | 0 | 0 | - | 2 | 1000 | 1000 | 1.0000000 |
1 | 36114689 | 36115689 | 0 | 0 | + | 7 | 1000 | 1000 | 1.0000000 |
1 | 36114689 | 36115689 | 0 | 0 | - | 1 | 1000 | 1000 | 1.0000000 |
1 | 36115689 | 36116689 | 0 | 0 | + | 6 | 1000 | 1000 | 1.0000000 |
1 | 36115689 | 36116689 | 0 | 0 | - | 2 | 1000 | 1000 | 1.0000000 |
1 | 36116689 | 36117689 | 0 | 0 | + | 7 | 1000 | 1000 | 1.0000000 |
1 | 36116689 | 36117689 | 0 | 0 | - | 2 | 1000 | 1000 | 1.0000000 |
data <- read.table("/ssd1/wy/workspace/RNA-seq/workspace/resources/genome.window.bed.region.cov",sep="\t")
library(tidyverse)
data <- data |>
mutate(log=case_when(V6=="+"~log2(V7+1),
V6!="+"~ -log2(V7+1)))
chr <-c("chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22","chrX","chrY") #因为人类染色体还有很多非常规染色体,如果全部画出来会不好看,所有此处只画出常规染色体。也可以在文件hg19.txt里面只输入常规染色体。data2<- data[data$V1 %in% chr,]
data2<- data[data$V1 %in% c(c(1:20),"X","Y","MT"),]
color <- c("+"="#008B00","-"="#FF8247") # 定义正负链颜色
png(file="a.png")
ggplot(data2)+
facet_grid(V1~.)+
geom_point(aes(V2/1000000,log,colour=V6),size=1)+
theme(
strip.text.y=element_text(angle=0,face="bold",hjust=0),
legend.position="none",panel.grid.minor=element_blank(),
panel.grid.major=element_blank(),
plot.title=element_text(size=20,face="bold"),
axis.text.y=element_text(size=10,angle=50,face="bold"),
strip.background=element_blank(),
panel.background=element_rect(fill="white"),
axis.line=element_line(linetype=1))+
scale_colour_manual(values=color)+
scale_y_continuous(breaks=c(-15,15))+
labs(title="Reads Density in Chromosomes")+
xlab("chromosome position(Mb)")+
ylab("reads density(log2)")
dev.off()
https://bioconductor.org/packages/release/bioc/vignettes/trackViewer/inst/doc/trackViewer.html