染色体上 Reads 分布图

最后发布时间:2023-02-18 19:55:13 浏览量:

染色体上 Reads 分布图

Reads在参考基因组各染色体上的分布情况,一方面有利于了解本次测序结果中reads的覆盖情况,另一方面也能够获得转录活性高低及其分布情况。
具体作图方法为:

  • 设置滑动窗口长度为1000 bp,计算窗口内部比对到各个碱基位置的reads数;
  • 选定窗口内各碱基中reads数的中值;
  • 对各窗口reads数中值以2为底进行对数转换后绘图。

方式一

生信小木屋

图中,横坐标为染色体的长度信息,纵坐标为计算得到的reads密度,其中绿色为正链,红色为负链。

 # Cut every interval listed in genome.gtf into windows of at most 1000 bp (-w 1000)
 # and write the resulting windows to genome.window.bed.
 bedtools makewindows -b genome.gtf  -w 1000 > genome.window.bed

genome.gtf

seq_id	source	type	start	end	score	strand	phase	attributes
1	ensembl	gene	36112690	36122387	.	-	.	gene_id "ENSRNOG00000066169"; gene_version "1"; gene_source "ensembl"; gene_biotype "protein_coding";
1	ensembl	transcript	36112690	36122387	.	-	.	gene_id "ENSRNOG00000066169"; gene_version "1"; transcript_id "ENSRNOT00000101581"; transcript_version "1"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; transcript_biotype "protein_coding"; tag "Ensembl_canonical";
1	ensembl	exon	36122324	36122387	.	-	.	gene_id "ENSRNOG00000066169"; gene_version "1"; transcript_id "ENSRNOT00000101581"; transcript_version "1"; exon_number "1"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSRNOE00000618632"; exon_version "1"; tag "Ensembl_canonical";
1	ensembl	CDS	36122324	36122387	.	-	0	gene_id "ENSRNOG00000066169"; gene_version "1"; transcript_id "ENSRNOT00000101581"; transcript_version "1"; exon_number "1"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSRNOP00000083062"; protein_version "1"; tag "Ensembl_canonical";
1	ensembl	exon	36121478	36121512	.	-	.	gene_id "ENSRNOG00000066169"; gene_version "1"; transcript_id "ENSRNOT00000101581"; transcript_version "1"; exon_number "2"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSRNOE00000610554"; exon_version "1"; tag "Ensembl_canonical";

genome.window.bed

chrom	chromStart	chromEnd
1	36113689	36114689
1	36114689	36115689
1	36115689	36116689
1	36116689	36117689
1	36117689	36118689
1	36118689	36119689
1	36119689	36120689
1	36120689	36121689
1	36121689	36122387
# Emit every window twice, once as a "+" row and once as a "-" row, inserting two
# placeholder 0 columns before the strand so each output row has six tab-separated fields.
awk '{print $1"\t"$2"\t"$3"\t0\t0\t+\n"$1"\t"$2"\t"$3"\t0\t0\t-"}'  genome.window.bed >  genome.window.bed.region
1	36112689	36113689	0	0	-
1	36113689	36114689	0	0	+
1	36113689	36114689	0	0	-
1	36114689	36115689	0	0	+
1	36114689	36115689	0	0	-
1	36115689	36116689	0	0	+
1	36115689	36116689	0	0	-
1	36116689	36117689	0	0	+
1	36116689	36117689	0	0	-
# Convert the alignments in hisat2.bam into BED records
# (columns: chr, start, end, QNAME, MAPQ, strand) and save them as hisat2.bed.
bedtools bamtobed -i hisat2.bam   > hisat2.bed
chr	start	end	QNAME	MAPQ	strand
1	389	539	E100050738L1C006R03102745350/1	60	-
1	1421	1571	E100050738L1C009R04003484420/1	1	+
1	1421	1571	E100050738L1C009R04003484420/1	1	+
1	1472	1622	E100050738L1C039R03004028531/1	1	+
1	1477	1627	E100050738L1C009R04003484420/2	1	-
1	3060	3209	E100050738L1C025R02303789340/1	60	+
1	3060	3209	E100050738L1C025R02303789340/2	60	-
1	4456	4606	E100050738L1C042R02103200309/1	60	+
1	5225	5375	E100050738L1C020R01301096306/1	60	-
# For every stranded window in genome.window.bed.region (-a), summarise the overlapping
# reads from hisat2.bed (-b) and write one row per window to genome.window.bed.region.cov.
# NOTE(review): per the bedtools documentation, -S counts only reads on the strand OPPOSITE
# to the window's strand (-s would require the same strand); confirm this matches the
# strandedness of the sequencing library.
bedtools coverage -S -a genome.window.bed.region -b hisat2.bed > genome.window.bed.region.cov
chr	start	end	(占位)	(占位)	strand	reads 在该区域出现的次数	匹配的总长度	该区域总长度	比例
1	36112689	36113689	0	0	-	4	1000	1000	1.0000000
1	36113689	36114689	0	0	+	7	1000	1000	1.0000000
1	36113689	36114689	0	0	-	2	1000	1000	1.0000000
1	36114689	36115689	0	0	+	7	1000	1000	1.0000000
1	36114689	36115689	0	0	-	1	1000	1000	1.0000000
1	36115689	36116689	0	0	+	6	1000	1000	1.0000000
1	36115689	36116689	0	0	-	2	1000	1000	1.0000000
1	36116689	36117689	0	0	+	7	1000	1000	1.0000000
1	36116689	36117689	0	0	-	2	1000	1000	1.0000000

# Plot strand-specific read density along each chromosome.
#
# Input : the header-less, tab-separated table written by `bedtools coverage`.
#         Columns used here:
#           V1 = chromosome name
#           V2 = window start (bp)
#           V6 = window strand ("+" or "-")
#           V7 = number of reads counted in the window
# Output: a.png in the current working directory.
library(tidyverse)

cov_file <- "/ssd1/wy/workspace/RNA-seq/workspace/resources/genome.window.bed.region.cov"

# Only the primary chromosomes are drawn; unplaced scaffolds and other
# non-standard sequences would add many near-empty panels. The names are
# Ensembl-style (no "chr" prefix); for UCSC-style names use
# paste0("chr", ...) instead.
chrom_levels <- c(1:20, "X", "Y", "MT")

# Point colours for the plus and minus strand.
color <- c("+" = "#008B00", "-" = "#FF8247")

data <- read.table(cov_file, sep = "\t", header = FALSE) |>
  mutate(
    # log2(count + 1) keeps empty windows at 0; every non-"+" row is mirrored
    # below the x axis so the two strands do not overlap.
    log2_density = if_else(V6 == "+", log2(V7 + 1), -log2(V7 + 1))
  )

data2 <- data |>
  filter(V1 %in% chrom_levels) |>
  # Explicit factor levels keep the panels in karyotype order
  # (1, 2, ..., 20, X, Y, MT) instead of lexical order (1, 10, 11, ..., 2).
  mutate(V1 = factor(V1, levels = chrom_levels))

density_plot <- ggplot(data2) +
  facet_grid(V1 ~ .) +
  geom_point(aes(V2 / 1000000, log2_density, colour = V6), size = 1) +
  theme(
    strip.text.y = element_text(angle = 0, face = "bold", hjust = 0),
    legend.position = "none",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.title = element_text(size = 20, face = "bold"),
    axis.text.y = element_text(size = 10, angle = 50, face = "bold"),
    strip.background = element_blank(),
    panel.background = element_rect(fill = "white"),
    axis.line = element_line(linetype = 1)
  ) +
  scale_colour_manual(values = color) +
  scale_y_continuous(breaks = c(-15, 15)) +
  labs(title = "Reads Density in Chromosomes") +
  xlab("chromosome position(Mb)") +
  ylab("reads density(log2)")

# A ggplot object is only drawn when it is printed. Auto-printing happens at
# top level only, so print explicitly to keep the script working when it is
# source()d or wrapped in a function. `finally` closes the device even if
# rendering fails, so no half-written PNG device is left open.
png(file = "a.png")
tryCatch(print(density_plot), finally = dev.off())

方式二

生信小木屋

https://bioconductor.org/packages/release/bioc/vignettes/trackViewer/inst/doc/trackViewer.html