参考基因组及注释文件

最后发布时间:2022-12-20 15:33:06 浏览量:

统计基因组中碱基的个数

# Count how often each character (base) occurs in the FASTA sequence lines; lines containing ">" (headers) are skipped.
grep -v ">" "${testData}/RNA-seq/genomic/chr22_with_ERCC92.fa" | perl -ne 'chomp; $bases{$_}++ for split //; END { print "$_ $bases{$_}\n" for sort keys %bases }'

GRCh37和GRCh38都是Genome Reference Consortium(GRC)发布的人类基因组组装版本。GRCh38(也称为“build 38”)发布于2013年12月,即GRCh37于2009年发布约四年之后,因此可以将其视为在早期版本基础上修正并更新后的新版本。

  • GRCh38:Genome Reference Consortium Human Build 38;Organism:Homo sapiens (human)

参考基因组的下载

GENCODE

图片alt

图片alt


图片alt

图片alt

hg19与hg38序列的比较

图片alt

图片alt


图片alt

图片alt


图片alt

图片alt

library(tidyverse)
library("scales")
library(rtracklayer)

# Draw a horizontal ideogram-style overview of a GENCODE annotation: one
# outlined bar per chromosome, with every protein-coding gene locus shaded
# at its genomic position.

# Alternative input (GRCh38):
# gtf_data <- import("reference/gencode.v39.annotation.gtf")
gtf_data <- import("reference/gencode.v19.annotation.gtf.gz")

# Flatten the imported annotation to a data frame and cache it as TSV, then
# continue from the re-read table (columns come back as plain character /
# numeric vectors instead of factors).
gtf_data <- as.data.frame(gtf_data)
write_tsv(gtf_data, file = "gtf_data.tsv")
gtf_data <- read_tsv("gtf_data.tsv")

# Chromosomes in plotting order: axis position i belongs to chrom_levels[i].
chrom_levels <- c(paste0("chr", 1:22), "chrX", "chrY", "chrM")
chrom_key <- setNames(
  object = as.character(seq_along(chrom_levels)),
  nm = chrom_levels
)

# Kept for compatibility with code that expects `chrom_order` to be a factor
# with reversed levels. Do NOT pass it as `levels =` to factor(): factor()
# uses the values of a factor in element order (chr1..chrM), not its level
# order, so the rev() would silently have no effect. The plain character
# vector `chrom_levels` is used below to make the real ordering explicit.
chrom_order <- factor(x = chrom_levels, levels = rev(chrom_levels))

# Per-chromosome outline length. There is no chromosome-size table here, so
# the largest annotated end coordinate serves as an approximation. It must be
# max(end), not max(end) - min(start): the outline is drawn from 0 and the
# genes are drawn at absolute coordinates, so a shortened outline would let
# genes near the chromosome end stick out of it.
chrom_sizes2 <- gtf_data |>
  filter(seqnames %in% chrom_levels) |>
  group_by(chromosome = seqnames) |>
  summarise(size = max(end)) |>
  mutate(chromosome = factor(x = chromosome, levels = chrom_levels))

# Protein-coding gene loci. Only the gene-level records are kept: the
# transcript/exon/CDS records of a gene lie inside its gene record, so drawing
# them as well adds a large number of rectangles without changing the picture.
# Sequences that are not in `chrom_levels` are dropped here instead of turning
# into NA factor values that ggplot2 removes with a warning.
sample_cns <- gtf_data |>
  filter(
    type == "gene",
    gene_type == "protein_coding",
    seqnames %in% chrom_levels
  ) |>
  select(chromosome = seqnames, start, end, gene_type) |>
  mutate(chromosome = factor(x = chromosome, levels = chrom_levels))

ggplot(data = chrom_sizes2) +
  # Chromosome outlines: the factor's integer code is the position on the
  # chromosome axis, +/- 0.2 gives the bar its thickness.
  geom_rect(
    aes(
      xmin = as.numeric(chromosome) - 0.2,
      xmax = as.numeric(chromosome) + 0.2,
      ymin = 0,
      ymax = size
    ),
    colour = "black", fill = "white"
  ) +
  # Gene loci, drawn on top of the outlines with the same bar thickness.
  geom_rect(
    data = sample_cns,
    aes(
      xmin = as.numeric(chromosome) - 0.2,
      xmax = as.numeric(chromosome) + 0.2,
      ymin = start,
      ymax = end
    )
  ) +
  # Rotate the plot 90 degrees so that chromosomes run horizontally.
  coord_flip() +
  # Label the numeric positions 1..25 with the chromosome names.
  scale_x_discrete(name = "chromosome", limits = names(chrom_key)) +
  theme(
    axis.text.x = element_text(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    legend.position = "bottom"
  ) +
  labs(title = "gencode.GRCh37.p13.v19")

参考