展开

pipeline

最后发布时间 : 2023-04-07 17:27:31 浏览量 :

生信小木屋

GATK4_CREATESEQUENCEDICTIONARY

gatk --java-options "-Xmx6g" CreateSequenceDictionary \
    --REFERENCE genome.fasta \
    --URI genome.fasta \
    --TMP_DIR .
  • genome.dict

BWAMEM2_INDEX

mkdir bwamem2
bwa-mem2 \
    index \
    genome.fasta -p bwamem2/genome.fasta

BWAMEM2_MEM

INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'`
bwa-mem2 \
    mem \
    -t 2 \
    $INDEX \
    test2_1.fastq.gz test2_2.fastq.gz \
    | samtools sort  -@ 2 -o test2.bam -
[[id:test, data_type:bam, patient:test, sample:test, sex:XX, status:0], [/data/wangyang/sarek/work/88/1ba3e74a1d4397668039fd6d7a60b4/test.bam]]
[[id:test2, data_type:bam, patient:test, sample:test2, sex:XX, status:1], [/data/wangyang/sarek/work/43/4bb9911c4ce6a3036f55879cf2d5e6/test2.bam]]

GATK4_MARKDUPLICATES

gatk --java-options "-Xmx6g" MarkDuplicates \
    --INPUT test.bam \
    --OUTPUT test.md.cram.bam \
    --METRICS_FILE test.md.cram.metrics \
    --TMP_DIR . \
    --REFERENCE_SEQUENCE genome.fasta \
    -REMOVE_DUPLICATES false -VALIDATION_STRINGENCY LENIENT

samtools view -Ch -T genome.fasta -o test.md.cram test.md.cram.bam
rm test.md.cram.bam
samtools index test.md.cram
  • test.md.cram
  • test.md.cram.crai
  • test.md.cram.metrics
[[id:test2, data_type:bam, patient:test, sample:test2, sex:XX, status:1], /data/wangyang/sarek/work/cf/feda327c0495cb5500d0236286cc06/test2.md.cram, /data/wangyang/sarek/work/b6/b85c3aa7a79e8ee56e02b176b5893c/test2.md.cram.crai]
[[id:test, data_type:bam, patient:test, sample:test, sex:XX, status:0], /data/wangyang/sarek/work/f8/457448e4fe61d512f1d5416c0a2e0e/test.md.cram, /data/wangyang/sarek/work/fa/6118864553afed7a5b03250f81a0d5/test.md.cram.crai]

BUILD_INTERVALS

awk -v FS='\t' -v OFS='\t' '{ print $1, "0", $2 }' genome.fasta.fai > genome.fasta.bed
chr22   0       40001
[[id:[genome.fasta]], /data/wangyang/sarek/work/fe/c20d0e9d7c2b1b43b8dc3778710f1c/genome.fasta.bed]

CREATE_INTERVALS_BED

awk -vFS="\t" '{
    t = $5  # runtime estimate
    if (t == "") {
        # no runtime estimate in this row, assume default value
        t = ($3 - $2) / 1000
    }
    if (name == "" || (chunk > 600 && (chunk + t) > longest * 1.05)) {
        # start a new chunk
        name = sprintf("%s_%d-%d.bed", $1, $2+1, $3)
        chunk = 0
        longest = 0
    }
    if (t > longest)
        longest = t
    chunk += t
    print $0 > name
}' genome.fasta.bed
[/data/wangyang/sarek/work/03/bb302a425d048ebd08eb5656c426d9/chr22_1-40001.bed, 1]
[/data/wangyang/sarek/work/fe/c20d0e9d7c2b1b43b8dc3778710f1c/genome.fasta.bed]
[[id:chr22_1-40001], /data/wangyang/sarek/work/03/bb302a425d048ebd08eb5656c426d9/chr22_1-40001.bed]

TABIX_BGZIPTABIX_INTERVAL_SPLIT

bgzip  --threads 1 -c  chr22_1-40001.bed > chr22_1-40001.bed.gz
tabix  chr22_1-40001.bed.gz
[[/data/wangyang/sarek/work/85/80d1d0bdc4f8f55ed6c5d25069b4d4/chr22_1-40001.bed.gz, /data/wangyang/sarek/work/85/80d1d0bdc4f8f55ed6c5d25069b4d4/chr22_1-40001.bed.gz.tbi], 1]

GERMLINE VARIANT CALLING

mpileup

[[data_type:cram, id:test, num_intervals:1, patient:test, sample:test, sex:XX, status:0], /data/wangyang/sarek/work/f8/457448e4fe61d512f1d5416c0a2e0e/test.md.cram, /data/wangyang/sarek/work/03/bb302a425d048ebd08eb5656c426d9/chr22_1-40001.bed]
samtools mpileup \
    --fasta-ref genome.fasta \
    --output test.mpileup \
     \
    -l chr22_1-40001.bed \
    test.md.cram
bgzip test.mpileup
  • test.mpileup.gz
  • genome.fasta.fai

cnvkit

samtools view -T genome.fasta --fai-reference genome.fasta.fai test.md.cram -@ 2 -o test.md.bam

cnvkit.py \
    batch \
    test.md.bam \
    --normal  \
    --fasta genome.fasta \
     \
    --targets genome.fasta.bed \
    --processes 2
  • genome.fasta.target.bed
  • reference.cnn
  • test.md.antitargetcoverage.cnn
  • test.md.bam
  • test.md.bam.bai
  • test.md.cnr
  • test.md.targetcoverage.cnn

deepvariant

/opt/deepvariant/bin/run_deepvariant \
    --ref=genome.fasta \
    --reads=test.md.cram \
    --output_vcf=test.deepvariant.chr22_1-40001.vcf.gz \
    --output_gvcf=test.deepvariant.chr22_1-40001.g.vcf.gz \
    --model_type WGS \
    --regions chr22_1-40001.bed \
    --num_shards=2
  • test.deepvariant.chr22_1-40001.g.vcf.gz
  • test.deepvariant.chr22_1-40001.g.vcf.gz.tbi
  • test.deepvariant.chr22_1-40001.vcf.gz
  • test.deepvariant.chr22_1-40001.vcf.gz.tbi
  • test.deepvariant.chr22_1-40001.visual_report.html

freebayes

freebayes \
    -f genome.fasta \
    --target chr22_1-40001.bed \
    test.md.cram > test.vcf
  • test.vcf.gz

haplotypecaller

manta

strelka

tiddit