pipeline
最后发布时间 : 2023-04-07 17:27:31
浏览量 :
GATK4_CREATESEQUENCEDICTIONARY
# Build the sequence dictionary for the reference FASTA; the expected output
# is genome.dict. The last option line must not end in a backslash, otherwise
# whatever text follows the command is swallowed as extra arguments.
gatk --java-options "-Xmx6g" CreateSequenceDictionary \
  --REFERENCE genome.fasta \
  --URI genome.fasta \
  --TMP_DIR .
- genome.dict
BWAMEM2_INDEX
# Create the index directory (-p: no error if it already exists) and build the
# bwa-mem2 index under the prefix bwamem2/genome.fasta. The -p option is given
# before the input FASTA so parsing does not depend on option permutation.
mkdir -p bwamem2
bwa-mem2 index -p bwamem2/genome.fasta genome.fasta
BWAMEM2_MEM
# Derive the index prefix from the *.amb file found below the current
# directory (symlinks are followed). The dot is escaped and the pattern is
# anchored so that only a literal trailing ".amb" is stripped from the path.
INDEX=$(find -L ./ -name "*.amb" | sed 's/\.amb$//')

# Align the paired-end reads with 2 threads and pipe the alignments straight
# into samtools sort, which writes test2.bam.
bwa-mem2 mem \
  -t 2 \
  "$INDEX" \
  test2_1.fastq.gz test2_2.fastq.gz \
  | samtools sort -@ 2 -o test2.bam -
[[id:test, data_type:bam, patient:test, sample:test, sex:XX, status:0], [/data/wangyang/sarek/work/88/1ba3e74a1d4397668039fd6d7a60b4/test.bam]]
[[id:test2, data_type:bam, patient:test, sample:test2, sex:XX, status:1], [/data/wangyang/sarek/work/43/4bb9911c4ce6a3036f55879cf2d5e6/test2.bam]]
GATK4_MARKDUPLICATES
# Mark duplicate reads (duplicates are flagged, not removed) and write an
# intermediate BAM plus the duplication metrics file.
gatk --java-options "-Xmx6g" MarkDuplicates \
  --INPUT test.bam \
  --OUTPUT test.md.cram.bam \
  --METRICS_FILE test.md.cram.metrics \
  --TMP_DIR . \
  --REFERENCE_SEQUENCE genome.fasta \
  --REMOVE_DUPLICATES false \
  --VALIDATION_STRINGENCY LENIENT

# Convert the intermediate BAM to CRAM. The BAM is deleted only when the
# conversion succeeded, so a failed conversion does not lose the data.
samtools view -Ch -T genome.fasta -o test.md.cram test.md.cram.bam \
  && rm test.md.cram.bam

# Index the CRAM (expected to produce test.md.cram.crai).
samtools index test.md.cram
- test.md.cram
- test.md.cram.crai
- test.md.cram.metrics
[[id:test2, data_type:bam, patient:test, sample:test2, sex:XX, status:1], /data/wangyang/sarek/work/cf/feda327c0495cb5500d0236286cc06/test2.md.cram, /data/wangyang/sarek/work/b6/b85c3aa7a79e8ee56e02b176b5893c/test2.md.cram.crai]
[[id:test, data_type:bam, patient:test, sample:test, sex:XX, status:0], /data/wangyang/sarek/work/f8/457448e4fe61d512f1d5416c0a2e0e/test.md.cram, /data/wangyang/sarek/work/fa/6118864553afed7a5b03250f81a0d5/test.md.cram.crai]
BUILD_INTERVALS
# Turn the FASTA index into a BED file with one interval per line:
# column 1 of the .fai, a start of 0, and column 2 of the .fai as the end.
# Both the .fai input and the BED output are tab-delimited, so the field
# separators are tabs (a space-delimited BED is rejected by tabix).
awk -v FS='\t' -v OFS='\t' '{ print $1, "0", $2 }' genome.fasta.fai > genome.fasta.bed
chr22 0 40001
[[id:[genome.fasta]], /data/wangyang/sarek/work/fe/c20d0e9d7c2b1b43b8dc3778710f1c/genome.fasta.bed]
CREATE_INTERVALS_BED
# Split genome.fasta.bed into one or more chunk files. Each output file is
# named <col1>_<col2+1>-<col3>.bed after the row that opens the chunk, and
# every input row is written unchanged to the file of its chunk.
awk -v FS=" " '
{
    # Column 5 carries the runtime estimate of the row, if there is one.
    cost = $5
    if (cost == "") {
        # No estimate in this row: fall back to (end - start) / 1000.
        cost = ($3 - $2) / 1000
    }

    # Open a new chunk for the very first row, or when the running total is
    # above 600 and adding this row would exceed 1.05 times the largest
    # single cost seen in the current chunk.
    if (outfile == "" || (total > 600 && (total + cost) > peak * 1.05)) {
        outfile = sprintf("%s_%d-%d.bed", $1, $2 + 1, $3)
        total = 0
        peak = 0
    }

    if (cost > peak) {
        peak = cost
    }
    total += cost

    print $0 > outfile
}' genome.fasta.bed
[/data/wangyang/sarek/work/03/bb302a425d048ebd08eb5656c426d9/chr22_1-40001.bed, 1]
[/data/wangyang/sarek/work/fe/c20d0e9d7c2b1b43b8dc3778710f1c/genome.fasta.bed]
[[id:chr22_1-40001], /data/wangyang/sarek/work/03/bb302a425d048ebd08eb5656c426d9/chr22_1-40001.bed]
TABIX_BGZIPTABIX_INTERVAL_SPLIT
# Block-compress the interval BED with a single thread, writing to stdout so
# the uncompressed file is kept, then build the tabix index (.tbi) for it.
bgzip -@ 1 --stdout chr22_1-40001.bed > chr22_1-40001.bed.gz
tabix chr22_1-40001.bed.gz
[[/data/wangyang/sarek/work/85/80d1d0bdc4f8f55ed6c5d25069b4d4/chr22_1-40001.bed.gz, /data/wangyang/sarek/work/85/80d1d0bdc4f8f55ed6c5d25069b4d4/chr22_1-40001.bed.gz.tbi], 1]
GERMLINE VARIANT CALLING
mpileup
[[data_type:cram, id:test, num_intervals:1, patient:test, sample:test, sex:XX, status:0], /data/wangyang/sarek/work/f8/457448e4fe61d512f1d5416c0a2e0e/test.md.cram, /data/wangyang/sarek/work/03/bb302a425d048ebd08eb5656c426d9/chr22_1-40001.bed]
# Pile up the reads of test.md.cram at the positions listed in the interval
# BED, using genome.fasta as reference, then compress the result to
# test.mpileup.gz.
samtools mpileup -f genome.fasta -o test.mpileup -l chr22_1-40001.bed test.md.cram
bgzip test.mpileup
- test.mpileup.gz
- genome.fasta.fai
cnvkit
# Convert the CRAM back to BAM; the BAM is the file handed to cnvkit below.
samtools view -T genome.fasta --fai-reference genome.fasta.fai test.md.cram -@ 2 -o test.md.bam

# Run the cnvkit batch workflow on the sample with 2 processes; --normal is
# passed without any files. The last option line must not end in a backslash,
# otherwise the text following the command is appended as extra arguments.
cnvkit.py batch \
  test.md.bam \
  --normal \
  --fasta genome.fasta \
  --targets genome.fasta.bed \
  --processes 2
- genome.fasta.target.bed
- reference.cnn
- test.md.antitargetcoverage.cnn
- test.md.bam
- test.md.bam.bai
- test.md.cnr
- test.md.targetcoverage.cnn
deepvariant
# Call variants with DeepVariant (WGS model) restricted to the interval BED,
# writing both a VCF and a gVCF; the run is split over 2 shards.
/opt/deepvariant/bin/run_deepvariant \
  --model_type=WGS \
  --ref=genome.fasta \
  --reads=test.md.cram \
  --regions=chr22_1-40001.bed \
  --num_shards=2 \
  --output_vcf=test.deepvariant.chr22_1-40001.vcf.gz \
  --output_gvcf=test.deepvariant.chr22_1-40001.g.vcf.gz
- test.deepvariant.chr22_1-40001.g.vcf.gz
- test.deepvariant.chr22_1-40001.g.vcf.gz.tbi
- test.deepvariant.chr22_1-40001.vcf.gz
- test.deepvariant.chr22_1-40001.vcf.gz.tbi
- test.deepvariant.chr22_1-40001.visual_report.html
freebayes
# Call variants with freebayes on the regions of the interval BED and write
# the uncompressed VCF.
freebayes \
  -f genome.fasta \
  --target chr22_1-40001.bed \
  test.md.cram > test.vcf

# The listed output of this step is test.vcf.gz, so compress the VCF.
bgzip test.vcf
- test.vcf.gz