# Run GATK CreateSequenceDictionary on genome.fasta.
# The JVM heap is capped at 6 GB and temporary files go to the current
# directory. The command ends on the last option line: a trailing backslash
# here would pull the following command into this argument list.
gatk --java-options "-Xmx6g" CreateSequenceDictionary \
    --REFERENCE genome.fasta \
    --URI genome.fasta \
    --TMP_DIR .
# Build the bwa-mem2 index for genome.fasta inside ./bwamem2.
# mkdir and bwa-mem2 are two separate commands; -p makes the directory
# creation safe to repeat.
mkdir -p bwamem2

# The index prefix option is given before the FASTA, matching the documented
# usage "bwa-mem2 index [-p prefix] <in.fasta>".
bwa-mem2 \
    index \
    -p bwamem2/genome.fasta \
    genome.fasta
# Align the paired reads of sample test2 with bwa-mem2 and write a sorted BAM.
#
# The index prefix is derived from the *.amb file found below the current
# directory (symlinks followed). The assignment must be its own statement:
# written as a prefix of the bwa-mem2 command, $INDEX would be expanded
# before it is set. The sed expression strips only a literal trailing ".amb".
INDEX=$(find -L ./ -name "*.amb" | sed 's/\.amb$//')

# Fail early with a clear message instead of passing an empty prefix on.
if [[ -z "$INDEX" ]]; then
    printf 'error: no *.amb index file found under ./\n' >&2
    exit 1
fi

bwa-mem2 \
    mem \
    -t 2 \
    "$INDEX" \
    test2_1.fastq.gz test2_2.fastq.gz \
    | samtools sort -@ 2 -o test2.bam -
[[id:test, data_type:bam, patient:test, sample:test, sex:XX, status:0], [/data/wangyang/sarek/work/88/1ba3e74a1d4397668039fd6d7a60b4/test.bam]] [[id:test2, data_type:bam, patient:test, sample:test2, sex:XX, status:1], [/data/wangyang/sarek/work/43/4bb9911c4ce6a3036f55879cf2d5e6/test2.bam]]
# Mark (but do not remove) duplicates in test.bam, then convert the result
# to CRAM and index it.
#
# Step 1: MarkDuplicates writes an intermediate BAM plus a metrics file.
# Long options use the double-dash form throughout.
gatk --java-options "-Xmx6g" MarkDuplicates \
    --INPUT test.bam \
    --OUTPUT test.md.cram.bam \
    --METRICS_FILE test.md.cram.metrics \
    --TMP_DIR . \
    --REFERENCE_SEQUENCE genome.fasta \
    --REMOVE_DUPLICATES false \
    --VALIDATION_STRINGENCY LENIENT

# Step 2: convert the intermediate BAM to CRAM (-C) with header (-h) against
# genome.fasta. The intermediate BAM is removed only if the conversion
# succeeded, and the CRAM is indexed only if both steps succeeded.
samtools view -Ch -T genome.fasta -o test.md.cram test.md.cram.bam \
    && rm test.md.cram.bam \
    && samtools index test.md.cram
[[id:test2, data_type:bam, patient:test, sample:test2, sex:XX, status:1], /data/wangyang/sarek/work/cf/feda327c0495cb5500d0236286cc06/test2.md.cram, /data/wangyang/sarek/work/b6/b85c3aa7a79e8ee56e02b176b5893c/test2.md.cram.crai] [[id:test, data_type:bam, patient:test, sample:test, sex:XX, status:0], /data/wangyang/sarek/work/f8/457448e4fe61d512f1d5416c0a2e0e/test.md.cram, /data/wangyang/sarek/work/fa/6118864553afed7a5b03250f81a0d5/test.md.cram.crai]
# Turn the FASTA index into a whole-contig BED file: one row per contig with
# columns <name> <0> <second .fai column>. Fields are read and written
# tab-separated so that tab-delimited consumers (e.g. tabix) can parse the
# output.
awk -v FS='\t' -v OFS='\t' '{ print $1, "0", $2 }' genome.fasta.fai > genome.fasta.bed
chr22 0 40001
[[id:[genome.fasta]], /data/wangyang/sarek/work/fe/c20d0e9d7c2b1b43b8dc3778710f1c/genome.fasta.bed]
# Split genome.fasta.bed into chunk files named <contig>_<start+1>-<end>.bed.
#
# The awk program must keep its line structure: its "#" comments run to the
# end of the line, so on a single line the first comment would swallow the
# rest of the program.
#
# FS=" " is awk's default whitespace splitting, so both space- and
# tab-separated BED rows are accepted. Rows are written out unchanged.
awk -v FS=" " '{
    t = $5  # runtime estimate
    if (t == "") {
        # no runtime estimate in this row, assume default value
        t = ($3 - $2) / 1000
    }
    if (name == "" || (chunk > 600 && (chunk + t) > longest * 1.05)) {
        # start a new chunk
        name = sprintf("%s_%d-%d.bed", $1, $2+1, $3)
        chunk = 0
        longest = 0
    }
    if (t > longest)
        longest = t
    chunk += t
    print $0 > name
}' genome.fasta.bed
[/data/wangyang/sarek/work/03/bb302a425d048ebd08eb5656c426d9/chr22_1-40001.bed, 1] [/data/wangyang/sarek/work/fe/c20d0e9d7c2b1b43b8dc3778710f1c/genome.fasta.bed] [[id:chr22_1-40001], /data/wangyang/sarek/work/03/bb302a425d048ebd08eb5656c426d9/chr22_1-40001.bed]
# Compress the interval BED with bgzip (to stdout, redirected to .bed.gz),
# then index the compressed file with tabix. These are two separate commands;
# on one line "tabix ..." would be passed to bgzip as extra arguments.
bgzip --threads 1 -c chr22_1-40001.bed > chr22_1-40001.bed.gz
tabix chr22_1-40001.bed.gz
[[/data/wangyang/sarek/work/85/80d1d0bdc4f8f55ed6c5d25069b4d4/chr22_1-40001.bed.gz, /data/wangyang/sarek/work/85/80d1d0bdc4f8f55ed6c5d25069b4d4/chr22_1-40001.bed.gz.tbi], 1]
[[data_type:cram, id:test, num_intervals:1, patient:test, sample:test, sex:XX, status:0], /data/wangyang/sarek/work/f8/457448e4fe61d512f1d5416c0a2e0e/test.md.cram, /data/wangyang/sarek/work/03/bb302a425d048ebd08eb5656c426d9/chr22_1-40001.bed]
# Produce a pileup of test.md.cram restricted to the positions listed in
# chr22_1-40001.bed, then compress the result with bgzip. bgzip is a separate
# command and must not be part of the mpileup argument list.
samtools mpileup \
    --fasta-ref genome.fasta \
    --output test.mpileup \
    -l chr22_1-40001.bed \
    test.md.cram
bgzip test.mpileup
# Convert the CRAM to BAM and run the CNVkit batch pipeline on it.
#
# Step 1: CRAM -> BAM. Options are placed before the input file so the
# command does not depend on option permutation by the argument parser.
samtools view -T genome.fasta --fai-reference genome.fasta.fai -@ 2 -o test.md.bam test.md.cram

# Step 2: CNVkit batch. --normal is passed without any sample files.
# NOTE(review): presumably this requests a flat reference; confirm against
# the CNVkit documentation. The command ends on its last option: a trailing
# backslash would pull the next command into this argument list.
cnvkit.py \
    batch \
    test.md.bam \
    --normal \
    --fasta genome.fasta \
    --targets genome.fasta.bed \
    --processes 2
# Run DeepVariant on test.md.cram for the regions in chr22_1-40001.bed,
# writing both a VCF and a gVCF. One option per line, each joined by a real
# line continuation so no argument picks up a stray leading space.
/opt/deepvariant/bin/run_deepvariant \
    --ref=genome.fasta \
    --reads=test.md.cram \
    --output_vcf=test.deepvariant.chr22_1-40001.vcf.gz \
    --output_gvcf=test.deepvariant.chr22_1-40001.g.vcf.gz \
    --model_type WGS \
    --regions chr22_1-40001.bed \
    --num_shards=2
# Run freebayes on test.md.cram against genome.fasta, limited to the targets
# in chr22_1-40001.bed. The VCF is written to stdout and redirected to
# test.vcf.
freebayes \
    -f genome.fasta \
    --target chr22_1-40001.bed \
    test.md.cram > test.vcf