# Run HaplotypeCaller on the mother BAM in GVCF mode
# (--emit-ref-confidence GVCF), restricted to the interval passed with -L.
# Each backslash must be the last character on its line: "\ " followed by more
# text is an escaped space, which glues a leading blank onto the next option.
gatk HaplotypeCaller \
  -R output/index/ref.fasta \
  -I output/mapping/mother.sorted.markdup.BQSR.bam \
  -O output/gvcf/mother.sorted.markdup.g.vcf \
  --emit-ref-confidence GVCF \
  -L 20:10,000,000-10,200,000
# Import the three per-sample GVCFs (mother, father, son) into the GenomicsDB
# workspace named by --genomicsdb-workspace-path, limited to the --intervals
# region. Backslashes end each line so the shell joins them into one command.
gatk GenomicsDBImport \
  -V gs://gatk-tutorials/workshop_1903/2-germline/gvcfs/mother.g.vcf.gz \
  -V gs://gatk-tutorials/workshop_1903/2-germline/gvcfs/father.g.vcf.gz \
  -V gs://gatk-tutorials/workshop_1903/2-germline/gvcfs/son.g.vcf.gz \
  --genomicsdb-workspace-path /home/jupyter-user/2-germline-vd/sandbox/trio \
  --intervals 20:10,000,000-10,200,000
对于那些不能使用 GenomicsDBImport 的用户,另一种方法是使用 CombineGVCFs 将多个 GVCF 合并。
# Merge per-sample GVCFs into a single cohort GVCF with CombineGVCFs.
# Only one --variant input is listed here; add one --variant argument per
# additional sample GVCF that should be merged.
# Backslashes end each line so the shell joins them into one command.
gatk CombineGVCFs \
  -R output/index/ref.fasta \
  --variant output/gvcf/mother.sorted.markdup.g.vcf \
  -O output/gvcf/cohort.g.vcf.gz
因为直接检查数据库中的数据并不容易,所以我们将使用 SelectVariants 从 GenomicsDB 数据库中提取这三个样本合并后的数据。
# Extract the contents of the GenomicsDB workspace into a plain GVCF file with
# SelectVariants so the records can be inspected.
# NOTE(review): gendb://sandbox/trio is a relative workspace path; presumably
# this is run from /home/jupyter-user/2-germline-vd -- confirm the working dir.
# Backslashes end each line so the shell joins them into one command.
gatk SelectVariants \
  -R /home/jupyter-user/2-germline-vd/ref/ref.fasta \
  -V gendb://sandbox/trio \
  -O /home/jupyter-user/2-germline-vd/sandbox/trio_selectvariants.g.vcf
对这三个人进行联合基因分型以产生 VCF
# Run GenotypeGVCFs on the GenomicsDB workspace to produce a VCF, restricted
# to the interval passed with -L.
# NOTE(review): gendb://sandbox/trio is a relative workspace path; presumably
# this is run from /home/jupyter-user/2-germline-vd -- confirm the working dir.
# Backslashes end each line so the shell joins them into one command.
gatk GenotypeGVCFs \
  -R /home/jupyter-user/2-germline-vd/ref/ref.fasta \
  -V gendb://sandbox/trio \
  -O /home/jupyter-user/2-germline-vd/sandbox/trioGGVCF.vcf \
  -L 20:10,000,000-10,200,000
# Run HaplotypeCaller directly on the three BAMs (mother, father, son) in a
# single invocation, writing a VCF and a -bamout BAM next to it.
# The -bamout path now ends in ".bam"; the original ".ba" was a truncated
# extension that did not match the sibling trio_hcjoint_nq.vcf output name.
# Backslashes end each line so the shell joins them into one command.
gatk HaplotypeCaller \
  -R ref/ref.fasta \
  -I bams/mother.bam \
  -I bams/father.bam \
  -I bams/son.bam \
  -O sandbox/trio_hcjoint_nq.vcf \
  -L 20:10,000,000-10,200,000 \
  -new-qual \
  -bamout sandbox/trio_hcjoint_nq.bam
# Run GenotypeGVCFs on the combined cohort GVCF with a 4 GB maximum Java heap
# (-Xmx4g passed through --java-options), writing a compressed VCF.
gatk --java-options "-Xmx4g" GenotypeGVCFs \
  -R output/index/ref.fasta \
  -V output/gvcf/cohort.g.vcf.gz \
  -O output/gvcf/output.vcf.gz

# Separate command: page through the resulting VCF. It must not share a line
# with the gatk call, otherwise "bcftools view ..." is passed to gatk as extra
# arguments and gatk's output is what gets piped into less.
bcftools view output/gvcf/output.vcf.gz | less -S