展开

nextflow配置

最后发布时间 : 2023-12-12 18:45:04 浏览量 :

配置文件加载顺序(优先级从高到低)

  • 命令行--something value
  • 指定配置文件-c my_config
  • 当前目录下的nextflow.config
  • 家目录下$HOME/.nextflow/config
  • 脚本本身main.nf

命令行

# Pass the -with-docker / -with-conda run options directly on the command line
nextflow run main.nf -with-docker true
nextflow run main.nf -with-conda true

当前目录下的nextflow.config

// Pipeline metadata: description, author and the required Nextflow version
manifest.description = 'Proof of concept of a RNA-seq pipeline implemented with Nextflow'
manifest.author = 'Paolo Di Tommaso'
manifest.nextflowVersion = '>=22.10.0'

// Default pipeline parameters; several profiles below replace reads/transcriptome
params {
  reads = "$baseDir/data/ggal/ggal_gut_{1,2}.fq"
  transcriptome = "$baseDir/data/ggal/ggal_1_48850000_49020000.Ggal71.500bpflank.fa"
  outdir = "results"
  multiqc = "$baseDir/multiqc"
}

// Execution profiles, one per target environment
profiles {
  // Only sets the container image used by the processes
  standard {
    process {
      container = 'quay.io/nextflow/rnaseq-nf:v1.1'
    }
  }

  // Same image, with Docker execution switched on
  docker {
    process {
      container = 'quay.io/nextflow/rnaseq-nf:v1.1'
    }
    docker.enabled = true
  }

  // SLURM executor, running the image through Singularity
  slurm {
    process {
      container = 'quay.io/nextflow/rnaseq-nf:v1.1'
      executor = 'slurm'
    }
    singularity {
      enabled = true
    }
  }

  // AWS Batch executor; inputs and work directory live on S3
  batch {
    params {
      reads = 's3://rnaseq-nf/data/ggal/lung_{1,2}.fq'
      transcriptome = 's3://rnaseq-nf/data/ggal/transcript.fa'
    }
    process {
      container = 'quay.io/nextflow/rnaseq-nf:v1.1'
      executor = 'awsbatch'
      queue = 'nextflow-ci'
    }
    workDir = 's3://nextflow-ci/work'
    aws {
      region = 'eu-west-1'
      batch {
        cliPath = '/home/ec2-user/miniconda/bin/aws'
      }
    }
  }

  // Only redirects the input data to S3 (the profile name needs quotes because of the dash)
  's3-data' {
    process {
      container = 'quay.io/nextflow/rnaseq-nf:v1.1'
    }
    params {
      reads = 's3://rnaseq-nf/data/ggal/lung_{1,2}.fq'
      transcriptome = 's3://rnaseq-nf/data/ggal/transcript.fa'
    }
  }

  // Google Life Sciences executor; inputs, MultiQC dir and work dir on GCS
  gls {
    params {
      transcriptome = 'gs://rnaseq-nf/data/ggal/transcript.fa'
      reads = 'gs://rnaseq-nf/data/ggal/gut_{1,2}.fq'
      multiqc = 'gs://rnaseq-nf/multiqc'
    }
    process {
      executor = 'google-lifesciences'
      container = 'quay.io/nextflow/rnaseq-nf:v1.1'
    }
    workDir = 'gs://rnaseq-nf/scratch' // <- replace with your own bucket!
    google {
      region = 'europe-west2'
    }
  }

  // Google Batch executor; otherwise the same settings as gls
  gcb {
    params {
      transcriptome = 'gs://rnaseq-nf/data/ggal/transcript.fa'
      reads = 'gs://rnaseq-nf/data/ggal/gut_{1,2}.fq'
      multiqc = 'gs://rnaseq-nf/multiqc'
    }
    process {
      executor = 'google-batch'
      container = 'quay.io/nextflow/rnaseq-nf:v1.1'
    }
    workDir = 'gs://rnaseq-nf/scratch' // <- replace with your own bucket!
    google {
      region = 'europe-west2'
    }
  }

  // Only redirects the input data to GCS
  'gs-data' {
    process {
      container = 'quay.io/nextflow/rnaseq-nf:v1.1'
    }
    params {
      transcriptome = 'gs://rnaseq-nf/data/ggal/transcript.fa'
      reads = 'gs://rnaseq-nf/data/ggal/gut_{1,2}.fq'
    }
  }

  // Azure Batch executor; account names and keys are interpolated from the AZURE_* variables
  azb {
    workDir = 'az://nf-scratch/work'

    process {
      container = 'quay.io/nextflow/rnaseq-nf:v1.1'
      executor = 'azurebatch'
      queue = 'nextflow-ci' // replace with your own Azure pool name
    }

    azure.batch.location = 'westeurope'
    azure.batch.accountName = "$AZURE_BATCH_ACCOUNT_NAME"
    azure.batch.accountKey = "$AZURE_BATCH_ACCOUNT_KEY"
    azure.batch.autoPoolMode = true
    azure.batch.deletePoolsOnCompletion = true

    azure.storage.accountName = "$AZURE_STORAGE_ACCOUNT_NAME"
    azure.storage.accountKey = "$AZURE_STORAGE_ACCOUNT_KEY"
  }

  // Points the processes at the conda.yml file in the pipeline base directory
  conda {
    process.conda = "$baseDir/conda.yml"
  }
}

配置变量

// Plain property definition
propertyOne = 'world'
// A double-quoted string can interpolate a property defined earlier in the file
anotherProp = "Hello $propertyOne"
// $PATH is interpolated the same way; presumably it comes from the launching environment
customPath = "$PATH:/my/app/folder"

配置include

// Settings for the process scope
process {
  executor = 'sge'
  queue = 'long'
  memory = '10G'
}

// Pull in the settings defined in another configuration file
includeConfig 'path/foo.config'

配置scopes

// Scope written with curly-bracket notation
alpha {
     x = 1
     y = 'string value..'
}

// Scope written with dot-prefix notation
beta.p = 2
beta.q = 'another string ..'

配置profiles

配置文件可以包含一个或多个profiles的定义。profiles是一组配置属性,可以在使用-profile命令行选项启动管道执行时激活/选择这些属性。

// Three alternative sets of settings, selected with -profile
profiles {

    // Run on the local machine
    standard {
        process {
            executor = 'local'
        }
    }

    // Run on an SGE cluster, long queue, 10 GB per process
    cluster {
        process {
            executor = 'sge'
            queue = 'long'
            memory = '10GB'
        }
    }

    // Containerised run
    // NOTE(review): 'cirrus' is carried over as-is; verify it is still a supported
    // executor in the Nextflow version you use
    cloud {
        process {
            executor = 'cirrus'
            container = 'cbcrg/imagex'
        }
        docker {
            enabled = true
        }
    }

}

用户没有通过 -profile 指定时,默认使用 standard profile。也可以用逗号分隔,同时激活多个 profile:

# Activate the standard and cloud profiles together (comma-separated list)
nextflow run <your script> -profile standard,cloud

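注意:使用 profiles 时,不要在 profiles 内部和外部同时为同一个 scope 设置属性。例如下面的配置中,foo 和 bar 里也使用了 process scope,官方文档提示这种写法下 process.cpus 可能无法被正确应用: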

// process scope attribute set outside of any profile
process.cpus = 1

profiles {
  // each profile below also sets an attribute of the process scope
  foo {
    process.memory = '2 GB'
  }

  bar {
    process.memory = '4 GB'
  }
}

环境变量

NXF_ANSI_LOG Enables/disables ANSI console output (default true when ANSI terminal is detected).
...

完整的环境变量列表请参见 Nextflow 官方文档的 environment-variables 章节。

常用配置

// Executor scope settings
executor.queueSize= 2
executor.name          = 'local'
executor.cpus          = 16
executor.memory        = 60.GB
// NOTE(review): queueSize is assigned a second time here; if both lines live in the
// same file, this later value (1) replaces the 2 set above — confirm which is intended
executor.queueSize = 1
// Run the processes with the Kubernetes executor and these resource settings
process.executor = 'k8s'
process.cpus = 30
process.memory = 100.GB

// Kubernetes scope: storage claim, its mount path, resource type and cluster context
k8s {
    storageClaimName = 'nfdata'
    storageMountPath = '/data'
    computeResourceType = 'Job'
    // list the available contexts with: kubectl config get-contexts
    context = 'kubernetes-admin@kubernetes'
}
// Kafka connection settings (broker address and consumer group).
// NOTE(review): presumably read by a Kafka plugin loaded elsewhere — confirm
kafka {
  url = '192.168.3.60:9092'
  group = 'group1'
}

// NOTE(review): 'bioproj' looks like a project-specific scope; its consumer is not
// visible here — confirm which plugin or script reads these keys
bioproj {
  endpoint= 'http://192.168.3.60:8000/api'
  enabled = true
  kafkaEnabled = true
}
// Execution trace written to trace.txt with the listed fields
trace.enabled = true
trace.file = 'trace.txt'
trace.fields = 'task_id,name,status,exit,realtime,%cpu,rss'
trace.overwrite = true

// Timeline written to timeline.html
timeline.enabled = true
timeline.file = 'timeline.html'
timeline.overwrite = true

// Execution report written to report.html
report.enabled = true
report.file = 'report.html'
report.overwrite = true
// MongoDB connection settings: server URL, database and collection names.
// NOTE(review): 'mongo' looks like a project-specific scope; its consumer is not
// visible here — confirm which plugin or script reads these keys
mongo{
    url = 'mongodb://192.168.3.60:27017'
    databases = 'test-api'
    collection = 'task'
}
#!/usr/bin/env nextflow
// Pipeline script (DSL2)
nextflow.enable.dsl = 2

// For each input value x: wait 4 seconds, then write "<x> world!" into a file named x.
// Every file found in the task directory is emitted as output.
process sayHello {
  container 'ubuntu:rolling'

  input:
    val x

  output:
    path '*'

  script:
    """
    sleep  4
    echo '$x world!' > $x 
    """
}

// Print two parameters; they are not defined in this script, so they are presumably
// supplied through a config file or the command line
println(params.name1)
println(params.name2)

workflow {
  // Feed four greetings through sayHello and print the resulting files
  greetings = Channel.of('Bonjour', 'Ciao', 'Hello', 'Hola')
  sayHello(greetings).view()

  // NOTE(review): fromQuery is not defined in this script; presumably it is provided
  // by a plugin — confirm
  Channel.fromQuery(params.taskId).view()
}