随机抽取10000条reads（randomly subsample 10,000 reads per mate with seqkit）
// SAMPLE: randomly subsample 10,000 reads from each mate of a paired-end
// FASTQ pair using `seqkit sample`, emitting gzipped outputs under a
// per-sample directory named after pair_id.
process SAMPLE {
    // NOTE: the original used `publishDir = [ ... ]`, which is nextflow.config
    // syntax; inside a process body that is a plain variable assignment and the
    // directive is silently ignored. The directive form below is the correct one.
    publishDir path: "test_data",
        mode: 'copy',
        saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
    debug true
    label 'process_high_memory'

    input:
    tuple val(pair_id), path(reads)

    output:
    tuple val(pair_id), path("${pair_id}/*.gz")

    script:
    // mkdir -p: seqkit's -o is not guaranteed to create the parent directory,
    // so create ${pair_id}/ before writing into it.
    // Both mates use seqkit's default random seed, so R1/R2 sampling stays in
    // sync as long as the two files have reads in the same order.
    """
    mkdir -p ${pair_id}
    zcat ${reads[0]} | seqkit sample -n 10000 -o ${pair_id}/${pair_id}_1.fastq.gz &&
    zcat ${reads[1]} | seqkit sample -n 10000 -o ${pair_id}/${pair_id}_2.fastq.gz
    """
}
include { INPUT_CHECK } from './subworkflows/local/input_check'
// Entry workflow: discover paired FASTQ files, subsample the first 5 pairs
// with SAMPLE, and write a nf-core style samplesheet (sample,fastq_1,fastq_2)
// for the subsampled test data.
workflow {
    read_pairs_ch = channel.fromFilePairs("/xx/*/*_{1,2}.fq.gz", checkIfExists: true)

    // Only build test data from the first 5 pairs.
    read_pairs_ch_take = read_pairs_ch.take(5)
    SAMPLE(read_pairs_ch_take)

    // NOTE: the original injected the header row via Channel.of(...).mix(...),
    // but `mix` gives no ordering guarantee, so the header could land anywhere
    // in the CSV. `collectFile`'s `seed` is always written first, so use it for
    // the header instead (trailing \n because newLine only applies to entries).
    SAMPLE.out
        .map { pair_id, fqs -> "${pair_id},${fqs[0]},${fqs[1]}" }
        .collectFile(
            name: 'samplesheet_test.csv',
            newLine: true,
            storeDir: '.',
            seed: 'sample,fastq_1,fastq_2\n'
        )
}
取前1000条reads（take the first 1,000 reads of each mate; FASTQ stores 4 lines per read）
# Stream the first 1,000 reads of each mate from ENA without downloading the
# whole file. FASTQ uses 4 lines per record, so 1,000 reads = 4,000 lines.
# (The original used `head -100000`, which keeps 25,000 reads — inconsistent
# with the stated goal of 1,000 reads.)
curl ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR507/SRR507778/SRR507778_1.fastq.gz | gzip -d | head -4000 > y1.fastq
curl ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR507/SRR507778/SRR507778_2.fastq.gz | gzip -d | head -4000 > y2.fastq