// Subsample paired-end FASTQ files and build a test samplesheet CSV.
process SAMPLE {
    // Directive form: inside a process body publishDir takes named args directly;
    // the `publishDir = [ ... ]` assignment syntax is only valid in config files.
    // saveAs filter for versions.yml is kept for consistency with nf-core modules,
    // although this process does not currently emit one.
    publishDir path: 'test_data', mode: 'copy',
               saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
    debug true
    // seqkit sample -n loads reads into memory, hence the high-memory label.
    label 'process_high_memory'

    input:
    tuple val(pair_id), path(reads)

    output:
    tuple val(pair_id), path("${pair_id}/*.gz")

    script:
    """
    mkdir -p ${pair_id}
    zcat ${reads[0]} | seqkit sample -n 10000 -o ${pair_id}/${pair_id}_1.fastq.gz && \\
    zcat ${reads[1]} | seqkit sample -n 10000 -o ${pair_id}/${pair_id}_2.fastq.gz
    """
}

include { INPUT_CHECK } from './subworkflows/local/input_check'

workflow {
    // Pair up *_1/*_2 FASTQ files; fail fast if the glob matches nothing.
    read_pairs_ch = Channel.fromFilePairs("/xx/*/*_{1,2}.fq.gz", checkIfExists: true)

    // Limit to the first 5 pairs for the test dataset.
    read_pairs_ch_take = read_pairs_ch.take(5)

    SAMPLE(read_pairs_ch_take)

    // Emit the CSV header first, then one row per sample.
    // concat (not mix) is used so the header row is guaranteed to precede the
    // data rows; mix interleaves channels in arrival order, which is
    // nondeterministic.
    Channel.of(["sample", ["fastq_1", "fastq_2"]])
        .concat(SAMPLE.out)
        .map { it[0] + "," + it[1][0] + "," + it[1][1] }
        .collectFile(name: 'samplesheet_test.csv', newLine: true, storeDir: ".")
}
# Download both mates of SRR507778 and keep only the first 100000 lines
# (25000 reads) of each, decompressed, as y1.fastq / y2.fastq.
# head closing the pipe early terminates curl once enough data has arrived.
for mate in 1 2; do
    curl "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR507/SRR507778/SRR507778_${mate}.fastq.gz" | gzip -d | head -100000 > "y${mate}.fastq"
done