sudo su - root
scontrol update NodeName=node2 State=resume
squeue | grep smk-gene | awk '{print $1}' | xargs scancel
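If the jobs to cancel all share that exact job name, scancel can also filter by name directly (a hedged alternative; -n matches the full job name, so it will not catch the partial matches the grep above would):

scancel -n smk-gene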
scontrol show node node1
NodeName=node1 Arch=x86_64 CoresPerSocket=26
   CPUAlloc=0 CPUEfctv=208 CPUTot=208 CPULoad=0.88
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=(null)
   NodeAddr=node1 NodeHostName=node1 Version=22.05.3
   OS=Linux 5.4.0-26-generic #30-Ubuntu SMP Mon Apr 20 16:58:30 UTC 2020
   RealMemory=512000 AllocMem=0 FreeMem=510881 Sockets=4 Boards=1
   State=IDLE ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ubuntu,low,med,high
   BootTime=2023-03-16T10:48:53 SlurmdStartTime=2023-03-16T10:49:52
   LastBusyTime=2023-03-16T10:51:36
   CfgTRES=cpu=208,mem=500G,billing=208
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 AveWatts=0 ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s
The corresponding node definition (slurm.conf format):
NodeName=node1 CPUs=208 RealMemory=512000 Boards=1 SocketsPerBoard=4 CoresPerSocket=26 ThreadsPerCore=2 State=UNKNOWN
On the node, check memory usage:
free -h
               total        used        free      shared  buff/cache   available
Mem:           503Gi       3.2Gi       498Gi       9.0Mi       1.4Gi       497Gi
Swap:          9.3Gi          0B       9.3Gi
squeue --format="%.18i %.9P %.30j %.8u %.8T %.10M %.9l %.6D %R"
To display more characters of the job name (its full name), simply increase the width of the %.30j field.
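For example, a variant that widens the job-name column to 60 characters (only the %j width changes from the command above):

squeue --format="%.18i %.9P %.60j %.8u %.8T %.10M %.9l %.6D %R"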
srun -w node4 --cpus-per-task=50 --mem-per-cpu=2G sleep 1000
-w (--nodelist) runs the job on the specified compute node (node4 here); --cpus-per-task=50 allocates 50 CPUs (hardware threads) per task; --mem-per-cpu=2G allocates 2 GB of memory per allocated CPU.
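For a non-interactive submission, the same resources can be requested in a batch script; a minimal sketch, assuming the script is saved as sleep-test.sh (the job name and log file are placeholders):

#!/bin/bash
#SBATCH --job-name=sleep-test        # placeholder job name
#SBATCH --nodelist=node4             # same node constraint as srun -w node4
#SBATCH --cpus-per-task=50           # 50 CPUs for the single task
#SBATCH --mem-per-cpu=2G             # 2 GB of memory per allocated CPU
#SBATCH --output=sleep-test.out      # placeholder stdout/stderr log

sleep 1000

Submit it with: sbatch sleep-test.sh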
Use scontrol to inspect this job:
scontrol show job 578444
JobId=578444 JobName=sleep
   UserId=zyd(1001) GroupId=zyd(1001) MCS_label=N/A
   Priority=1 Nice=0 Account=test QOS=normal
   JobState=RUNNING Reason=None Dependency=(null)
   Requeue=1 Restarts=0 BatchFlag=0 Reboot=0 ExitCode=0:0
   RunTime=00:00:16 TimeLimit=UNLIMITED TimeMin=N/A
   SubmitTime=2023-03-16T17:56:13 EligibleTime=2023-03-16T17:56:13
   AccrueTime=Unknown
   StartTime=2023-03-16T17:56:13 EndTime=Unknown Deadline=N/A
   PreemptEligibleTime=2023-03-16T17:56:13 PreemptTime=None
   SuspendTime=None SecsPreSuspend=0 LastSchedEval=2023-03-16T17:56:13 Scheduler=Main
   Partition=ubuntu AllocNode:Sid=master:167597
   ReqNodeList=node4 ExcNodeList=(null)
   NodeList=node4
   BatchHost=node4
   NumNodes=1 NumCPUs=50 NumTasks=1 CPUs/Task=50 ReqB:S:C:T=0:0:*:*
   TRES=cpu=50,mem=100G,node=1,billing=50
   Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=*
   MinCPUsNode=50 MinMemoryCPU=2G MinTmpDiskNode=0
   Features=(null) DelayBoot=00:00:00
   OverSubscribe=OK Contiguous=0 Licenses=(null) Network=(null)
   Command=sleep
   WorkDir=/data/metagenomics/pml_nextflow
   Power=
Note TRES=cpu=50,mem=100G: 50 trackable CPUs (TRES) were allocated, and since we requested 2 GB of memory per CPU, mem works out to 100G (50 × 2 GB).
srun -w node4 --mem=2G --cpus-per-task=50 sleep 1000
This time --mem-per-cpu is replaced with --mem, which requests memory per node rather than per CPU.
JobId=578445 JobName=sleep
   UserId=zyd(1001) GroupId=zyd(1001) MCS_label=N/A
   Priority=1 Nice=0 Account=test QOS=normal
   JobState=RUNNING Reason=None Dependency=(null)
   Requeue=1 Restarts=0 BatchFlag=0 Reboot=0 ExitCode=0:0
   RunTime=00:01:03 TimeLimit=UNLIMITED TimeMin=N/A
   SubmitTime=2023-03-16T18:11:38 EligibleTime=2023-03-16T18:11:38
   AccrueTime=Unknown
   StartTime=2023-03-16T18:11:38 EndTime=Unknown Deadline=N/A
   PreemptEligibleTime=2023-03-16T18:11:38 PreemptTime=None
   SuspendTime=None SecsPreSuspend=0 LastSchedEval=2023-03-16T18:11:38 Scheduler=Main
   Partition=ubuntu AllocNode:Sid=master:167597
   ReqNodeList=node4 ExcNodeList=(null)
   NodeList=node4
   BatchHost=node4
   NumNodes=1 NumCPUs=50 NumTasks=1 CPUs/Task=50 ReqB:S:C:T=0:0:*:*
   TRES=cpu=50,mem=2G,node=1,billing=50
   Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=*
   MinCPUsNode=50 MinMemoryNode=2G MinTmpDiskNode=0
   Features=(null) DelayBoot=00:00:00
   OverSubscribe=OK Contiguous=0 Licenses=(null) Network=(null)
   Command=sleep
   WorkDir=/data/metagenomics/pml_nextflow
   Power=
Now TRES=cpu=50,mem=2G shows that mem has become 2G: the allocation on the node gets 2 GB in total, regardless of the CPU count.
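To check a job's CPU and memory request at a glance instead of reading the full scontrol record, the %C (CPU count) and %m (minimum memory) fields can be added to the squeue format string; a small sketch reusing the format from earlier:

squeue --format="%.18i %.9P %.30j %.8u %.8T %.6C %.10m %R"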
These parameters can also be configured in Nextflow.
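A minimal sketch of how these resources might be declared for the SLURM executor in nextflow.config; the partition (queue), CPU count, memory and node name below are taken from the examples above and are placeholders to adjust for your cluster:

process {
    executor       = 'slurm'
    queue          = 'ubuntu'              // Slurm partition (placeholder)
    cpus           = 50                    // CPUs requested per task
    memory         = '100 GB'              // total memory for the task
    clusterOptions = '--nodelist=node4'    // extra raw Slurm options (placeholder)
}

With the slurm executor, Nextflow submits each process through sbatch and maps cpus and memory onto the corresponding Slurm resource requests.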
# Add the none and test accounts and grant them the corresponding permissions
sacctmgr add account none,test Cluster=MyCluster Description="My slurm cluster" Organization="USTC"

# Add user test1 under the test account
sacctmgr -i add user test1 account=test

# QOS (Quality of Service) controls job priority
sacctmgr show qos format=name,priority

# View the QOS associated with accounts and users
sacctmgr show assoc
sacctmgr show assoc where name=hmli
sacctmgr show assoc format=cluster,user,qos
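Building on the QOS listing above, a new QOS can be created and attached to a user; a hedged sketch with arbitrary example values (the QOS name "high" and priority 100 are not from this cluster):

# Create a QOS and raise its priority (example values)
sacctmgr add qos high
sacctmgr modify qos high set Priority=100
# Attach the QOS to user test1 in addition to any existing ones
sacctmgr modify user where name=test1 set qos+=high
# Verify the association
sacctmgr show assoc where user=test1 format=cluster,account,user,qos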
srun --help
Usage: srun [OPTIONS(0)... [executable(0) [args(0)...]]] [ : [OPTIONS(N)...]] executable(N) [args(N)...]

Parallel run options:
  -A, --account=name          charge job to specified account
      --acctg-freq=<datatype>=<interval>
                              accounting and profiling sampling intervals.
                              Supported datatypes: task=<interval>
                              energy=<interval> network=<interval>
                              filesystem=<interval>
      --bb=<spec>             burst buffer specifications
      --bbf=<file_name>       burst buffer specification file
      --bcast=<dest_path>     Copy executable file to compute nodes
      --bcast-exclude=<paths> Shared object directory paths to exclude
  -b, --begin=time            defer job until HH:MM MM/DD/YY
  -c, --cpus-per-task=ncpus   number of cpus required per task
      --comment=name          arbitrary comment
      --compress[=library]    data compression library used with --bcast
      --container             Path to OCI container bundle
      --cpu-freq=min[-max[:gov]] requested cpu frequency (and governor)
  -d, --dependency=type:jobid[:time]
                              defer job until condition on jobid is satisfied
      --deadline=time         remove the job if no ending possible before this
                              deadline (start > (deadline - time[-min]))
      --delay-boot=mins       delay boot for desired node features
  -D, --chdir=path            change remote current working directory
      --export=env_vars|NONE  environment variables passed to launcher with
                              optional values or NONE (pass no variables)
  -e, --error=err             location of stderr redirection
      --epilog=program        run "program" after launching job step
  -E, --preserve-env          env vars for node and task counts override
                              command-line flags
      --gres=list             required generic resources
      --gres-flags=opts       flags related to GRES management
  -H, --hold                  submit job in held state
  -i, --input=in              location of stdin redirection
  -I, --immediate[=secs]      exit if resources not available in "secs"
      --jobid=id              run under already allocated job
  -J, --job-name=jobname      name of job
  -k, --no-kill               do not kill job on node failure
  -K, --kill-on-bad-exit      kill the job if any task terminates with a
                              non-zero exit code
  -l, --label                 prepend task number to lines of stdout/err
  -L, --licenses=names        required license, comma separated
  -M, --clusters=names        Comma separated list of clusters to issue
                              commands to. Default is current cluster.
                              Name of 'all' will submit to run on all clusters.
                              NOTE: SlurmDBD must up.
  -m, --distribution=type     distribution method for processes to nodes
                              (type = block|cyclic|arbitrary)
      --mail-type=type        notify on state change: BEGIN, END, FAIL or ALL
      --mail-user=user        who to send email notification for job state
                              changes
      --mcs-label=mcs         mcs label if mcs plugin mcs/group is used
      --mpi=type              type of MPI being used
      --multi-prog            if set the program name specified is the
                              configuration specification for multiple programs
  -n, --ntasks=ntasks         number of tasks to run
      --nice[=value]          decrease scheduling priority by value
      --ntasks-per-node=n     number of tasks to invoke on each node
  -N, --nodes=N               number of nodes on which to run (N = min[-max])
  -o, --output=out            location of stdout redirection
  -O, --overcommit            overcommit resources
      --overlap               Allow other steps to overlap this step
      --het-group=value       hetjob component allocation(s) in which to launch
                              application
  -p, --partition=partition   partition requested
      --power=flags           power management options
      --priority=value        set the priority of the job to value
      --prolog=program        run "program" before launching job step
      --profile=value         enable acct_gather_profile for detailed data
                              value is all or none or any combination of
                              energy, lustre, network or task
      --propagate[=rlimits]   propagate all [or specific list of] rlimits
      --pty                   run task zero in pseudo terminal
      --quit-on-interrupt     quit on single Ctrl-C
  -q, --qos=qos               quality of service
  -Q, --quiet                 quiet mode (suppress informational messages)
      --reboot                reboot block before starting job
  -r, --relative=n            run job step relative to node n of allocation
  -s, --oversubscribe         over-subscribe resources with other jobs
  -S, --core-spec=cores       count of reserved cores
      --send-libs[=yes|no]    autodetect and broadcast shared objects
      --signal=[R:]num[@time] send signal when time limit within time seconds
      --slurmd-debug=level    slurmd debug level
      --spread-job            spread job across as many nodes as possible
      --switches=max-switches{@max-time-to-wait}
                              Optimum switches and max time to wait for optimum
      --task-epilog=program   run "program" after launching task
      --task-prolog=program   run "program" before launching task
      --thread-spec=threads   count of reserved threads
  -T, --threads=threads       set srun launch fanout
  -t, --time=minutes          time limit
      --time-min=minutes      minimum time limit (if distinct)
  -u, --unbuffered            do not line-buffer stdout/err
      --use-min-nodes         if a range of node counts is given, prefer the
                              smaller count
  -v, --verbose               verbose mode (multiple -v's increase verbosity)
  -W, --wait=sec              seconds to wait after first task exits before
                              killing job
      --wckey=wckey           wckey to run job under
  -X, --disable-status        Disable Ctrl-C status feature

Constraint options:
      --cluster-constraint=list
                              specify a list of cluster-constraints
      --contiguous            demand a contiguous range of nodes
  -C, --constraint=list       specify a list of constraints
      --mem=MB                minimum amount of real memory
      --mincpus=n             minimum number of logical processors (threads)
                              per node
      --reservation=name      allocate resources from named reservation
      --tmp=MB                minimum amount of temporary disk
  -w, --nodelist=hosts...     request a specific list of hosts
  -x, --exclude=hosts...      exclude a specific list of hosts
  -Z, --no-allocate           don't allocate nodes (must supply -w)

Consumable resources related options:
      --exact                 use only the resources requested for the step
                              (by default, all non-gres resources on each node
                              in the allocation will be used in the step)
      --exclusive[=user]      for job allocation, this allocates nodes in
                              in exclusive mode
                              for job steps, this is equivalent to --exact
      --exclusive[=mcs]       allocate nodes in exclusive mode when cpu
                              consumable resource is enabled and mcs plugin is
                              enabled (--exact implied) or don't share CPUs for
                              job steps
      --mem-per-cpu=MB        maximum amount of real memory per allocated cpu
                              required by the job.
                              --mem >= --mem-per-cpu if --mem is specified.
      --resv-ports            reserve communication ports

Affinity/Multi-core options: (when the task/affinity plugin is enabled)
                              For the following 4 options, you are specifying
                              the minimum resources available for the node(s)
                              allocated to the job.
      --sockets-per-node=S    number of sockets per node to allocate
      --cores-per-socket=C    number of cores per socket to allocate
      --threads-per-core=T    number of threads per core to allocate
  -B, --extra-node-info=S[:C[:T]]
                              combine request of sockets per node, cores per
                              socket and threads per core. Specify an asterisk
                              (*) as a placeholder, a minimum value, or a
                              min-max range.
      --ntasks-per-core=n     number of tasks to invoke on each core
      --ntasks-per-socket=n   number of tasks to invoke on each socket
      --cpu-bind=             Bind tasks to CPUs
                              (see "--cpu-bind=help" for options)
      --hint=                 Bind tasks according to application hints
                              (see "--hint=help" for options)
      --mem-bind=             Bind memory to locality domains (ldom)
                              (see "--mem-bind=help" for options)

GPU scheduling options:
      --cpus-per-gpu=n        number of CPUs required per allocated GPU
  -G, --gpus=n                count of GPUs required for the job
      --gpu-bind=...          task to gpu binding options
      --gpu-freq=...          frequency and voltage of GPUs
      --gpus-per-node=n       number of GPUs required per allocated node
      --gpus-per-socket=n     number of GPUs required per allocated socket
      --gpus-per-task=n       number of GPUs required per spawned task
      --mem-per-gpu=n         real memory required per allocated GPU

Help options:
  -h, --help                  show this help message
      --usage                 display brief usage message

Other options:
  -V, --version               output version information and exit
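As a closing example, several of the options listed above can be combined in a single interactive run; a sketch with arbitrary placeholder values for the partition, job name, time limit and output file:

srun -p ubuntu -J quick-test -N 1 -n 1 -c 4 --mem=8G -t 30 -o quick-test.out hostname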