跳到主要内容

更改任务资源请求

fsched(fsched-10.61及以上版本)支持对运行中的任务更改资源请求,目前支持更改的参数包括:

  1. MinMemoryNode
    • 更改job需要的每节点最小内存(单位是MB)
  2. MinCPUsNode
    • 更改job需要的每节点最小cpu数

作用

通过更改运行中任务的资源请求,可以在任务运行期间释放其多申请的资源,使节点上因资源不足而处于pending状态的其它job能够运行

注意
  • 减少运行中任务所申请的资源,并不会减少该任务实际使用的资源,所以其它job在该节点上运行后,可能会使节点资源负载过高

集群配置

修改以下配置,以使用支持更改运行中任务资源请求的插件select/cons_tres_ex

SelectType=select/cons_tres_ex

示例1_如何修改

  1. 提交job,等待运行,修改MinMemoryNode

    [root@head-1 ~]# srun -w compute-1 --mem 8000 sleep 120&
    [1] 17033
    [root@head-1 ~]# squeue
    JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON)
    289 partition sleep root R 0:02 1 compute-1
    [root@head-1 ~]# scontrol update job 289 MinMemoryNode=3000
  2. 查看修改后的MinMemoryNode

    [root@head-1 ~]# scontrol show job 289
    JobId=289 JobName=sleep
    UserId=root(0) GroupId=root(0) MCS_label=N/A
    Priority=4294901751 Nice=0 Account=root QOS=normal WCKey=*
    JobState=RUNNING Reason=None Dependency=(null)
    Requeue=1 Restarts=0 BatchFlag=0 Reboot=0 ExitCode=0:0
    RunTime=00:00:24 TimeLimit=UNLIMITED TimeMin=N/A
    SubmitTime=2024-12-05T14:01:30 EligibleTime=2024-12-05T14:01:30
    AccrueTime=Unknown
    StartTime=2024-12-05T14:01:30 EndTime=Unknown Deadline=N/A
    SuspendTime=None SecsPreSuspend=0 LastSchedEval=2024-12-05T14:01:30
    Partition=partition-9C3RA AllocNode:Sid=head-1:14475
    ReqNodeList=compute-1 ExcNodeList=(null)
    NodeList=compute-1
    BatchHost=compute-1
    NumNodes=1 NumCPUs=1 NumTasks=1 CPUs/Task=1 ReqB:S:C:T=0:0:*:*
    TRES=cpu=1,mem=3000M,node=1,billing=1
    Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=*
    MinCPUsNode=1 MinMemoryNode=3000M MinTmpDiskNode=0
    Features=(null) DelayBoot=00:00:00
    OverSubscribe=OK Contiguous=0 Licenses=(null) Network=(null)
    Command=sleep 120
    WorkDir=/root
    Power=
  3. 提交job,等待运行,修改MinCPUsNode

    [root@head-1 ~]# srun -w compute-1 --mincpus=3 sleep 120&
    [1] 18209
    [root@head-1 ~]# squeue
    JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON)
    290 partition sleep root R 0:02 1 compute-1
    [root@head-1 ~]# scontrol update job 290 MinCPUsNode=2
  4. 查看修改后的MinCPUsNode

    [root@head-1 ~]# scontrol show job 290
    JobId=290 JobName=sleep
    UserId=root(0) GroupId=root(0) MCS_label=N/A
    Priority=4294901750 Nice=0 Account=root QOS=normal WCKey=*
    JobState=RUNNING Reason=None Dependency=(null)
    Requeue=1 Restarts=0 BatchFlag=0 Reboot=0 ExitCode=0:0
    RunTime=00:00:28 TimeLimit=UNLIMITED TimeMin=N/A
    SubmitTime=2024-12-05T14:09:32 EligibleTime=2024-12-05T14:09:32
    AccrueTime=Unknown
    StartTime=2024-12-05T14:09:32 EndTime=Unknown Deadline=N/A
    SuspendTime=None SecsPreSuspend=0 LastSchedEval=2024-12-05T14:09:32
    Partition=partition-9C3RA AllocNode:Sid=head-1:14475
    ReqNodeList=compute-1 ExcNodeList=(null)
    NodeList=compute-1
    BatchHost=compute-1
    NumNodes=1 NumCPUs=2 NumTasks=1 CPUs/Task=1 ReqB:S:C:T=0:0:*:*
    TRES=cpu=2,mem=1M,node=1,billing=2
    Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=*
    MinCPUsNode=2 MinMemoryNode=1M MinTmpDiskNode=0
    Features=(null) DelayBoot=00:00:00
    OverSubscribe=OK Contiguous=0 Licenses=(null) Network=(null)
    Command=sleep 120
    WorkDir=/root
    Power=

示例2_修改后使其它job运行

  1. 修改MinMemoryNode

    [root@head-1 ~]# sinfo -Nel
    Thu Dec 5 14:35:37 2024
    NODELIST NODES PARTITION STATE CPUS S:C:T MEMORY TMP_DISK WEIGHT AVAIL_FE REASON
    compute-1 1 partition-9C3RA* allocated 4 4:1:1 13926 0 1 (null) none
    compute-2 1 partition-9C3RA* idle 4 4:1:1 13926 0 1 (null) none
    [root@head-1 ~]# srun -w compute-1 --mem 8000 sleep 120&
    [3] 22415
    [2] Done srun -w compute-1 --mincpus=3 sleep 120
    [root@head-1 ~]# srun -w compute-1 --mem 8000 sleep 120&
    [4] 22428
    [root@head-1 ~]# srun: job 300 queued and waiting for resources

    [root@head-1 ~]# squeue
    JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON)
    300 partition sleep root PD 0:00 1 (Resources)
    299 partition sleep root R 0:04 1 compute-1
    [root@head-1 ~]# scontrol update job 299 MinMemoryNode=2000
    [root@head-1 ~]# srun: job 300 has been allocated resources

    [root@head-1 ~]# squeue
    JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON)
    299 partition sleep root R 0:31 1 compute-1
    300 partition sleep root R 0:03 1 compute-1
  2. 修改MinCPUsNode

    [root@head-1 ~]# sinfo -Nel
    Thu Dec 5 14:35:37 2024
    NODELIST NODES PARTITION STATE CPUS S:C:T MEMORY TMP_DISK WEIGHT AVAIL_FE REASON
    compute-1 1 partition-9C3RA* allocated 4 4:1:1 13926 0 1 (null) none
    compute-2 1 partition-9C3RA* idle 4 4:1:1 13926 0 1 (null) none
    [root@head-1 ~]# srun -w compute-1 --mincpus=3 sleep 120&
    [1] 21997
    [root@head-1 ~]# srun -w compute-1 --mincpus=3 sleep 120&
    [2] 22014
    [root@head-1 ~]# srun: job 298 queued and waiting for resources

    [root@head-1 ~]# squeue
    JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON)
    298 partition sleep root PD 0:00 1 (Resources)
    297 partition sleep root R 0:03 1 compute-1
    [root@head-1 ~]# scontrol update job 297 MinCPUsNode=1
    [root@head-1 ~]# srun: job 298 has been allocated resources

    [root@head-1 ~]# squeue
    JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON)
    297 partition sleep root R 0:28 1 compute-1
    298 partition sleep root R 0:04 1 compute-1