batch-shipyard/slurm/slurm.conf

120 строки
2.6 KiB
Plaintext

#
# See the slurm.conf man page for more information.
#
# Template note: values wrapped in curly braces are placeholders. They are
# presumably filled in by deployment tooling before this file is installed;
# confirm against the provisioning scripts.
#
# Cluster identity and primary controller host (both templated).
ClusterName={CLUSTER_NAME}
SlurmctldHost={SLURMCTLD_HOST_PRIMARY}
# The two commented placeholder lines below look like substitution points for
# optional additional controller hosts. Keep them byte-for-byte intact, since
# tooling may match on the exact text (TODO confirm).
#{SLURMCTLD_HOST_SECONDARY}
#{SLURMCTLD_HOST_TERTIARY}
# Daemon account, listening ports and authentication plugin.
SlurmUser=slurm
SlurmctldPort=6817
SlurmdPort=6818
AuthType=auth/munge
# Controller state directory (templated).
StateSaveLocation={SLURMCTLD_STATE_SAVE_PATH}
# Disabled alternative paths using a slurm-llnl / slurm-wlm directory layout;
# kept for reference only.
#SlurmdSpoolDir=/var/lib/slurm-llnl/slurmd
#SlurmctldPidFile=/var/run/slurm-llnl/slurmctld.pid
#SlurmdPidFile=/var/run/slurm-llnl/slurmd.pid
#PluginDir=/usr/lib/x86_64-linux-gnu/slurm-wlm
# Active spool, pid-file and plugin paths.
SlurmdSpoolDir=/var/spool/slurm/slurmd
SlurmctldPidFile=/var/run/slurmctld.pid
SlurmdPidFile=/var/run/slurmd.pid
PluginDir=/usr/lib/slurm
# LOGGING
# Both daemons use the same numeric debug level (5).
SlurmctldDebug=5
# slurmctld log file has no %h option, so need to log locally
SlurmctldLogFile=/var/log/slurm/slurmctld.log
SlurmdDebug=5
# slurmd writes under a templated log directory; %h in the file name keeps
# one log file per host (see the note on %h above).
SlurmdLogFile={SLURM_LOG_PATH}/slurmd-%h.log
# PROCESS TRACKING
# cgroup-based process tracking is active; the pgid variant is kept disabled
# as a reference alternative.
#ProctrackType=proctrack/pgid
ProctrackType=proctrack/cgroup
SwitchType=switch/none
MpiDefault=none
#FirstJobId=
ReturnToService=1
# The commented, mostly empty options below are unset in this template and
# therefore fall back to whatever Slurm uses by default.
#MaxJobCount=
#PlugStackConfig=
#PropagatePrioProcess=
#PropagateResourceLimits=
#PropagateResourceLimitsExcept=
# Prolog/epilog hooks are disabled; the paths show where scripts would go.
#Prolog=/etc/slurm/prolog.d/*
#Epilog=/etc/slurm/epilog.d/*
#SrunProlog=
#SrunEpilog=
#TaskProlog=
#TaskEpilog=
# Two task plugins are enabled together: affinity and cgroup.
TaskPlugin=task/affinity,task/cgroup
#TrackWCKey=no
#TmpFS=
#UsePAM=
# TIMERS
# Per the slurm.conf man page these values are expressed in seconds.
# Both daemon timeouts share the same value (300).
SlurmctldTimeout=300
SlurmdTimeout=300
# NOTE(review): 0 presumably disables the inactivity limit -- confirm.
InactiveLimit=0
MinJobAge=300
KillWait=30
Waittime=0
# SCHEDULING
# Backfill scheduler with the "defer" parameter.
SchedulerType=sched/backfill
SchedulerParameters=defer
#SchedulerAuth=
# select/linear is the active node selection plugin. The cons_res plugin and
# its parameters are kept disabled as an alternative; enable both lines
# together and disable select/linear if switching.
SelectType=select/linear
#SelectType=select/cons_res
#SelectTypeParameters=CR_Core_Memory,CR_CORE_DEFAULT_DIST_BLOCK,CR_ONE_TASK_PER_CORE
# NOTE(review): FastSchedule is reported as deprecated in newer Slurm
# releases -- check the deployed Slurm version before upgrading.
FastSchedule=1
# Multifactor priority is disabled; the weights below are reference values
# and have no effect while commented out.
#PriorityType=priority/multifactor
#PriorityDecayHalfLife=14-0
#PriorityUsageResetPeriod=14-0
#PriorityWeightFairshare=100000
#PriorityWeightAge=1000
#PriorityWeightPartition=10000
#PriorityWeightJobSize=1000
#PriorityMaxAge=1-0
# No job completion logging plugin is configured.
JobCompType=jobcomp/none
#JobCompLoc=
# ACCOUNTING
# Job accounting data is gathered through the cgroup plugin.
JobAcctGatherType=jobacct_gather/cgroup
#JobAcctGatherFrequency=30
# Track GPU generic resources in accounting.
AccountingStorageTRES=gres/gpu
# NOTE(review): DebugFlags is a general debugging option rather than an
# accounting one; it sits in this section as found in the original layout.
DebugFlags=CPU_Bind,gres
# Accounting records are sent to slurmdbd on a templated host, port 6819.
AccountingStorageType=accounting_storage/slurmdbd
AccountingStorageHost={HOSTNAME}
AccountingStoragePort=6819
#AccountingStorageLoc=
# NOTE(review): the value is a munge socket path, not a password. This
# presumably selects the munge socket used to authenticate to slurmdbd --
# confirm against the slurm.conf man page for the deployed version.
AccountingStoragePass=/var/run/munge/munge.socket.2
AccountingStorageUser=slurm
# POWER SAVE
# Suspend, resume and resume-failure hooks are scripts located in a templated
# directory.
SuspendProgram={SHIPYARD_VAR_DIR}/suspend.sh
ResumeProgram={SHIPYARD_VAR_DIR}/resume.sh
ResumeFailProgram={SHIPYARD_VAR_DIR}/resume-fail.sh
# Templated value; the placeholder name suggests an idle reclaim time in
# seconds (TODO confirm against the tooling that fills it in).
SuspendTime={IDLE_RECLAIM_TIME_SEC}
# NOTE(review): a rate of 0 presumably means no limit on how many nodes are
# suspended or resumed per minute -- confirm in the slurm.conf man page.
SuspendRate=0
ResumeRate=0
# Suspend and resume share the same timeout value (1200).
SuspendTimeout=1200
ResumeTimeout=1200
# Commented placeholder below: presumably a substitution point for nodes
# excluded from suspension. Keep the line byte-for-byte intact.
#{SUSPEND_EXC_NODES}
# TreeWidth must be at least as large as the maximum node count for cloud nodes
TreeWidth={MAX_NODES}
# GENERIC RESOURCES
# Commented placeholder: presumably replaced with the generic resource type
# declaration when such resources are configured (TODO confirm). Keep the
# line byte-for-byte intact.
#{GRES_TYPES}
# PARTITIONS AND NODES
# Commented placeholders: presumably replaced with generated node and
# partition definitions (TODO confirm). Keep both lines byte-for-byte intact.
#{ADDITIONAL_NODES}
#{ADDITIONAL_PARTITIONS}