slurm.conf系統初始配置


# Slurm cluster configuration
## Cluster name
ClusterName=myslurm
## Hostname of the primary controller (slurmctld)
ControlMachine=node11
## IP address of the primary controller
ControlAddr=192.168.80.11
## Hostname of the backup controller
BackupController=node12
## IP address of the backup controller
BackupAddr=192.168.80.12
#
## User account the slurmd daemon runs as
SlurmdUser=root
## Port slurmctld listens on
SlurmctldPort=6817
## Port slurmd uses for communication
SlurmdPort=6818
## Authentication method used between Slurm components
AuthType=auth/munge # none|munge
## Authentication details for traffic between the Slurm daemons
## (slurmctld, slurmdbd, slurmd) and the Slurm clients.
## NOTE(review): a bare socket path looks like the legacy form; the documented
## form is a comma-separated list such as socket=<path> -- confirm before upgrading.
AuthInfo=/var/run/munge/munge.socket.2 #cred_expire|socket|ttl
#JobCredentialPrivateKey=
#JobCredentialPublicCertificate=
## Where the cluster state files are stored (on the global/shared file system,
## so that the backup controller can presumably read them as well)
StateSaveLocation=/usr/local/globle/softs/slurm/16.05.3/state
## Directory slurmd uses for its own spool/state data on each node
SlurmdSpoolDir=/var/spool/slurmd
## Interconnect (switch) plugin; switch/none selects no special switch handling
SwitchType=switch/none
## Default MPI implementation type; none means no MPI-specific setup by default
MpiDefault=none
## Scheduling / job launch
# Maximum time (in seconds) a batch job launch may take before the job is
# considered missing and its allocated resources are released.
# Default is 10 seconds. Running the prolog, loading the user's environment
# variables, or slurmd being paged back into memory may need a larger value.
#BatchStartTimeout=10
#BurstBufferType=burst_buffer/none
#CheckpointType=checkpoint/blcr|none|ompi
###
## Location of the slurmctld PID file
SlurmctldPidFile=/var/run/slurmctld.pid
## Location of the slurmd PID file
SlurmdPidFile=/var/run/slurmd.pid
## Process-tracking plugin; proctrack/pgid is active, the cgroup variant is
## kept below as a commented-out alternative
#ProctrackType=proctrack/cgroup
ProctrackType=proctrack/pgid
#PluginDir=
## ID assigned to the first job
FirstJobId=1
## Policy for returning DOWN nodes to service.
## NOTE(review): per the slurm.conf documentation, 2 lets a DOWN node become
## available again once it registers with a valid configuration -- confirm
## this is the intended policy.
ReturnToService=2
## Maximum number of job records slurmctld keeps at one time
## (this is a job count, not the largest job ID)
MaxJobCount=10000
#PlugStackConfig=
#PropagatePrioProcess=
#PropagateResourceLimits=
#PropagateResourceLimitsExcept=
#Prolog=
#Epilog=
#SrunProlog=
#SrunEpilog=
#TaskProlog=
#TaskEpilog=
#TaskPlugin=
#TrackWCKey=no
#TreeWidth=50
#TmpFS=
#UsePAM=
#
# TIMERS (all values are in seconds)
## Controller communication timeout
SlurmctldTimeout=60
## slurmd communication timeout
SlurmdTimeout=60
## Inactivity limit for unresponsive job allocations; 0 disables the limit
InactiveLimit=0
## Minimum age of a completed job before its record may be purged from slurmctld
MinJobAge=300
## Grace period between the termination signal and the forced kill of a job
KillWait=30
## How long to wait for remaining tasks after the first task of a step ends;
## 0 means wait without limit
Waittime=0
#
# SCHEDULING
## Scheduler plugin: backfill scheduling
SchedulerType=sched/backfill
#SchedulerAuth=
#SchedulerPort=
#SchedulerRootFilter=
## Resource selection plugin: select/linear allocates whole nodes to jobs
SelectType=select/linear
## NOTE(review): 1 presumably makes scheduling decisions use the node values
## declared in this file rather than what each node reports -- confirm.
FastSchedule=1
## Multifactor priority settings, all currently disabled
#PriorityType=priority/multifactor
#PriorityDecayHalfLife=14-0
#PriorityUsageResetPeriod=14-0
#PriorityWeightFairshare=100000
#PriorityWeightAge=1000
#PriorityWeightPartition=10000
#PriorityWeightJobSize=1000
#PriorityMaxAge=1-0
#
# LOGGING
## slurmctld log verbosity.
## NOTE(review): numeric level 6 is a high debug level and likely too chatty
## for production -- confirm the intended level.
SlurmctldDebug=6
## slurmctld log file, placed under the shared Slurm install tree
SlurmctldLogFile=/usr/local/globle/softs/slurm/16.05.3/log/SlurmctldLogFile
## slurmd log verbosity (same numeric scale as SlurmctldDebug)
SlurmdDebug=6
## slurmd log file, local to each node
SlurmdLogFile=/var/log/SlurmdLogFile
## Job completion logging plugin; jobcomp/none disables it
JobCompType=jobcomp/none
#JobCompLoc=
#
# ACCOUNTING
## Job accounting gather plugin is left at its default (the cgroup variant is
## commented out).
## NOTE(review): with no gather plugin selected, the sampling frequency below
## may have no effect -- confirm.
#JobAcctGatherType=jobacct_gather/cgroup
## Job accounting sampling interval, in seconds
JobAcctGatherFrequency=30

## Accounting storage settings
# Available storage types: filetxt, none, slurmdbd
AccountingStorageType=accounting_storage/slurmdbd
## Host running the primary slurmdbd
AccountingStorageHost=192.168.80.13
## Host running the backup slurmdbd
AccountingStorageBackupHost=192.168.80.14
## NOTE(review): with the slurmdbd storage type the database name and user are
## normally configured in slurmdbd.conf; verify these two values are honored here.
AccountingStorageLoc=slurm_acct_db
#AccountingStoragePass=liwanliang
## With slurmdbd this is the MUNGE socket path used to authenticate to the
## daemon, not a database password
AccountingStoragePass=/var/run/munge/munge.socket.2
AccountingStorageUser=slurmadmin
## Enforce association limits and QOS at job submission.
## Fixed: the option was misspelled "limints", which is not a valid value.
AccountingStorageEnforce=limits,qos
#AccountingStorageTRES=gres/craynetwork,license/iop1,cpu,energy,mem,node
#AccountingStoreJobComment=yes
## Node-level accounting polling interval, in seconds
AcctGatherNodeFreq=180
#AcctGatherEnergyType=acct_gather_energy/none|ipmi|rapl
#AcctGatherInfinibandType=acct_gather_infiniband/none|ofed
#AcctGatherFilesystemType=acct_gather_filesystem/none|lustre
#AcctGatherProfileType=acct_gather_profile/none|hdf5
#AllowSpecResourceUsage=0|1
#
# COMPUTE NODES
## Four identical nodes (node11..node14): 4 sockets x 1 core x 1 thread = 4 CPUs,
## RealMemory=900 (megabytes per the slurm.conf documentation).
## NOTE(review): Procs=4 appears to duplicate CPUs=4 (legacy alias) -- confirm
## and drop one of them.
NodeName=node[11-14] CPUs=4 RealMemory=900 Sockets=4 CoresPerSocket=1 ThreadsPerCore=1 Procs=4 State=UNKNOWN
## Default partition on node11-node12, no time limit
PartitionName=q_x86_1 Nodes=node[11-12] Default=YES MaxTime=INFINITE State=UP
## Second partition on node13-node14, no time limit
PartitionName=q_x86_2 Nodes=node[13-14] MaxTime=INFINITE State=UP


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM