Browse Source

gn: modularize slurm service

pull/5/head
Efraim Flashner 2 years ago
parent
commit
54c43ad0cd
Signed by: efraim GPG Key ID: 41AAE7DCCA3D8351
  1. 104
      gn/deploy/octopus.scm
  2. 136
      gn/services/science.scm

104
gn/deploy/octopus.scm

@ -10,71 +10,6 @@
(plain-file "id_rsa.pub"
"ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDUCDY8ZKFF/ln0yzDt3CNmKz3cT4wzNv9bzCKvOBXcL0O7JtPWwqgLlZgmMHfzhzgReAkHcrt+Gdsyduzm/s9Y8c6QpyfaH6uoDwjfoOs6GrAjZaOXmAdncf+9HZEAy/IrygQ1YFRu6BvYogsdhhtN+O6IXBuvQQDRzldHs53Y53DK06Nrs19vAPwELXcDxcx1FvO+/L9nT8RHkI1Z0ucgTS+F/BWXl8+mh89r4j+4IRpZXOuCD0DrW5rgEE1EygF2dVdWZQESi23gU5Mt6vnmysXzwixB7j6I+xTih8LH4pz7hewEx6754e/cs9Gm7ZtfXKfXUt6+GtsBSBF3ULKl efraimf@octopus01"))
;; Static contents of slurm.conf, wrapped as a 'plain-file' named "slurm.conf".
;; Lines starting with '#' inside the string are commented-out settings; the
;; trailing '# default ...' remarks record the value that would otherwise apply.
;; The final two lines declare a single node "octopus" and a partition "debug"
;; covering all nodes.
;; Note: 'string-append' is called with a single argument here.
(define %slurm.conf
(plain-file "slurm.conf"
(string-append
"# Defaults are commented out, otherwise noted at the end of the line
# Values are from example in the man page or from Debian
ClusterName=linux # no default, suggests lowercase
#ControlMachine=octopus # defunct, use SlurmctldHost
SlurmctldHost=octopus # no default, falls back to next SlurmctldHost in list
SlurmUser=slurm # default root, not recommended
#SlurmctldPort=6817
#SlurmdPort=6818
#AuthType=auth/munge
StateSaveLocation=/var/spool/slurmd/ctld # default /var/spool
#SlurmdSpoolDir=/var/spool/slurmd
#SwitchType=switch/none
#MpiDefault=none
#SlurmctldPidFile=/var/run/slurmctld.pid
#SlurmdPidFile=/var/run/slurmd.pid
ProctrackType=proctrack/pgid # default proctrack/cgroup
ReturnToService=1 # default 0
DebugFlags=NO_CONF_HASH # default empty
# TIMERS
SlurmctldTimeout=300 # default 120
#SlurmdTimeout=300
#InactiveLimit=0
#MinJobAge=300
#KillWait=30
#WaitTime=0
#
# LOGGING
#SlurmctldDebug=3
SlurmctldLogFile=/var/log/slurmctld.log # default none, syslog
#SlurmdDebug=3
SlurmdLogFile=/var/log/slurmd.log # default none, syslog
#JobCompType=jobcomp/none
# COMPUTE NODES
NodeName=octopus CPUs=1 Boards=1 SocketsPerBoard=1 CoresPerSocket=1 ThreadsPerCore=1 RealMemory=1024
PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP")))
;; Static contents of cgroup.conf, wrapped as a 'plain-file' named
;; "cgroup.conf".  It sets CgroupAutomount and ConstrainCores to "yes"; the
;; trailing remarks note that both would otherwise default to "no".
(define %cgroup.conf
(plain-file "cgroup.conf"
(string-append
"###
# Slurm cgroup support configuration file
###
CgroupAutomount=yes # default no
ConstrainCores=yes # default no
#")))
;; Static contents of slurmdbd.conf, wrapped as a 'plain-file' named
;; "slurmdbd.conf".  Lines starting with '#' inside the string are
;; commented-out settings; the trailing remarks are the author's notes on
;; which values must be given explicitly.
(define %slurmdbd.conf
(plain-file "slurmdbd.conf"
(string-append
"#AuthType=auth/munge
#AuthInfo=/var/run/munge/munge.socket.2
DbdHost=localhost # must be specified
StorageHost=localhost # unclear, must be specified?
StorageType=accounting_storage/none # must be specified
StorageUser=slurm # unclear
#PidFile=/var/run/slurmdbd.pid
LogFile=/var/log/slurmdbd.log # default none, syslog
SlurmUser=slurm # default root, not recommended")))
(operating-system
(host-name "octopus")
@ -151,14 +86,6 @@ SlurmUser=slurm # default root, not recommended")))
(services
(append (list
;; This conflicts with everything when testing in a VM.
;(agetty-service
; (agetty-configuration
; (extra-options '("-L"))
; (baud-rate "115200")
; (term "vt100")
; (tty "ttyS0")))
(service openssh-service-type
(openssh-configuration
(authorized-keys
@ -168,17 +95,32 @@ SlurmUser=slurm # default root, not recommended")))
(service slurm-service-type
(slurm-configuration
(package slurm-18.08)
(slurmd-log-file "/var/log/slurmd.log")
(slurmctld-log-file "/var/log/slurmctld.log")
(SlurmdLogFile "/var/log/slurmd.log")
(SlurmctldLogFile "/var/log/slurmctld.log")
(ClusterName "linux")
(SlurmUser "slurm")
(SlurmctldHost '("octopus"))
(DbdHost "localhost")
(StorageType "accounting_storage/none")
(slurm-extra-content
(string-append
"StateSaveLocation=/var/spool/slurmd/ctld # default /var/spool\n"
"ProctrackType=proctrack/pgid # default proctrack/cgroup\n"
"ReturnToService=1 # default 0\n"
"DebugFlags=NO_CONF_HASH # default empty\n"
"# COMPUTE NODES\n"
"NodeName=octopus CPUs=1 Boards=1 SocketsPerBoard=1 CoresPerSocket=1 ThreadsPerCore=1 RealMemory=1024\n"
"PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP"))
(cgroup-extra-content
(string-append
"CgroupAutomount=yes # default no\n"
"ConstrainCores=yes # default no"))
(slurmdbd-extra-content
(string-append
"LogFile=/var/log/slurmdbd.log # default none, syslog"))
(run-slurmdbd? #t)
(run-slurmctld? #t)))
;; Some slurm configuration files
(simple-service 'slurm-conf etc-service-type
`(("slurm/slurm.conf" ,%slurm.conf)
("slurm/cgroup.conf" ,%cgroup.conf)
("slurm/slurmdbd.conf" ,%slurmdbd.conf)))
(service dhcp-client-service-type)
(service openntpd-service-type))
%base-services)))

136
gn/services/science.scm

@ -170,32 +170,113 @@
;; that machine. Therefore it makes sense to have one config section with
;; "common" and "extended" options. With all the possible options and
;; versions we only cover the ones which affect the services.
;; We keep the capitalization used in the config files to make discovery easier.
(package slurm-configuration-package
(default slurm))
(slurm-conf-file slurm-configuration-slurm-conf-file
(default "/etc/slurm/slurm.conf"))
(slurmd-log-file slurm-configuration-slurmd-log-file
(SlurmdLogFile slurm-configuration-slurmd-log-file
(default #f)) ; #f for syslog
(slurmd-pid-file slurm-configuration-slurmd-pid-file
(SlurmdPidFile slurm-configuration-slurmd-pidfile
(default "/var/run/slurmd.pid"))
(slurmd-spooldir slurm-configuration-slurmd-spooldir
(SlurmdSpoolDir slurm-configuration-slurmd-spooldir
(default "/var/spool/slurmd"))
(run-slurmctld? slurm-configuration-run-slurmctld
(default #f))
(slurmctld-log-file slurm-configuration-slurmctld-log-file
(SlurmctldLogFile slurm-configuration-slurmctld-log-file
(default #f)) ; #f for syslog
(slurmctld-pid-file slurm-configuration-slurmctld-pid-file
(SlurmctldPidFile slurm-configuration-slurmctld-pidfile
(default "/var/run/slurmctld.pid"))
(run-slurmdbd? slurm-configuration-run-slurmdbd
(default #f))
(slurmdbd-conf-file slurm-configuration-slurmdbd-conf-file
(default "/etc/slurm/slurmdbd.conf"))
(slurmdbd-pid-file slurm-configuration-slurmdbd-pid-file
(default "/var/run/slurmdbd.pid")))
(slurmdbd-PidFile slurm-configuration-slurmdbd-pidfile
(default "/var/run/slurmdbd.pid"))
(ClusterName slurm-configuration-clustername
(default #f)) ; string
(SlurmUser slurm-configuration-slurmuser
(default #f)) ; string
(SlurmctldHost slurm-configuration-slurmctldhost
(default #f)) ; list of strings
(slurm-extra-content slurm-configuration-slurm-extra-content
(default ""))
(cgroup-extra-content slurm-configuration-cgroup-extra-content
(default ""))
(DbdHost slurm-configuration-dbdhost
(default #f)) ; string
(StorageType slurm-configuration-storagetype
(default #f)) ; string
(slurmdbd-extra-content slurm-configuration-slurmdbd-extra-content
(default "")))
(define (%slurm.conf config)
  "Return a slurm.conf configuration file corresponding to CONFIG.

Fields of CONFIG whose value is #f are treated as unset: the corresponding
line is left out of the generated file instead of being written with a
literal #f value.  The extra content of CONFIG is appended verbatim at the
end of the file."
  (let ((cluster-name  (slurm-configuration-clustername config))
        ;; An unset host list becomes the empty list so that the 'for-each'
        ;; in the builder always receives a proper list.
        (ctld-hosts    (or (slurm-configuration-slurmctldhost config) '()))
        (spool-dir     (slurm-configuration-slurmd-spooldir config))
        (slurmd-pid    (slurm-configuration-slurmd-pidfile config))
        (slurmd-log    (slurm-configuration-slurmd-log-file config))
        (slurmctld-pid (slurm-configuration-slurmctld-pidfile config))
        (slurmctld-log (slurm-configuration-slurmctld-log-file config))
        (slurm-user    (slurm-configuration-slurmuser config))
        (extra-content (slurm-configuration-slurm-extra-content config)))
    (computed-file
     "slurm_conf"
     #~(begin
         ;; srfi-26 provides 'cut', used for the per-host lines below.
         (use-modules (srfi srfi-26))
         (call-with-output-file #$output
           (lambda (port)
             (display "# Generated by 'slurm-service'.\n" port)
             (when #$cluster-name
               (format port "ClusterName=~a\n" #$cluster-name))
             ;; One line per controller host, in the order given.
             ;; NOTE(review): the key is spelled "SlurmCtldHost" here while
             ;; the record field is SlurmctldHost -- confirm that Slurm
             ;; accepts either capitalization before normalizing it.
             (for-each
              (cut format port "SlurmCtldHost=~a\n" <>)
              '#$ctld-hosts)
             (format port "SlurmdSpoolDir=~a\n" #$spool-dir)
             (format port "SlurmdPidFile=~a\n" #$slurmd-pid)
             ;; A log file of #f means no log-file line is written.
             (when #$slurmd-log
               (format port "SlurmdLogFile=~a\n" #$slurmd-log))
             (format port "SlurmctldPidFile=~a\n" #$slurmctld-pid)
             (when #$slurmctld-log
               (format port "SlurmctldLogFile=~a\n" #$slurmctld-log))
             (when #$slurm-user
               (format port "SlurmUser=~a\n" #$slurm-user))
             (format port "\n# Extra content here:\n~a\n" #$extra-content)
             #t))))))
(define (%cgroup.conf config)
  "Build the cgroup.conf file for CONFIG: a 'Generated by' header line
followed by the cgroup extra content of CONFIG and a trailing newline."
  (let ((extra-content (slurm-configuration-cgroup-extra-content config)))
    (computed-file
     "cgroup_conf"
     #~(call-with-output-file #$output
         (lambda (out)
           (display "# Generated by 'slurm-service'.\n" out)
           ;; The extra content is emitted verbatim.
           (format out "~a\n" #$extra-content))))))
(define (%slurmdbd.conf config)
  "Return a slurmdbd.conf configuration file corresponding to CONFIG.

Fields of CONFIG whose value is #f are treated as unset: the corresponding
line is left out of the generated file instead of being written with a
literal #f value.  The slurmdbd extra content of CONFIG is appended verbatim
at the end of the file."
  (let ((dbd-host      (slurm-configuration-dbdhost config))
        (storage-type  (slurm-configuration-storagetype config))
        (slurm-user    (slurm-configuration-slurmuser config))
        (pid-file      (slurm-configuration-slurmdbd-pidfile config))
        (extra-content (slurm-configuration-slurmdbd-extra-content config)))
    (computed-file
     "slurmdbd_conf"
     #~(begin
         (call-with-output-file #$output
           (lambda (port)
             (display "# Generated by 'slurm-service'.\n" port)
             ;; DbdHost, StorageType and SlurmUser default to #f in the
             ;; configuration record; only write them when they are set.
             (when #$dbd-host
               (format port "DbdHost=~a\n" #$dbd-host))
             (when #$storage-type
               (format port "StorageType=~a\n" #$storage-type))
             (when #$slurm-user
               (format port "SlurmUser=~a\n" #$slurm-user))
             (format port "PidFile=~a\n" #$pid-file)
             (format port "\n# Extra content here:\n~a\n" #$extra-content)
             #t))))))
(define (slurm-activation config)
"Return the activation GEXP for CONFIG for the slurm service."
@ -207,7 +288,7 @@
(logdir (dirname (or #$(slurm-configuration-slurmd-log-file config)
#$(slurm-configuration-slurmctld-log-file config)
"/var/log/slurmd.log")))
(piddir (dirname #$(slurm-configuration-slurmd-pid-file config))))
(piddir (dirname #$(slurm-configuration-slurmd-pidfile config))))
(unless (file-exists? spooldir)
(mkdir-p spooldir))
(chown spooldir (passwd:uid %user) (passwd:gid %user))
@ -219,11 +300,15 @@
(unless (file-exists? piddir)
(mkdir-p piddir)))
;; /etc/slurm/slurm.conf needs to exist.
(file-exists? #$(slurm-configuration-slurm-conf-file config)))))
(file-exists? #$(slurm-configuration-slurm-conf-file config))
;; slurmdbd activation
(when #$(slurm-configuration-run-slurmdbd config)
(file-exists?
#$(slurm-configuration-slurmdbd-conf-file config))))))
(define slurmd-shepherd-service
(match-lambda
(($ <slurm-configuration> package slurm-conf-file slurmd-log-file slurmd-pid-file)
(($ <slurm-configuration> package slurm-conf-file _ slurmd-pidfile)
(list
(shepherd-service
(documentation "Slurmd server")
@ -231,14 +316,13 @@
(requirement '(loopback munge))
(start #~(make-forkexec-constructor
(list #$(file-append package "/sbin/slurmd")
;"-L" #$slurmd-log-file
"-f" #$slurm-conf-file)
#:pid-file #$slurmd-pid-file))
#:pid-file #$slurmd-pidfile))
(stop #~(make-kill-destructor)))))))
(define slurmctld-shepherd-service
(match-lambda
(($ <slurm-configuration> package slurm-conf-file _ _ _ run-slurmctld? slurmctld-log-file slurmctld-pid-file)
(($ <slurm-configuration> package slurm-conf-file _ _ _ run-slurmctld? _ slurmctld-pidfile)
(list
(shepherd-service
(documentation "Slurmctld server")
@ -246,22 +330,14 @@
(requirement '(loopback munge))
(start #~(make-forkexec-constructor
(list #$(file-append package "/sbin/slurmctld")
;"-L" #$slurmctld-log-file
"-f" #$slurm-conf-file)
#:pid-file #$slurmctld-pid-file))
#:pid-file #$slurmctld-pidfile))
(stop #~(make-kill-destructor))
(auto-start? run-slurmctld?))))))
;; When CONFIG asks for slurmdbd to run, probe for the slurmdbd configuration
;; file with 'file-exists?'.  The result of that probe is discarded: the
;; procedure returns #t whether or not the file is present.
(define (slurmdbd-activation config)
"Test the Slurmdbd configuration exists."
(when (slurm-configuration-run-slurmdbd config)
(file-exists?
(slurm-configuration-slurmdbd-conf-file config)))
#t)
(define slurmdbd-shepherd-service
(match-lambda
(($ <slurm-configuration> package _ _ _ _ _ _ _ run-slurmdbd? slurmdbd-conf-file slurmdbd-pid-file)
(($ <slurm-configuration> package _ _ _ _ _ _ _ run-slurmdbd? _ slurmdbd-pidfile)
(list
(shepherd-service
(documentation "Slurmdbd server")
@ -269,7 +345,7 @@
(requirement '(loopback munge))
(start #~(make-forkexec-constructor
(list #$(file-append package "/sbin/slurmdbd"))
#:pid-file #$slurmdbd-pid-file))
#:pid-file #$slurmdbd-pidfile))
(stop #~(make-kill-destructor))
(auto-start? run-slurmdbd?))))))
@ -282,6 +358,14 @@
(slurmdbd-shepherd-service config)
'())))
(define (slurm-etc-service config)
  "Return the list of (FILE-NAME FILE-OBJECT) entries that CONFIG contributes
to the etc service: slurm.conf and cgroup.conf always, and slurmdbd.conf
only when CONFIG requests that slurmdbd be run."
  (let ((base-files
         (list (list "slurm/slurm.conf" (%slurm.conf config))
               (list "slurm/cgroup.conf" (%cgroup.conf config)))))
    (if (slurm-configuration-run-slurmdbd config)
        (append base-files
                (list (list "slurm/slurmdbd.conf" (%slurmdbd.conf config))))
        base-files)))
(define slurm-service-type
(service-type
(name 'slurm)
@ -291,8 +375,8 @@
slurm-services-to-run)
(service-extension activation-service-type
slurm-activation)
(service-extension activation-service-type
slurmdbd-activation)
(service-extension etc-service-type
slurm-etc-service)
(service-extension account-service-type
(const %slurm-accounts))
(service-extension profile-service-type

Loading…
Cancel
Save