From 1d456b37cabefe6caa94cab22e952622c0d26c1f Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Tue, 1 Dec 2020 21:15:21 +0200 Subject: octopus: add slurm services --- gn/deploy/octopus.scm | 98 +++++++++++++++-- gn/services/science.scm | 274 ++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 322 insertions(+), 50 deletions(-) diff --git a/gn/deploy/octopus.scm b/gn/deploy/octopus.scm index f707462..9d1e7d3 100644 --- a/gn/deploy/octopus.scm +++ b/gn/deploy/octopus.scm @@ -2,10 +2,70 @@ (use-modules (gnu) (gn services science) - ) -(use-service-modules ssh web) + (gn packages parallel)) +(use-service-modules networking ssh web) (use-package-modules certs tmux screen vim) +(define %slurm.conf + (plain-file "slurm.conf" + (string-append +"ClusterName=linux +ControlMachine=octopus + +SlurmUser=slurm +#SlurmdUser=root +SlurmctldPort=6817 +SlurmdPort=6818 +AuthType=auth/munge +StateSaveLocation=/var/spool/slurmd/ctld +SlurmdSpoolDir=/var/spool/slurmd +SwitchType=switch/none +MpiDefault=none +SlurmctldPidFile=/var/run/slurm/slurmctld.pid +SlurmdPidFile=/var/run/slurm/slurmd.pid +ProctrackType=proctrack/pgid +ReturnToService=1 + +# TIMERS +InactiveLimit=0 +MinJobAge=300 +KillWait=30 +# + +# LOGGING +SlurmctldDebug=3 +SlurmctldLogFile=/var/log/slurm/slurmctld.log +SlurmdDebug=3 +SlurmdLogFile=/var/log/slurm/slurmd.log +JobCompType=jobcomp/none + +# COMPUTE NODES +NodeName=octopus CPUs=1 Boards=1 SocketsPerBoard=1 CoresPerSocket=1 ThreadsPerCore=1 RealMemory=1024 +PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP"))) + +(define %cgroup.conf + (plain-file "cgroup.conf" + (string-append +"### +# Slurm cgroup support configuration file +### +CgroupAutomount=yes +ConstrainCores=yes +#"))) + +(define %slurmdbd.conf + (plain-file "slurmdbd.conf" + (string-append +"AuthType=auth/munge +AuthInfo=/var/run/munge/munge.socket.2 +DbdHost=localhost +StorageHost=localhost +StorageType=accounting_storage/none +StorageUser=slurm +PidFile=/var/run/slurm/slurmdbd.pid +LogFile=/var/log/slurm/slurmdbd.log +SlurmUser=slurm"))) + (operating-system (host-name "octopus") (timezone "Etc/UTC") @@ -34,7 +94,9 @@ ;(file-system ; (device "octopus01:/home") ; (mount-point "/home") - ; (type "nfs")) + ; (type "nfs") + ; (mount? #f) ; is this necessary? + ; (check? #f)) ) %base-file-systems)) @@ -48,13 +110,13 @@ (comment "Efraim Flashner") (uid 1000) (group "users") - (supplementary-groups '("wheel"))) + (supplementary-groups '("wheel" "kvm"))) (user-account (name "wrk") (comment "Pjotr Prins") (uid 502) (group "users") - (supplementary-groups '("wheel"))) + (supplementary-groups '("wheel" "kvm"))) %base-user-accounts)) @@ -75,8 +137,30 @@ ; (term "vt100") ; (tty "ttyS0"))) - (service openssh-service-type) + (service openssh-service-type + (openssh-configuration + (authorized-keys + `(("efraimf" ,(local-file "/home/efraimf/.ssh/id_rsa.pub")))))) (service munge-service-type) - ) + (service slurmd-service-type + (slurm-configuration + (package slurm-18.08))) + (service slurmdbd-service-type + (slurm-configuration + (package slurm-18.08) + (run-slurmdbd? #t))) + (service slurmctld-service-type + (slurm-configuration + (package slurm-18.08) + (run-slurmctld? #t))) + + ;; Some slurm configuration files + (simple-service 'slurm-conf etc-service-type + `(("slurm/slurm.conf" ,%slurm.conf) + ("slurm/cgroup.conf" ,%cgroup.conf) + ("slurm/slurmdbd.conf" ,%slurmdbd.conf))) + + (service dhcp-client-service-type) + (service openntpd-service-type)) %base-services))) diff --git a/gn/services/science.scm b/gn/services/science.scm index 4b34882..d1f3190 100644 --- a/gn/services/science.scm +++ b/gn/services/science.scm @@ -1,7 +1,13 @@ (define-module (gn services science) #:export (munge-configuration munge-configuration? - munge-service-type)) + munge-service-type + + slurm-configuration + slurm-configuration? + slurmd-service-type + slurmdbd-service-type + slurmctld-service-type)) (use-modules (gnu) (guix records) @@ -9,6 +15,7 @@ (use-service-modules shepherd) (use-package-modules admin parallel) +;; TODO: Make id/uid configurable (define %munge-accounts (list (user-group (name "munge") @@ -23,19 +30,6 @@ (home-directory "/var/lib/munge") (shell (file-append shadow "/sbin/nologin"))))) -(define %slurm-accounts - (list (user-group - (name "slurm") - (id 901) - (system? #t)) - (user-account - (name "slurm") - (group "slurm") - (uid 901) - (system? #t) - (comment "Slurm User") - (home-directory "/var/lib/slurm")))) - (define-record-type* munge-configuration make-munge-configuration @@ -51,20 +45,10 @@ (key munge-configuration-key (default "/etc/munge/munge.key"))) -(define-record-type* - slurm-configuration - make-slurm-configuration - slurm-configuration? - (package slurm-configuration-package - (default slurm))) - (define (munge-activation config) "Return the activation GEXP for CONFIG for the munge service." (with-imported-modules '((guix build utils)) #~(begin - (use-modules (guix build utils) - (rnrs bytevectors) - (rnrs io ports)) (define %user (getpw "munge")) (let* ((homedir (passwd:dir %user)) (key #$(munge-configuration-key config)) @@ -78,29 +62,17 @@ (chmod dir #o700)) (list homedir etc-dir log-dir)) (unless (file-exists? key) - ;; Borrowed from /dev/urandom in (gnu services base) - (call-with-input-file "/dev/urandom" - (lambda (urandom) - (let ((buf (make-bytevector 1024))) - (get-bytevector-n! urandom buf 0 1024) - (call-with-output-file key - (lambda (seed) - (put-bytevector seed buf))))))) + (invoke #$(file-append (munge-configuration-package config) + "/sbin/mungekey") + "--create" + (string-append "--bits=" (number->string (* 8 1024))) ; bits, not bytes + (string-append "--keyfile=" key))) (chown key (passwd:uid %user) (passwd:gid %user)) (chmod key #o400) (unless (file-exists? run-dir) (mkdir-p run-dir)) (chown run-dir (passwd:uid %user) (passwd:gid %user)))))) -(define (slurm-activation config) - "Return the activation GEXP for CONFIG for the slurm service." - (with-imported-modules '((guix build utils)) - #~(begin - (use-modules (guix build utils)) - (unless (file-exists? "/var/lib/slurm") - (mkdir-p "/var/lib/slurm")) - (chown "/var/lib/slurm" (passwd:uid "slurm") (passwd:gid "slurm"))))) - (define munge-shepherd-service (match-lambda (($ package socket pid-file log-file key) @@ -108,7 +80,7 @@ (shepherd-service (documentation "Munge server") (provision '(munge)) - (requirement '(loopback user-processes file-systems)) + (requirement '(loopback user-processes)) (start #~(make-forkexec-constructor (list #$(file-append package "/sbin/munged") "--foreground" ; "--force" @@ -144,4 +116,220 @@ (compose list munge-configuration-package)))) (default-value (munge-configuration)) (description - "Run a munge service."))) + "Run @url{https://dun.github.io/munge/,Munge}, an authentication service."))) + +;; Initial documentation for upstreaming: +;@subsubheading Munge +; +;The following example describes a Munge service with the default configuration. +; +;@lisp +;(service munge-service-type) +;@end lisp +; +;@deftp {Data Type} munge-configuration +;Data type representing the configuration for the @code{munge-service-type}. +; +;@table @asis +;@item @code{package} +;Munge package to use for the service. +; +;@item @code{socket} (default "/var/run/munge/munge.socket.2") +;The socket Munge should use. +; +;@item @code{pid-file} (default "/var/run/munge/munged.pid") +;The PID file which Munge should use. +; +;@item @code{log-file} (default "/var/log/munge/munged.log") +;The location of the log file Munge should write to. +; +;@item @code{key} (default "/etc/munge/munge.key") +;The location of the shared key Munge should use. Since this a shared secret key between the different nodes it should not be added to the store. +; +;@end table +;@end deftp + + +;; TODO: Make id/uid configurable +(define %slurm-accounts + (list (user-group + (name "slurm") + (id 901) + (system? #t)) + (user-account + (name "slurm") + (group "slurm") + (uid 901) + (system? #t) + (comment "Slurm User") + (home-directory "/var/lib/slurm")))) + +(define-record-type* + slurm-configuration + make-slurm-configuration + slurm-configuration? + ;; As I understand it, all the services depend on also running slurmd on + ;; that machine. Therefore it makes sense to have one config section with + ;; "common" and "extended" options. With all the possible options and + ;; versions we only cover the ones which affect the services. + (package slurm-configuration-package + (default slurm)) + (slurm-conf-file slurm-configuration-slurm-conf-file + (default "/etc/slurm/slurm.conf")) + (slurmd-log-file slurm-configuration-slurmd-log-file + (default "/var/log/slurm/slurmd.log")) + (slurmd-pid-file slurm-configuration-slurmd-pid-file + (default "/var/run/slurm/slurmd.pid")) + + (slurmd-spooldir slurm-configuration-slurmd-spooldir + (default "/var/spool/slurmd")) + + (run-slurmctld? slurm-configuration-run-slurmctld + (default #f)) + (slurmctld-log-file slurm-configuration-slurmctld-log-file + (default "/var/log/slurm/slurmctld.log")) + (slurmctld-pid-file slurm-configuration-slurmctld-pid-file + (default "/var/run/slurm/slurmctld.pid")) + + (run-slurmdbd? slurm-configuration-run-slurmdbd + (default #f)) + (slurmdbd-conf-file slurm-configuration-slurmdbd-conf-file + (default "/etc/slurm/slurmdbd.conf")) + (slurmdbd-pid-file slurm-configuration-slurmdbd-pid-file + (default "/var/run/slurm/slurmdbd.pid"))) + + +(define (slurm-activation config) + "Return the activation GEXP for CONFIG for the slurm service." + (with-imported-modules '((guix build utils)) + #~(begin + (use-modules (guix build utils)) + (define %user (getpw "slurm")) + (let ((homedir (passwd:dir %user)) + (spooldir #$(slurm-configuration-slurmd-spooldir config)) + (logdir (dirname #$(slurm-configuration-slurmd-log-file config))) + (piddir (dirname #$(slurm-configuration-slurmd-pid-file config)))) + (for-each (lambda (dir) + (unless (file-exists? dir) + (mkdir-p dir)) + (chown dir (passwd:uid %user) (passwd:gid %user))) + (list homedir spooldir piddir logdir))) + ;; /etc/slurm/slurm.conf needs to exist. + (file-exists? #$(slurm-configuration-slurm-conf-file config))))) + +(define slurmd-shepherd-service + (match-lambda + (($ package slurm-conf-file slurmd-log-file slurmd-pid-file) + (list + (shepherd-service + (documentation "Slurmd server") + (provision '(slurmd)) + (requirement '(loopback munge)) + (start #~(make-forkexec-constructor + (list #$(file-append package "/sbin/slurmd") + "-L" #$slurmd-log-file + "-f" #$slurm-conf-file) + #:pid-file #$slurmd-pid-file)) + (stop #~(make-kill-destructor))))))) + +(define slurmctld-shepherd-service + (match-lambda + (($ package slurm-conf-file _ _ _ run-slurmctld? slurmctld-log-file slurmctld-pid-file) + (list + (shepherd-service + (documentation "Slurmctld server") + (provision '(slurmctld)) + (requirement '(loopback munge)) + (start #~(make-forkexec-constructor + (list #$(file-append package "/sbin/slurmctld") + "-L" #$slurmctld-log-file + "-f" #$slurm-conf-file) + #:pid-file #$slurmctld-pid-file)) + (stop #~(make-kill-destructor)) + (auto-start? run-slurmctld?)))))) + +(define (slurmdbd-activation config) + "Test the Slurmdbd configration exists." + (file-exists? + (slurm-configuration-slurmdbd-conf-file config))) + +(define slurmdbd-shepherd-service + (match-lambda + (($ package _ _ _ _ _ _ _ run-slurmdbd? slurmdbd-conf-file slurmdbd-pid-file) + (list + (shepherd-service + (documentation "Slurmdbd server") + (provision '(slurmdbd)) + (requirement '(loopback munge)) + (start #~(make-forkexec-constructor + (list #$(file-append package "/sbin/slurmdbd")) + #:pid-file #$slurmdbd-pid-file)) + (stop #~(make-kill-destructor)) + (auto-start? run-slurmdbd?)))))) + +(define (slurm-services-to-run config) + (append (slurmd-shepherd-service config) + (if (slurm-configuration-run-slurmctld? config) + (slurmctld-shepherd-service config) + '()) + (if (slurm-configuration-run-slurmdbd? config) + (slurmdbd-shepherd-service config) + '()))) + +(define (slurm-activations-to-run config) + (append (slurm-activation config) + (if (slurm-configuration-run-slurmctld? config) + (slurmctld-activation config) + '()) + (if (slurm-configuration-run-slurmdbd? config) + (slurmdbd-activation config) + '()))) + +(define slurmd-service-type + (service-type + (name 'slurmd) + (extensions + (list + (service-extension shepherd-root-service-type + ;(cons slurmd-shepherd-service + ; slurmdbd-shepherd-service)) + slurmd-shepherd-service) + ;slurm-services-to-run) + (service-extension activation-service-type + ;(append slurm-activation + ; slurmdbd-activation)) + slurm-activation) + ;slurm-activations-to-run) + (service-extension account-service-type + (const %slurm-accounts)) + (service-extension profile-service-type + (compose list slurm-configuration-package)))) + (default-value (slurm-configuration)) + (description + "Run @url{https://slurm.schedmd.com/slurm.html,Slurm}, a workflow manager service."))) + +(define slurmdbd-service-type + (service-type + (name 'slurmdbd) + (extensions + (list + (service-extension shepherd-root-service-type + slurmdbd-shepherd-service) + (service-extension activation-service-type + slurmdbd-activation))) + (default-value (slurm-configuration)) + (description + ;; TODO: Fix for slurmdbd or integrate with slurm(d). + "Run @url{https://slurm.schedmd.com/slurm.html,Slurm}, a workflow manager service."))) + +(define slurmctld-service-type + (service-type + (name 'slurmctld) + (extensions + (list + (service-extension shepherd-root-service-type + slurmctld-shepherd-service))) + (default-value (slurm-configuration)) + (description + ;; TODO: Fix for slurmctld or integrate with slurm(d). + "Run @url{https://slurm.schedmd.com/slurm.html,Slurm}, a workflow manager service."))) -- cgit v1.2.3