From 3bd06f793ad0a78c1374277710c930ee5f735b56 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Thu, 3 Dec 2020 22:13:25 +0200 Subject: octopus: update slurm services --- gn/deploy/octopus.scm | 131 +++++++++++++++++++++++++++--------------------- gn/services/science.scm | 106 ++++++++++++++------------------------- 2 files changed, 112 insertions(+), 125 deletions(-) (limited to 'gn') diff --git a/gn/deploy/octopus.scm b/gn/deploy/octopus.scm index 9d1e7d3..ce480e2 100644 --- a/gn/deploy/octopus.scm +++ b/gn/deploy/octopus.scm @@ -2,42 +2,53 @@ (use-modules (gnu) (gn services science) - (gn packages parallel)) + (gn packages parallel) ; for slurm-18.08 + (srfi srfi-26)) (use-service-modules networking ssh web) -(use-package-modules certs tmux screen vim) +(use-package-modules shells) + +(define %efraimf-ssh-pubkey + (plain-file "id_rsa.pub" + "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDUCDY8ZKFF/ln0yzDt3CNmKz3cT4wzNv9bzCKvOBXcL0O7JtPWwqgLlZgmMHfzhzgReAkHcrt+Gdsyduzm/s9Y8c6QpyfaH6uoDwjfoOs6GrAjZaOXmAdncf+9HZEAy/IrygQ1YFRu6BvYogsdhhtN+O6IXBuvQQDRzldHs53Y53DK06Nrs19vAPwELXcDxcx1FvO+/L9nT8RHkI1Z0ucgTS+F/BWXl8+mh89r4j+4IRpZXOuCD0DrW5rgEE1EygF2dVdWZQESi23gU5Mt6vnmysXzwixB7j6I+xTih8LH4pz7hewEx6754e/cs9Gm7ZtfXKfXUt6+GtsBSBF3ULKl efraimf@octopus01")) (define %slurm.conf (plain-file "slurm.conf" (string-append -"ClusterName=linux -ControlMachine=octopus - -SlurmUser=slurm -#SlurmdUser=root -SlurmctldPort=6817 -SlurmdPort=6818 -AuthType=auth/munge -StateSaveLocation=/var/spool/slurmd/ctld -SlurmdSpoolDir=/var/spool/slurmd -SwitchType=switch/none -MpiDefault=none -SlurmctldPidFile=/var/run/slurm/slurmctld.pid -SlurmdPidFile=/var/run/slurm/slurmd.pid -ProctrackType=proctrack/pgid -ReturnToService=1 - +"# Defaults are commented out, otherwise noted at the end of the line +# Values are from example in the man page or from Debian +ClusterName=linux # no default, suggests lowercase +#ControlMachine=octopus # defunct, use SlurmctldHost +SlurmctldHost=octopus # no default, falls back to next SlurmctldHost in list + +SlurmUser=slurm # default root, not recommended +#SlurmctldPort=6817 +#SlurmdPort=6818 +#AuthType=auth/munge +StateSaveLocation=/var/spool/slurmd/ctld # default /var/spool +#SlurmdSpoolDir=/var/spool/slurmd +#SwitchType=switch/none +#MpiDefault=none +#SlurmctldPidFile=/var/run/slurmctld.pid +#SlurmdPidFile=/var/run/slurmd.pid +ProctrackType=proctrack/pgid # default proctrack/cgroup +ReturnToService=1 # default 0 + +DebugFlags=NO_CONF_HASH # default empty # TIMERS -InactiveLimit=0 -MinJobAge=300 -KillWait=30 +SlurmctldTimeout=300 # default 120 +#SlurmdTimeout=300 +#InactiveLimit=0 +#MinJobAge=300 +#KillWait=30 +#WaitTime=0 # # LOGGING -SlurmctldDebug=3 -SlurmctldLogFile=/var/log/slurm/slurmctld.log -SlurmdDebug=3 -SlurmdLogFile=/var/log/slurm/slurmd.log -JobCompType=jobcomp/none +#SlurmctldDebug=3 +SlurmctldLogFile=/var/log/slurmctld.log # default none, syslog +#SlurmdDebug=3 +SlurmdLogFile=/var/log/slurmd.log # default none, syslog +#JobCompType=jobcomp/none # COMPUTE NODES NodeName=octopus CPUs=1 Boards=1 SocketsPerBoard=1 CoresPerSocket=1 ThreadsPerCore=1 RealMemory=1024 @@ -49,22 +60,22 @@ PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP"))) "### # Slurm cgroup support configuration file ### -CgroupAutomount=yes -ConstrainCores=yes +CgroupAutomount=yes # default no +ConstrainCores=yes # default no #"))) (define %slurmdbd.conf (plain-file "slurmdbd.conf" (string-append -"AuthType=auth/munge -AuthInfo=/var/run/munge/munge.socket.2 -DbdHost=localhost -StorageHost=localhost -StorageType=accounting_storage/none -StorageUser=slurm -PidFile=/var/run/slurm/slurmdbd.pid -LogFile=/var/log/slurm/slurmdbd.log -SlurmUser=slurm"))) +"#AuthType=auth/munge +#AuthInfo=/var/run/munge/munge.socket.2 +DbdHost=localhost # must be specified +StorageHost=localhost # unclear, must be specified? +StorageType=accounting_storage/none # must be specified +StorageUser=slurm # unclear +#PidFile=/var/run/slurmdbd.pid +LogFile=/var/log/slurmdbd.log # default none, syslog +SlurmUser=slurm # default root, not recommended"))) (operating-system (host-name "octopus") @@ -105,6 +116,12 @@ SlurmUser=slurm"))) (firmware '()) (users (cons* + (user-account + (name "wrk") + (comment "Pjotr Prins") + (uid 502) + (group "users") + (supplementary-groups '("wheel" "kvm"))) (user-account (name "efraimf") (comment "Efraim Flashner") @@ -112,19 +129,25 @@ SlurmUser=slurm"))) (group "users") (supplementary-groups '("wheel" "kvm"))) (user-account - (name "wrk") - (comment "Pjotr Prins") - (uid 502) + (name "erikg") + (comment "Erik Garrison") + (uid 1001) (group "users") - (supplementary-groups '("wheel" "kvm"))) + (shell (file-append zsh "/bin/zsh"))) + (user-account + (name "hchen") + (comment "Hao Chen") + (uid 1002) + (group "users")) %base-user-accounts)) - (packages (cons* - nss-certs - screen - tmux - vim + (packages (append + (map (cut specification->package <>) + '("nss-certs" + "screen" "tmux" + "vim" + "htop")) %base-packages)) (services @@ -140,19 +163,15 @@ SlurmUser=slurm"))) (service openssh-service-type (openssh-configuration (authorized-keys - `(("efraimf" ,(local-file "/home/efraimf/.ssh/id_rsa.pub")))))) + `(("efraimf" ,%efraimf-ssh-pubkey))))) (service munge-service-type) - (service slurmd-service-type - (slurm-configuration - (package slurm-18.08))) - (service slurmdbd-service-type - (slurm-configuration - (package slurm-18.08) - (run-slurmdbd? #t))) - (service slurmctld-service-type + (service slurm-service-type (slurm-configuration (package slurm-18.08) + (slurmd-log-file "/var/log/slurmd.log") + (slurmctld-log-file "/var/log/slurmctld.log") + (run-slurmdbd? #t) (run-slurmctld? #t))) ;; Some slurm configuration files diff --git a/gn/services/science.scm b/gn/services/science.scm index d1f3190..f0f43d0 100644 --- a/gn/services/science.scm +++ b/gn/services/science.scm @@ -5,9 +5,7 @@ slurm-configuration slurm-configuration? - slurmd-service-type - slurmdbd-service-type - slurmctld-service-type)) + slurm-service-type)) (use-modules (gnu) (guix records) @@ -177,9 +175,9 @@ (slurm-conf-file slurm-configuration-slurm-conf-file (default "/etc/slurm/slurm.conf")) (slurmd-log-file slurm-configuration-slurmd-log-file - (default "/var/log/slurm/slurmd.log")) + (default #f)) ; #f for syslog (slurmd-pid-file slurm-configuration-slurmd-pid-file - (default "/var/run/slurm/slurmd.pid")) + (default "/var/run/slurmd.pid")) (slurmd-spooldir slurm-configuration-slurmd-spooldir (default "/var/spool/slurmd")) @@ -187,16 +185,16 @@ (run-slurmctld? slurm-configuration-run-slurmctld (default #f)) (slurmctld-log-file slurm-configuration-slurmctld-log-file - (default "/var/log/slurm/slurmctld.log")) + (default #f)) ; #f for syslog (slurmctld-pid-file slurm-configuration-slurmctld-pid-file - (default "/var/run/slurm/slurmctld.pid")) + (default "/var/run/slurmctld.pid")) (run-slurmdbd? slurm-configuration-run-slurmdbd (default #f)) (slurmdbd-conf-file slurm-configuration-slurmdbd-conf-file (default "/etc/slurm/slurmdbd.conf")) (slurmdbd-pid-file slurm-configuration-slurmdbd-pid-file - (default "/var/run/slurm/slurmdbd.pid"))) + (default "/var/run/slurmdbd.pid"))) (define (slurm-activation config) @@ -204,16 +202,22 @@ (with-imported-modules '((guix build utils)) #~(begin (use-modules (guix build utils)) - (define %user (getpw "slurm")) - (let ((homedir (passwd:dir %user)) - (spooldir #$(slurm-configuration-slurmd-spooldir config)) - (logdir (dirname #$(slurm-configuration-slurmd-log-file config))) - (piddir (dirname #$(slurm-configuration-slurmd-pid-file config)))) - (for-each (lambda (dir) - (unless (file-exists? dir) - (mkdir-p dir)) - (chown dir (passwd:uid %user) (passwd:gid %user))) - (list homedir spooldir piddir logdir))) + (let* ((%user (getpw "slurm")) + (spooldir #$(slurm-configuration-slurmd-spooldir config)) + (logdir (dirname (or #$(slurm-configuration-slurmd-log-file config) + #$(slurm-configuration-slurmctld-log-file config) + "/var/log/slurmd.log"))) + (piddir (dirname #$(slurm-configuration-slurmd-pid-file config)))) + (unless (file-exists? spooldir) + (mkdir-p spooldir)) + (chown spooldir (passwd:uid %user) (passwd:gid %user)) + (when logdir + (unless (file-exists? logdir) + (mkdir-p logdir)) + (when (> (string-length logdir) (string-length "/var/log")) + (chown logdir (passwd:uid %user) (passwd:gid %user)))) + (unless (file-exists? piddir) + (mkdir-p piddir))) ;; /etc/slurm/slurm.conf needs to exist. (file-exists? #$(slurm-configuration-slurm-conf-file config))))) @@ -227,7 +231,7 @@ (requirement '(loopback munge)) (start #~(make-forkexec-constructor (list #$(file-append package "/sbin/slurmd") - "-L" #$slurmd-log-file + ;"-L" #$slurmd-log-file "-f" #$slurm-conf-file) #:pid-file #$slurmd-pid-file)) (stop #~(make-kill-destructor))))))) @@ -242,7 +246,7 @@ (requirement '(loopback munge)) (start #~(make-forkexec-constructor (list #$(file-append package "/sbin/slurmctld") - "-L" #$slurmctld-log-file + ;"-L" #$slurmctld-log-file "-f" #$slurm-conf-file) #:pid-file #$slurmctld-pid-file)) (stop #~(make-kill-destructor)) @@ -250,8 +254,10 @@ (define (slurmdbd-activation config) "Test the Slurmdbd configration exists." - (file-exists? - (slurm-configuration-slurmdbd-conf-file config))) + (when (slurm-configuration-run-slurmdbd config) + (file-exists? + (slurm-configuration-slurmdbd-conf-file config))) + #t) (define slurmdbd-shepherd-service (match-lambda @@ -269,67 +275,29 @@ (define (slurm-services-to-run config) (append (slurmd-shepherd-service config) - (if (slurm-configuration-run-slurmctld? config) + (if (slurm-configuration-run-slurmctld config) (slurmctld-shepherd-service config) '()) - (if (slurm-configuration-run-slurmdbd? config) + (if (slurm-configuration-run-slurmdbd config) (slurmdbd-shepherd-service config) '()))) -(define (slurm-activations-to-run config) - (append (slurm-activation config) - (if (slurm-configuration-run-slurmctld? config) - (slurmctld-activation config) - '()) - (if (slurm-configuration-run-slurmdbd? config) - (slurmdbd-activation config) - '()))) - -(define slurmd-service-type +(define slurm-service-type (service-type - (name 'slurmd) + (name 'slurm) (extensions (list (service-extension shepherd-root-service-type - ;(cons slurmd-shepherd-service - ; slurmdbd-shepherd-service)) - slurmd-shepherd-service) - ;slurm-services-to-run) + slurm-services-to-run) (service-extension activation-service-type - ;(append slurm-activation - ; slurmdbd-activation)) slurm-activation) - ;slurm-activations-to-run) + (service-extension activation-service-type + slurmdbd-activation) (service-extension account-service-type (const %slurm-accounts)) (service-extension profile-service-type (compose list slurm-configuration-package)))) (default-value (slurm-configuration)) (description - "Run @url{https://slurm.schedmd.com/slurm.html,Slurm}, a workflow manager service."))) - -(define slurmdbd-service-type - (service-type - (name 'slurmdbd) - (extensions - (list - (service-extension shepherd-root-service-type - slurmdbd-shepherd-service) - (service-extension activation-service-type - slurmdbd-activation))) - (default-value (slurm-configuration)) - (description - ;; TODO: Fix for slurmdbd or integrate with slurm(d). - "Run @url{https://slurm.schedmd.com/slurm.html,Slurm}, a workflow manager service."))) - -(define slurmctld-service-type - (service-type - (name 'slurmctld) - (extensions - (list - (service-extension shepherd-root-service-type - slurmctld-shepherd-service))) - (default-value (slurm-configuration)) - (description - ;; TODO: Fix for slurmctld or integrate with slurm(d). - "Run @url{https://slurm.schedmd.com/slurm.html,Slurm}, a workflow manager service."))) + "Run @url{https://slurm.schedmd.com/slurm.html,Slurm}, a workflow manager +service. Optionally also run @code{slurmctld} and @code{slurmdbd}."))) -- cgit v1.2.3