From 2df25a354555437cfe149f4293adb0ad8cbc5160 Mon Sep 17 00:00:00 2001
From: Efraim Flashner
Date: Thu, 27 May 2021 02:27:29 -0500
Subject: gn: genecup testing

---
 gn/packages/ratspub.scm           | 147 +++++++++++++++++++++++++++++++++++++-
 gn/services/genecup-container.scm | 137 ++++++++++++++++++++++++++++++++++
 2 files changed, 283 insertions(+), 1 deletion(-)
 create mode 100644 gn/services/genecup-container.scm

diff --git a/gn/packages/ratspub.scm b/gn/packages/ratspub.scm
index 85c73c7..1c00139 100644
--- a/gn/packages/ratspub.scm
+++ b/gn/packages/ratspub.scm
@@ -11,6 +11,7 @@
   #:use-module (gnu packages machine-learning)
   #:use-module (gnu packages python)
   #:use-module (gnu packages python-crypto)
+  #:use-module (gnu packages python-science)
   #:use-module (gnu packages python-web)
   #:use-module (gnu packages python-xyz)
   #:use-module (gn packages javascript)
@@ -139,7 +140,7 @@
        ("jquery" ,web-jquery)
        ("js-filesaver" ,js-filesaver-1.3.2)
        ("js-popper" ,js-popper-1.12.9)))
-    (home-page "http://rats.pub/")
+    (home-page "https://rats.pub/")
     (synopsis "Relationship with Addiction Through Searches of PubMed")
     (description
      "RatsPub is a tool to efficiently and comprehensively answer the question
@@ -266,3 +267,147 @@ if __name__ == '__main__':
     (synopsis "")
     (description "")
     (license license:expat)))
+
+;; GeneCup web application.  The source tree is copied verbatim into the
+;; output, CDN URLs in the templates are rewritten to /static/ paths, and the
+;; matching JavaScript/CSS inputs are symlinked into static/.
+(define-public genecup
+  (package
+    (name "genecup")
+    (version "1.0")
+    (source (origin
+              (method git-fetch)
+              (uri (git-reference
+                     (url "https://github.com/hakangunturkun/GeneCup")
+                     (commit (string-append "v" version))))
+              (file-name (git-file-name name version))
+              ;; Change the port for running the service.
+              (modules '((guix build utils)))
+              (snippet
+               '(begin (substitute* "server.py"
+                         (("4200") "4204"))
+                       #t))
+              (sha256
+               (base32 "0ddgqjiacr0f33x0f9s10v3rqr3mmr92jwniprk22a167ncvfgx3"))))
+    (build-system python-build-system)
+    (arguments
+     `(#:tests? #f ; no test suite
+       #:phases
+       (modify-phases %standard-phases
+         (delete 'configure)
+         (delete 'build)
+         (add-after 'unpack 'make-files-writable
+           (lambda _
+             (for-each make-file-writable (find-files "."))))
+         (add-after 'unpack 'patch-datadir
+           (lambda _
+             (substitute* "server.py"
+               (("^datadir.*") "datadir = \"/export/ratspub/\"\n"))
+             #t))
+         (add-after 'unpack 'patch-sources
+           (lambda* (#:key inputs outputs #:allow-other-keys)
+             (let ((out (assoc-ref outputs "out"))
+                   (inetutils (assoc-ref inputs "inetutils")))
+               (substitute* '("templates/cytoscape.html"
+                              "templates/tableview.html"
+                              "templates/tableview0.html"
+                              "templates/userarchive.html")
+                 (("https.*FileSaver.js.*\\\">") "/static/FileSaver.js\">")
+                 (("https.*cytoscape-svg.js.*\\\">") "/static/cytoscape-svg.js\">")
+                 (("https.*cytoscape.min.js.*\\\">") "/static/cytoscape.min.js\">"))
+               (substitute* "templates/layout.html"
+                 (("https.*bootstrap.min.css.*\\\">") "/static/bootstrap.min.css\">")
+                 (("https.*4.*bootstrap.min.js.*\\\">") "/static/bootstrap.min.js\">")
+                 (("https.*4.7.0/css/font-awesome.min.css") "/static/font-awesome.min.css")
+                 (("https.*jquery-3.2.1.slim.min.js.*\\\">") "/static/jquery.slim.min.js\">")
+                 (("https.*1.12.9/umd/popper.min.js.*\\\">") "/static/popper.min.js\">"))
+               (substitute* "ratspub.py"
+                 (("hostname") (string-append inetutils "/bin/hostname"))))
+             #t))
+         (replace 'install
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let ((out (assoc-ref outputs "out")))
+               (copy-recursively "." out))
+             #t))
+         (add-after 'install 'install-javascript
+           (lambda* (#:key inputs outputs #:allow-other-keys)
+             (let ((out (assoc-ref outputs "out"))
+                   (awesome (assoc-ref inputs "font-awesome"))
+                   (bootstrap (assoc-ref inputs "bootstrap"))
+                   (cytoscape (assoc-ref inputs "cytoscape"))
+                   (cytoscape-svg (assoc-ref inputs "cytoscape-svg"))
+                   (jquery (assoc-ref inputs "jquery"))
+                   (js-filesaver (assoc-ref inputs "js-filesaver"))
+                   (js-popper (assoc-ref inputs "js-popper")))
+               (symlink (string-append awesome
+                                       "/share/web/font-awesomecss/font-awesome.min.css")
+                        (string-append out "/static/font-awesome.min.css"))
+               (symlink (string-append bootstrap
+                                       "/share/web/bootstrap/css/bootstrap.min.css")
+                        (string-append out "/static/bootstrap.min.css"))
+               (symlink (string-append bootstrap
+                                       "/share/web/bootstrap/js/bootstrap.min.js")
+                        (string-append out "/static/bootstrap.min.js"))
+               (symlink (string-append cytoscape
+                                       "/share/genenetwork2/javascript/cytoscape/cytoscape.min.js")
+                        (string-append out "/static/cytoscape.min.js"))
+               (symlink (string-append cytoscape-svg
+                                       "/share/javascript/cytoscape-svg.js")
+                        (string-append out "/static/cytoscape-svg.js"))
+               (symlink (string-append jquery
+                                       "/share/web/jquery/jquery.slim.min.js")
+                        (string-append out "/static/jquery.slim.min.js"))
+               (symlink (string-append js-filesaver
+                                       "/share/javascript/FileSaver.js")
+                        (string-append out "/static/FileSaver.js"))
+               (symlink (string-append js-popper
+                                       "/share/javascript/popper.min.js")
+                        (string-append out "/static/popper.min.js")))
+             #t))
+         (add-after 'install 'wrap-executable
+           (lambda* (#:key inputs outputs #:allow-other-keys)
+             (let ((out (assoc-ref outputs "out"))
+                   (path (getenv "PYTHONPATH")))
+               (wrap-program (string-append out "/server.py")
+                 `("PATH" ":" prefix (,(dirname (which "edirect.pl"))
+                                      ,(dirname (which "dirname"))
+                                      ,(dirname (which "grep"))
+                                      ,(dirname (which "sed"))))
+                 `("PYTHONPATH" ":" prefix (,path))))
+             #t)))))
+    (inputs
+     `(("edirect" ,edirect)
+       ("inetutils" ,inetutils)
+       ("python-bcrypt" ,python-bcrypt)
+       ("python-flask-sqlalchemy" ,python-flask-sqlalchemy)
+       ("python-keras" ,python-keras-for-ratspub)
+       ("python-nltk" ,python-nltk)
+       ("python-pandas" ,python-pandas)
+       ("python-regex" ,python-regex)
+       ("tensorflow" ,tensorflow)))
+    (native-inputs
+     `(("bootstrap" ,web-bootstrap)
+       ("cytoscape" ,javascript-cytoscape-3.17)
+       ;("cytoscape-svg" ,js-cytoscape-svg-0.3.1) ; TODO
+       ("cytoscape-svg" ,js-cytoscape-svg-vendor-0.3.1)
+       ("font-awesome" ,web-font-awesome)
+       ("jquery" ,web-jquery)
+       ("js-filesaver" ,js-filesaver-1.3.2)
+       ("js-popper" ,js-popper-1.12.9)))
+    (home-page "https://genecup.org")
+    (synopsis "Using PubMed to find out how a gene contributes to addiction")
+    (description "GeneCup automatically extracts information from PubMed and
+@url{https://www.ebi.ac.uk/gwas/, @acronym{NHGRI-EBI GWAS, European
+Bioinformatics Institute Genome-Wide Association Studies}} catalog on the
+relationship of any gene with a custom list of keywords hierarchically organized
+into an ontology. The users create an ontology by identifying categories of
+concepts and a list of keywords for each concept.")
+    (license license:expat)))
+
+;; Variant of genecup passed through tensowflow-native-instead-of-tensorflow
+;; (sic; presumably defined elsewhere in this file -- verify the spelling there).
+(define-public genecup-with-tensorflow-native
+  (package
+    (inherit
+      (tensowflow-native-instead-of-tensorflow genecup))
+    (name "genecup-with-tensorflow-native")))
diff --git a/gn/services/genecup-container.scm b/gn/services/genecup-container.scm
new file mode 100644
index 0000000..eae3f9c
--- /dev/null
+++ b/gn/services/genecup-container.scm
@@ -0,0 +1,137 @@
+(define-module (gn services genecup-container))
+
+(use-modules (gnu)
+             (gn packages ratspub)
+             (guix download)
+             (guix modules)
+             (guix packages)
+             (guix records)
+             (ice-9 match))
+(use-service-modules shepherd)
+(use-package-modules certs compression)
+
+;; Configuration record: the only field is the GeneCup package to run.
+(define-record-type* <genecup-configuration>
+  genecup-configuration
+  make-genecup-configuration
+  genecup-configuration?
+  (package genecup-configuration-package ; package
+           (default genecup)))
+
+;; NLTK "punkt" tokenizer archive, pinned to a fixed nltk_data commit.
+(define %punkt.zip
+  (origin
+    (method url-fetch)
+    (uri "https://github.com/nltk/nltk_data/raw/b63a469d2f83a3cc9a2efcfe36915839d4e11d42/packages/tokenizers/punkt.zip")
+    (sha256
+      (base32 "0i01c5qzn1p8dxyrpx4hry2n6x6b8rgcq1sck091n0jp036f6x4s"))))
+
+;; Activation gexp: create /export2/PubMed, unpack punkt.zip when the tokenizer
+;; directory is missing, and seed userspub.sqlite from the package if absent.
+(define genecup-activation
+  (match-lambda
+    (($ <genecup-configuration> package)
+     #~(begin
+         (let ((nltk_data "/var/cache/nltk_data/tokenizers")
+               (data_dir "/export/ratspub"))
+           (unless (file-exists? "/export2/PubMed")
+             (mkdir-p "/export2/PubMed"))
+           (unless (file-exists? nltk_data)
+             (begin
+               ;; The correct way would be to use python-nltk to download the data
+               ;; python3 -m nltk.downloader -d /var/cache/nltk_data punkt
+               (mkdir-p nltk_data)
+               (chdir nltk_data)
+               (invoke #$(file-append unzip "/bin/unzip") "-q" #$%punkt.zip)))
+           (unless (file-exists? (string-append data_dir "/userspub.sqlite"))
+             (begin
+               (install-file #$(file-append package "/userspub.sqlite") data_dir)
+               (chmod (string-append data_dir "/userspub.sqlite") #o554))))))))
+
+;; Shepherd service that runs server.py from the package directory through
+;; make-forkexec-constructor/container with the data directories mapped in.
+(define genecup-shepherd-service
+  (match-lambda
+    (($ <genecup-configuration> package)
+     (with-imported-modules (source-module-closure
+                              '((gnu build shepherd)
+                                (gnu system file-systems)))
+       (list (shepherd-service
+               (provision '(genecup))
+               (requirement '(networking))
+               (modules '((gnu build shepherd)
+                          (gnu system file-systems)))
+               (start #~(make-forkexec-constructor/container
+                          (list #$(file-append package "/server.py"))
+                          ;; Needs to run from the directory it is located in.
+                          #:directory #$package
+                          #:log-file "/var/log/genecup.log"
+                          ;; We don't need to set TMPDIR because we're inside a container.
+                          #:environment-variables
+                          '("EDIRECT_PUBMED_MASTER=/export2/PubMed"
+                            "NLTK_DATA=/var/cache/nltk_data"
+                            "PERL_LWP_SSL_CA_FILE=/etc/ssl/certs/ca-certificates.crt")
+                          #:mappings (list (file-system-mapping
+                                             (source "/export2/PubMed")
+                                             (target source)
+                                             (writable? #t))
+                                           (file-system-mapping
+                                             (source "/export/ratspub")
+                                             (target source)
+                                             (writable? #t))
+                                           (file-system-mapping
+                                             (source "/var/cache/nltk_data")
+                                             (target source))
+                                           (file-system-mapping
+                                             (source "/etc/ssl/certs")
+                                             (target source)))))
+               (stop #~(make-kill-destructor))))))))
+
+;; Service type tying together the shepherd service, activation and profile.
+(define genecup-service-type
+  (service-type
+    (name 'genecup)
+    (extensions
+      (list
+        (service-extension shepherd-root-service-type
+                           genecup-shepherd-service)
+        (service-extension activation-service-type
+                           genecup-activation)
+        ;; Make sure we get all the dependencies of GeneCup.
+        (service-extension profile-service-type
+                           (compose list genecup-configuration-package))))
+    (default-value (genecup-configuration))
+    (description
+      "Run a GeneCup Webserver.")))
+
+;; Operating-system declaration used by the guix system commands listed below.
+(operating-system
+  (host-name "genecup")
+  (timezone "Etc/UTC")
+  (locale "en_US.utf8")
+
+  (bootloader (bootloader-configuration
+               (bootloader grub-bootloader)
+               (target "does-not-matter")))
+  (file-systems (list (file-system
+                        (device "does-not-matter")
+                        (mount-point "/")
+                        (type "does-not-matter"))))
+  ;; TODO: A more minimal kernel for use in a docker image
+  ;; (kernel linux-libre-vm)
+  ;; No firmware for VMs.
+  (firmware '())
+  (packages (cons nss-certs %base-packages)) ;(list nss-certs))
+
+  (services (list (service genecup-service-type
+                           (genecup-configuration
+                             ;; genecup for docker, genecup-with-tensorflow-native for architecture specific speed optimizations.
+                             ;(package genecup))))))
+                             (package genecup-with-tensorflow-native))))))
+
+;; guix system container -L /path/to/guix-bioinformatics/ -L /path/to/guix-past/modules/ /path/to/guix-bioinformatics/gn/services/genecup-container.scm --network --share=/export2/PubMed=/export2/PubMed --share=/export/ratspub=/export/ratspub
+;; For docker it isn't necessary to list the shared folders at build time.
+;; guix system docker-image -L /path/to/guix-bioinformatics/ -L /path/to/guix-past/modules/ /path/to/guix-bioinformatics/gn/services/genecup-container.scm --network
+;; Docker instructions:
+;; docker load --input genecup-docker-image.tar.gz
+;; docker run -d --privileged --net=host --name genecup --volume /path/to/PubMed:/export2/PubMed guix
-- 
cgit v1.2.3