;; (web-view navigation residue, not part of the file: about summary refs log tree commit diff)
;;
;; GeneCup guix.scm - package definition
;;
;; Build with:
;;
;;   guix build -f guix.scm
;;
;; Development shell:
;;
;;   guix shell -L . -C -N -F --expose=$HOME/.config/gemini --share=/export3/PubMed edirect-25 genecup-gemini coreutils -- genecup --port 4201
;;
;; To run the unit tests, enter the container shell (first line) and then run:
;;
;;   guix shell -C -N -F -L . --expose=$HOME/.config/gemini --share=/export3/PubMed edirect-25 genecup-gemini
;;   env EDIRECT_LOCAL_ARCHIVE=/export3/PubMed/Source python3 -m unittest tests.test_network_esearch
;;   env EDIRECT_LOCAL_ARCHIVE=/export3/PubMed/Source python3 -m unittest tests.test_local_xfetch -v
;;   env EDIRECT_LOCAL_ARCHIVE=/export3/PubMed/Source python3 -m unittest tests.test_network_gemini_ontology
;;
;; Note: API key is read from ~/.config/gemini/credentials
;;

(define-module (guix)
  #:use-module ((guix licenses) #:prefix license:)
  #:use-module (guix build-system pyproject)
  #:use-module (guix build-system gnu)
  #:use-module (guix build-system python)
  #:use-module (guix download)
  #:use-module (guix gexp)
  #:use-module (guix git-download)
  #:use-module (guix packages)
  #:use-module (guix utils)
  #:use-module (gnu packages admin)
  #:use-module (gnu packages base)
  #:use-module (gnu packages bash)
  #:use-module (gnu packages compression)
  #:use-module (gnu packages curl)
  #:use-module (gnu packages wget)
  #:use-module (gnu packages gawk)
  #:use-module (gnu packages golang)
  #:use-module (gnu packages golang-build)
  #:use-module (gnu packages golang-compression)
  #:use-module (gnu packages golang-xyz)
  #:use-module (gnu packages javascript)
  #:use-module (gnu packages python)
  #:use-module (gnu packages python-crypto)
  #:use-module (gnu packages python-science)
  #:use-module (gnu packages python-web)
  #:use-module (gnu packages python-xyz)
  #:use-module (gnu packages python-check)
  #:use-module (gnu packages python-build)
  #:use-module (gnu packages nss)
  #:use-module (gnu packages perl)
  #:use-module (gnu packages xml)
  #:use-module (gnu packages time)
  #:use-module (gnu packages tls)
  #:use-module (gn packages javascript)
  #:use-module (gn packages web))

;; Directory containing this guix.scm; used below as the local source tree
;; of the genecup-gemini package.
(define %source-dir (dirname (current-filename)))

;; Origin for the NLTK punkt_tab tokenizer archive (a zip file), pinned by
;; its sha256 hash.
;; NOTE(review): the URL points at the gh-pages branch rather than a fixed
;; commit, so the upstream file may presumably change and invalidate the
;; hash -- confirm whether a commit-pinned URL is preferable.
(define nltk-punkt-source
  (origin
    (method url-fetch)
    (uri "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt_tab.zip")
    (sha256
     (base32 "01h11srafj57yvp74xkidikh6m7ch7qscz21lck7f9vlg4c68zz5"))))

(define-public nltk-punkt
  ;; Data-only package: unzips the punkt_tab archive and installs its
  ;; contents under share/nltk_data/tokenizers/punkt_tab in the output.
  (package
    (name "nltk-punkt")
    (version "1.0")
    (source nltk-punkt-source)
    (build-system gnu-build-system)
    ;; unzip is only needed at build time, by the custom 'unpack phase.
    (native-inputs (list unzip))
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          ;; Nothing to configure, compile or test for a data archive.
          (delete 'check)
          (delete 'build)
          (delete 'configure)
          ;; The source is a .zip, so extract it with unzip in place.
          (replace 'unpack
            (lambda* (#:key source #:allow-other-keys)
              (invoke "unzip" source)))
          ;; Copy the extracted punkt_tab directory into the store output.
          (replace 'install
            (lambda _
              (let* ((data-root (string-append #$output "/share/nltk_data"))
                     (target (string-append data-root
                                            "/tokenizers/punkt_tab")))
                (mkdir-p target)
                (copy-recursively "punkt_tab" target)))))))
    (home-page "https://www.nltk.org/nltk_data/")
    (synopsis "NLTK Punkt_Tab sentence tokenizer models")
    (description "Pre-trained models for the Punkt sentence boundary
detection tokenizer (tab format), used by NLTK's sent_tokenize function.")
    (license license:asl2.0)))

;; Origin for the minipubmed test archive (a gzipped tarball), fetched from
;; the GeneCup repository's plain-file URL and pinned by its sha256 hash.
(define minipubmed-source
  (origin
    (method url-fetch)
    (uri "https://git.genenetwork.org/genecup/plain/minipubmed.tgz")
    (sha256
     (base32 "116k7plhn7xkbv170035si7xhbfqb1ff15rxqwimjrwm8rb1bbcc"))))

(define-public minipubmed
  ;; Data-only package: unpacks the minipubmed tarball, regenerates
  ;; test.xml from pmid.list with xfetch, and installs the whole tree
  ;; under share/minipubmed in the output.
  (package
    (name "minipubmed")
    (version "1.0")
    (source minipubmed-source)
    (build-system gnu-build-system)
    ;; edirect-25 provides the xfetch command used in the 'install phase.
    (inputs (list edirect-25))
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          ;; Nothing to configure, compile or test for a data archive.
          (delete 'check)
          (delete 'build)
          (delete 'configure)
          ;; Extract the tarball in the build directory.
          (replace 'unpack
            (lambda* (#:key source #:allow-other-keys)
              (invoke "tar" "xzf" source)))
          (replace 'install
            (lambda _
              (let* ((store-root #$output)
                     (target (string-append store-root "/share/minipubmed")))
                ;; Build test.xml from the PMIDs listed in pmid.list.
                ;; NOTE(review): `system' returns the exit status, which is
                ;; not checked here, so an xfetch failure does not abort
                ;; the build -- confirm that this is intentional.
                (with-directory-excursion "minipubmed"
                  (system "cat pmid.list | xfetch -db pubmed > test.xml"))
                (mkdir-p target)
                (copy-recursively "minipubmed" target)))))))
    (home-page "https://genecup.org")
    (synopsis "Mini PubMed archive for GeneCup testing")
    (description "A small collection of 2473 PubMed abstracts for testing
GeneCup with four gene symbols (gria1, crhr1, drd2, and penk).")
    (license license:expat)))

(define-public edirect-25
  ;; NCBI Entrez Direct built from the upstream release tarball.
  ;;
  ;; The three Go programs (xtract, rchive, transmute) are compiled in
  ;; GOPATH mode against the Go libraries listed in native-inputs and are
  ;; installed as PROG.Linux next to the shell scripts of the same name.
  ;; Executable scripts are installed into bin/ and wrapped so that bin/
  ;; itself and coreutils are on PATH.  A smoke test runs the installed Go
  ;; binaries after wrapping; it is skipped when #:tests? is false.
  (package
    (name "edirect-25")
    (version "25.2.20260328")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect"
                                  "/versions/" version
                                  "/edirect-" version ".tar.gz"))
              (sha256
               (base32 "04km4hrnmiganafwn5516hm8n0var9ilhbr068chy8v95xk131x6"))
              (modules '((guix build utils)))
              ;; Drop the certificate files shipped in the tarball.
              (snippet
               '(begin
                  (delete-file "Mozilla-CA.tar.gz")
                  (delete-file "cacert.pem")))
              (patches
               (list (local-file "contrib/patches/edirect-xml-bounds-check.patch")))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:tests? #t
      #:phases
      #~(modify-phases %standard-phases
          (delete 'configure)
          (add-after 'unpack 'patch-path-reset
            (lambda _
              ;; These scripts reset PATH=/bin:/usr/bin which breaks Guix
              (substitute* '("xtract" "rchive" "transmute")
                (("PATH=/bin:/usr/bin")
                 "PATH=\"/bin:/usr/bin:$PATH\""))))
          (add-after 'unpack 'patch-go-version
            (lambda _
              ;; Relax Go version requirement to match available toolchain
              (substitute* '("cmd/go.mod" "eutils/go.mod")
                (("go 1\\.26\\.1") "go 1.26.0"))))
          (replace 'build
            (lambda* (#:key inputs #:allow-other-keys)
              (setenv "HOME" (getcwd))
              (setenv "GOTOOLCHAIN" "local")
              (setenv "GO111MODULE" "off")
              ;; Build GOPATH from Guix Go package inputs + local eutils
              (let ((gopath (string-append (getcwd) "/gopath")))
                (mkdir-p (string-append gopath "/src"))
                (symlink (string-append (getcwd) "/eutils")
                         (string-append gopath "/src/eutils"))
                ;; Every input that has a src/ directory is treated as a
                ;; GOPATH entry; the local gopath comes first.
                (setenv "GOPATH"
                  (string-join
                    (cons gopath
                      (map cdr
                        (filter
                          (lambda (input)
                            (directory-exists?
                              (string-append (cdr input) "/src")))
                          inputs)))
                    ":")))
              ;; Compile each program to PROG.Linux inside cmd/.
              (with-directory-excursion "cmd"
                (for-each
                  (lambda (prog)
                    (invoke "go" "build" "-v"
                            "-o" (string-append prog ".Linux")
                            (string-append prog ".go")))
                  '("xtract" "rchive" "transmute")))))
          (replace 'install
            (lambda* (#:key outputs #:allow-other-keys)
              (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                (mkdir-p bin)
                ;; Install Go binaries
                (for-each
                  (lambda (prog)
                    (install-file (string-append "cmd/" prog ".Linux") bin))
                  '("xtract" "rchive" "transmute"))
                ;; Install executable scripts: every executable file
                ;; outside cmd/, eutils/ and gopath/, except sources,
                ;; documents, certificates and archives.
                (for-each
                  (lambda (f)
                    (when (and (not (file-is-directory? f))
                               (access? f X_OK)
                               (not (string-suffix? ".go" f))
                               (not (string-suffix? ".py" f))
                               (not (string-suffix? ".pm" f))
                               (not (string-suffix? ".pdf" f))
                               (not (string-suffix? ".pem" f))
                               (not (string-suffix? ".gz" f))
                               (not (member (basename f)
                                            '("LICENSE" "README"))))
                      (install-file f bin)))
                  (find-files "."
                    (lambda (f s)
                      (and (not (string-contains f "/cmd/"))
                           (not (string-contains f "/eutils/"))
                           (not (string-contains f "/gopath/"))))
                    #:directories? #f))
                ;; Install extern/ data (contains .ini config files)
                (copy-recursively "extern"
                                  (string-append bin "/extern")))))
          (add-after 'install 'wrap-programs
            (lambda* (#:key inputs outputs #:allow-other-keys)
              (let* ((out (assoc-ref outputs "out"))
                     (bin (string-append out "/bin"))
                     (coreutils (assoc-ref inputs "coreutils")))
                ;; Only wrap scripts directly in bin/, not in
                ;; subdirs (extern/ scripts are sourced, not executed).
                ;; Skip .sh (sourced) and .Linux (Go binaries).
                (for-each
                  (lambda (f)
                    (wrap-program f
                      `("PATH" ":" prefix
                        (,bin ,(string-append coreutils "/bin")))))
                  (filter
                    (lambda (f)
                      (and (string=? (dirname f) bin)
                           (not (string-suffix? ".sh" f))
                           (not (string-suffix? ".Linux" f))))
                    (find-files bin)))
                ;; wrap-program renames xtract -> .xtract-real, but the
                ;; script looks for $0.Linux, so create symlinks
                (for-each
                  (lambda (prog)
                    (symlink (string-append bin "/" prog ".Linux")
                             (string-append bin "/." prog "-real.Linux")))
                  '("xtract" "rchive" "transmute")))))
          ;; The standard 'check phase runs before 'install; the smoke
          ;; test below needs the installed binaries, so it runs later.
          (delete 'check)
          (add-after 'wrap-programs 'smoke-test
            (lambda* (#:key tests? outputs #:allow-other-keys)
              ;; Honor #:tests? so the test can be disabled (for example
              ;; with --without-tests) like a regular 'check phase.
              (when tests?
                (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                  ;; Smoke test: xtract.Linux parses XML
                  (invoke "sh" "-c"
                    (string-append
                      "echo '<test><a>hello</a><b>world</b></test>' | "
                      bin "/xtract.Linux -pattern test -element a b"
                      " | grep -q hello"))
                  ;; Smoke test: rchive.Linux version
                  (invoke (string-append bin "/rchive.Linux") "-version")
                  ;; Smoke test: transmute.Linux version
                  (invoke (string-append bin "/transmute.Linux")
                          "-version"))))))))
    ;; Go toolchain and the Go libraries picked up through GOPATH in the
    ;; 'build phase.
    (native-inputs
     (list go-1.26
           go-github-com-fatih-color
           go-github-com-gedex-inflector
           go-github-com-goccy-go-yaml
           go-github-com-klauspost-compress
           go-github-com-klauspost-cpuid-v2
           go-github-com-klauspost-pgzip
           go-github-com-komkom-toml
           go-github-com-mattn-go-colorable
           go-github-com-mattn-go-isatty
           go-github-com-pbnjay-memory
           go-github-com-pkg-errors
           go-github-com-surgebase-porter2
           go-golang-org-x-sys
           go-golang-org-x-text))
    ;; Command-line tools made available alongside the package.
    (propagated-inputs (list curl wget grep sed gawk coreutils findutils gzip unzip))
    ;; coreutils is looked up by label in the 'wrap-programs phase.
    (inputs (list bash-minimal coreutils perl perl-xml-simple python))
    (home-page "https://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description "Entrez Direct (EDirect) provides access to the NCBI's suite
of interconnected databases from a Unix terminal window.  Search terms are
entered as command-line arguments.  Individual operations are connected with
Unix pipes to construct multi-step queries.  Selected records can then be
retrieved in a variety of formats.")
    (license license:public-domain)))

(define-public python-google-genai
  ;; Google Gen AI Python SDK, built from the PyPI sdist with the
  ;; pyproject build system.
  (package
    (name "python-google-genai")
    (version "1.68.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "google_genai" version))
       (sha256
        (base32 "15na2kxak5farpm5az0dw7r3c3mf3nhy95rsk5r963v3pjwc0c5c"))))
    (build-system pyproject-build-system)
    ;; The test suite is disabled: it requires network access and API keys.
    (arguments
     (list #:tests? #f))
    ;; Build-time only tooling.
    (native-inputs
     (list python-setuptools
           python-wheel))
    ;; Propagated along with the package.
    ;; NOTE(review): python and sed are unusual entries for a Python
    ;; library's propagated inputs -- confirm that they are needed here.
    (propagated-inputs
     (list python
           python-google-auth
           python-httpx
           python-pydantic
           python-requests
           python-tenacity
           python-websockets
           python-typing-extensions
           python-distro
           python-sniffio
           sed))
    (home-page "https://github.com/googleapis/python-genai")
    (synopsis "Google Generative AI Python SDK")
    (description "Client library for the Google Generative AI API, providing
access to Gemini models.")
    (license license:asl2.0)))

(define-public genecup-gemini
  ;; GeneCup web application packaged from the local checkout.
  ;;
  ;; The source tree is copied verbatim into the output.  HTML templates
  ;; are patched to load JavaScript/CSS from /static/ instead of remote
  ;; https URLs, and the matching assets are symlinked into static/ from
  ;; the corresponding inputs.  bin/genecup is a generated launcher that
  ;; changes into the output directory and executes server.py; it is
  ;; wrapped with PATH, GUIX_PYTHONPATH and NLTK_DATA.
  (package
    (name "genecup-gemini")
    (version "1.9")
    (source (local-file %source-dir #:recursive? #t))
    (build-system python-build-system)
    (arguments
     (list
      #:tests? #t
      #:phases
      #~(modify-phases %standard-phases
          (delete 'configure)
          (delete 'build)
          (add-after 'unpack 'patch-sources
            (lambda _
              ;; Point the templates at the locally served copies of the
              ;; JavaScript and CSS assets.
              (substitute* '("templates/cytoscape.html"
                              "templates/tableview.html"
                              "templates/tableview0.html"
                              "templates/userarchive.html")
                (("https.*FileSaver.js.*\\\">") "/static/FileSaver.js\">")
                (("https.*cytoscape-svg.js.*\\\">") "/static/cytoscape-svg.js\">")
                (("https.*cytoscape.min.js.*\\\">") "/static/cytoscape.min.js\">"))
              (substitute* "templates/layout.html"
                (("https.*bootstrap.min.css.*\\\">") "/static/bootstrap.min.css\">")
                (("https.*4.*bootstrap.min.js.*\\\">") "/static/bootstrap.min.js\">")
                (("https.*4.7.0/css/font-awesome.min.css") "/static/font-awesome.min.css")
                (("https.*jquery-3.2.1.slim.min.js.*\\\">") "/static/jquery.slim.min.js\">")
                (("https.*1.12.9/umd/popper.min.js.*\\\">") "/static/popper.min.js\">"))))
          (add-after 'unpack 'setup-minipubmed
            (lambda* (#:key inputs #:allow-other-keys)
              ;; The tarball in the checkout is replaced by the
              ;; minipubmed input.
              (delete-file "minipubmed.tgz")
              (let ((pubmed (string-append (assoc-ref inputs "minipubmed")
                                           "/share/minipubmed/PubMed")))
                ;; Patch default pubmed path to store location
                (substitute* "more_functions.py"
                  (("\\./minipubmed") pubmed)))))
          (replace 'check
            (lambda* (#:key tests? #:allow-other-keys)
              ;; Honor #:tests? so the tests can be disabled (for example
              ;; with --without-tests).
              (when tests?
                ;; test_network_* files need internet, skip them
                (invoke "python" "-m" "unittest" "tests.test_hello" "-v"))))
          (replace 'install
            (lambda* (#:key outputs #:allow-other-keys)
              ;; Install the whole source tree as-is.
              (let ((out (assoc-ref outputs "out")))
                (copy-recursively "." out))))
          (add-after 'install 'install-javascript
            (lambda* (#:key inputs outputs #:allow-other-keys)
              (let ((out       (assoc-ref outputs "out"))
                    (awesome   (assoc-ref inputs "font-awesome"))
                    (bootstrap (assoc-ref inputs "bootstrap"))
                    (cytoscape (assoc-ref inputs "cytoscape"))
                    (cytoscape-svg (assoc-ref inputs "cytoscape-svg"))
                    (jquery    (assoc-ref inputs "jquery"))
                    (js-filesaver (assoc-ref inputs "js-filesaver"))
                    (js-popper (assoc-ref inputs "js-popper")))
                ;; NOTE(review): "font-awesomecss" (no slash) presumably
                ;; mirrors the layout installed by web-font-awesome --
                ;; confirm against that package.
                (symlink (string-append awesome
                                        "/share/web/font-awesomecss/font-awesome.min.css")
                         (string-append out "/static/font-awesome.min.css"))
                (symlink (string-append bootstrap
                                        "/share/web/bootstrap/css/bootstrap.min.css")
                         (string-append out "/static/bootstrap.min.css"))
                (symlink (string-append bootstrap
                                        "/share/web/bootstrap/js/bootstrap.min.js")
                         (string-append out "/static/bootstrap.min.js"))
                (symlink (string-append cytoscape
                                        "/share/genenetwork2/javascript/cytoscape/cytoscape.min.js")
                         (string-append out "/static/cytoscape.min.js"))
                (symlink (string-append cytoscape-svg
                                        "/share/javascript/cytoscape-svg.js")
                         (string-append out "/static/cytoscape-svg.js"))
                (symlink (string-append jquery
                                        "/share/web/jquery/jquery.slim.min.js")
                         (string-append out "/static/jquery.slim.min.js"))
                (symlink (string-append js-filesaver
                                        "/share/javascript/FileSaver.js")
                         (string-append out "/static/FileSaver.js"))
                (symlink (string-append js-popper
                                        "/share/javascript/popper.min.js")
                         (string-append out "/static/popper.min.js")))))
          (add-after 'install 'create-bin-wrapper
            (lambda* (#:key inputs outputs #:allow-other-keys)
              (let ((out  (assoc-ref outputs "out"))
                    (path (getenv "GUIX_PYTHONPATH")))
                (mkdir-p (string-append out "/bin"))
                ;; Launcher script: cd into the output so server.py runs
                ;; from the installed tree, then exec it with all
                ;; arguments passed through.
                (call-with-output-file (string-append out "/bin/genecup")
                  (lambda (port)
                    (format port "#!~a~%cd ~a~%exec ~a/server.py \"$@\"~%"
                            (which "bash") out out)))
                (chmod (string-append out "/bin/genecup") #o755)
                ;; Directories are resolved from the build environment's
                ;; PATH with `which'.
                (wrap-program (string-append out "/bin/genecup")
                  `("PATH" ":" prefix (,(dirname (which "esearch"))
                                        ,(dirname (which "dirname"))
                                        ,(dirname (which "grep"))
                                        ,(dirname (which "sed"))))
                  `("GUIX_PYTHONPATH" ":" prefix (,path))
                  `("NLTK_DATA" ":" prefix
                    (,(string-append (assoc-ref inputs "nltk-punkt")
                                     "/share/nltk_data"))))))))))
    (propagated-inputs
     (list
       curl
       findutils
       python-bcrypt
       python-flask
       python-flask-sqlalchemy
       python-google-genai
       python-nltk
       python-pandas
       python-pytz
       python
       nss-certs
       openssl
       ))
    ;; Labels are looked up with assoc-ref in the phases above.
    (inputs
     `(("edirect-25" ,edirect-25)
       ("inetutils" ,inetutils)
       ("gzip" ,gzip)
       ("minipubmed" ,minipubmed)
       ("tar" ,tar)
       ;; JavaScript assets symlinked into static/
       ("bootstrap" ,web-bootstrap)
       ("cytoscape" ,javascript-cytoscape-3.17)
       ("cytoscape-svg" ,js-cytoscape-svg-vendor-0.3.1)
       ("font-awesome" ,web-font-awesome)
       ("jquery" ,web-jquery)
       ("js-filesaver" ,js-filesaver-1.3.2)
       ("nltk-punkt" ,nltk-punkt)
       ("js-popper" ,js-popper-1.12.9)))
    (home-page "https://genecup.org")
    (synopsis "GeneCup: gene-addiction relationship search using PubMed")
    (description "GeneCup automatically extracts information from PubMed and
the NHGRI-EBI GWAS catalog on the relationship of any gene with a custom list
of keywords hierarchically organized into an ontology.")
    (license license:expat)))

;; The file evaluates to this package, which is what
;; `guix build -f guix.scm' builds.
genecup-gemini