about summary refs log tree commit diff
path: root/guix.scm
blob: 43c3f777908968dbc7f64d8a6043be08a77b2204 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
;;
;; GeneCup guix.scm - package definition
;;
;; Build with:
;;
;;   guix build -f guix.scm
;;
;; Development shell:
;;
;;   guix shell -L . -C -N -F genecup-gemini coreutils -- genecup --port 4201
;;
;; Note: API key is read from ~/.config/gemini/credentials
;;

(define-module (guix)
  #:use-module ((guix licenses) #:prefix license:)
  #:use-module (guix build-system pyproject)
  #:use-module (guix build-system gnu)
  #:use-module (guix build-system python)
  #:use-module (guix download)
  #:use-module (guix gexp)
  #:use-module (guix git-download)
  #:use-module (guix packages)
  #:use-module (guix utils)
  #:use-module (gnu packages admin)
  #:use-module (gnu packages base)
  #:use-module (gnu packages bash)
  #:use-module (gnu packages compression)
  #:use-module (gnu packages curl)
  #:use-module (gnu packages wget)
  #:use-module (gnu packages gawk)
  #:use-module (gnu packages golang)
  #:use-module (gnu packages golang-build)
  #:use-module (gnu packages golang-compression)
  #:use-module (gnu packages golang-xyz)
  #:use-module (gnu packages javascript)
  #:use-module (gnu packages python)
  #:use-module (gnu packages python-crypto)
  #:use-module (gnu packages python-science)
  #:use-module (gnu packages python-web)
  #:use-module (gnu packages python-xyz)
  #:use-module (gnu packages python-check)
  #:use-module (gnu packages python-build)
  #:use-module (gnu packages nss)
  #:use-module (gnu packages perl)
  #:use-module (gnu packages xml)
  #:use-module (gnu packages time)
  #:use-module (gnu packages tls)
  #:use-module (gn packages javascript)
  #:use-module (gn packages web))

;; Directory that contains this file.  It is used below as the source tree
;; of the genecup-gemini package.
(define %source-dir (dirname (current-filename)))

;; Fixed-output origin for the punkt_tab zip archive from the nltk_data
;; repository.  It serves as the `source' of the nltk-punkt package.
(define nltk-punkt-source
  (origin
    (sha256
     (base32
      "01h11srafj57yvp74xkidikh6m7ch7qscz21lck7f9vlg4c68zz5"))
    (uri "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt_tab.zip")
    (method url-fetch)))

;; Data-only package: unzips the punkt_tab archive and copies the resulting
;; `punkt_tab' directory to share/nltk_data/tokenizers/punkt_tab in the
;; output.  Nothing is configured, compiled, or tested.
(define-public nltk-punkt
  (package
    (name "nltk-punkt")
    (version "1.0")
    (source nltk-punkt-source)
    (build-system gnu-build-system)
    ;; unzip is only needed at build time, by the custom 'unpack phase.
    (native-inputs (list unzip))
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          ;; The source is a zip archive, so extract it with unzip.
          (replace 'unpack
            (lambda* (#:key source #:allow-other-keys)
              (invoke "unzip" source)))
          ;; No build system to drive: drop the standard build steps.
          (delete 'configure)
          (delete 'build)
          (delete 'check)
          ;; Copy the extracted tokenizer data into the store output.
          (replace 'install
            (lambda* (#:key outputs #:allow-other-keys)
              (let* ((prefix (assoc-ref outputs "out"))
                     (target (string-append
                              prefix
                              "/share/nltk_data/tokenizers/punkt_tab")))
                (mkdir-p target)
                (copy-recursively "punkt_tab" target)))))))
    (license license:asl2.0)
    (home-page "https://www.nltk.org/nltk_data/")
    (synopsis "NLTK Punkt_Tab sentence tokenizer models")
    (description "Pre-trained models for the Punkt sentence boundary
detection tokenizer (tab format), used by NLTK's sent_tokenize function.")))

;; Fixed-output origin for the minipubmed.tgz tarball served from the GeneCup
;; git repository.  It serves as the `source' of the minipubmed package.
(define minipubmed-source
  (origin
    (sha256
     (base32
      "116k7plhn7xkbv170035si7xhbfqb1ff15rxqwimjrwm8rb1bbcc"))
    (uri "https://git.genenetwork.org/genecup/plain/minipubmed.tgz")
    (method url-fetch)))

;; Data-only package: unpacks the minipubmed tarball, generates test.xml
;; from pmid.list with EDirect's fetch-pubmed, and installs the whole
;; `minipubmed' directory under share/minipubmed in the output.
(define-public minipubmed
  (package
    (name "minipubmed")
    (version "1.0")
    (source minipubmed-source)
    (build-system gnu-build-system)
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          ;; Nothing to configure, compile, or test.
          (delete 'configure)
          (delete 'build)
          (delete 'check)
          (replace 'unpack
            (lambda* (#:key source #:allow-other-keys)
              (invoke "tar" "xzf" source)))
          (replace 'install
            (lambda* (#:key outputs #:allow-other-keys)
              (let ((out (string-append (assoc-ref outputs "out")
                                        "/share/minipubmed")))
                ;; Generate test.xml from pmid.list.  Run the pipeline via
                ;; `invoke' rather than `system' so that a non-zero exit
                ;; status aborts the build instead of being silently
                ;; ignored and installing a missing or truncated test.xml.
                (with-directory-excursion "minipubmed"
                  (invoke "sh" "-c"
                          "cat pmid.list | fetch-pubmed -path PubMed/Archive/ > test.xml"))
                (mkdir-p out)
                (copy-recursively "minipubmed" out)))))))
    ;; Provides the fetch-pubmed command used by the 'install phase.
    (inputs (list edirect-25))
    (home-page "https://genecup.org")
    (synopsis "Mini PubMed archive for GeneCup testing")
    (description "A small collection of 2473 PubMed abstracts for testing
GeneCup with four gene symbols (gria1, crhr1, drd2, and penk).")
    (license license:expat)))

;; Entrez Direct (EDirect) built from the NCBI release tarball.
;;
;; The three Go programs (xtract, rchive, transmute) are compiled from cmd/
;; in GOPATH mode against the Go libraries listed in `native-inputs'; the
;; executable scripts of the tarball are installed next to them in bin/ and
;; wrapped so that bin/ and coreutils are on their PATH.
(define-public edirect-25
  (package
    (name "edirect-25")
    (version "25.2.20260328")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect"
                                  "/versions/" version
                                  "/edirect-" version ".tar.gz"))
              (sha256
               (base32 "04km4hrnmiganafwn5516hm8n0var9ilhbr068chy8v95xk131x6"))
              (modules '((guix build utils)))
              ;; Remove two files shipped in the tarball (presumably bundled
              ;; CA certificate data -- TODO confirm).
              (snippet
               '(begin
                  (delete-file "Mozilla-CA.tar.gz")
                  (delete-file "cacert.pem")))
              ;; Patch file kept next to this guix.scm, under contrib/patches.
              (patches
               (list (local-file "contrib/patches/edirect-xml-bounds-check.patch")))))
    (build-system gnu-build-system)
    (arguments
     (list
      ;; NOTE(review): the standard 'check phase is deleted below and the
      ;; 'smoke-test phase does not look at `tests?', so this flag has no
      ;; visible effect within this definition.
      #:tests? #t
      #:phases
      #~(modify-phases %standard-phases
          (delete 'configure)
          (add-after 'unpack 'patch-path-reset
            (lambda _
              ;; These scripts reset PATH=/bin:/usr/bin which breaks Guix
              ;; Keep the caller's PATH by appending $PATH to the reset value.
              (substitute* '("xtract" "rchive" "transmute")
                (("PATH=/bin:/usr/bin")
                 "PATH=\"/bin:/usr/bin:$PATH\""))))
          (add-after 'unpack 'patch-go-version
            (lambda _
              ;; Relax Go version requirement to match available toolchain
              (substitute* '("cmd/go.mod" "eutils/go.mod")
                (("go 1\\.26\\.1") "go 1.26.0"))))
          ;; Compile the Go programs in GOPATH mode (GO111MODULE=off) with
          ;; the local toolchain only (GOTOOLCHAIN=local).
          (replace 'build
            (lambda* (#:key inputs #:allow-other-keys)
              (setenv "HOME" (getcwd))
              (setenv "GOTOOLCHAIN" "local")
              (setenv "GO111MODULE" "off")
              ;; Build GOPATH from Guix Go package inputs + local eutils
              ;; The local entry is ./gopath, whose src/eutils is a symlink
              ;; to the unpacked eutils/ directory; every input that has a
              ;; src/ subdirectory is appended after it.
              (let ((gopath (string-append (getcwd) "/gopath")))
                (mkdir-p (string-append gopath "/src"))
                (symlink (string-append (getcwd) "/eutils")
                         (string-append gopath "/src/eutils"))
                (setenv "GOPATH"
                  (string-join
                    (cons gopath
                      (map cdr
                        (filter
                          (lambda (input)
                            (directory-exists?
                              (string-append (cdr input) "/src")))
                          inputs)))
                    ":")))
              ;; Each program PROG is built from cmd/PROG.go into
              ;; cmd/PROG.Linux.
              (with-directory-excursion "cmd"
                (for-each
                  (lambda (prog)
                    (invoke "go" "build" "-v"
                            "-o" (string-append prog ".Linux")
                            (string-append prog ".go")))
                  '("xtract" "rchive" "transmute")))))
          ;; Everything is installed into bin/: the Go binaries, the
          ;; executable scripts, and the extern/ directory.
          (replace 'install
            (lambda* (#:key outputs #:allow-other-keys)
              (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                (mkdir-p bin)
                ;; Install Go binaries
                (for-each
                  (lambda (prog)
                    (install-file (string-append "cmd/" prog ".Linux") bin))
                  '("xtract" "rchive" "transmute"))
                ;; Install executable scripts
                ;; A file is installed only when it is executable, does not
                ;; carry one of the excluded suffixes, and is not named
                ;; LICENSE or README.  Files under cmd/, eutils/ and gopath/
                ;; are never considered (see the find-files predicate).
                (for-each
                  (lambda (f)
                    (when (and (not (file-is-directory? f))
                               (access? f X_OK)
                               (not (string-suffix? ".go" f))
                               (not (string-suffix? ".py" f))
                               (not (string-suffix? ".pm" f))
                               (not (string-suffix? ".pdf" f))
                               (not (string-suffix? ".pem" f))
                               (not (string-suffix? ".gz" f))
                               (not (member (basename f)
                                            '("LICENSE" "README"))))
                      (install-file f bin)))
                  (find-files "."
                    (lambda (f s)
                      (and (not (string-contains f "/cmd/"))
                           (not (string-contains f "/eutils/"))
                           (not (string-contains f "/gopath/"))))
                    #:directories? #f))
                ;; Install extern/ data (contains .ini config files)
                (copy-recursively "extern"
                                  (string-append bin "/extern")))))
          ;; Prefix PATH of the installed scripts with bin/ itself and with
          ;; coreutils' bin/.
          (add-after 'install 'wrap-programs
            (lambda* (#:key inputs outputs #:allow-other-keys)
              (let* ((out (assoc-ref outputs "out"))
                     (bin (string-append out "/bin"))
                     (coreutils (assoc-ref inputs "coreutils")))
                ;; Only wrap scripts directly in bin/, not in
                ;; subdirs (extern/ scripts are sourced, not executed).
                ;; Skip .sh (sourced) and .Linux (Go binaries).
                (for-each
                  (lambda (f)
                    (wrap-program f
                      `("PATH" ":" prefix
                        (,bin ,(string-append coreutils "/bin")))))
                  (filter
                    (lambda (f)
                      (and (string=? (dirname f) bin)
                           (not (string-suffix? ".sh" f))
                           (not (string-suffix? ".Linux" f))))
                    (find-files bin)))
                ;; wrap-program renames xtract -> .xtract-real, but the
                ;; script looks for $0.Linux, so create symlinks
                (for-each
                  (lambda (prog)
                    (symlink (string-append bin "/" prog ".Linux")
                             (string-append bin "/." prog "-real.Linux")))
                  '("xtract" "rchive" "transmute")))))
          ;; The standard 'check phase is replaced by the 'smoke-test phase
          ;; below, which runs against the installed binaries.
          (delete 'check)
          (add-after 'wrap-programs 'smoke-test
            (lambda* (#:key outputs #:allow-other-keys)
              (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                ;; Smoke test: xtract.Linux parses XML
                ;; The pipeline succeeds only if the output contains "hello".
                (invoke "sh" "-c"
                  (string-append
                    "echo '<test><a>hello</a><b>world</b></test>' | "
                    bin "/xtract.Linux -pattern test -element a b"
                    " | grep -q hello"))
                ;; Smoke test: rchive.Linux version
                (invoke (string-append bin "/rchive.Linux") "-version")
                ;; Smoke test: transmute.Linux version
                (invoke (string-append bin "/transmute.Linux")
                        "-version")))))))
    ;; Go toolchain plus the Go libraries; the 'build phase adds each of
    ;; these that has a src/ directory to GOPATH.
    (native-inputs
     (list go-1.26
           go-github-com-fatih-color
           go-github-com-gedex-inflector
           go-github-com-goccy-go-yaml
           go-github-com-klauspost-compress
           go-github-com-klauspost-cpuid-v2
           go-github-com-klauspost-pgzip
           go-github-com-komkom-toml
           go-github-com-mattn-go-colorable
           go-github-com-mattn-go-isatty
           go-github-com-pbnjay-memory
           go-github-com-pkg-errors
           go-github-com-surgebase-porter2
           go-golang-org-x-sys
           go-golang-org-x-text))
    ;; Command-line tools propagated alongside the package; presumably the
    ;; EDirect scripts call them at run time -- verify against the scripts.
    (propagated-inputs (list curl wget grep sed gawk coreutils findutils gzip unzip))
    ;; coreutils is looked up by name in the 'wrap-programs phase.
    (inputs (list bash-minimal coreutils perl perl-xml-simple python))
    (home-page "https://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description "Entrez Direct (EDirect) provides access to the NCBI's suite
of interconnected databases from a Unix terminal window.  Search terms are
entered as command-line arguments.  Individual operations are connected with
Unix pipes to construct multi-step queries.  Selected records can then be
retrieved in a variety of formats.")
    (license license:public-domain)))

;; Google Generative AI Python SDK, built from the PyPI sdist with the
;; pyproject build system.  The test suite is disabled.
(define-public python-google-genai
  (package
    (name "python-google-genai")
    (version "1.68.0")
    ;; `name' and `version' stay ahead of `source', which refers to `version'.
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "google_genai" version))
       (sha256
        (base32
         "15na2kxak5farpm5az0dw7r3c3mf3nhy95rsk5r963v3pjwc0c5c"))))
    (build-system pyproject-build-system)
    ;; Build-time only: the packaging back end.
    (native-inputs
     (list python-setuptools
           python-wheel))
    ;; Propagated so that they end up in any profile containing this package.
    (propagated-inputs
     (list python
           python-google-auth
           python-httpx
           python-pydantic
           python-requests
           python-tenacity
           python-websockets
           python-typing-extensions
           python-distro
           python-sniffio
           sed))
    (arguments
     ;; Upstream tests need network access and API keys, so skip them.
     (list #:tests? #f))
    (license license:asl2.0)
    (home-page "https://github.com/googleapis/python-genai")
    (synopsis "Google Generative AI Python SDK")
    (description "Client library for the Google Generative AI API, providing
access to Gemini models.")))

;; The GeneCup web application, packaged from the checkout that contains
;; this file.
;;
;; The source tree is copied verbatim into the output.  Templates are patched
;; to load JavaScript/CSS assets from /static instead of remote URLs, the
;; assets themselves are symlinked from their Guix packages, and a
;; bin/genecup launcher is generated that changes into the output directory
;; and executes server.py.
(define-public genecup-gemini
  (package
    (name "genecup-gemini")
    (version "1.9")
    (source (local-file %source-dir #:recursive? #t))
    (build-system python-build-system)
    (arguments
     (list
      #:tests? #f ; no test suite
      #:phases
      #~(modify-phases %standard-phases
          (delete 'configure)
          (delete 'build)
          ;; Point the HTML templates at the locally served copies of the
          ;; assets.  This phase needs no inputs or outputs.
          (add-after 'unpack 'patch-sources
            (lambda _
              (substitute* '("templates/cytoscape.html"
                              "templates/tableview.html"
                              "templates/tableview0.html"
                              "templates/userarchive.html")
                (("https.*FileSaver.js.*\\\">") "/static/FileSaver.js\">")
                (("https.*cytoscape-svg.js.*\\\">") "/static/cytoscape-svg.js\">")
                (("https.*cytoscape.min.js.*\\\">") "/static/cytoscape.min.js\">"))
              (substitute* "templates/layout.html"
                (("https.*bootstrap.min.css.*\\\">") "/static/bootstrap.min.css\">")
                (("https.*4.*bootstrap.min.js.*\\\">") "/static/bootstrap.min.js\">")
                (("https.*4.7.0/css/font-awesome.min.css") "/static/font-awesome.min.css")
                (("https.*jquery-3.2.1.slim.min.js.*\\\">") "/static/jquery.slim.min.js\">")
                (("https.*1.12.9/umd/popper.min.js.*\\\">") "/static/popper.min.js\">"))))
          ;; Drop the bundled tarball and use the minipubmed package instead.
          (add-after 'unpack 'setup-minipubmed
            (lambda* (#:key inputs #:allow-other-keys)
              (delete-file "minipubmed.tgz")
              (let ((pubmed (string-append (assoc-ref inputs "minipubmed")
                                           "/share/minipubmed/PubMed")))
                ;; Patch default pubmed path to store location
                (substitute* "more_functions.py"
                  (("\\./minipubmed") pubmed)))))
          ;; The application runs from its source tree, so install it as is.
          (replace 'install
            (lambda* (#:key outputs #:allow-other-keys)
              (let ((out (assoc-ref outputs "out")))
                (copy-recursively "." out))))
          ;; Symlink the assets referenced by the patched templates into
          ;; static/ of the output.
          (add-after 'install 'install-javascript
            (lambda* (#:key inputs outputs #:allow-other-keys)
              (let ((out       (assoc-ref outputs "out"))
                    (awesome   (assoc-ref inputs "font-awesome"))
                    (bootstrap (assoc-ref inputs "bootstrap"))
                    (cytoscape (assoc-ref inputs "cytoscape"))
                    (cytoscape-svg (assoc-ref inputs "cytoscape-svg"))
                    (jquery    (assoc-ref inputs "jquery"))
                    (js-filesaver (assoc-ref inputs "js-filesaver"))
                    (js-popper (assoc-ref inputs "js-popper")))
                ;; NOTE(review): "font-awesomecss" has no "/" before "css";
                ;; presumably this mirrors the layout of the font-awesome
                ;; package -- confirm against that package.
                (symlink (string-append awesome
                                        "/share/web/font-awesomecss/font-awesome.min.css")
                         (string-append out "/static/font-awesome.min.css"))
                (symlink (string-append bootstrap
                                        "/share/web/bootstrap/css/bootstrap.min.css")
                         (string-append out "/static/bootstrap.min.css"))
                (symlink (string-append bootstrap
                                        "/share/web/bootstrap/js/bootstrap.min.js")
                         (string-append out "/static/bootstrap.min.js"))
                (symlink (string-append cytoscape
                                        "/share/genenetwork2/javascript/cytoscape/cytoscape.min.js")
                         (string-append out "/static/cytoscape.min.js"))
                (symlink (string-append cytoscape-svg
                                        "/share/javascript/cytoscape-svg.js")
                         (string-append out "/static/cytoscape-svg.js"))
                (symlink (string-append jquery
                                        "/share/web/jquery/jquery.slim.min.js")
                         (string-append out "/static/jquery.slim.min.js"))
                (symlink (string-append js-filesaver
                                        "/share/javascript/FileSaver.js")
                         (string-append out "/static/FileSaver.js"))
                (symlink (string-append js-popper
                                        "/share/javascript/popper.min.js")
                         (string-append out "/static/popper.min.js")))))
          ;; Generate bin/genecup: a shell script that changes into the
          ;; output directory and executes server.py with the caller's
          ;; arguments, wrapped with PATH, GUIX_PYTHONPATH and NLTK_DATA.
          (add-after 'install 'create-bin-wrapper
            (lambda* (#:key inputs outputs #:allow-other-keys)
              (let ((out  (assoc-ref outputs "out"))
                    (path (getenv "GUIX_PYTHONPATH")))
                (mkdir-p (string-append out "/bin"))
                (call-with-output-file (string-append out "/bin/genecup")
                  (lambda (port)
                    (format port "#!~a~%cd ~a~%exec ~a/server.py \"$@\"~%"
                            (which "bash") out out)))
                (chmod (string-append out "/bin/genecup") #o755)
                ;; The directories of the tools are resolved from the build
                ;; environment's PATH at build time.
                (wrap-program (string-append out "/bin/genecup")
                  `("PATH" ":" prefix (,(dirname (which "esearch"))
                                        ,(dirname (which "dirname"))
                                        ,(dirname (which "grep"))
                                        ,(dirname (which "sed"))))
                  `("GUIX_PYTHONPATH" ":" prefix (,path))
                  `("NLTK_DATA" ":" prefix
                    (,(string-append (assoc-ref inputs "nltk-punkt")
                                     "/share/nltk_data"))))))))))
    (propagated-inputs
     (list
       python-bcrypt
       python-flask
       python-flask-sqlalchemy
       python-google-genai
       python-nltk
       python-pandas
       python-pytz
       python
       nss-certs
       openssl))
    ;; Labelled inputs: the phases above look several of these up by name
    ;; with `assoc-ref'.
    (inputs
     `(("edirect-25" ,edirect-25)
       ("inetutils" ,inetutils)
       ("gzip" ,gzip)
       ("minipubmed" ,minipubmed)
       ("tar" ,tar)
       ;; JavaScript assets symlinked into static/
       ("bootstrap" ,web-bootstrap)
       ("cytoscape" ,javascript-cytoscape-3.17)
       ("cytoscape-svg" ,js-cytoscape-svg-vendor-0.3.1)
       ("font-awesome" ,web-font-awesome)
       ("jquery" ,web-jquery)
       ("js-filesaver" ,js-filesaver-1.3.2)
       ("nltk-punkt" ,nltk-punkt)
       ("js-popper" ,js-popper-1.12.9)))
    ;; Same URL as the minipubmed package's home page.
    (home-page "https://genecup.org")
    (synopsis "GeneCup: gene-addiction relationship search using PubMed")
    (description "GeneCup automatically extracts information from PubMed and
the NHGRI-EBI GWAS catalog on the relationship of any gene with a custom list
of keywords hierarchically organized into an ontology.")
    (license license:expat)))

;; The file evaluates to this package, which is what
;; `guix build -f guix.scm' builds.
genecup-gemini