From 3b1f3e5c087dc01db3eb76611253e523c9b4ffb1 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 28 Jan 2026 23:00:25 +0300 Subject: Move investigators transform to its own file. * examples/dataset-metadata.scm (investigators): Delete. (option-spec)[investigators]: Delete. * transform/strings.scm (fix-email-id, investigator-attributes->id): Move these ... * examples/investigators.scm: ... here. Signed-off-by: Munyoki Kilyungi --- examples/dataset-metadata.scm | 23 ----------- examples/investigators.scm | 93 +++++++++++++++++++++++++++++++++++++++++++ transform/strings.scm | 17 -------- 3 files changed, 93 insertions(+), 40 deletions(-) create mode 100755 examples/investigators.scm diff --git a/examples/dataset-metadata.scm b/examples/dataset-metadata.scm index 783b90c..38dac9c 100755 --- a/examples/dataset-metadata.scm +++ b/examples/dataset-metadata.scm @@ -12,27 +12,6 @@ (transform special-forms)) -(define-transformer investigators - ;; There are a few duplicate entries. We group by email to - ;; deduplicate. 
- (tables (Investigators) - "GROUP BY Email") - (triples (investigator-attributes->id (field Investigators FirstName) - (field Investigators LastName) - "") - (set rdf:type 'foaf:Person) - (set foaf:name (string-append (field Investigators FirstName) " " - (field Investigators LastName))) - (set foaf:givenName - (field Investigators FirstName)) - (set foaf:familyName - (field Investigators LastName)) - (set foaf:homepage (field Investigators Url)) - (set v:adr (field Investigators Address)) - (set v:locality (field Investigators City)) - (set v:region (field Investigators State)) - (set v:postal-code (field Investigators ZipCode)) - (set v:country-name (field Investigators Country)))) (define-transformer gene-chip (tables (GeneChip @@ -406,7 +385,6 @@ #:separator "_" #:proc (lambda (x) x))))) -;; Molecular Traits are also referred to as ProbeSets (define-transformer probesetfreeze (tables (ProbeSetFreeze (left-join InfoFiles "ON InfoFiles.InfoPageName = ProbeSetFreeze.Name") @@ -478,7 +456,6 @@ publishfreeze genofreeze probesetfreeze - investigators gene-chip)) (outputs `(#:documentation ,documentation diff --git a/examples/investigators.scm b/examples/investigators.scm new file mode 100755 index 0000000..8d31974 --- /dev/null +++ b/examples/investigators.scm @@ -0,0 +1,93 @@ +#! /usr/bin/env guile +!# + +(use-modules (srfi srfi-1) + (srfi srfi-26) + (ice-9 getopt-long) + (ice-9 match) + (ice-9 regex) + (transform strings) + (transform sql) + (transform triples) + (transform special-forms)) + + +;; One email ID in the Investigators table has spaces in it. This +;; function fixes that. +(define (fix-email-id email) + (string-delete #\space email)) + +(define (investigator-attributes->id first-name last-name email) + ;; There is just one record corresponding to "Evan Williams" which + ;; does not have an email ID. To accommodate that record, we + ;; construct the investigator ID from not just the email ID, but + ;; also the first and the last names. 
It would be preferable to just + ;; find Evan Williams' email ID and insert it into the database. + (string->identifier "investigator" + (string-join + (list first-name last-name (fix-email-id email)) + "_") + #:separator "_")) + + +(define-transformer investigators + ;; There are a few duplicate entries. We group by email to + ;; deduplicate. + (tables (Investigators) + "GROUP BY Email") + (triples (investigator-attributes->id (field Investigators FirstName) + (field Investigators LastName) + "") + (set rdf:type 'foaf:Person) + (set foaf:name (string-append (field Investigators FirstName) " " + (field Investigators LastName))) + (set foaf:givenName + (field Investigators FirstName)) + (set foaf:familyName + (field Investigators LastName)) + (set foaf:homepage (field Investigators Url)) + (set v:adr (field Investigators Address)) + (set v:locality (field Investigators City)) + (set v:region (field Investigators State)) + (set v:postal-code (field Investigators ZipCode)) + (set v:country-name (field Investigators Country)))) + + +(let* ((option-spec + '((settings (single-char #\s) (value #t)) + (output (single-char #\o) (value #t)) + (documentation (single-char #\d) (value #t)))) + (options (getopt-long (command-line) option-spec)) + (settings (option-ref options 'settings #f)) + (output (option-ref options 'output #f)) + (documentation (option-ref options 'documentation #f)) + (%connection-settings + (call-with-input-file settings + read))) + (with-documentation + (name "Info files / Investigators Metadata") + (connection %connection-settings) + (table-metadata? 
#f) + (prefixes + '( + ("dcat:" "") + ("dct:" "") + ("foaf:" "") + ("geoSeries:" "") + ("gn:" "") + ("gnc:" "") + ("gnt:" "") + ("owl:" "") + ("rdf:" "") + ("rdfs:" "") + ("skos:" "") + ("taxon:" "") + ("v:" "") + ("xkos:" "") + ("xsd:" "") + )) + (inputs + (list investigators)) + (outputs + `(#:documentation ,documentation + #:rdf ,output)))) diff --git a/transform/strings.scm b/transform/strings.scm index 7544399..51c5ed1 100644 --- a/transform/strings.scm +++ b/transform/strings.scm @@ -19,23 +19,6 @@ fix-email-id investigator-attributes->id)) -;; One email ID in the Investigators table has spaces in it. This -;; function fixes that. -(define (fix-email-id email) - (string-delete #\space email)) - -(define (investigator-attributes->id first-name last-name email) - ;; There is just one record corresponding to "Evan Williams" which - ;; does not have an email ID. To accommodate that record, we - ;; construct the investigator ID from not just the email ID, but - ;; also the first and the last names. It would be preferable to just - ;; find Evan Williams' email ID and insert it into the database. - (string->identifier "investigator" - (string-join - (list first-name last-name (fix-email-id email)) - "_") - #:separator "_")) - (define (lower-case-and-replace-spaces str) (string-map (lambda (c) -- cgit 1.4.1