diff options
| author | Munyoki Kilyungi | 2026-01-28 23:00:25 +0300 |
|---|---|---|
| committer | Munyoki Kilyungi | 2026-01-28 23:00:25 +0300 |
| commit | 3b1f3e5c087dc01db3eb76611253e523c9b4ffb1 (patch) | |
| tree | 9cfabab3f07c17f253b75a963ef479c4cc032183 /examples/investigators.scm | |
| parent | d825d25692cdff572b979970f65bc64f2ab22c0d (diff) | |
| download | gn-transform-databases-3b1f3e5c087dc01db3eb76611253e523c9b4ffb1.tar.gz | |
Move investigators transform to its own file.
* examples/dataset-metadata.scm (investigators): Delete.
(option-spec)[investigators]: Delete.
* transform/strings.scm (fix-email-id, investigator-attributes->id): Move these ...
* examples/investigators.scm: ... here.

Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'examples/investigators.scm')
| -rwxr-xr-x | examples/investigators.scm | 93 |
1 files changed, 93 insertions, 0 deletions
#! /usr/bin/env guile
!#

;;; Transform the Investigators table into RDF triples, typing every
;;; investigator as a foaf:Person.
;;;
;;; Command-line options:
;;;   -s, --settings VALUE       path of a file; its first datum is read
;;;                              and used as the connection settings
;;;                              (required)
;;;   -o, --output VALUE         handed to `outputs' as #:rdf
;;;   -d, --documentation VALUE  handed to `outputs' as #:documentation

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))


;; One email ID in the Investigators table has spaces in it.  Strip
;; every space character from EMAIL and return the result.
(define (fix-email-id email)
  (string-delete #\space email))

;; Build the identifier of an investigator from FIRST-NAME, LAST-NAME
;; and EMAIL.  The three parts are joined with "_" (EMAIL is cleaned
;; with `fix-email-id' first) and the joined string is passed to
;; `string->identifier' under the "investigator" prefix.
(define (investigator-attributes->id first-name last-name email)
  ;; There is just one record corresponding to "Evan Williams" which
  ;; does not have an email ID.  To accommodate that record, the
  ;; investigator ID is constructed from not just the email ID, but
  ;; also the first and the last names.  It would be preferable to
  ;; just find Evan Williams' email ID and insert it into the
  ;; database.
  (let ((id-parts (list first-name last-name (fix-email-id email))))
    (string->identifier "investigator"
                        (string-join id-parts "_")
                        #:separator "_")))


(define-transformer investigators
  ;; There are a few duplicate entries.  We group by email to
  ;; deduplicate.
  (tables (Investigators)
          "GROUP BY Email")
  ;; NOTE(review): the email argument below is the empty string, so
  ;; the Email column does not contribute to the subject identifier
  ;; even though rows are grouped by it.  Confirm this is intentional
  ;; (two investigators sharing first and last names would receive the
  ;; same identifier).
  (triples (investigator-attributes->id (field Investigators FirstName)
                                        (field Investigators LastName)
                                        "")
    (set rdf:type 'foaf:Person)
    (set foaf:name (string-append (field Investigators FirstName) " "
                                  (field Investigators LastName)))
    (set foaf:givenName
         (field Investigators FirstName))
    (set foaf:familyName
         (field Investigators LastName))
    (set foaf:homepage (field Investigators Url))
    (set v:adr (field Investigators Address))
    (set v:locality (field Investigators City))
    (set v:region (field Investigators State))
    (set v:postal-code (field Investigators ZipCode))
    (set v:country-name (field Investigators Country))))


;; Script entry point: parse the command line, read the connection
;; settings and run the `investigators' transform.
(let* ((option-spec
        ;; --settings is marked required: its value is opened with
        ;; `call-with-input-file' below, so leaving it out used to
        ;; surface as an opaque wrong-type error on #f.  With
        ;; `required?', getopt-long reports the missing option itself.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; Only the first datum of the settings file is read.
       (%connection-settings
        (call-with-input-file settings
          read)))
  (with-documentation
   (name "Info files / Investigators Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   (prefixes
    '(
      ("dcat:" "<http://www.w3.org/ns/dcat#>")
      ("dct:" "<http://purl.org/dc/terms/>")
      ;; NOTE(review): the FOAF specification gives the namespace as
      ;; http://xmlns.com/foaf/0.1/ -- the "#term_" suffix looks like
      ;; the anchor prefix of the spec's HTML page.  Left untouched
      ;; because the emitted IRIs depend on it; confirm against the
      ;; other transforms and consumers before changing.
      ("foaf:" "<http://xmlns.com/foaf/0.1/#term_>")
      ("geoSeries:" "<http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=>")
      ("gn:" "<http://rdf.genenetwork.org/v1/id/>")
      ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
      ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
      ("owl:" "<http://www.w3.org/2002/07/owl#>")
      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
      ("taxon:" "<http://purl.uniprot.org/taxonomy/>")
      ("v:" "<http://www.w3.org/2006/vcard/ns#>")
      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
      ))
   (inputs
    (list investigators))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))
