#! /usr/bin/env guile
!#

;;; Declares the `publication' transformer over the Publication table
;;; and runs it through `with-documentation', handing it the paths
;;; given on the command line for the RDF and documentation outputs.
;;;
;;; Command-line options (each takes a value):
;;;   -s, --settings       file whose first datum is `read' and used as
;;;                        the connection settings (required)
;;;   -o, --output         passed to `with-documentation' as #:rdf
;;;   -d, --documentation  passed to `with-documentation' as
;;;                        #:documentation

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))

;; NOTE(review): `define-transformer', `field', `set' and `multiset'
;; come from the project's (transform ...) modules; their exact
;; expansion is not visible here.  The order of the clauses below is
;; kept as-is in case the generated query or output depends on it.
(define-transformer publication
  (tables (Publication))
  (triples
   ;; Subject of every triple below.  The SQL expression yields '' when
   ;; PubMed_ID is NULL; in that case an "unpublished" identifier is
   ;; built from the row's Id, otherwise a pubmed: term is used.
   (let ((pmid (field
                ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))"
                 pmid)))
         (publication-id (field Publication Id)))
     (if (string-null? pmid)
         (string->identifier "unpublished"
                             (number->string publication-id))
         (ontology 'pubmed: pmid)))
   (set rdf:type 'fabio:ResearchPaper)
   (set fabio:hasPubMedId
        (ontology 'pubmed:
                  (field ("IFNULL(PubMed_ID, '')" pubmedId))))
   ;; The next four fields are passed through `delete-substrings' with
   ;; "Unknown" -- presumably to drop that placeholder text; confirm
   ;; against the helper's definition.
   (set dct:title
        (delete-substrings (field Publication Title) "Unknown"))
   (set fabio:Journal
        (delete-substrings (field Publication Journal) "Unknown"))
   (set prism:volume
        (delete-substrings (field Publication Volume) "Unknown"))
   (set fabio:page
        (delete-substrings (field Publication Pages) "Unknown"))
   (set prism:publicationDate
        (annotate-field
         (delete-substrings (field Publication Month) "Unknown")
         '^^xsd:gMonth))
   ;; A Year of 0 is turned into NULL by the SQL expression itself.
   (set fabio:hasPublicationYear
        (annotate-field
         (field ("IF(Publication.Year = 0, NULL, Publication.Year)" Year))
         '^^xsd:gYear))
   (multiset dct:creator
             ;; The authors field is a comma
             ;; separated list. Split it.
             (map string-trim
                  (string-split
                   (sanitize-rdf-string (field Publication Authors))
                   #\,)))
   (set dct:abstract
        (sanitize-rdf-string (field Publication Abstract)))))

;; Script entry point: parse the options, read the connection settings
;; and run the transformer.
(let* ((option-spec
        ;; --settings is marked required so that a missing option is
        ;; reported by getopt-long instead of failing later inside
        ;; `call-with-input-file' with #f as the file name.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; Only the first datum in the settings file is read.
       (%connection-settings
        (call-with-input-file settings read)))
  (with-documentation
   (name "Publications Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   (prefixes
    '(("gnt:" "<http://genenetwork.org/term/>")
      ("fabio:" "<http://purl.org/spar/fabio/>")
      ("dct:" "<http://purl.org/dc/terms/>")
      ("prism:" "<http://prismstandard.org/namespaces/basic/2.0/>")
      ("gn:" "<http://genenetwork.org/id/>")
      ("gnc:" "<http://genenetwork.org/category/>")
      ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")))
   (inputs (list publication))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))