aboutsummaryrefslogtreecommitdiff
#! /usr/bin/env guile
!#

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))



;; Transformer for the Publication table.  The first form inside
;; `triples' computes the subject; every following `set'/`multiset'
;; clause pairs a predicate with a value read from the row.
(define-transformer publication
  (tables (Publication))
  (triples
      ;; Subject: the PubMed resource when the row carries a PubMed ID
      ;; (the SQL maps a NULL ID to the empty string), otherwise an
      ;; identifier built from "unpublished" and the row's Id.
      (let ((pmid (field
                   ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))"
                    pmid)))
            (publication-id (field Publication Id)))
        (if (string-null? pmid)
            (string->identifier "unpublished"
                                (number->string publication-id))
            (ontology 'pubmed: pmid)))
    (set rdf:type 'fabio:ResearchPaper)
    (set fabio:hasPubMedId
         (ontology 'pubmed: (field ("IFNULL(PubMed_ID, '')" pubmedId))))
    ;; Title, journal, volume, pages and month all pass through
    ;; `delete-substrings' with "Unknown" -- presumably to strip that
    ;; placeholder text (helper defined elsewhere; confirm).
    (set dct:title (delete-substrings (field Publication Title)
                                      "Unknown"))
    (set fabio:Journal (delete-substrings (field Publication Journal)
                                          "Unknown"))
    (set prism:volume (delete-substrings (field Publication Volume)
                                         "Unknown"))
    (set fabio:page (delete-substrings (field Publication Pages)
                                       "Unknown"))
    (set prism:publicationDate (annotate-field
                                (delete-substrings (field Publication Month)
                                                   "Unknown")
                                '^^xsd:gMonth))
    ;; A Year of 0 is turned into NULL by the SQL expression.
    (set fabio:hasPublicationYear
         (annotate-field
          (field
           ("IF(Publication.Year = 0, NULL, Publication.Year)" Year))
          '^^xsd:gYear))
    (multiset dct:creator
              ;; The authors field is a comma separated list.  Split
              ;; it, then strip whitespace on BOTH sides of each name:
              ;; `string-trim' alone only removes leading whitespace,
              ;; which would leave a trailing blank on any name that
              ;; is followed by " ,".
              (map string-trim-both
                   (string-split (sanitize-rdf-string (field Publication Authors))
                                 #\,)))
    (set dct:abstract
         (sanitize-rdf-string
          (field Publication Abstract)))))



;; Script entry point.  Parses the command line, reads the connection
;; settings from the file given with --settings/-s, and hands the
;; `publication' transformer to `with-documentation'.
;;
;; Options:
;;   -s, --settings FILE        file whose first Scheme datum is used as
;;                              the connection settings (required)
;;   -o, --output FILE          passed on as the #:rdf output
;;   -d, --documentation FILE   passed on as the #:documentation output
(let* ((option-spec
        ;; --settings is marked required so that a missing option is
        ;; reported by getopt-long itself instead of surfacing later as
        ;; a wrong-type error from `call-with-input-file' on #f.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; `read' consumes a single datum from the settings file.
       (%connection-settings
        (call-with-input-file settings
          read)))
  (with-documentation
   (name "Publications Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   (prefixes
    '(("gnt:" "<http://genenetwork.org/term/>")
      ("fabio:" "<http://purl.org/spar/fabio/>")
      ("dct:" "<http://purl.org/dc/terms/>")
      ("prism:" "<http://prismstandard.org/namespaces/basic/2.0/>")
      ("gn:" "<http://genenetwork.org/id/>")
      ("gnc:" "<http://genenetwork.org/category/>")
      ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")))
   (inputs
    (list publication))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))