about summary refs log tree commit diff
#! /usr/bin/env guile
!#

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (rnrs bytevectors)
             (ice-9 format)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))



;; Transformer for the GeneNetwork-curated GeneRIF ("gene wiki") entries.
;;
;; Rows come from GeneRIF left-joined with Species, GeneRIFXRef and
;; GeneCategory, restricted to display > 0 and a non-NULL comment, and
;; grouped by (Id, versionId, symbol).  The `triples' form names the
;; subject first and then the predicate/object pairs attached to it.
(define-transformer gn-genewiki-entries
  (tables (GeneRIF
           (left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId")
           (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id")
           (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id"))
          "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol")
  (triples
      ;; Subject: gn-uuid over "<Id>.<versionId>.<createtime>?type=wikii".
      ;; NOTE(review): "wikii" looks like a typo for "wiki" (compare the
      ;; dct:identifier seed below), but it is only a seed for gn-uuid and
      ;; changing it would change every generated subject -- left as is.
      (string->identifier ""
                          (gn-uuid (format #f "~a.~a.~a?type=wikii"
                                           (field GeneRIF Id)
                                           (field GeneRIF versionId)
                                           (field GeneRIF createtime)))
                          #:url-char #\-)
    ;; Identifier seeded from the entry Id only (no version, no timestamp).
    (set dct:identifier (gn-uuid (format #f "~a?type=wiki"
                                         (field GeneRIF Id))))
    ;; The label is hand-assembled as a single-quoted, @en-tagged literal
    ;; and passed as a symbol.  Inside such a literal a backslash, a line
    ;; break or a single quote must be escaped; the backslash rule comes
    ;; first so the backslashes added by the later rules are not doubled.
    ;; This is the same escape list ncbi-genewiki-entries uses.
    (set rdfs:label (string->symbol
                     (format #f "'~a'@en"
                             (replace-substrings
                              (sanitize-rdf-string
                               (field GeneRIF comment))
                              '(("\\" . "\\\\")
                                ("\n" . "\\n")
                                ("\r" . "\\r")
                                ("'" . "\\'"))))))
    (set rdf:type 'gnc:gn_wiki_entry)
    (set gnt:symbol (field GeneRIF symbol))
    ;; Species.Fullname is passed through remap-species-identifiers before
    ;; being turned into an identifier.
    (set gnt:has_species (string->identifier "" (remap-species-identifiers (field Species Fullname))))
    ;; Creation time as a typed literal; ~s supplies the surrounding
    ;; double quotes.  No trailing space after the datatype, matching
    ;; ncbi-genewiki-entries.
    ;; NOTE(review): the XSD datatype is spelled xsd:dateTime and its
    ;; lexical form needs a "T" separator; confirm with consumers before
    ;; changing either the datatype name or the CAST.
    (set dct:created
         (string->symbol
          (format #f "~s^^xsd:datetime"
                  (field
                   ("CAST(createtime AS CHAR)" EntryCreateTime)))))
    ;; PubMed_ID is split on spaces; blank pieces become "" and every
    ;; other piece becomes the symbol pubmed:<trimmed id>.
    (multiset dct:references
              (map (lambda (pmid)
                     (match pmid
                       ((? string-blank? p) "")
                       (p (string->symbol
                           (format #f "pubmed:~a" (string-trim-both p))))))
                   (string-split (field GeneRIF PubMed_ID PMID)
                                 #\space)))
    ;; Hide e-mail for now.
    ;; (set foaf:mbox
    ;;      (match (sanitize-rdf-string (field GeneRIF email))
    ;;        ((? string-blank? mbox) "")
    ;;        (mbox (string->symbol
    ;;               (format #f "<~a>" mbox)))))
    ;; Homepage as an <IRI> symbol, or "" when the sanitized URL is blank.
    (set foaf:homepage
         (match (sanitize-rdf-string (field GeneRIF weburl))
           ((? string-blank? homepage) "")
           (homepage (string->symbol
                      (format #f "<~a>" homepage)))))
    (set dct:hasVersion (annotate-field (format #f "~s" (field GeneRIF versionId))
                                        '^^xsd:integer))
    (set gnt:initial (sanitize-rdf-string (field GeneRIF initial)))
    (set gnt:reason (field GeneRIF reason))
    ;; Category names are concatenated in SQL with ';' and split again
    ;; here, giving one object per distinct category name.
    (multiset gnt:belongs_to_category
              (string-split
               (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR ';')"
                       GeneCategory))
               #\;))))

;; Transformer for the NCBI GeneRIF entries.
;;
;; Rows come from GeneRIF_BASIC left-joined with Species on SpeciesId.
;; The `triples' form names the subject first and then the
;; predicate/object pairs attached to it.
(define-transformer ncbi-genewiki-entries
  (tables (GeneRIF_BASIC
           (left-join Species "USING (SpeciesId)")))
  (triples
      ;; Subject: gn-uuid over "<GeneId>_<PubMed_ID>_<createtime>_<VersionId>",
      ;; with createtime rendered by DATE_FORMAT as YYYY-MM-DDTHH:MM:SS.
      (string->identifier
       "" (gn-uuid (format #f "~a_~a_~a_~a"
                           (field GeneRIF_BASIC GeneId)
                           (field GeneRIF_BASIC PubMed_ID)
                           (field ("DATE_FORMAT(createtime, '%Y-%m-%dT%T')" CreateTime))
                           (field GeneRIF_BASIC VersionId)))
       #:url-char #\-)
    (set rdf:type 'gnc:ncbi_wiki_entry)
    ;; The label is hand-assembled as a single-quoted, @en-tagged literal.
    ;; It is passed as a symbol, like the typed dct:created literal below
    ;; and like the label in gn-genewiki-entries, rather than as a plain
    ;; string.
    ;; NOTE(review): this assumes symbols are written out verbatim while
    ;; strings get quoted again -- confirm against the triple writer.
    ;; Backslash is escaped first so the backslashes added by the later
    ;; rules are not doubled.
    (set rdfs:label (string->symbol
                     (format #f "'~a'@en"
                             (replace-substrings
                              (sanitize-rdf-string
                               (field GeneRIF_BASIC comment))
                              '(("\\" . "\\\\")
                                ("\n" . "\\n")
                                ("\r" . "\\r")
                                ("'" . "\\'"))))))
    (set gnt:symbol (field GeneRIF_BASIC symbol))
    ;; Species.Fullname is passed through remap-species-identifiers before
    ;; being turned into an identifier.
    (set gnt:has_species (string->identifier "" (remap-species-identifiers (field Species Fullname))))
    ;; TaxID, GeneId and PubMed_ID are each attached to a prefix with
    ;; `ontology' (taxon:, generif: and pubmed: respectively).
    (set skos:notation (ontology 'taxon: (field GeneRIF_BASIC TaxID TaxonomicId)))
    (set dct:hasVersion (annotate-field (field GeneRIF_BASIC versionId) '^^xsd:integer))
    (set gnt:has_gene_id (ontology 'generif: (field GeneRIF_BASIC GeneId)))
    (set dct:references (ontology 'pubmed: (field GeneRIF_BASIC PubMed_ID)))
    ;; Creation time as a typed literal; ~s supplies the surrounding
    ;; double quotes.
    ;; NOTE(review): the XSD datatype is spelled xsd:dateTime and its
    ;; lexical form needs a "T" separator; confirm with consumers before
    ;; changing either the datatype name or the CAST.
    (set dct:created
         (string->symbol
          (format #f "~s^^xsd:datetime"
                  (field
                   ("CAST(createtime AS CHAR)" EntryCreateTime)))))))



;; Script entry point.
;;
;; Command-line options (each takes a value):
;;   -s, --settings       file holding the connection settings; it is read
;;                        with `read', i.e. as a single Scheme datum.
;;                        Required.
;;   -o, --output         value passed on as the #:rdf output.
;;   -d, --documentation  value passed on as the #:documentation output.
;;
;; The parsed settings, the prefix table and both transformers are then
;; handed to `with-documentation'.
(let* ((option-spec
        ;; `settings' is marked required so that a missing -s is reported
        ;; by getopt-long itself instead of surfacing later as a failure
        ;; of call-with-input-file on #f.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       (%connection-settings
        (call-with-input-file settings
          read)))

  (with-documentation
   (name "GeneRIF Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   (prefixes
    '(("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
      ("gn:" "<http://rdf.genenetwork.org/v1/id/>")
      ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
      ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
      ("dct:" "<http://purl.org/dc/terms/>")
      ;; The FOAF namespace is http://xmlns.com/foaf/0.1/; the "#term_"
      ;; form is only the anchor scheme of the HTML specification page
      ;; and would expand foaf:homepage to a non-existent property IRI.
      ("foaf:" "<http://xmlns.com/foaf/0.1/>")
      ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")
      ("taxon:" "<https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=>")
      ("generif:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>")
      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
      ("owl:" "<http://www.w3.org/2002/07/owl#>")))
   (inputs
    (list
     gn-genewiki-entries
     ncbi-genewiki-entries))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))