about summary refs log tree commit diff
path: root/examples/generif.scm
diff options
context:
space:
mode:
Diffstat (limited to 'examples/generif.scm')
-rwxr-xr-x examples/generif.scm 250
1 files changed, 83 insertions, 167 deletions
diff --git a/examples/generif.scm b/examples/generif.scm
index 11235e0..a8a8460 100755
--- a/examples/generif.scm
+++ b/examples/generif.scm
@@ -11,181 +11,99 @@
              (transform strings)
              (transform sql)
              (transform triples)
-             (transform special-forms)
-             (transform uuid))
+             (transform special-forms))
 
 
 
-(define-transformer genewiki-symbols
-  (tables (GeneRIF_BASIC)
-          "GROUP BY BINARY symbol")
-  (triples
-      (string->identifier
-       "symbol"
-       (regexp-substitute/global #f "[^A-Za-z0-9:]"
-                                 (field GeneRIF_BASIC symbol)
-                                 'pre "_" 'post)
-       #:proc (lambda (x) x))
-    (set rdfs:label
-         (field GeneRIF_BASIC symbol))))
-
-;; Some symbols exist in the RIF table that don't exist in the GeneRIF
-;; table.
-(define-transformer generif-symbols
-  (tables (GeneRIF)
-          "WHERE symbol NOT IN (SELECT symbol from GeneRIF_BASIC) GROUP BY BINARY symbol")
-  (triples
-      (string->identifier
-       "symbol"
-       (regexp-substitute/global #f "[^A-Za-z0-9:]"
-                                 (field GeneRIF symbol)
-                                 'pre "_" 'post)
-       #:proc (lambda (x) x))
-    (set rdfs:label
-         (field GeneRIF symbol))))
-
 (define-transformer gn-genewiki-entries
   (tables (GeneRIF
            (left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId")
            (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id")
            (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id"))
-          "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol, GeneRIF.SpeciesId, GeneRIF.createtime, GeneRIF.reason")
-  (schema-triples
-   (gnc:GeneWikiEntry a rdfs:Class)
-   (gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
-   (gnt:initial a owl:ObjectProperty)
-   (gnt:initial rdfs:domain gnc:GeneWikiEntry)
-   (gnt:initial skos:definition "Optional user or project code or your initials")
-   (gnt:reason a owl:ObjectProperty)
-   (gnt:reason rdfs:domain gnc:GeneWikiEntry)
-   (gnt:reason skos:definition "The reason why this resource was modified")
-   (gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork")
-   (gnt:geneSymbol rdfs:domain gnc:GNWikiEntry))
+          "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol")
   (triples
-      (string->identifier
-       "symbol"
-       (regexp-substitute/global
-        #f "[^A-Za-z0-9:]"
-        (field GeneRIF symbol)
-        'pre "_" 'post)
-       #:proc (lambda (x) x))
-    (set rdfs:comment
-         (let* ((generif-comment (sanitize-rdf-string (field GeneRIF comment)))
-                (create-time (field GeneRIF createtime EntryCreateTime))
-                (pmid (field GeneRIF PubMed_ID PMID))
-                (web-url (field GeneRIF weburl))
-                (species (string->identifier
-                          ""
-                          (remap-species-identifiers (field Species Fullname))
-                          #:separator ""
-                          #:proc string-capitalize-first))
-                (version-id (field GeneRIF versionId))
-                (identifier (field GeneRIF Id))
-                (initial (sanitize-rdf-string (field GeneRIF initial)))
-                (reason (field GeneRIF reason))
-                (email (sanitize-rdf-string (field GeneRIF email)))
-                (category
-                 (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR '; ')"
-                         GeneCategory))))
-           (string->symbol
-            (string-append
-             "[ "
-             (format #f "rdf:type gnc:GNWikiEntry ; ")
-             (if (string? species)
-                 ""
-                 (format #f "gnt:belongsToSpecies ~a ; "
-                         species))
-             (format #f "rdfs:comment ~s^^xsd:string ; "
-                     generif-comment)
-             (if (string? create-time)
-                 ""
-                 (format #f "dct:created ~s^^xsd:datetime ; "
-                         (time-unix->string
-                          create-time "~5")))
-             (if (and (string? pmid) (not (string-null? pmid)))
-                 (format #f
-                         "~{dct:references pubmed:~a ; ~}"
-                         (string-split pmid #\space))
-                 "")
-             (if (string-blank? email)
-                 ""
-                 (format #f "foaf:mbox ~s ; " email))
-             (format #f "dct:identifier ~s ; " identifier)
-             (format #f "dct:hasVersion \"~s\"^^xsd:int ; " version-id)
-             (if (string-blank? reason)
-                 ""
-                 (format #f "gnt:reason ~s ; " reason))
-             (if (or (null? initial)
-                      (string-blank? initial))
-                 "" (format #f "gnt:initial ~s ; " initial))
-             (if (string-blank? category)
-                 ""
-                 (format #f
-                         "gnt:belongsToCategory ~s ; "
-                         category))
-             (if (and (string? web-url) (not (string-null? web-url)))
-                 (format #f "foaf:homepage ~s ; "
-                         web-url)
-                 "")
-             " ] "))))))
+      (string->identifier ""
+                          (gn-uuid (format #f "~a.~a.~a?type=wikii"
+                                           (field GeneRIF Id)
+                                           (field GeneRIF versionId)
+                                           (field GeneRIF createtime)))
+                          #:url-char #\-)
+    (set dct:identifier (gn-uuid (format #f "~a?type=wiki"
+                                         (field GeneRIF Id))))
+    (set rdfs:label (string->symbol
+                     (format #f "'~a'@en"
+                             (replace-substrings
+                              (sanitize-rdf-string
+                               (field GeneRIF comment))
+                              '(("'" . "\\'"))))))
+    (set rdf:type 'gnc:gn_wiki_entry)
+    (set gnt:symbol (field GeneRIF symbol))
+    (set gnt:has_species (string->identifier "" (remap-species-identifiers (field Species Fullname))))
+    (set dct:created
+         (string->symbol
+          (format #f "~s^^xsd:datetime "
+                  (field
+                   ("CAST(createtime AS CHAR)" EntryCreateTime)))))
+    (multiset dct:references
+              (map (lambda (pmid)
+                     (match pmid
+                       ((? string-blank? p) "")
+                       (p (string->symbol
+                           (format #f "pubmed:~a" (string-trim-both pmid))))))
+                   (string-split (field GeneRIF PubMed_ID PMID)
+                                 #\space)))
+    ;; Hide e-mail for now.
+    ;; (set foaf:mbox
+    ;;      (match (sanitize-rdf-string (field GeneRIF email))
+    ;;        ((? string-blank? mbox) "")
+    ;;        (mbox (string->symbol
+    ;;               (format #f "<~a>" mbox)))))
+    (set foaf:homepage
+         (match (sanitize-rdf-string (field GeneRIF weburl))
+           ((? string-blank? homepage) "")
+           (homepage (string->symbol
+                      (format #f "<~a>" homepage)))))
+    (set dct:hasVersion (annotate-field (format #f "~s" (field GeneRIF versionId))
+                                        '^^xsd:integer))
+    (set gnt:initial (sanitize-rdf-string (field GeneRIF initial)))
+    (set gnt:reason (field GeneRIF reason))
+    (multiset gnt:belongs_to_category
+              (string-split
+               (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR ';')"
+                       GeneCategory))
+               #\;))))
 
 (define-transformer ncbi-genewiki-entries
   (tables (GeneRIF_BASIC
-           (left-join Species "USING (SpeciesId)"))
-          "WHERE GeneRIF_BASIC.comment IS NOT NULL AND TRIM(GeneRIF_BASIC.comment) != '' AND TRIM(GeneRIF_BASIC.symbol) != '' GROUP BY GeneRIF_BASIC.comment, GeneRIF_BASIC.createtime, GeneRIF_BASIC.VersionId, GeneRIF_BASIC.SpeciesId, GeneRIF_BASIC.TaxID")
-  (schema-triples
-   (gnc:NCBIWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
-   (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI")
-   (gnt:hasVersionId a owl:ObjectProperty)
-   (gnt:hasVersionId rdfs:domain gnc:NCBIWikiEntry)
-   (gnt:hasVersionId skos:definition "The VersionId of this this resource"))
+           (left-join Species "USING (SpeciesId)")))
   (triples
       (string->identifier
-       "symbol"
-       (regexp-substitute/global #f "[^A-Za-z0-9:]"
-                                 (field GeneRIF_BASIC symbol GeneRIFSymbol)
-                                 'pre "_" 'post)
-       #:proc (lambda (x) x))
-    (set rdfs:comment
-         (let ([ncbi-comment (sanitize-rdf-string (field GeneRIF_BASIC comment))]
-               [species-name
-                (string->identifier
-                 ""
-                 (remap-species-identifiers (field Species Fullname SpeciesFullName))
-                 #:separator ""
-                 #:proc string-capitalize-first)]
-               [taxonomic-id (field GeneRIF_BASIC TaxID TaxonomicId)]
-               [create-time (field GeneRIF_BASIC createtime EntryCreateTime)]
-               [pmid (field GeneRIF_BASIC PubMed_ID PMID)]
-               [gene-id (field GeneRIF_BASIC GeneId)]
-               [version-id (field GeneRIF_BASIC VersionId)])
-           (string->symbol
-            (string-append
-             "[ "
-             (format #f "rdf:type gnc:NCBIWikiEntry ; ")
-             (format #f "rdfs:comment ~s^^xsd:string ; "
-                     ncbi-comment)
-             (format #f "gnt:belongsToSpecies ~a ; "
-                     species-name)
-             (if (eq? #f taxonomic-id)
-                 ""
-                 (format #f "skos:notation taxon:~a ; "
-                         taxonomic-id))
-             (format #f "gnt:hasGeneId generif:~a ; "
-                     gene-id)
-             (format #f "dct:hasVersion '~a'^^xsd:int ; "
-                     version-id)
-             (if (and (string? pmid) (not (string-null? pmid)))
-                 (format #f
-                         "~{dct:references pubmed:~a ; ~}"
-                         (string-split pmid #\space))
-                 "")
-             (if (string? create-time)
-                 ""
-                 (format #f "dct:created ~s^^xsd:datetime ; "
-                         (time-unix->string
-                          create-time "~5")))
-             " ]"))))))
+       "" (gn-uuid (format #f "~a_~a_~a_~a"
+                           (field GeneRIF_BASIC GeneId)
+                           (field GeneRIF_BASIC PubMed_ID)
+                           (field ("DATE_FORMAT(createtime, '%Y-%m-%dT%T')" CreateTime))
+                           (field GeneRIF_BASIC VersionId)))
+       #:url-char #\-)
+    (set rdf:type 'gnc:ncbi_wiki_entry)
+    (set rdfs:label (format #f "'~a'@en"
+                            (replace-substrings
+                             (sanitize-rdf-string
+                              (field GeneRIF_BASIC comment))
+                             '(("\\" . "\\\\")
+                               ("\n" . "\\n")
+                               ("\r" . "\\r")
+                               ("'" . "\\'")))))
+    (set gnt:symbol (field GeneRIF_BASIC symbol))
+    (set gnt:has_species (string->identifier "" (remap-species-identifiers (field Species Fullname))))
+    (set skos:notation (ontology 'taxon: (field GeneRIF_BASIC TaxID TaxonomicId)))
+    (set dct:hasVersion (annotate-field (field GeneRIF_BASIC versionId) '^^xsd:integer))
+    (set gnt:has_gene_id (ontology 'generif: (field GeneRIF_BASIC GeneId)))
+    (set dct:references (ontology 'pubmed: (field GeneRIF_BASIC PubMed_ID)))
+    (set dct:created
+         (string->symbol
+          (format #f "~s^^xsd:datetime"
+                  (field
+                   ("CAST(createtime AS CHAR)" EntryCreateTime)))))))
 
 
 
@@ -210,11 +128,11 @@
       ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
       ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
       ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
-      ("gn:" "<http://genenetwork.org/id/>")
-      ("gnc:" "<http://genenetwork.org/category/>")
-      ("gnt:" "<http://genenetwork.org/term/>")
+      ("gn:" "<http://rdf.genenetwork.org/v1/id/>")
+      ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
+      ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
       ("dct:" "<http://purl.org/dc/terms/>")
-      ("foaf:" "<http://xmlns.com/foaf/0.1/>")
+      ("foaf:" "<http://xmlns.com/foaf/0.1/#term_>")
       ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")
       ("taxon:" "<https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=>")
       ("generif:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>")
@@ -222,8 +140,6 @@
       ("owl:" "<http://www.w3.org/2002/07/owl#>")))
    (inputs
     (list
-     genewiki-symbols
-     generif-symbols
      gn-genewiki-entries
      ncbi-genewiki-entries))
    (outputs