about summary refs log tree commit diff
path: root/examples
diff options
context:
space:
mode:
Diffstat (limited to 'examples')
-rwxr-xr-x examples/generif.scm 91
1 files changed, 39 insertions, 52 deletions
diff --git a/examples/generif.scm b/examples/generif.scm
index 1dfd224..7a60214 100755
--- a/examples/generif.scm
+++ b/examples/generif.scm
@@ -86,61 +86,48 @@ GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol")
 
 (define-transformer ncbi-genewiki-entries
   (tables (GeneRIF_BASIC
-           (left-join Species "USING (SpeciesId)"))
-          "WHERE GeneRIF_BASIC.comment IS NOT NULL AND TRIM(GeneRIF_BASIC.comment) != '' AND TRIM(GeneRIF_BASIC.symbol) != '' GROUP BY GeneRIF_BASIC.comment, GeneRIF_BASIC.createtime, GeneRIF_BASIC.VersionId, GeneRIF_BASIC.SpeciesId, GeneRIF_BASIC.TaxID")
+           (left-join Species "USING (SpeciesId)")))
   (schema-triples
    (gnc:NCBIWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
-   (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI")
-   (gnt:hasVersionId a owl:ObjectProperty)
-   (gnt:hasVersionId rdfs:domain gnc:NCBIWikiEntry)
-   (gnt:hasVersionId skos:definition "The VersionId of this this resource"))
+   (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI"))
   (triples
-      (string->identifier
-       "symbol"
-       (regexp-substitute/global #f "[^A-Za-z0-9:]"
-                                 (field GeneRIF_BASIC symbol GeneRIFSymbol)
-                                 'pre "_" 'post)
-       #:proc (lambda (x) x))
-    (set rdfs:comment
-         (let ([ncbi-comment (sanitize-rdf-string (field GeneRIF_BASIC comment))]
-               [species-name
-                (string->identifier
-                 ""
-                 (remap-species-identifiers (field Species Fullname SpeciesFullName))
-                 #:separator ""
-                 #:proc string-capitalize-first)]
-               [taxonomic-id (field GeneRIF_BASIC TaxID TaxonomicId)]
-               [create-time (field GeneRIF_BASIC createtime EntryCreateTime)]
-               [pmid (field GeneRIF_BASIC PubMed_ID PMID)]
-               [gene-id (field GeneRIF_BASIC GeneId)]
-               [version-id (field GeneRIF_BASIC VersionId)])
-           (string->symbol
-            (string-append
-             "[ "
-             (format #f "rdf:type gnc:NCBIWikiEntry ; ")
-             (format #f "rdfs:comment ~s^^xsd:string ; "
-                     ncbi-comment)
-             (format #f "gnt:belongsToSpecies ~a ; "
-                     species-name)
-             (if (eq? #f taxonomic-id)
-                 ""
-                 (format #f "skos:notation taxon:~a ; "
-                         taxonomic-id))
-             (format #f "gnt:hasGeneId generif:~a ; "
-                     gene-id)
-             (format #f "dct:hasVersion '~a'^^xsd:int ; "
-                     version-id)
-             (if (and (string? pmid) (not (string-null? pmid)))
-                 (format #f
-                         "~{dct:references pubmed:~a ; ~}"
-                         (string-split pmid #\space))
-                 "")
-             (if (string? create-time)
-                 ""
-                 (format #f "dct:created ~s^^xsd:datetime ; "
-                         (time-unix->string
-                          create-time "~5")))
-             " ]"))))))
+      (format
+       #f "gn:rif-~a-~a-~a-~a"
+       (field GeneRIF_BASIC GeneId)
+       (field GeneRIF_BASIC PubMed_ID)
+       (field
+        ("DATE_FORMAT(createtime, '%Y-%m-%dT%T')" CreateTime))
+       (field GeneRIF_BASIC VersionId))
+    (set rdf:type 'gnc:NCBIWikiEntry)
+    (set gnt:symbol (field GeneRIF_BASIC symbol))
+    (set rdfs:label
+         (let* ((comment
+                 (format #f "'~a'@en"
+                         (replace-substrings
+                          (field GeneRIF_BASIC comment)
+                          '(("\\" . "\\\\")
+                            ("\n" . "\\n")
+                            ("\r" . "\\r")
+                            ("'" . "\\'"))))))
+           (string->symbol comment)))
+    (set dct:created
+         (string->symbol
+          (format #f "~s^^xsd:datetime "
+                  (field
+                   ("CAST(createtime AS CHAR)" EntryCreateTime)))))
+    (set gnt:belongsToSpecies (string->identifier
+                               ""
+                               (remap-species-identifiers (field Species Fullname))
+                               #:separator ""
+                               #:proc string-capitalize-first))
+    (set gnt:hasGeneId (string->symbol (format #f "generif:~a" (field GeneRIF_BASIC GeneId))))
+    (set skos:notation (match (field GeneRIF_BASIC TaxID TaxonomicId)
+                           ((? number? x)
+                            (string->symbol (format #f "taxon:~a" x)))
+                           (else "")))
+    (set dct:hasVersion (annotate-field (format #f "~s" (field GeneRIF_BASIC versionId))
+                                        '^^xsd:integer))
+    (set dct:references (string->symbol (format #f "pubmed:~a" (field GeneRIF_BASIC PubMed_ID))))))