From 685b9a6bfc273c8060a1d0d0daae8cc4e294c75b Mon Sep 17 00:00:00 2001
From: Munyoki Kilyungi
Date: Fri, 6 Sep 2024 23:55:36 +0300
Subject: Use predicateObject Lists with a blank-node to model GeneRIF.

Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
---
 examples/generif.scm | 120 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 72 insertions(+), 48 deletions(-)

(limited to 'examples')

diff --git a/examples/generif.scm b/examples/generif.scm
index 894b766..8dcc201 100755
--- a/examples/generif.scm
+++ b/examples/generif.scm
@@ -50,8 +50,7 @@
            (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id")
            (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id"))
           "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL
-GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol, GeneRIF.SpeciesId,
-GeneRIF.createtime, GeneRIF.reason")
+GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol")
   (schema-triples
    (gnc:GeneWikiEntry a rdfs:Class)
    (gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
@@ -63,53 +62,78 @@ GeneRIF.createtime, GeneRIF.reason")
    (gnt:reason skos:definition "The reason why this resource was modified")
    (gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork")
    (gnt:geneSymbol rdfs:domain gnc:GNWikiEntry))
-  ;; Here we use the Id and VersionId to uniquely identify comments.
-  ;; We could use blank-nodes here; however, querying blank nodes
-  ;; E.g. getting the latest versionId is very complicated.  Prefer
-  ;; normal triplets over blank-nodes.
+  ;; We want to avoid manually generating a unique identifier for each
+  ;; comment.  Instead, a blank node that carries the comment is used
+  ;; as the subject, and the remaining properties are attached to it
+  ;; as a Turtle predicateObjectList:
+  ;; <https://www.w3.org/TR/turtle/#grammar-production-predicateObjectList>
   (triples
-      (format #f "gn:wiki-~a-~a"
-              (field GeneRIF Id)
-              (field GeneRIF versionId))
-    (set rdfs:comment (sanitize-rdf-string (field GeneRIF comment)))
-    (set rdf:type 'gnc:GNWikiEntry)
-    (set gnt:symbol
-         (string->identifier
-          "symbol"
-          (regexp-substitute/global #f "[^A-Za-z0-9:]"
-                                    (field GeneRIF symbol)
-                                    'pre "_" 'post)
-          #:proc (lambda (x) x)))
-    (set dct:created
-         (let ((create-time (field GeneRIF createtime EntryCreateTime)))
-           (if (string? create-time)
-               ""
-               (annotate-field
-                (time-unix->string
-                 create-time
-                 "~5")
-                '^^xsd:datetime))))
-    (multiset dct:references
-              (string-split (field GeneRIF PubMed_ID PMID)
-                            #\space))
-    (set foaf:homepage (field GeneRIF weburl))
-    (set gnt:belongsToSpecies (string->identifier
-                               ""
-                               (remap-species-identifiers (field Species Fullname))
-                               #:separator ""
-                               #:proc string-capitalize-first))
-    (set dct:hasVersion (annotate-field (format #f "~s" (field GeneRIF versionId))
-                                        '^^xsd:int))
-    (set dct:identifier (annotate-field (format #f "~s" (field GeneRIF Id))
-                                        '^^xsd:int))
-    (set gnt:initial (sanitize-rdf-string (field GeneRIF initial)))
-    (set gnt:reason (field GeneRIF reason))
-    (set foaf:mbox (sanitize-rdf-string (field GeneRIF email)))
-    (multiset gnt:belongsToCategory
-              (string-split
-               (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR ';')"
-                       GeneCategory))
-               #\;))))
+      (format #f "[ rdfs:comment '''~a'''@en] "
+              (field GeneRIF comment))
+    (set rdf:type
+         (let* ((create-time (field
+                              ("CAST(createtime AS CHAR)" EntryCreateTime)))
+                (pmid (field GeneRIF PubMed_ID PMID))
+                (web-url (field GeneRIF weburl))
+                (species (string->identifier
+                          ""
+                          (remap-species-identifiers (field Species Fullname))
+                          #:separator ""
+                          #:proc string-capitalize-first))
+                (version-id (field GeneRIF versionId))
+                (identifier (field GeneRIF Id))
+                (initial (sanitize-rdf-string (field GeneRIF initial)))
+                (reason (field GeneRIF reason))
+                (email (sanitize-rdf-string (field GeneRIF email)))
+                (category
+                 (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR ';')"
+                         GeneCategory))))
+           (string->symbol
+            (string-append
+             (format #f "gnc:GNWikiEntry ;\n")
+             (if (string? species)
+                 ""
+                 (format #f "\tgnt:belongsToSpecies ~a ;\n"
+                         species))
+             (format #f "\tdct:created ~s^^xsd:datetime ;\n"
+                     create-time)
+             (if (and (string? pmid) (not (string-null? pmid)))
+                 (format #f
+                         "\tdct:references ( ~{pubmed:~a ~}) ;\n"
+                         (string-split pmid #\space))
+                 "\tdct:references rdf:nil ;\n")
+             (if (string-blank? email)
+                 ""
+                 (format #f "\tfoaf:mbox <~a> ;\n" email))
+             (format #f "\tdct:identifier \"~s\"^^xsd:integer ;\n" identifier)
+             (if (and (string? web-url) (not (string-null? web-url)))
+                 (format #f "\tfoaf:homepage <~a> ;\n"
+                         web-url)
+                 "")
+             (format #f "\tdct:hasVersion \"~s\"^^xsd:integer ;\n" version-id)
+             (if (or (null? initial)
+                     (string-blank? initial))
+                 "" (format #f "\tgnt:initial ~s ;\n" initial))
+             (if (string-blank? reason)
+                 ""
+                 (format #f "\tgnt:reason ~s ;\n" reason))
+             (if (string-blank? category)
+                 "\tgnt:belongsToCategory rdf:nil ;\n"
+                 (format #f
+                         "\tgnt:belongsToCategory ( ~{~s ~}) ;\n"
+                         (string-split category #\;)))
+             ;; gnt:symbol is emitted last, without a trailing ";",
+             ;; because it is strongly guaranteed to be non-null: the
+             ;; predicate-object list therefore always ends cleanly
+             ;; and the triple is properly terminated with a "."
+             (format
+              #f "\tgnt:symbol ~a"
+              (string->identifier
+               "symbol"
+               (regexp-substitute/global #f "[^A-Za-z0-9:]"
+                                         (field GeneRIF symbol)
+                                         'pre "_" 'post)
+               #:proc (lambda (x) x)))))))))
 
 (define-transformer ncbi-genewiki-entries
   (tables (GeneRIF_BASIC
-- 
cgit v1.2.3