From 397745b554e03fa2df0784c0c48ac43d01428980 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 27 Aug 2024 15:23:16 +0300 Subject: Replace blank-nodes for gn-genewiki-entries with unique identifiers. Signed-off-by: Munyoki Kilyungi --- examples/generif.scm | 116 ++++++++++++++++++++++----------------------------- 1 file changed, 49 insertions(+), 67 deletions(-) diff --git a/examples/generif.scm b/examples/generif.scm index 11235e0..894b766 100755 --- a/examples/generif.scm +++ b/examples/generif.scm @@ -49,7 +49,9 @@ (left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId") (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id") (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id")) - "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol, GeneRIF.SpeciesId, GeneRIF.createtime, GeneRIF.reason") + "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL +GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol, GeneRIF.SpeciesId, +GeneRIF.createtime, GeneRIF.reason") (schema-triples (gnc:GeneWikiEntry a rdfs:Class) (gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry) @@ -61,73 +63,53 @@ (gnt:reason skos:definition "The reason why this resource was modified") (gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork") (gnt:geneSymbol rdfs:domain gnc:GNWikiEntry)) + ;; Here we use the Id and VersionId to uniquely identify comments. + ;; We could use blank-nodes here; however, querying blank nodes + ;; (e.g. getting the latest versionId) is very complicated. Prefer + ;; normal triples over blank-nodes. 
(triples - (string->identifier - "symbol" - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (field GeneRIF symbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:comment - (let* ((generif-comment (sanitize-rdf-string (field GeneRIF comment))) - (create-time (field GeneRIF createtime EntryCreateTime)) - (pmid (field GeneRIF PubMed_ID PMID)) - (web-url (field GeneRIF weburl)) - (species (string->identifier - "" - (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first)) - (version-id (field GeneRIF versionId)) - (identifier (field GeneRIF Id)) - (initial (sanitize-rdf-string (field GeneRIF initial))) - (reason (field GeneRIF reason)) - (email (sanitize-rdf-string (field GeneRIF email))) - (category - (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR '; ')" - GeneCategory)))) - (string->symbol - (string-append - "[ " - (format #f "rdf:type gnc:GNWikiEntry ; ") - (if (string? species) - "" - (format #f "gnt:belongsToSpecies ~a ; " - species)) - (format #f "rdfs:comment ~s^^xsd:string ; " - generif-comment) - (if (string? create-time) - "" - (format #f "dct:created ~s^^xsd:datetime ; " - (time-unix->string - create-time "~5"))) - (if (and (string? pmid) (not (string-null? pmid))) - (format #f - "~{dct:references pubmed:~a ; ~}" - (string-split pmid #\space)) - "") - (if (string-blank? email) - "" - (format #f "foaf:mbox ~s ; " email)) - (format #f "dct:identifier ~s ; " identifier) - (format #f "dct:hasVersion \"~s\"^^xsd:int ; " version-id) - (if (string-blank? reason) - "" - (format #f "gnt:reason ~s ; " reason)) - (if (or (null? initial) - (string-blank? initial)) - "" (format #f "gnt:initial ~s ; " initial)) - (if (string-blank? category) - "" - (format #f - "gnt:belongsToCategory ~s ; " - category)) - (if (and (string? web-url) (not (string-null? 
web-url))) - (format #f "foaf:homepage ~s ; " - web-url) - "") - " ] ")))))) + (format #f "gn:wiki-~a-~a" + (field GeneRIF Id) + (field GeneRIF versionId)) + (set rdfs:comment (sanitize-rdf-string (field GeneRIF comment))) + (set rdf:type 'gnc:GNWikiEntry) + (set gnt:symbol + (string->identifier + "symbol" + (regexp-substitute/global #f "[^A-Za-z0-9:]" + (field GeneRIF symbol) + 'pre "_" 'post) + #:proc (lambda (x) x))) + (set dct:created + (let ((create-time (field GeneRIF createtime EntryCreateTime))) + (if (string? create-time) + "" + (annotate-field + (time-unix->string + create-time + "~5") + '^^xsd:datetime)))) + (multiset dct:references + (string-split (field GeneRIF PubMed_ID PMID) + #\space)) + (set foaf:homepage (field GeneRIF weburl)) + (set gnt:belongsToSpecies (string->identifier + "" + (remap-species-identifiers (field Species Fullname)) + #:separator "" + #:proc string-capitalize-first)) + (set dct:hasVersion (annotate-field (format #f "~s" (field GeneRIF versionId)) + '^^xsd:int)) + (set dct:identifier (annotate-field (format #f "~s" (field GeneRIF Id)) + '^^xsd:int)) + (set gnt:initial (sanitize-rdf-string (field GeneRIF initial))) + (set gnt:reason (field GeneRIF reason)) + (set foaf:mbox (sanitize-rdf-string (field GeneRIF email))) + (multiset gnt:belongsToCategory + (string-split + (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR ';')" + GeneCategory)) + #\;)))) (define-transformer ncbi-genewiki-entries (tables (GeneRIF_BASIC -- cgit v1.2.3