From 7460e6733b9bd672befeb254a7c3644a758d9864 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Thu, 12 Sep 2024 11:21:32 +0300 Subject: Re-introduce unique identifiers for RIF metadata. Signed-off-by: Munyoki Kilyungi --- examples/generif.scm | 119 +++++++++++++++++++++------------------------------ 1 file changed, 48 insertions(+), 71 deletions(-) diff --git a/examples/generif.scm b/examples/generif.scm index 27fa388..1dfd224 100755 --- a/examples/generif.scm +++ b/examples/generif.scm @@ -34,78 +34,55 @@ GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol") (gnt:reason skos:definition "The reason why this resource was modified") (gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork") (gnt:geneSymbol rdfs:domain gnc:GNWikiEntry)) - ;; We want to avoid manually generating a unique identifier for each - ;; comment. As such we use a blank node (that has the comment) as - ;; the subject of the triples produced by matching the - ;; predicateObjectList production: - ;; (triples - (format #f "[ rdfs:label '''~a'''@en] " - (field GeneRIF comment)) - (set rdf:type - (let* ((create-time (field - ("CAST(createtime AS CHAR)" EntryCreateTime))) - (pmid (field GeneRIF PubMed_ID PMID)) - (web-url (field GeneRIF weburl)) - (species (string->identifier - "" - (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first)) - (version-id (field GeneRIF versionId)) - (identifier (field GeneRIF Id)) - (initial (sanitize-rdf-string (field GeneRIF initial))) - (reason (field GeneRIF reason)) - (email (sanitize-rdf-string (field GeneRIF email))) - (category - (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR ';')" - GeneCategory)))) - (string->symbol - (string-append - (format #f "gnc:GNWikiEntry ;\n") - (if (string? species) - "" - (format #f "\tgnt:belongsToSpecies ~a ;\n" - species)) - (format #f "\tdct:created ~s^^xsd:datetime ;\n" - create-time) - (if (and (string? pmid) (not (string-null? pmid))) - (format #f - "\tdct:references ( ~{pubmed:~a ~}) ;\n" - (string-split pmid #\space)) - "\tdct:references rdf:nil ;\n") - (if (string-blank? email) - "" - (format #f "\tfoaf:mbox <~a> ;\n" email)) - (format #f "\tdct:identifier \"~s\"^^xsd:integer ;\n" identifier) - (if (and (string? web-url) (not (string-null? web-url))) - (format #f "\tfoaf:homepage <~a> ;\n" - web-url) - "") - (format #f "\tdct:hasVersion \"~s\"^^xsd:integer ;\n" version-id) - (if (or (null? initial) - (string-blank? initial)) - "" (format #f "\tgnt:initial ~s ;\n" initial)) - (if (string-blank? reason) - "" - (format #f "\tgnt:reason ~s ;\n" reason)) - (if (string-blank? category) - "\tgnt:belongsToCategory rdf:nil ;\n" - (format #f - "\tgnt:belongsToCategory ( ~{~s ~}) ;\n" - (string-split category #\;))) - ;; We have this symbol at the very end of this transform - ;; because we have a strong guarantee that it will be a - ;; non-null value hence always terminating this triple - ;; properly with a "." - (format - #f "\tgnt:symbol ~a" - (string->identifier - "symbol" - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field GeneRIF symbol) - 'pre "_" 'post) - #:proc (lambda (x) x))))))))) + (format + #f "gn:wiki-~a-~a" + (field GeneRIF Id) + (field GeneRIF versionId)) + (set rdfs:label (string->symbol (format #f "'''~a'''@en" (field GeneRIF comment)))) + (set rdf:type 'gnc:GNWikiEntry) + (set gnt:symbol (field GeneRIF symbol)) + (set gnt:belongsToSpecies (string->identifier + "" + (remap-species-identifiers (field Species Fullname)) + #:separator "" + #:proc string-capitalize-first)) + (set dct:created + (string->symbol + (format #f "~s^^xsd:datetime " + (field + ("CAST(createtime AS CHAR)" EntryCreateTime))))) + (multiset dct:references + (map (lambda (pmid) + (if (string-blank? pmid) + "" + (string->symbol + (format #f "pubmed:~a" (string-trim-both pmid))))) + (string-split (field GeneRIF PubMed_ID PMID) + #\space))) + (set foaf:mbox + (let ((mbox (sanitize-rdf-string (field GeneRIF email)))) + (if (string-blank? mbox) + "" + (string->symbol + (format #f "<~a>" mbox))))) + (set dct:identifier (annotate-field (format #f "~s" (field GeneRIF Id)) + '^^xsd:integer)) + (set foaf:homepage + (let ((homepage (sanitize-rdf-string (field GeneRIF weburl)))) + (if (string-blank? homepage) + "" + (string->symbol + (format #f "<~a>" homepage))))) + (set dct:hasVersion (annotate-field (format #f "~s" (field GeneRIF versionId)) + '^^xsd:integer)) + (set gnt:initial (sanitize-rdf-string (field GeneRIF initial))) + (set gnt:reason (field GeneRIF reason)) + (multiset gnt:belongsToCategory + (string-split + (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR ';')" + GeneCategory)) + #\;)))) (define-transformer ncbi-genewiki-entries (tables (GeneRIF_BASIC -- cgit v1.2.3