aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Munyoki Kilyungi 2024-09-12 11:21:32 +0300
committer Munyoki Kilyungi 2024-09-12 11:21:32 +0300
commit 7460e6733b9bd672befeb254a7c3644a758d9864 (patch)
tree c10e48e61a4ff7ea6dc492ac9d2bdeba6fea4e8d
parent f883b188b9a75a5f609b973fb5cc76db70304518 (diff)
download gn-transform-databases-7460e6733b9bd672befeb254a7c3644a758d9864.tar.gz
Re-introduce unique identifiers for RIF metadata.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x examples/generif.scm 119
1 files changed, 48 insertions, 71 deletions
diff --git a/examples/generif.scm b/examples/generif.scm
index 27fa388..1dfd224 100755
--- a/examples/generif.scm
+++ b/examples/generif.scm
@@ -34,78 +34,55 @@ GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol")
(gnt:reason skos:definition "The reason why this resource was modified")
(gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork")
(gnt:geneSymbol rdfs:domain gnc:GNWikiEntry))
- ;; We want to avoid manually generating a unique identifier for each
- ;; comment. As such we use a blank node (that has the comment) as
- ;; the subject of the triples produced by matching the
- ;; predicateObjectList production:
- ;; <https://www.w3.org/TR/turtle/#grammar-production-predicateObjectList>
(triples
- (format #f "[ rdfs:label '''~a'''@en] "
- (field GeneRIF comment))
- (set rdf:type
- (let* ((create-time (field
- ("CAST(createtime AS CHAR)" EntryCreateTime)))
- (pmid (field GeneRIF PubMed_ID PMID))
- (web-url (field GeneRIF weburl))
- (species (string->identifier
- ""
- (remap-species-identifiers (field Species Fullname))
- #:separator ""
- #:proc string-capitalize-first))
- (version-id (field GeneRIF versionId))
- (identifier (field GeneRIF Id))
- (initial (sanitize-rdf-string (field GeneRIF initial)))
- (reason (field GeneRIF reason))
- (email (sanitize-rdf-string (field GeneRIF email)))
- (category
- (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR ';')"
- GeneCategory))))
- (string->symbol
- (string-append
- (format #f "gnc:GNWikiEntry ;\n")
- (if (string? species)
- ""
- (format #f "\tgnt:belongsToSpecies ~a ;\n"
- species))
- (format #f "\tdct:created ~s^^xsd:datetime ;\n"
- create-time)
- (if (and (string? pmid) (not (string-null? pmid)))
- (format #f
- "\tdct:references ( ~{pubmed:~a ~}) ;\n"
- (string-split pmid #\space))
- "\tdct:references rdf:nil ;\n")
- (if (string-blank? email)
- ""
- (format #f "\tfoaf:mbox <~a> ;\n" email))
- (format #f "\tdct:identifier \"~s\"^^xsd:integer ;\n" identifier)
- (if (and (string? web-url) (not (string-null? web-url)))
- (format #f "\tfoaf:homepage <~a> ;\n"
- web-url)
- "")
- (format #f "\tdct:hasVersion \"~s\"^^xsd:integer ;\n" version-id)
- (if (or (null? initial)
- (string-blank? initial))
- "" (format #f "\tgnt:initial ~s ;\n" initial))
- (if (string-blank? reason)
- ""
- (format #f "\tgnt:reason ~s ;\n" reason))
- (if (string-blank? category)
- "\tgnt:belongsToCategory rdf:nil ;\n"
- (format #f
- "\tgnt:belongsToCategory ( ~{~s ~}) ;\n"
- (string-split category #\;)))
- ;; We have this symbol at the very end of this transform
- ;; because we have a strong guarantee that it will be a
- ;; non-null value hence always terminating this triple
- ;; properly with a "."
- (format
- #f "\tgnt:symbol ~a"
- (string->identifier
- "symbol"
- (regexp-substitute/global #f "[^A-Za-z0-9:]"
- (field GeneRIF symbol)
- 'pre "_" 'post)
- #:proc (lambda (x) x)))))))))
+ (format
+ #f "gn:wiki-~a-~a"
+ (field GeneRIF Id)
+ (field GeneRIF versionId))
+ (set rdfs:label (string->symbol (format #f "'''~a'''@en" (field GeneRIF comment))))
+ (set rdf:type 'gnc:GNWikiEntry)
+ (set gnt:symbol (field GeneRIF symbol))
+ (set gnt:belongsToSpecies (string->identifier
+ ""
+ (remap-species-identifiers (field Species Fullname))
+ #:separator ""
+ #:proc string-capitalize-first))
+ (set dct:created
+ (string->symbol
+ (format #f "~s^^xsd:datetime "
+ (field
+ ("CAST(createtime AS CHAR)" EntryCreateTime)))))
+ (multiset dct:references
+ (map (lambda (pmid)
+ (if (string-blank? pmid)
+ ""
+ (string->symbol
+ (format #f "pubmed:~a" (string-trim-both pmid)))))
+ (string-split (field GeneRIF PubMed_ID PMID)
+ #\space)))
+ (set foaf:mbox
+ (let ((mbox (sanitize-rdf-string (field GeneRIF email))))
+ (if (string-blank? mbox)
+ ""
+ (string->symbol
+ (format #f "<~a>" mbox)))))
+ (set dct:identifier (annotate-field (format #f "~s" (field GeneRIF Id))
+ '^^xsd:integer))
+ (set foaf:homepage
+ (let ((homepage (sanitize-rdf-string (field GeneRIF weburl))))
+ (if (string-blank? homepage)
+ ""
+ (string->symbol
+ (format #f "<~a>" homepage)))))
+ (set dct:hasVersion (annotate-field (format #f "~s" (field GeneRIF versionId))
+ '^^xsd:integer))
+ (set gnt:initial (sanitize-rdf-string (field GeneRIF initial)))
+ (set gnt:reason (field GeneRIF reason))
+ (multiset gnt:belongsToCategory
+ (string-split
+ (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR ';')"
+ GeneCategory))
+ #\;))))
(define-transformer ncbi-genewiki-entries
(tables (GeneRIF_BASIC