aboutsummaryrefslogtreecommitdiff
path: root/examples/generif.scm
diff options
context:
space:
mode:
author Munyoki Kilyungi 2024-09-06 23:55:36 +0300
committer Munyoki Kilyungi 2024-09-06 23:55:36 +0300
commit 685b9a6bfc273c8060a1d0d0daae8cc4e294c75b (patch)
tree 44a0648345281fec0caf8fb7abfc2e616e4ace74 /examples/generif.scm
parent 397745b554e03fa2df0784c0c48ac43d01428980 (diff)
download gn-transform-databases-685b9a6bfc273c8060a1d0d0daae8cc4e294c75b.tar.gz
Use predicateObject Lists with a blank-node to model GeneRIF.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'examples/generif.scm')
-rwxr-xr-xexamples/generif.scm120
1 files changed, 72 insertions, 48 deletions
diff --git a/examples/generif.scm b/examples/generif.scm
index 894b766..8dcc201 100755
--- a/examples/generif.scm
+++ b/examples/generif.scm
@@ -50,8 +50,7 @@
(left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id")
(left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id"))
"WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL
-GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol, GeneRIF.SpeciesId,
-GeneRIF.createtime, GeneRIF.reason")
+GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol")
(schema-triples
(gnc:GeneWikiEntry a rdfs:Class)
(gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
@@ -63,53 +62,78 @@ GeneRIF.createtime, GeneRIF.reason")
(gnt:reason skos:definition "The reason why this resource was modified")
(gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork")
(gnt:geneSymbol rdfs:domain gnc:GNWikiEntry))
- ;; Here we use the Id and VersionId to uniquely identify comments.
- ;; We could use blank-nodes here; however, querying blank nodes
- ;; E.g. getting the latest versionId is very complicated. Prefer
- ;; normal triplets over blank-nodes.
+ ;; We want to avoid manually generating a unique identifier for each
+ ;; comment. Instead, we use a blank node (which holds the comment) as
+ ;; the subject, and emit the remaining predicates and objects as a
+ ;; Turtle predicateObjectList attached to that blank node:
+ ;; <https://www.w3.org/TR/turtle/#grammar-production-predicateObjectList>
(triples
- (format #f "gn:wiki-~a-~a"
- (field GeneRIF Id)
- (field GeneRIF versionId))
- (set rdfs:comment (sanitize-rdf-string (field GeneRIF comment)))
- (set rdf:type 'gnc:GNWikiEntry)
- (set gnt:symbol
- (string->identifier
- "symbol"
- (regexp-substitute/global #f "[^A-Za-z0-9:]"
- (field GeneRIF symbol)
- 'pre "_" 'post)
- #:proc (lambda (x) x)))
- (set dct:created
- (let ((create-time (field GeneRIF createtime EntryCreateTime)))
- (if (string? create-time)
- ""
- (annotate-field
- (time-unix->string
- create-time
- "~5")
- '^^xsd:datetime))))
- (multiset dct:references
- (string-split (field GeneRIF PubMed_ID PMID)
- #\space))
- (set foaf:homepage (field GeneRIF weburl))
- (set gnt:belongsToSpecies (string->identifier
- ""
- (remap-species-identifiers (field Species Fullname))
- #:separator ""
- #:proc string-capitalize-first))
- (set dct:hasVersion (annotate-field (format #f "~s" (field GeneRIF versionId))
- '^^xsd:int))
- (set dct:identifier (annotate-field (format #f "~s" (field GeneRIF Id))
- '^^xsd:int))
- (set gnt:initial (sanitize-rdf-string (field GeneRIF initial)))
- (set gnt:reason (field GeneRIF reason))
- (set foaf:mbox (sanitize-rdf-string (field GeneRIF email)))
- (multiset gnt:belongsToCategory
- (string-split
- (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR ';')"
- GeneCategory))
- #\;))))
+ (format #f "[ rdfs:comment '''~a'''@en] "
+ (field GeneRIF comment))
+ (set rdf:type
+ (let* ((create-time (field
+ ("CAST(createtime AS CHAR)" EntryCreateTime)))
+ (pmid (field GeneRIF PubMed_ID PMID))
+ (web-url (field GeneRIF weburl))
+ (species (string->identifier
+ ""
+ (remap-species-identifiers (field Species Fullname))
+ #:separator ""
+ #:proc string-capitalize-first))
+ (version-id (field GeneRIF versionId))
+ (identifier (field GeneRIF Id))
+ (initial (sanitize-rdf-string (field GeneRIF initial)))
+ (reason (field GeneRIF reason))
+ (email (sanitize-rdf-string (field GeneRIF email)))
+ (category
+ (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR ';')"
+ GeneCategory))))
+ (string->symbol
+ (string-append
+ (format #f "gnc:GNWikiEntry ;\n")
+ (if (string? species)
+ ""
+ (format #f "\tgnt:belongsToSpecies ~a ;\n"
+ species))
+ (format #f "\tdct:created ~s^^xsd:datetime ;\n"
+ create-time)
+ (if (and (string? pmid) (not (string-null? pmid)))
+ (format #f
+ "\tdct:references ( ~{pubmed:~a ~}) ;\n"
+ (string-split pmid #\space))
+ "\tdct:references rdf:nil ;\n")
+ (if (string-blank? email)
+ ""
+ (format #f "\tfoaf:mbox <~a> ;\n" email))
+ (format #f "\tdct:identifier \"~s\"^^xsd:integer ;\n" identifier)
+ (if (and (string? web-url) (not (string-null? web-url)))
+ (format #f "\tfoaf:homepage <~a> ;\n"
+ web-url)
+ "")
+ (format #f "\tdct:hasVersion \"~s\"^^xsd:integer ;\n" version-id)
+ (if (or (null? initial)
+ (string-blank? initial))
+ "" (format #f "\tgnt:initial ~s ;\n" initial))
+ (if (string-blank? reason)
+ ""
+ (format #f "\tgnt:reason ~s ;\n" reason))
+ (if (string-blank? category)
+ "\tgnt:belongsToCategory rdf:nil ;\n"
+ (format #f
+ "\tgnt:belongsToCategory ( ~{~s ~}) ;\n"
+ (string-split category #\;)))
+ ;; gnt:symbol is emitted at the very end of this transform
+ ;; because we have a strong guarantee that it is non-null;
+ ;; it carries no trailing ";", so the triple is always
+ ;; terminated properly with a "."
+ (format
+ #f "\tgnt:symbol ~a"
+ (string->identifier
+ "symbol"
+ (regexp-substitute/global #f "[^A-Za-z0-9:]"
+ (field GeneRIF symbol)
+ 'pre "_" 'post)
+ #:proc (lambda (x) x)))))))))
(define-transformer ncbi-genewiki-entries
(tables (GeneRIF_BASIC