diff options
author | Munyoki Kilyungi | 2024-10-03 23:00:03 +0300 |
---|---|---|
committer | Munyoki Kilyungi | 2024-10-08 13:26:39 +0300 |
commit | 53ab3481a430de39fa110ad3760660e14c720987 (patch) | |
tree | 3ae90198189bb4aa9ecde554dff440d2714f5bf7 | |
parent | 7460e6733b9bd672befeb254a7c3644a758d9864 (diff) | |
download | gn-transform-databases-53ab3481a430de39fa110ad3760660e14c720987.tar.gz |
Re-model NCBI transform.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x | examples/generif.scm | 91 |
1 files changed, 39 insertions, 52 deletions
diff --git a/examples/generif.scm b/examples/generif.scm index 1dfd224..7a60214 100755 --- a/examples/generif.scm +++ b/examples/generif.scm @@ -86,61 +86,48 @@ GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol") (define-transformer ncbi-genewiki-entries (tables (GeneRIF_BASIC - (left-join Species "USING (SpeciesId)")) - "WHERE GeneRIF_BASIC.comment IS NOT NULL AND TRIM(GeneRIF_BASIC.comment) != '' AND TRIM(GeneRIF_BASIC.symbol) != '' GROUP BY GeneRIF_BASIC.comment, GeneRIF_BASIC.createtime, GeneRIF_BASIC.VersionId, GeneRIF_BASIC.SpeciesId, GeneRIF_BASIC.TaxID") + (left-join Species "USING (SpeciesId)"))) (schema-triples (gnc:NCBIWikiEntry rdfs:subClassOf gnc:GeneWikiEntry) - (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI") - (gnt:hasVersionId a owl:ObjectProperty) - (gnt:hasVersionId rdfs:domain gnc:NCBIWikiEntry) - (gnt:hasVersionId skos:definition "The VersionId of this this resource")) + (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI")) (triples - (string->identifier - "symbol" - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field GeneRIF_BASIC symbol GeneRIFSymbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:comment - (let ([ncbi-comment (sanitize-rdf-string (field GeneRIF_BASIC comment))] - [species-name - (string->identifier - "" - (remap-species-identifiers (field Species Fullname SpeciesFullName)) - #:separator "" - #:proc string-capitalize-first)] - [taxonomic-id (field GeneRIF_BASIC TaxID TaxonomicId)] - [create-time (field GeneRIF_BASIC createtime EntryCreateTime)] - [pmid (field GeneRIF_BASIC PubMed_ID PMID)] - [gene-id (field GeneRIF_BASIC GeneId)] - [version-id (field GeneRIF_BASIC VersionId)]) - (string->symbol - (string-append - "[ " - (format #f "rdf:type gnc:NCBIWikiEntry ; ") - (format #f "rdfs:comment ~s^^xsd:string ; " - ncbi-comment) - (format #f "gnt:belongsToSpecies ~a ; " - species-name) - (if (eq? 
#f taxonomic-id) - "" - (format #f "skos:notation taxon:~a ; " - taxonomic-id)) - (format #f "gnt:hasGeneId generif:~a ; " - gene-id) - (format #f "dct:hasVersion '~a'^^xsd:int ; " - version-id) - (if (and (string? pmid) (not (string-null? pmid))) - (format #f - "~{dct:references pubmed:~a ; ~}" - (string-split pmid #\space)) - "") - (if (string? create-time) - "" - (format #f "dct:created ~s^^xsd:datetime ; " - (time-unix->string - create-time "~5"))) - " ]")))))) + (format + #f "gn:rif-~a-~a-~a-~a" + (field GeneRIF_BASIC GeneId) + (field GeneRIF_BASIC PubMed_ID) + (field + ("DATE_FORMAT(createtime, '%Y-%m-%dT%T')" CreateTime)) + (field GeneRIF_BASIC VersionId)) + (set rdf:type 'gnc:NCBIWikiEntry) + (set gnt:symbol (field GeneRIF_BASIC symbol)) + (set rdfs:label + (let* ((comment + (format #f "'~a'@en" + (replace-substrings + (field GeneRIF_BASIC comment) + '(("\\" . "\\\\") + ("\n" . "\\n") + ("\r" . "\\r") + ("'" . "\\'")))))) + (string->symbol comment))) + (set dct:created + (string->symbol + (format #f "~s^^xsd:datetime " + (field + ("CAST(createtime AS CHAR)" EntryCreateTime))))) + (set gnt:belongsToSpecies (string->identifier + "" + (remap-species-identifiers (field Species Fullname)) + #:separator "" + #:proc string-capitalize-first)) + (set gnt:hasGeneId (string->symbol (format #f "generif:~a" (field GeneRIF_BASIC GeneId)))) + (set skos:notation (match (field GeneRIF_BASIC TaxID TaxonomicId) + ((? number? x) + (string->symbol (format #f "taxon:~a" x))) + (else ""))) + (set dct:hasVersion (annotate-field (format #f "~s" (field GeneRIF_BASIC versionId)) + '^^xsd:integer)) + (set dct:references (string->symbol (format #f "pubmed:~a" (field GeneRIF_BASIC PubMed_ID)))))) |