diff options
Diffstat (limited to 'examples/generif.scm')
-rwxr-xr-x | examples/generif.scm | 246 |
1 files changed, 97 insertions, 149 deletions
diff --git a/examples/generif.scm b/examples/generif.scm index 11235e0..628e34e 100755 --- a/examples/generif.scm +++ b/examples/generif.scm @@ -11,45 +11,17 @@ (transform strings) (transform sql) (transform triples) - (transform special-forms) - (transform uuid)) + (transform special-forms)) -(define-transformer genewiki-symbols - (tables (GeneRIF_BASIC) - "GROUP BY BINARY symbol") - (triples - (string->identifier - "symbol" - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field GeneRIF_BASIC symbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:label - (field GeneRIF_BASIC symbol)))) - -;; Some symbols exist in the RIF table that don't exist in the GeneRIF -;; table. -(define-transformer generif-symbols - (tables (GeneRIF) - "WHERE symbol NOT IN (SELECT symbol from GeneRIF_BASIC) GROUP BY BINARY symbol") - (triples - (string->identifier - "symbol" - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field GeneRIF symbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:label - (field GeneRIF symbol)))) - (define-transformer gn-genewiki-entries (tables (GeneRIF (left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId") (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id") (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id")) - "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol, GeneRIF.SpeciesId, GeneRIF.createtime, GeneRIF.reason") + "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL +GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol") (schema-triples (gnc:GeneWikiEntry a rdfs:Class) (gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry) @@ -62,130 +34,108 @@ (gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork") (gnt:geneSymbol rdfs:domain gnc:GNWikiEntry)) (triples - (string->identifier - "symbol" - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (field GeneRIF symbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:comment - (let* ((generif-comment (sanitize-rdf-string (field GeneRIF comment))) - (create-time (field GeneRIF createtime EntryCreateTime)) - (pmid (field GeneRIF PubMed_ID PMID)) - (web-url (field GeneRIF weburl)) - (species (string->identifier - "" - (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first)) - (version-id (field GeneRIF versionId)) - (identifier (field GeneRIF Id)) - (initial (sanitize-rdf-string (field GeneRIF initial))) - (reason (field GeneRIF reason)) - (email (sanitize-rdf-string (field GeneRIF email))) - (category - (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR '; ')" - GeneCategory)))) - (string->symbol - (string-append - "[ " - (format #f "rdf:type gnc:GNWikiEntry ; ") - (if (string? species) - "" - (format #f "gnt:belongsToSpecies ~a ; " - species)) - (format #f "rdfs:comment ~s^^xsd:string ; " - generif-comment) - (if (string? create-time) - "" - (format #f "dct:created ~s^^xsd:datetime ; " - (time-unix->string - create-time "~5"))) - (if (and (string? pmid) (not (string-null? pmid))) - (format #f - "~{dct:references pubmed:~a ; ~}" - (string-split pmid #\space)) - "") - (if (string-blank? email) - "" - (format #f "foaf:mbox ~s ; " email)) - (format #f "dct:identifier ~s ; " identifier) - (format #f "dct:hasVersion \"~s\"^^xsd:int ; " version-id) - (if (string-blank? reason) - "" - (format #f "gnt:reason ~s ; " reason)) - (if (or (null? initial) - (string-blank? initial)) - "" (format #f "gnt:initial ~s ; " initial)) - (if (string-blank? category) - "" - (format #f - "gnt:belongsToCategory ~s ; " - category)) - (if (and (string? web-url) (not (string-null? web-url))) - (format #f "foaf:homepage ~s ; " - web-url) - "") - " ] ")))))) + (format + #f "gn:wiki-~a-~a" + (field GeneRIF Id) + (field GeneRIF versionId)) + (set rdfs:label (string->symbol + (format #f "'~a'@en" + (replace-substrings + (sanitize-rdf-string + (field GeneRIF comment)) + '(("'" . "\\'")))))) + (set rdf:type 'gnc:GNWikiEntry) + (set gnt:symbol (field GeneRIF symbol)) + (set gnt:belongsToSpecies (string->identifier + "" + (remap-species-identifiers (field Species Fullname)) + #:separator "" + #:proc string-capitalize-first)) + (set dct:created + (string->symbol + (format #f "~s^^xsd:datetime " + (field + ("CAST(createtime AS CHAR)" EntryCreateTime))))) + (multiset dct:references + (map (lambda (pmid) + (match pmid + ((? string-blank? p) "") + (p (string->symbol + (format #f "pubmed:~a" (string-trim-both pmid)))))) + (string-split (field GeneRIF PubMed_ID PMID) + #\space))) + (set foaf:mbox + (match (sanitize-rdf-string (field GeneRIF email)) + ((? string-blank? mbox) "") + (mbox (string->symbol + (format #f "<~a>" mbox))))) + (set dct:identifier (annotate-field (format #f "~s" (field GeneRIF Id)) + '^^xsd:integer)) + (set foaf:homepage + (match (sanitize-rdf-string (field GeneRIF weburl)) + ((? string-blank? homepage) "") + (homepage (string->symbol + (format #f "<~a>" homepage))))) + (set dct:hasVersion (annotate-field (format #f "~s" (field GeneRIF versionId)) + '^^xsd:integer)) + (set gnt:initial (sanitize-rdf-string (field GeneRIF initial))) + (set gnt:reason (field GeneRIF reason)) + (multiset gnt:belongsToCategory + (string-split + (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR ';')" + GeneCategory)) + #\;)))) (define-transformer ncbi-genewiki-entries (tables (GeneRIF_BASIC - (left-join Species "USING (SpeciesId)")) - "WHERE GeneRIF_BASIC.comment IS NOT NULL AND TRIM(GeneRIF_BASIC.comment) != '' AND TRIM(GeneRIF_BASIC.symbol) != '' GROUP BY GeneRIF_BASIC.comment, GeneRIF_BASIC.createtime, GeneRIF_BASIC.VersionId, GeneRIF_BASIC.SpeciesId, GeneRIF_BASIC.TaxID") + (left-join Species "USING (SpeciesId)"))) (schema-triples (gnc:NCBIWikiEntry rdfs:subClassOf gnc:GeneWikiEntry) - (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI") - (gnt:hasVersionId a owl:ObjectProperty) - (gnt:hasVersionId rdfs:domain gnc:NCBIWikiEntry) - (gnt:hasVersionId skos:definition "The VersionId of this this resource")) + (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI")) (triples - (string->identifier - "symbol" - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field GeneRIF_BASIC symbol GeneRIFSymbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:comment - (let ([ncbi-comment (sanitize-rdf-string (field GeneRIF_BASIC comment))] - [species-name - (string->identifier - "" - (remap-species-identifiers (field Species Fullname SpeciesFullName)) - #:separator "" - #:proc string-capitalize-first)] - [taxonomic-id (field GeneRIF_BASIC TaxID TaxonomicId)] - [create-time (field GeneRIF_BASIC createtime EntryCreateTime)] - [pmid (field GeneRIF_BASIC PubMed_ID PMID)] - [gene-id (field GeneRIF_BASIC GeneId)] - [version-id (field GeneRIF_BASIC VersionId)]) + (format + #f "gn:rif-~a-~a-~a-~a" + (field GeneRIF_BASIC GeneId) + (field GeneRIF_BASIC PubMed_ID) + (field + ("DATE_FORMAT(createtime, '%Y-%m-%dT%T')" CreateTime)) + (field GeneRIF_BASIC VersionId)) + (set rdf:type + (let* ((comment (format #f "'~a'@en" + (replace-substrings + (sanitize-rdf-string + (field GeneRIF_BASIC comment)) + '(("\\" . "\\\\") + ("\n" . "\\n") + ("\r" . "\\r") + ("'" . "\\'"))))) + (create-time (format #f "~s^^xsd:datetime" + (field + ("CAST(createtime AS CHAR)" EntryCreateTime)))) + (symbol (field GeneRIF_BASIC symbol)) + (species (string->identifier + "" + (remap-species-identifiers (field Species Fullname)) + #:separator "" + #:proc string-capitalize-first)) + (gene-id (field GeneRIF_BASIC GeneId)) + (taxon-id (field GeneRIF_BASIC TaxID TaxonomicId)) + (pmid (field GeneRIF_BASIC PubMed_ID)) + (version-id (field GeneRIF_BASIC versionId))) (string->symbol (string-append - "[ " - (format #f "rdf:type gnc:NCBIWikiEntry ; ") - (format #f "rdfs:comment ~s^^xsd:string ; " - ncbi-comment) - (format #f "gnt:belongsToSpecies ~a ; " - species-name) - (if (eq? #f taxonomic-id) - "" - (format #f "skos:notation taxon:~a ; " - taxonomic-id)) - (format #f "gnt:hasGeneId generif:~a ; " - gene-id) - (format #f "dct:hasVersion '~a'^^xsd:int ; " - version-id) - (if (and (string? pmid) (not (string-null? pmid))) - (format #f - "~{dct:references pubmed:~a ; ~}" - (string-split pmid #\space)) - "") - (if (string? create-time) - "" - (format #f "dct:created ~s^^xsd:datetime ; " - (time-unix->string - create-time "~5"))) - " ]")))))) + (format #f "gnc:NCBIWikiEntry ;\n") + (format #f "\trdfs:label ~a ;\n" comment) + (format #f "\tgnt:belongsToSpecies ~a ;\n" species) + (format #f "\tgnt:symbol ~s ;\n" symbol) + (format #f "\tgnt:hasGeneId generif:~a ;\n" gene-id) + (match taxon-id + ((? number? x) + (format #f "\tskos:notation taxon:~a ;\n" taxon-id)) + (else "")) + (format #f "\tdct:hasVersion \"~a\"^^xsd:integer ;\n" version-id) + (format #f "\tdct:references pubmed:~a ;\n" pmid) + (format #f "\tdct:created ~a" create-time))))))) @@ -222,8 +172,6 @@ ("owl:" "<http://www.w3.org/2002/07/owl#>"))) (inputs (list - genewiki-symbols - generif-symbols gn-genewiki-entries ncbi-genewiki-entries)) (outputs |