diff options
author | Munyoki Kilyungi | 2024-12-09 11:05:24 +0300 |
---|---|---|
committer | Munyoki Kilyungi | 2024-12-09 12:10:25 +0300 |
commit | d3827cf3f82996e35c1c28e35813f9521f2257d7 (patch) | |
tree | a4c38989551e96bcb04ebe30914df8d38072861a /examples | |
parent | 3cd4fcb1c69bde2295f74af86e9eaa5f0343c48f (diff) | |
download | gn-transform-databases-d3827cf3f82996e35c1c28e35813f9521f2257d7.tar.gz |
Add old generif file.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'examples')
-rwxr-xr-x | examples/generif-old.scm | 230 |
1 files changed, 230 insertions, 0 deletions
#! /usr/bin/env guile
!#

;;; examples/generif-old.scm
;;;
;;; Transforms the GeneRIF and GeneRIF_BASIC SQL tables into RDF.  Four
;;; transformers are defined below and then handed to
;;; `with-documentation', together with the connection settings read
;;; from the file given by --settings and the output locations given by
;;; --output and --documentation.
;;;
;;; Usage:
;;;   generif-old.scm --settings FILE [--output FILE] [--documentation FILE]

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (rnrs bytevectors)
             (ice-9 format)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))



;; One rdfs:label triple per distinct (binary-compared) symbol in
;; GeneRIF_BASIC.  The subject identifier is built from the symbol with
;; every character outside [A-Za-z0-9:] replaced by an underscore.
(define-transformer genewiki-symbols
  (tables (GeneRIF_BASIC)
          "GROUP BY BINARY symbol")
  (triples
      (string->identifier
       "symbol"
       (regexp-substitute/global #f "[^A-Za-z0-9:]"
                                 (field GeneRIF_BASIC symbol)
                                 'pre "_" 'post)
       ;; Identity #:proc: the symbol text is passed through as-is.
       #:proc (lambda (x) x))
    (set rdfs:label
         (field GeneRIF_BASIC symbol))))

;; Some symbols exist in the GeneRIF table that don't exist in the
;; GeneRIF_BASIC table; label those as well.
(define-transformer generif-symbols
  (tables (GeneRIF)
          "WHERE symbol NOT IN (SELECT symbol from GeneRIF_BASIC) GROUP BY BINARY symbol")
  (triples
      (string->identifier
       "symbol"
       (regexp-substitute/global #f "[^A-Za-z0-9:]"
                                 (field GeneRIF symbol)
                                 'pre "_" 'post)
       #:proc (lambda (x) x))
    (set rdfs:label
         (field GeneRIF symbol))))

;; Wiki entries entered through GeneNetwork (table GeneRIF, joined with
;; Species, GeneRIFXRef and GeneCategory).  Each row becomes one
;; rdfs:comment triple whose object is a hand-assembled blank node
;; ("[ ... ]") carrying the entry's text and metadata.
(define-transformer gn-genewiki-entries
  (tables (GeneRIF
           (left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId")
           (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id")
           (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id"))
          "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol, GeneRIF.SpeciesId, GeneRIF.createtime, GeneRIF.reason")
  (schema-triples
   (gnc:GeneWikiEntry a rdfs:Class)
   (gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
   (gnt:initial a owl:ObjectProperty)
   (gnt:initial rdfs:domain gnc:GeneWikiEntry)
   (gnt:initial skos:definition "Optional user or project code or your initials")
   (gnt:reason a owl:ObjectProperty)
   (gnt:reason rdfs:domain gnc:GeneWikiEntry)
   (gnt:reason skos:definition "The reason why this resource was modified")
   (gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork")
   (gnt:geneSymbol rdfs:domain gnc:GNWikiEntry))
  (triples
      (string->identifier
       "symbol"
       (regexp-substitute/global
        #f "[^A-Za-z0-9:]"
        (field GeneRIF symbol)
        'pre "_" 'post)
       #:proc (lambda (x) x))
    (set rdfs:comment
         (let* ((generif-comment (sanitize-rdf-string (field GeneRIF comment)))
                (create-time (field GeneRIF createtime EntryCreateTime))
                (pmid (field GeneRIF PubMed_ID PMID))
                (web-url (field GeneRIF weburl))
                (species (string->identifier
                          ""
                          (remap-species-identifiers (field Species Fullname))
                          #:separator ""
                          #:proc string-capitalize-first))
                (version-id (field GeneRIF versionId))
                (identifier (field GeneRIF Id))
                (initial (sanitize-rdf-string (field GeneRIF initial)))
                (reason (field GeneRIF reason))
                (email (sanitize-rdf-string (field GeneRIF email)))
                (category
                 (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR '; ')"
                         GeneCategory))))
           ;; The blank node is built as text and turned into a symbol;
           ;; each optional predicate contributes "" when its value is
           ;; missing.
           (string->symbol
            (string-append
             "[ "
             (format #f "rdf:type gnc:GNWikiEntry ; ")
             ;; A string-valued species is treated as "no species" and
             ;; skipped.
             (if (string? species)
                 ""
                 (format #f "gnt:belongsToSpecies ~a ; "
                         species))
             (format #f "rdfs:comment ~s^^xsd:string ; "
                     generif-comment)
             ;; A string-valued create-time is skipped; anything else is
             ;; formatted with `time-unix->string'.  The XSD datatype
             ;; name is case-sensitive: it is xsd:dateTime.
             (if (string? create-time)
                 ""
                 (format #f "dct:created ~s^^xsd:dateTime ; "
                         (time-unix->string
                          create-time "~5")))
             ;; PubMed_ID may hold several space-separated ids; emit one
             ;; dct:references per id.
             (if (and (string? pmid) (not (string-null? pmid)))
                 (format #f
                         "~{dct:references pubmed:~a ; ~}"
                         (string-split pmid #\space))
                 "")
             (if (string-blank? email)
                 ""
                 (format #f "foaf:mbox ~s ; " email))
             (format #f "dct:identifier ~s ; " identifier)
             (format #f "dct:hasVersion \"~s\"^^xsd:int ; " version-id)
             (if (string-blank? reason)
                 ""
                 (format #f "gnt:reason ~s ; " reason))
             (if (or (null? initial)
                     (string-blank? initial))
                 "" (format #f "gnt:initial ~s ; " initial))
             (if (string-blank? category)
                 ""
                 (format #f
                         "gnt:belongsToCategory ~s ; "
                         category))
             (if (and (string? web-url) (not (string-null? web-url)))
                 (format #f "foaf:homepage ~s ; "
                         web-url)
                 "")
             " ] ")))))))

;; Wiki entries obtained from NCBI (table GeneRIF_BASIC, joined with
;; Species).  Rows with an empty comment or symbol are excluded by the
;; SQL clause.  As above, each row becomes one rdfs:comment triple whose
;; object is a hand-assembled blank node.
(define-transformer ncbi-genewiki-entries
  (tables (GeneRIF_BASIC
           (left-join Species "USING (SpeciesId)"))
          "WHERE GeneRIF_BASIC.comment IS NOT NULL AND TRIM(GeneRIF_BASIC.comment) != '' AND TRIM(GeneRIF_BASIC.symbol) != '' GROUP BY GeneRIF_BASIC.comment, GeneRIF_BASIC.createtime, GeneRIF_BASIC.VersionId, GeneRIF_BASIC.SpeciesId, GeneRIF_BASIC.TaxID")
  (schema-triples
   (gnc:NCBIWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
   (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI")
   ;; NOTE(review): gnt:hasVersionId is declared here, but the triples
   ;; below emit dct:hasVersion -- confirm which predicate is intended.
   (gnt:hasVersionId a owl:ObjectProperty)
   (gnt:hasVersionId rdfs:domain gnc:NCBIWikiEntry)
   (gnt:hasVersionId skos:definition "The VersionId of this resource"))
  (triples
      (string->identifier
       "symbol"
       (regexp-substitute/global #f "[^A-Za-z0-9:]"
                                 (field GeneRIF_BASIC symbol GeneRIFSymbol)
                                 'pre "_" 'post)
       #:proc (lambda (x) x))
    (set rdfs:comment
         (let ([ncbi-comment (sanitize-rdf-string (field GeneRIF_BASIC comment))]
               [species-name
                (string->identifier
                 ""
                 (remap-species-identifiers (field Species Fullname SpeciesFullName))
                 #:separator ""
                 #:proc string-capitalize-first)]
               [taxonomic-id (field GeneRIF_BASIC TaxID TaxonomicId)]
               [create-time (field GeneRIF_BASIC createtime EntryCreateTime)]
               [pmid (field GeneRIF_BASIC PubMed_ID PMID)]
               [gene-id (field GeneRIF_BASIC GeneId)]
               [version-id (field GeneRIF_BASIC VersionId)])
           (string->symbol
            (string-append
             "[ "
             (format #f "rdf:type gnc:NCBIWikiEntry ; ")
             (format #f "rdfs:comment ~s^^xsd:string ; "
                     ncbi-comment)
             ;; NOTE(review): unlike gn-genewiki-entries, the species is
             ;; emitted unconditionally here even though Species is
             ;; LEFT JOINed -- confirm a missing species cannot yield an
             ;; empty object.
             (format #f "gnt:belongsToSpecies ~a ; "
                     species-name)
             (if (eq? #f taxonomic-id)
                 ""
                 (format #f "skos:notation taxon:~a ; "
                         taxonomic-id))
             (format #f "gnt:hasGeneId generif:~a ; "
                     gene-id)
             (format #f "dct:hasVersion '~a'^^xsd:int ; "
                     version-id)
             ;; PubMed_ID may hold several space-separated ids; emit one
             ;; dct:references per id.
             (if (and (string? pmid) (not (string-null? pmid)))
                 (format #f
                         "~{dct:references pubmed:~a ; ~}"
                         (string-split pmid #\space))
                 "")
             ;; The XSD datatype name is case-sensitive: xsd:dateTime.
             (if (string? create-time)
                 ""
                 (format #f "dct:created ~s^^xsd:dateTime ; "
                         (time-unix->string
                          create-time "~5")))
             " ]")))))))



;; Script entry point: parse the command line, read the connection
;; settings (a single datum `read' from the --settings file) and run
;; all four transformers.
(let* ((option-spec
        ;; --settings is mandatory: its value is opened unconditionally
        ;; below, so let getopt-long report a missing option instead of
        ;; failing later with a type error on #f.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       (%connection-settings
        (call-with-input-file settings
          read)))

  (with-documentation
   (name "GeneRIF Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   (prefixes
    '(("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
      ("gn:" "<http://genenetwork.org/id/>")
      ("gnc:" "<http://genenetwork.org/category/>")
      ("gnt:" "<http://genenetwork.org/term/>")
      ("dct:" "<http://purl.org/dc/terms/>")
      ("foaf:" "<http://xmlns.com/foaf/0.1/>")
      ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")
      ("taxon:" "<https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=>")
      ("generif:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>")
      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
      ("owl:" "<http://www.w3.org/2002/07/owl#>")))
   (inputs
    (list
     genewiki-symbols
     generif-symbols
     gn-genewiki-entries
     ncbi-genewiki-entries))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))