diff options
Diffstat (limited to 'examples')
-rwxr-xr-x | examples/genelist.scm | 193 |
1 files changed, 193 insertions, 0 deletions
#! /usr/bin/env guile
!#

;;; examples/genelist.scm
;;;
;;; Reconstructed from the diff "a/examples/genelist.scm" ->
;;; "b/examples/genelist.scm" (new file mode 100755, index
;;; 0000000..8097610, hunk @@ -0,0 +1,193 @@).
;;;
;;; Declares two transformers over the GeneList and GeneList_rn33 SQL
;;; tables and hands them to `with-documentation', together with the
;;; connection settings, RDF prefixes and output locations given on the
;;; command line:
;;;
;;;   -s, --settings FILE       file whose first datum is `read' and used
;;;                             as the connection settings (required)
;;;   -o, --output FILE         passed through as the #:rdf output
;;;   -d, --documentation FILE  passed through as the #:documentation output

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))


;; Transformer for the GeneList table, left-joined with Species on
;; SpeciesId and grouped by gene symbol, gene id and genomic position.
;;
;; The subject of every emitted triple is built from the GeneSymbol
;; column, with every character outside [A-Za-z0-9:] replaced by "_".
;; Multi-valued columns are collected with GROUP_CONCAT(DISTINCT ...) and
;; split on "," before being emitted through `multiset'.
(define-transformer genelist
  (tables (GeneList
           (left-join Species "USING (SpeciesId)"))
          "GROUP BY BINARY GeneSymbol, GeneId, chromosome, txStart, txEnd")
  (schema-triples
   (gnt:gene rdfs:domain gnc:GeneSymbol)
   (gnt:belongsToSpecies rdfs:domain gnc:GeneSymbol)
   (gnc:GeneSymbol a rdfs:Class)
   (gnc:GeneSymbol rdfs:label "Gene Symbol")
   ;; The properties below are declared, given a domain and commented
   ;; under the same gnt: name that the data triples use, and the
   ;; annotation predicate is rdfs:comment (RDF Schema defines no
   ;; rdfs:comments).
   (gnt:transcript rdfs:domain gnc:GeneSymbol)
   (gnt:transcript a owl:ObjectProperty)
   (gnt:transcript rdfs:comment "The gene transcript of this resource")
   (gnt:hasKgID rdfs:domain gnc:GeneSymbol)
   (gnt:hasKgID a owl:ObjectProperty)
   (gnt:hasKgID rdfs:comment "The kgID of this resource")
   (gnt:hasUnigenID rdfs:domain gnc:GeneSymbol)
   (gnt:hasUnigenID a owl:ObjectProperty)
   (gnt:hasUnigenID rdfs:comment "The UnigenID of this resource")
   (gnt:hasProteinID rdfs:domain gnc:GeneSymbol)
   (gnt:hasProteinID a owl:ObjectProperty)
   (gnt:hasProteinID rdfs:comment "The ProteinID of this resource")
   (gnt:hasAlignID rdfs:domain gnc:GeneSymbol)
   (gnt:hasAlignID a owl:ObjectProperty)
   (gnt:hasAlignID rdfs:comment "The AlignID of this resource")
   ;; NOTE(review): gnt:hasRgdID is emitted below but has no schema
   ;; declaration here -- confirm whether one is wanted.
   (gnt:TxEnd rdfs:range xsd:double)
   (gnt:TxStart rdfs:range xsd:double)
   ;; NOTE(review): gnt:hasTargetSeq / gnc:Probeset are not otherwise
   ;; used in this file -- possibly carried over from another
   ;; transformer; kept as-is.
   (gnt:hasTargetSeq rdfs:domain gnc:Probeset))
  (triples
      ;; Subject identifier derived from the sanitised gene symbol.
      (string->identifier
       "gene" (regexp-substitute/global #f "[^A-Za-z0-9:]"
                                        (field GeneList GeneSymbol)
                                        'pre "_" 'post))
    (set rdf:type 'gnc:GeneSymbol)
    (set rdfs:label (field GeneList GeneSymbol))
    (set dct:description (sanitize-rdf-string (field GeneList GeneDescription)))
    (set gnt:gene (ontology 'gene: (field GeneList GeneId)))
    (set gnt:chromosome (field GeneList Chromosome))
    (set gnt:TxStart (annotate-field
                      (field GeneList TxStart)
                      '^^xsd:double))
    (set gnt:TxEnd (annotate-field
                    (field GeneList TxEnd)
                    '^^xsd:double))
    (set gnt:Strand (string-trim-both (field GeneList Strand)))
    ;; One gnt:belongsToSpecies value per distinct species name in the
    ;; group; each name is trimmed, remapped and turned into an
    ;; identifier with an empty prefix and separator.
    (multiset
     gnt:belongsToSpecies
     (map
      (lambda (species)
        (string->identifier
         ""
         (remap-species-identifiers
          (string-trim-both species))
         #:separator ""
         #:proc string-capitalize-first))
      (string-split
       (sanitize-rdf-string
        (field ("GROUP_CONCAT( DISTINCT Species.Name )" SpeciesName)))
       #\,)))
    ;; One gnt:transcript value per distinct NM_ID, in the transcript:
    ;; namespace.
    (multiset
     gnt:transcript
     (map
      (lambda (transcript)
        (ontology 'transcript:
                  (string-trim-both transcript)))
      (string-split
       (sanitize-rdf-string
        (field ("GROUP_CONCAT( DISTINCT NM_ID )" NMID)))
       #\,)))
    ;; The remaining identifiers are emitted as trimmed strings, one
    ;; value per distinct comma-separated entry.
    (multiset
     gnt:hasKgID
     (map string-trim-both
          (string-split
           (sanitize-rdf-string
            (field ("GROUP_CONCAT( DISTINCT kgID )" kgID)))
           #\,)))
    (multiset
     gnt:hasUnigenID
     (map string-trim-both
          (string-split
           (sanitize-rdf-string
            (field ("GROUP_CONCAT( DISTINCT UnigenID )" UnigenID)))
           #\,)))
    (multiset
     gnt:hasProteinID
     (map string-trim-both
          (string-split
           (sanitize-rdf-string
            (field ("GROUP_CONCAT( DISTINCT ProteinID )" ProteinID)))
           #\,)))
    (multiset
     gnt:hasAlignID
     (map string-trim-both
          (string-split
           (sanitize-rdf-string
            (field ("GROUP_CONCAT( DISTINCT AlignID )" AlignID)))
           #\,)))
    (multiset
     gnt:hasRgdID
     (map string-trim-both
          (string-split
           (sanitize-rdf-string
            (field ("GROUP_CONCAT( DISTINCT RGD_ID )" RgdID)))
           #\,)))))

;; Transformer for the GeneList_rn33 table, grouped by gene symbol and
;; genomic position.  Subjects are built the same way as in `genelist';
;; the species is fixed to gn:Rattus_norvegicus instead of being read
;; from a joined table.
(define-transformer genelist-rn33
  (tables (GeneList_rn33)
          "GROUP BY BINARY GeneSymbol, chromosome, txStart, txEnd")
  (triples
      (string->identifier
       "gene" (regexp-substitute/global #f "[^A-Za-z0-9:]"
                                        (field GeneList_rn33 geneSymbol)
                                        'pre "_" 'post))
    (set rdf:type 'gnc:GeneSymbol)
    (set rdfs:label (field GeneList_rn33 geneSymbol))
    (set gnt:chromosome (field GeneList_rn33 chromosome))
    (set gnt:TxStart (annotate-field
                      (field GeneList_rn33 txStart)
                      '^^xsd:double))
    (set gnt:TxEnd (annotate-field
                    (field GeneList_rn33 txEnd)
                    '^^xsd:double))
    (set gnt:Strand (string-trim-both (field GeneList_rn33 strand)))
    (set gnt:belongsToSpecies 'gn:Rattus_norvegicus)
    (multiset
     gnt:transcript
     (map
      (lambda (transcript)
        (ontology 'transcript:
                  (string-trim-both transcript)))
      (string-split
       (sanitize-rdf-string
        (field ("GROUP_CONCAT( DISTINCT NM_ID )" NMID)))
       #\,)))
    (multiset
     gnt:hasKgID
     (map string-trim-both
          (string-split
           (sanitize-rdf-string
            (field ("GROUP_CONCAT( DISTINCT kgID )" kgID)))
           #\,)))))



;; Script entry point: parse the command line, load the connection
;; settings and run both transformers through `with-documentation'.
(let* ((option-spec
        '((settings (single-char #\s) (value #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       (%connection-settings
        (begin
          ;; `option-ref' yields #f when --settings is absent; fail with
          ;; an explicit message instead of the wrong-type error that
          ;; `call-with-input-file' would raise on #f.
          (unless settings
            (format (current-error-port)
                    "~a: missing required option --settings FILE (-s FILE)~%"
                    (car (command-line)))
            (exit 1))
          (call-with-input-file settings
            read))))
  (with-documentation
   (name "Gene Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   (prefixes
    '(("gn:" "<http://genenetwork.org/id/>")
      ("probeset:" "<http://genenetwork.org/probeset/>")
      ("gnc:" "<http://genenetwork.org/category/>")
      ("gnt:" "<http://genenetwork.org/term/>")
      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
      ("dct:" "<http://purl.org/dc/terms/>")
      ("owl:" "<http://www.w3.org/2002/07/owl#>")
      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
      ("qb:" "<http://purl.org/linked-data/cube#>")
      ("gene:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>")
      ("sdmx-measure:" "<http://purl.org/linked-data/sdmx/2009/measure#>")
      ("transcript:" "<https://portals.broadinstitute.org/gpp/public/trans/details?transName=>")
      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")))
   (inputs
    (list genelist-rn33
          genelist))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))