about summary refs log tree commit diff
path: root/examples
diff options
context:
space:
mode:
author Munyoki Kilyungi 2023-11-22 18:30:23 +0300
committer Munyoki Kilyungi 2023-11-22 18:30:23 +0300
commit 7ba1c667540cd739730574fc46b3ae1ff99daefd (patch)
tree 0c11d6deb911b7c282b03e3a40ecb1406eb87e3c /examples
parent cf54f81c7a90533ec0ed8d250a25ebc9fe183ba1 (diff)
download gn-transform-databases-7ba1c667540cd739730574fc46b3ae1ff99daefd.tar.gz
Construct URIs for external data in RDF ProbeSet transform.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'examples')
-rwxr-xr-x examples/probeset.scm 246
1 files changed, 246 insertions, 0 deletions
diff --git a/examples/probeset.scm b/examples/probeset.scm
index 78de14a..2516740 100755
--- a/examples/probeset.scm
+++ b/examples/probeset.scm
@@ -18,6 +18,48 @@
(left-join GeneList_rn33 "ON GeneList.geneSymbol = ProbeSet.Symbol")
(left-join Species "ON GeneChip.SpeciesId = Species.Id")))
(schema-triples
+ ;; External resource link kinds.  Each kind is declared with three
+ ;; triples: its type (gnc:ResourceLink), a short rdfs:label and a longer
+ ;; rdfs:comment.  The names here are the ones the dct:references
+ ;; entries use when typing the URLs they emit.
+ (gnc:pantherLink rdf:type gnc:ResourceLink)
+ (gnc:pantherLink rdfs:label "PANTHER")
+ (gnc:pantherLink rdfs:comment "Gene and protein data resources from Celera-ABI")
+ (gnc:NCBIGeneLink rdf:type gnc:ResourceLink)
+ (gnc:NCBIGeneLink rdfs:label "Gene")
+ (gnc:NCBIGeneLink rdfs:comment "Info from NCBI Entrez Gene")
+ (gnc:omimLink rdf:type gnc:ResourceLink)
+ (gnc:omimLink rdfs:label "OMIM")
+ (gnc:omimLink rdfs:comment "Summary from Online Mendelian Inheritance in Man")
+ (gnc:homologeneLink rdf:type gnc:ResourceLink)
+ (gnc:homologeneLink rdfs:label "HomoloGene")
+ (gnc:homologeneLink rdfs:comment "Find similar genes in other species")
+ (gnc:uniprotLink rdf:type gnc:ResourceLink)
+ (gnc:uniprotLink rdfs:label "UniProt")
+ (gnc:uniprotLink rdfs:comment "UniProt")
+ (gnc:stringLink rdf:type gnc:ResourceLink)
+ (gnc:stringLink rdfs:label "STRING")
+ (gnc:stringLink rdfs:comment "Protein interactions: known and inferred")
+ (gnc:gtexLink rdf:type gnc:ResourceLink)
+ (gnc:gtexLink rdfs:label "GTEx Portal")
+ (gnc:gtexLink rdfs:comment "GTEx Portal")
+ (gnc:ebiGwasLink rdf:type gnc:ResourceLink)
+ (gnc:ebiGwasLink rdfs:label "EBI GWAS")
+ (gnc:ebiGwasLink rdfs:comment "EBI GWAS")
+ (gnc:genemaniaLink rdf:type gnc:ResourceLink)
+ (gnc:genemaniaLink rdfs:label "GeneMANIA")
+ (gnc:genemaniaLink rdfs:comment "GeneMANIA")
+ (gnc:gemmaLink rdf:type gnc:ResourceLink)
+ (gnc:gemmaLink rdfs:label "Gemma")
+ (gnc:gemmaLink rdfs:comment "Meta-analysis of gene expression data")
+ (gnc:biogpsLink rdf:type gnc:ResourceLink)
+ (gnc:biogpsLink rdfs:label "BioGPS")
+ (gnc:biogpsLink rdfs:comment "Expression across many tissues and cell types")
+ (gnc:abaLink rdf:type gnc:ResourceLink)
+ (gnc:abaLink rdfs:label "ABA")
+ (gnc:abaLink rdfs:comment "Allen Brain Atlas")
+ (gnc:ucsRefSeqLink rdf:type gnc:ResourceLink)
+ (gnc:ucsRefSeqLink rdfs:label "UCSC")
+ (gnc:ucsRefSeqLink rdfs:comment "Info from UCSC Genome Browser")
+ (gnc:proteinAtlasLink rdf:type gnc:ResourceLink)
+ (gnc:proteinAtlasLink rdfs:label "Protein Atlas")
+ (gnc:proteinAtlasLink rdfs:comment "Human Protein Atlas")
(gnt:hasChip a owl:ObjectProperty)
(gnt:hasChip rdfs:domain gnc:Probeset)
(gnt:hasTargetId a owl:ObjectProperty)
@@ -104,6 +146,210 @@
((string=? "-" strand-probe)
" on the minus strand")
(else "")))))))
+ ;; NCBI Gene link: reference the Entrez Gene page for ProbeSet.GeneId.
+ ;; Emits "<URL> ." followed by "<URL> a gnc:NCBIGeneLink" as a raw symbol.
+ (set dct:references
+      (let ((ncbi-id (field ProbeSet GeneId)))
+        (if (string-blank? ncbi-id)
+            ;; No gene id: yield the empty string instead of a URI.
+            ""
+            (string->symbol
+             (format #f
+                     "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                     "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids="
+                     ncbi-id
+                     "a gnc:NCBIGeneLink")))))
+ ;; OMIM link: reference the OMIM entry named by ProbeSet.OMIM.
+ ;; Emits "<URL> ." followed by "<URL> a gnc:omimLink" as a raw symbol.
+ (set dct:references
+      (let ((omim-id (field ProbeSet OMIM)))
+        (if (string-blank? omim-id)
+            ;; Nothing to link to: yield the empty string.
+            ""
+            (string->symbol
+             (format #f
+                     "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                     "http://www.ncbi.nlm.nih.gov/omim/"
+                     omim-id
+                     "a gnc:omimLink")))))
+ ;; HomoloGene link: search HomoloGene for ProbeSet.HomoloGeneID.
+ ;; Emits "<URL> ." followed by "<URL> a gnc:homologeneLink" as a raw symbol.
+ (set dct:references
+      (let ((homologene-id (field ProbeSet HomoloGeneID)))
+        (if (string-blank? homologene-id)
+            ;; Nothing to link to: yield the empty string.
+            ""
+            (string->symbol
+             (format #f
+                     "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                     "http://www.ncbi.nlm.nih.gov/homologene/?term="
+                     homologene-id
+                     "a gnc:homologeneLink")))))
+ ;; UniProt link: reference the UniProt entry named by ProbeSet.UniProtID.
+ ;; Emits "<URL> ." followed by "<URL> a gnc:uniprotLink" as a raw symbol.
+ (set dct:references
+      (let ((uniprot-id (field ProbeSet UniProtID)))
+        (if (string-blank? uniprot-id)
+            ;; Nothing to link to: yield the empty string.
+            ""
+            (string->symbol
+             (format #f
+                     "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                     "https://www.uniprot.org/uniprot/"
+                     uniprot-id
+                     "a gnc:uniprotLink")))))
+ ;; STRING link: look up ProbeSet.Symbol in the STRING network viewer.
+ ;; Emits "<URL> ." followed by "<URL> a gnc:stringLink" as a raw symbol.
+ ;; NOTE(review): the symbol is inserted as-is here, whereas the GTEx,
+ ;; EBI GWAS and Protein Atlas links trim it first -- confirm intended.
+ (set dct:references
+      (let ((gene-symbol (field ProbeSet Symbol)))
+        (if (string-blank? gene-symbol)
+            ;; No symbol: yield the empty string.
+            ""
+            (string->symbol
+             (format #f
+                     "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                     "http://string-db.org/newstring_cgi/show_network_section.pl?identifier="
+                     gene-symbol
+                     "a gnc:stringLink")))))
+ ;; GTEx link: reference the GTEx Portal gene page for the trimmed
+ ;; ProbeSet.Symbol.
+ ;; Emits "<URL> ." followed by "<URL> a gnc:gtexLink" as a raw symbol.
+ (set dct:references
+      (let ((gene-symbol (field ProbeSet Symbol)))
+        (if (string-blank? gene-symbol)
+            ;; No symbol: yield the empty string.
+            ""
+            (string->symbol
+             (format #f
+                     "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                     "https://www.gtexportal.org/home/gene/"
+                     (string-trim-both gene-symbol)
+                     "a gnc:gtexLink")))))
+ ;; EBI GWAS link: search the GWAS catalogue for the trimmed
+ ;; ProbeSet.Symbol.
+ ;; Emits "<URL> ." followed by "<URL> a gnc:ebiGwasLink" as a raw symbol.
+ (set dct:references
+      (let ((gene-symbol (field ProbeSet Symbol)))
+        (if (string-blank? gene-symbol)
+            ;; No symbol: yield the empty string.
+            ""
+            (string->symbol
+             (format #f
+                     "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                     "https://www.ebi.ac.uk/gwas/search?query="
+                     (string-trim-both gene-symbol)
+                     "a gnc:ebiGwasLink")))))
+ ;; Protein Atlas link: search proteinatlas.org for the trimmed
+ ;; ProbeSet.Symbol.
+ ;; Emits "<URL> ." followed by "<URL> a gnc:proteinAtlasLink" as a raw symbol.
+ (set dct:references
+      (let ((gene-symbol (field ProbeSet Symbol)))
+        (if (string-blank? gene-symbol)
+            ;; No symbol: yield the empty string.
+            ""
+            (string->symbol
+             (format #f
+                     "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                     "http://www.proteinatlas.org/search/"
+                     (string-trim-both gene-symbol)
+                     "a gnc:proteinAtlasLink")))))
+ ;; UCSC Genome Browser link.
+ ;; Builds an hgTracks URL from the RefSeq transcript id, chromosome and
+ ;; transcript start/end, using db=mm10 for "mouse" and db=rn7 otherwise;
+ ;; the link is only emitted for "mouse" or "rat".  Emits "<URL> ."
+ ;; followed by "<URL> a gnc:ucsRefSeqLink" as a raw symbol, or the empty
+ ;; string when any required value is missing.
+ (set dct:references
+      (let* ((symbol (field ProbeSet Symbol))
+             (species (field Species Name))
+             (db (if (string=? species "mouse")
+                     "mm10" "rn7"))
+             (transcriptId (field ProbeSet RefSeq_TranscriptId))
+             ;; Bound but not used in the URL below.
+             (kgId (field GeneList_rn33 kgId))
+             (transcriptStart
+              (if (string=? species "mouse")
+                  (field ("(GeneList.txStart * 1000000)" TranscriptStartMm10))
+                  (field ("(GeneList_rn33.txStart * 1000000)" TranscriptStartRn7))))
+             (chromosome
+              (if (string=? species "mouse")
+                  (field GeneList Chromosome)
+                  (field GeneList_rn33 Chromosome)))
+             (transcriptEnd
+              (if (string=? species "mouse")
+                  (field ("(GeneList.txEnd * 1000000)" TranscriptEndMm10))
+                  (field ("(GeneList_rn33.txEnd * 1000000)" TranscriptEndRn7))))
+             (url (format
+                   #f
+                   "http://genome.cse.ucsc.edu/cgi-bin/hgTracks?db=~a&hgg_gene="
+                   db)))
+        (if (and (not (string-blank? symbol))
+                 (not (string-blank? transcriptId))
+                 (number? transcriptStart)
+                 (number? transcriptEnd)
+                 (not (string-blank? chromosome))
+                 (or (string=? species "mouse")
+                     (string=? species "rat")))
+            (string->symbol
+             ;; Arguments by index: 0 url, 1 transcript id, 2 chromosome,
+             ;; 3 start, 4 end, 5 type.  The trailing directive must select
+             ;; argument 5 (the type), not argument 2 (the chromosome).
+             (format #f
+                     "<~0@*~a~1@*~a&hgg_chrom=chr~2@*~a&hgg_start=~3@*~a&hgg_end=~4@*~a> .~%<~0@*~a~1@*~a&hgg_chrom=chr~2@*~a&hgg_start=~3@*~a&hgg_end=~4@*~a> ~5@*~a"
+                     url
+                     transcriptId
+                     chromosome
+                     transcriptStart
+                     transcriptEnd
+                     "a gnc:ucsRefSeqLink"))
+            "")))
+ ;; PANTHER link: search pantherdb.org for the trimmed ProbeSet.Symbol.
+ ;; Emits "<URL> ." followed by "<URL> a gnc:pantherLink" as a raw symbol,
+ ;; or the empty string when the symbol is blank.  The type is spelled
+ ;; gnc:pantherLink (lower-case "p") to match the resource declared in
+ ;; schema-triples; prefixed names are case-sensitive.
+ (set dct:references
+      (let ((symbol (field ProbeSet Symbol)))
+        (if (not (string-blank? symbol))
+            (string->symbol
+             (format #f
+                     "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                     "http://www.pantherdb.org/genes/geneList.do?searchType=basic&fieldName=all&organism=all&listType=1&fieldValue="
+                     (string-trim-both symbol)
+                     "a gnc:pantherLink"))
+            "")))
+ ;; GeneMANIA link: "https://genemania.org/search/<organism>/<key>", where
+ ;; <organism> is Species.FullName passed through
+ ;; lower-case-and-replace-spaces and <key> is the trimmed ProbeSet.GeneId.
+ ;; Only emitted for mus-musculus, rattus-norvegicus and homo-sapiens;
+ ;; otherwise the empty string is produced.
+ ;; NOTE(review): the original local was named `symbol` although it reads
+ ;; ProbeSet.GeneId -- confirm whether ProbeSet.Symbol was intended.
+ (set dct:references
+      (let ((gene-key (field ProbeSet GeneId))
+            (organism (lower-case-and-replace-spaces
+                       (field Species FullName))))
+        (cond
+         ((and (not (string-blank? gene-key))
+               (not (string-blank? organism))
+               (or (string=? organism "mus-musculus")
+                   (string=? organism "rattus-norvegicus")
+                   (string=? organism "homo-sapiens")))
+          (string->symbol
+           (format #f "<~0@*~a/~1@*~a/~2@*~a> .~%<~0@*~a/~1@*~a/~2@*~a> ~3@*~a"
+                   "https://genemania.org/search" organism (string-trim-both gene-key)
+                   "a gnc:genemaniaLink")))
+         (else ""))))
+ ;; ABA (Allen Brain Atlas) link.
+ ;; For "mouse" the search term is the trimmed ProbeSet.Symbol; for "human"
+ ;; it is ProbeSet.GeneId.  Emits "<URL+term> ." followed by
+ ;; "<URL+term> a gnc:abaLink" as a raw symbol, or the empty string when
+ ;; the species is neither of those or a required value is blank.
+ (set dct:references
+      (let ((symbol (field ProbeSet Symbol))
+            (geneId (field ProbeSet GeneId))
+            (species (field Species Name)))
+        (if (and (not (string-blank? symbol))
+                 (not (string-blank? species))
+                 (or (string=? species "human")
+                     (string=? species "mouse")))
+            (let ((term (if (string=? species "mouse")
+                            (string-trim-both symbol)
+                            geneId)))
+              ;; A blank term would yield a search URL with nothing to
+              ;; search for, so skip the link in that case.
+              (if (string-blank? term)
+                  ""
+                  (string->symbol
+                   ;; Arguments by index: 0 base URL, 1 search term, 2 type.
+                   (format #f "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                           "http://mouse.brain-map.org/search/show?search_type=gene&search_term="
+                           term
+                           "a gnc:abaLink"))))
+            "")))
+ ;; Gemma link: reference the Gemma gene page for ProbeSet.GeneId.
+ ;; Emits "<URL> ." followed by "<URL> a gnc:gemmaLink" as a raw symbol.
+ (set dct:references
+      (let ((ncbi-id (field ProbeSet GeneId)))
+        (if (string-blank? ncbi-id)
+            ;; No gene id: yield the empty string.
+            ""
+            (string->symbol
+             (format #f
+                     "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                     "http://www.chibi.ubc.ca/Gemma/gene/showGene.html?ncbiid="
+                     ncbi-id
+                     "a gnc:gemmaLink")))))
+ ;; BioGPS link: "http://biogps.org/?org=<species>#goto=genereport&id=<GeneId>".
+ ;; Only emitted when Species.Name is "mouse", "rat" or "human" and the
+ ;; gene id is not blank; otherwise the empty string is produced.  Emits
+ ;; "<URL> ." followed by "<URL> a gnc:biogpsLink" as a raw symbol.
+ (set dct:references
+      (let ((geneId (field ProbeSet GeneId))
+            (species (field Species Name)))
+        (if (and (not (string-blank? geneId))
+                 (not (string-blank? species))
+                 (or
+                  (string=? species "mouse")
+                  (string=? species "rat")
+                  (string=? species "human")))
+            (string->symbol
+             ;; Arguments by index: 0 base URL, 1 species, 2 fragment,
+             ;; 3 gene id, 4 type.  There is no argument 5, so the format
+             ;; string must not reference one.
+             (format #f
+                     "<~0@*~a~1@*~a~2@*~a~3@*~a> .~%<~0@*~a~1@*~a~2@*~a~3@*~a> ~4@*~a"
+                     "http://biogps.org/?org="
+                     species
+                     "#goto=genereport&id="
+                     geneId
+                     "a gnc:biogpsLink"))
+            "")))
(set gnt:strandProbe
(field ProbeSet Strand_Probe))
(set gnt:hasSpecificity