From 7ba1c667540cd739730574fc46b3ae1ff99daefd Mon Sep 17 00:00:00 2001
From: Munyoki Kilyungi
Date: Wed, 22 Nov 2023 18:30:23 +0300
Subject: Construct URIs for external data in RDF ProbeSet transform.

Signed-off-by: Munyoki Kilyungi
---
 examples/probeset.scm | 246 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 246 insertions(+)

diff --git a/examples/probeset.scm b/examples/probeset.scm
index 78de14a..2516740 100755
--- a/examples/probeset.scm
+++ b/examples/probeset.scm
@@ -18,6 +18,48 @@
            (left-join GeneList_rn33 "ON GeneList.geneSymbol = ProbeSet.Symbol")
            (left-join Species "ON GeneChip.SpeciesId = Species.Id")))
   (schema-triples
+   (gnc:pantherLink rdf:type gnc:ResourceLink)
+   (gnc:pantherLink rdfs:label "PANTHER")
+   (gnc:pantherLink rdfs:comment "Gene and protein data resources from Celera-ABI")
+   (gnc:NCBIGeneLink rdf:type gnc:ResourceLink)
+   (gnc:NCBIGeneLink rdfs:label "Gene")
+   (gnc:NCBIGeneLink rdfs:comment "Info from NCBI Entrez Gene")
+   (gnc:omimLink rdf:type gnc:ResourceLink)
+   (gnc:omimLink rdfs:label "OMIM")
+   (gnc:omimLink rdfs:comment "Summary from Online Mendelian Inheritance in Man")
+   (gnc:homologeneLink rdf:type gnc:ResourceLink)
+   (gnc:homologeneLink rdfs:label "HomoloGene")
+   (gnc:homologeneLink rdfs:comment "Find similar genes in other species")
+   (gnc:uniprotLink rdf:type gnc:ResourceLink)
+   (gnc:uniprotLink rdfs:label "UniProt")
+   (gnc:uniprotLink rdfs:comment "UniProt")
+   (gnc:stringLink rdf:type gnc:ResourceLink)
+   (gnc:stringLink rdfs:label "STRING")
+   (gnc:stringLink rdfs:comment "Protein interactions: known and inferred")
+   (gnc:gtexLink rdf:type gnc:ResourceLink)
+   (gnc:gtexLink rdfs:label "GTEx Portal")
+   (gnc:gtexLink rdfs:comment "GTEx Portal")
+   (gnc:ebiGwasLink rdf:type gnc:ResourceLink)
+   (gnc:ebiGwasLink rdfs:label "EBI GWAS")
+   (gnc:ebiGwasLink rdfs:comment "EBI GWAS")
+   (gnc:genemaniaLink rdf:type gnc:ResourceLink)
+   (gnc:genemaniaLink rdfs:label "GeneMANIA")
+   (gnc:genemaniaLink rdfs:comment "GeneMANIA")
+   (gnc:gemmaLink rdf:type gnc:ResourceLink)
+   (gnc:gemmaLink rdfs:label "Gemma")
+   (gnc:gemmaLink rdfs:comment "Meta-analysis of gene expression data")
+   (gnc:biogpsLink rdf:type gnc:ResourceLink)
+   (gnc:biogpsLink rdfs:label "BioGPS")
+   (gnc:biogpsLink rdfs:comment "Expression across many tissues and cell types")
+   (gnc:abaLink rdf:type gnc:ResourceLink)
+   (gnc:abaLink rdfs:label "ABA")
+   (gnc:abaLink rdfs:comment "Allen Brain Atlas")
+   (gnc:ucsRefSeqLink rdf:type gnc:ResourceLink)
+   (gnc:ucsRefSeqLink rdfs:label "UCSC")
+   (gnc:ucsRefSeqLink rdfs:comment "Info from UCSC Genome Browser")
+   (gnc:proteinAtlasLink rdf:type gnc:ResourceLink)
+   (gnc:proteinAtlasLink rdfs:label "Protein Atlas")
+   (gnc:proteinAtlasLink rdfs:comment "Human Protein Atlas")
    (gnt:hasChip a owl:ObjectProperty)
    (gnt:hasChip rdfs:domain gnc:Probeset)
    (gnt:hasTargetId a owl:ObjectProperty)
@@ -104,6 +146,210 @@
                                ((string=? "-" strand-probe)
                                 " on the minus strand")
                                (else "")))))))
+    ;; NCBI Gene link.  In every template below "~N@*~a" prints format argument N, giving "<url> ." then "<url> a <class>".
+    (set dct:references
+         (let ((geneId (field ProbeSet GeneId)))
+           (if (not (string-blank? geneId))
+               (string->symbol ; presumably a symbol is written unquoted by the triple writer -- confirm
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids="
+                        geneId
+                        "a gnc:NCBIGeneLink"))
+               ""))) ; NOTE(review): assumes `set' skips a blank object -- confirm
+    ;; OMIM link
+    (set dct:references
+         (let ((omim (field ProbeSet OMIM)))
+           (if (not (string-blank? omim))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "http://www.ncbi.nlm.nih.gov/omim/"
+                        omim
+                        "a gnc:omimLink"))
+               "")))
+    ;; HomoloGene link
+    (set dct:references
+         (let ((homologene (field ProbeSet HomoloGeneID)))
+           (if (not (string-blank? homologene))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "http://www.ncbi.nlm.nih.gov/homologene/?term="
+                        homologene
+                        "a gnc:homologeneLink"))
+               "")))
+    ;; UniProt link
+    (set dct:references
+         (let ((uniprot (field ProbeSet UniProtID)))
+           (if (not (string-blank? uniprot))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "https://www.uniprot.org/uniprot/"
+                        uniprot
+                        "a gnc:uniprotLink"))
+               "")))
+    ;; STRING link
+    (set dct:references
+         (let ((symbol (field ProbeSet Symbol)))
+           (if (not (string-blank? symbol))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "http://string-db.org/newstring_cgi/show_network_section.pl?identifier="
+                        (string-trim-both symbol) ; trimmed like the other symbol-based links
+                        "a gnc:stringLink"))
+               "")))
+    ;; GTEx link
+    (set dct:references
+         (let ((symbol (field ProbeSet Symbol)))
+           (if (not (string-blank? symbol))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "https://www.gtexportal.org/home/gene/"
+                        (string-trim-both symbol)
+                        "a gnc:gtexLink"))
+               "")))
+    ;; EBI GWAS link
+    (set dct:references
+         (let ((symbol (field ProbeSet Symbol)))
+           (if (not (string-blank? symbol))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "https://www.ebi.ac.uk/gwas/search?query="
+                        (string-trim-both symbol)
+                        "a gnc:ebiGwasLink"))
+               "")))
+    ;; Protein Atlas link
+    (set dct:references
+         (let ((symbol (field ProbeSet Symbol)))
+           (if (not (string-blank? symbol))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "http://www.proteinatlas.org/search/"
+                        (string-trim-both symbol)
+                        "a gnc:proteinAtlasLink"))
+               "")))
+    ;; UCSC Genome Browser link (mouse uses db mm10, rat uses db rn7; other species emit nothing)
+    (set dct:references
+         (let* ((symbol (field ProbeSet Symbol))
+                (species (field Species Name))
+                (db (if (string=? species "mouse")
+                        "mm10" "rn7"))
+                (transcriptId (field ProbeSet RefSeq_TranscriptId))
+                (kgId (field GeneList_rn33 kgId)) ; NOTE(review): not used below -- confirm whether it can be dropped
+                (transcriptStart
+                 (if (string=? species "mouse")
+                     (field ("(GeneList.txStart * 1000000)" TranscriptStartMm10))
+                     (field ("(GeneList_rn33.txStart * 1000000)" TranscriptStartRn7))))
+                (chromosome
+                 (if (string=? species "mouse")
+                     (field GeneList Chromosome)
+                     (field GeneList_rn33 Chromosome)))
+                (transcriptEnd
+                 (if (string=? species "mouse")
+                     (field ("(GeneList.txEnd * 1000000)" TranscriptEndMm10))
+                     (field ("(GeneList_rn33.txEnd * 1000000)" TranscriptEndRn7))))
+                (url (format
+                      #f
+                      "http://genome.cse.ucsc.edu/cgi-bin/hgTracks?db=~a&hgg_gene="
+                      db)))
+           (if (and (not (string-blank? symbol))
+                    (not (string-blank? transcriptId))
+                    (number? transcriptStart)
+                    (number? transcriptEnd)
+                    (not (string-blank? chromosome))
+                    (or (string=? species "mouse")
+                        (string=? species "rat")))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a&hgg_chrom=chr~2@*~a&hgg_start=~3@*~a&hgg_end=~4@*~a> .~%<~0@*~a~1@*~a&hgg_chrom=chr~2@*~a&hgg_start=~3@*~a&hgg_end=~4@*~a> ~5@*~a"
+                        url
+                        transcriptId
+                        chromosome
+                        transcriptStart
+                        transcriptEnd
+                        "a gnc:ucsRefSeqLink" ; argument 5: the type statement
+                        ))
+               "")))
+    ;; PANTHER link
+    (set dct:references
+         (let ((symbol (field ProbeSet Symbol)))
+           (if (not (string-blank? symbol))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "http://www.pantherdb.org/genes/geneList.do?searchType=basic&fieldName=all&organism=all&listType=1&fieldValue="
+                        (string-trim-both symbol)
+                        "a gnc:pantherLink"))
+               "")))
+    ;; GeneMANIA link, built from the species slug and the gene id (mouse, rat and human only)
+    (set dct:references
+         (let ((geneId (field ProbeSet GeneId))
+               (species (lower-case-and-replace-spaces
+                         (field Species FullName))))
+           (if (and (not (string-blank? geneId))
+                    (not (string-blank? species))
+                    (or
+                     (string=? species "mus-musculus")
+                     (string=? species "rattus-norvegicus")
+                     (string=? species "homo-sapiens")))
+               (string->symbol
+                (format #f "<~0@*~a/~1@*~a/~2@*~a> .~%<~0@*~a/~1@*~a/~2@*~a> ~3@*~a"
+                        "https://genemania.org/search" species (string-trim-both geneId)
+                        "a gnc:genemaniaLink"))
+               "")))
+    ;; Allen Brain Atlas link: the search term is the symbol for mouse and the gene id for human
+    (set dct:references
+         (let ((symbol (field ProbeSet Symbol))
+               (geneId (field ProbeSet GeneId))
+               (species (field Species Name)))
+           (if (and (not (string-blank? symbol))
+                    (not (string-blank? species))
+                    (or (string=? species "human")
+                        (string=? species "mouse")))
+               (string->symbol
+                (format #f "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "http://mouse.brain-map.org/search/show?search_type=gene&search_term="
+                        (if (string=? species "mouse")
+                            (string-trim-both symbol)
+                            geneId)
+                        "a gnc:abaLink"))
+               "")))
+    ;; Gemma link
+    (set dct:references
+         (let ((geneId (field ProbeSet GeneId)))
+           (if (not (string-blank? geneId))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "http://www.chibi.ubc.ca/Gemma/gene/showGene.html?ncbiid="
+                        geneId
+                        "a gnc:gemmaLink"))
+               "")))
+    ;; BioGPS link (mouse, rat and human only); the species name is used as the "org" query value
+    (set dct:references
+         (let ((geneId (field ProbeSet GeneId))
+               (species (field Species Name)))
+           (if (and (not (string-blank? geneId))
+                    (not (string-blank? species))
+                    (or
+                     (string=? species "mouse")
+                     (string=? species "rat")
+                     (string=? species "human")))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a~2@*~a~3@*~a> .~%<~0@*~a~1@*~a~2@*~a~3@*~a> ~4@*~a"
+                        "http://biogps.org/?org="
+                        species
+                        "#goto=genereport&id="
+                        geneId
+                        "a gnc:biogpsLink"))
+               "")))
     (set gnt:strandProbe
          (field ProbeSet Strand_Probe))
     (set gnt:hasSpecificity
-- 
cgit v1.2.3