From cfcfa78e0149c2cc98fb0031707431d2138cbe91 Mon Sep 17 00:00:00 2001
From: Munyoki Kilyungi
Date: Wed, 13 Dec 2023 17:59:57 +0300
Subject: Link extra resources to genes and trim gene symbols.

Signed-off-by: Munyoki Kilyungi
---
Review notes (below the "---" separator, so not part of the commit message):

  * Each added `dct:references` form builds a two-statement snippet with
    `format`: "<URL> ." followed by "<URL> a gnc:<someLink>", or yields ""
    when its guard on the symbol/gene id/species fails.
  * The PANTHER forms now emit `gnc:pantherLink`, matching the class name
    declared in the ontology hunk (they previously emitted
    `gnc:PantherLink`; IRIs are case-sensitive).
  * The Allen Brain Atlas control string now prints argument 2 (the search
    term) after the URL prefix; it was passed to `format` but never
    referenced, so every gene produced the same bare search URL.
  * Indentation of the hunks below is reconstructed; line counts per hunk
    are unchanged.

 examples/genelist.scm | 236 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 231 insertions(+), 5 deletions(-)

(limited to 'examples')

diff --git a/examples/genelist.scm b/examples/genelist.scm
index 8097610..0ec9ed1 100755
--- a/examples/genelist.scm
+++ b/examples/genelist.scm
@@ -24,6 +24,42 @@
     (gnc:transcript rdfs:domain gnc:GeneSymbol)
     (gnt:transcript a owl:ObjectProperty)
     (gnc:transcript rdfs:comments "The gene transcript of this resource")
+    (gnc:ebiGwasLink rdfs:Class gnc:ResourceLink)
+    (gnc:ebiGwasLink rdfs:label "EBI GWAS")
+    (gnc:ebiGwasLink rdfs:comments "EBI GWAS")
+    (gnc:biogpsLink rdfs:Class gnc:ResourceLink)
+    (gnc:biogpsLink rdfs:label "BioGPS Resource Link")
+    (gnc:biogpsLink rdfs:comments "Expression across many tissues and cell types")
+    (gnc:proteinAtlasLink rdfs:Class gnc:ResourceLink)
+    (gnc:proteinAtlasLink rdfs:label "Protein Atlas")
+    (gnc:proteinAtlasLink rdfs:comments "Human Protein Atlas")
+    (gnc:genemaniaLink rdfs:Class gnc:ResourceLink)
+    (gnc:genemaniaLink rdfs:label "GeneMANIA")
+    (gnc:genemaniaLink rdfs:comments "GeneMANIA")
+    (gnc:gemmaLink rdfs:Class gnc:ResourceLink)
+    (gnc:gemmaLink rdfs:label "Gemma")
+    (gnc:gemmaLink rdfs:comments "Meta-analysis of gene expression data")
+    (gnc:gtexLink rdfs:Class gnc:ResourceLink)
+    (gnc:gtexLink rdfs:label "GTEx Portal")
+    (gnc:gtexLink rdfs:comments "GTEx Portal")
+    (gnc:biogpsLink rdfs:Class gnc:ResourceLink)
+    (gnc:biogpsLink rdfs:label "BioGPS")
+    (gnc:biogpsLink rdfs:comments "Expression across many tissues and cell types")
+    (gnc:abaLink rdfs:Class gnc:ResourceLink)
+    (gnc:abaLink rdfs:label "ABA")
+    (gnc:abaLink rdfs:comments "Allen Brain Atlas")
+    (gnc:pantherLink rdf:type gnc:ResourceLink)
+    (gnc:pantherLink rdfs:label "PANTHER")
+    (gnc:pantherLink rdfs:comments "Gene and protein data resources from Celera-ABI")
+    (gnc:stringLink rdfs:Class gnc:ResourceLink)
+    (gnc:stringLink rdfs:label "STRING")
+    (gnc:stringLink rdfs:comments "Protein interactions: known and inferred")
+    (gnc:gtexLink rdfs:Class gnc:ResourceLink)
+    (gnc:gtexLink rdfs:label "GTEx Portal")
+    (gnc:gtexLink rdfs:comments "GTEx Portal")
+    (gnc:rgdLink rdfs:Class gnc:ResourceLink)
+    (gnc:rgdLink rdfs:label "Rat Genome DB")
+    (gnc:rgdLink rdfs:comments "Rat Genome DB")
     (gnc:hasKgID rdfs:domain gnc:GeneSymbol)
     (gnt:hasKgID a owl:ObjectProperty)
     (gnc:hasKgID rdfs:comments "The kgID of this resource")
@@ -42,12 +78,146 @@
     (triples (string->identifier
               "gene"
               (regexp-substitute/global #f "[^A-Za-z0-9:]"
-                                        (field GeneList GeneSymbol)
-                                        'pre "_" 'post))
+                                        (string-trim-both
+                                         (field GeneList GeneSymbol))
+                                        'pre "_" 'post))
       (set rdf:type 'gnc:GeneSymbol)
       (set rdfs:label (field GeneList GeneSymbol))
       (set dct:description (sanitize-rdf-string (field GeneList GeneDescription)))
       (set gnt:gene (ontology 'gene: (field GeneList GeneId)))
+      (set dct:references
+           (let ((symbol (string-trim-both
+                          (field GeneList GeneSymbol))))
+             (if (not (string-blank? symbol))
+                 (string->symbol
+                  (format #f
+                          "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                          "https://www.ebi.ac.uk/gwas/search?query="
+                          (string-trim-both symbol)
+                          "a gnc:ebiGwasLink"))
+                 "")))
+      (set dct:references
+           (let ((symbol (string-trim-both
+                          (field GeneList GeneSymbol)))
+                 (geneId (field GeneList GeneID))
+                 (species (field Species Name)))
+             (if (and (not (string-blank? symbol))
+                      (not (string-blank? species))
+                      (or (string=? species "human")
+                          (string=? species "mouse")))
+                 (string->symbol
+                  (format #f "<~0@*~a~2@*~a> .~%<~0@*~a~2@*~a> ~1@*~a"
+                          "http://mouse.brain-map.org/search/show?search_type=gene&search_term="
+                          "a gnc:abaLink"
+                          (if (string=? species "mouse")
+                              (string-trim-both symbol)
+                              geneId)))
+                 "")))
+      (set dct:references
+           (let ((symbol (string-trim-both
+                          (field GeneList GeneSymbol)))
+                 (species (field Species Name)))
+             (if (and (not (string-blank? symbol))
+                      (not (string-blank? species))
+                      (or
+                       (string=? species "mouse")
+                       (string=? species "human")))
+                 (string->symbol
+                  (format #f
+                          "<~0@*~a~1@*~a~2@*~a~3@*~a> .~%<~0@*~a~1@*~a~2@*~a~3@*~a> ~4@*~a"
+                          "https://rgd.mcw.edu/rgdweb/elasticResults.html?term="
+                          symbol
+                          "&category=Gene&species="
+                          (string-capitalize species)
+                          "a gnc:rgdLink"))
+                 "")))
+      (set dct:references
+           (let ((geneId (field GeneList GeneID))
+                 (species (field Species Name)))
+             (if (and (not (string-blank? geneId))
+                      (not (string-blank? species))
+                      (or
+                       (string=? species "mouse")
+                       (string=? species "rat")
+                       (string=? species "human")))
+                 (string->symbol
+                  (format #f
+                          "<~0@*~a~1@*~a~2@*~a~3@*~a> .~%<~0@*~a~1@*~a~2@*~a~3@*~a> ~4@*~a"
+                          "http://biogps.org/?org="
+                          species
+                          "#goto=genereport&id="
+                          geneId
+                          "a gnc:biogpsLink"))
+                 "")))
+      (set dct:references
+           (let ((geneId (field GeneList GeneID)))
+             (if (not (string-blank? geneId))
+                 (string->symbol
+                  (format #f
+                          "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                          "http://www.chibi.ubc.ca/Gemma/gene/showGene.html?ncbiid="
+                          geneId
+                          "a gnc:gemmaLink"))
+                 "")))
+      (set dct:references
+           (let ((symbol (field GeneList GeneID))
+                 (species (lower-case-and-replace-spaces
+                           (field Species FullName))))
+             (if (and (not (string-blank? symbol))
+                      (not (string-blank? species))
+                      (or
+                       (string=? species "mus-musculus")
+                       (string=? species "rattus-norvegicus")
+                       (string=? species "homo-sapiens")))
+                 (string->symbol
+                  (format #f "<~0@*~a/~1@*~a/~2@*~a> .~%<~0@*~a/~1@*~a/~2@*~a> ~3@*~a"
+                          "https://genemania.org/search" species (string-trim-both symbol)
+                          "a gnc:genemaniaLink"))
+                 "")))
+      (set dct:references
+           (let ((symbol (string-trim-both
+                          (field GeneList GeneSymbol))))
+             (if (not (string-blank? symbol))
+                 (string->symbol
+                  (format #f
+                          "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                          "http://www.pantherdb.org/genes/geneList.do?searchType=basic&fieldName=all&organism=all&listType=1&fieldValue="
+                          (string-trim-both symbol)
+                          "a gnc:pantherLink"))
+                 "")))
+      (set dct:references
+           (let ((symbol (string-trim-both
+                          (field GeneList GeneSymbol))))
+             (if (not (string-blank? symbol))
+                 (string->symbol
+                  (format #f
+                          "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                          "http://string-db.org/newstring_cgi/show_network_section.pl?identifier="
+                          symbol
+                          "a gnc:stringLink"))
+                 "")))
+      (set dct:references
+           (let ((symbol (string-trim-both
+                          (field GeneList GeneSymbol))))
+             (if (not (string-blank? symbol))
+                 (string->symbol
+                  (format #f
+                          "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                          "https://www.gtexportal.org/home/gene/"
+                          (string-trim-both symbol)
+                          "a gnc:gtexLink"))
+                 "")))
+      (set dct:references
+           (let ((symbol (string-trim-both
+                          (field GeneList GeneSymbol))))
+             (if (not (string-blank? symbol))
+                 (string->symbol
+                  (format #f
+                          "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                          "http://www.proteinatlas.org/search/"
+                          (string-trim-both symbol)
+                          "a gnc:proteinAtlasLink"))
+                 "")))
       (set gnt:chromosome (field GeneList Chromosome))
       (set gnt:TxStart
            (annotate-field (field GeneList TxStart)
@@ -122,10 +292,66 @@
     (triples (string->identifier
               "gene"
               (regexp-substitute/global #f "[^A-Za-z0-9:]"
-                                        (field GeneList_rn33 geneSymbol)
-                                        'pre "_" 'post))
+                                        (string-trim-both
+                                         (field GeneList_rn33 geneSymbol))
+                                        'pre "_" 'post))
+      (set dct:references
+           (let ((symbol (field GeneList_rn33 geneSymbol)))
+             (if (not (string-blank? symbol))
+                 (string->symbol
+                  (format #f
+                          "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                          "http://www.pantherdb.org/genes/geneList.do?searchType=basic&fieldName=all&organism=all&listType=1&fieldValue="
+                          (string-trim-both symbol)
+                          "a gnc:pantherLink"))
+                 "")))
+      (set dct:references
+           (let ((symbol (string-trim-both
+                          (field GeneList_rn33 geneSymbol))))
+             (if (not (string-blank? symbol))
+                 (string->symbol
+                  (format #f
+                          "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                          "https://www.ebi.ac.uk/gwas/search?query="
+                          (string-trim-both symbol)
+                          "a gnc:ebiGwasLink"))
+                 "")))
+      (set dct:references
+           (let ((symbol (string-trim-both
+                          (field GeneList_rn33 geneSymbol))))
+             (if (not (string-blank? symbol))
+                 (string->symbol
+                  (format #f
+                          "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                          "http://string-db.org/newstring_cgi/show_network_section.pl?identifier="
+                          symbol
+                          "a gnc:stringLink"))
+                 "")))
+      (set dct:references
+           (let ((symbol (string-trim-both
+                          (field GeneList_rn33 geneSymbol))))
+             (if (not (string-blank? symbol))
+                 (string->symbol
+                  (format #f
+                          "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                          "https://www.gtexportal.org/home/gene/"
+                          (string-trim-both symbol)
+                          "a gnc:gtexLink"))
+                 "")))
+      (set dct:references
+           (let ((symbol (string-trim-both
+                          (field GeneList_rn33 geneSymbol))))
+             (if (not (string-blank? symbol))
+                 (string->symbol
+                  (format #f
+                          "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                          "http://www.proteinatlas.org/search/"
+                          (string-trim-both symbol)
+                          "a gnc:proteinAtlasLink"))
+                 "")))
       (set rdf:type 'gnc:GeneSymbol)
-      (set rdfs:label (field GeneList_rn33 geneSymbol))
+      (set rdfs:label (string-trim-both
+                       (field GeneList_rn33 geneSymbol)))
       (set gnt:chromosome (field GeneList_rn33 chromosome))
       (set gnt:TxStart
            (annotate-field (field GeneList_rn33 txStart)
-- 
cgit v1.2.3