aboutsummaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
author Munyoki Kilyungi 2023-12-13 17:59:57 +0300
committer Munyoki Kilyungi 2023-12-14 00:55:53 +0300
commit cfcfa78e0149c2cc98fb0031707431d2138cbe91 (patch)
tree c028eca56a43ec707b849ee996ebad0843ba29a8 /examples
parent 1082c5bfb802995be5e6626f7281555013d824b8 (diff)
download gn-transform-databases-cfcfa78e0149c2cc98fb0031707431d2138cbe91.tar.gz
Link extra resources to genes and trim gene symbols.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'examples')
-rwxr-xr-x examples/genelist.scm 236
1 files changed, 231 insertions, 5 deletions
diff --git a/examples/genelist.scm b/examples/genelist.scm
index 8097610..0ec9ed1 100755
--- a/examples/genelist.scm
+++ b/examples/genelist.scm
@@ -24,6 +24,42 @@
(gnc:transcript rdfs:domain gnc:GeneSymbol)
(gnt:transcript a owl:ObjectProperty)
(gnc:transcript rdfs:comments "The gene transcript of this resource")
+ (gnc:ebiGwasLink rdf:type gnc:ResourceLink) ; rdf:type is the typing predicate; rdfs:Class names a class and is not a property
+ (gnc:ebiGwasLink rdfs:label "EBI GWAS")
+ (gnc:ebiGwasLink rdfs:comments "EBI GWAS") ; NOTE(review): RDFS defines rdfs:comment (singular); rdfs:comments is kept because the surrounding schema uses it throughout
+ (gnc:biogpsLink rdf:type gnc:ResourceLink)
+ (gnc:biogpsLink rdfs:label "BioGPS") ; same label as the repeated declaration further down, so the two copies cannot disagree
+ (gnc:biogpsLink rdfs:comments "Expression across many tissues and cell types")
+ (gnc:proteinAtlasLink rdf:type gnc:ResourceLink)
+ (gnc:proteinAtlasLink rdfs:label "Protein Atlas")
+ (gnc:proteinAtlasLink rdfs:comments "Human Protein Atlas")
+ (gnc:genemaniaLink rdf:type gnc:ResourceLink)
+ (gnc:genemaniaLink rdfs:label "GeneMANIA")
+ (gnc:genemaniaLink rdfs:comments "GeneMANIA")
+ (gnc:gemmaLink rdf:type gnc:ResourceLink)
+ (gnc:gemmaLink rdfs:label "Gemma")
+ (gnc:gemmaLink rdfs:comments "Meta-analysis of gene expression data")
+ (gnc:gtexLink rdf:type gnc:ResourceLink)
+ (gnc:gtexLink rdfs:label "GTEx Portal")
+ (gnc:gtexLink rdfs:comments "GTEx Portal")
+ (gnc:biogpsLink rdf:type gnc:ResourceLink) ; repeats the gnc:biogpsLink declaration above; redundant but kept so the hunk keeps its line count
+ (gnc:biogpsLink rdfs:label "BioGPS")
+ (gnc:biogpsLink rdfs:comments "Expression across many tissues and cell types")
+ (gnc:abaLink rdf:type gnc:ResourceLink)
+ (gnc:abaLink rdfs:label "ABA")
+ (gnc:abaLink rdfs:comments "Allen Brain Atlas")
+ (gnc:pantherLink rdf:type gnc:ResourceLink)
+ (gnc:pantherLink rdfs:label "PANTHER")
+ (gnc:pantherLink rdfs:comments "Gene and protein data resources from Celera-ABI")
+ (gnc:stringLink rdf:type gnc:ResourceLink)
+ (gnc:stringLink rdfs:label "STRING")
+ (gnc:stringLink rdfs:comments "Protein interactions: known and inferred")
+ (gnc:gtexLink rdf:type gnc:ResourceLink) ; repeats the gnc:gtexLink declaration above with identical values
+ (gnc:gtexLink rdfs:label "GTEx Portal")
+ (gnc:gtexLink rdfs:comments "GTEx Portal")
+ (gnc:rgdLink rdf:type gnc:ResourceLink)
+ (gnc:rgdLink rdfs:label "Rat Genome DB")
+ (gnc:rgdLink rdfs:comments "Rat Genome DB")
(gnc:hasKgID rdfs:domain gnc:GeneSymbol)
(gnt:hasKgID a owl:ObjectProperty)
(gnc:hasKgID rdfs:comments "The kgID of this resource")
@@ -42,12 +78,146 @@
(triples
(string->identifier ; builds the gene identifier; presumably the subject for the (set ...) forms that follow -- confirm against the triples macro
"gene" (regexp-substitute/global #f "[^A-Za-z0-9:]" ; every character other than A-Z, a-z, 0-9 and colon is replaced by an underscore
- (field GeneList GeneSymbol)
- 'pre "_" 'post))
+ (string-trim-both ; trim first so surrounding whitespace is not turned into underscores
+ (field GeneList GeneSymbol))
+ 'pre "_" 'post))
(set rdf:type 'gnc:GeneSymbol)
(set rdfs:label (field GeneList GeneSymbol)) ; NOTE(review): the label is not trimmed here, unlike the identifier above and the GeneList_rn33 label -- confirm
(set dct:description (sanitize-rdf-string (field GeneList GeneDescription)))
(set gnt:gene (ontology 'gene: (field GeneList GeneId))) ; NOTE(review): column is spelled GeneId here but GeneID in the dct:references forms below -- confirm both resolve to the same column
+ (set dct:references ; EBI GWAS search link built from the trimmed gene symbol
+ (let ((symbol (string-trim-both
+ (field GeneList GeneSymbol))))
+ (if (not (string-blank? symbol)) ; a link is built only when string-blank? rejects the symbol; otherwise the value is the empty string
+ (string->symbol ; presumably a symbol is written verbatim by set, so the embedded dot and newline end this triple and start a typing statement for the URL -- confirm against the set macro
+ (format #f
+ "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" ; ~N@* jumps to argument N: renders <url+symbol> . newline <url+symbol> followed by argument 2
+ "https://www.ebi.ac.uk/gwas/search?query="
+ (string-trim-both symbol) ; symbol was already trimmed in the let above, so this second trim changes nothing
+ "a gnc:ebiGwasLink"))
+ "")))
+ (set dct:references ; Allen Brain Atlas gene search link, built for human and mouse only
+ (let ((symbol (string-trim-both
+ (field GeneList GeneSymbol)))
+ (geneId (field GeneList GeneID))
+ (species (field Species Name)))
+ (if (and (not (string-blank? symbol))
+ (not (string-blank? species))
+ (or (string=? species "human")
+ (string=? species "mouse")))
+ (string->symbol
+ (format #f "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" ; argument 0 is the base URL, 1 the search term, 2 the type statement, as in the sibling links
+ "http://mouse.brain-map.org/search/show?search_type=gene&search_term="
+ (if (string=? species "mouse") ; the search term was previously passed last and never read, so the URL ended at search_term=
+ (string-trim-both symbol)
+ geneId) ; NOTE(review): geneId is not checked for blankness on the human branch -- confirm it is always populated
+ "a gnc:abaLink"))
+ "")))
+ (set dct:references ; Rat Genome DB search link built from the trimmed gene symbol and the species name
+ (let ((symbol (string-trim-both
+ (field GeneList GeneSymbol)))
+ (species (field Species Name)))
+ (if (and (not (string-blank? symbol))
+ (not (string-blank? species))
+ (or ; NOTE(review): only mouse and human are accepted; rat is not in this list even though the target is the Rat Genome DB -- confirm intended
+ (string=? species "mouse")
+ (string=? species "human")))
+ (string->symbol
+ (format #f
+ "<~0@*~a~1@*~a~2@*~a~3@*~a> .~%<~0@*~a~1@*~a~2@*~a~3@*~a> ~4@*~a" ; arguments 0-3 are concatenated into the URL, argument 4 is the type statement
+ "https://rgd.mcw.edu/rgdweb/elasticResults.html?term="
+ symbol
+ "&category=Gene&species="
+ (string-capitalize species) ; the species query parameter is sent capitalized, e.g. Mouse
+ "a gnc:rgdLink"))
+ "")))
+ (set dct:references ; BioGPS gene report link built from the species name and the GeneID column
+ (let ((geneId (field GeneList GeneID))
+ (species (field Species Name)))
+ (if (and (not (string-blank? geneId))
+ (not (string-blank? species))
+ (or ; a link is built for mouse, rat and human only
+ (string=? species "mouse")
+ (string=? species "rat")
+ (string=? species "human")))
+ (string->symbol
+ (format #f
+ "<~0@*~a~1@*~a~2@*~a~3@*~a> .~%<~0@*~a~1@*~a~2@*~a~3@*~a> ~4@*~a" ; arguments 0-3 are concatenated into the URL, argument 4 is the type statement
+ "http://biogps.org/?org="
+ species
+ "#goto=genereport&id="
+ geneId
+ "a gnc:biogpsLink"))
+ ""))) ; the empty string is the value when any condition fails
+ (set dct:references ; Gemma gene page link keyed by the GeneID column, passed as the ncbiid query parameter
+ (let ((geneId (field GeneList GeneID)))
+ (if (not (string-blank? geneId)) ; no species restriction here, only a non-blank id is required
+ (string->symbol
+ (format #f
+ "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" ; renders <url+id> . newline <url+id> followed by argument 2
+ "http://www.chibi.ubc.ca/Gemma/gene/showGene.html?ncbiid="
+ geneId
+ "a gnc:gemmaLink"))
+ "")))
+ (set dct:references ; GeneMANIA search link of the form base/species/term
+ (let ((symbol (field GeneList GeneID)) ; NOTE(review): the local is named symbol but is bound to the GeneID column -- confirm whether GeneSymbol was intended
+ (species (lower-case-and-replace-spaces ; presumably turns the full species name into a lower-case hyphenated slug, judging by the values compared below -- confirm against the helper
+ (field Species FullName))))
+ (if (and (not (string-blank? symbol))
+ (not (string-blank? species))
+ (or
+ (string=? species "mus-musculus")
+ (string=? species "rattus-norvegicus")
+ (string=? species "homo-sapiens")))
+ (string->symbol
+ (format #f "<~0@*~a/~1@*~a/~2@*~a> .~%<~0@*~a/~1@*~a/~2@*~a> ~3@*~a" ; arguments 0-2 are joined with slashes into the URL, argument 3 is the type statement
+ "https://genemania.org/search" species (string-trim-both symbol)
+ "a gnc:genemaniaLink"))
+ "")))
+ (set dct:references ; PANTHER gene list search link built from the trimmed gene symbol
+ (let ((symbol (string-trim-both
+ (field GeneList GeneSymbol))))
+ (if (not (string-blank? symbol)) ; a link is built only for a non-blank symbol; otherwise the value is the empty string
+ (string->symbol
+ (format #f
+ "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" ; renders <url+symbol> . newline <url+symbol> followed by argument 2
+ "http://www.pantherdb.org/genes/geneList.do?searchType=basic&fieldName=all&organism=all&listType=1&fieldValue="
+ (string-trim-both symbol)
+ "a gnc:pantherLink")) ; lower-case p to match the gnc:pantherLink term declared in the schema; prefixed names are case-sensitive
+ "")))
+ (set dct:references ; STRING network link with the trimmed gene symbol as the identifier parameter
+ (let ((symbol (string-trim-both
+ (field GeneList GeneSymbol))))
+ (if (not (string-blank? symbol))
+ (string->symbol
+ (format #f
+ "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" ; renders <url+symbol> . newline <url+symbol> followed by argument 2
+ "http://string-db.org/newstring_cgi/show_network_section.pl?identifier="
+ symbol
+ "a gnc:stringLink"))
+ ""))) ; the empty string is the value when the symbol is blank
+ (set dct:references ; GTEx Portal gene page link with the trimmed gene symbol as the last path segment
+ (let ((symbol (string-trim-both
+ (field GeneList GeneSymbol))))
+ (if (not (string-blank? symbol))
+ (string->symbol
+ (format #f
+ "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" ; renders <url+symbol> . newline <url+symbol> followed by argument 2
+ "https://www.gtexportal.org/home/gene/"
+ (string-trim-both symbol) ; symbol was already trimmed in the let above, so this second trim changes nothing
+ "a gnc:gtexLink"))
+ "")))
+ (set dct:references ; Protein Atlas search link with the trimmed gene symbol as the last path segment
+ (let ((symbol (string-trim-both
+ (field GeneList GeneSymbol))))
+ (if (not (string-blank? symbol))
+ (string->symbol
+ (format #f
+ "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" ; renders <url+symbol> . newline <url+symbol> followed by argument 2
+ "http://www.proteinatlas.org/search/"
+ (string-trim-both symbol) ; symbol was already trimmed in the let above, so this second trim changes nothing
+ "a gnc:proteinAtlasLink"))
+ "")))
(set gnt:chromosome (field GeneList Chromosome))
(set gnt:TxStart (annotate-field
(field GeneList TxStart)
@@ -122,10 +292,66 @@
(triples
(string->identifier ; builds the gene identifier; presumably the subject for the (set ...) forms that follow -- confirm against the triples macro
"gene" (regexp-substitute/global #f "[^A-Za-z0-9:]" ; every character other than A-Z, a-z, 0-9 and colon is replaced by an underscore
- (field GeneList_rn33 geneSymbol)
- 'pre "_" 'post))
+ (string-trim-both ; trim first so surrounding whitespace is not turned into underscores
+ (field GeneList_rn33 geneSymbol))
+ 'pre "_" 'post))
+ (set dct:references ; PANTHER gene list search link built from the trimmed GeneList_rn33 gene symbol
+ (let ((symbol (string-trim-both (field GeneList_rn33 geneSymbol)))) ; trimmed before the blank check, as the sibling link forms do
+ (if (not (string-blank? symbol)) ; a link is built only for a non-blank symbol; otherwise the value is the empty string
+ (string->symbol
+ (format #f
+ "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" ; renders <url+symbol> . newline <url+symbol> followed by argument 2
+ "http://www.pantherdb.org/genes/geneList.do?searchType=basic&fieldName=all&organism=all&listType=1&fieldValue="
+ (string-trim-both symbol)
+ "a gnc:pantherLink")) ; lower-case p to match the gnc:pantherLink term declared in the schema; prefixed names are case-sensitive
+ "")))
+ (set dct:references ; EBI GWAS search link built from the trimmed GeneList_rn33 gene symbol
+ (let ((symbol (string-trim-both
+ (field GeneList_rn33 geneSymbol))))
+ (if (not (string-blank? symbol))
+ (string->symbol
+ (format #f
+ "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" ; renders <url+symbol> . newline <url+symbol> followed by argument 2
+ "https://www.ebi.ac.uk/gwas/search?query="
+ (string-trim-both symbol) ; symbol was already trimmed in the let above, so this second trim changes nothing
+ "a gnc:ebiGwasLink"))
+ "")))
+ (set dct:references ; STRING network link with the trimmed GeneList_rn33 gene symbol as the identifier parameter
+ (let ((symbol (string-trim-both
+ (field GeneList_rn33 geneSymbol))))
+ (if (not (string-blank? symbol))
+ (string->symbol
+ (format #f
+ "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" ; renders <url+symbol> . newline <url+symbol> followed by argument 2
+ "http://string-db.org/newstring_cgi/show_network_section.pl?identifier="
+ symbol
+ "a gnc:stringLink"))
+ ""))) ; the empty string is the value when the symbol is blank
+ (set dct:references ; GTEx Portal gene page link with the trimmed GeneList_rn33 gene symbol as the last path segment
+ (let ((symbol (string-trim-both
+ (field GeneList_rn33 geneSymbol))))
+ (if (not (string-blank? symbol))
+ (string->symbol
+ (format #f
+ "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" ; renders <url+symbol> . newline <url+symbol> followed by argument 2
+ "https://www.gtexportal.org/home/gene/"
+ (string-trim-both symbol) ; symbol was already trimmed in the let above, so this second trim changes nothing
+ "a gnc:gtexLink"))
+ "")))
+ (set dct:references ; Protein Atlas search link with the trimmed GeneList_rn33 gene symbol as the last path segment
+ (let ((symbol (string-trim-both
+ (field GeneList_rn33 geneSymbol))))
+ (if (not (string-blank? symbol))
+ (string->symbol
+ (format #f
+ "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" ; renders <url+symbol> . newline <url+symbol> followed by argument 2
+ "http://www.proteinatlas.org/search/"
+ (string-trim-both symbol) ; symbol was already trimmed in the let above, so this second trim changes nothing
+ "a gnc:proteinAtlasLink"))
+ "")))
(set rdf:type 'gnc:GeneSymbol)
- (set rdfs:label (field GeneList_rn33 geneSymbol))
+ (set rdfs:label (string-trim-both ; the label is now trimmed, the same trimming applied to the symbol used for the identifier
+ (field GeneList_rn33 geneSymbol)))
(set gnt:chromosome (field GeneList_rn33 chromosome))
(set gnt:TxStart (annotate-field
(field GeneList_rn33 txStart)