about summary refs log tree commit diff
path: root/examples
diff options
context:
space:
mode:
author: Munyoki Kilyungi, 2023-12-13 17:59:57 +0300
committer: Munyoki Kilyungi, 2023-12-14 00:55:53 +0300
commit: cfcfa78e0149c2cc98fb0031707431d2138cbe91 (patch)
tree: c028eca56a43ec707b849ee996ebad0843ba29a8 /examples
parent: 1082c5bfb802995be5e6626f7281555013d824b8 (diff)
download: gn-transform-databases-cfcfa78e0149c2cc98fb0031707431d2138cbe91.tar.gz
Link extra resources to genes and trim gene symbols.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'examples')
-rwxr-xr-x examples/genelist.scm 236
1 files changed, 231 insertions, 5 deletions
diff --git a/examples/genelist.scm b/examples/genelist.scm
index 8097610..0ec9ed1 100755
--- a/examples/genelist.scm
+++ b/examples/genelist.scm
@@ -24,6 +24,42 @@
    (gnc:transcript rdfs:domain gnc:GeneSymbol)
    (gnt:transcript a owl:ObjectProperty)
    (gnc:transcript rdfs:comments "The gene transcript of this resource")
+   (gnc:ebiGwasLink rdf:type gnc:ResourceLink) ; every *Link term in this run is typed as a gnc:ResourceLink
+   (gnc:ebiGwasLink rdfs:label "EBI GWAS")
+   (gnc:ebiGwasLink rdfs:comments "EBI GWAS")
+   (gnc:biogpsLink rdf:type gnc:ResourceLink)
+   (gnc:biogpsLink rdfs:label "BioGPS Resource Link")
+   (gnc:biogpsLink rdfs:comments "Expression across many tissues and cell types")
+   (gnc:proteinAtlasLink rdf:type gnc:ResourceLink)
+   (gnc:proteinAtlasLink rdfs:label "Protein Atlas")
+   (gnc:proteinAtlasLink rdfs:comments "Human Protein Atlas")
+   (gnc:genemaniaLink rdf:type gnc:ResourceLink)
+   (gnc:genemaniaLink rdfs:label "GeneMANIA")
+   (gnc:genemaniaLink rdfs:comments "GeneMANIA")
+   (gnc:gemmaLink rdf:type gnc:ResourceLink)
+   (gnc:gemmaLink rdfs:label "Gemma")
+   (gnc:gemmaLink rdfs:comments "Meta-analysis of gene expression data")
+   (gnc:gtexLink rdf:type gnc:ResourceLink)
+   (gnc:gtexLink rdfs:label "GTEx Portal")
+   (gnc:gtexLink rdfs:comments "GTEx Portal")
+   (gnc:biogpsLink rdf:type gnc:ResourceLink) ; NOTE(review): gnc:biogpsLink is declared a second time here
+   (gnc:biogpsLink rdfs:label "BioGPS") ; NOTE(review): differs from the earlier "BioGPS Resource Link" label - confirm which is intended
+   (gnc:biogpsLink rdfs:comments "Expression across many tissues and cell types")
+   (gnc:abaLink rdf:type gnc:ResourceLink)
+   (gnc:abaLink rdfs:label "ABA")
+   (gnc:abaLink rdfs:comments "Allen Brain Atlas")
+   (gnc:pantherLink rdf:type gnc:ResourceLink)
+   (gnc:pantherLink rdfs:label "PANTHER")
+   (gnc:pantherLink rdfs:comments "Gene and protein data resources from Celera-ABI")
+   (gnc:stringLink rdf:type gnc:ResourceLink)
+   (gnc:stringLink rdfs:label "STRING")
+   (gnc:stringLink rdfs:comments "Protein interactions: known and inferred")
+   (gnc:gtexLink rdf:type gnc:ResourceLink) ; NOTE(review): repeats the gnc:gtexLink triples declared earlier in this run
+   (gnc:gtexLink rdfs:label "GTEx Portal")
+   (gnc:gtexLink rdfs:comments "GTEx Portal")
+   (gnc:rgdLink rdf:type gnc:ResourceLink)
+   (gnc:rgdLink rdfs:label "Rat Genome DB")
+   (gnc:rgdLink rdfs:comments "Rat Genome DB")
    (gnc:hasKgID rdfs:domain gnc:GeneSymbol)
    (gnt:hasKgID a owl:ObjectProperty)
    (gnc:hasKgID rdfs:comments "The kgID of this resource")
@@ -42,12 +78,146 @@
   (triples
       (string->identifier
        "gene" (regexp-substitute/global #f "[^A-Za-z0-9:]"
-                                    (field GeneList GeneSymbol)
-                                    'pre "_" 'post))
+                                        (string-trim-both
+                                         (field GeneList GeneSymbol))
+                                        'pre "_" 'post))
     (set rdf:type 'gnc:GeneSymbol)
     (set rdfs:label (field GeneList GeneSymbol))
     (set dct:description (sanitize-rdf-string (field GeneList GeneDescription)))
     (set gnt:gene (ontology 'gene: (field GeneList GeneId)))
+    (set dct:references
+         (let ((symbol (string-trim-both
+                        (field GeneList GeneSymbol))))
+           (if (not (string-blank? symbol))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "https://www.ebi.ac.uk/gwas/search?query="
+                        (string-trim-both symbol)
+                        "a gnc:ebiGwasLink"))
+               "")))
+    (set dct:references ; Allen Brain Atlas search link; produced only for human and mouse, otherwise ""
+         (let ((symbol (string-trim-both
+                        (field GeneList GeneSymbol)))
+               (geneId (field GeneList GeneID))
+               (species (field Species Name)))
+           (if (and (not (string-blank? symbol))
+                    (not (string-blank? species))
+                    (or (string=? species "human")
+                        (string=? species "mouse")))
+               (string->symbol
+                (format #f "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" ; arg 0 = URL prefix, arg 1 = search term, arg 2 = type statement
+                        "http://mouse.brain-map.org/search/show?search_type=gene&search_term="
+                        (if (string=? species "mouse") ; mouse searches by symbol, human by gene id
+                            (string-trim-both symbol)
+                            geneId)
+                        "a gnc:abaLink"))
+               "")))
+    (set dct:references
+         (let ((symbol (string-trim-both
+                        (field GeneList GeneSymbol)))
+               (species (field Species Name)))
+           (if (and (not (string-blank? symbol))
+                    (not (string-blank? species))
+                    (or
+                     (string=? species "mouse")
+                     (string=? species "human")))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a~2@*~a~3@*~a> .~%<~0@*~a~1@*~a~2@*~a~3@*~a> ~4@*~a"
+                        "https://rgd.mcw.edu/rgdweb/elasticResults.html?term="
+                        symbol
+                        "&category=Gene&species="
+                        (string-capitalize species)
+                        "a gnc:rgdLink"))
+               "")))
+    (set dct:references
+         (let ((geneId (field GeneList GeneID))
+               (species (field Species Name)))
+           (if (and (not (string-blank? geneId))
+                    (not (string-blank? species))
+                    (or
+                     (string=? species "mouse")
+                     (string=? species "rat")
+                     (string=? species "human")))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a~2@*~a~3@*~a> .~%<~0@*~a~1@*~a~2@*~a~3@*~a> ~4@*~a"
+                        "http://biogps.org/?org="
+                        species
+                        "#goto=genereport&id="
+                        geneId
+                        "a gnc:biogpsLink"))
+               "")))
+    (set dct:references
+         (let ((geneId (field GeneList GeneID)))
+           (if (not (string-blank? geneId))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "http://www.chibi.ubc.ca/Gemma/gene/showGene.html?ncbiid="
+                        geneId
+                        "a gnc:gemmaLink"))
+               "")))
+    (set dct:references
+         (let ((symbol (field GeneList GeneID))
+               (species (lower-case-and-replace-spaces
+                         (field Species FullName))))
+           (if (and (not (string-blank? symbol))
+                    (not (string-blank? species))
+                    (or
+                     (string=? species "mus-musculus")
+                     (string=? species "rattus-norvegicus")
+                     (string=? species "homo-sapiens")))
+               (string->symbol
+                (format #f "<~0@*~a/~1@*~a/~2@*~a> .~%<~0@*~a/~1@*~a/~2@*~a> ~3@*~a"
+                        "https://genemania.org/search" species (string-trim-both symbol)
+                        "a gnc:genemaniaLink"))
+               "")))
+    (set dct:references ; PANTHER gene-list search link keyed on the trimmed gene symbol; "" when the symbol is blank
+         (let ((symbol (string-trim-both
+                        (field GeneList GeneSymbol))))
+           (if (not (string-blank? symbol))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "http://www.pantherdb.org/genes/geneList.do?searchType=basic&fieldName=all&organism=all&listType=1&fieldValue="
+                        (string-trim-both symbol)
+                        "a gnc:pantherLink")) ; must match the declared gnc:pantherLink term exactly (names are case-sensitive)
+               "")))
+    (set dct:references
+         (let ((symbol (string-trim-both
+                        (field GeneList GeneSymbol))))
+           (if (not (string-blank? symbol))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "http://string-db.org/newstring_cgi/show_network_section.pl?identifier="
+                        symbol
+                        "a gnc:stringLink"))
+               "")))
+    (set dct:references
+         (let ((symbol (string-trim-both
+                        (field GeneList GeneSymbol))))
+           (if (not (string-blank? symbol))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "https://www.gtexportal.org/home/gene/"
+                        (string-trim-both symbol)
+                        "a gnc:gtexLink"))
+               "")))
+    (set dct:references
+         (let ((symbol (string-trim-both
+                        (field GeneList GeneSymbol))))
+           (if (not (string-blank? symbol))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "http://www.proteinatlas.org/search/"
+                        (string-trim-both symbol)
+                        "a gnc:proteinAtlasLink"))
+               "")))
     (set gnt:chromosome (field GeneList Chromosome))
     (set gnt:TxStart (annotate-field
                       (field GeneList TxStart)
@@ -122,10 +292,66 @@
   (triples
       (string->identifier
        "gene" (regexp-substitute/global #f "[^A-Za-z0-9:]"
-                                    (field GeneList_rn33 geneSymbol)
-                                    'pre "_" 'post))
+                                        (string-trim-both
+                                         (field GeneList_rn33 geneSymbol))
+                                        'pre "_" 'post))
+    (set dct:references ; PANTHER gene-list search link for GeneList_rn33 symbols; "" when the symbol is blank
+         (let ((symbol (string-trim-both (field GeneList_rn33 geneSymbol)))) ; trim before the blank check, like the other link blocks
+           (if (not (string-blank? symbol))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "http://www.pantherdb.org/genes/geneList.do?searchType=basic&fieldName=all&organism=all&listType=1&fieldValue="
+                        (string-trim-both symbol)
+                        "a gnc:pantherLink")) ; must match the declared gnc:pantherLink term exactly (names are case-sensitive)
+               "")))
+    (set dct:references
+         (let ((symbol (string-trim-both
+                        (field GeneList_rn33 geneSymbol))))
+           (if (not (string-blank? symbol))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "https://www.ebi.ac.uk/gwas/search?query="
+                        (string-trim-both symbol)
+                        "a gnc:ebiGwasLink"))
+               "")))
+    (set dct:references
+         (let ((symbol (string-trim-both
+                        (field GeneList_rn33 geneSymbol))))
+           (if (not (string-blank? symbol))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "http://string-db.org/newstring_cgi/show_network_section.pl?identifier="
+                        symbol
+                        "a gnc:stringLink"))
+               "")))
+    (set dct:references
+         (let ((symbol (string-trim-both
+                        (field GeneList_rn33 geneSymbol))))
+           (if (not (string-blank? symbol))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "https://www.gtexportal.org/home/gene/"
+                        (string-trim-both symbol)
+                        "a gnc:gtexLink"))
+               "")))
+    (set dct:references
+         (let ((symbol (string-trim-both
+                        (field GeneList_rn33 geneSymbol))))
+           (if (not (string-blank? symbol))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "http://www.proteinatlas.org/search/"
+                        (string-trim-both symbol)
+                        "a gnc:proteinAtlasLink"))
+               "")))
     (set rdf:type 'gnc:GeneSymbol)
-    (set rdfs:label (field GeneList_rn33 geneSymbol))
+    (set rdfs:label (string-trim-both
+                     (field GeneList_rn33 geneSymbol)))
     (set gnt:chromosome (field GeneList_rn33 chromosome))
     (set gnt:TxStart (annotate-field
                       (field GeneList_rn33 txStart)