about summary refs log tree commit diff
path: root/examples
diff options
context:
space:
mode:
authorMunyoki Kilyungi2023-11-22 18:30:23 +0300
committerMunyoki Kilyungi2023-11-22 18:30:23 +0300
commit7ba1c667540cd739730574fc46b3ae1ff99daefd (patch)
tree0c11d6deb911b7c282b03e3a40ecb1406eb87e3c /examples
parentcf54f81c7a90533ec0ed8d250a25ebc9fe183ba1 (diff)
downloadgn-transform-databases-7ba1c667540cd739730574fc46b3ae1ff99daefd.tar.gz
Construct URIs for external data in RDF ProbeSet transform.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'examples')
-rwxr-xr-xexamples/probeset.scm246
1 files changed, 246 insertions, 0 deletions
diff --git a/examples/probeset.scm b/examples/probeset.scm
index 78de14a..2516740 100755
--- a/examples/probeset.scm
+++ b/examples/probeset.scm
@@ -18,6 +18,48 @@
            (left-join GeneList_rn33 "ON GeneList.geneSymbol = ProbeSet.Symbol")
            (left-join Species "ON GeneChip.SpeciesId = Species.Id")))
   (schema-triples
+   ;; Classes for the external resource links emitted through
+   ;; dct:references.  Each link class gets a type, a short label and a
+   ;; longer descriptive comment.
+   (gnc:pantherLink rdf:type gnc:ResourceLink)
+   (gnc:pantherLink rdfs:label "PANTHER")
+   (gnc:pantherLink rdfs:comment "Gene and protein data resources from Celera-ABI")
+   (gnc:NCBIGeneLink rdf:type gnc:ResourceLink)
+   (gnc:NCBIGeneLink rdfs:label "Gene")
+   (gnc:NCBIGeneLink rdfs:comment "Info from NCBI Entrez Gene")
+   (gnc:omimLink rdf:type gnc:ResourceLink)
+   (gnc:omimLink rdfs:label "OMIM")
+   (gnc:omimLink rdfs:comment "Summary from Online Mendelian Inheritance in Man")
+   (gnc:homologeneLink rdf:type gnc:ResourceLink)
+   (gnc:homologeneLink rdfs:label "HomoloGene")
+   (gnc:homologeneLink rdfs:comment "Find similar genes in other species")
+   (gnc:uniprotLink rdf:type gnc:ResourceLink)
+   (gnc:uniprotLink rdfs:label "UniProt")
+   (gnc:uniprotLink rdfs:comment "UniProt")
+   (gnc:stringLink rdf:type gnc:ResourceLink)
+   (gnc:stringLink rdfs:label "STRING")
+   (gnc:stringLink rdfs:comment "Protein interactions: known and inferred")
+   (gnc:gtexLink rdf:type gnc:ResourceLink)
+   (gnc:gtexLink rdfs:label "GTEx Portal")
+   (gnc:gtexLink rdfs:comment "GTEx Portal")
+   (gnc:ebiGwasLink rdf:type gnc:ResourceLink)
+   (gnc:ebiGwasLink rdfs:label "EBI GWAS")
+   (gnc:ebiGwasLink rdfs:comment "EBI GWAS")
+   (gnc:genemaniaLink rdf:type gnc:ResourceLink)
+   (gnc:genemaniaLink rdfs:label "GeneMANIA")
+   (gnc:genemaniaLink rdfs:comment "GeneMANIA")
+   (gnc:gemmaLink rdf:type gnc:ResourceLink)
+   (gnc:gemmaLink rdfs:label "Gemma")
+   (gnc:gemmaLink rdfs:comment "Meta-analysis of gene expression data")
+   (gnc:biogpsLink rdf:type gnc:ResourceLink)
+   (gnc:biogpsLink rdfs:label "BioGPS")
+   (gnc:biogpsLink rdfs:comment "Expression across many tissues and cell types")
+   (gnc:abaLink rdf:type gnc:ResourceLink)
+   (gnc:abaLink rdfs:label "ABA")
+   (gnc:abaLink rdfs:comment "Allen Brain Atlas")
+   (gnc:ucsRefSeqLink rdf:type gnc:ResourceLink)
+   (gnc:ucsRefSeqLink rdfs:label "UCSC")
+   (gnc:ucsRefSeqLink rdfs:comment "Info from UCSC Genome Browser")
+   (gnc:proteinAtlasLink rdf:type gnc:ResourceLink)
+   (gnc:proteinAtlasLink rdfs:label "Protein Atlas")
+   (gnc:proteinAtlasLink rdfs:comment "Human Protein Atlas")
    (gnt:hasChip a owl:ObjectProperty)
    (gnt:hasChip rdfs:domain gnc:Probeset)
    (gnt:hasTargetId a owl:ObjectProperty)
@@ -104,6 +146,210 @@
                             ((string=? "-" strand-probe)
                              " on the minus strand")
                             (else "")))))))
+    ;; NCBI Entrez Gene link.  For a non-blank GeneId the value is a symbol
+    ;; that prints as "<URI> ." followed, on a new line, by
+    ;; "<URI> a gnc:NCBIGeneLink"; a blank GeneId yields the empty string.
+    (set dct:references
+         (let* ((entrez-id (field ProbeSet GeneId))
+                (base-url "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=")
+                (link-type "a gnc:NCBIGeneLink"))
+           (if (string-blank? entrez-id)
+               ""
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        base-url
+                        entrez-id
+                        link-type)))))
+    ;; OMIM link.  For a non-blank OMIM id the value is a symbol that prints
+    ;; as "<URI> ." followed, on a new line, by "<URI> a gnc:omimLink"; a
+    ;; blank id yields the empty string.
+    (set dct:references
+         (let* ((omim-id (field ProbeSet OMIM))
+                (base-url "http://www.ncbi.nlm.nih.gov/omim/")
+                (link-type "a gnc:omimLink"))
+           (if (string-blank? omim-id)
+               ""
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        base-url
+                        omim-id
+                        link-type)))))
+    ;; HomoloGene link.  For a non-blank HomoloGeneID the value is a symbol
+    ;; that prints as "<URI> ." followed, on a new line, by
+    ;; "<URI> a gnc:homologeneLink"; a blank id yields the empty string.
+    (set dct:references
+         (let* ((homologene-id (field ProbeSet HomoloGeneID))
+                (base-url "http://www.ncbi.nlm.nih.gov/homologene/?term=")
+                (link-type "a gnc:homologeneLink"))
+           (if (string-blank? homologene-id)
+               ""
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        base-url
+                        homologene-id
+                        link-type)))))
+    ;; UniProt link.  For a non-blank UniProtID the value is a symbol that
+    ;; prints as "<URI> ." followed, on a new line, by
+    ;; "<URI> a gnc:uniprotLink"; a blank id yields the empty string.
+    (set dct:references
+         (let* ((uniprot-id (field ProbeSet UniProtID))
+                (base-url "https://www.uniprot.org/uniprot/")
+                (link-type "a gnc:uniprotLink"))
+           (if (string-blank? uniprot-id)
+               ""
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        base-url
+                        uniprot-id
+                        link-type)))))
+    ;; STRING link.  For a non-blank gene symbol the value is a symbol that
+    ;; prints as "<URI> ." followed, on a new line, by
+    ;; "<URI> a gnc:stringLink"; a blank symbol yields the empty string.
+    (set dct:references
+         (let ((symbol (field ProbeSet Symbol)))
+           (if (not (string-blank? symbol))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "http://string-db.org/newstring_cgi/show_network_section.pl?identifier="
+                        ;; Trim like the other Symbol-based links so that
+                        ;; stray whitespace never ends up inside the URI.
+                        (string-trim-both symbol)
+                        "a gnc:stringLink"))
+               "")))
+    ;; GTEx Portal link.  For a non-blank gene symbol the value is a symbol
+    ;; that prints as "<URI> ." followed, on a new line, by
+    ;; "<URI> a gnc:gtexLink"; a blank symbol yields the empty string.
+    (set dct:references
+         (let* ((gene-symbol (field ProbeSet Symbol))
+                (base-url "https://www.gtexportal.org/home/gene/")
+                (link-type "a gnc:gtexLink"))
+           (if (string-blank? gene-symbol)
+               ""
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        base-url
+                        ;; Surrounding whitespace is stripped before the
+                        ;; symbol is placed in the URI.
+                        (string-trim-both gene-symbol)
+                        link-type)))))
+    ;; EBI GWAS link.  For a non-blank gene symbol the value is a symbol
+    ;; that prints as "<URI> ." followed, on a new line, by
+    ;; "<URI> a gnc:ebiGwasLink"; a blank symbol yields the empty string.
+    (set dct:references
+         (let* ((gene-symbol (field ProbeSet Symbol))
+                (base-url "https://www.ebi.ac.uk/gwas/search?query=")
+                (link-type "a gnc:ebiGwasLink"))
+           (if (string-blank? gene-symbol)
+               ""
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        base-url
+                        ;; Surrounding whitespace is stripped before the
+                        ;; symbol is placed in the URI.
+                        (string-trim-both gene-symbol)
+                        link-type)))))
+    ;; Human Protein Atlas link.  For a non-blank gene symbol the value is a
+    ;; symbol that prints as "<URI> ." followed, on a new line, by
+    ;; "<URI> a gnc:proteinAtlasLink"; a blank symbol yields the empty
+    ;; string.
+    (set dct:references
+         (let* ((gene-symbol (field ProbeSet Symbol))
+                (base-url "http://www.proteinatlas.org/search/")
+                (link-type "a gnc:proteinAtlasLink"))
+           (if (string-blank? gene-symbol)
+               ""
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        base-url
+                        ;; Surrounding whitespace is stripped before the
+                        ;; symbol is placed in the URI.
+                        (string-trim-both gene-symbol)
+                        link-type)))))
+    ;; UCSC Genome Browser link.  The assembly is "mm10" for mouse and
+    ;; "rn7" otherwise, and the transcript coordinates come from GeneList
+    ;; (mouse) or GeneList_rn33 (otherwise).  A link is only produced for
+    ;; mouse and rat, and only when symbol, transcript id and chromosome
+    ;; are non-blank and both coordinates are numbers; in every other case
+    ;; the value is the empty string.
+    (set dct:references
+         (let* ((symbol (field ProbeSet Symbol))
+                (species (field Species Name))
+                (db (if (string=? species "mouse")
+                         "mm10" "rn7"))
+                (transcriptId (field ProbeSet RefSeq_TranscriptId))
+                ;; NOTE(review): kgId is not used below; it is kept because
+                ;; removing the field form might alter the generated query
+                ;; -- confirm before deleting.
+                (kgId (field GeneList_rn33 kgId))
+                (transcriptStart
+                 (if (string=? species "mouse")
+                     (field ("(GeneList.txStart * 1000000)" TranscriptStartMm10))
+                     (field ("(GeneList_rn33.txStart * 1000000)" TranscriptStartRn7))))
+                (chromosome
+                 (if (string=? species "mouse")
+                     (field GeneList Chromosome)
+                     (field GeneList_rn33 Chromosome)))
+                (transcriptEnd
+                 (if (string=? species "mouse")
+                     (field ("(GeneList.txEnd * 1000000)" TranscriptEndMm10))
+                     (field ("(GeneList_rn33.txEnd * 1000000)" TranscriptEndRn7))))
+                (url (format
+                      #f
+                      "http://genome.cse.ucsc.edu/cgi-bin/hgTracks?db=~a&hgg_gene="
+                      db)))
+           (if (and (not (string-blank? symbol))
+                    (not (string-blank? transcriptId))
+                    (number? transcriptStart)
+                    (number? transcriptEnd)
+                    (not (string-blank? chromosome))
+                    (or (string=? species "mouse")
+                        (string=? species "rat")))
+               (string->symbol
+                ;; Arguments 0-4 build the URI; argument 5 is the type
+                ;; statement.  The trailing directive must therefore jump
+                ;; to argument 5 -- jumping to 2 would print the chromosome
+                ;; where the type belongs.
+                (format #f
+                        "<~0@*~a~1@*~a&hgg_chrom=chr~2@*~a&hgg_start=~3@*~a&hgg_end=~4@*~a> .~%<~0@*~a~1@*~a&hgg_chrom=chr~2@*~a&hgg_start=~3@*~a&hgg_end=~4@*~a> ~5@*~a"
+                        url
+                        transcriptId
+                        chromosome
+                        transcriptStart
+                        transcriptEnd
+                        "a gnc:ucsRefSeqLink"))
+               "")))
+    ;; PANTHER link.  For a non-blank gene symbol the value is a symbol that
+    ;; prints as "<URI> ." followed, on a new line, by
+    ;; "<URI> a gnc:pantherLink"; a blank symbol yields the empty string.
+    (set dct:references
+         (let ((symbol (field ProbeSet Symbol)))
+           (if (not (string-blank? symbol))
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "http://www.pantherdb.org/genes/geneList.do?searchType=basic&fieldName=all&organism=all&listType=1&fieldValue="
+                        (string-trim-both symbol)
+                        ;; Spelled exactly like the class declared in the
+                        ;; schema triples (lower-case "p").
+                        "a gnc:pantherLink"))
+               "")))
+    ;; GeneMANIA link.  Produced only when the gene value and the organism
+    ;; slug are non-blank and the slug is one of "mus-musculus",
+    ;; "rattus-norvegicus" or "homo-sapiens"; otherwise the value is the
+    ;; empty string.  The link prints as "<base/organism/gene> ." followed,
+    ;; on a new line, by "<base/organism/gene> a gnc:genemaniaLink".
+    ;; NOTE(review): the search term is read from ProbeSet.GeneId although
+    ;; the original local was called "symbol" -- confirm which column is
+    ;; intended.
+    (set dct:references
+         (let* ((gene (field ProbeSet GeneId))
+                (organism (lower-case-and-replace-spaces
+                           (field Species FullName))))
+           (cond
+            ((string-blank? gene) "")
+            ((string-blank? organism) "")
+            ((or (string=? organism "mus-musculus")
+                 (string=? organism "rattus-norvegicus")
+                 (string=? organism "homo-sapiens"))
+             (string->symbol
+              (format #f "<~0@*~a/~1@*~a/~2@*~a> .~%<~0@*~a/~1@*~a/~2@*~a> ~3@*~a"
+                      "https://genemania.org/search"
+                      organism
+                      (string-trim-both gene)
+                      "a gnc:genemaniaLink")))
+            (else ""))))
+    ;; Allen Brain Atlas link.  The search term is the trimmed gene symbol
+    ;; for mouse and the GeneId for human.  A link is produced only when the
+    ;; symbol and species are non-blank and the species is mouse, or human
+    ;; with a non-blank GeneId; otherwise the value is the empty string.
+    ;; The link prints as "<URI> ." followed, on a new line, by
+    ;; "<URI> a gnc:abaLink".
+    (set dct:references
+         (let ((symbol (field ProbeSet Symbol))
+               (geneId (field ProbeSet GeneId))
+               (species (field Species Name)))
+           (if (and (not (string-blank? symbol))
+                    (not (string-blank? species))
+                    (or (string=? species "mouse")
+                        ;; The human branch searches by GeneId, so it must
+                        ;; not be blank or the URI would have no term.
+                        (and (string=? species "human")
+                             (not (string-blank? geneId)))))
+               (string->symbol
+                ;; Argument 0 is the base URL, 1 the search term and 2 the
+                ;; type statement; the term has to be part of the URI.
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        "http://mouse.brain-map.org/search/show?search_type=gene&search_term="
+                        (if (string=? species "mouse")
+                            (string-trim-both symbol)
+                            geneId)
+                        "a gnc:abaLink"))
+               "")))
+    ;; Gemma link.  For a non-blank GeneId the value is a symbol that prints
+    ;; as "<URI> ." followed, on a new line, by "<URI> a gnc:gemmaLink"; a
+    ;; blank GeneId yields the empty string.
+    (set dct:references
+         (let* ((entrez-id (field ProbeSet GeneId))
+                (base-url "http://www.chibi.ubc.ca/Gemma/gene/showGene.html?ncbiid=")
+                (link-type "a gnc:gemmaLink"))
+           (if (string-blank? entrez-id)
+               ""
+               (string->symbol
+                (format #f
+                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
+                        base-url
+                        entrez-id
+                        link-type)))))
+    ;; BioGPS link.  Produced only when GeneId and species are non-blank and
+    ;; the species is mouse, rat or human; otherwise the value is the empty
+    ;; string.  The link prints as "<URI> ." followed, on a new line, by
+    ;; "<URI> a gnc:biogpsLink".
+    (set dct:references
+         (let ((geneId (field ProbeSet GeneId))
+               (species (field Species Name)))
+           (if (and (not (string-blank? geneId))
+                    (not (string-blank? species))
+                    (or
+                     (string=? species "mouse")
+                     (string=? species "rat")
+                     ;; Same spelling as the species test used for the ABA
+                     ;; link ("human", not "humans").
+                     (string=? species "human")))
+               (string->symbol
+                ;; Arguments 0-3 build the URI and argument 4 is the type
+                ;; statement.  No directive may reference an argument
+                ;; beyond index 4, since only five are supplied.
+                (format #f
+                        "<~0@*~a~1@*~a~2@*~a~3@*~a> .~%<~0@*~a~1@*~a~2@*~a~3@*~a> ~4@*~a"
+                        "http://biogps.org/?org="
+                        species
+                        "#goto=genereport&id="
+                        geneId
+                        "a gnc:biogpsLink"))
+               "")))
     (set gnt:strandProbe
          (field ProbeSet Strand_Probe))
     (set gnt:hasSpecificity