about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Munyoki Kilyungi, 2023-12-11 15:00:16 +0300
committer: Munyoki Kilyungi, 2023-12-14 00:55:53 +0300
commit: c3127599d06270acf85a5738c22913d079c38355 (patch)
tree: da3f4987b249f32586caf54c4991b258388f78fc
parent: 718fe5c900a5cb6390f96e0e2911bd9d7d2b7347 (diff)
download: gn-transform-databases-c3127599d06270acf85a5738c22913d079c38355.tar.gz
Remove some resource links from ProbeSet RDF transform.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x examples/probeset.scm 209
1 file changed, 2 insertions, 207 deletions
diff --git a/examples/probeset.scm b/examples/probeset.scm
index eeb2dcc..d214117 100755
--- a/examples/probeset.scm
+++ b/examples/probeset.scm
@@ -14,16 +14,9 @@
 (define-transformer probeset
   (tables (ProbeSet
            (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId")
-           (left-join GeneList "ON GeneList.GeneID = ProbeSet.GeneId")
-           (left-join GeneList_rn33 "ON GeneList.geneSymbol = ProbeSet.Symbol")
-           (left-join Species "ON GeneChip.SpeciesId = Species.Id")))
+           (left-join Species "ON GeneChip.SpeciesId = Species.Id"))
+          "WHERE ProbeSet.Name IS NOT NULL")
   (schema-triples
-   (gnc:pantherLink rdf:type gnc:ResourceLink)
-   (gnc:pantherLink rdfs:label "PANTHER")
-   (gnc:pantherLink rdfs:comments "Gene and protein data resources from Celera-ABI")
-   (gnc:gnt:NCBIGeneLink rdfs:Class gnc:ResourceLink)
-   (gnc:gnt:NCBIGeneLink rdfs:label "Gene")
-   (gnc:gnt:NCBIGeneLink rdfs:comments "Info from NCBI Entrez Gene")
    (gnc:omimLink rdfs:Class gnc:ResourceLink)
    (gnc:omimLink rdfs:label "OMIM")
    (gnc:omimLink rdfs:comments "Summary from On Mendelion Inheritance in Man")
@@ -33,33 +26,6 @@
    (gnc:uniprotLink rdfs:Class gnc:ResourceLink)
    (gnc:uniprotLink rdfs:label "UniProt")
    (gnc:uniprotLink rdfs:comments "UniProt")
-   (gnc:stringLink rdfs:Class gnc:ResourceLink)
-   (gnc:stringLink rdfs:label "STRING")
-   (gnc:stringLink rdfs:comments "Protein interactions: known and inferred")
-   (gnc:gtexLink rdfs:Class gnc:ResourceLink)
-   (gnc:gtexLink rdfs:label "GTEx Portal")
-   (gnc:gtexLink rdfs:comments "GTEx Portal")
-   (gnc:ebiGwasLink rdfs:Class gnc:ResourceLink)
-   (gnc:ebiGwasLink rdfs:label "EBI GWAS")
-   (gnc:ebiGwasLink rdfs:comments "EBI GWAS")
-   (gnc:genemaniaLink rdfs:Class gnc:ResourceLink)
-   (gnc:genemaniaLink rdfs:label "GeneMANIA")
-   (gnc:genemaniaLink rdfs:comments "GeneMANIA")
-   (gnc:gemmaLink rdfs:Class gnc:ResourceLink)
-   (gnc:gemmaLink rdfs:label "Gemma")
-   (gnc:gemmaLink rdfs:comments "Meta-analysis of gene expression data")
-   (gnc:biogpsLink rdfs:Class gnc:ResourceLink)
-   (gnc:biogpsLink rdfs:label "BioGPS")
-   (gnc:biogpsLink rdfs:comments "Expression across many tissues and cell types")
-   (gnc:abaLink rdfs:Class gnc:ResourceLink)
-   (gnc:abaLink rdfs:label "ABA")
-   (gnc:abaLink rdfs:comments "Allen Brain Atlas")
-   (gnc:ucsRefSeqLink rdfs:Class gnc:ResourceLink)
-   (gnc:ucsRefSeqLink rdfs:label "Info from UCSC Genome Browser")
-   (gnc:ucsRefSeqLink rdfs:comments "UCSC")
-   (gnc:proteinAtlasLink rdfs:Class gnc:ResourceLink)
-   (gnc:proteinAtlasLink rdfs:label "Protein Atlas")
-   (gnc:proteinAtlasLink rdfs:comments "Human Protein Atlas")
    (gnt:hasChip a owl:ObjectProperty)
    (gnt:hasChip rdfs:domain gnc:Probeset)
    (gnt:hasTargetId a owl:ObjectProperty)
@@ -148,17 +114,6 @@
                             ((string=? "-" strand-probe)
                              " on the minus strand")
                             (else "")))))))
-    ;; NCBI Gene Link
-    (set dct:references
-         (let ((geneId (field ProbeSet GeneId)))
-           (if (not (string-blank? geneId))
-               (string->symbol
-                (format #f
-                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
-                        "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids="
-                        geneId
-                        "a gnc:NCBIGeneLink"))
-               "")))
     ;; OMIM Link
     (set dct:references
          (let ((omim (field ProbeSet OMIM)))
@@ -192,166 +147,6 @@
                         uniprot
                         "a gnc:uniprotLink"))
                "")))
-    ;; STRING Link
-    (set dct:references
-         (let ((symbol (field ProbeSet Symbol)))
-           (if (not (string-blank? symbol))
-               (string->symbol
-                (format #f
-                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
-                        "http://string-db.org/newstring_cgi/show_network_section.pl?identifier="
-                        symbol
-                        "a gnc:stringLink"))
-               "")))
-    ;; GTEX link
-    (set dct:references
-         (let ((symbol (field ProbeSet Symbol)))
-           (if (not (string-blank? symbol))
-               (string->symbol
-                (format #f
-                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
-                        "https://www.gtexportal.org/home/gene/"
-                        (string-trim-both symbol)
-                        "a gnc:gtexLink"))
-               "")))
-    ;; EBI GWAS Link
-    (set dct:references
-         (let ((symbol (field ProbeSet Symbol)))
-           (if (not (string-blank? symbol))
-               (string->symbol
-                (format #f
-                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
-                        "https://www.ebi.ac.uk/gwas/search?query="
-                        (string-trim-both symbol)
-                        "a gnc:ebiGwasLink"))
-               "")))
-    ;; Protein Atlas Link
-    (set dct:references
-         (let ((symbol (field ProbeSet Symbol)))
-           (if (not (string-blank? symbol))
-               (string->symbol
-                (format #f
-                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
-                        "http://www.proteinatlas.org/search/"
-                        (string-trim-both symbol)
-                        "a gnc:proteinAtlasLink"))
-               "")))
-    ;; UCS Link
-    (set dct:references
-         (let* ((symbol (field ProbeSet Symbol))
-                (species (field Species Name))
-                (db (if (string=? species "mouse")
-                         "mm10" "rn7"))
-                (transcriptId (field ProbeSet RefSeq_TranscriptId))
-                (kgId (field GeneList_rn33 kgId))
-                (transcriptStart
-                 (if (string=? species "mouse")
-                     (field ("(GeneList.txStart * 1000000)" TranscriptStartMm10))
-                     (field ("(GeneList_rn33.txStart * 1000000)" TranscriptStartRn7))))
-                (chromosome
-                 (if (string=? species "mouse")
-                     (field GeneList Chromosome)
-                     (field GeneList_rn33 Chromosome)))
-                (transcriptEnd
-                 (if (string=? species "mouse")
-                     (field ("(GeneList.txEnd * 1000000)" TranscriptEndMm10))
-                     (field ("(GeneList_rn33.txEnd * 1000000)" TranscriptEndRn7))))
-                (url (format
-                      #f
-                      "http://genome.cse.ucsc.edu/cgi-bin/hgTracks?db=~a&hgg_gene="
-                      db)))
-           (if (and (not (string-blank? symbol))
-                    (not (string-blank? transcriptId))
-                    (number? transcriptStart)
-                    (number? transcriptEnd)
-                    (not (string-blank? chromosome))
-                    (or (string=? species "mouse")
-                        (string=? species "rat")))
-               (string->symbol
-                (format #f
-                        "<~0@*~a~1@*~a&hgg_chrom=chr~2@*~a&hgg_start=~3@*~a&hgg_end=~4@*~a> .~%<~0@*~a~1@*~a&hgg_chrom=chr~2@*~a&hgg_start=~3@*~a&hgg_end=~4@*~a> ~2@*~a"
-                        url
-                        transcriptId
-                        chromosome
-                        transcriptStart
-                        transcriptEnd
-                        "a gnc:ucsRefSeqLink"
-                        ))
-               "")))
-    ;; PANTHER link
-    (set dct:references
-         (let ((symbol (field ProbeSet Symbol)))
-           (if (not (string-blank? symbol))
-               (string->symbol
-                (format #f
-                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
-                        "http://www.pantherdb.org/genes/geneList.do?searchType=basic&fieldName=all&organism=all&listType=1&fieldValue="
-                        (string-trim-both symbol)
-                        "a gnc:PantherLink"))
-               "")))
-    ;; A GeneManiaLink
-    (set dct:references
-         (let ((symbol (field ProbeSet GeneId))
-               (species (lower-case-and-replace-spaces
-                         (field Species FullName))))
-           (if (and (not (string-blank? symbol))
-                    (not (string-blank? species))
-                    (or
-                     (string=? species "mus-musculus")
-                     (string=? species "rattus-norvegicus")
-                     (string=? species "homo-sapiens")))
-               (string->symbol
-                (format #f "<~0@*~a/~1@*~a/~2@*~a> .~%<~0@*~a/~1@*~a/~2@*~a> ~3@*~a"
-                        "https://genemania.org/search" species (string-trim-both symbol)
-                        "a gnc:genemaniaLink"))
-               "")))
-    ;; ABA Link
-    (set dct:references
-         (let ((symbol (field ProbeSet Symbol))
-               (geneId (field ProbeSet GeneId))
-               (species (field Species name)))
-           (if (and (not (string-blank? symbol))
-                    (not (string-blank? species))
-                    (or (string=? species "human")
-                        (string=? species "mouse")))
-               (string->symbol
-                (format #f "<~0@*~a> .~%<~0@*~a> ~2@*~a"
-                        "http://mouse.brain-map.org/search/show?search_type=gene&search_term="
-                        "a "
-                        (if (string=? species "mouse")
-                            (string-trim-both symbol)
-                            geneId)))
-               "")))
-    ;; Gemma Link
-    (set dct:references
-         (let ((geneId (field ProbeSet GeneId)))
-           (if (not (string-blank? geneId))
-               (string->symbol
-                (format #f
-                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
-                        "http://www.chibi.ubc.ca/Gemma/gene/showGene.html?ncbiid="
-                        geneId
-                        "a gnc:gemmaLink"))
-               "")))
-    ;; BioGPS Link
-    (set dct:references
-         (let ((geneId (field ProbeSet GeneId))
-               (species (field Species Name)))
-           (if (and (not (string-blank? geneId))
-                    (not (string-blank? species))
-                    (or
-                     (string=? species "mouse")
-                     (string=? species "rat")
-                     (string=? species "humans")))
-               (string->symbol
-                (format #f
-                        "<~0@*~a~1@*~a~2@*~a~3@*~a> .~%<~0@*~a~1@*~a~2@*~a~3@*~a> ~4@*~a ; ~5@*~a"
-                        "http://biogps.org/?org="
-                        species
-                        "#goto=genereport&id="
-                        geneId
-                        "a gnc:ResourceLink"))
-               "")))
     (set gnt:strandProbe
          (field ProbeSet Strand_Probe))
     (set gnt:hasSpecificity