about summary refs log tree commit diff
path: root/examples/genelist.scm
diff options
context:
space:
mode:
Diffstat (limited to 'examples/genelist.scm')
-rwxr-xr-x examples/genelist.scm 200
1 files changed, 97 insertions, 103 deletions
diff --git a/examples/genelist.scm b/examples/genelist.scm
index 9c1ced0..5048bf2 100755
--- a/examples/genelist.scm
+++ b/examples/genelist.scm
@@ -18,73 +18,72 @@
   (tables (GeneList
            (left-join Species "USING (SpeciesId)")))
   (schema-triples
-   (gnt:gene rdfs:domain gnc:GeneSymbol)
-   (gnt:belongsToSpecies rdfs:domain gnc:GeneSymbol)
-   (gnc:Gene a rdfs:Class)
-   (gnc:Gene rdfs:label "Gene")
-   (gnt:hasGeneId a owl:ObjectProperty)
-   (gnt:hasGeneId rdfs:domain gnc:NCBIWikiEntry)
-   (gnt:hasGeneId skos:definition "The GeneId of this this resource")
-   (gnc:transcript rdfs:domain gnc:GeneSymbol)
+   (gnc:gene_symbol a rdfs:Class)
+   (gnc:gene_symbol rdfs:label "A gene symbol")
+   (gnt:gene rdfs:domain gnc:gene_symbol)
+   (gnt:has_species rdfs:domain gnc:gene_symbol)
+   (gnc:gene a rdfs:Class)
+   (gnc:gene rdfs:label "Gene")
+   (gnt:has_gene_id a owl:ObjectProperty)
+   (gnt:has_gene_id rdfs:domain gnc:ncbi_wiki_entry)
+   (gnt:has_gene_id skos:definition "The GeneId of this resource")
+   (gnc:transcript rdfs:domain gnc:gene_symbol)
    (gnt:transcript a owl:ObjectProperty)
    (gnc:transcript rdfs:comments "The gene transcript of this resource")
-   (gnc:ebiGwasLink rdfs:Class gnc:ResourceLink)
-   (gnc:ebiGwasLink rdfs:label "EBI GWAS")
-   (gnc:ebiGwasLink rdfs:comments "EBI GWAS")
-   (gnc:proteinAtlasLink rdfs:Class gnc:ResourceLink)
-   (gnc:proteinAtlasLink rdfs:label "Protein Atlas")
-   (gnc:proteinAtlasLink rdfs:comments "Human Protein Atlas")
-   (gnc:genemaniaLink rdfs:Class gnc:ResourceLink)
-   (gnc:genemaniaLink rdfs:label "GeneMANIA")
-   (gnc:genemaniaLink rdfs:comments "GeneMANIA")
-   (gnc:gemmaLink rdfs:Class gnc:ResourceLink)
-   (gnc:gemmaLink rdfs:label "Gemma")
-   (gnc:gemmaLink rdfs:comments "Meta-analysis of gene expression data")
-   (gnc:biogpsLink rdfs:Class gnc:ResourceLink)
-   (gnc:biogpsLink rdfs:label "BioGPS")
-   (gnc:biogpsLink rdfs:comments "Expression across many tissues and cell types")
-   (gnc:abaLink rdfs:Class gnc:ResourceLink)
-   (gnc:abaLink rdfs:label "ABA")
-   (gnc:abaLink rdfs:comments "Allen Brain Atlas")
-   (gnc:pantherLink rdfs:Class gnc:ResourceLink)
-   (gnc:pantherLink rdfs:label "PANTHER")
-   (gnc:pantherLink rdfs:comments "Gene and protein data resources from Celera-ABI")
-   (gnc:stringLink rdfs:Class gnc:ResourceLink)
-   (gnc:stringLink rdfs:label "STRING")
-   (gnc:stringLink rdfs:comments "Protein interactions: known and inferred")
-   (gnc:gtexLink rdfs:Class gnc:ResourceLink)
-   (gnc:gtexLink rdfs:label "GTEx Portal")
-   (gnc:gtexLink rdfs:comments "GTEx Portal")
-   (gnc:rgdLink rdfs:Class gnc:ResourceLink)
-   (gnc:rgdLink rdfs:label "Rat Genome DB")
-   (gnc:rgdLink rdfs:comments "Rat Genome DB")
-   (gnc:hasKgID rdfs:domain gnc:GeneSymbol)
-   (gnt:hasKgID a owl:ObjectProperty)
-   (gnc:hasKgID rdfs:comments "The kgID of this resource")
-   (gnc:hasUnigenID rdfs:domain gnc:GeneSymbol)
-   (gnt:hasUnigenID a owl:ObjectProperty)
-   (gnc:hasUnigenID rdfs:comments "The UnigenID of this resource")
-   (gnc:hasProteinID rdfs:domain gnc:GeneSymbol)
-   (gnt:hasProteinID a owl:ObjectProperty)
-   (gnc:hasProteinID rdfs:comments "The ProteinID of this resource")
-   (gnc:hasAlignID rdfs:domain gnc:GeneSymbol)
-   (gnt:hasAlignID a owl:ObjectProperty)
-   (gnc:hasAlignID rdfs:comments "The AlignID of this resource")
-   (gnt:TxEnd rdfs:range xsd:double)
-   (gnt:TxStart rdfs:range xsd:double)
-   (gnt:hasTargetSeq rdfs:domain gnc:Probeset))
+   (gnc:ebi_gwas_link rdfs:Class gnc:ResourceLink)
+   (gnc:ebi_gwas_link rdfs:label "EBI GWAS")
+   (gnc:ebi_gwas_link rdfs:comments "EBI GWAS")
+   (gnc:protein_atlas_link rdfs:Class gnc:ResourceLink)
+   (gnc:protein_atlas_link rdfs:label "Protein Atlas")
+   (gnc:protein_atlas_link rdfs:comments "Human Protein Atlas")
+   (gnc:genemania_link rdfs:Class gnc:ResourceLink)
+   (gnc:genemania_link rdfs:label "GeneMANIA")
+   (gnc:genemania_link rdfs:comments "GeneMANIA")
+   (gnc:gemma_link rdfs:Class gnc:ResourceLink)
+   (gnc:gemma_link rdfs:label "Gemma")
+   (gnc:gemma_link rdfs:comments "Meta-analysis of gene expression data")
+   (gnc:biogps_link rdfs:Class gnc:ResourceLink)
+   (gnc:biogps_link rdfs:label "BioGPS")
+   (gnc:biogps_link rdfs:comments "Expression across many tissues and cell types")
+   (gnc:aba_link rdfs:Class gnc:ResourceLink)
+   (gnc:aba_link rdfs:label "ABA")
+   (gnc:aba_link rdfs:comments "Allen Brain Atlas")
+   (gnc:panther_link rdfs:Class gnc:ResourceLink)
+   (gnc:panther_link rdfs:label "PANTHER")
+   (gnc:panther_link rdfs:comments "Gene and protein data resources from Celera-ABI")
+   (gnc:string_link rdfs:Class gnc:ResourceLink)
+   (gnc:string_link rdfs:label "STRING")
+   (gnc:string_link rdfs:comments "Protein interactions: known and inferred")
+   (gnc:gtex_link rdfs:Class gnc:ResourceLink)
+   (gnc:gtex_link rdfs:label "GTEx Portal")
+   (gnc:gtex_link rdfs:comments "GTEx Portal")
+   (gnc:rgd_link rdfs:Class gnc:ResourceLink)
+   (gnc:rgd_link rdfs:label "Rat Genome DB")
+   (gnc:rgd_link rdfs:comments "Rat Genome DB")
+   (gnc:has_kg_id rdfs:domain gnc:gene_symbol)
+   (gnc:has_kg_id a owl:ObjectProperty)
+   (gnc:has_kg_id rdfs:comments "The kgID of this resource")
+   (gnc:has_unigen_id rdfs:domain gnc:gene_symbol)
+   (gnc:has_unigen_id a owl:ObjectProperty)
+   (gnc:has_unigen_id rdfs:comments "The UnigenID of this resource")
+   (gnc:has_protein_id rdfs:domain gnc:gene_symbol)
+   (gnt:has_protein_id a owl:ObjectProperty)
+   (gnc:has_protein_id rdfs:comments "The ProteinID of this resource")
+   (gnc:has_align_id rdfs:domain gnc:gene_symbol)
+   (gnt:has_align_id a owl:ObjectProperty)
+   (gnc:has_align_id rdfs:comments "The AlignID of this resource")
+   (gnt:tx_end rdfs:range xsd:double)
+   (gnt:tx_start rdfs:range xsd:double)
+   (gnt:has_target_seq rdfs:domain gnc:probeset))
   (triples
       (string->identifier
-       "gene" (regexp-substitute/global
-               #f "[^A-Za-z0-9:]"
-               (string-trim-both
-                (field ("CONCAT_WS('_', GeneSymbol, GeneID, AlignID)" GENE_UID)))
-               'pre "_" 'post)
-       #:proc (lambda (x) x))
-    (set rdf:type 'gnc:Gene)
-    (set gnt:geneSymbol (field GeneList GeneSymbol))
+       "gene" (normalize-string-field (string-trim-both
+                                       (field ("CONCAT_WS('_', GeneSymbol, GeneID, AlignID)" GENE_UID))))
+       #:separator "_")
+    (set rdf:type 'gnc:gene)
+    (set gnt:gene_symbol (field GeneList GeneSymbol))
     (set dct:description (sanitize-rdf-string (field GeneList GeneDescription)))
-    (set gnt:hasGeneId (ontology 'gene: (field GeneList GeneId)))
+    (set gnt:has_gene_id (ontology 'gene: (field GeneList GeneId)))
     (set dct:references
          (let ((symbol (field GeneList GeneSymbol)))
            (if (not (string-blank? symbol))
@@ -94,7 +93,7 @@
                         "https://www.ebi.ac.uk/gwas/search?query="
                         (uri-encode
                          (string-trim-both symbol))
-                        "a gnc:ebiGwasLink"))
+                        "a gnc:ebi_gwas_link"))
                "")))
     (set dct:references
          (let ((symbol (field GeneList GeneSymbol))
@@ -107,7 +106,7 @@
                (string->symbol
                 (format #f "<~0@*~a> .~%<~0@*~a> ~1@*~a"
                         "http://mouse.brain-map.org/search/show?search_type=gene&search_term="
-                        "a gnc:abaLink"
+                        "a gnc:aba_link"
                         (if (string=? species "mouse")
                             (uri-encode
                              (string-trim-both symbol))
@@ -129,7 +128,7 @@
                          (string-trim-both symbol))
                         "&category=Gene&species="
                         (string-capitalize species)
-                        "a gnc:rgdLink"))
+                        "a gnc:rgd_link"))
                "")))
     (set dct:references
          (let ((geneId (field GeneList GeneID))
@@ -147,7 +146,7 @@
                         species
                         "#goto=genereport&id="
                         geneId
-                        "a gnc:biogpsLink"))
+                        "a gnc:biogps_link"))
                "")))
     (set dct:references
          (let ((geneId (field GeneList GeneID)))
@@ -157,7 +156,7 @@
                         "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
                         "http://www.chibi.ubc.ca/Gemma/gene/showGene.html?ncbiid="
                         geneId
-                        "a gnc:gemmaLink"))
+                        "a gnc:gemma_link"))
                "")))
     (set dct:references
          (let ((symbol (field GeneList GeneSymbol))
@@ -175,7 +174,7 @@
                         species
                         (uri-encode
                          (string-trim-both symbol))
-                        "a gnc:genemaniaLink"))
+                        "a gnc:genemania_link"))
                "")))
     (set dct:references
          (let ((symbol (field GeneList GeneSymbol)))
@@ -186,7 +185,7 @@
                         "http://www.pantherdb.org/genes/geneList.do?searchType=basic&fieldName=all&organism=all&listType=1&fieldValue="
                         (uri-encode
                          (string-trim-both symbol))
-                        "a gnc:pantherLink"))
+                        "a gnc:panther_link"))
                "")))
     (set dct:references
          (let ((symbol (field GeneList GeneSymbol)))
@@ -197,7 +196,7 @@
                         "http://string-db.org/newstring_cgi/show_network_section.pl?identifier="
                         (uri-encode
                          (string-trim-both symbol))
-                        "a gnc:stringLink"))
+                        "a gnc:string_link"))
                "")))
     (set dct:references
          (let ((symbol (field GeneList GeneSymbol)))
@@ -208,7 +207,7 @@
                         "https://www.gtexportal.org/home/gene/"
                         (uri-encode
                          (string-trim-both symbol))
-                        "a gnc:gtexLink"))
+                        "a gnc:gtex_link"))
                "")))
     (set dct:references
          (let ((symbol (field GeneList GeneSymbol)))
@@ -219,33 +218,27 @@
                         "http://www.proteinatlas.org/search/"
                         (uri-encode
                          (string-trim-both symbol))
-                        "a gnc:proteinAtlasLink"))
+                        "a gnc:protein_atlas_link"))
                "")))
     (set gnt:chromosome (field GeneList Chromosome))
-    (set gnt:TxStart (annotate-field
+    (set gnt:tx_start (annotate-field
                       (field GeneList TxStart)
                       '^^xsd:double))
-    (set gnt:TxEnd (annotate-field
+    (set gnt:tx_end (annotate-field
                     (field GeneList TxEnd)
                     '^^xsd:double))
-    (set gnt:Strand (string-trim-both (field GeneList Strand)))
+    (set gnt:strand (string-trim-both (field GeneList Strand)))
     (set
-     gnt:belongsToSpecies
-     (string->identifier
-      ""
-      (remap-species-identifiers
-       (string-trim-both (field Species Name)))
-      #:separator ""
-      #:proc string-capitalize-first))
+     gnt:has_species (string->identifier "" (remap-species-identifiers (field Species Fullname))))
     (set
      gnt:transcript
      (ontology 'transcript:
                (string-trim-both (field GeneList NM_ID))))
-    (set gnt:hasKgID (string-trim-both (field GeneList kgID)))
-    (set gnt:hasUnigenID (string-trim-both (field GeneList UnigenID)))
-    (set gnt:hasProteinID (string-trim-both (field GeneList ProteinID)))
-    (set gnt:hasAlignID (string-trim-both (field GeneList AlignID)))
-    (set gnt:hasRgdID
+    (set gnc:has_kg_id (string-trim-both (field GeneList kgID)))
+    (set gnc:has_unigen_id (string-trim-both (field GeneList UnigenID)))
+    (set gnt:has_protein_id (string-trim-both (field GeneList ProteinID)))
+    (set gnt:has_align_id (string-trim-both (field GeneList AlignID)))
+    (set gnt:has_rgd_id
          (field ("IFNULL(RGD_ID, '')" RGD_ID)))))
 
 (define-transformer genelist-rn33
@@ -257,25 +250,26 @@
          (if (number? gene-uid)
              (number->string
               gene-uid)
-             gene-uid)))
-    (set rdf:type 'gnc:Gene)
-    (set gnt:belongsToSpecies 'gn:Rattus_norvegicus)
-    (set gnt:geneSymbol (string-trim-both (field GeneList_rn33 geneSymbol)))
+             gene-uid)
+         #:separator "_"))
+    (set rdf:type 'gnc:gene)
+    (set gnt:has_species 'gn:Rattus_norvegicus)
+    (set gnt:gene_symbol (string-trim-both (field GeneList_rn33 geneSymbol)))
     (set gnt:chromosome (field GeneList_rn33 chromosome))
-    (set gnt:TxStart (annotate-field
+    (set gnt:tx_start (annotate-field
                       (field GeneList_rn33 txStart)
                       '^^xsd:double))
-    (set gnt:TxEnd (annotate-field
+    (set gnt:tx_end (annotate-field
                     (field GeneList_rn33 txEnd)
                     '^^xsd:double))
-    (set gnt:Strand (string-trim-both (field GeneList_rn33 strand)))
+    (set gnt:strand (string-trim-both (field GeneList_rn33 strand)))
     (set
      gnt:transcript
      (ontology
       'transcript:
       (string-trim-both (field GeneList_rn33 NM_ID))))
     (set
-     gnt:hasKgID
+     gnc:has_kg_id
      (string-trim-both (field GeneList_rn33 kgID)))
     (set dct:references
          (let ((symbol (field GeneList_rn33 geneSymbol)))
@@ -295,7 +289,7 @@
                         "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
                         "https://www.ebi.ac.uk/gwas/search?query="
                         (string-trim-both symbol)
-                        "a gnc:ebiGwasLink"))
+                        "a gnc:ebi_gwas_link"))
                "")))
     (set dct:references
          (let ((symbol (string-trim-both (field GeneList_rn33 geneSymbol))))
@@ -306,7 +300,7 @@
                         "http://string-db.org/newstring_cgi/show_network_section.pl?identifier="
                         (uri-encode
                          (string-trim-both symbol))
-                        "a gnc:stringLink"))
+                        "a gnc:string_link"))
                "")))
     (set dct:references
          (let ((symbol (string-trim-both (field GeneList_rn33 geneSymbol))))
@@ -317,7 +311,7 @@
                         "https://www.gtexportal.org/home/gene/"
                         (uri-encode
                          (string-trim-both symbol))
-                        "a gnc:gtexLink"))
+                        "a gnc:gtex_link"))
                "")))
     (set dct:references
          (let ((symbol (string-trim-both (field GeneList_rn33 geneSymbol))))
@@ -328,7 +322,7 @@
                         "http://www.proteinatlas.org/search/"
                         (uri-encode
                          (string-trim-both symbol))
-                        "a gnc:proteinAtlasLink"))
+                        "a gnc:protein_atlas_link"))
                "")))))
 
 
@@ -349,10 +343,10 @@
    (connection %connection-settings)
    (table-metadata? #f)
    (prefixes
-    '(("gn:" "<http://genenetwork.org/id/>")
-      ("probeset:" "<http://genenetwork.org/probeset/>")
-      ("gnc:" "<http://genenetwork.org/category/>")
-      ("gnt:" "<http://genenetwork.org/term/>")
+    '(("gn:" "<http://rdf.genenetwork.org/v1/id/>")
+      ("probeset:" "<http://rdf.genenetwork.org/v1/probeset/>")
+      ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
+      ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
       ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
       ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
       ("dct:" "<http://purl.org/dc/terms/>")