about summary refs log tree commit diff
path: root/examples
diff options
context:
space:
mode:
author: Munyoki Kilyungi, 2026-01-28 12:19:04 +0300
committer: Munyoki Kilyungi, 2026-01-28 12:19:04 +0300
commit: ddd1b36bc50c9f9deaac902d7591da0d26bc4cd5 (patch)
tree: c8555ef2e9144dc223b88dd122cba46afb54a5eb /examples
parent: c57715940a901e1151e530c3e7ab36778ecda5f7 (diff)
download: gn-transform-databases-ddd1b36bc50c9f9deaac902d7591da0d26bc4cd5.tar.gz
Add molecular traits.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'examples')
-rwxr-xr-x examples/molecular-traits.scm 73
1 files changed, 62 insertions, 11 deletions
diff --git a/examples/molecular-traits.scm b/examples/molecular-traits.scm
index 737c0b0..6ece7b5 100755
--- a/examples/molecular-traits.scm
+++ b/examples/molecular-traits.scm
@@ -11,23 +11,72 @@
              (transform triples)
              (transform special-forms))
 
-
 (define-transformer tissue
-  ;; The Name and TissueName fields seem to be identical. BIRN_lex_ID
-  ;; and BIRN_lex_Name are mostly NULL.
   (tables (Tissue))
   (schema-triples
    (gnc:tissue a owl:Class)
-   (gnc:tissue rdfs:subClassOf obo:UBERON_0000479) ; Anatomical Entity
-   (gnc:tissue rdfs:label "Tissue (GN)")
+   (gnc:tissue a skos:Concept) ; the class is declared as a SKOS concept in addition to an OWL class
+   (gnc:tissue rdfs:subClassOf obo:UBERON_0000479) ; NOTE(review): the removed comment called this term "Anatomical Entity" -- confirm against UBERON
+   (gnc:tissue rdfs:label "Tissue (GN)"))
+  (triples (string->identifier "tissue" (field Tissue Short_Name) #:separator "_") ; subject is derived from "tissue" and Tissue.Short_Name
+    (set rdf:type 'gnc:tissue)
+    (set skos:prefLabel (field Tissue Name)) ; Tissue.Name is the preferred label (replaces the former rdfs:label triple)
+    (set skos:altLabel (field Tissue Short_Name)))) ; Tissue.Short_Name is also exposed as the alternative label
+
+(define-transformer molecular-traits ; emits one trait resource per species/tissue pair that has public datasets
+  (tables (Species
+           (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id")
+           (inner-join ProbeFreeze "ON ProbeFreeze.InbredSetId = InbredSet.Id")
+           (inner-join ProbeSetFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id")
+           (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id"))
+          "WHERE ProbeSetFreeze.public > 0 GROUP BY Species.Name, Tissue.Short_Name") ; only public datasets; rows are grouped per species/tissue pair
+  (schema-triples
+   (gnc:molecular_trait a owl:Class)
+   (gnc:molecular_trait a skos:Concept)
+   (gnc:molecular_trait rdfs:subClassOf obo:UBERON_0000479) ; NOTE(review): same UBERON parent as gnc:tissue -- looks copied; confirm a trait should subclass an anatomy term
+   (gnc:molecular_trait rdfs:label "Molecular Trait.   This describes a molecular trait of a given species.  We combine the species name and the tissue name in order to differentiate the traits across different inbredset groups.")
    (gnt:has_tissue rdf:type owl:ObjectProperty)
-   (gnt:has_tissue rdfs:domain gnc:molecular_traits)
+   (gnt:has_tissue rdfs:domain gnc:molecular_trait) ; domain must name the class declared above; the plural IRI was never declared
    (gnt:has_tissue rdfs:range gnc:tissue)
    (gnt:has_tissue rdfs:label "has tissue"))
-  (triples (string->identifier "tissue" (field Tissue Short_Name)
-                               #:separator "_")
-    (set rdf:type 'gnc:tissue)
-    (set rdfs:label (field Tissue Name))))
+  (triples (string->identifier ; subject combines the species name and the tissue short name
+            (format #f "trait_~a" (field Species Name))
+            (field Tissue Short_Name)
+            #:separator "_")
+    (set rdf:type 'gnc:molecular_trait)
+    (set gnt:has_strain ; NOTE(review): InbredSet.Name is not in the GROUP BY, so the set reported per row is presumably arbitrary -- confirm
+         (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))
+    (set gnt:has_species
+         (string->identifier "" (remap-species-identifiers (field Species Fullname))))
+    (multiset gnt:has_dataset ; one gnt:has_dataset value per dataset name in the group
+              (map (cut string->identifier "dataset" <> #:separator "_")
+                   (string-split ; GROUP_CONCAT returns the names comma-joined; split them back into a list
+                    (field ("GROUP_CONCAT(ProbeSetFreeze.Name SEPARATOR ',')"
+                            dataset_name))
+                    #\,)))
+    (set gnt:has_tissue ; same identifier construction the tissue transformer uses for its subjects
+         (string->identifier "tissue"
+                             (field Tissue Short_Name)
+                             #:separator "_"))))
+
+(define-transformer list-molecular-traits ; links each InbredSet resource to the species/tissue trait resources it has public datasets for
+  (tables (Species
+           (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id")
+           (inner-join ProbeFreeze "ON ProbeFreeze.InbredSetId = InbredSet.Id")
+           (inner-join ProbeSetFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id")
+           (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id"))
+          "WHERE ProbeSetFreeze.public > 0 GROUP BY InbredSet.Name, Species.Name, Tissue.Short_Name") ; group per set as well: the subject below is the set, so each set needs its own row per tissue
+  (schema-triples
+   (gnt:has_molecular_trait rdf:type owl:ObjectProperty)
+   (gnt:has_molecular_trait rdfs:domain gnc:set)
+   (gnt:has_molecular_trait rdfs:range gnc:molecular_trait) ; singular: the class that trait resources are typed with
+   (gnt:has_molecular_trait rdfs:label "has molecular trait"))
+  (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_") ; subject is the InbredSet resource
+    (set gnt:has_molecular_trait ; object uses the same identifier construction as the molecular-traits subjects
+         (string->identifier (format #f "trait_~a"
+                                     (field Species Name))
+                             (field Tissue Short_Name) #:separator "_"))))
+
 
 
 
@@ -56,8 +105,10 @@
       ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
       ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")))
    (inputs
-    (list tissue))
+    (list
+     tissue
+     molecular-traits
+     list-molecular-traits))
    (outputs
     `(#:documentation ,documentation
       #:rdf ,output))))
-;; http://purl.obolibrary.org/obo/UBERON_0000479