about summary refs log tree commit diff
path: root/examples/molecular-traits.scm
diff options
context:
space:
mode:
author Munyoki Kilyungi 2026-01-29 01:34:49 +0300
committer Munyoki Kilyungi 2026-01-29 01:34:49 +0300
commit be124badae9b875174a1f31a3a19db8d068d4f7b (patch)
tree 9037996cfe1be7d9525e75e4c15c42354981a092 /examples/molecular-traits.scm
parent 4a0db655dadf07835362f5acea0ec0705120c9dc (diff)
download gn-transform-databases-be124badae9b875174a1f31a3a19db8d068d4f7b.tar.gz
Update mRNA datasets.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'examples/molecular-traits.scm')
-rwxr-xr-xexamples/molecular-traits.scm162
1 files changed, 155 insertions, 7 deletions
diff --git a/examples/molecular-traits.scm b/examples/molecular-traits.scm
index 9e826f6..0393a0d 100755
--- a/examples/molecular-traits.scm
+++ b/examples/molecular-traits.scm
@@ -12,7 +12,7 @@
              (transform special-forms))
 
 
-(define-transformer tissues->gn:molecular-traits
+(define-transformer gn:molecular-traits->gn:datasets
   (tables (Tissue))
   (schema-triples
    (gnc:molecular_trait a owl:Class)
@@ -64,9 +64,152 @@
     (set gnt:has_molecular_trait
          (string->identifier "trait" (field Tissue Short_Name) #:separator "_"))))
 
+;; Describe every public ProbeSetFreeze row as a dcat:Dataset, pulling
+;; its descriptive metadata from the matching InfoFiles row.  GeneChip is
+;; left-joined, so a dataset without a platform row is still selected.
+(define-transformer gn:dataset->metadata
+  (tables (ProbeSetFreeze
+           (inner-join ProbeFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id")
+           (inner-join InbredSet "ON InbredSet.Id = ProbeFreeze.InbredSetId")
+           (inner-join Species "ON InbredSet.SpeciesId = Species.Id")
+           (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id")
+           (inner-join AvgMethod "ON AvgMethod.AvgMethodId = ProbeSetFreeze.AvgID")
+           (inner-join InfoFiles "ON InfoFiles.InfoPageName = ProbeSetFreeze.Name")
+           (left-join GeneChip "ON GeneChip.Id =  InfoFiles.GeneChipId"))
+          "WHERE ProbeSetFreeze.public > 0")
+  (schema-triples
+   ;; NOTE(review): gnt:has_experiment_design_info is declared here but never set in (triples ...) -- confirm intent.
+   (gnt:has_case_info a owl:ObjectProperty)
+   (gnt:has_case_info rdfs:comment "Information about the cases used in this platform")
+   (gnt:has_case_info rdfs:domain dcat:Dataset)
+   (gnt:has_case_info rdfs:label "About Case")
+   (gnt:has_citation a owl:ObjectProperty)
+   (gnt:has_citation rdfs:comment "Citation for this dataset")
+   (gnt:has_citation rdfs:domain dcat:Dataset)
+   (gnt:has_citation rdfs:label "Citation")
+   (gnt:has_contributors a owl:ObjectProperty)
+   (gnt:has_contributors rdfs:comment "Contributors of this resource")
+   (gnt:has_contributors rdfs:domain dcat:Dataset)
+   (gnt:has_contributors rdfs:label "Contributors")
+   (gnt:has_data_processing_info a owl:ObjectProperty)
+   (gnt:has_data_processing_info rdfs:comment "Information about how this dataset was processed")
+   (gnt:has_data_processing_info rdfs:domain dcat:Dataset)
+   (gnt:has_data_processing_info rdfs:label "About Data Processing")
+   (gnt:has_experiment_design a owl:ObjectProperty)
+   (gnt:has_experiment_design rdfs:comment "Experiment Design for this resource")
+   (gnt:has_experiment_design rdfs:domain dcat:Dataset)
+   (gnt:has_experiment_design rdfs:label "Experiment Design")
+   (gnt:has_experiment_design_info a owl:ObjectProperty)
+   (gnt:has_experiment_design_info rdfs:comment "Information about how the experiment was designed")
+   (gnt:has_experiment_design_info rdfs:domain dcat:Dataset)
+   (gnt:has_experiment_design_info rdfs:label "Experiment Design")
+   (gnt:has_experiment_type a owl:ObjectProperty)
+   (gnt:has_experiment_type rdfs:comment "Information about the experiment type")
+   (gnt:has_experiment_type rdfs:domain dcat:Dataset)
+   (gnt:has_experiment_type rdfs:label "Experiment Type Metadata")
+   (gnt:has_platform_info a owl:ObjectProperty)
+   (gnt:has_platform_info rdfs:comment "Information about the platform that was used with this dataset")
+   (gnt:has_platform_info rdfs:domain dcat:Dataset)
+   (gnt:has_platform_info rdfs:label "About Platform")
+   (gnt:has_samples a owl:ObjectProperty)
+   (gnt:has_samples rdfs:domain dcat:Dataset)
+   (gnt:has_samples rdfs:label "Samples")
+   (gnt:has_specifics a owl:ObjectProperty)
+   (gnt:has_specifics rdfs:comment "Has specifics")
+   (gnt:has_specifics rdfs:domain dcat:Dataset)
+   (gnt:has_specifics rdfs:label "Specifics")
+   (gnt:has_summary a owl:ObjectProperty)
+   (gnt:has_summary rdfs:comment "Summary information about dataset")
+   (gnt:has_summary rdfs:domain dcat:Dataset)
+   (gnt:has_summary rdfs:label "Summary")
+   (gnt:has_tissue_info a owl:ObjectProperty)
+   (gnt:has_tissue_info rdfs:domain dcat:Dataset)
+   (gnt:has_tissue_info rdfs:label "Metadata about Tissue for this resource")
+   (gnt:uses_genechip a owl:ObjectProperty)
+   (gnt:uses_genechip rdfs:domain dcat:Dataset)
+   (gnt:uses_genechip skos:definition "The Platform this resource uses")
+   (gnt:uses_normalization_method a owl:ObjectProperty)
+   (gnt:uses_normalization_method rdfs:comment "The method used to map genetic or experimental data for this resource.")
+   (gnt:uses_normalization_method rdfs:domain dcat:Dataset)
+   (gnt:uses_normalization_method rdfs:label "Averaging method")
+   (gnt:uses_normalization_method rdfs:range gnc:avg_method))
+  (triples (string->identifier "dataset" (field ProbeSetFreeze Name) #:separator "_")
+    (set rdf:type 'dcat:Dataset)
+    (set skos:prefLabel (field ProbeSetFreeze Name))
+    (set dct:title (normalize-string-field (field InfoFiles InfoPageName)))
+    (set rdfs:label (normalize-string-field (field InfoFiles InfoPageName)))
+    (set dct:created (annotate-field (field ProbeSetFreeze CreateTime) '^^xsd:datetime))
+    (set gnt:uses_normalization_method
+         (string->identifier "avg_method" (field AvgMethod Name AvgMethodName) #:separator "_"))
+    (set gnt:has_strain
+         (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))
+    (set gnt:has_species
+         (string->identifier "" (remap-species-identifiers (field Species Fullname))))
+    (set gnt:has_molecular_trait
+         (string->identifier "trait" (field Tissue Short_Name) #:separator "_"))
+    ;; NOTE(review): GeneChip is left-joined; confirm how a missing GeneChip.Name is handled here.
+    (set gnt:uses_genechip
+         (string->identifier "platform" (field GeneChip Name) #:separator "_"))
+    (set dct:identifier (format #f "GN~a" (field InfoFiles GN_AccesionId)))
+    ;; Free-text InfoFiles columns: a null or blank value is passed on as
+    ;; "", anything else goes through sanitize-rdf-string.
+    (set gnt:has_experiment_type
+         (let ((experiment-type
+                (field InfoFiles Experiment_Type)))
+           (if (or (null? experiment-type) (string-blank? experiment-type))
+               "" (sanitize-rdf-string experiment-type))))
+    (set gnt:has_tissue_info
+         (let ((tissue-info
+                (field InfoFiles About_Tissue)))
+           (if (or (null? tissue-info) (string-blank? tissue-info))
+               "" (sanitize-rdf-string tissue-info))))
+    (set gnt:has_summary
+         (let ((summary
+                (field InfoFiles Summary)))
+           (if (or (null? summary) (string-blank? summary))
+               "" (sanitize-rdf-string summary))))
+    (set gnt:has_citation
+         (let ((citation
+                (field InfoFiles Citation)))
+           (if (or (null? citation) (string-blank? citation))
+               "" (sanitize-rdf-string citation))))
+    (set gnt:has_samples
+         (let ((samples
+                (field InfoFiles samples)))
+           (if (or (null? samples) (string-blank? samples))
+               "" (sanitize-rdf-string samples))))
+    (set gnt:has_specifics
+         (let ((specifics
+                (field InfoFiles Specifics)))
+           (if (or (null? specifics) (string-blank? specifics))
+               "" (sanitize-rdf-string specifics))))
+    (set gnt:has_case_info
+         (let ((cases
+                (field InfoFiles About_Cases)))
+           (if (or (null? cases) (string-blank? cases))
+               "" (sanitize-rdf-string cases))))
+    (set gnt:has_platform_info
+         (let ((platform
+                (field InfoFiles About_Array_Platform)))
+           (if (or (null? platform) (string-blank? platform))
+               "" (sanitize-rdf-string platform))))
+    (set gnt:has_data_processing_info
+         (let ((processing
+                (field InfoFiles About_Data_Values_Processing)))
+           (if (or (null? processing) (string-blank? processing))
+               "" (sanitize-rdf-string processing))))
+    (set gnt:has_experiment_design
+         (let ((experiment-design
+                (field InfoFiles Overall_Design)))
+           (if (or (null? experiment-design) (string-blank? experiment-design))
+               "" (sanitize-rdf-string experiment-design))))
+    (set gnt:has_contributors
+         (let ((contributors
+                (field InfoFiles Contributor)))
+           (if (or (null? contributors) (string-blank? contributors))
+               "" (sanitize-rdf-string contributors))))))
 
 
-
 (let* ((option-spec
         '((settings (single-char #\s) (value #t))
           (output (single-char #\o) (value #t))
@@ -79,13 +222,17 @@
         (call-with-input-file settings
           read)))
   (with-documentation
-   (name "Tissue Metadata")
+   (name "Molecular Traits")
    (connection %connection-settings)
    (table-metadata? #f)
    (prefixes
-    '(("gn:" "<http://rdf.genenetwork.org/v1/id/>")
+    '(("dcat:" "<http://www.w3.org/ns/dcat#>")
+      ("gn:" "<http://rdf.genenetwork.org/v1/id/>")
       ("obo:" "<http://purl.obolibrary.org/obo/>")
       ("owl:" "<http://www.w3.org/2002/07/owl#>")
+      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
+      ("dct:" "<http://purl.org/dc/terms/>")
+      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
       ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
       ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
       ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
@@ -93,10 +240,11 @@
       ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")))
    (inputs
     (list
-     tissues->gn:molecular-traits
-     gnc:molecular_trait->gn:molecular_trait
+     gn:dataset->metadata
+     gn:dataset->set/species/molecular_trait
+     gn:molecular-traits->gn:datasets
      gn:set->gn:dataset
-     gn:dataset->set/species/molecular_trait))
+     gnc:molecular_trait->gn:molecular_trait))
    (outputs
     `(#:documentation ,documentation
       #:rdf ,output))))