about summary refs log tree commit diff
path: root/examples/phenotype.scm
diff options
context:
space:
mode:
Diffstat (limited to 'examples/phenotype.scm')
-rwxr-xr-x examples/phenotype.scm 187
1 files changed, 101 insertions, 86 deletions
diff --git a/examples/phenotype.scm b/examples/phenotype.scm
index aa1e9c5..37bbd59 100755
--- a/examples/phenotype.scm
+++ b/examples/phenotype.scm
@@ -14,100 +14,112 @@
              (transform special-forms))
 
 
-(define-transformer phenotypes
-  (tables (PublishXRef
-           (left-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId")
-           (left-join Publication "ON Publication.Id = PublishXRef.PublicationId")
-           (left-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId")))
-  (schema-triples
-   (gnt:traitId a owl:ObjectProperty)
-   (gnt:traitId rdfs:domain gnc:Phenotype)
-   (gnt:traitId skos:definition "This is the unique trait id assigned from GeneNetwork")
-   (gnt:abbreviation a owl:ObjectProperty)
-   (gnt:abbreviation rdfs:domain gnc:Phenotype)
-   (gnt:abbreviation skos:definition "The abbreviation used for this resource")
-   (gnt:labCode a owl:ObjectProperty)
-   (gnt:labCode rdfs:domain gnc:Phenotype)
-   (gnt:submitter a owl:ObjectProperty)
-   (gnt:submitter rdfs:domain gnc:Phenotype)
-   (gnt:submitter skos:definition "A person who submitted this resource to GN")
-   (gnt:mean a rdf:Property)
-   (gnt:mean a qb:MeasureProperty)
-   (gnt:mean rdfs:subPropertyOf sdmx-measure:obsValue)
-   (gnt:mean rdfs:domain gnc:Phenotype)
-   (gnt:mean rdfs:range xsd:double)
-   (gnt:lodScore a rdf:Property)
-   (gnt:lodScore a qb:MeasureProperty)
-   (gnt:lodScore rdfs:subPropertyOf sdmx-measure:obsValue)
-   (gnt:lodScore rdfs:domain gnc:Phenotype)
-   (gnt:lodScore rdfs:range xsd:double)
-   (gnt:lodScore rdfs:label "Peak -logP")
-   (gnt:lodScore skos:definition "Statistical measurement assessing the likelihood of genetic linkage between traits or genetic markers.")
-   (gnt:locus a rdf:Property)
-   (gnt:locus a qb:MeasureProperty)
-   (gnt:locus rdfs:subPropertyOf sdmx-measure:obsValue)
-   (gnt:locus rdfs:domain gnc:Phenotype)
-   (gnt:locus rdfs:range rdfs:Literal)
-   (gnt:additive rdfs:domain gnc:Phenotype)
-   (gnt:additive rdfs:range xsd:double)
-   (gnt:sequence rdfs:domain gnc:Phenotype)
-   (gnt:sequence rdfs:range xsd:integer))
-  (triples (string->identifier
-            "trait"
-            (field ("CONCAT(IFNULL(InbredSet.InbredSetCode, PublishXRef.InbredSetId), '_', PublishXRef.Id)"
-                    Phenotype)))
-    (set rdf:type 'gnc:Phenotype)
-    (set gnt:belongsToGroup
-         (string->identifier
-          "set" (field InbredSet Name InbredSetName)
-          #:separator ""
-          #:proc string-capitalize-first))
-    ;; This is the trait's name
-    (set gnt:traitId
-         (let ((trait-id (field PublishXRef Id)))
-           (if (number? trait-id)
-               (number->string trait-id)
-               trait-id)))
-    (set skos:altLabel
-         (field ("CONCAT(IFNULL(InbredSet.InbredSetCode, PublishXRef.InbredSetId), '_', PublishXRef.Id)"
-                 Phenotype)))
+
+
+
+;; For each Phenotype row, emit a skos:member triple whose subject is the
+;; literal "gnc:phenotype" and whose object is a "phenotype" identifier.
+;; The identifier comes from the first of four fields that `or' accepts
+;; (assumes blank-p yields #f for blank values -- TODO confirm).
+(define-transformer gnc:phenotype->gn:phenotype
+  (tables (Phenotype))
+  (triples "gnc:phenotype"
+    (set skos:member
+         (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation)))
+               (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
+               (post-desc (blank-p (field Phenotype Post_publication_description)))
+               (pre-desc (blank-p (field Phenotype Pre_publication_description)))) ; last fallback: pre-publication text
+           (string->identifier
+            "phenotype"
+            (or post-abbrev pre-abbrev post-desc pre-desc) ; first non-#f candidate wins
+            #:separator "_")))))
+
+(define-transformer gn:phenotype->metadata ; per-Phenotype-row metadata triples
+  (tables (Phenotype))
+  (triples (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation))) ; subject: "phenotype" identifier
+                 (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
+                 (post-desc (blank-p (field Phenotype Post_publication_description)))
+                 (pre-desc (blank-p (field Phenotype Pre_publication_description)))) ; last fallback: pre-publication text
+             (string->identifier
+              "phenotype"
+              (or post-abbrev pre-abbrev post-desc pre-desc) ; first non-#f candidate wins
+              #:separator "_"))
+    (set rdf:type 'gnc:phenotype)
     ;; All phenotypes have a post-publication description
     (set dct:description
          (sanitize-rdf-string
           (field Phenotype Post_publication_description)))
     ;; All phenotypes have a post-publication abbreviation
-    (set gnt:abbreviation (field Phenotype Post_publication_abbreviation))
-    (set gnt:labCode (field Phenotype Lab_code))
+    (set gnt:abbreviation (sanitize-rdf-string (field Phenotype Post_publication_abbreviation)))
+    (set gnt:has_lab_code (field Phenotype Lab_code))
     (set gnt:submitter
          (sanitize-rdf-string (field Phenotype Submitter)))
     (set dct:contributor (sanitize-rdf-string (field Phenotype Owner)))
-    (set gnt:mean (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean))
-                                  '^^xsd:double))
-    (set gnt:locus
-         (string->identifier
-          ""
-          (regexp-substitute/global
-           #f "[^A-Za-z0-9:]"
-           (sanitize-rdf-string (field PublishXRef Locus))
-           'pre "_" 'post)
-          #:separator ""
-          #:proc string-capitalize-first))
-    (set gnt:lodScore (annotate-field
-                  (field ("IFNULL((PublishXRef.LRS/4.604), '')" lrs))
-                  '^^xsd:double))
-    (set gnt:additive
-         (annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive))
-                         '^^xsd:double))
-    (set gnt:sequence (annotate-field (field PublishXRef Sequence) '^^xsd:integer))
-    (set dct:isReferencedBy
+    (set skos:member ; NOTE(review): object is built exactly like this triple's subject -- confirm intended
+         (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation)))
+               (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
+               (post-desc (blank-p (field Phenotype Post_publication_description)))
+               (pre-desc (blank-p (field Phenotype Pre_publication_description)))) ; last fallback: pre-publication text
+           (string->identifier
+            "phenotype"
+            (or post-abbrev pre-abbrev post-desc pre-desc)
+            #:separator "_")))))
+
+(define-transformer gn:trait->gn:phenotype ; per-PublishXRef-row trait triples, linked to a phenotype identifier
+  (tables (PublishXRef
+           (left-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId")
+           (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id") ; NOTE(review): joins on InbredSet.Id, line above uses InbredSet.InbredSetId -- confirm
+           (left-join Publication "ON Publication.Id = PublishXRef.PublicationId")
+           (left-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId"))
+          "WHERE InbredSet.public > 0")
+  (triples (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation))) ; subject: "trait" identifier
+                 (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
+                 (post-desc (blank-p (field Phenotype Post_publication_description)))
+                 (pre-desc (blank-p (field Phenotype Pre_publication_description)))) ; last fallback: pre-publication text
+             (string->identifier
+              "trait"
+              (format #f "~a_~a" (field PublishFreeze Name) ; "<PublishFreeze.Name>_<first non-#f candidate>"
+                      (or post-abbrev pre-abbrev post-desc pre-desc))
+              #:separator "_"))
+    (set rdf:type 'gnc:phenotype_trait)
+    (set gnt:has_strain (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))
+    (set owl:equivalentClass
+         (field ("CONCAT(PublishFreeze.Name, '_', PublishXRef.Id)"
+                 PublishFreeze)))
+    (set dcat:distribution
+         (string->symbol
+          (format #f "gnd:~a" ; same CONCAT value as above, under the gnd: prefix
+                  (field ("CONCAT(PublishFreeze.Name, '_', PublishXRef.Id)"
+                          PublishFreeze)))))
+    (set dct:references
          (let ((pmid (field
                       ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))"
                        pmid)))
-               (publication-id (field Publication Id PublicationId)))
+               (publication-id (field Publication Id)))
            (if (string-null? pmid)
                (string->identifier "unpublished"
                                    (number->string publication-id))
-               (ontology 'pubmed: pmid))))))
+               (ontology 'pubmed: pmid))))
+    (set gnt:has_phenotype ; object is a "phenotype" identifier built from the same four candidate fields
+         (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation)))
+               (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
+               (post-desc (blank-p (field Phenotype Post_publication_description)))
+               (pre-desc (blank-p (field Phenotype Pre_publication_description)))) ; last fallback: pre-publication text
+           (string->identifier
+            "phenotype"
+            (or post-abbrev pre-abbrev post-desc pre-desc)
+            #:separator "_")))
+    (set gnt:mean (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean))
+                                  '^^xsd:double))
+    (set gnt:locus (sanitize-rdf-string (field PublishXRef Locus)))
+    (set gnt:lod_score (annotate-field
+                        (field ("IFNULL((PublishXRef.LRS/4.604), '')" lrs)) ; LRS divided by 4.604 in SQL
+                        '^^xsd:double))
+    (set gnt:additive
+         (annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive))
+                         '^^xsd:double))
+    (set gnt:sequence (annotate-field (field PublishXRef Sequence) '^^xsd:integer))
+    (set rdfs:comment (sanitize-rdf-string (field PublishXRef comments)))))
 
 
 
@@ -127,11 +139,13 @@
    (connection %connection-settings)
    (table-metadata? #f)
    (prefixes
-    '(("dct:" "<http://purl.org/dc/terms/>")
-      ("gn:" "<http://genenetwork.org/id/>")
+    '(("dcat:" "<http://www.w3.org/ns/dcat#>")
+      ("dct:" "<http://purl.org/dc/terms/>")
+      ("gn:" "<http://rdf.genenetwork.org/v1/id/>")
       ("owl:" "<http://www.w3.org/2002/07/owl#>")
-      ("gnc:" "<http://genenetwork.org/category/>")
-      ("gnt:" "<http://genenetwork.org/term/>")
+      ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
+      ("gnd:" "<https://cd.genenetwork.org/api3/lmdb/v1/data/traits/>")
+      ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
       ("sdmx-measure:" "<http://purl.org/linked-data/sdmx/2009/measure#>")
       ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
       ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
@@ -141,8 +155,9 @@
       ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
       ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")))
    (inputs
-    (list
-     phenotypes))
+    (list gnc:phenotype->gn:phenotype
+          gn:phenotype->metadata
+          gn:trait->gn:phenotype))
    (outputs
     `(#:documentation ,documentation
       #:rdf ,output))))