about summary refs log tree commit diff
path: root/examples/phenotype.scm
diff options
context:
space:
mode:
Diffstat (limited to 'examples/phenotype.scm')
-rwxr-xr-x examples/phenotype.scm 105
1 files changed, 66 insertions, 39 deletions
diff --git a/examples/phenotype.scm b/examples/phenotype.scm
index af47b51..3e69607 100755
--- a/examples/phenotype.scm
+++ b/examples/phenotype.scm
@@ -61,43 +61,76 @@
             (or post-abbrev pre-abbrev post-desc pre-desc)
             #:separator "_")))))
 
-(define-transformer phenotypes
+(define-transformer gnc:set->gn:trait   ; links each public InbredSet "set" node to its "trait" nodes
   (tables (PublishXRef
            (left-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId")
            (left-join Publication "ON Publication.Id = PublishXRef.PublicationId")
-           (left-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId")))
-  (triples (string->identifier
+           (left-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId"))
+          "WHERE InbredSet.public > 0")
+  (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")
+    (set gnt:has_phenotype_trait
+         (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation)))
+               (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
+               (post-desc (blank-p (field Phenotype Post_publication_description)))
+               (pre-desc (blank-p (field Phenotype Pre_publication_description)))) ; last fallback for the trait name
+           (string->identifier
             "trait"
-            (field ("CONCAT(IFNULL(InbredSet.InbredSetCode, PublishXRef.InbredSetId), '_', PublishXRef.Id)"
-                    Phenotype))
-            #:separator "_")
-    (set rdf:type 'gnc:phenotype)
-    (set gnt:has_strain
+            (format #f "~a_~a" (field InbredSet Name InbredSetName)
+                    (or post-abbrev pre-abbrev post-desc pre-desc))
+            #:separator "_")))))
+
+(define-transformer gn:trait->gn:phenotype   ; emits the triples describing each "trait" node of a public InbredSet
+  (tables (PublishXRef
+           (left-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId")
+           (left-join Publication "ON Publication.Id = PublishXRef.PublicationId")
+           (left-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId"))
+          "WHERE InbredSet.public > 0")
+  (triples (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation)))
+                 (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
+                 (post-desc (blank-p (field Phenotype Post_publication_description)))
+                 (pre-desc (blank-p (field Phenotype Pre_publication_description)))) ; must match gnc:set->gn:trait so both build the same IRI
+             (string->identifier
+              "trait"
+              (format #f "~a_~a" (field InbredSet Name InbredSetName)
+                      (or post-abbrev pre-abbrev post-desc pre-desc))
+              #:separator "_"))
+    (set rdf:type 'gnc:phenotype_trait)
+    (set gnt:has_strain (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))
+    (set owl:equivalentClass
          (string->identifier
-          "set" (field InbredSet Name InbredSetName)
+          "trait"
+          (field ("CONCAT(IFNULL(InbredSet.InbredSetCode, PublishXRef.InbredSetId), '_', PublishXRef.Id)"
+                  Phenotype))
           #:separator "_"))
-    ;; This is the trait's name
-    (set gnt:trait_id
-         (let ((trait-id (field PublishXRef Id)))
-           (if (number? trait-id)
-               (number->string trait-id)
-               trait-id)))
-    (set skos:altLabel
-         (field ("CONCAT(IFNULL(InbredSet.InbredSetCode, PublishXRef.InbredSetId), '_', PublishXRef.Id)"
-                 Phenotype)))
-    ;; All phenotypes have a post-publication description
-    (set dct:description
-         (sanitize-rdf-string
-          (field Phenotype Post_publication_description)))
-    ;; All phenotypes have a post-publication abbreviation
-    (set gnt:abbreviation (field Phenotype Post_publication_abbreviation))
-    (set gnt:labCode (field Phenotype Lab_code))
-    (set gnt:submitter
-         (sanitize-rdf-string (field Phenotype Submitter)))
-    (set dct:contributor (sanitize-rdf-string (field Phenotype Owner)))
+    (set dct:references
+         (let ((pmid (field
+                      ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))"
+                       pmid)))
+               (publication-id (field Publication Id)))
+           (if (string-null? pmid) ; the SQL above yields '' when there is no PubMed ID
+               (string->identifier "unpublished"
+                                   (number->string publication-id))
+               (ontology 'pubmed: pmid))))
+    (set gnt:has_phenotype
+         (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation)))
+               (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
+               (post-desc (blank-p (field Phenotype Post_publication_description)))
+               (pre-desc (blank-p (field Phenotype Pre_publication_description)))) ; NOTE(review): confirm the transformer emitting "phenotype" subjects binds pre-desc the same way
+           (string->identifier
+            "phenotype"
+            (or post-abbrev pre-abbrev post-desc pre-desc)
+            #:separator "_")))
     (set gnt:mean (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean))
                                   '^^xsd:double))
-    (set gnt:locus (sanitize-rdf-string (field PublishXRef Locus)))
+    (set gnt:locus
+         (string->identifier
+          ""
+          (regexp-substitute/global
+           #f "[^A-Za-z0-9:]" ; every character outside letters, digits and ':' becomes "_"
+           (sanitize-rdf-string (field PublishXRef Locus))
+           'pre "_" 'post)
+          #:separator ""
+          #:proc string-capitalize-first))
     (set gnt:lod_score (annotate-field
                         (field ("IFNULL((PublishXRef.LRS/4.604), '')" lrs))
                         '^^xsd:double))
@@ -105,15 +138,7 @@
          (annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive))
                          '^^xsd:double))
     (set gnt:sequence (annotate-field (field PublishXRef Sequence) '^^xsd:integer))
-    (set dct:isReferencedBy
-         (let ((pmid (field
-                      ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))"
-                       pmid)))
-               (publication-id (field Publication Id PublicationId)))
-           (if (string-null? pmid)
-               (string->identifier "unpublished"
-                                   (number->string publication-id))
-               (ontology 'pubmed: pmid))))))
+    (set rdfs:comment (sanitize-rdf-string (field PublishXRef comments)))))
 
 
 
@@ -149,7 +174,9 @@
    (inputs
     (list
      gnc:phenotype->gn:phenotype
-     gn:phenotype->metadata))
+     gn:phenotype->metadata
+     gnc:set->gn:trait
+     gn:trait->gn:phenotype))
    (outputs
     `(#:documentation ,documentation
       #:rdf ,output))))