about summary refs log tree commit diff
diff options
context:
space:
mode:
author Munyoki Kilyungi 2023-03-06 17:34:27 +0300
committer BonfaceKilz 2023-03-06 22:30:50 +0300
commit f3aff608272bc099f862bc4f1156ae039024a723 (patch)
tree 4d8c6fc0a1cb265946dbabc3615311d11cd3f98d
parent ad03253d9399040e71f1696f9c35c26509e6c53e (diff)
download gn-transform-databases-f3aff608272bc099f862bc4f1156ae039024a723.tar.gz
Dump relevant metadata about phenotypes
* dump.scm (dump-publishfreeze, dump-published-phenotypes): New dumps.

Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x dump.scm 86
1 file changed, 86 insertions, 0 deletions
diff --git a/dump.scm b/dump.scm
index aabb9f7..9ce78c8 100755
--- a/dump.scm
+++ b/dump.scm
@@ -385,7 +385,91 @@ must be remedied."
     (set gn:inbredSetOfSpecies
          (binomial-name->species-id (field Species FullName BinomialName)))))
 
+;; Metadata for published datasets: PublishFreeze described as gn:dataset subjects.
+(define-dump dump-publishfreeze
+  (tables (PublishFreeze
+           (left-join InbredSet "USING (InbredSetId)")))  ; InbredSet is joined only to supply InbredSet.Name below
   (schema-triples
+   (gn:datasetOfInbredSet rdfs:range gn:inbredSet)
+   (gn:name rdfs:range rdfs:Literal)
+   (gn:fullName rdfs:range rdfs:Literal)
+   (gn:shortName rdfs:range rdfs:Literal)
+   (gn:createTime rdfs:range rdfs:Literal))
+  (triples (string->identifier "dataset" (field PublishFreeze Name))  ; subject: "dataset" prefix + PublishFreeze.Name
+    (set rdf:type 'gn:dataset)
+    (set gn:name (field PublishFreeze Name))
+    (set gn:fullName (field PublishFreeze FullName))
+    (set gn:shortName (field PublishFreeze ShortName))
+    (set gn:createTime (field PublishFreeze CreateTime))
+    (set gn:datasetOfInbredSet
+         (inbred-set-name->id (field InbredSet Name InbredSetName)))))  ; NOTE(review): 3rd arg presumably aliases the column so it does not clash with PublishFreeze.Name -- confirm against the field macro
+
+;; Phenotype metadata, restricted by the WHERE clause below to PublishFreeze
+;; entries with public > 0 and confidentiality < 1.
+(define-dump dump-published-phenotypes
+  (tables (PublishXRef
+           (inner-join
+            Phenotype
+            "ON PublishXRef.PhenotypeId = Phenotype.Id")
+           (inner-join
+            Publication
+            "ON PublishXRef.PublicationId =
+Publication.Id")
+           (inner-join PublishFreeze "USING (InbredSetId)"))
+          "WHERE PublishFreeze.public > 0 AND PublishFreeze.confidentiality < 1")
+  (schema-triples
+   (gn:prePublicationDescription rdfs:range rdfs:Literal)
+   (gn:postPublicationDescription rdfs:range rdfs:Literal)
+   (gn:originalDescription rdfs:range rdfs:Literal)
+   (gn:units rdfs:range rdfs:Literal)
+   (gn:prePublicationAbbreviation rdfs:range rdfs:Literal)
+   (gn:postPublicationAbbreviation rdfs:range rdfs:Literal)
+   (gn:labCode rdfs:range rdfs:Literal)
+   (gn:submitter rdfs:range rdfs:Literal)
+   (gn:owner rdfs:range rdfs:Literal)
+   (gn:phenotypeOfDataset rdfs:range gn:dataset)
+   (gn:pubMedId rdfs:range rdfs:Literal)
+   (gn:publicationId rdfs:range gn:publication)
+   (gn:mean rdfs:range rdfs:Literal)
+   (gn:locus rdfs:range rdfs:Literal)
+   (gn:lrs rdfs:range rdfs:Literal)
+   (gn:additive rdfs:range rdfs:Literal)
+   (gn:sequence rdfs:range rdfs:Literal)
+   (gn:comments rdfs:range rdfs:Literal))
+  ;; In GN, a given trait is identified by the id of the PublishXRef!
+  (triples (string->identifier "phenotype"
+                               (number->string
+                                (field PublishXRef Id)))
+    (set rdf:type 'gn:phenotype)
+    (set gn:prePublicationDescription (field Phenotype Pre_publication_description))
+    (set gn:postPublicationDescription (field Phenotype Post_publication_description))
+    (set gn:originalDescription (field Phenotype Original_description))
+    (set gn:units (field Phenotype Units))
+    ;; Abbreviations are read from the *_abbreviation columns, not the descriptions.
+    (set gn:prePublicationAbbreviation (field Phenotype Pre_publication_abbreviation))
+    (set gn:postPublicationAbbreviation (field Phenotype Post_publication_abbreviation))
+    (set gn:labCode (field Phenotype Lab_code))
+    (multiset gn:submitter
+              (map string-trim (string-split
+                                (field Phenotype Submitter) #\,)))  ; split on commas, trim each piece
+    (multiset gn:owner
+              (map string-trim (string-split
+                                (field Phenotype Owner) #\,)))  ; split on commas, trim each piece
+    (set gn:pubMedId (field Publication PubMed_ID))
+    (set gn:publicationId
+         (string->identifier
+          "publication"
+          (number->string (field Publication Id))))
+    (set gn:mean (field PublishXRef mean))
+    (set gn:locus (field PublishXRef Locus))
+    (set gn:lrs (field PublishXRef LRS))
+    (set gn:additive (field PublishXRef additive))
+    (set gn:sequence (field PublishXRef Sequence))
+    (set gn:comments (field PublishXRef comments))
+    (set gn:phenotypeOfDataset
+         (string->identifier "dataset"                              ; same "dataset" + Name subject scheme as dump-publishfreeze
+                             (field PublishFreeze Name)))))
+
 
 (define-dump dump-publication
   (tables (Publication))
@@ -783,6 +867,7 @@ is a <table> object."
        (dump-strain db)
        (dump-mapping-method db)
        (dump-inbred-set db)
+       (dump-publishfreeze db)
        (dump-publication db)
        (dump-tissue db)
        (dump-investigators db)
@@ -792,3 +877,4 @@ is a <table> object."
        (dump-schema db)
        (dump-groups db)
        (import-generif (assq-ref %connection-settings 'generif-data-file))))))
+       (dump-published-phenotypes db)