about summary refs log tree commit diff
diff options
context:
space:
mode:
author Munyoki Kilyungi 2023-03-14 16:12:16 +0300
committer BonfaceKilz 2023-04-05 16:17:11 +0300
commit be42b42be2a87a9872c153a9b6f3da7ed135efa1 (patch)
tree 4dcae7195b4eaf41ecc4b8d3176423674b6ee0aa
parent 9a8115504747f79254606a6f063d0f46ffbf13a0 (diff)
download gn-transform-databases-be42b42be2a87a9872c153a9b6f3da7ed135efa1.tar.gz
Dump GeneRIF wikidata
* dump.scm (dump-generif-basic): Annotate createTime field with xsd.
* dump.scm (dump-generif): New dump.

Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x dump.scm 63
1 files changed, 40 insertions, 23 deletions
diff --git a/dump.scm b/dump.scm
index cfd603f..9650de5 100755
--- a/dump.scm
+++ b/dump.scm
@@ -813,8 +813,8 @@ is a <table> object."
    (gn:pubMedId rdfs:range rdfs:Literal)
    (gn:geneRIFOfSpecies rdfs:range gn:species)
    (gn:comment rdfs:range rdfs:Literal)
-   (gn:email rdfs:range rdfs:Literal)
    (gn:weburl rdfs:range rdfs:Literal)
+   (gn:createTime rdfs:range xsd:datetime)
    (gn:createTime rdfs:range rdfs:Literal)
    (gn:reason rdfs:range rdfs:Literal)
    (gn:geneRIFOFGenenetwork rdfs:range gn:geneRIF)
@@ -832,37 +832,44 @@ is a <table> object."
          (binomial-name->species-id
           (field Species FullName)))
     (set gn:comment
-         (replace-substrings
-          (field GeneRIF comment)
-          '(("\xa0" . " ")
-            ("â\x81„" . "/")
-            ("â€\x9d" . #\")
-            ("’" . #\')
-            ("\x02" . "")
-            ("\x01" . "")
-            ("β" . "β")
-            ("α-Â\xad" . "α")
-            ("Â\xad" . "")
-            ("α" . "α")
-            ("–" . "-"))))
-    (set gn:email (field GeneRIF email))
+         (format #f "(~a) (~a) ~a"
+                 (time-unix->string (field GeneRIF createtime) "~5")
+                 (field GeneRIF email)
+                 (replace-substrings
+                  (field GeneRIF comment)
+                  '(("\xa0" . " ")
+                    ("â\x81„" . "/")
+                    ("â€\x9d" . #\")
+                    ("’" . #\')
+                    ("\x02" . "")
+                    ("\x01" . "")
+                    ("β" . "β")
+                    ("α-Â\xad" . "α")
+                    ("Â\xad" . "")
+                    ("α" . "α")
+                    ("–" . "-")))))
+    (set gn:createTime
+         (annotate-field
+          (time-unix->string
+           (field GeneRIF createtime) "~5")
+          '^^xsd:datetime))
     (set gn:weburl (field GeneRIF weburl))
-    (set gn:createTime (field GeneRIF createtime))
     (set gn:reason (field GeneRIF reason))
     (set gn:initial (field GeneRIF initial))))
 
 ;; GeneRIF data from NCBI
 (define-dump dump-generif-basic
   (tables (GeneRIF_BASIC
-           (left-join Species "USING (SpeciesId)")))
+           (left-join Species "USING (SpeciesId)"))
+          "GROUP BY SpeciesId, symbol, GeneId, VersionId")
   (schema-triples
    (gn:taxId rdfs:range rdfs:Literal)
    (gn:geneId rdfs:range rdfs:Literal)
-   (gn:symbol rdfs:range rdfs:Literal)
    (gn:pubMedId rdfs:range rdfs:Literal)
+   (pubmed:pmid rdfs:range rdfs:Literal)
+   (gn:comment rdfs:range rdfs:Literal)
    (gn:symbol rdfs:range rdfs:Literal)
    (gn:geneRIFOfSpecies rdfs:range gn:species)
-   (gn:createTime rdfs:range rdfs:Literal)
    (gn:versionId rdfs:range rdfs:Literal))
   (triples
       (string->identifier
@@ -872,11 +879,19 @@ is a <table> object."
     (set gn:geneRIFOfSpecies
          (binomial-name->species-id
           (field Species FullName)))
-    (set gn:taxId (field GeneRIF_BASIC TaxID))
-    (set gn:geneId (field GeneRIF_BASIC GeneId))
+    (set gn:taxId (ontology 'taxon: (field GeneRIF_BASIC TaxID)))
+    (set gn:geneId (ontology 'generif: (field GeneRIF_BASIC GeneId)))
     (set gn:symbol (field GeneRIF_BASIC symbol))
-    (set gn:pubMedId (field GeneRIF_BASIC PubMed_ID))
-    (set gn:createTime (field GeneRIF_BASIC createtime))
+    (set gn:comment (field GeneRIF_BASIC comment))
+    (multiset gn:pubMedId
+              (map (compose
+                    (cut ontology 'pubmed: <>)
+                    string-trim)
+                   (string-split (field GeneRIF_BASIC
+                                        PubMed_ID
+                                        GROUP_CONCAT
+                                        PubMedID)
+                                 #\,)))
     (set gn:versionId (field GeneRIF_BASIC VersionId))))
 
 
@@ -956,6 +971,8 @@ is a <table> object."
        (dump-investigators db)
        (dump-avg-method db)
        (dump-gene-chip db)
+       (dump-generif-basic db)
+       (dump-generif db)
        (dump-info-files db)
        (dump-schema db)
        (dump-groups db)