about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Munyoki Kilyungi, 2023-03-07 12:02:22 +0300
committer: BonfaceKilz, 2023-03-08 14:27:39 +0300
commit: d31b9f5dac6d35b3a60eafbed4911446e8f89c61 (patch)
tree: 5719ec6f6fa433ee1c9ce2447130a5f9e2b83963
parent: f3aff608272bc099f862bc4f1156ae039024a723 (diff)
download: gn-transform-databases-d31b9f5dac6d35b3a60eafbed4911446e8f89c61.tar.gz
Dump GeneWiki metadata
* dump.scm (dump-generif): New data dump.

Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-xdump.scm83
1 files changed, 82 insertions, 1 deletions
diff --git a/dump.scm b/dump.scm
index 9ce78c8..784dd15 100755
--- a/dump.scm
+++ b/dump.scm
@@ -799,6 +799,84 @@ is a <table> object."
     (set gn:binomialName (field InbredSet fullName))
     (set gn:species (field Species Name))))
 
+;; Dump GeneWiki entries as RDF: GeneRIF rows with display > 0, left-joined to
+;; Species and (through GeneRIFXRef) to GeneCategory.  Subjects are built from
+;; the string "geneRIF" and the row's GeneRIF.Id.
+(define-dump dump-generif
+  (tables (GeneRIF
+	   (left-join Species "USING (SpeciesId)")
+	   (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id")
+	   (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id"))
+          "WHERE GeneRIF.display > 0")
+  (schema-triples
+   (gn:versionId rdfs:range rdfs:Literal)
+   (gn:symbol rdfs:range rdfs:Literal)
+   (gn:pubMedId rdfs:range rdfs:Literal)
+   (gn:geneRIFOfSpecies rdfs:range gn:species)
+   (gn:comment rdfs:range rdfs:Literal)
+   (gn:email rdfs:range rdfs:Literal)
+   (gn:weburl rdfs:range rdfs:Literal)
+   (gn:createTime rdfs:range rdfs:Literal)
+   (gn:reason rdfs:range rdfs:Literal)
+   (gn:geneRIFOfGenenetwork rdfs:range gn:geneRIF) ; was "OFGenenetwork"; now matches the rdf:type below
+   (gn:geneCategory rdfs:range gn:geneRIF) ; NOTE(review): value set below is GeneCategory.Name -- should this be rdfs:Literal?
+   (gn:initial rdfs:range rdfs:Literal))
+  (triples (string->identifier "geneRIF" (number->string (field GeneRIF Id)))
+    (set rdf:type 'gn:geneRIFOfGenenetwork)
+    (set gn:versionId (field GeneRIF versionId))
+    (set gn:symbol (field GeneRIF symbol))
+    (set gn:geneCategory (field GeneCategory Name))
+    (set gn:pubMedId (field GeneRIF PubMed_ID))
+    (set gn:geneRIFOfSpecies (binomial-name->species-id (field Species FullName)))
+    ;; Clean up mis-decoded text in the comment field.  The Greek-letter keys are
+    ;; escaped ("\xce\xb2" is "Î²"); written as plain β/α those pairs were no-ops.
+    (set gn:comment
+         (replace-substrings (field GeneRIF comment)
+                             '(("\xa0" . " ")
+                               ("â\x81„" . "/")
+                               ("â€\x9d" . #\")
+                               ("’" . #\')
+                               ("\x02" . "")
+                               ("\x01" . "")
+                               ("\xce\xb2" . "β")         ; "Î²": UTF-8 bytes of β read as Latin-1
+                               ("\xce\xb1-\xc2\xad" . "α") ; "Î±-Â" + soft hyphen; listed before the shorter keys
+                               ("Â\xad" . "")
+                               ("\xce\xb1" . "α")         ; "Î±": UTF-8 bytes of α read as Latin-1
+                               ("–" . "-")))) ; NOTE(review): "’" and "–" may have been "â€™"/"â€“" originally -- confirm
+    (set gn:email (field GeneRIF email))
+    (set gn:weburl (field GeneRIF weburl))
+    (set gn:createTime (field GeneRIF createtime))
+    (set gn:reason (field GeneRIF reason))
+    (set gn:initial (field GeneRIF initial))))
+
+;; GeneRIF data from NCBI: dump every GeneRIF_BASIC row, left-joined to Species.
+;; NOTE(review): subjects are "geneRIF" + GeneId, while dump-generif builds its
+;; subjects from "geneRIF" + GeneRIF.Id -- confirm the two id spaces cannot
+;; clash and that GeneId identifies a single row.
+(define-dump dump-generif-basic
+  (tables (GeneRIF_BASIC
+           (left-join Species "USING (SpeciesId)")))
+  ;; Every predicate declared here except gn:geneRIFOfSpecies has a literal range.
+  (schema-triples
+   (gn:taxId rdfs:range rdfs:Literal)
+   (gn:geneId rdfs:range rdfs:Literal)
+   (gn:symbol rdfs:range rdfs:Literal) ; declared once; a second identical entry was removed
+   (gn:pubMedId rdfs:range rdfs:Literal)
+   (gn:geneRIFOfSpecies rdfs:range gn:species)
+   (gn:createTime rdfs:range rdfs:Literal)
+   (gn:versionId rdfs:range rdfs:Literal))
+  (triples
+      (string->identifier "geneRIF" (number->string (field GeneRIF_BASIC GeneId)))
+    (set rdf:type 'gn:geneRIFOfNcbi)
+    ;; Species.FullName is passed through binomial-name->species-id.
+    (set gn:geneRIFOfSpecies (binomial-name->species-id (field Species FullName)))
+    (set gn:taxId (field GeneRIF_BASIC TaxID))
+    (set gn:geneId (field GeneRIF_BASIC GeneId))
+    (set gn:symbol (field GeneRIF_BASIC symbol))
+    (set gn:pubMedId (field GeneRIF_BASIC PubMed_ID))
+    (set gn:createTime (field GeneRIF_BASIC createtime))
+    (set gn:versionId (field GeneRIF_BASIC VersionId))))
+
 
 ;; Import GeneRIF
 
@@ -876,5 +954,8 @@ is a <table> object."
        (dump-info-files db)
        (dump-schema db)
        (dump-groups db)
-       (import-generif (assq-ref %connection-settings 'generif-data-file))))))
        (dump-published-phenotypes db)
+       (dump-generif db)
+       (dump-generif-basic db)
+       (import-generif (assq-ref %connection-settings 'generif-data-file))))))
+