From 2219c47dc1bb9a6da5eccc89aee173d81d1e0038 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 29 Mar 2023 00:01:31 +0300 Subject: Refactor dump-generif-basic/dump-generif to dump-gn-genewiki-entries * dump.scm (dump-generif, dump-generif-basic): Refactor to ... (dump-gn-genewiki-entries): ... this. Signed-off-by: Munyoki Kilyungi --- dump.scm | 67 +++++++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/dump.scm b/dump.scm index 1a1d900..3a827d8 100755 --- a/dump.scm +++ b/dump.scm @@ -865,16 +865,18 @@ is a object." (left-join GeneRIF "USING (symbol)") (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id") (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id")) - "WHERE GeneRIF.display > 0 and GeneRIF.VersionId = 0 AND GeneRIF.Id != 2322 GROUP BY GeneRIF.symbol") + "WHERE GeneRIF.display > 0 and GeneRIF.VersionId = 0 GROUP BY GeneRIF.symbol") (schema-triples + (gn:geneWikiEntry rdfs:domain gn:geneWiki) (gn:geneWikiEntryOfGN rdfs:domain gn:geneWiki) + (gn:geneWikiEntryofNCBI rdfs:domain gn:geneWiki) (gn:weburl rdfs:domain gn:geneWiki) (gn:versionId rdfs:domain gn:geneWiki) (gn:category rdfs:domain gn:geneWiki) (gn:pubMedId rdfs:domain rdfs:Literal) (gn:createTime rdfs:range xsd:datetime)) (triples (ontology 'generif: - (field GeneRIF_BASIC GeneId)) + (field GeneRIF_BASIC GeneId)) (set rdf:type 'gn:geneWikiEntry) (set gn:symbol (field GeneRIF symbol)) (multiset gn:geneWikiEntryOfGn @@ -893,33 +895,49 @@ is a
object." ("α-Â\xad" . "α") ("Â\xad" . "") ("α" . "α") - ("–" . "-"))) - ] + ("–" . "-")))] [comments (string-split-substring entries ";;;;;")]) (map (match-lambda - ;; annotate pubmed id properly ((genecategory pmid email text createtime weburl) (blank-node - (gn:category genecategory) - (multiset - gn:pubMedId - (string-split - (ontology 'pubmed: pmid) - #\space)) - ;; TODO: Truncate mail to '@' - (gn:email email) - (gn:comment - (annotate-field text '^^xsd:string)) - (gn:createTime (annotate-field - createtime - ;; (time-unix->string - ;; createtime) - '^^xsd:datetime)) - (gn:weburl weburl)))) + (set gn:category genecategory) + (multiset gn:pubMedId + (map (cut ontology 'pubmed: <>) + (string-split pmid #\space))) + (set gn:author (regexp-substitute/global #f "@.*$" + email + 'pre + "" + 'post)) + (set gn:geneWikiEntry + (annotate-field text '^^xsd:string)) + (set gn:createTime (annotate-field + createtime + '^^xsd:datetime)) + (set gn:weburl weburl)))) + (map + (cut string-split-substring <> "::::") + comments)))) + (multiset gn:geneWikiEntryOfNCBI + (let* ([entries (field + ("GROUP_CONCAT(DISTINCT CONCAT_WS('::::', IFNULL(GeneRIF_BASIC.PubMed_ID, ''), IFNULL(GeneRIF_BASIC.comment, '')) SEPARATOR'|||||')" + ncbientry)) + ] + [ncbi-comments (string-split-substring entries "|||||")]) + (map + (match-lambda + ((pmid text) + (blank-node + (set gn:geneWikiEntry (annotate-field text '^^xsd:string)) + (set gn:pubMedId (ontology 'pubmed: pmid)))) + (_ (display (string-split-substring ncbi-comments "::::")) + (error "error"))) (map (cut string-split-substring <> "::::") - comments)))))) + ncbi-comments)))) + )) + ;; Import GeneRIF @@ -988,6 +1006,7 @@ is a
object." (prefix "xsd:" "") (prefix "owl:" "") (newline) + (dump-gn-genewiki-entries db) (dump-species db) (dump-strain db) (dump-mapping-method db) @@ -998,13 +1017,9 @@ is a
object." (dump-investigators db) (dump-avg-method db) (dump-gene-chip db) - (dump-generif-basic db) - (dump-generif db) (dump-info-files db) (dump-schema db) (dump-groups db) (dump-published-phenotypes db) - (dump-generif db) - (dump-generif-basic db) (import-generif (assq-ref %connection-settings 'generif-data-file)))))) -- cgit v1.2.3