diff options
author | Munyoki Kilyungi | 2023-03-29 00:01:31 +0300 |
---|---|---|
committer | BonfaceKilz | 2023-04-05 16:17:11 +0300 |
commit | 2219c47dc1bb9a6da5eccc89aee173d81d1e0038 (patch) | |
tree | 3f161185b4bcc8b6318a0af49d886738cdf2c54a | |
parent | 98865301aebfe704dbaa4cb0790972c019c798ca (diff) | |
download | gn-transform-databases-2219c47dc1bb9a6da5eccc89aee173d81d1e0038.tar.gz |
Refactor dump-generif-basic/dump-generif to dump-gn-genewiki-entries
* dump.scm (dump-generif, dump-generif-basic): Refactor to ...
(dump-gn-genewiki-entries): ... this.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x | dump.scm | 67 |
1 file changed, 41 insertions, 26 deletions
@@ -865,16 +865,18 @@ is a <table> object." (left-join GeneRIF "USING (symbol)") (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id") (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id")) - "WHERE GeneRIF.display > 0 and GeneRIF.VersionId = 0 AND GeneRIF.Id != 2322 GROUP BY GeneRIF.symbol") + "WHERE GeneRIF.display > 0 and GeneRIF.VersionId = 0 GROUP BY GeneRIF.symbol") (schema-triples + (gn:geneWikiEntry rdfs:domain gn:geneWiki) (gn:geneWikiEntryOfGN rdfs:domain gn:geneWiki) + (gn:geneWikiEntryofNCBI rdfs:domain gn:geneWiki) (gn:weburl rdfs:domain gn:geneWiki) (gn:versionId rdfs:domain gn:geneWiki) (gn:category rdfs:domain gn:geneWiki) (gn:pubMedId rdfs:domain rdfs:Literal) (gn:createTime rdfs:range xsd:datetime)) (triples (ontology 'generif: - (field GeneRIF_BASIC GeneId)) + (field GeneRIF_BASIC GeneId)) (set rdf:type 'gn:geneWikiEntry) (set gn:symbol (field GeneRIF symbol)) (multiset gn:geneWikiEntryOfGn @@ -893,33 +895,49 @@ is a <table> object." ("α-Â\xad" . "α") ("Â\xad" . "") ("α" . "α") - ("–" . "-"))) - ] + ("–" . 
"-")))] [comments (string-split-substring entries ";;;;;")]) (map (match-lambda - ;; annotate pubmed id properly ((genecategory pmid email text createtime weburl) (blank-node - (gn:category genecategory) - (multiset - gn:pubMedId - (string-split - (ontology 'pubmed: pmid) - #\space)) - ;; TODO: Truncate mail to '@' - (gn:email email) - (gn:comment - (annotate-field text '^^xsd:string)) - (gn:createTime (annotate-field - createtime - ;; (time-unix->string - ;; createtime) - '^^xsd:datetime)) - (gn:weburl weburl)))) + (set gn:category genecategory) + (multiset gn:pubMedId + (map (cut ontology 'pubmed: <>) + (string-split pmid #\space))) + (set gn:author (regexp-substitute/global #f "@.*$" + email + 'pre + "" + 'post)) + (set gn:geneWikiEntry + (annotate-field text '^^xsd:string)) + (set gn:createTime (annotate-field + createtime + '^^xsd:datetime)) + (set gn:weburl weburl)))) + (map + (cut string-split-substring <> "::::") + comments)))) + (multiset gn:geneWikiEntryOfNCBI + (let* ([entries (field + ("GROUP_CONCAT(DISTINCT CONCAT_WS('::::', IFNULL(GeneRIF_BASIC.PubMed_ID, ''), IFNULL(GeneRIF_BASIC.comment, '')) SEPARATOR'|||||')" + ncbientry)) + ] + [ncbi-comments (string-split-substring entries "|||||")]) + (map + (match-lambda + ((pmid text) + (blank-node + (set gn:geneWikiEntry (annotate-field text '^^xsd:string)) + (set gn:pubMedId (ontology 'pubmed: pmid)))) + (_ (display (string-split-substring ncbi-comments "::::")) + (error "error"))) (map (cut string-split-substring <> "::::") - comments)))))) + ncbi-comments)))) + )) + ;; Import GeneRIF @@ -988,6 +1006,7 @@ is a <table> object." (prefix "xsd:" "<http://www.w3.org/2001/XMLSchema#>") (prefix "owl:" "<http://www.w3.org/2002/07/owl#>") (newline) + (dump-gn-genewiki-entries db) (dump-species db) (dump-strain db) (dump-mapping-method db) @@ -998,13 +1017,9 @@ is a <table> object." 
(dump-investigators db) (dump-avg-method db) (dump-gene-chip db) - (dump-generif-basic db) - (dump-generif db) (dump-info-files db) (dump-schema db) (dump-groups db) (dump-published-phenotypes db) - (dump-generif db) - (dump-generif-basic db) (import-generif (assq-ref %connection-settings 'generif-data-file)))))) |