about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Munyoki Kilyungi, 2023-03-29 00:03:30 +0300
committer: BonfaceKilz, 2023-04-05 16:17:11 +0300
commit: 33fa6f87c4739bc2225dff861f07156faa099c6b (patch)
tree: a6d8db9863e64d3ba1f1a9cba30f1abbbd11c84d
parent: 2219c47dc1bb9a6da5eccc89aee173d81d1e0038 (diff)
download: gn-transform-databases-33fa6f87c4739bc2225dff861f07156faa099c6b.tar.gz
Delete generif file dump

* dump.scm (decode-html-entities, import-generif): Delete.

Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x dump.scm 54
1 files changed, 1 insertions, 53 deletions
diff --git a/dump.scm b/dump.scm
index 3a827d8..412df2f 100755
--- a/dump.scm
+++ b/dump.scm
@@ -939,57 +939,6 @@ is a <table> object."
))
-;; Import GeneRIF
-
-;; Download GeneRIF data from
-;; https://ftp.ncbi.nih.gov/gene/GeneRIF/generifs_basic.gz
-(define decode-html-entities
- (cut regexp-substitute/global
- #f
- ;; We tolerate the absence of the trailing semicolon.
- "&#([[:digit:]]+);{0,1}"
- <>
- 'pre
- (compose string integer->char string->number (cut match:substring <> 1))
- 'post))
-
-(define (import-generif generif-data-file)
- ;; TODO: Link to gene objects, not merely literal Gene IDs.
- (triple 'gn:geneId 'rdfs:domain 'gn:geneRIF)
- (triple 'gn:geneId 'rdfs:range 'rdfs:Literal)
- ;; TODO: Link to gn:publication objects, not merely literal PubMed
- ;; IDs.
- (triple 'gn:geneRIFEvidencedByPubMedId 'rdfs:domain 'gn:geneRIF)
- (triple 'gn:geneRIFEvidencedByPubMedId 'rdfs:range 'rdfs:Literal)
- (triple 'gn:geneRIFText 'rdfs:domain 'gn:geneRIF)
- (triple 'gn:geneRIFText 'rdfs:range 'rdfs:Literal)
-
- (call-with-gzip-input-port (open-input-file generif-data-file)
- (lambda (port)
- ;; Read and discard header.
- (get-line port)
- ;; Dump other lines.
- (port-transduce
- (compose (tenumerate)
- (tmap (match-lambda
- ;; Is there a better way to identify GeneRIF
- ;; entries instead of merely enumerating them?
- ((i . line)
- (match (string-split line #\tab)
- ((_ gene-id pubmed-id _ text)
- (scm->triples
- `((rdf:type . gn:geneRIF)
- (gn:geneId . ,(string->number gene-id))
- (gn:pubMedId . ,(string->number pubmed-id))
- ;; Some text has HTML entities. Decode
- ;; them.
- (gn:geneRIFText . ,(decode-html-entities text)))
- (string->identifier "geneRIF" (number->string i)))))))))
- (const #t)
- get-line
- port))))
-
-
;; Main function
(call-with-genenetwork-database
@@ -1020,6 +969,5 @@ is a <table> object."
(dump-info-files db)
(dump-schema db)
(dump-groups db)
- (dump-published-phenotypes db)
- (import-generif (assq-ref %connection-settings 'generif-data-file))))))
+ (dump-published-phenotypes db)))))