aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Munyoki Kilyungi, 2023-05-09 16:38:08 +0300
committer: BonfaceKilz, 2023-05-26 08:40:22 +0300
commit: debf29172ff53efe327611c55191c713909c4387 (patch)
tree: 9c55076aadcbb2102a4654b8440ea26936ccf689
parent: f02867825c04e13b23415a4855d732286032fad5 (diff)
download: gn-transform-databases-debf29172ff53efe327611c55191c713909c4387.tar.gz
Update how publications are dumped
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x dump.scm 24
1 file changed, 12 insertions, 12 deletions
diff --git a/dump.scm b/dump.scm
index 4a3f37f..1f110a8 100755
--- a/dump.scm
+++ b/dump.scm
@@ -509,10 +509,17 @@ must be remedied."
(gn:year rdfs:range rdfs:Literal)
(gn:author rdfs:range rdfs:Literal)
(gn:abstract rdfs:range rdfs:Literal))
- (triples (string->identifier "publication"
- (number->string (field Publication Id)))
+ (triples
+ (let ((pmid (field
+ ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))"
+ pmid)))
+ (publication-id (field Publication Id)))
+ (if (string-null? pmid)
+ (string->identifier "publication"
+ (number->string publication-id))
+ (ontology 'pubmed: pmid)))
(set rdf:type 'gn:publication)
- (set gn:pubMedId (field Publication PubMed_ID))
+ (set gn:pubMedId (field ("IFNULL(PubMed_ID, '')" pubmedId)))
(set gn:title (field Publication Title))
(set gn:journal (field Publication Journal))
(set gn:volume (field Publication Volume))
@@ -522,16 +529,9 @@ must be remedied."
(multiset gn:author
;; The authors field is a comma
;; separated list. Split it.
- (map string-trim (string-split (field Publication Authors) #\,)))
+ (map string-trim (string-split (sanitize-rdf-string (field Publication Authors)) #\,)))
(set gn:abstract
- ;; TODO: Why are there unprintable characters?
- (delete-substrings (field Publication Abstract)
- "\x01"
- ;; \v is a vertical tab
- ;; character. Microsoft Word probably
- ;; still uses this.
- "\v"))))
-
+ (sanitize-rdf-string (field Publication Abstract)))))
;; One-argument procedure, built with `cut', that calls
;; string->identifier with the fixed first argument "tissue" and the
;; caller's argument in the `<>' slot.
;; NOTE(review): judging by the name, the argument is a tissue short
;; name and the result is its identifier -- confirm against
;; string->identifier, which is defined outside this excerpt.
(define tissue-short-name->id
(cut string->identifier "tissue" <>))