From debf29172ff53efe327611c55191c713909c4387 Mon Sep 17 00:00:00 2001
From: Munyoki Kilyungi
Date: Tue, 9 May 2023 16:38:08 +0300
Subject: Update how publications are dumped

Signed-off-by: Munyoki Kilyungi
---
 dump.scm | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/dump.scm b/dump.scm
index 4a3f37f..1f110a8 100755
--- a/dump.scm
+++ b/dump.scm
@@ -509,10 +509,17 @@ must be remedied."
    (gn:year rdfs:range rdfs:Literal)
    (gn:author rdfs:range rdfs:Literal)
    (gn:abstract rdfs:range rdfs:Literal))
-  (triples (string->identifier "publication"
-                               (number->string (field Publication Id)))
+  (triples
+      (let ((pmid (field
+                   ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))"
+                    pmid)))
+            (publication-id (field Publication Id)))
+        (if (string-null? pmid)
+            (string->identifier "publication"
+                                (number->string publication-id))
+            (ontology 'pubmed: pmid)))
     (set rdf:type 'gn:publication)
-    (set gn:pubMedId (field Publication PubMed_ID))
+    (set gn:pubMedId (field ("IFNULL(PubMed_ID, '')" pubmedId)))
     (set gn:title (field Publication Title))
     (set gn:journal (field Publication Journal))
     (set gn:volume (field Publication Volume))
@@ -522,16 +529,9 @@ must be remedied."
     (multiset gn:author
               ;; The authors field is a comma
               ;; separated list. Split it.
-              (map string-trim (string-split (field Publication Authors) #\,)))
+              (map string-trim (string-split (sanitize-rdf-string (field Publication Authors)) #\,)))
     (set gn:abstract
-         ;; TODO: Why are there unprintable characters?
-         (delete-substrings (field Publication Abstract)
-                            "\x01"
-                            ;; \v is a vertical tab
-                            ;; character. Microsoft Word probably
-                            ;; still uses this.
-                            "\v"))))
-
+         (sanitize-rdf-string (field Publication Abstract)))))
 
 (define tissue-short-name->id
   (cut string->identifier "tissue" <>))
-- 
cgit v1.2.3