diff options
author | Munyoki Kilyungi | 2023-05-09 16:38:08 +0300 |
---|---|---|
committer | BonfaceKilz | 2023-05-26 08:40:22 +0300 |
commit | debf29172ff53efe327611c55191c713909c4387 (patch) | |
tree | 9c55076aadcbb2102a4654b8440ea26936ccf689 | |
parent | f02867825c04e13b23415a4855d732286032fad5 (diff) | |
download | gn-transform-databases-debf29172ff53efe327611c55191c713909c4387.tar.gz |
Update how publications are dumped
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x | dump.scm | 24 |
1 file changed, 12 insertions, 12 deletions
```diff
@@ -509,10 +509,17 @@ must be remedied."
     (gn:year rdfs:range rdfs:Literal)
     (gn:author rdfs:range rdfs:Literal)
     (gn:abstract rdfs:range rdfs:Literal))
-  (triples (string->identifier "publication"
-                               (number->string (field Publication Id)))
+  (triples
+      (let ((pmid (field
+                   ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))"
+                    pmid)))
+            (publication-id (field Publication Id)))
+        (if (string-null? pmid)
+            (string->identifier "publication"
+                                (number->string publication-id))
+            (ontology 'pubmed: pmid)))
     (set rdf:type 'gn:publication)
-    (set gn:pubMedId (field Publication PubMed_ID))
+    (set gn:pubMedId (field ("IFNULL(PubMed_ID, '')" pubmedId)))
     (set gn:title (field Publication Title))
     (set gn:journal (field Publication Journal))
     (set gn:volume (field Publication Volume))
@@ -522,16 +529,9 @@ must be remedied."
     (multiset gn:author
               ;; The authors field is a comma
               ;; separated list. Split it.
-              (map string-trim (string-split (field Publication Authors) #\,)))
+              (map string-trim (string-split (sanitize-rdf-string (field Publication Authors)) #\,)))
     (set gn:abstract
-         ;; TODO: Why are there unprintable characters?
-         (delete-substrings (field Publication Abstract)
-                            "\x01"
-                            ;; \v is a vertical tab
-                            ;; character. Microsoft Word probably
-                            ;; still uses this.
-                            "\v"))))
-
+         (sanitize-rdf-string (field Publication Abstract)))))
 
 (define tissue-short-name->id
   (cut string->identifier "tissue" <>))
```