diff options
author | Munyoki Kilyungi | 2024-12-09 14:15:44 +0300 |
---|---|---|
committer | Munyoki Kilyungi | 2024-12-09 14:15:44 +0300 |
commit | 9a36fbdf563722b351b4c53c3152e7dac92a7978 (patch) | |
tree | 5be060adaf1a6a70dee9b74bfdb6a8844f328865 /examples | |
parent | d3827cf3f82996e35c1c28e35813f9521f2257d7 (diff) | |
download | gn-transform-databases-master.tar.gz |
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'examples')
-rwxr-xr-x | examples/generif-old.scm | 87 |
1 file changed, 49 insertions, 38 deletions
diff --git a/examples/generif-old.scm b/examples/generif-old.scm index ba6768d..ede5a28 100755 --- a/examples/generif-old.scm +++ b/examples/generif-old.scm @@ -15,6 +15,22 @@ +(define (fix-email-id email) + (string-delete #\space email)) + +(define (investigator-attributes->id first-name last-name email) + ;; There is just one record corresponding to "Evan Williams" which + ;; does not have an email ID. To accommodate that record, we + ;; construct the investigator ID from not just the email ID, but + ;; also the first and the last names. It would be preferable to just + ;; find Evan Williams' email ID and insert it into the database. + (string->identifier "investigator" + (string-join + (list first-name last-name (fix-email-id email)) + "_"))) + + + (define-transformer genewiki-symbols (tables (GeneRIF_BASIC) "GROUP BY BINARY symbol") @@ -47,17 +63,12 @@ (tables (GeneRIF (left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId") (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id") - (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id")) - "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol, GeneRIF.SpeciesId, GeneRIF.createtime, GeneRIF.reason") + (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id") + (left-join Investigators "ON Investigators.Email = GeneRIF.email")) + "WHERE GeneRIF.display > 0 AND GeneRIF.VersionId = 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.comment, BINARY GeneRIF.symbol") (schema-triples (gnc:GeneWikiEntry a rdfs:Class) (gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry) - (gnt:initial a owl:ObjectProperty) - (gnt:initial rdfs:domain gnc:GeneWikiEntry) - (gnt:initial skos:definition "Optional user or project code or your initials") - (gnt:reason a owl:ObjectProperty) - (gnt:reason rdfs:domain gnc:GeneWikiEntry) - (gnt:reason skos:definition "The reason why this resource was modified") (gnc:GNWikiEntry rdfs:comment 
"Represents GeneRIF Entries entered from GeneNetwork") (gnt:geneSymbol rdfs:domain gnc:GNWikiEntry)) (triples @@ -69,23 +80,25 @@ 'pre "_" 'post) #:proc (lambda (x) x)) (set rdfs:comment - (let* ((generif-comment (sanitize-rdf-string (field GeneRIF comment))) - (create-time (field GeneRIF createtime EntryCreateTime)) - (pmid (field GeneRIF PubMed_ID PMID)) - (web-url (field GeneRIF weburl)) - (species (string->identifier + (let* ([generif-comment (sanitize-rdf-string (field GeneRIF comment))] + [create-time (field GeneRIF createtime EntryCreateTime)] + [pmid (field GeneRIF PubMed_ID PMID)] + [web-url (field GeneRIF weburl)] + [species (string->identifier "" (remap-species-identifiers (field Species Fullname)) #:separator "" - #:proc string-capitalize-first)) - (version-id (field GeneRIF versionId)) - (identifier (field GeneRIF Id)) - (initial (sanitize-rdf-string (field GeneRIF initial))) - (reason (field GeneRIF reason)) - (email (sanitize-rdf-string (field GeneRIF email))) - (category - (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR '; ')" - GeneCategory)))) + #:proc string-capitalize-first)] + [categories + (remove (lambda (x) + (or (eq? x #f) + (and (string? x) + (string-null? x)))) + (remove-duplicates + (string-split-substring + (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR '$$')" + GeneCategory)) + "$$")))]) (string->symbol (string-append "[ " @@ -106,22 +119,20 @@ "~{dct:references pubmed:~a ; ~}" (string-split pmid #\space)) "") - (if (string-blank? email) - "" - (format #f "foaf:mbox ~s ; " email)) - (format #f "dct:identifier ~s ; " identifier) - (format #f "dct:hasVersion \"~s\"^^xsd:int ; " version-id) - (if (string-blank? reason) - "" - (format #f "gnt:reason ~s ; " reason)) - (if (or (null? initial) - (string-blank? initial)) - "" (format #f "gnt:initial ~s ; " initial)) - (if (string-blank? category) - "" + (if (and (not (string-null? + (string-trim-both (field GeneRIF email)))) + (not (string-null? 
(field Investigators Email)))) + (format #f "dct:creator ~a ; " + (investigator-attributes->id + (field Investigators FirstName) + (field Investigators LastName) + (field Investigators Email))) + "") + (if (not (null? categories)) (format #f - "gnt:belongsToCategory ~s ; " - category)) + "~{gnt:belongsToCategory ~s ; ~}" + categories) + "") (if (and (string? web-url) (not (string-null? web-url))) (format #f "foaf:homepage ~s ; " web-url) @@ -172,7 +183,7 @@ taxonomic-id)) (format #f "gnt:hasGeneId generif:~a ; " gene-id) - (format #f "dct:hasVersion '~a'^^xsd:int ; " + (format #f "gnt:hasVersionId '~a'^^xsd:integer ; " version-id) (if (and (string? pmid) (not (string-null? pmid))) (format #f |