aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Munyoki Kilyungi 2024-12-09 14:15:44 +0300
committer Munyoki Kilyungi 2024-12-09 14:15:44 +0300
commit 9a36fbdf563722b351b4c53c3152e7dac92a7978 (patch)
tree 5be060adaf1a6a70dee9b74bfdb6a8844f328865
parent d3827cf3f82996e35c1c28e35813f9521f2257d7 (diff)
download gn-transform-databases-master.tar.gz
Reset old generif dump to: 0bbf317208b8. [HEAD, master]
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x examples/generif-old.scm 87
1 files changed, 49 insertions, 38 deletions
diff --git a/examples/generif-old.scm b/examples/generif-old.scm
index ba6768d..ede5a28 100755
--- a/examples/generif-old.scm
+++ b/examples/generif-old.scm
@@ -15,6 +15,22 @@
+(define (fix-email-id email) ; Return EMAIL with every #\space character removed.
+ (string-delete #\space email))
+
+(define (investigator-attributes->id first-name last-name email)
+ ;; Build the investigator ID from the first name, the last name and
+ ;; the email ID (spaces stripped), joined by "_". The names are used
+ ;; as well because there is just one record, corresponding to "Evan
+ ;; Williams", which does not have an email ID. It would be preferable
+ ;; to just find Evan Williams' email ID and insert it into the database.
+ (string->identifier "investigator"
+ (string-join
+ (list first-name last-name (fix-email-id email))
+ "_")))
+
+
+
(define-transformer genewiki-symbols
(tables (GeneRIF_BASIC)
"GROUP BY BINARY symbol")
@@ -47,17 +63,12 @@
(tables (GeneRIF
(left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId")
(left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id")
- (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id"))
- "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol, GeneRIF.SpeciesId, GeneRIF.createtime, GeneRIF.reason")
+ (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id")
+ (left-join Investigators "ON Investigators.Email = GeneRIF.email"))
+ "WHERE GeneRIF.display > 0 AND GeneRIF.VersionId = 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.comment, BINARY GeneRIF.symbol")
(schema-triples
(gnc:GeneWikiEntry a rdfs:Class)
(gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
- (gnt:initial a owl:ObjectProperty)
- (gnt:initial rdfs:domain gnc:GeneWikiEntry)
- (gnt:initial skos:definition "Optional user or project code or your initials")
- (gnt:reason a owl:ObjectProperty)
- (gnt:reason rdfs:domain gnc:GeneWikiEntry)
- (gnt:reason skos:definition "The reason why this resource was modified")
(gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork")
(gnt:geneSymbol rdfs:domain gnc:GNWikiEntry))
(triples
@@ -69,23 +80,25 @@
'pre "_" 'post)
#:proc (lambda (x) x))
(set rdfs:comment
- (let* ((generif-comment (sanitize-rdf-string (field GeneRIF comment)))
- (create-time (field GeneRIF createtime EntryCreateTime))
- (pmid (field GeneRIF PubMed_ID PMID))
- (web-url (field GeneRIF weburl))
- (species (string->identifier
+ (let* ([generif-comment (sanitize-rdf-string (field GeneRIF comment))]
+ [create-time (field GeneRIF createtime EntryCreateTime)]
+ [pmid (field GeneRIF PubMed_ID PMID)]
+ [web-url (field GeneRIF weburl)]
+ [species (string->identifier
""
(remap-species-identifiers (field Species Fullname))
#:separator ""
- #:proc string-capitalize-first))
- (version-id (field GeneRIF versionId))
- (identifier (field GeneRIF Id))
- (initial (sanitize-rdf-string (field GeneRIF initial)))
- (reason (field GeneRIF reason))
- (email (sanitize-rdf-string (field GeneRIF email)))
- (category
- (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR '; ')"
- GeneCategory))))
+ #:proc string-capitalize-first)]
+ [categories
+ (remove (lambda (x)
+ (or (eq? x #f)
+ (and (string? x)
+ (string-null? x))))
+ (remove-duplicates
+ (string-split-substring
+ (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR '$$')"
+ GeneCategory))
+ "$$")))])
(string->symbol
(string-append
"[ "
@@ -106,22 +119,20 @@
"~{dct:references pubmed:~a ; ~}"
(string-split pmid #\space))
"")
- (if (string-blank? email)
- ""
- (format #f "foaf:mbox ~s ; " email))
- (format #f "dct:identifier ~s ; " identifier)
- (format #f "dct:hasVersion \"~s\"^^xsd:int ; " version-id)
- (if (string-blank? reason)
- ""
- (format #f "gnt:reason ~s ; " reason))
- (if (or (null? initial)
- (string-blank? initial))
- "" (format #f "gnt:initial ~s ; " initial))
- (if (string-blank? category)
- ""
+ (if (and (not (string-null?
+ (string-trim-both (field GeneRIF email))))
+ (not (string-null? (field Investigators Email))))
+ (format #f "dct:creator ~a ; "
+ (investigator-attributes->id
+ (field Investigators FirstName)
+ (field Investigators LastName)
+ (field Investigators Email)))
+ "")
+ (if (not (null? categories))
(format #f
- "gnt:belongsToCategory ~s ; "
- category))
+ "~{gnt:belongsToCategory ~s ; ~}"
+ categories)
+ "")
(if (and (string? web-url) (not (string-null? web-url)))
(format #f "foaf:homepage ~s ; "
web-url)
@@ -172,7 +183,7 @@
taxonomic-id))
(format #f "gnt:hasGeneId generif:~a ; "
gene-id)
- (format #f "dct:hasVersion '~a'^^xsd:int ; "
+ (format #f "gnt:hasVersionId '~a'^^xsd:integer ; "
version-id)
(if (and (string? pmid) (not (string-null? pmid)))
(format #f