about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x examples/generif-old.scm 87
1 files changed, 49 insertions, 38 deletions
diff --git a/examples/generif-old.scm b/examples/generif-old.scm
index ba6768d..ede5a28 100755
--- a/examples/generif-old.scm
+++ b/examples/generif-old.scm
@@ -15,6 +15,22 @@
 
 
 
+(define (fix-email-id email)   ; return EMAIL with every #\space character removed
+  (string-filter (lambda (ch) (not (char=? ch #\space))) email))
+
+(define (investigator-attributes->id first-name last-name email)
+  ;; Hand string->identifier the prefix "investigator" plus the first
+  ;; name, last name and space-stripped email joined with "_". The
+  ;; names are part of the ID because a single record ("Evan Williams")
+  ;; has no email ID, so the email alone cannot identify it. Finding
+  ;; that email ID and storing it in the database would be preferable.
+  (let* ([clean-email (fix-email-id email)]
+         [id-parts (list first-name last-name clean-email)]
+         [joined (string-join id-parts "_")])
+    (string->identifier "investigator" joined)))
+
+
+
 (define-transformer genewiki-symbols
   (tables (GeneRIF_BASIC)
           "GROUP BY BINARY symbol")
@@ -47,17 +63,12 @@
   (tables (GeneRIF
            (left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId")
            (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id")
-           (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id"))
-          "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol, GeneRIF.SpeciesId, GeneRIF.createtime, GeneRIF.reason")
+           (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id")
+           (left-join Investigators "ON Investigators.Email = GeneRIF.email"))
+          "WHERE GeneRIF.display > 0 AND GeneRIF.VersionId = 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.comment, BINARY GeneRIF.symbol")
   (schema-triples
    (gnc:GeneWikiEntry a rdfs:Class)
    (gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
-   (gnt:initial a owl:ObjectProperty)
-   (gnt:initial rdfs:domain gnc:GeneWikiEntry)
-   (gnt:initial skos:definition "Optional user or project code or your initials")
-   (gnt:reason a owl:ObjectProperty)
-   (gnt:reason rdfs:domain gnc:GeneWikiEntry)
-   (gnt:reason skos:definition "The reason why this resource was modified")
    (gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork")
    (gnt:geneSymbol rdfs:domain gnc:GNWikiEntry))
   (triples
@@ -69,23 +80,25 @@
         'pre "_" 'post)
        #:proc (lambda (x) x))
     (set rdfs:comment
-         (let* ((generif-comment (sanitize-rdf-string (field GeneRIF comment)))
-                (create-time (field GeneRIF createtime EntryCreateTime))
-                (pmid (field GeneRIF PubMed_ID PMID))
-                (web-url (field GeneRIF weburl))
-                (species (string->identifier
+         (let* ([generif-comment (sanitize-rdf-string (field GeneRIF comment))]
+                [create-time (field GeneRIF createtime EntryCreateTime)]
+                [pmid (field GeneRIF PubMed_ID PMID)]
+                [web-url (field GeneRIF weburl)]
+                [species (string->identifier
                           ""
                           (remap-species-identifiers (field Species Fullname))
                           #:separator ""
-                          #:proc string-capitalize-first))
-                (version-id (field GeneRIF versionId))
-                (identifier (field GeneRIF Id))
-                (initial (sanitize-rdf-string (field GeneRIF initial)))
-                (reason (field GeneRIF reason))
-                (email (sanitize-rdf-string (field GeneRIF email)))
-                (category
-                 (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR '; ')"
-                         GeneCategory))))
+                          #:proc string-capitalize-first)]
+                [categories
+                 (remove (lambda (x)
+                           (or (eq? x #f)
+                               (and (string? x)
+                                    (string-null? x))))
+                         (remove-duplicates
+                          (string-split-substring
+                           (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR '$$')"
+                                   GeneCategory))
+                           "$$")))])
            (string->symbol
             (string-append
              "[ "
@@ -106,22 +119,20 @@
                          "~{dct:references pubmed:~a ; ~}"
                          (string-split pmid #\space))
                  "")
-             (if (string-blank? email)
-                 ""
-                 (format #f "foaf:mbox ~s ; " email))
-             (format #f "dct:identifier ~s ; " identifier)
-             (format #f "dct:hasVersion \"~s\"^^xsd:int ; " version-id)
-             (if (string-blank? reason)
-                 ""
-                 (format #f "gnt:reason ~s ; " reason))
-             (if (or (null? initial)
-                      (string-blank? initial))
-                 "" (format #f "gnt:initial ~s ; " initial))
-             (if (string-blank? category)
-                 ""
+             (if (and (not (string-null?
+                            (string-trim-both (field GeneRIF email))))
+                      (not (string-null? (field Investigators Email))))
+                 (format #f "dct:creator ~a ; "
+                         (investigator-attributes->id
+                          (field Investigators FirstName)
+                          (field Investigators LastName)
+                          (field Investigators Email)))
+                 "")
+             (if (not (null? categories))
                  (format #f
-                         "gnt:belongsToCategory ~s ; "
-                         category))
+                         "~{gnt:belongsToCategory ~s ; ~}"
+                         categories)
+                 "")
              (if (and (string? web-url) (not (string-null? web-url)))
                  (format #f "foaf:homepage ~s ; "
                          web-url)
@@ -172,7 +183,7 @@
                          taxonomic-id))
              (format #f "gnt:hasGeneId generif:~a ; "
                      gene-id)
-             (format #f "dct:hasVersion '~a'^^xsd:int ; "
+             (format #f "gnt:hasVersionId '~a'^^xsd:integer ; "
                      version-id)
              (if (and (string? pmid) (not (string-null? pmid)))
                  (format #f