about summary refs log tree commit diff
path: root/dump.scm
diff options
context:
space:
mode:
author Arun Isaac 2021-09-09 15:12:28 +0530
committer Arun Isaac 2021-09-09 15:15:13 +0530
commit 7498accb1e0a5c73fabeb5fa44db22ced561842f (patch)
tree b5c6c5838795d729ece7d5ead6a6c9d653e43329 /dump.scm
parent 3a7bcfb8f3eb8eb9a198542fcb9abc0c7242bbfa (diff)
download gn-transform-databases-7498accb1e0a5c73fabeb5fa44db22ced561842f.tar.gz
Dump Investigators.
* dump.scm (fix-email-id, investigator-email->id, dump-investigators):
New functions.
Invoke dump-investigators.
Diffstat (limited to 'dump.scm')
-rwxr-xr-x dump.scm 43
1 files changed, 43 insertions, 0 deletions
diff --git a/dump.scm b/dump.scm
index d49e8bf..64f1c68 100755
--- a/dump.scm
+++ b/dump.scm
@@ -292,6 +292,48 @@ INNER JOIN InbredSet USING (InbredSetId)"))
                 db
                 "SELECT Name, Short_Name FROM Tissue"))
 
+;; The Investigators table holds one email ID with spaces in it. Strip
+;; every space character from EMAIL and return the repaired string.
+(define (fix-email-id email)
+  (string-delete #\space email))
+
+;; Return the symbol gn:investigator<EMAIL>, spaces removed and "@" as "_".
+(define (investigator-email->id email)
+  (let* ((cleaned (fix-email-id email))
+         (suffix (string-replace-substring cleaned "@" "_")))
+    (string->symbol (string-append "gn:investigator" suffix))))
+
+;; For each Investigators row with a non-empty email, type its ID as a
+;; foaf:Person via triple and hand its metadata to scm->triples.
+(define (dump-investigators db)
+  ;; Rename a gn: metadata pair to its FOAF equivalent; other pairs
+  ;; pass through untouched.
+  (define gn->foaf
+    (match-lambda
+      (('gn:firstName . first-name) (cons 'foaf:givenName first-name))
+      (('gn:lastName . last-name) (cons 'foaf:familyName last-name))
+      (('gn:phone . phone) (cons 'foaf:phone phone))
+      (('gn:email . email) (cons 'foaf:mbox (fix-email-id email)))
+      (('gn:url . url) (cons 'foaf:homepage url))
+      (other other)))
+  (define (dump-row row)
+    (let ((id (investigator-email->id (assoc-ref row "Email"))))
+      (triple id 'rdf:type 'foaf:Person)
+      (let* ((full-name (string-append (assoc-ref row "FirstName")
+                                       " " (assoc-ref row "LastName")))
+             (properties (map gn->foaf (process-metadata-alist row))))
+        (scm->triples (cons (cons 'foaf:name full-name) properties)
+                      id))))
+  (sql-for-each dump-row
+                db
+                ;; Some investigators appear more than once; grouping
+                ;; by email collapses the duplicates.
+                ;; TODO: Find email ID for records with none. (This is
+                ;; just one record corresponding to "Evan Williams")
+                "SELECT FirstName, LastName, Address, City, State, ZipCode, Phone, Email, Country, Url FROM Investigators
+WHERE Email != ''
+GROUP BY Email"))
+
 (define (dump-data-table db table-name data-field)
   (let ((dump-directory (string-append %dump-directory "/" table-name))
         (port #f)
@@ -341,3 +383,4 @@ INNER JOIN InbredSet USING (InbredSetId)"))
        (dump-publication db)
        (dump-publish-xref db)))))
        (dump-tissue db)
+       (dump-investigators db)