aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArun Isaac2021-09-14 13:38:02 +0530
committerArun Isaac2021-09-14 13:39:46 +0530
commit27b9692815dcc433cc0433ceaf948833b340a21b (patch)
tree6db47dd1c33432b465a6a44c0fd918005b028554
parent573f49eb873651f1fecfda352007da8429e2bc8a (diff)
downloadgn-transform-databases-27b9692815dcc433cc0433ceaf948833b340a21b.tar.gz
Dump InfoFiles.
* dump.scm (dump-info-files): New function. [main]: Call dump-info-files.
-rwxr-xr-xdump.scm77
1 files changed, 76 insertions, 1 deletions
diff --git a/dump.scm b/dump.scm
index 0e57a03..a629153 100755
--- a/dump.scm
+++ b/dump.scm
@@ -360,6 +360,80 @@ GROUP BY Email"))
db
"SELECT GeneChipName, Name FROM GeneChip"))
+(define (dump-info-files db)
+ (sql-for-each (lambda (alist)
+ (let ((id (string-append "gn:dataset"
+ (number->string
+ (assoc-ref alist "GN_AccesionId")))))
+ (triple id 'rdf:type 'gn:dataset)
+ (scm->triples
+ (filter-map (match-lambda
+ (('gn:gNAccesionId . accession-id)
+ (cons 'gn:accessionId
+ (string-append "GN" (number->string accession-id))))
+ (('gn:datasetStatusName . status)
+ (cons 'gn:datasetStatus
+ (string-downcase status)))
+ (('gn:binomialName . binomial-name)
+ (cons 'gn:datasetOfSpecies
+ (binomial-name->species-id binomial-name)))
+ (('gn:inbredSetName . inbred-set-name)
+ (cons 'gn:datasetOfInbredSet
+ (inbred-set-name->id inbred-set-name)))
+ (('gn:shortName . short-name)
+ (cons 'gn:datasetOfTissue
+ (tissue-short-name->id short-name)))
+ (('gn:email . email)
+ (cons 'gn:datasetOfInvestigator
+ (investigator-email->id email)))
+ (('gn:avgMethodName . (? (negate (cut string=? <> "N/A"))
+ avg-method-name))
+ (cons 'gn:normalization
+ (avg-method-name->id avg-method-name)))
+ (('gn:geneChip . name)
+ (cons 'gn:datasetOfPlatform
+ (gene-chip-name->id name)))
+ (('gn:summary . summary)
+ ;; TODO: Why are there unprintable
+ ;; characters in the summary?
+ (cons 'gn:summary
+ (delete-substrings summary "\x01" "\x03")))
+ (('gn:aboutTissue . about-tissue)
+ ;; TODO: Why are there unprintable
+ ;; characters in the summary?
+ (cons 'gn:aboutTissue
+ (delete-substrings about-tissue "\x01" "\x03")))
+ (x x))
+ (process-metadata-alist alist))
+ id)))
+ db
+ ;; TODO: Find email ID for records with none. (This is
+ ;; just one record corresponding to "Evan Williams")
+ ;; TODO: Double check Platforms. It doesn't seem to
+ ;; match up.
+ "SELECT GN_AccesionId, InfoPageTitle AS Name, InfoFiles.Title,
+Specifics, DatasetStatusName,
+Datasets.Summary, Datasets.GeoSeries, Datasets.AboutCases,
+Datasets.AboutPlatform, Datasets.AboutTissue, Datasets.AboutDataProcessing,
+Datasets.Notes, Datasets.ExperimentDesign, Datasets.Contributors,
+Datasets.Citation, Datasets.Acknowledgment,
+Species.FullName AS BinomialName,
+InbredSet.Name AS InbredSetName,
+Tissue.Short_Name,
+Investigators.Email,
+AvgMethod.Name AS AvgMethodName,
+GeneChip.Name AS GeneChip
+FROM InfoFiles
+LEFT JOIN Datasets USING (DatasetId)
+LEFT JOIN DatasetStatus USING (DatasetStatusId)
+LEFT JOIN Species USING (SpeciesId)
+LEFT JOIN InbredSet USING (InbredSetId)
+LEFT JOIN Tissue USING (TissueId)
+LEFT JOIN Investigators USING (InvestigatorId)
+LEFT JOIN AvgMethod USING (AvgMethodId)
+LEFT JOIN GeneChip USING (GeneChipId)
+WHERE Investigators.Email != ''"))
+
(define (dump-data-table db table-name data-field)
(let ((dump-directory (string-append %dump-directory "/" table-name))
(port #f)
@@ -411,4 +485,5 @@ GROUP BY Email"))
(dump-tissue db)
(dump-investigators db)
(dump-avg-method db)
- (dump-gene-chip db)))))
+ (dump-gene-chip db)
+ (dump-info-files db)))))