about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/lmdb-publishdata-export.scm 47
1 files changed, 22 insertions, 25 deletions
diff --git a/scripts/lmdb-publishdata-export.scm b/scripts/lmdb-publishdata-export.scm
index 2c1b4f3..8427112 100755
--- a/scripts/lmdb-publishdata-export.scm
+++ b/scripts/lmdb-publishdata-export.scm
@@ -154,12 +154,7 @@ dataset-trait combinations, and saves strain values to LMDB files in
 	    (match row
 	      ((("Name" . dataset-name)
 		("Id" . trait-id))
-	       (let* ((md5-hash
-		       (md5->string (md5 (string->bytevector (format #f "~a-~a" dataset-name trait-id)
-							     (make-transcoder (utf-8-codec))))))
-		      (data-dir (assq-ref settings 'output-dir))
-		      (md5-hash-dir (format #f "~a/~a" data-dir md5-hash))
-		      (data-query (format #f "SELECT
+			    (let* ((data-query (format #f "SELECT
 JSON_ARRAYAGG(JSON_ARRAY(Strain.Name, PublishData.Value)) AS data,
  MD5(JSON_ARRAY(Strain.Name, PublishData.Value)) as md5hash
 FROM
@@ -181,25 +176,27 @@ WHERE
     PublishFreeze.confidentiality < 1
 ORDER BY
     LENGTH(Strain.Name), Strain.Name" dataset-name trait-id)))
-		 (match (call-with-target-database
-			 settings
-			 (lambda (db2) (sql-find db2 data-query)))
-		   ((("data" . data)
-		     ("md5hash" . dataset-hash))
-		    (let ((lmdb-dir (string-join data-dir "/" md5-hash "-" dataset-hash)))
-		      (log-msg
-		       'INFO (format #f "Writing ~a-~a to: ~a" dataset-name trait-id lmdb-dir))
-		      (unless (file-exists? data-dir)
-			(mkdir data-dir))
-		      (lmdb-save (string-join data-dir "/index")
-				 (string-join (list dataset-name "-" trait-id))
-				 (string-join (list md5-hash "-" dataset-hash)))
-		      (vector-for-each
-		       (lambda (_ x)
-			 (match x
-			   (#(strain value)
-			    (lmdb-save lmdb-dir strain value))))
-		       (json-string->scm data)))))))))
+			      (match (call-with-target-database
+				      settings
+				      (lambda (db2) (sql-find db2 data-query)))
+				((("data" . data)
+				  ("md5hash" . md5-hash))
+				 (let* ((trait-name (format #f "~a~a" dataset-name trait-id))
+					(base-dir (assq-ref settings 'output-dir))
+					(out (format #f "~a-~a" trait-name
+						     (substring md5-hash 0 12)))
+					(out-dir (format #f "~a/~a" base-dir out)))
+				   (log-msg
+				    'INFO (format #f "Writing ~a to: ~a" trait-name out-dir))
+				   (unless (file-exists? out-dir)
+				     (mkdir out-dir))
+				   (lmdb-save (format #f "~a/index" base-dir) trait-name out)
+				   (vector-for-each
+				    (lambda (_ x)
+				      (match x
+					(#(strain value)
+					 (lmdb-save out-dir strain value))))
+				    (json-string->scm data)))))))))
           db
           "SELECT DISTINCT PublishFreeze.Name, PublishXRef.Id FROM
 PublishData INNER JOIN Strain ON PublishData.StrainId = Strain.Id