about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/lmdb-publishdata-export.scm 47
-rwxr-xr-x scripts/precompute/list-traits-to-compute.scm 4
2 files changed, 26 insertions, 25 deletions
diff --git a/scripts/lmdb-publishdata-export.scm b/scripts/lmdb-publishdata-export.scm
index 2c1b4f3..8427112 100755
--- a/scripts/lmdb-publishdata-export.scm
+++ b/scripts/lmdb-publishdata-export.scm
@@ -154,12 +154,7 @@ dataset-trait combinations, and saves strain values to LMDB files in
 	    (match row
 	      ((("Name" . dataset-name)
 		("Id" . trait-id))
-	       (let* ((md5-hash
-		       (md5->string (md5 (string->bytevector (format #f "~a-~a" dataset-name trait-id)
-							     (make-transcoder (utf-8-codec))))))
-		      (data-dir (assq-ref settings 'output-dir))
-		      (md5-hash-dir (format #f "~a/~a" data-dir md5-hash))
-		      (data-query (format #f "SELECT
+			    (let* ((data-query (format #f "SELECT
 JSON_ARRAYAGG(JSON_ARRAY(Strain.Name, PublishData.Value)) AS data,
  MD5(JSON_ARRAY(Strain.Name, PublishData.Value)) as md5hash
 FROM
@@ -181,25 +176,27 @@ WHERE
     PublishFreeze.confidentiality < 1
 ORDER BY
     LENGTH(Strain.Name), Strain.Name" dataset-name trait-id)))
-		 (match (call-with-target-database
-			 settings
-			 (lambda (db2) (sql-find db2 data-query)))
-		   ((("data" . data)
-		     ("md5hash" . dataset-hash))
-		    (let ((lmdb-dir (string-join data-dir "/" md5-hash "-" dataset-hash)))
-		      (log-msg
-		       'INFO (format #f "Writing ~a-~a to: ~a" dataset-name trait-id lmdb-dir))
-		      (unless (file-exists? data-dir)
-			(mkdir data-dir))
-		      (lmdb-save (string-join data-dir "/index")
-				 (string-join (list dataset-name "-" trait-id))
-				 (string-join (list md5-hash "-" dataset-hash)))
-		      (vector-for-each
-		       (lambda (_ x)
-			 (match x
-			   (#(strain value)
-			    (lmdb-save lmdb-dir strain value))))
-		       (json-string->scm data)))))))))
+			      (match (call-with-target-database
+				      settings
+				      (lambda (db2) (sql-find db2 data-query)))
+				((("data" . data)
+				  ("md5hash" . md5-hash))
+				 (let* ((trait-name (format #f "~a~a" dataset-name trait-id))
+					(base-dir (assq-ref settings 'output-dir))
+					(out (format #f "~a-~a" trait-name
+						     (substring md5-hash 0 12)))
+					(out-dir (format #f "~a/~a" base-dir out)))
+				   (log-msg
+				    'INFO (format #f "Writing ~a to: ~a" trait-name out-dir))
+				   (unless (file-exists? out-dir)
+				     (mkdir out-dir))
+				   (lmdb-save (format #f "~a/index" base-dir) trait-name out)
+				   (vector-for-each
+				    (lambda (_ x)
+				      (match x
+					(#(strain value)
+					 (lmdb-save out-dir strain value))))
+				    (json-string->scm data)))))))))
           db
           "SELECT DISTINCT PublishFreeze.Name, PublishXRef.Id FROM
 PublishData INNER JOIN Strain ON PublishData.StrainId = Strain.Id
diff --git a/scripts/precompute/list-traits-to-compute.scm b/scripts/precompute/list-traits-to-compute.scm
index 9f900d1..102a6fa 100755
--- a/scripts/precompute/list-traits-to-compute.scm
+++ b/scripts/precompute/list-traits-to-compute.scm
@@ -15,6 +15,10 @@ You may want to forward a mysql port if there is no DB locally
 
     ssh -L 3306:127.0.0.1:3306 -f -N tux02.genenetwork.org
 
+the ssh command may print the following IPv6 bind message, which can be ignored:
+
+    bind [::1]:3306: Cannot assign requested address
+
 test connection with mysql client:
 
     mysql -uwebqtlout -pwebqtlout -A -h 127.0.0.1 -P 3306 db_webqtl -e "show tables;"