diff options
author | Munyoki Kilyungi | 2025-02-19 12:31:36 +0300 |
---|---|---|
committer | Munyoki Kilyungi | 2025-02-19 12:31:36 +0300 |
commit | 169d26710c978484f8e9464a95be6eaa47fa704e (patch) | |
tree | 591c2084f1bccdcea4247dc68cca7b665817d234 | |
parent | c083d08e253f36e5bb31d15e3e01683219f9d3ba (diff) | |
download | gn-guile-main.tar.gz |
* scripts/lmdb-publishdata-export.scm (save-dataset-values): Use a
trait's name and the first 12 characters of the dataset's MD5 checksum
both in the index entry and as the name of the output directory.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x | scripts/lmdb-publishdata-export.scm | 47 |
1 file changed, 22 insertions, 25 deletions
diff --git a/scripts/lmdb-publishdata-export.scm b/scripts/lmdb-publishdata-export.scm index 2c1b4f3..8427112 100755 --- a/scripts/lmdb-publishdata-export.scm +++ b/scripts/lmdb-publishdata-export.scm @@ -154,12 +154,7 @@ dataset-trait combinations, and saves strain values to LMDB files in (match row ((("Name" . dataset-name) ("Id" . trait-id)) - (let* ((md5-hash - (md5->string (md5 (string->bytevector (format #f "~a-~a" dataset-name trait-id) - (make-transcoder (utf-8-codec)))))) - (data-dir (assq-ref settings 'output-dir)) - (md5-hash-dir (format #f "~a/~a" data-dir md5-hash)) - (data-query (format #f "SELECT + (let* ((data-query (format #f "SELECT JSON_ARRAYAGG(JSON_ARRAY(Strain.Name, PublishData.Value)) AS data, MD5(JSON_ARRAY(Strain.Name, PublishData.Value)) as md5hash FROM @@ -181,25 +176,27 @@ WHERE PublishFreeze.confidentiality < 1 ORDER BY LENGTH(Strain.Name), Strain.Name" dataset-name trait-id))) - (match (call-with-target-database - settings - (lambda (db2) (sql-find db2 data-query))) - ((("data" . data) - ("md5hash" . dataset-hash)) - (let ((lmdb-dir (string-join data-dir "/" md5-hash "-" dataset-hash))) - (log-msg - 'INFO (format #f "Writing ~a-~a to: ~a" dataset-name trait-id lmdb-dir)) - (unless (file-exists? data-dir) - (mkdir data-dir)) - (lmdb-save (string-join data-dir "/index") - (string-join (list dataset-name "-" trait-id)) - (string-join (list md5-hash "-" dataset-hash))) - (vector-for-each - (lambda (_ x) - (match x - (#(strain value) - (lmdb-save lmdb-dir strain value)))) - (json-string->scm data))))))))) + (match (call-with-target-database + settings + (lambda (db2) (sql-find db2 data-query))) + ((("data" . data) + ("md5hash" . 
md5-hash)) + (let* ((trait-name (format #f "~a~a" dataset-name trait-id)) + (base-dir (assq-ref settings 'output-dir)) + (out (format #f "~a-~a" trait-name + (substring md5-hash 0 12))) + (out-dir (format #f "~a/~a" base-dir out))) + (log-msg + 'INFO (format #f "Writing ~a to: ~a" trait-name out-dir)) + (unless (file-exists? out-dir) + (mkdir out-dir)) + (lmdb-save (format #f "~a/index" base-dir) trait-name out) + (vector-for-each + (lambda (_ x) + (match x + (#(strain value) + (lmdb-save out-dir strain value)))) + (json-string->scm data))))))))) db "SELECT DISTINCT PublishFreeze.Name, PublishXRef.Id FROM PublishData INNER JOIN Strain ON PublishData.StrainId = Strain.Id |