aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Munyoki Kilyungi 2025-02-19 12:31:36 +0300
committer Munyoki Kilyungi 2025-02-19 12:31:36 +0300
commit 169d26710c978484f8e9464a95be6eaa47fa704e (patch)
tree 591c2084f1bccdcea4247dc68cca7b665817d234
parent c083d08e253f36e5bb31d15e3e01683219f9d3ba (diff)
download gn-guile-main.tar.gz
Use a combination of trait-id and first 12 chars of md5 sum. (HEAD, main)
* scripts/lmdb-publishdata-export.scm (save-dataset-values): Use a trait's name and the first 12 characters of the dataset's md5 checksum in the index and when storing to path.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x scripts/lmdb-publishdata-export.scm 47
1 files changed, 22 insertions, 25 deletions
diff --git a/scripts/lmdb-publishdata-export.scm b/scripts/lmdb-publishdata-export.scm
index 2c1b4f3..8427112 100755
--- a/scripts/lmdb-publishdata-export.scm
+++ b/scripts/lmdb-publishdata-export.scm
@@ -154,12 +154,7 @@ dataset-trait combinations, and saves strain values to LMDB files in
(match row
((("Name" . dataset-name)
("Id" . trait-id))
- (let* ((md5-hash
- (md5->string (md5 (string->bytevector (format #f "~a-~a" dataset-name trait-id)
- (make-transcoder (utf-8-codec))))))
- (data-dir (assq-ref settings 'output-dir))
- (md5-hash-dir (format #f "~a/~a" data-dir md5-hash))
- (data-query (format #f "SELECT
+ (let* ((data-query (format #f "SELECT
JSON_ARRAYAGG(JSON_ARRAY(Strain.Name, PublishData.Value)) AS data,
MD5(JSON_ARRAY(Strain.Name, PublishData.Value)) as md5hash
FROM
@@ -181,25 +176,27 @@ WHERE
PublishFreeze.confidentiality < 1
ORDER BY
LENGTH(Strain.Name), Strain.Name" dataset-name trait-id)))
- (match (call-with-target-database
- settings
- (lambda (db2) (sql-find db2 data-query)))
- ((("data" . data)
- ("md5hash" . dataset-hash))
- (let ((lmdb-dir (string-join data-dir "/" md5-hash "-" dataset-hash)))
- (log-msg
- 'INFO (format #f "Writing ~a-~a to: ~a" dataset-name trait-id lmdb-dir))
- (unless (file-exists? data-dir)
- (mkdir data-dir))
- (lmdb-save (string-join data-dir "/index")
- (string-join (list dataset-name "-" trait-id))
- (string-join (list md5-hash "-" dataset-hash)))
- (vector-for-each
- (lambda (_ x)
- (match x
- (#(strain value)
- (lmdb-save lmdb-dir strain value))))
- (json-string->scm data)))))))))
+ (match (call-with-target-database
+ settings
+ (lambda (db2) (sql-find db2 data-query)))
+ ((("data" . data)
+ ("md5hash" . md5-hash))
+ (let* ((trait-name (format #f "~a~a" dataset-name trait-id))
+ (base-dir (assq-ref settings 'output-dir))
+ (out (format #f "~a-~a" trait-name
+ (substring md5-hash 0 12)))
+ (out-dir (format #f "~a/~a" base-dir out)))
+ (log-msg
+ 'INFO (format #f "Writing ~a to: ~a" trait-name out-dir))
+ (unless (file-exists? out-dir)
+ (mkdir out-dir))
+ (lmdb-save (format #f "~a/index" base-dir) trait-name out)
+ (vector-for-each
+ (lambda (_ x)
+ (match x
+ (#(strain value)
+ (lmdb-save out-dir strain value))))
+ (json-string->scm data)))))))))
db
"SELECT DISTINCT PublishFreeze.Name, PublishXRef.Id FROM
PublishData INNER JOIN Strain ON PublishData.StrainId = Strain.Id