about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2025-06-03 12:09:59 -0500
committer Frederick Muriuki Muriithi 2025-06-03 12:09:59 -0500
commit 9280e6232152991fca762d74f415b704a452673d (patch)
tree efe5df2ee4674551d201f167e5520e579bfd9e88 /scripts
parent 59b07a41ca5181a19e631f640b38e5fd33d1d550 (diff)
download gn-uploader-9280e6232152991fca762d74f415b704a452673d.tar.gz
Save data with `LOAD DATA INFILE …` query
To help speed up saving the data (for really huge files) into
the database, use the `LOAD DATA INFILE …` command if it is available;
if it is not, fall back to saving the data with raw queries.
Diffstat (limited to 'scripts')
-rw-r--r--scripts/load_phenotypes_to_db.py40
1 files changed, 29 insertions, 11 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index de06d70..d2d1d2c 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -18,7 +18,8 @@ from uploader.samples.models import samples_by_species_and_population
 from uploader.phenotypes.models import (
     dataset_by_id,
     save_phenotypes_data,
-    create_new_phenotypes)
+    create_new_phenotypes,
+    quick_save_phenotypes_data)
 from uploader.publications.models import (
     create_new_publications,
     fetch_publication_by_id)
@@ -157,16 +158,33 @@ def save_numeric_data(
                 build_line_joiner(control_data))
             for _file in phenofiles)
 
-    return save_phenotypes_data(
-        conn,
-        table,
-        __build_dataitems__(
-            filetype,
-            phenofiles,
-            control_data,
-            samples,
-            dataidmap,
-            pheno_name2id))
+    try:
+        logger.debug("Attempt quick save with `LOAD … INFILE`.")
+        return quick_save_phenotypes_data(
+            conn,
+            table,
+            __build_dataitems__(
+                filetype,
+                phenofiles,
+                control_data,
+                samples,
+                dataidmap,
+                pheno_name2id),
+            filesdir)
+    except Exception as _exc:
+        logger.debug("Could not use `LOAD … INFILE`, using raw query",
+                     exc_info=True)
+        import time;time.sleep(60)
+        return save_phenotypes_data(
+            conn,
+            table,
+            __build_dataitems__(
+                filetype,
+                phenofiles,
+                control_data,
+                samples,
+                dataidmap,
+                pheno_name2id))
 
 
 save_pheno_data = partial(save_numeric_data,