about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--scripts/load_phenotypes_to_db.py40
-rw-r--r--uploader/phenotypes/models.py56
2 files changed, 76 insertions, 20 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index de06d70..d2d1d2c 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -18,7 +18,8 @@ from uploader.samples.models import samples_by_species_and_population
 from uploader.phenotypes.models import (
     dataset_by_id,
     save_phenotypes_data,
-    create_new_phenotypes)
+    create_new_phenotypes,
+    quick_save_phenotypes_data)
 from uploader.publications.models import (
     create_new_publications,
     fetch_publication_by_id)
@@ -157,16 +158,33 @@ def save_numeric_data(
                 build_line_joiner(control_data))
             for _file in phenofiles)
 
-    return save_phenotypes_data(
-        conn,
-        table,
-        __build_dataitems__(
-            filetype,
-            phenofiles,
-            control_data,
-            samples,
-            dataidmap,
-            pheno_name2id))
+    try:
+        logger.debug("Attempt quick save with `LOAD … INFILE`.")
+        return quick_save_phenotypes_data(
+            conn,
+            table,
+            __build_dataitems__(
+                filetype,
+                phenofiles,
+                control_data,
+                samples,
+                dataidmap,
+                pheno_name2id),
+            filesdir)
+    except Exception as _exc:
+        logger.debug("Could not use `LOAD … INFILE`, using raw query",
+                     exc_info=True)
+        import time;time.sleep(60)
+        return save_phenotypes_data(
+            conn,
+            table,
+            __build_dataitems__(
+                filetype,
+                phenofiles,
+                control_data,
+                samples,
+                dataidmap,
+                pheno_name2id))
 
 
 save_pheno_data = partial(save_numeric_data,
diff --git a/uploader/phenotypes/models.py b/uploader/phenotypes/models.py
index e47a016..20b8e77 100644
--- a/uploader/phenotypes/models.py
+++ b/uploader/phenotypes/models.py
@@ -1,5 +1,7 @@
 """Database and utility functions for phenotypes."""
 import logging
+import tempfile
+from pathlib import Path
 from functools import reduce
 from datetime import datetime
 from typing import Optional, Iterable
@@ -13,6 +15,16 @@ from gn_libs.mysqldb import debug_query
 logger = logging.getLogger(__name__)
 
 
+__PHENO_DATA_TABLES__ = {
+    "PublishData": {
+        "table": "PublishData", "valueCol": "value", "DataIdCol": "Id"},
+    "PublishSE": {
+        "table": "PublishSE", "valueCol": "error", "DataIdCol": "DataId"},
+    "NStrain": {
+        "table": "NStrain", "valueCol": "count", "DataIdCol": "DataId"}
+}
+
+
 def datasets_by_population(
         conn: mdb.Connection,
         species_id: int,
@@ -326,14 +338,7 @@ def save_phenotypes_data(
         data: Iterable[dict]
 ) -> int:
     """Save new phenotypes data into the database."""
-    _table_details = {
-        "PublishData": {
-            "table": "PublishData", "valueCol": "value", "DataIdCol": "Id"},
-        "PublishSE": {
-            "table": "PublishSE", "valueCol": "error", "DataIdCol": "DataId"},
-        "NStrain": {
-            "table": "PublishData", "valueCol": "count", "DataIdCol": "DataId"}
-    }[table]
+    _table_details = __PHENO_DATA_TABLES__[table]
     with conn.cursor(cursorclass=DictCursor) as cursor:
         _count = 0
         while True:
@@ -347,7 +352,7 @@ def save_phenotypes_data(
                 (f"INSERT INTO {_table_details['table']}"
                  f"({_table_details['DataIdCol']}, StrainId, {_table_details['valueCol']}) "
                  "VALUES "
-                 f"(%(data_id)s, %(sample_id)s, %({_table_details['valueCol']})s) "),
+                 f"(%(data_id)s, %(sample_id)s, %(value)s) "),
                 tuple(batch))
             debug_query(cursor, logger)
             _count = _count + len(batch)
@@ -355,3 +360,36 @@ def save_phenotypes_data(
 
     logger.debug("Saved a total of %s data rows", _count)
     return _count
+
+
+def quick_save_phenotypes_data(
+        conn: mdb.Connection,
+        table: str,
+        dataitems: Iterable[dict],
+        tmpdir: Path
+) -> int:
+    """Save data items to the database, but using """
+    _table_details = __PHENO_DATA_TABLES__[table]
+    with (tempfile.NamedTemporaryFile(
+            prefix=f"{table}_data", mode="wt", dir=tmpdir) as tmpfile,
+          conn.cursor(cursorclass=DictCursor) as cursor):
+        _count = 0
+        console.debug("Write data rows to text file.")
+        for row in dataitems:
+            tmpfile.write(
+                f'{row["data_id"]}\t{row["sample_id"]}\t{row["value"]}\n')
+            _count = _count + 1
+        tmpfile.flush()
+
+        console.debug("Load text file into database (table: %s)",
+                      _table_details["table"])
+        cursor.execute(
+            f"LOAD DATA LOCAL INFILE '{tmpfile.name}' "
+            f"INTO TABLE {_table_details['table']} "
+            "("
+            f"{_table_details['DataIdCol']}, "
+            "StrainId, "
+            f"{_table_details['valueCol']}"
+            ")")
+        debug_query(cursor, logger)
+        return _count