-rw-r--r--  scripts/load_phenotypes_to_db.py  75
-rw-r--r--  uploader/static/js/debug.js       40
2 files changed, 78 insertions, 37 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index b78c648..3a0df77 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -3,6 +3,7 @@ import json
import logging
import argparse
from pathlib import Path
+from zipfile import ZipFile
from typing import Any, Union
from MySQLdb.cursors import Cursor, DictCursor
@@ -10,8 +11,10 @@ from MySQLdb.cursors import Cursor, DictCursor
from gn_libs import jobs, mysqldb, sqlite3
from r_qtl import r_qtl2 as rqtl2
-from uploader.phenotypes.models import save_phenotypes_data
from uploader.samples.models import samples_by_species_and_population
+from uploader.phenotypes.models import (
+ save_phenotypes_data,
+ create_new_phenotypes)
from uploader.publications.models import (
create_new_publications,
fetch_publication_by_id)
@@ -25,18 +28,18 @@ logger = logging.getLogger(__name__)
def save_phenotypes(
cursor: mysqldb.Connection,
- control_data: dict[str, Any]
+ control_data: dict[str, Any],
+ filesdir: Path
) -> tuple[dict, ...]:
"""Read `phenofiles` and save the phenotypes therein."""
logger.info("Saving new phenotypes.")
- logger.debug("Processing %s 'pheno' files.", len(phenofiles))
## TODO: Replace with something like this: ##
# phenofiles = control_data["phenocovar"] + control_data.get(
# "gn-metadata", {}).get("pheno", [])
#
# This is meant to load (and merge) data from the "phenocovar" and
# "gn-metadata -> pheno" files into a single collection of phenotypes.
- phenofiles = control_data["phenocovar"]
+ phenofiles = tuple(filesdir.joinpath(_file) for _file in control_data["phenocovar"])
if len(phenofiles) <= 0:
return tuple()
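
A rough sketch of the merge the TODO above describes, assuming `control_data` carries the "phenocovar" and "gn-metadata" -> "pheno" keys named in the comment; the helper name is hypothetical:

    # Hypothetical helper combining both phenotype sources named in the
    # TODO into one tuple of paths under `filesdir`.
    def merged_phenofiles(control_data: dict, filesdir: Path) -> tuple[Path, ...]:
        names = (list(control_data.get("phenocovar", []))
                 + list(control_data.get("gn-metadata", {}).get("pheno", [])))
        return tuple(filesdir.joinpath(name) for name in names)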
@@ -47,9 +50,9 @@ def save_phenotypes(
_file,
build_line_splitter(control_data),
build_line_joiner(control_data))
- for _file in control_data["phenocovar"])
+ for _file in phenofiles)
- _headers = rqtl2.read_csv_file_headers(control_data["phenocovar"][0],
+ _headers = rqtl2.read_csv_file_headers(phenofiles[0],
control_data["phenocovar_transposed"],
control_data["sep"],
control_data["comment.char"])
@@ -66,17 +69,19 @@ def __fetch_next_dataid__(conn: mysqldb.Connection) -> int:
with conn.cursor(cursorclass=DictCursor) as cursor:
cursor.execute(
"SELECT MAX(DataId) AS CurrentMaxDataId FROM PublishXRef")
- return int(cursor.fetchone()) + 1
+ return int(cursor.fetchone()["CurrentMaxDataId"]) + 1
def save_pheno_data(
conn: mysqldb.Connection,
dataidmap: dict,
samples: tuple[dict, ...],
- control_data: dict
+ control_data: dict,
+ filesdir: Path
):
"""Read the `datafiles` and save the data in the database."""
- phenofiles = control_data["pheno"]
+ phenofiles = tuple(
+ filesdir.joinpath(_file) for _file in control_data["pheno"])
if len(phenofiles) <= 0:
return tuple()
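
The `__fetch_next_dataid__` change above fixes a real type error: with `cursorclass=DictCursor`, `fetchone()` returns a mapping keyed by column alias, not a bare value, so the row must be indexed before casting. A minimal illustration, with a made-up current maximum:

    row = cursor.fetchone()              # e.g. {"CurrentMaxDataId": 12345}
    int(row)                             # TypeError: int() argument must be
                                         # a number, not 'dict'
    int(row["CurrentMaxDataId"]) + 1     # 12346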
@@ -87,9 +92,9 @@ def save_pheno_data(
_file,
build_line_splitter(control_data),
build_line_joiner(control_data))
- for _file in control_data["pheno"])
+ for _file in phenofiles)
- _headers = rqtl2.read_csv_file_headers(control_data["pheno"][0],
+ _headers = rqtl2.read_csv_file_headers(phenofiles[0],
control_data["pheno_transposed"],
control_data["sep"],
control_data["comment.char"])
@@ -108,7 +113,7 @@ def save_pheno_data(
return save_phenotypes_data(
conn,
- "PublishData"
+ "PublishData",
(item for item in
(row_to_dataitems(dict(zip(_headers, line))) for filecontent
in (rqtl2.read_csv_file(path) for path in phenofiles)
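
The comma added after "PublishData" is a genuine bug fix, not a style tweak: without it, Python parses the string literal followed by the parenthesised generator as a call expression on the string, which fails at runtime:

    save_phenotypes_data(conn, "PublishData" (item for item in ...))
    # TypeError: 'str' object is not callable

The same fix is applied to the "PublishSE" and "NStrain" calls in the next two hunks.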
@@ -155,7 +160,7 @@ def save_phenotype_se(
return save_phenotypes_data(
conn,
- "PublishSE"
+ "PublishSE",
(item for item in
(row_to_dataitems(dict(zip(_headers, line))) for filecontent
in (rqtl2.read_csv_file(path) for path in sefiles)
@@ -202,7 +207,7 @@ def save_phenotype_n(
return save_phenotypes_data(
conn,
- "NStrain"
+ "NStrain",
(item for item in
(row_to_dataitems(dict(zip(_headers, line))) for filecontent
in (rqtl2.read_csv_file(path) for path in sefiles)
@@ -243,10 +248,14 @@ def load_data(conn, job):
conn, int(_job_metadata.get("publicationid", "0"))) or {"Id": 0}
# 2. Save all new phenotypes:
# -> return phenotype IDs
- _control_data = rqtl.control_data(job["job-metadata"]["bundle-file"])
+ bundle = Path(_job_metadata["bundle_file"])
+ _control_data = rqtl2.control_data(bundle)
+ logger.info("Extracting the zipped bundle of files.")
+ _outdir = Path(bundle.parent, f"bundle_{bundle.stem}")
+ with ZipFile(str(bundle), "r") as zfile:
+ _files = rqtl2.extract(zfile, _outdir)
logger.info("Saving basic phenotype data.")
-
- _phenos = save_phenotypes(cursor, _control_data)
+ _phenos = save_phenotypes(conn, _control_data, _outdir)
dataidmap = {
row["phenotype_id"]: {
"population_id": population["Id"],
@@ -262,14 +271,14 @@ def load_data(conn, job):
for row in samples_by_species_and_population(
conn, species["SpeciesId"], population["PopulationId"])}
# b. Save all the data items (DataIds are pre-assigned in dataidmap), return new IDs
- data = save_pheno_data(conn, dataidmap, samples, _control_data)
+ data = save_pheno_data(conn, dataidmap, samples, _control_data, _outdir)
# 4. Cross-reference Phenotype, Publication, and PublishData in PublishXRef
xrefs = cross_reference_phenotypes_publications_and_data(
conn, tuple(dataidmap.values()))
# 5. If standard errors and N exist, save them too
# (use IDs returned in `3. b.` above).
- data_se = save_phenotypes_se(conn, dataidmap, samples, _control_data)
- data_n = save_phenotypes_n(conn, dataidmap, samples, _control_data)
+ data_se = save_phenotypes_se(conn, dataidmap, samples, _control_data, _outdir)
+ data_n = save_phenotypes_n(conn, dataidmap, samples, _control_data, _outdir)
# 6. If entirely new data, update authorisations (break this down)
update_auth(_user, _species, _population, _dataset, _phenos)
return 0
@@ -314,29 +323,21 @@ if __name__ == "__main__":
# How do you check for a table lock?
# https://oracle-base.com/articles/mysql/mysql-identify-locked-tables
# `SHOW OPEN TABLES LIKE 'Publish%';`
- logger.debug(
- ("Locking database tables for the connection:"
- "\n\t- %s\n\t- %s\n\t- %s\n\t- %s\n\t- %s\n"),
+ _db_tables_ = (
"Publication",
+ "Phenotype",
"PublishXRef",
"PublishData",
"PublishSE",
"NStrain")
+
+ logger.debug(
+ ("Locking database tables for the connection:" +
+ "".join("\n\t- %s" for _ in _db_tables_) + "\n"),
+ *_db_tables_)
cursor.execute(# Lock the tables to avoid race conditions
- "LOCK TABLES "
- "Publication WRITE, "
- "PublishXRef WRITE, "
- "PublishData WRITE, "
- "PublishSE WRITE, "
- "NStrain WRITE")
- try:
- return load_data(conn, jobs.job(jobs_conn, args.job_id))
- except jobs.jobs.JobNotFound as _jne:
- logger.error("Could not find job with ID: %s", args.job_id)
- except Exception as _exc:
- logger.error("Loading failed with general exception!",
- exc_info=True,
- stack_info=True)
+ "LOCK TABLES " + ", ".join(
+ f"{_table} WRITE" for _table in _db_tables_))
logger.debug("Unlocking all database tables.")
cursor.execute("UNLOCK TABLES")
diff --git a/uploader/static/js/debug.js b/uploader/static/js/debug.js
new file mode 100644
index 0000000..eb01209
--- /dev/null
+++ b/uploader/static/js/debug.js
@@ -0,0 +1,40 @@
+/**
+ * The entire purpose of this function is to let you debug values inline
+ * without changing the flow of the code too much.
+ *
+ * This **MUST** be a non-arrow function to allow access to the `arguments`
+ * object.
+ *
+ * This function expects at least one argument.
+ *
+ * If more than one argument is provided, then:
+ * a) the last argument is considered the value, and will be returned
+ * b) all other arguments will be converted to string and output
+ *
+ * If only one argument is provided, it is considered the value, and will be
+ * returned.
+ *
+ * Zero arguments is an error condition.
+ **/
+function __pk__(val) {
+ /* Handle zero arguments */
+ if (arguments.length < 1) {
+ throw new Error("Invalid arguments: Expected at least one argument.");
+ }
+
+ msg = "/********** DEBUG **********/";
+ if (arguments.length > 1) {
+ msg = Array.from(
+ arguments
+ ).slice(
+ 0,
+ arguments.length - 1
+ ).map((arg) => {
+ return String(arg);
+ }).join("; ");
+ }
+
+ const value = arguments[arguments.length - 1];
+ console.debug("/********** " + msg + " **********/", value);
+ return value;
+}
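
A short usage sketch of `__pk__` as documented above (names are illustrative):

    // With labels: everything before the last argument becomes the prefix.
    let total = __pk__("cart", "computing total", price * quantity);
    // Without labels: the value is logged under the default DEBUG banner.
    handleResponse(__pk__(response));

Because the last argument is returned unchanged, the call can wrap any expression in place without altering the surrounding control flow.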