-rw-r--r--  scripts/load_phenotypes_to_db.py  75
-rw-r--r--  uploader/static/js/debug.js       40
2 files changed, 78 insertions, 37 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index b78c648..3a0df77 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -3,6 +3,7 @@ import json
import logging
import argparse
from pathlib import Path
+from zipfile import ZipFile
from typing import Any, Union
from MySQLdb.cursors import Cursor, DictCursor
@@ -10,8 +11,10 @@ from MySQLdb.cursors import Cursor, DictCursor
from gn_libs import jobs, mysqldb, sqlite3
from r_qtl import r_qtl2 as rqtl2
-from uploader.phenotypes.models import save_phenotypes_data
from uploader.samples.models import samples_by_species_and_population
+from uploader.phenotypes.models import (
+ save_phenotypes_data,
+ create_new_phenotypes)
from uploader.publications.models import (
create_new_publications,
fetch_publication_by_id)
@@ -25,18 +28,18 @@ logger = logging.getLogger(__name__)
def save_phenotypes(
cursor: mysqldb.Connection,
- control_data: dict[str, Any]
+ control_data: dict[str, Any],
+ filesdir: Path
) -> tuple[dict, ...]:
"""Read `phenofiles` and save the phenotypes therein."""
logger.info("Saving new phenotypes.")
- logger.debug("Processing %s 'pheno' files.", len(phenofiles))
## TODO: Replace with something like this: ##
# phenofiles = control_data["phenocovar"] + control_data.get(
# "gn-metadata", {}).get("pheno", [])
#
# This is meant to load (and merge) data from the "phenocovar" and
# "gn-metadata -> pheno" files into a single collection of phenotypes.
- phenofiles = control_data["phenocovar"]
+ phenofiles = tuple(filesdir.joinpath(_file) for _file in control_data["phenocovar"])
if len(phenofiles) <= 0:
return tuple()
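
A rough sketch of the merge the TODO above describes, assuming `control_data` carries the "phenocovar" and "gn-metadata" -> "pheno" keys named in the comment; the helper name is hypothetical:

    # Hypothetical helper combining both phenotype sources named in the
    # TODO into one tuple of paths under `filesdir`.
    def merged_phenofiles(control_data: dict, filesdir: Path) -> tuple[Path, ...]:
        names = (list(control_data.get("phenocovar", []))
                 + list(control_data.get("gn-metadata", {}).get("pheno", [])))
        return tuple(filesdir.joinpath(name) for name in names)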
@@ -47,9 +50,9 @@ def save_phenotypes(
_file,
build_line_splitter(control_data),
build_line_joiner(control_data))
- for _file in control_data["phenocovar"])
+ for _file in phenofiles)
- _headers = rqtl2.read_csv_file_headers(control_data["phenocovar"][0],
+ _headers = rqtl2.read_csv_file_headers(phenofiles[0],
control_data["phenocovar_transposed"],
control_data["sep"],
control_data["comment.char"])
@@ -66,17 +69,19 @@ def __fetch_next_dataid__(conn: mysqldb.Connection) -> int:
with conn.cursor(cursorclass=DictCursor) as cursor:
cursor.execute(
"SELECT MAX(DataId) AS CurrentMaxDataId FROM PublishXRef")
- return int(cursor.fetchone()) + 1
+ return int(cursor.fetchone()["CurrentMaxDataId"]) + 1
def save_pheno_data(
conn: mysqldb.Connection,
dataidmap: dict,
samples: tuple[dict, ...],
- control_data: dict
+ control_data: dict,
+ filesdir: Path
):
"""Read the `datafiles` and save the data in the database."""
- phenofiles = control_data["pheno"]
+ phenofiles = tuple(
+ filesdir.joinpath(_file) for _file in control_data["pheno"])
if len(phenofiles) <= 0:
return tuple()
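
The `__fetch_next_dataid__` change above fixes a real type error: with `cursorclass=DictCursor`, `fetchone()` returns a mapping keyed by column alias, not a bare value, so the row must be indexed before casting. A minimal illustration, with a made-up current maximum:

    row = cursor.fetchone()              # e.g. {"CurrentMaxDataId": 12345}
    int(row)                             # TypeError: int() argument must be
                                         # a number, not 'dict'
    int(row["CurrentMaxDataId"]) + 1     # 12346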
@@ -87,9 +92,9 @@ def save_pheno_data(
_file,
build_line_splitter(control_data),
build_line_joiner(control_data))
- for _file in control_data["pheno"])
+ for _file in phenofiles)
- _headers = rqtl2.read_csv_file_headers(control_data["pheno"][0],
+ _headers = rqtl2.read_csv_file_headers(phenofiles[0],
control_data["pheno_transposed"],
control_data["sep"],
control_data["comment.char"])
@@ -108,7 +113,7 @@ def save_pheno_data(
return save_phenotypes_data(
conn,
- "PublishData"
+ "PublishData",
(item for item in
(row_to_dataitems(dict(zip(_headers, line))) for filecontent
in (rqtl2.read_csv_file(path) for path in phenofiles)
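
The comma added after "PublishData" is a genuine bug fix, not a style tweak: without it, Python parses the string literal followed by the parenthesised generator as a call expression on the string, which fails at runtime:

    save_phenotypes_data(conn, "PublishData" (item for item in ...))
    # TypeError: 'str' object is not callable

The same fix is applied to the "PublishSE" and "NStrain" calls in the next two hunks.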
@@ -155,7 +160,7 @@ def save_phenotype_se(
return save_phenotypes_data(
conn,
- "PublishSE"
+ "PublishSE",
(item for item in
(row_to_dataitems(dict(zip(_headers, line))) for filecontent
in (rqtl2.read_csv_file(path) for path in sefiles)
@@ -202,7 +207,7 @@ def save_phenotype_n(
return save_phenotypes_data(
conn,
- "NStrain"
+ "NStrain",
(item for item in
(row_to_dataitems(dict(zip(_headers, line))) for filecontent
in (rqtl2.read_csv_file(path) for path in sefiles)
@@ -243,10 +248,14 @@ def load_data(conn, job):
conn, int(_job_metadata.get("publicationid", "0"))) or {"Id": 0}
# 2. Save all new phenotypes:
# -> return phenotype IDs
- _control_data = rqtl.control_data(job["job-metadata"]["bundle-file"])
+ bundle = Path(_job_metadata["bundle_file"])
+ _control_data = rqtl2.control_data(bundle)
+ logger.info("Extracting the zipped bundle of files.")
+ _outdir = Path(bundle.parent, f"bundle_{bundle.stem}")
+ with ZipFile(str(bundle), "r") as zfile:
+ _files = rqtl2.extract(zfile, _outdir)
logger.info("Saving basic phenotype data.")
-
- _phenos = save_phenotypes(cursor, _control_data)
+ _phenos = save_phenotypes(conn, _control_data, _outdir)
dataidmap = {
row["phenotype_id"]: {
"population_id": population["Id"],
@@ -262,14 +271,14 @@ def load_data(conn, job):
for row in samples_by_species_and_population(
conn, species["SpeciesId"], population["PopulationId"])}
# b. Save all the data items (DataIds are pre-assigned in dataidmap), return new IDs
- data = save_pheno_data(conn, dataidmap, samples, _control_data)
+ data = save_pheno_data(conn, dataidmap, samples, _control_data, _outdir)
# 4. Cross-reference Phenotype, Publication, and PublishData in PublishXRef
xrefs = cross_reference_phenotypes_publications_and_data(
conn, tuple(dataidmap.values()))
# 5. If standard errors and N exist, save them too
# (use IDs returned in `3. b.` above).
- data_se = save_phenotypes_se(conn, dataidmap, samples, _control_data)
- data_n = save_phenotypes_n(conn, dataidmap, samples, _control_data)
+ data_se = save_phenotypes_se(conn, dataidmap, samples, _control_data, _outdir)
+ data_n = save_phenotypes_n(conn, dataidmap, samples, _control_data, _outdir)
# 6. If entirely new data, update authorisations (break this down)
update_auth(_user, _species, _population, _dataset, _phenos)
return 0
@@ -314,29 +323,21 @@ if __name__ == "__main__":
# How do you check for a table lock?
# https://oracle-base.com/articles/mysql/mysql-identify-locked-tables
# `SHOW OPEN TABLES LIKE 'Publish%';`
- logger.debug(
- ("Locking database tables for the connection:"
- "\n\t- %s\n\t- %s\n\t- %s\n\t- %s\n\t- %s\n"),
+ _db_tables_ = (
"Publication",
+ "Phenotype",
"PublishXRef",
"PublishData",
"PublishSE",
"NStrain")
+
+ logger.debug(
+ ("Locking database tables for the connection:" +
+ "".join("\n\t- %s" for _ in _db_tables_) + "\n"),
+ *_db_tables_)
cursor.execute(# Lock the tables to avoid race conditions
- "LOCK TABLES "
- "Publication WRITE, "
- "PublishXRef WRITE, "
- "PublishData WRITE, "
- "PublishSE WRITE, "
- "NStrain WRITE")
- try:
- return load_data(conn, jobs.job(jobs_conn, args.job_id))
- except jobs.jobs.JobNotFound as _jne:
- logger.error("Could not find job with ID: %s", args.job_id)
- except Exception as _exc:
- logger.error("Loading failed with general exception!",
- exc_info=True,
- stack_info=True)
+ "LOCK TABLES " + ", ".join(
+ f"{_table} WRITE" for _table in _db_tables_))
logger.debug("Unlocking all database tables.")
cursor.execute("UNLOCK TABLES")
diff --git a/uploader/static/js/debug.js b/uploader/static/js/debug.js
new file mode 100644
index 0000000..eb01209
--- /dev/null
+++ b/uploader/static/js/debug.js
@@ -0,0 +1,40 @@
+/**
+ * The entire purpose of this function is to let you debug values inline
+ * without changing the flow of the code too much.
+ *
+ * This **MUST** be a non-arrow function to allow access to the `arguments`
+ * object.
+ *
+ * This function expects at least one argument.
+ *
+ * If more than one argument is provided, then:
+ * a) the last argument is considered the value, and will be returned
+ * b) all other arguments will be converted to string and output
+ *
+ * If only one argument is provided, it is considered the value, and will be
+ * returned.
+ *
+ * Zero arguments is an error condition.
+ **/
+function __pk__(val) {
+ /* Handle zero arguments */
+ if (arguments.length < 1) {
+ throw new Error("Invalid arguments: Expected at least one argument.");
+ }
+
+ msg = "/********** DEBUG **********/";
+ if (arguments.length > 1) {
+ msg = Array.from(
+ arguments
+ ).slice(
+ 0,
+ arguments.length - 1
+ ).map((arg) => {
+ return String(arg);
+ }).join("; ");
+ }
+
+ const value = arguments[arguments.length - 1];
+ console.debug("/********** " + msg + " **********/", value);
+ return value;
+}
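
A short usage sketch of `__pk__` as documented above (names are illustrative):

    // With labels: everything before the last argument becomes the prefix.
    let total = __pk__("cart", "computing total", price * quantity);
    // Without labels: the value is logged under the default DEBUG banner.
    handleResponse(__pk__(response));

Because the last argument is returned unchanged, the call can wrap any expression in place without altering the surrounding control flow.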