aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r--scripts/insert_data.py4
-rw-r--r--scripts/insert_samples.py9
-rw-r--r--scripts/process_rqtl2_bundle.py8
-rw-r--r--scripts/qc.py2
-rw-r--r--scripts/qc_on_rqtl2_bundle.py24
-rw-r--r--scripts/qcapp_wsgi.py4
-rw-r--r--scripts/rqtl2/entry.py6
-rw-r--r--scripts/rqtl2/install_genotypes.py117
-rw-r--r--scripts/validate_file.py4
-rw-r--r--scripts/worker.py4
10 files changed, 108 insertions, 74 deletions
diff --git a/scripts/insert_data.py b/scripts/insert_data.py
index 1465348..4b2e5f3 100644
--- a/scripts/insert_data.py
+++ b/scripts/insert_data.py
@@ -14,8 +14,8 @@ from MySQLdb.cursors import DictCursor
from functional_tools import take
from quality_control.file_utils import open_file
-from qc_app.db_utils import database_connection
-from qc_app.check_connections import check_db, check_redis
+from uploader.db_utils import database_connection
+from uploader.check_connections import check_db, check_redis
# Set up logging
stderr_handler = logging.StreamHandler(stream=sys.stderr)
diff --git a/scripts/insert_samples.py b/scripts/insert_samples.py
index 8431462..e3577b6 100644
--- a/scripts/insert_samples.py
+++ b/scripts/insert_samples.py
@@ -7,10 +7,11 @@ import argparse
import MySQLdb as mdb
from redis import Redis
-from qc_app.db_utils import database_connection
-from qc_app.check_connections import check_db, check_redis
-from qc_app.db import species_by_id, population_by_id
-from qc_app.samples import (
+from uploader.db_utils import database_connection
+from uploader.check_connections import check_db, check_redis
+from uploader.species.models import species_by_id
+from uploader.population.models import population_by_id
+from uploader.samples.models import (
save_samples_data,
read_samples_file,
cross_reference_samples)
diff --git a/scripts/process_rqtl2_bundle.py b/scripts/process_rqtl2_bundle.py
index 4da3936..20cfd3b 100644
--- a/scripts/process_rqtl2_bundle.py
+++ b/scripts/process_rqtl2_bundle.py
@@ -13,13 +13,13 @@ from redis import Redis
from functional_tools import take
-import r_qtl.errors as rqe
import r_qtl.r_qtl2 as rqtl2
import r_qtl.r_qtl2_qc as rqc
+import r_qtl.exceptions as rqe
-from qc_app import jobs
-from qc_app.db_utils import database_connection
-from qc_app.check_connections import check_db, check_redis
+from uploader import jobs
+from uploader.db_utils import database_connection
+from uploader.check_connections import check_db, check_redis
from scripts.cli_parser import init_cli_parser
from scripts.redis_logger import setup_redis_logger
diff --git a/scripts/qc.py b/scripts/qc.py
index e8573a9..6de051f 100644
--- a/scripts/qc.py
+++ b/scripts/qc.py
@@ -11,7 +11,7 @@ from quality_control.utils import make_progress_calculator
from quality_control.errors import InvalidValue, DuplicateHeading
from quality_control.parsing import FileType, strain_names, collect_errors
-from qc_app.db_utils import database_connection
+from uploader.db_utils import database_connection
from .cli_parser import init_cli_parser
diff --git a/scripts/qc_on_rqtl2_bundle.py b/scripts/qc_on_rqtl2_bundle.py
index 40809b7..fc95d13 100644
--- a/scripts/qc_on_rqtl2_bundle.py
+++ b/scripts/qc_on_rqtl2_bundle.py
@@ -16,13 +16,13 @@ from redis import Redis
from quality_control.errors import InvalidValue
from quality_control.checks import decimal_points_error
-from qc_app import jobs
-from qc_app.db_utils import database_connection
-from qc_app.check_connections import check_db, check_redis
+from uploader import jobs
+from uploader.db_utils import database_connection
+from uploader.check_connections import check_db, check_redis
-from r_qtl import errors as rqe
from r_qtl import r_qtl2 as rqtl2
from r_qtl import r_qtl2_qc as rqc
+from r_qtl import exceptions as rqe
from r_qtl import fileerrors as rqfe
from scripts.process_rqtl2_bundle import parse_job
@@ -105,7 +105,7 @@ def retrieve_errors_with_progress(rconn: Redis,#pylint: disable=[too-many-locals
__update_processed__(value)
rconn.hset(fqjobid, f"{filetype}-linecount", count)
- except rqe.MissingFileError:
+ except rqe.MissingFileException:
fname = cdata.get(filetype)
yield rqfe.MissingFile(filetype, fname, (
f"The file '{fname}' does not exist in the bundle despite it being "
@@ -133,7 +133,7 @@ def qc_geno_errors(rconn, fqjobid, _dburi, _speciesid, zfile, logger) -> bool:
def fetch_db_geno_samples(conn: mdb.Connection, speciesid: int) -> tuple[str, ...]:
"""Fetch samples/cases/individuals from the database."""
- samples = set()
+ samples = set()# type: ignore[var-annotated]
with conn.cursor() as cursor:
cursor.execute("SELECT Name, Name2 from Strain WHERE SpeciesId=%s",
(speciesid,))
@@ -191,12 +191,13 @@ def check_pheno_samples(
return allerrors
-def qc_pheno_errors(rconn, fqjobid, dburi, speciesid, zfile, logger) -> bool:
+def qc_pheno_errors(# pylint: disable=[too-many-arguments]
+ rconn, fqjobid, dburi, speciesid, zfile, logger) -> bool:
"""Check for errors in `pheno` file(s)."""
cdata = rqtl2.control_data(zfile)
if "pheno" in cdata:
logger.info("Checking for errors in the 'pheno' file…")
- perrs = tuple()
+ perrs = tuple()# type: ignore[var-annotated]
with database_connection(dburi) as dbconn:
perrs = check_pheno_samples(
dbconn, speciesid, zfile.filename, logger) + tuple(
@@ -216,7 +217,8 @@ def qc_pheno_errors(rconn, fqjobid, dburi, speciesid, zfile, logger) -> bool:
return False
-def qc_phenose_errors(rconn, fqjobid, dburi, speciesid, zfile, logger) -> bool:
+def qc_phenose_errors(# pylint: disable=[too-many-arguments]
+ rconn, fqjobid, _dburi, _speciesid, zfile, logger) -> bool:
"""Check for errors in `phenose` file(s)."""
cdata = rqtl2.control_data(zfile)
if "phenose" in cdata:
@@ -258,7 +260,9 @@ def run_qc(rconn: Redis,
if qc_missing_files(rconn, fqjobid, zfile, logger):
return 1
- def with_zipfile(rconn, fqjobid, dbconn, speciesid, filename, logger, func):
+ def with_zipfile(# pylint: disable=[too-many-arguments]
+ rconn, fqjobid, dbconn, speciesid, filename, logger, func
+ ):
with ZipFile(filename, "r") as zfile:
return func(rconn, fqjobid, dbconn, speciesid, zfile, logger)
diff --git a/scripts/qcapp_wsgi.py b/scripts/qcapp_wsgi.py
index 349c006..fe77031 100644
--- a/scripts/qcapp_wsgi.py
+++ b/scripts/qcapp_wsgi.py
@@ -5,8 +5,8 @@ from logging import getLogger, StreamHandler
from flask import Flask
-from qc_app import create_app
-from qc_app.check_connections import check_db, check_redis
+from uploader import create_app
+from uploader.check_connections import check_db, check_redis
def setup_logging(appl: Flask) -> Flask:
"""Setup appropriate logging paradigm depending on environment."""
diff --git a/scripts/rqtl2/entry.py b/scripts/rqtl2/entry.py
index 93fc130..b7fb68e 100644
--- a/scripts/rqtl2/entry.py
+++ b/scripts/rqtl2/entry.py
@@ -6,9 +6,9 @@ from argparse import Namespace
from redis import Redis
from MySQLdb import Connection
-from qc_app import jobs
-from qc_app.db_utils import database_connection
-from qc_app.check_connections import check_db, check_redis
+from uploader import jobs
+from uploader.db_utils import database_connection
+from uploader.check_connections import check_db, check_redis
from scripts.redis_logger import setup_redis_logger
diff --git a/scripts/rqtl2/install_genotypes.py b/scripts/rqtl2/install_genotypes.py
index 9f8bf03..6b89142 100644
--- a/scripts/rqtl2/install_genotypes.py
+++ b/scripts/rqtl2/install_genotypes.py
@@ -19,10 +19,13 @@ from scripts.rqtl2.entry import build_main
from scripts.rqtl2.cli_parser import add_common_arguments
from scripts.cli_parser import init_cli_parser, add_global_data_arguments
-def insert_markers(dbconn: mdb.Connection,
- speciesid: int,
- markers: tuple[str, ...],
- pmapdata: Optional[Iterator[dict]]) -> int:
+def insert_markers(
+ dbconn: mdb.Connection,
+ speciesid: int,
+ markers: tuple[str, ...],
+ pmapdata: Optional[Iterator[dict]],
+ _logger: Logger
+) -> int:
"""Insert genotype and genotype values into the database."""
mdata = reduce(#type: ignore[var-annotated]
lambda acc, row: ({#type: ignore[arg-type, return-value]
@@ -48,9 +51,12 @@ def insert_markers(dbconn: mdb.Connection,
} for marker in markers}.values()))
return cursor.rowcount
-def insert_individuals(dbconn: mdb.Connection,
- speciesid: int,
- individuals: tuple[str, ...]) -> int:
+def insert_individuals(
+ dbconn: mdb.Connection,
+ speciesid: int,
+ individuals: tuple[str, ...],
+ _logger: Logger
+) -> int:
"""Insert individuals/samples into the database."""
with dbconn.cursor() as cursor:
cursor.executemany(
@@ -61,10 +67,13 @@ def insert_individuals(dbconn: mdb.Connection,
for individual in individuals))
return cursor.rowcount
-def cross_reference_individuals(dbconn: mdb.Connection,
- speciesid: int,
- populationid: int,
- individuals: tuple[str, ...]) -> int:
+def cross_reference_individuals(
+ dbconn: mdb.Connection,
+ speciesid: int,
+ populationid: int,
+ individuals: tuple[str, ...],
+ _logger: Logger
+) -> int:
"""Cross reference any inserted individuals."""
with dbconn.cursor(cursorclass=DictCursor) as cursor:
paramstr = ", ".join(["%s"] * len(individuals))
@@ -80,11 +89,13 @@ def cross_reference_individuals(dbconn: mdb.Connection,
tuple(ids))
return cursor.rowcount
-def insert_genotype_data(dbconn: mdb.Connection,
- speciesid: int,
- genotypes: tuple[dict, ...],
- individuals: tuple[str, ...]) -> tuple[
- int, tuple[dict, ...]]:
+def insert_genotype_data(
+ dbconn: mdb.Connection,
+ speciesid: int,
+ genotypes: tuple[dict, ...],
+ individuals: tuple[str, ...],
+ _logger: Logger
+) -> tuple[int, tuple[dict, ...]]:
"""Insert the genotype data values into the database."""
with dbconn.cursor(cursorclass=DictCursor) as cursor:
paramstr = ", ".join(["%s"] * len(individuals))
@@ -120,11 +131,14 @@ def insert_genotype_data(dbconn: mdb.Connection,
"markerid": row["markerid"]
} for row in data)
-def cross_reference_genotypes(dbconn: mdb.Connection,
- speciesid: int,
- datasetid: int,
- dataids: tuple[dict, ...],
- gmapdata: Optional[Iterator[dict]]) -> int:
+def cross_reference_genotypes(
+ dbconn: mdb.Connection,
+ speciesid: int,
+ datasetid: int,
+ dataids: tuple[dict, ...],
+ gmapdata: Optional[Iterator[dict]],
+ _logger: Logger
+) -> int:
"""Cross-reference the data to the relevant dataset."""
_rows, markers, mdata = reduce(#type: ignore[var-annotated]
lambda acc, row: (#type: ignore[return-value,arg-type]
@@ -140,30 +154,43 @@ def cross_reference_genotypes(dbconn: mdb.Connection,
(tuple(), tuple(), {}))
with dbconn.cursor(cursorclass=DictCursor) as cursor:
- paramstr = ", ".join(["%s"] * len(markers))
- cursor.execute("SELECT Id, Name FROM Geno "
- f"WHERE SpeciesId=%s AND Name IN ({paramstr})",
- (speciesid,) + markers)
- markersdict = {row["Id"]: row["Name"] for row in cursor.fetchall()}
- cursor.executemany(
+ markersdict = {}
+ if len(markers) > 0:
+ paramstr = ", ".join(["%s"] * len(markers))
+ insertparams = (speciesid,) + markers
+ selectquery = ("SELECT Id, Name FROM Geno "
+ f"WHERE SpeciesId=%s AND Name IN ({paramstr})")
+ _logger.debug(
+ "The select query was\n\t%s\n\nwith the parameters\n\t%s",
+ selectquery,
+ (speciesid,) + markers)
+ cursor.execute(selectquery, insertparams)
+ markersdict = {row["Id"]: row["Name"] for row in cursor.fetchall()}
+
+ insertquery = (
"INSERT INTO GenoXRef(GenoFreezeId, GenoId, DataId, cM) "
"VALUES(%(datasetid)s, %(markerid)s, %(dataid)s, %(pos)s) "
- "ON DUPLICATE KEY UPDATE GenoFreezeId=GenoFreezeId",
- tuple({
- **row,
- "datasetid": datasetid,
- "pos": mdata.get(markersdict.get(
- row.get("markerid"), {}), {}).get("pos")
- } for row in dataids))
+ "ON DUPLICATE KEY UPDATE GenoFreezeId=GenoFreezeId")
+ insertparams = tuple({
+ **row,
+ "datasetid": datasetid,
+ "pos": mdata.get(markersdict.get(
+ row.get("markerid"), "nosuchkey"), {}).get("pos")
+ } for row in dataids)
+ _logger.debug(
+ "The insert query was\n\t%s\n\nwith the parameters\n\t%s",
+ insertquery, insertparams)
+ cursor.executemany(insertquery, insertparams)
return cursor.rowcount
def install_genotypes(#pylint: disable=[too-many-arguments, too-many-locals]
dbconn: mdb.Connection,
- speciesid: int,
- populationid: int,
- datasetid: int,
- rqtl2bundle: Path,
- logger: Logger = getLogger()) -> int:
+ speciesid: int,
+ populationid: int,
+ datasetid: int,
+ rqtl2bundle: Path,
+ logger: Logger = getLogger(__name__)
+) -> int:
"""Load any existing genotypes into the database."""
count = 0
with ZipFile(str(rqtl2bundle.absolute()), "r") as zfile:
@@ -189,20 +216,22 @@ def install_genotypes(#pylint: disable=[too-many-arguments, too-many-locals]
speciesid,
tuple(key for key in batch[0].keys() if key != "id"),
(rqtl2.file_data(zfile, "pmap", cdata) if "pmap" in cdata
- else None))
+ else None),
+ logger)
individuals = tuple(row["id"] for row in batch)
- insert_individuals(dbconn, speciesid, individuals)
+ insert_individuals(dbconn, speciesid, individuals, logger)
cross_reference_individuals(
- dbconn, speciesid, populationid, individuals)
+ dbconn, speciesid, populationid, individuals, logger)
_num_rows, dataids = insert_genotype_data(
- dbconn, speciesid, batch, individuals)
+ dbconn, speciesid, batch, individuals, logger)
cross_reference_genotypes(
dbconn,
speciesid,
datasetid,
dataids,
(rqtl2.file_data(zfile, "gmap", cdata)
- if "gmap" in cdata else None))
+ if "gmap" in cdata else None),
+ logger)
count = count + len(batch)
except rqtl2.InvalidFormat as exc:
logger.error(str(exc))
diff --git a/scripts/validate_file.py b/scripts/validate_file.py
index 0028795..a40d7e7 100644
--- a/scripts/validate_file.py
+++ b/scripts/validate_file.py
@@ -12,8 +12,8 @@ from redis.exceptions import ConnectionError # pylint: disable=[redefined-builti
from quality_control.utils import make_progress_calculator
from quality_control.parsing import FileType, strain_names, collect_errors
-from qc_app import jobs
-from qc_app.db_utils import database_connection
+from uploader import jobs
+from uploader.db_utils import database_connection
from .cli_parser import init_cli_parser
from .qc import add_file_validation_arguments
diff --git a/scripts/worker.py b/scripts/worker.py
index 0eb9ea5..91b0332 100644
--- a/scripts/worker.py
+++ b/scripts/worker.py
@@ -11,8 +11,8 @@ from tempfile import TemporaryDirectory
from redis import Redis
-from qc_app import jobs
-from qc_app.check_connections import check_redis
+from uploader import jobs
+from uploader.check_connections import check_redis
def parse_args():
"Parse the command-line arguments"