about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r--  scripts/insert_data.py            6
-rw-r--r--  scripts/insert_samples.py         4
-rw-r--r--  scripts/load_phenotypes_to_db.py  43
-rw-r--r--  scripts/process_rqtl2_bundle.py   4
-rw-r--r--  scripts/qc_on_rqtl2_bundle.py     4
-rw-r--r--  scripts/redis_logger.py           2
-rw-r--r--  scripts/rqtl2/phenotypes_qc.py    40
7 files changed, 44 insertions, 59 deletions
diff --git a/scripts/insert_data.py b/scripts/insert_data.py
index 67038f8..aec0251 100644
--- a/scripts/insert_data.py
+++ b/scripts/insert_data.py
@@ -197,7 +197,7 @@ def probeset_ids(dbconn: mdb.Connection,
break
yield row
-def insert_means(# pylint: disable=[too-many-locals, too-many-arguments]
+def insert_means(# pylint: disable=[too-many-locals, too-many-arguments, too-many-positional-arguments]
filepath: str, speciesid: int, platform_id: int, datasetid: int,
dbconn: mdb.Connection, rconn: Redis) -> int: # pylint: disable=[unused-argument]
"Insert the means/averages data into the database"
@@ -232,7 +232,7 @@ def insert_means(# pylint: disable=[too-many-locals, too-many-arguments]
item for sublist in
read_datavalues(filepath, headings, strains).values()
for item in sublist),
- start=(last_data_id(dbconn)+1)))
+ start=last_data_id(dbconn)+1))
with dbconn.cursor(cursorclass=DictCursor) as cursor:
while True:
means = tuple(take(the_means, 10000))
@@ -245,7 +245,7 @@ def insert_means(# pylint: disable=[too-many-locals, too-many-arguments]
cursor.executemany(xref_query, means)
return 0
-def insert_se(# pylint: disable = [too-many-arguments,too-many-locals]
+def insert_se(# pylint: disable = [too-many-arguments,too-many-locals, too-many-positional-arguments]
filepath: str, speciesid: int, platformid: int, datasetid: int,
dbconn: mdb.Connection, rconn: Redis) -> int: # pylint: disable=[unused-argument]
"Insert the standard-error data into the database"
diff --git a/scripts/insert_samples.py b/scripts/insert_samples.py
index 742c4ae..fc029f9 100644
--- a/scripts/insert_samples.py
+++ b/scripts/insert_samples.py
@@ -34,7 +34,7 @@ class SeparatorAction(argparse.Action):
"""Process the value passed in."""
setattr(namespace, self.dest, (chr(9) if values == "\\t" else values))
-def insert_samples(conn: mdb.Connection,# pylint: disable=[too-many-arguments]
+def insert_samples(conn: mdb.Connection,# pylint: disable=[too-many-arguments, too-many-positional-arguments]
rconn: Redis,# pylint: disable=[unused-argument]
speciesid: int,
populationid: int,
@@ -149,7 +149,7 @@ if __name__ == "__main__":
args.separator,
args.firstlineheading,
args.quotechar)
- except Exception as _exc:
+ except Exception as _exc:# pylint: disable=[broad-exception-caught]
print(traceback.format_exc(), file=sys.stderr)
return status_code
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index e4802b7..8855c4c 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -1,3 +1,4 @@
+"""Load phenotypes and their data provided in files into the database."""
import sys
import uuid
import json
@@ -44,12 +45,6 @@ def save_phenotypes(
filesdir: Path
) -> tuple[dict, ...]:
"""Read `phenofiles` and save the phenotypes therein."""
- ## TODO: Replace with something like this: ##
- # phenofiles = control_data["phenocovar"] + control_data.get(
- # "gn-metadata", {}).get("pheno", [])
- #
- # This is meant to load (and merge) data from the "phenocovar" and
- # "gn-metadata -> pheno" files into a single collection of phenotypes.
phenofiles = tuple(filesdir.joinpath(_file) for _file in control_data["phenocovar"])
if len(phenofiles) <= 0:
return tuple()
@@ -106,7 +101,6 @@ def __row_to_dataitems__(
def __build_dataitems__(
- filetype,
phenofiles,
control_data,
samples,
@@ -138,7 +132,7 @@ def __build_dataitems__(
if item["value"] is not None)
-def save_numeric_data(
+def save_numeric_data(# pylint: disable=[too-many-positional-arguments,too-many-arguments]
conn: mysqldb.Connection,
dataidmap: dict,
pheno_name2id: dict[str, int],
@@ -169,14 +163,13 @@ def save_numeric_data(
conn,
table,
__build_dataitems__(
- filetype,
phenofiles,
control_data,
samples,
dataidmap,
pheno_name2id),
filesdir)
- except Exception as _exc:
+ except Exception as _exc:# pylint: disable=[broad-exception-caught]
logger.debug("Could not use `LOAD … INFILE`, using raw query",
exc_info=True)
time.sleep(60)
@@ -184,7 +177,6 @@ def save_numeric_data(
conn,
table,
__build_dataitems__(
- filetype,
phenofiles,
control_data,
samples,
@@ -235,22 +227,15 @@ def cross_reference_phenotypes_publications_and_data(
return tuple()
-def update_auth(authserver, token, species, population, dataset, xrefdata):
+def update_auth(# pylint: disable=[too-many-locals,too-many-positional-arguments,too-many-arguments]
+ authserver,
+ token,
+ species,
+ population,
+ dataset,
+ xrefdata):
"""Grant the user access to their data."""
- # TODO Call into the auth server to:
- # 1. Link the phenotypes with a user group
- # - fetch group: http://localhost:8081/auth/user/group
- # - link data to group: http://localhost:8081/auth/data/link/phenotype
- # - *might need code update in gn-auth: remove restriction, perhaps*
- # 2. Create resource (perhaps?)
- # - Get resource categories: http://localhost:8081/auth/resource/categories
- # - Create a new resource: http://localhost:80host:8081/auth/resource/create
- # - single resource for all phenotypes
- # - resource name from user, species, population, dataset, datetime?
- # - User will have "ownership" of resource by default
- # 3. Link data to the resource: http://localhost:8081/auth/resource/data/link
- # - Update code to allow linking multiple items in a single request
- _tries = 0 # TODO use this to limit how many tries before quiting and bailing
+ _tries = 0
_delay = 1
headers = {
"Authorization": f"Bearer {token}",
@@ -334,7 +319,7 @@ def update_auth(authserver, token, species, population, dataset, xrefdata):
resp.json())
return 1
- def __handle_success__(val):
+ def __handle_success__(_val):
logger.info(
"The authorisation for the data has been updated successfully.")
return 0
@@ -348,7 +333,7 @@ def update_auth(authserver, token, species, population, dataset, xrefdata):
).either(__handle_error__, __handle_success__)
-def load_data(conn: mysqldb.Connection, job: dict) -> int:
+def load_data(conn: mysqldb.Connection, job: dict) -> int:#pylint: disable=[too-many-locals]
"""Load the data attached in the given job."""
_job_metadata = job["metadata"]
# Steps
@@ -520,7 +505,7 @@ if __name__ == "__main__":
try:
sys.exit(main())
- except Exception as _exc:
+ except Exception as _exc:# pylint: disable=[broad-exception-caught]
logger.debug("Data loading failed… Halting!",
exc_info=True)
sys.exit(1)
diff --git a/scripts/process_rqtl2_bundle.py b/scripts/process_rqtl2_bundle.py
index 8b7a0fb..e2ce420 100644
--- a/scripts/process_rqtl2_bundle.py
+++ b/scripts/process_rqtl2_bundle.py
@@ -104,7 +104,7 @@ def process_bundle(dbconn: mdb.Connection,
rqtl2bundle=Path(meta["rqtl2-bundle-file"])),
logger)
if genoexit != 0:
- raise Exception("Processing 'geno' file failed.")
+ raise Exception("Processing 'geno' file failed.")# pylint: disable=[broad-exception-raised]
logger.debug(
"geno file processing completed successfully. (ExitCode: %s)",
genoexit)
@@ -122,7 +122,7 @@ def process_bundle(dbconn: mdb.Connection,
rqtl2bundle=Path(meta["rqtl2-bundle-file"])),
logger)
if phenoexit != 0:
- raise Exception("Processing 'pheno' file failed.")
+ raise Exception("Processing 'pheno' file failed.")# pylint: disable=[broad-exception-raised]
logger.debug(
"pheno file processing completed successfully. (ExitCode: %s)",
phenoexit)
diff --git a/scripts/qc_on_rqtl2_bundle.py b/scripts/qc_on_rqtl2_bundle.py
index 9f9248c..0207938 100644
--- a/scripts/qc_on_rqtl2_bundle.py
+++ b/scripts/qc_on_rqtl2_bundle.py
@@ -191,7 +191,7 @@ def check_pheno_samples(
return allerrors
-def qc_pheno_errors(# pylint: disable=[too-many-arguments]
+def qc_pheno_errors(# pylint: disable=[too-many-arguments, too-many-positional-arguments]
rconn, fqjobid, dburi, speciesid, zfile, logger) -> bool:
"""Check for errors in `pheno` file(s)."""
cdata = rqtl2.control_data(zfile)
@@ -260,7 +260,7 @@ def run_qc(rconn: Redis,
if qc_missing_files(rconn, fqjobid, zfile, logger):
return 1
- def with_zipfile(# pylint: disable=[too-many-arguments]
+ def with_zipfile(# pylint: disable=[too-many-arguments,too-many-positional-arguments]
rconn, fqjobid, dbconn, speciesid, filename, logger, func
):
with ZipFile(filename, "r") as zfile:
diff --git a/scripts/redis_logger.py b/scripts/redis_logger.py
index d3fde5f..a74e5e4 100644
--- a/scripts/redis_logger.py
+++ b/scripts/redis_logger.py
@@ -6,7 +6,7 @@ from redis import Redis
class RedisLogger(logging.Handler):
"""Log out to redis for our worker scripts"""
- def __init__(self,#pylint: disable=[too-many-arguments]
+ def __init__(self,#pylint: disable=[too-many-arguments, too-many-positional-arguments]
rconn: Redis,
fullyqualifiedjobid: str,
messageslistname: str,
diff --git a/scripts/rqtl2/phenotypes_qc.py b/scripts/rqtl2/phenotypes_qc.py
index 5c89ca0..98b855f 100644
--- a/scripts/rqtl2/phenotypes_qc.py
+++ b/scripts/rqtl2/phenotypes_qc.py
@@ -41,15 +41,15 @@ logging.basicConfig(
"(%(pathname)s: %(lineno)d) %(message)s"))
logger = logging.getLogger(__MODULE__)
-def validate(phenobundle: Path, logger: Logger) -> dict:
+def validate(phenobundle: Path, alogger: Logger) -> dict:
"""Check that the bundle is generally valid"""
try:
rqc.validate_bundle(phenobundle)
except rqe.RQTLError as rqtlerr:
- # logger.error("Bundle file validation failed!", exc_info=True)
+ # alogger.error("Bundle file validation failed!", exc_info=True)
return {
"skip": True,
- "logger": logger,
+ "logger": alogger,
"phenobundle": phenobundle,
"errors": (" ".join(rqtlerr.args),)
}
@@ -57,20 +57,20 @@ def validate(phenobundle: Path, logger: Logger) -> dict:
"errors": tuple(),
"skip": False,
"phenobundle": phenobundle,
- "logger": logger
+ "logger": alogger
}
def check_for_mandatory_pheno_keys(
phenobundle: Path,
- logger: Logger,
+ alogger: Logger,
**kwargs
) -> dict:
"""Check that the mandatory keys exist for phenotypes."""
if kwargs.get("skip", False):
return {
**kwargs,
- "logger": logger,
+ "logger": alogger,
"phenobundle": phenobundle
}
@@ -81,7 +81,7 @@ def check_for_mandatory_pheno_keys(
for key in _mandatory_keys if key not in _cdata.keys())
return {
**kwargs,
- "logger": logger,
+ "logger": alogger,
"phenobundle": phenobundle,
"errors": _errors,
"skip": len(_errors) > 0
@@ -90,14 +90,14 @@ def check_for_mandatory_pheno_keys(
def check_for_averages_files(
phenobundle: Path,
- logger: Logger,
+ alogger: Logger,
**kwargs
) -> dict:
"""Check that averages files appear together"""
if kwargs.get("skip", False):
return {
**kwargs,
- "logger": logger,
+ "logger": alogger,
"phenobundle": phenobundle
}
@@ -110,7 +110,7 @@ def check_for_averages_files(
if ((first in _cdata.keys()) and (second not in _cdata.keys())))
return {
**kwargs,
- "logger": logger,
+ "logger": alogger,
"phenobundle": phenobundle,
"errors": _errors,
"skip": len(_errors) > 0
@@ -144,15 +144,15 @@ def redis_logger(
) -> Iterator[logging.Logger]:
"""Build a Redis message-list logger."""
rconn = Redis.from_url(redisuri, decode_responses=True)
- logger = logging.getLogger(loggername)
- logger.propagate = False
+ _logger = logging.getLogger(loggername)
+ _logger.propagate = False
handler = RedisMessageListHandler(
rconn,
fullyqualifiedkey(fqkey, filename))#type: ignore[arg-type]
handler.setFormatter(logging.getLogger().handlers[0].formatter)
- logger.addHandler(handler)
+ _logger.addHandler(handler)
try:
- yield logger
+ yield _logger
finally:
rconn.close()
@@ -179,7 +179,7 @@ def qc_phenocovar_file(
redisuri,
f"{__MODULE__}.qc_phenocovar_file",
filepath.name,
- f"{fqkey}:logs") as logger,
+ f"{fqkey}:logs") as _logger,
Redis.from_url(redisuri, decode_responses=True) as rconn):
print("Running QC on file: ", filepath.name)
_csvfile = rqtl2.read_csv_file(filepath, separator, comment_char)
@@ -199,7 +199,7 @@ def qc_phenocovar_file(
def collect_errors(errors_and_linecount, line):
_errs, _lc = errors_and_linecount
- logger.info("Testing record '%s'", line[0])
+ _logger.info("Testing record '%s'", line[0])
if len(line) != len(_headings):
_errs = _errs + (save_error(InvalidValue(
filepath.name,
@@ -240,7 +240,7 @@ def merge_dicts(*dicts):
return reduce(lambda merged, dct: {**merged, **dct}, dicts, {})
-def decimal_points_error(# pylint: disable=[too-many-arguments]
+def decimal_points_error(# pylint: disable=[too-many-arguments,too-many-positional-arguments]
filename: str,
rowtitle: str,
coltitle: str,
@@ -271,7 +271,7 @@ def integer_error(
return InvalidValue(filename, rowtitle, coltitle, cellvalue, message)
-def qc_pheno_file(# pylint: disable=[too-many-locals, too-many-arguments]
+def qc_pheno_file(# pylint: disable=[too-many-locals, too-many-arguments, too-many-positional-arguments]
filepath: Path,
redisuri: str,
fqkey: str,
@@ -287,7 +287,7 @@ def qc_pheno_file(# pylint: disable=[too-many-locals, too-many-arguments]
redisuri,
f"{__MODULE__}.qc_pheno_file",
filepath.name,
- f"{fqkey}:logs") as logger,
+ f"{fqkey}:logs") as _logger,
Redis.from_url(redisuri, decode_responses=True) as rconn):
print("Running QC on file: ", filepath.name)
save_error = partial(
@@ -314,7 +314,7 @@ def qc_pheno_file(# pylint: disable=[too-many-locals, too-many-arguments]
def collect_errors(errors_and_linecount, line):
_errs, _lc = errors_and_linecount
- logger.debug("Checking row %s", line[0])
+ _logger.debug("Checking row %s", line[0])
if line[0] not in samples:
_errs = _errs + (save_error(InvalidValue(
filepath.name,