From 6c8ab5537c7fb7eb94c62415269e193516283f6d Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 17 May 2021 20:10:30 +0000 Subject: Register rqtl blueprint --- gn3/app.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'gn3') diff --git a/gn3/app.py b/gn3/app.py index dc89f55..046b5de 100644 --- a/gn3/app.py +++ b/gn3/app.py @@ -5,6 +5,7 @@ from typing import Dict from typing import Union from flask import Flask from gn3.api.gemma import gemma +from gn3.api.rqtl import rqtl from gn3.api.general import general from gn3.api.correlation import correlation from gn3.api.data_entry import data_entry @@ -28,6 +29,7 @@ def create_app(config: Union[Dict, str, None] = None) -> Flask: app.config.from_pyfile(config) app.register_blueprint(general, url_prefix="/api/") app.register_blueprint(gemma, url_prefix="/api/gemma") + app.register_blueprint(rqtl, url_prefix="/api/rqtl") app.register_blueprint(correlation, url_prefix="/api/correlation") app.register_blueprint(data_entry, url_prefix="/api/dataentry") return app -- cgit v1.2.3 From 9b628abcee86ef29e869b92a66b6034c1b63359d Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 17 May 2021 20:11:15 +0000 Subject: Add rqtl.py for rqtl endpoints --- gn3/api/rqtl.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 gn3/api/rqtl.py (limited to 'gn3') diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py new file mode 100644 index 0000000..8dd4bb7 --- /dev/null +++ b/gn3/api/rqtl.py @@ -0,0 +1,19 @@ +import os + +from flask import Blueprint +from flask import current_app +from flask import request + +rqtl = Blueprint("rqtl", __name__) + +@rqtl.route("/compute", methods=["POST"]) +def compute(): + working_dir = os.path.join(current_app.config.get("TMPDIR")) + + genofile = request.form['geno_file'] + phenofile = request.form['pheno_file'] + + if not do_paths_exist([genofile, phenofile]): + raise FileNotFoundError + + return current_app.config.get("RQTL_WRAPPER_CMD") \ No newline at end of file -- cgit v1.2.3 From 
ea9d9d6454783a6af07384a66f204e199035e5bd Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 17 May 2021 21:52:43 +0000 Subject: Added RQTL_WRAPPER_CMD (which is fetched from environment) in settings.py --- gn3/settings.py | 1 + 1 file changed, 1 insertion(+) (limited to 'gn3') diff --git a/gn3/settings.py b/gn3/settings.py index 2057ce1..ecfd502 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -6,6 +6,7 @@ import os BCRYPT_SALT = "$2b$12$mxLvu9XRLlIaaSeDxt8Sle" # Change this! DATA_DIR = "" GEMMA_WRAPPER_CMD = os.environ.get("GEMMA_WRAPPER", "gemma-wrapper") +RQTL_WRAPPER_CMD = os.environ.get("RQTL_WRAPPER") CACHEDIR = "" REDIS_URI = "redis://localhost:6379/0" REDIS_JOB_QUEUE = "GN3::job-queue" -- cgit v1.2.3 From 7ed84670c0d13de38b578a4e4177b2529ff3fb40 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 17 May 2021 21:54:42 +0000 Subject: Read in kwargs fromrequest and pass the command, tmpdir, and kwargs to generate_rqtl_cmd which returns the actual command and output path --- gn3/api/rqtl.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'gn3') diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 8dd4bb7..82cf34f 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -2,8 +2,12 @@ import os from flask import Blueprint from flask import current_app +from flask import jsonify from flask import request +from gn3.computations.rqtl import generate_rqtl_cmd +from gn3.computations.gemma import do_paths_exist + rqtl = Blueprint("rqtl", __name__) @rqtl.route("/compute", methods=["POST"]) @@ -16,4 +20,18 @@ def compute(): if not do_paths_exist([genofile, phenofile]): raise FileNotFoundError - return current_app.config.get("RQTL_WRAPPER_CMD") \ No newline at end of file + kwarg_list = ["addcovar", "model", "method", "interval", "nperm", "scale", "control_marker"] + + rqtl_kwargs = {"geno": genofile, "pheno": phenofile} + for kwarg in kwarg_list: + if kwarg in request.form: + rqtl_kwargs[kwarg] = request.form[kwarg] + + results = 
generate_rqtl_cmd( + rqtl_wrapper_cmd = current_app.config.get("RQTL_WRAPPER_CMD"), + output_dir = current_app.config.get('TMPDIR'), + rqtl_wrapper_kwargs = rqtl_kwargs + ) + + return jsonify(results) + -- cgit v1.2.3 From 624ece086d026da9150cd35b2404874ccf607b07 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 17 May 2021 21:56:51 +0000 Subject: Created compose_rqtl_command and generate_rqtl_command to create the actual command to be run from the command line; used the same pattern as for GEMMA for consistency --- gn3/commands.py | 7 +++++++ gn3/computations/rqtl.py | 25 +++++++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 gn3/computations/rqtl.py (limited to 'gn3') diff --git a/gn3/commands.py b/gn3/commands.py index 4b0d62d..db32d1f 100644 --- a/gn3/commands.py +++ b/gn3/commands.py @@ -30,6 +30,13 @@ def compose_gemma_cmd(gemma_wrapper_cmd: str = "gemma-wrapper", cmd += " ".join([f"{arg}" for arg in gemma_args]) return cmd +def compose_rqtl_cmd(rqtl_wrapper_cmd: str, + rqtl_wrapper_kwargs: Dict) -> str: + """Compose a valid R/qtl command given the correct input""" + cmd = rqtl_wrapper_cmd + " " + " ".join( + [f"--{key} {val}" for key, val in rqtl_wrapper_kwargs.items()]) + + return cmd def queue_cmd(conn: Redis, job_queue: str, diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py new file mode 100644 index 0000000..087a99f --- /dev/null +++ b/gn3/computations/rqtl.py @@ -0,0 +1,25 @@ +"""Procedures related rqtl computations""" +import os + +from typing import Dict +from gn3.commands import compose_rqtl_cmd +from gn3.fs_helpers import get_hash_of_files + +def generate_rqtl_cmd(rqtl_wrapper_cmd: str, + output_dir: str, + rqtl_wrapper_kwargs: Dict) -> Dict: + + _hash = get_hash_of_files( + [v for k, v in rqtl_wrapper_kwargs.items() if k in ["g", "p", "addcovar", + "model", "method", + "interval", "nperm", + "scale", "control"]]) + + _output_filename = f"{_hash}-output.json" + return { + "output_file": + _output_filename, + 
"rqtl_cmd": + compose_rqtl_cmd(rqtl_wrapper_cmd=rqtl_wrapper_cmd, + rqtl_wrapper_kwargs=rqtl_wrapper_kwargs) + } \ No newline at end of file -- cgit v1.2.3 From 0a8754a582f057bd335441eab15da3f629df9ad7 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 17 May 2021 23:56:10 +0000 Subject: Fixed variety of issues detected by pylint --- gn3/api/rqtl.py | 13 ++++++------- gn3/computations/rqtl.py | 50 ++++++++++++++++++++++++------------------------ 2 files changed, 31 insertions(+), 32 deletions(-) (limited to 'gn3') diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 82cf34f..7756310 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -1,5 +1,4 @@ -import os - +"""Endpoints for running the rqtl cmd""" from flask import Blueprint from flask import current_app from flask import jsonify @@ -12,8 +11,10 @@ rqtl = Blueprint("rqtl", __name__) @rqtl.route("/compute", methods=["POST"]) def compute(): - working_dir = os.path.join(current_app.config.get("TMPDIR")) + """Given at least a geno_file and pheno_file, generate and +run the rqtl_wrapper script and return the results as JSON + """ genofile = request.form['geno_file'] phenofile = request.form['pheno_file'] @@ -28,10 +29,8 @@ def compute(): rqtl_kwargs[kwarg] = request.form[kwarg] results = generate_rqtl_cmd( - rqtl_wrapper_cmd = current_app.config.get("RQTL_WRAPPER_CMD"), - output_dir = current_app.config.get('TMPDIR'), - rqtl_wrapper_kwargs = rqtl_kwargs + rqtl_wrapper_cmd=current_app.config.get("RQTL_WRAPPER_CMD"), + rqtl_wrapper_kwargs=rqtl_kwargs ) return jsonify(results) - diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 087a99f..0e8cd1f 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,25 +1,25 @@ -"""Procedures related rqtl computations""" -import os - -from typing import Dict -from gn3.commands import compose_rqtl_cmd -from gn3.fs_helpers import get_hash_of_files - -def generate_rqtl_cmd(rqtl_wrapper_cmd: str, - output_dir: str, - rqtl_wrapper_kwargs: Dict) -> 
Dict: - - _hash = get_hash_of_files( - [v for k, v in rqtl_wrapper_kwargs.items() if k in ["g", "p", "addcovar", - "model", "method", - "interval", "nperm", - "scale", "control"]]) - - _output_filename = f"{_hash}-output.json" - return { - "output_file": - _output_filename, - "rqtl_cmd": - compose_rqtl_cmd(rqtl_wrapper_cmd=rqtl_wrapper_cmd, - rqtl_wrapper_kwargs=rqtl_wrapper_kwargs) - } \ No newline at end of file +"""Procedures related rqtl computations""" + +from typing import Dict +from gn3.commands import compose_rqtl_cmd +from gn3.fs_helpers import get_hash_of_files + +def generate_rqtl_cmd(rqtl_wrapper_cmd: str, + rqtl_wrapper_kwargs: Dict) -> Dict: + """Given the base rqtl_wrapper command and +dict of keyword arguments, return the full rqtl_wrapper command and an +output filename generated from a hash of the genotype and phenotype files + + """ + + _hash = get_hash_of_files( + [v for k, v in rqtl_wrapper_kwargs.items() if k in ["g", "p"]]) + + _output_filename = f"{_hash}-output.json" + return { + "output_file": + _output_filename, + "rqtl_cmd": + compose_rqtl_cmd(rqtl_wrapper_cmd=rqtl_wrapper_cmd, + rqtl_wrapper_kwargs=rqtl_wrapper_kwargs) + } -- cgit v1.2.3 From 74bc179807e80c1ee0f89cd98953263f68a05661 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 18 May 2021 19:40:46 +0000 Subject: Fixed generate_rqtl_cmd to make the kwarg hash from a combination of keywords and arguments + account for boolean kwargs without values (like --interval or --covar) --- gn3/computations/rqtl.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'gn3') diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 0e8cd1f..855a819 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -2,24 +2,36 @@ from typing import Dict from gn3.commands import compose_rqtl_cmd +from gn3.computations.gemma import generate_hash_of_string from gn3.fs_helpers import get_hash_of_files def generate_rqtl_cmd(rqtl_wrapper_cmd: str, - 
rqtl_wrapper_kwargs: Dict) -> Dict: + rqtl_wrapper_kwargs: Dict, + rqtl_wrapper_bool_kwargs: list) -> Dict: """Given the base rqtl_wrapper command and dict of keyword arguments, return the full rqtl_wrapper command and an output filename generated from a hash of the genotype and phenotype files """ + # Generate a hash from contents of the genotype and phenotype files _hash = get_hash_of_files( [v for k, v in rqtl_wrapper_kwargs.items() if k in ["g", "p"]]) + # Append to hash a hash of keyword arguments + _hash += generate_hash_of_string( + ",".join([f"{k}:{v}" for k, v in rqtl_wrapper_kwargs.items() if k not in ["g", "p"]])) + + # Append to hash a hash of boolean keyword arguments + _hash += generate_hash_of_string( + ",".join(rqtl_wrapper_bool_kwargs)) + _output_filename = f"{_hash}-output.json" return { "output_file": _output_filename, "rqtl_cmd": compose_rqtl_cmd(rqtl_wrapper_cmd=rqtl_wrapper_cmd, - rqtl_wrapper_kwargs=rqtl_wrapper_kwargs) + rqtl_wrapper_kwargs=rqtl_wrapper_kwargs, + rqtl_wrapper_bool_kwargs=rqtl_wrapper_bool_kwargs) } -- cgit v1.2.3 From e61aa16f0bc3dd282060585e655e497fa3d06b49 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 18 May 2021 19:41:01 +0000 Subject: Account for boolean kwargs in compose_rqtl_cmd --- gn3/commands.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'gn3') diff --git a/gn3/commands.py b/gn3/commands.py index db32d1f..add715c 100644 --- a/gn3/commands.py +++ b/gn3/commands.py @@ -31,11 +31,18 @@ def compose_gemma_cmd(gemma_wrapper_cmd: str = "gemma-wrapper", return cmd def compose_rqtl_cmd(rqtl_wrapper_cmd: str, - rqtl_wrapper_kwargs: Dict) -> str: + rqtl_wrapper_kwargs: Dict, + rqtl_wrapper_bool_kwargs: list) -> str: """Compose a valid R/qtl command given the correct input""" + # Add kwargs with values cmd = rqtl_wrapper_cmd + " " + " ".join( [f"--{key} {val}" for key, val in rqtl_wrapper_kwargs.items()]) + # Add boolean kwargs (kwargs without values) + if len(rqtl_wrapper_bool_kwargs): + cmd += 
" " + cmd += " ".join([f"--{val}" for val in rqtl_wrapper_bool_kwargs]) + return cmd def queue_cmd(conn: Redis, -- cgit v1.2.3 From 33d3a518e378c56b080a2a15b264ee5d031537e8 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 18 May 2021 19:42:27 +0000 Subject: Account for boolean kwargs in compute() by storing them in a list, since they don't have corresponding values --- gn3/api/rqtl.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'gn3') diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 7756310..de620f7 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -21,16 +21,24 @@ run the rqtl_wrapper script and return the results as JSON if not do_paths_exist([genofile, phenofile]): raise FileNotFoundError - kwarg_list = ["addcovar", "model", "method", "interval", "nperm", "scale", "control_marker"] + # Split kwargs by those with values and boolean ones that just convert to True/False + kwargs = ["model", "method", "nperm", "scale", "control_marker"] + boolean_kwargs = ["addcovar", "interval"] + all_kwargs = kwargs + boolean_kwargs rqtl_kwargs = {"geno": genofile, "pheno": phenofile} - for kwarg in kwarg_list: + rqtl_bool_kwargs = [] + for kwarg in all_kwargs: if kwarg in request.form: - rqtl_kwargs[kwarg] = request.form[kwarg] + if kwarg in kwargs: + rqtl_kwargs[kwarg] = request.form[kwarg] + if kwarg in boolean_kwargs: + rqtl_bool_kwargs.append(kwarg) results = generate_rqtl_cmd( rqtl_wrapper_cmd=current_app.config.get("RQTL_WRAPPER_CMD"), - rqtl_wrapper_kwargs=rqtl_kwargs + rqtl_wrapper_kwargs=rqtl_kwargs, + rqtl_wrapper_bool_kwargs=boolean_kwargs ) return jsonify(results) -- cgit v1.2.3 From 36365588d95d96458da02090ebef21a02366784a Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 18 May 2021 19:49:44 +0000 Subject: Removed len from this if statement, since an empty list evaluates to False by itself --- gn3/commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gn3') diff --git a/gn3/commands.py 
b/gn3/commands.py index add715c..255ea1d 100644 --- a/gn3/commands.py +++ b/gn3/commands.py @@ -39,7 +39,7 @@ def compose_rqtl_cmd(rqtl_wrapper_cmd: str, [f"--{key} {val}" for key, val in rqtl_wrapper_kwargs.items()]) # Add boolean kwargs (kwargs without values) - if len(rqtl_wrapper_bool_kwargs): + if rqtl_wrapper_bool_kwargs: cmd += " " cmd += " ".join([f"--{val}" for val in rqtl_wrapper_bool_kwargs]) -- cgit v1.2.3 From e29a349b46d932411879a810fb0be3a0042bf540 Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 19 May 2021 20:03:01 +0000 Subject: Temporarily replace forward-slashes with underscores, since they can be included in the hashes used for filenames --- gn3/computations/rqtl.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'gn3') diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 855a819..605e0e1 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -26,6 +26,9 @@ output filename generated from a hash of the genotype and phenotype files _hash += generate_hash_of_string( ",".join(rqtl_wrapper_bool_kwargs)) + # Temporarily substitute forward-slashes in hash with underscores + _hash = _hash.replace("/", "_") + _output_filename = f"{_hash}-output.json" return { "output_file": -- cgit v1.2.3 From d3a4146fd38fc1d372091cecadfcf7c8fb377f3b Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 25 May 2021 20:27:05 +0000 Subject: Include code that processes rqtl output files and returns actual results instead of just the output filename --- gn3/api/rqtl.py | 16 +++++++++--- gn3/computations/rqtl.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 75 insertions(+), 5 deletions(-) (limited to 'gn3') diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index de620f7..0194b6f 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -1,10 +1,13 @@ """Endpoints for running the rqtl cmd""" +import os + from flask import Blueprint from flask import current_app from flask import jsonify from flask import request -from 
gn3.computations.rqtl import generate_rqtl_cmd +from gn3.commands import run_cmd +from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output from gn3.computations.gemma import do_paths_exist rqtl = Blueprint("rqtl", __name__) @@ -35,10 +38,17 @@ run the rqtl_wrapper script and return the results as JSON if kwarg in boolean_kwargs: rqtl_bool_kwargs.append(kwarg) - results = generate_rqtl_cmd( + rqtl_cmd = generate_rqtl_cmd( rqtl_wrapper_cmd=current_app.config.get("RQTL_WRAPPER_CMD"), rqtl_wrapper_kwargs=rqtl_kwargs, rqtl_wrapper_bool_kwargs=boolean_kwargs ) - return jsonify(results) + os.system(rqtl_cmd.get('rqtl_cmd')) + + rqtl_output = {} + rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) + if int(rqtl_kwargs['nperm']) > 0: + rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = process_perm_output(rqtl_cmd.get('output_file')) + + return jsonify(rqtl_output) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 605e0e1..22d9faf 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,6 +1,11 @@ """Procedures related rqtl computations""" - +import os +import numpy as np from typing import Dict +from typing import List + +from flask import current_app + from gn3.commands import compose_rqtl_cmd from gn3.computations.gemma import generate_hash_of_string from gn3.fs_helpers import get_hash_of_files @@ -29,7 +34,9 @@ output filename generated from a hash of the genotype and phenotype files # Temporarily substitute forward-slashes in hash with underscores _hash = _hash.replace("/", "_") - _output_filename = f"{_hash}-output.json" + _output_filename = f"{_hash}-output.csv" + rqtl_wrapper_kwargs["filename"] = _output_filename + return { "output_file": _output_filename, @@ -38,3 +45,56 @@ output filename generated from a hash of the genotype and phenotype files rqtl_wrapper_kwargs=rqtl_wrapper_kwargs, 
rqtl_wrapper_bool_kwargs=rqtl_wrapper_bool_kwargs) } + + +def process_rqtl_output(file_name: str) -> List: + """Given an output file name, read in R/qtl results and return + a List of marker objects + + """ + marker_obs = [] + # Later I should probably redo this using csv.read to avoid the + # awkwardness with removing quotes with [1:-1] + with open(os.path.join(current_app.config.get("TMPDIR"), "output", file_name), "r") as the_file: + for line in the_file: + line_items = line.split(",") + if line_items[1][1:-1] == "chr" or not line_items: + # Skip header line + continue + else: + # Convert chr to int if possible + try: + the_chr = int(line_items[1][1:-1]) + except: + the_chr = line_items[1][1:-1] + this_marker = { + "name": line_items[0][1:-1], + "chr": the_chr, + "cM": float(line_items[2]), + "Mb": float(line_items[2]), + "lod_score": float(line_items[3]) + } + marker_obs.append(this_marker) + + return marker_obs + + +def process_perm_output(file_name: str): + """Given base filename, read in R/qtl permutation output and calculate + suggestive and significant thresholds + + """ + perm_results = [] + with open(os.path.join(current_app.config.get("TMPDIR"), "output", "PERM_" + file_name), "r") as the_file: + for i, line in enumerate(the_file): + if i == 0: + # Skip header line + continue + else: + line_items = line.split(",") + perm_results.append(float(line_items[1])) + + suggestive = np.percentile(np.array(perm_results), 67) + significant = np.percentile(np.array(perm_results), 95) + + return perm_results, suggestive, significant -- cgit v1.2.3 From bf0a8cb22c0cc0c1dfe25740f88a4cb159dd0064 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 25 May 2021 20:27:33 +0000 Subject: Fix R/qtl command and the way keyword arguments are passed --- gn3/commands.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gn3') diff --git a/gn3/commands.py b/gn3/commands.py index 255ea1d..14bd295 100644 --- a/gn3/commands.py +++ b/gn3/commands.py @@ -35,10 +35,10 @@ 
def compose_rqtl_cmd(rqtl_wrapper_cmd: str, rqtl_wrapper_bool_kwargs: list) -> str: """Compose a valid R/qtl command given the correct input""" # Add kwargs with values - cmd = rqtl_wrapper_cmd + " " + " ".join( + cmd = f"Rscript { rqtl_wrapper_cmd } " + " ".join( [f"--{key} {val}" for key, val in rqtl_wrapper_kwargs.items()]) - # Add boolean kwargs (kwargs without values) + # Add boolean kwargs (kwargs that are either on or off, like --interval) if rqtl_wrapper_bool_kwargs: cmd += " " cmd += " ".join([f"--{val}" for val in rqtl_wrapper_bool_kwargs]) -- cgit v1.2.3 From 84790de8bcc51c00a92b71878088345cd58ccc51 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 28 May 2021 17:44:07 +0000 Subject: Fixed issue where all bool kwargs were always being passed to generate_rqtl_cmd and also made code check if output file already exists (so caching works) --- gn3/api/rqtl.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'gn3') diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 0194b6f..38f4c1e 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -26,7 +26,7 @@ run the rqtl_wrapper script and return the results as JSON # Split kwargs by those with values and boolean ones that just convert to True/False kwargs = ["model", "method", "nperm", "scale", "control_marker"] - boolean_kwargs = ["addcovar", "interval"] + boolean_kwargs = ["addcovar", "interval", "pstrata"] all_kwargs = kwargs + boolean_kwargs rqtl_kwargs = {"geno": genofile, "pheno": phenofile} @@ -41,12 +41,15 @@ run the rqtl_wrapper script and return the results as JSON rqtl_cmd = generate_rqtl_cmd( rqtl_wrapper_cmd=current_app.config.get("RQTL_WRAPPER_CMD"), rqtl_wrapper_kwargs=rqtl_kwargs, - rqtl_wrapper_bool_kwargs=boolean_kwargs + rqtl_wrapper_bool_kwargs=rqtl_bool_kwargs ) - os.system(rqtl_cmd.get('rqtl_cmd')) - rqtl_output = {} + if not os.path.isfile(os.path.join(current_app.config.get("TMPDIR"), "output", rqtl_cmd.get('output_file'))): + os.system(rqtl_cmd.get('rqtl_cmd')) + + 
rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) + rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) if int(rqtl_kwargs['nperm']) > 0: rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = process_perm_output(rqtl_cmd.get('output_file')) -- cgit v1.2.3 From c5488d5c4556e84397a36c3dbc27dab377749bb3 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 17 May 2021 14:33:10 +0300 Subject: db: phenotypes: Add a way of updating the Phenotype table * gn3/db/phenotypes.py (Phenotype): New dataclass. (update_phenotype): New function. [phenotype_column_mapping]: New variable. --- gn3/db/phenotypes.py | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 gn3/db/phenotypes.py (limited to 'gn3') diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py new file mode 100644 index 0000000..46a54bd --- /dev/null +++ b/gn3/db/phenotypes.py @@ -0,0 +1,61 @@ +"""This contains all the necessary functions that access the phenotypes from +the db""" +from dataclasses import dataclass, asdict, astuple + +from typing import Any, Optional +from MySQLdb import escape_string + + +# pylint: disable=[R0902] +@dataclass(frozen=True) +class Phenotype: + """Data Type that represents a Phenotype""" + id_: Optional[int] = None + pre_pub_description: Optional[str] = None + post_pub_description: Optional[str] = None + original_description: Optional[str] = None + units: Optional[str] = None + pre_pub_abbrevition: Optional[str] = None + post_pub_abbreviation: Optional[str] = None + lab_code: Optional[str] = None + submitter: Optional[str] = None + owner: Optional[str] = None + authorized_users: Optional[str] = None + + +# Mapping from the Phenotype dataclass to the actual column names in the +# database +phenotype_column_mapping = { + "id_": "id", + "pre_pub_description": "Pre_publication_description", + "post_pub_description": "Post_publication_description", + 
"original_description": "Original_description", + "units": "Units", + "pre_pub_abbrevition": "Pre_publication_abbreviation", + "post_pub_abbreviation": "Post_publication_abbreviation", + "lab_code": "Lab_code", + "submitter": "Submitter", + "owner": "Owner", + "authorized_users": "Authorized_Users", +} + + +def update_phenotype(conn: Any, + data: Phenotype, + where: Phenotype) -> Optional[int]: + """Update phenotype metadata with DATA that depends on the WHERE clause""" + if not any(astuple(data) + astuple(where)): + return None + sql = "UPDATE Phenotype SET " + sql += ", ".join(f"{phenotype_column_mapping.get(k)} " + f"= '{escape_string(str(v)).decode('utf-8')}'" for + k, v in asdict(data).items() + if v is not None and k in phenotype_column_mapping) + sql += " WHERE " + sql += "AND ".join(f"{phenotype_column_mapping.get(k)} = " + f"'{escape_string(str(v)).decode('utf-8')}'" for + k, v in asdict(where).items() + if v is not None and k in phenotype_column_mapping) + with conn.cursor() as cursor: + cursor.execute(sql) + return cursor.rowcount -- cgit v1.2.3 From 18826b348149f1233bcf3e2e4cda5da5297ecb6b Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 19 May 2021 21:32:24 +0300 Subject: db: phenotype: Make "pylint: disable=[R0902]" global for file --- gn3/db/phenotypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gn3') diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index 46a54bd..355c2e2 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -1,3 +1,4 @@ +# pylint: disable=[R0902] """This contains all the necessary functions that access the phenotypes from the db""" from dataclasses import dataclass, asdict, astuple @@ -6,7 +7,6 @@ from typing import Any, Optional from MySQLdb import escape_string -# pylint: disable=[R0902] @dataclass(frozen=True) class Phenotype: """Data Type that represents a Phenotype""" -- cgit v1.2.3 From 58ebaba0a9249e6fa39d8059b32c17623113a205 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 
19 May 2021 21:33:00 +0300 Subject: db: phenotypes: Add dataclass to represent PublishXRef --- gn3/db/phenotypes.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'gn3') diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index 355c2e2..11d67db 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -23,6 +23,22 @@ class Phenotype: authorized_users: Optional[str] = None +@dataclass(frozen=True) +class PublishXRef: + """Data Type that represents the table PublishXRef""" + id_: Optional[int] = None + inbred_set_id: Optional[str] = None + phenotype_id: Optional[int] = None + publication_id: Optional[str] = None + data_id: Optional[int] = None + mean: Optional[float] = None + locus: Optional[str] = None + lrs: Optional[float] = None + additive: Optional[float] = None + sequence: Optional[int] = None + comments: Optional[str] = None + + # Mapping from the Phenotype dataclass to the actual column names in the # database phenotype_column_mapping = { -- cgit v1.2.3 From dfafb7c11cb4f57c7e1a61de2ce2f1b62e1018be Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 19 May 2021 21:44:34 +0300 Subject: db: phenotypes: Add phenotype table mapping --- gn3/db/phenotypes.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'gn3') diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index 11d67db..645c0af 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -23,6 +23,23 @@ class Phenotype: authorized_users: Optional[str] = None +# Mapping from the Phenotype dataclass to the actual column names in the +# database +phenotype_column_mapping = { + "id_": "id", + "pre_pub_description": "Pre_publication_description", + "post_pub_description": "Post_publication_description", + "original_description": "Original_description", + "units": "Units", + "pre_pub_abbrevition": "Pre_publication_abbreviation", + "post_pub_abbreviation": "Post_publication_abbreviation", + "lab_code": "Lab_code", + "submitter": "Submitter", + "owner": 
"Owner", + "authorized_users": "Authorized_Users", +} + + @dataclass(frozen=True) class PublishXRef: """Data Type that represents the table PublishXRef""" -- cgit v1.2.3 From a1f48d95f17e939512fa9c276caf39d9f75878f9 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 19 May 2021 21:44:58 +0300 Subject: db: phenotypes: Put mapping def after dataclass --- gn3/db/phenotypes.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'gn3') diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index 645c0af..be5fb7b 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -56,20 +56,20 @@ class PublishXRef: comments: Optional[str] = None -# Mapping from the Phenotype dataclass to the actual column names in the +# Mapping from the PublishXRef dataclass to the actual column names in the # database -phenotype_column_mapping = { - "id_": "id", - "pre_pub_description": "Pre_publication_description", - "post_pub_description": "Post_publication_description", - "original_description": "Original_description", - "units": "Units", - "pre_pub_abbrevition": "Pre_publication_abbreviation", - "post_pub_abbreviation": "Post_publication_abbreviation", - "lab_code": "Lab_code", - "submitter": "Submitter", - "owner": "Owner", - "authorized_users": "Authorized_Users", +publish_x_ref_mapping = { + "id_": "Id", + "inbred_set_id": "InbredSetId", + "phenotype_id": "PhenotypeId", + "publication_id": "PublicationId", + "data_id": "DataId", + "mean": "mean", + "locus": "locus", + "lrs": "lrs", + "additive": "additive", + "sequence": "sequence", + "comments": "comments", } -- cgit v1.2.3 From 5c48d14d95b46caa10bcdbd80aec1ae04ec7f225 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 19 May 2021 22:00:02 +0300 Subject: db: phenotypes: Add type for Dataclass See: https://www.py4u.net/discuss/188952 --- gn3/db/phenotypes.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'gn3') diff --git a/gn3/db/phenotypes.py 
b/gn3/db/phenotypes.py index be5fb7b..e97322a 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -3,9 +3,16 @@ the db""" from dataclasses import dataclass, asdict, astuple -from typing import Any, Optional +from typing import Any, Dict, Optional from MySQLdb import escape_string +from typing_extensions import Protocol + + +class Dataclass(Protocol): + """Type Definition for a Dataclass""" + __dataclass_fields__: Dict + @dataclass(frozen=True) class Phenotype: -- cgit v1.2.3 From d7571e3c30aa7b1f312fcf975e7336b9f2912709 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 19 May 2021 22:00:30 +0300 Subject: db: phenotypes: Map a table to it's relevant dict mapping --- gn3/db/phenotypes.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'gn3') diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index e97322a..f6ca944 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -79,6 +79,11 @@ publish_x_ref_mapping = { "comments": "comments", } +TABLEMAP = { + "Phenotype": phenotype_column_mapping, + "PublishXRef": publish_x_ref_mapping, +} + def update_phenotype(conn: Any, data: Phenotype, -- cgit v1.2.3 From b30a029cc3c0771a5f5ecaeed663cca24c88f534 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 19 May 2021 22:09:46 +0300 Subject: db: phenotypes: Generalise the update function * gn3/db/phenotypes.py (update_phenotype): Delete it. (update): New, more general function. 
--- gn3/db/phenotypes.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'gn3') diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index f6ca944..fdb148b 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -1,4 +1,4 @@ -# pylint: disable=[R0902] +# pylint: disable=[R0902, R0903] """This contains all the necessary functions that access the phenotypes from the db""" from dataclasses import dataclass, asdict, astuple @@ -85,22 +85,23 @@ TABLEMAP = { } -def update_phenotype(conn: Any, - data: Phenotype, - where: Phenotype) -> Optional[int]: - """Update phenotype metadata with DATA that depends on the WHERE clause""" +def update(conn: Any, + table: str, + data: Dataclass, + where: Dataclass) -> Optional[int]: + """Run an UPDATE on a table""" if not any(astuple(data) + astuple(where)): return None - sql = "UPDATE Phenotype SET " - sql += ", ".join(f"{phenotype_column_mapping.get(k)} " + sql = f"UPDATE {table} SET " + sql += ", ".join(f"{TABLEMAP[table].get(k)} " f"= '{escape_string(str(v)).decode('utf-8')}'" for k, v in asdict(data).items() - if v is not None and k in phenotype_column_mapping) + if v is not None and k in TABLEMAP[table]) sql += " WHERE " - sql += "AND ".join(f"{phenotype_column_mapping.get(k)} = " + sql += "AND ".join(f"{TABLEMAP[table].get(k)} = " f"'{escape_string(str(v)).decode('utf-8')}'" for k, v in asdict(where).items() - if v is not None and k in phenotype_column_mapping) + if v is not None and k in TABLEMAP[table]) with conn.cursor() as cursor: cursor.execute(sql) return cursor.rowcount -- cgit v1.2.3 From 1965fef7170a5de14988bc672424be2be6884ec2 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 19 May 2021 22:23:56 +0300 Subject: db: phenotypes: Add Publication dataclass and mapping --- gn3/db/phenotypes.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'gn3') diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index fdb148b..92d8e84 100644 --- 
a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -79,6 +79,36 @@ publish_x_ref_mapping = { "comments": "comments", } + +@dataclass(frozen=True) +class Publication: + """Data Type that represents the table Publication""" + id_: Optional[int] = None + pubmed_id: Optional[int] = None + abstract: Optional[str] = None + authors: Optional[str] = None + title: Optional[str] = None + journal: Optional[str] = None + volume: Optional[str] = None + pages: Optional[str] = None + month: Optional[str] = None + year: Optional[str] = None + + +publication_mapping = { + "id_": "id", + "PubMed_ID": "pubmed_id", + "Abstract": "abstract", + "Authors": "authors", + "Title": "title", + "Journal": "journal", + "Volume": "volume", + "Pages": "pages", + "Month": "month", + "Year": "year", +} + + TABLEMAP = { "Phenotype": phenotype_column_mapping, "PublishXRef": publish_x_ref_mapping, -- cgit v1.2.3 From c0b617858f36de8e1c15172b36f2a17742c1658c Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 20 May 2021 12:46:37 +0300 Subject: db: phenotypes: Rename phenotype_column_mapping --- gn3/db/phenotypes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gn3') diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index 92d8e84..9e40692 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -32,7 +32,7 @@ class Phenotype: # Mapping from the Phenotype dataclass to the actual column names in the # database -phenotype_column_mapping = { +phenotype_mapping = { "id_": "id", "pre_pub_description": "Pre_publication_description", "post_pub_description": "Post_publication_description", @@ -110,7 +110,7 @@ publication_mapping = { TABLEMAP = { - "Phenotype": phenotype_column_mapping, + "Phenotype": phenotype_mapping, "PublishXRef": publish_x_ref_mapping, } -- cgit v1.2.3 From c237b50a13299dc5b4e8bfa3d719f2668f699b6c Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 20 May 2021 18:51:48 +0300 Subject: db: phenotypes: Add Publication table mapping --- 
gn3/db/phenotypes.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'gn3') diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index 9e40692..514037d 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -112,6 +112,8 @@ publication_mapping = { TABLEMAP = { "Phenotype": phenotype_mapping, "PublishXRef": publish_x_ref_mapping, + "Publication": publication_mapping, +} } -- cgit v1.2.3 From 1b6ed578ac251daf19ca2299870a0e7e9a3eb6cc Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 20 May 2021 21:25:10 +0300 Subject: db: phenotypes: Add a dataclass map Maps a string to its dataclass. --- gn3/db/phenotypes.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'gn3') diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index 514037d..c3ad683 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -114,6 +114,11 @@ TABLEMAP = { "PublishXRef": publish_x_ref_mapping, "Publication": publication_mapping, } + +DATACLASSMAP = { + "Phenotype": Phenotype, + "PublishXRef": PublishXRef, + "Publication": Publication, } -- cgit v1.2.3 From 2face963fcba5b3231e9cc26a38f1370b00eb7b0 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 20 May 2021 21:25:48 +0300 Subject: db: phenotypes: Add function for fetching a single result * gn3/db/phenotypes.py (fetchone): New function. --- gn3/db/phenotypes.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'gn3') diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index c3ad683..96cb275 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -142,3 +142,20 @@ def update(conn: Any, with conn.cursor() as cursor: cursor.execute(sql) return cursor.rowcount + + +def fetchone(conn: Any, + table: str, + where: Dataclass) -> Optional[Dataclass]: + """Run a SELECT on a table. 
Returns only one result!""" + if not any(astuple(where)): + return None + sql = f"SELECT * FROM {table} " + sql += "WHERE " + sql += "AND ".join(f"{TABLEMAP[table].get(k)} = " + f"'{escape_string(str(v)).decode('utf-8')}'" for + k, v in asdict(where).items() + if v is not None and k in TABLEMAP[table]) + with conn.cursor() as cursor: + cursor.execute(sql) + return DATACLASSMAP[table](*cursor.fetchone()) -- cgit v1.2.3 From 12e8ca8eeefa2daea25f7dbc2fc9c99310766d1b Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 20 May 2021 22:41:54 +0300 Subject: db: phenotypes: Fix typo --- gn3/db/phenotypes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gn3') diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index 96cb275..ddcd11e 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -22,7 +22,7 @@ class Phenotype: post_pub_description: Optional[str] = None original_description: Optional[str] = None units: Optional[str] = None - pre_pub_abbrevition: Optional[str] = None + pre_pub_abbreviation: Optional[str] = None post_pub_abbreviation: Optional[str] = None lab_code: Optional[str] = None submitter: Optional[str] = None @@ -38,7 +38,7 @@ phenotype_mapping = { "post_pub_description": "Post_publication_description", "original_description": "Original_description", "units": "Units", - "pre_pub_abbrevition": "Pre_publication_abbreviation", + "pre_pub_abbreviation": "Pre_publication_abbreviation", "post_pub_abbreviation": "Post_publication_abbreviation", "lab_code": "Lab_code", "submitter": "Submitter", -- cgit v1.2.3 From 57a1194a12fe1a8565d8abd6b833da57f12898b4 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 25 May 2021 14:09:47 +0300 Subject: db: phenotypes: Fix publication_mapping --- gn3/db/phenotypes.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'gn3') diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index ddcd11e..ee523ad 100644 --- a/gn3/db/phenotypes.py +++ 
b/gn3/db/phenotypes.py @@ -97,15 +97,15 @@ class Publication: publication_mapping = { "id_": "id", - "PubMed_ID": "pubmed_id", - "Abstract": "abstract", - "Authors": "authors", - "Title": "title", - "Journal": "journal", - "Volume": "volume", - "Pages": "pages", - "Month": "month", - "Year": "year", + "pubmed_id": "PubMed_ID", + "abstract": "Abstract", + "authors": "Authors", + "title": "Title", + "journal": "Journal", + "volume": "Volume", + "pages": "Pages", + "month": "Month", + "year": "Year", } -- cgit v1.2.3 From 93ab68fe650eed0bb53d77225f47f72e527e48c4 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 26 May 2021 12:05:08 +0300 Subject: Move the methods, "update" and "fetch", to gn3.db --- gn3/db/__init__.py | 70 ++++++++++++++++++++++++++++++++++++++++ gn3/db/phenotypes.py | 64 ++---------------------------------- tests/unit/db/test_phenotypes.py | 4 +-- 3 files changed, 74 insertions(+), 64 deletions(-) (limited to 'gn3') diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index e69de29..fae4d29 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -0,0 +1,70 @@ +# pylint: disable=[R0902, R0903] +"""Module that exposes common db operations""" +from typing import Optional, Dict, Any +from dataclasses import dataclass, asdict, astuple +from typing_extensions import Protocol +from MySQLdb import escape_string + +from gn3.db.phenotypes import Phenotype +from gn3.db.phenotypes import PublishXRef +from gn3.db.phenotypes import Publication + +from gn3.db.phenotypes import phenotype_mapping +from gn3.db.phenotypes import publish_x_ref_mapping +from gn3.db.phenotypes import publication_mapping + +TABLEMAP = { + "Phenotype": phenotype_mapping, + "PublishXRef": publish_x_ref_mapping, + "Publication": publication_mapping, +} + +DATACLASSMAP = { + "Phenotype": Phenotype, + "PublishXRef": PublishXRef, + "Publication": Publication, +} + + +class Dataclass(Protocol): + """Type Definition for a Dataclass""" + __dataclass_fields__: Dict + + +def update(conn: Any, 
+ table: str, + data: Dataclass, + where: Dataclass) -> Optional[int]: + """Run an UPDATE on a table""" + if not any(astuple(data) + astuple(where)): + return None + sql = f"UPDATE {table} SET " + sql += ", ".join(f"{TABLEMAP[table].get(k)} " + f"= '{escape_string(str(v)).decode('utf-8')}'" for + k, v in asdict(data).items() + if v is not None and k in TABLEMAP[table]) + sql += " WHERE " + sql += "AND ".join(f"{TABLEMAP[table].get(k)} = " + f"'{escape_string(str(v)).decode('utf-8')}'" for + k, v in asdict(where).items() + if v is not None and k in TABLEMAP[table]) + with conn.cursor() as cursor: + cursor.execute(sql) + return cursor.rowcount + + +def fetchone(conn: Any, + table: str, + where: Dataclass) -> Optional[Dataclass]: + """Run a SELECT on a table. Returns only one result!""" + if not any(astuple(where)): + return None + sql = f"SELECT * FROM {table} " + sql += "WHERE " + sql += "AND ".join(f"{TABLEMAP[table].get(k)} = " + f"'{escape_string(str(v)).decode('utf-8')}'" for + k, v in asdict(where).items() + if v is not None and k in TABLEMAP[table]) + with conn.cursor() as cursor: + cursor.execute(sql) + return DATACLASSMAP[table](*cursor.fetchone()) diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index ee523ad..2b93c85 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -1,17 +1,9 @@ # pylint: disable=[R0902, R0903] """This contains all the necessary functions that access the phenotypes from the db""" -from dataclasses import dataclass, asdict, astuple +from dataclasses import dataclass -from typing import Any, Dict, Optional -from MySQLdb import escape_string - -from typing_extensions import Protocol - - -class Dataclass(Protocol): - """Type Definition for a Dataclass""" - __dataclass_fields__: Dict +from typing import Optional @dataclass(frozen=True) @@ -107,55 +99,3 @@ publication_mapping = { "month": "Month", "year": "Year", } - - -TABLEMAP = { - "Phenotype": phenotype_mapping, - "PublishXRef": publish_x_ref_mapping, - "Publication": 
publication_mapping, -} - -DATACLASSMAP = { - "Phenotype": Phenotype, - "PublishXRef": PublishXRef, - "Publication": Publication, -} - - -def update(conn: Any, - table: str, - data: Dataclass, - where: Dataclass) -> Optional[int]: - """Run an UPDATE on a table""" - if not any(astuple(data) + astuple(where)): - return None - sql = f"UPDATE {table} SET " - sql += ", ".join(f"{TABLEMAP[table].get(k)} " - f"= '{escape_string(str(v)).decode('utf-8')}'" for - k, v in asdict(data).items() - if v is not None and k in TABLEMAP[table]) - sql += " WHERE " - sql += "AND ".join(f"{TABLEMAP[table].get(k)} = " - f"'{escape_string(str(v)).decode('utf-8')}'" for - k, v in asdict(where).items() - if v is not None and k in TABLEMAP[table]) - with conn.cursor() as cursor: - cursor.execute(sql) - return cursor.rowcount - - -def fetchone(conn: Any, - table: str, - where: Dataclass) -> Optional[Dataclass]: - """Run a SELECT on a table. Returns only one result!""" - if not any(astuple(where)): - return None - sql = f"SELECT * FROM {table} " - sql += "WHERE " - sql += "AND ".join(f"{TABLEMAP[table].get(k)} = " - f"'{escape_string(str(v)).decode('utf-8')}'" for - k, v in asdict(where).items() - if v is not None and k in TABLEMAP[table]) - with conn.cursor() as cursor: - cursor.execute(sql) - return DATACLASSMAP[table](*cursor.fetchone()) diff --git a/tests/unit/db/test_phenotypes.py b/tests/unit/db/test_phenotypes.py index 9fed524..505714a 100644 --- a/tests/unit/db/test_phenotypes.py +++ b/tests/unit/db/test_phenotypes.py @@ -2,9 +2,9 @@ from unittest import TestCase from unittest import mock -from gn3.db.phenotypes import fetchone +from gn3.db import fetchone +from gn3.db import update from gn3.db.phenotypes import Phenotype -from gn3.db.phenotypes import update class TestPhenotypes(TestCase): -- cgit v1.2.3 From 07464f44f48895cc31ba2b088d6125e7777e1073 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Sun, 30 May 2021 13:26:15 +0300 Subject: fix index error (#16) --- 
gn3/computations/correlations.py | 2 +- tests/unit/computations/test_correlation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'gn3') diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py index 25dd26d..f0ce502 100644 --- a/gn3/computations/correlations.py +++ b/gn3/computations/correlations.py @@ -247,7 +247,7 @@ def fetch_lit_correlation_data( cursor.execute(query_formatter(query, *tuple(reversed(query_values)))) lit_corr_results = cursor.fetchone() - lit_results = (gene_id, lit_corr_results[1])\ + lit_results = (gene_id, lit_corr_results[0])\ if lit_corr_results else (gene_id, 0) return lit_results return (gene_id, 0) diff --git a/tests/unit/computations/test_correlation.py b/tests/unit/computations/test_correlation.py index d264738..5746adf 100644 --- a/tests/unit/computations/test_correlation.py +++ b/tests/unit/computations/test_correlation.py @@ -276,7 +276,7 @@ class TestCorrelation(TestCase): input trait mouse gene id and mouse gene id """ - expected_db_results = [("val", x*0.1) + expected_db_results = [[x*0.1] for x in range(1, 4)] conn = DataBase(expected_results=expected_db_results) expected_results = ("1", 0.1) -- cgit v1.2.3 From 515ac34950db419bd6440afd1393cf41310d1814 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 2 Jun 2021 07:38:53 +0300 Subject: gn3: db: Return None if data and where are empty --- gn3/db/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gn3') diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index fae4d29..1eb7b12 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -36,7 +36,7 @@ def update(conn: Any, data: Dataclass, where: Dataclass) -> Optional[int]: """Run an UPDATE on a table""" - if not any(astuple(data) + astuple(where)): + if not (any(astuple(data)) and any(astuple(where))): return None sql = f"UPDATE {table} SET " sql += ", ".join(f"{TABLEMAP[table].get(k)} " -- cgit v1.2.3 From ece41f5f971595c5d005c4beaa984c45471a6647 Mon 
Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 2 Jun 2021 07:54:35 +0300 Subject: Get the diff between 2 dicts and return that as a dict --- gn3/db/__init__.py | 16 ++++++++++++++++ tests/unit/db/test_phenotypes.py | 8 ++++++++ 2 files changed, 24 insertions(+) (limited to 'gn3') diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index 1eb7b12..19135fc 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -68,3 +68,19 @@ def fetchone(conn: Any, with conn.cursor() as cursor: cursor.execute(sql) return DATACLASSMAP[table](*cursor.fetchone()) + + +def diff_from_dict(old: Dict, new: Dict) -> Dict: + """Construct a new dict with a specific structure that contains the difference +between the 2 dicts in the structure: + +diff_from_dict({"id": 1, "data": "a"}, {"id": 2, "data": "b"}) + +Should return: + +{"id": {"old": 1, "new": 2}, "data": {"old": "a", "new": "b"}} + """ + dict_ = {} + for key, value in old.items(): + dict_[key] = {"old": old[key], "new": new[key]} + return dict_ diff --git a/tests/unit/db/test_phenotypes.py b/tests/unit/db/test_phenotypes.py index 505714a..b53db23 100644 --- a/tests/unit/db/test_phenotypes.py +++ b/tests/unit/db/test_phenotypes.py @@ -4,6 +4,7 @@ from unittest import mock from gn3.db import fetchone from gn3.db import update +from gn3.db import diff_from_dict from gn3.db.phenotypes import Phenotype @@ -63,3 +64,10 @@ class TestPhenotypes(TestCase): "Test pre-publication") cursor.execute.assert_called_once_with( "SELECT * FROM Phenotype WHERE id = '35'") + + def test_diff_from_dict(self): + """Test that a correct diff is generated""" + self.assertEqual(diff_from_dict({"id": 1, "data": "a"}, + {"id": 2, "data": "b"}), + {"id": {"old": 1, "new": 2}, + "data": {"old": "a", "new": "b"}}) -- cgit v1.2.3 From 9d46f943894e15b4a70c64ecba6a3b684863a81f Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 2 Jun 2021 08:23:59 +0300 Subject: gn3: db: Add spacing before around "AND" in sql clause --- gn3/db/__init__.py | 16 
++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'gn3') diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index 19135fc..d89dbf4 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -44,10 +44,10 @@ def update(conn: Any, k, v in asdict(data).items() if v is not None and k in TABLEMAP[table]) sql += " WHERE " - sql += "AND ".join(f"{TABLEMAP[table].get(k)} = " - f"'{escape_string(str(v)).decode('utf-8')}'" for - k, v in asdict(where).items() - if v is not None and k in TABLEMAP[table]) + sql += " AND ".join(f"{TABLEMAP[table].get(k)} = " + f"'{escape_string(str(v)).decode('utf-8')}'" for + k, v in asdict(where).items() + if v is not None and k in TABLEMAP[table]) with conn.cursor() as cursor: cursor.execute(sql) return cursor.rowcount @@ -61,10 +61,10 @@ def fetchone(conn: Any, return None sql = f"SELECT * FROM {table} " sql += "WHERE " - sql += "AND ".join(f"{TABLEMAP[table].get(k)} = " - f"'{escape_string(str(v)).decode('utf-8')}'" for - k, v in asdict(where).items() - if v is not None and k in TABLEMAP[table]) + sql += " AND ".join(f"{TABLEMAP[table].get(k)} = " + f"'{escape_string(str(v)).decode('utf-8')}'" for + k, v in asdict(where).items() + if v is not None and k in TABLEMAP[table]) with conn.cursor() as cursor: cursor.execute(sql) return DATACLASSMAP[table](*cursor.fetchone()) -- cgit v1.2.3 From c96b29e63577f7189afd02df2ced26b150830341 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 3 Jun 2021 11:08:58 +0300 Subject: Add data structures for the table metadata_audit --- gn3/db/__init__.py | 5 +++++ gn3/db/metadata_audit.py | 26 ++++++++++++++++++++++++++ tests/unit/db/test_audit.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+) create mode 100644 gn3/db/metadata_audit.py create mode 100644 tests/unit/db/test_audit.py (limited to 'gn3') diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index d89dbf4..175a640 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -5,21 +5,26 @@ from 
dataclasses import dataclass, asdict, astuple from typing_extensions import Protocol from MySQLdb import escape_string +from gn3.db.metadata_audit import MetadataAudit from gn3.db.phenotypes import Phenotype from gn3.db.phenotypes import PublishXRef from gn3.db.phenotypes import Publication +from gn3.db.metadata_audit import metadata_audit_mapping from gn3.db.phenotypes import phenotype_mapping from gn3.db.phenotypes import publish_x_ref_mapping from gn3.db.phenotypes import publication_mapping + TABLEMAP = { + "metadata_audit": metadata_audit_mapping, "Phenotype": phenotype_mapping, "PublishXRef": publish_x_ref_mapping, "Publication": publication_mapping, } DATACLASSMAP = { + "MetadataAudit": MetadataAudit, "Phenotype": Phenotype, "PublishXRef": PublishXRef, "Publication": Publication, diff --git a/gn3/db/metadata_audit.py b/gn3/db/metadata_audit.py new file mode 100644 index 0000000..6e22b32 --- /dev/null +++ b/gn3/db/metadata_audit.py @@ -0,0 +1,26 @@ +# pylint: disable=[R0902, R0903] +"""This contains all the necessary functions that access the metadata_audit +table from the db + +""" +from dataclasses import dataclass +from typing import Optional + + +@dataclass(frozen=True) +class MetadataAudit: + """Data Type that represents a Phenotype""" + dataset_id: int + editor: str + json_data: str + time_stamp: Optional[str] = None + + +# Mapping from the MetadataAudit dataclass to the actual column names in the +# database +metadata_audit_mapping = { + "dataset_id": "dataset_id", + "editor": "editor", + "json_data": "json_data", + "time_stamp": "time_stamp", +} diff --git a/tests/unit/db/test_audit.py b/tests/unit/db/test_audit.py new file mode 100644 index 0000000..22787bb --- /dev/null +++ b/tests/unit/db/test_audit.py @@ -0,0 +1,28 @@ +"""Tests for db/phenotypes.py""" +import json +from unittest import TestCase +from unittest import mock + +from gn3.db import insert +from gn3.db.metadata_audit import MetadataAudit + + +class TestMetadatAudit(TestCase): + """Test 
cases for fetching chromosomes""" + + def test_insert_into_metadata_audit(self): + """Test that data is inserted correctly in the audit table + + """ + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + type(cursor).rowcount = 1 + self.assertEqual(insert( + conn=db_mock, table="metadata_audit", + data=MetadataAudit(dataset_id=35, + editor="Bonface", + json_data=json.dumps({"a": "b"}))), 1) + cursor.execute.assert_called_once_with( + "INSERT INTO metadata_audit ('dataset_id', " + "'editor', 'json_data') " + 'VALUES (\'35\', \'Bonface\', \'{\\"a\\": \\"b\\"}\')') -- cgit v1.2.3 From 511cde13c8f18c2e2be3a29eee3fa7366fce81a3 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 3 Jun 2021 21:11:01 +0300 Subject: gn3: db: Add new function for doing sql INSERT --- gn3/db/__init__.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'gn3') diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index 175a640..8b6bf73 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -1,7 +1,7 @@ # pylint: disable=[R0902, R0903] """Module that exposes common db operations""" from typing import Optional, Dict, Any -from dataclasses import dataclass, asdict, astuple +from dataclasses import asdict, astuple from typing_extensions import Protocol from MySQLdb import escape_string @@ -75,6 +75,22 @@ def fetchone(conn: Any, return DATACLASSMAP[table](*cursor.fetchone()) +def insert(conn: Any, + table: str, + data: Dataclass) -> Optional[int]: + """Run an INSERT into a table""" + dict_ = {k: v for k, v in asdict(data).items() + if v is not None and k in TABLEMAP[table]} + sql = f"INSERT INTO {table} (" + sql += ", ".join(f"{k}" for k in dict_.keys()) + sql += ") VALUES (" + sql += ", ".join("%s" for _ in dict_.keys()) + sql += ")" + with conn.cursor() as cursor: + cursor.execute(sql, tuple(dict_.values())) + conn.commit() + return cursor.rowcount + def diff_from_dict(old: Dict, new: Dict) -> Dict: """Construct a new dict with a specific 
structure that contains the difference between the 2 dicts in the structure: -- cgit v1.2.3 From 4c9bbe6d4229b79a1bc62cf2f641fbc4c4f00abc Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 3 Jun 2021 21:38:58 +0300 Subject: Use prepared statements for UPDATE sql function --- gn3/db/__init__.py | 16 +++++++++------- tests/unit/db/test_phenotypes.py | 10 ++++------ 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'gn3') diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index 8b6bf73..ce92a7d 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -43,18 +43,20 @@ def update(conn: Any, """Run an UPDATE on a table""" if not (any(astuple(data)) and any(astuple(where))): return None + data_ = {k: v for k, v in asdict(data).items() + if v is not None and k in TABLEMAP[table]} + where_ = {k: v for k, v in asdict(where).items() + if v is not None and k in TABLEMAP[table]} sql = f"UPDATE {table} SET " sql += ", ".join(f"{TABLEMAP[table].get(k)} " - f"= '{escape_string(str(v)).decode('utf-8')}'" for - k, v in asdict(data).items() - if v is not None and k in TABLEMAP[table]) + "= %s" for k in data_.keys()) sql += " WHERE " sql += " AND ".join(f"{TABLEMAP[table].get(k)} = " - f"'{escape_string(str(v)).decode('utf-8')}'" for - k, v in asdict(where).items() - if v is not None and k in TABLEMAP[table]) + "%s" for k in where_.keys()) with conn.cursor() as cursor: - cursor.execute(sql) + cursor.execute(sql, + tuple(data_.values()) + tuple(where_.values())) + conn.commit() return cursor.rowcount diff --git a/tests/unit/db/test_phenotypes.py b/tests/unit/db/test_phenotypes.py index fdeca5e..21eb757 100644 --- a/tests/unit/db/test_phenotypes.py +++ b/tests/unit/db/test_phenotypes.py @@ -37,12 +37,10 @@ class TestPhenotypes(TestCase): where=Phenotype(id_=1, owner="Rob")), 1) cursor.execute.assert_called_once_with( "UPDATE Phenotype SET " - "Pre_publication_description = " - "'Test Pre Pub', " - "Post_publication_description = " - "'Test Post Pub', Submitter = 
'Rob' " - "WHERE id = '1' AND Owner = 'Rob'" - ) + "Pre_publication_description = %s, " + "Post_publication_description = %s, " + "Submitter = %s WHERE id = %s AND Owner = %s", + ('Test Pre Pub', 'Test Post Pub', 'Rob', 1, 'Rob')) def test_fetch_phenotype(self): """Test that a single phenotype is fetched properly -- cgit v1.2.3 From d769bfcc38a14720fa888e2b7c0ff874cc91f6a2 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 3 Jun 2021 21:39:25 +0300 Subject: gn3: db: Replace items() with keys() * gn3/db/__init__.py (diff_from_dict): We only use the keys of the dict! --- gn3/db/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'gn3') diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index ce92a7d..d62b575 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -93,6 +93,7 @@ def insert(conn: Any, conn.commit() return cursor.rowcount + def diff_from_dict(old: Dict, new: Dict) -> Dict: """Construct a new dict with a specific structure that contains the difference between the 2 dicts in the structure: @@ -104,6 +105,6 @@ Should return: {"id": {"old": 1, "new": 2}, "data": {"old": "a", "new": "b"}} """ dict_ = {} - for key, value in old.items(): + for key in old.keys(): dict_[key] = {"old": old[key], "new": new[key]} return dict_ -- cgit v1.2.3 From 8210c46fde908b8815ab97f2f91039f87365369b Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 3 Jun 2021 21:45:25 +0300 Subject: Use prepared statements for FETCH sql function --- gn3/db/__init__.py | 8 ++++---- tests/unit/db/test_phenotypes.py | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'gn3') diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index d62b575..fea43ec 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -66,14 +66,14 @@ def fetchone(conn: Any, """Run a SELECT on a table. 
Returns only one result!""" if not any(astuple(where)): return None + where_ = {k: v for k, v in asdict(where).items() + if v is not None and k in TABLEMAP[table]} sql = f"SELECT * FROM {table} " sql += "WHERE " sql += " AND ".join(f"{TABLEMAP[table].get(k)} = " - f"'{escape_string(str(v)).decode('utf-8')}'" for - k, v in asdict(where).items() - if v is not None and k in TABLEMAP[table]) + "%s" for k in where_.keys()) with conn.cursor() as cursor: - cursor.execute(sql) + cursor.execute(sql, tuple(where_.values())) return DATACLASSMAP[table](*cursor.fetchone()) diff --git a/tests/unit/db/test_phenotypes.py b/tests/unit/db/test_phenotypes.py index 21eb757..824d186 100644 --- a/tests/unit/db/test_phenotypes.py +++ b/tests/unit/db/test_phenotypes.py @@ -61,7 +61,8 @@ class TestPhenotypes(TestCase): self.assertEqual(phenotype.pre_pub_description, "Test pre-publication") cursor.execute.assert_called_once_with( - "SELECT * FROM Phenotype WHERE id = '35' AND Owner = 'Rob'") + "SELECT * FROM Phenotype WHERE id = %s AND Owner = %s", + (35, 'Rob')) def test_diff_from_dict(self): """Test that a correct diff is generated""" -- cgit v1.2.3 From d55adb4e549bce790522d1a311bbcd53627c3259 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 3 Jun 2021 21:50:52 +0300 Subject: gn3: db: Remove "escape_string" from imports We use prepared statements, so no need to have this. 
--- gn3/db/__init__.py | 1 - 1 file changed, 1 deletion(-) (limited to 'gn3') diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index fea43ec..824e5b2 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -3,7 +3,6 @@ from typing import Optional, Dict, Any from dataclasses import asdict, astuple from typing_extensions import Protocol -from MySQLdb import escape_string from gn3.db.metadata_audit import MetadataAudit from gn3.db.phenotypes import Phenotype -- cgit v1.2.3 From 49f9d977fc9bd9165392c21f7311b6fe72d2d83a Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 7 Jun 2021 10:09:00 +0300 Subject: gn3: db: sort imports --- gn3/db/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gn3') diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index 824e5b2..ed23c97 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -1,7 +1,7 @@ # pylint: disable=[R0902, R0903] """Module that exposes common db operations""" -from typing import Optional, Dict, Any from dataclasses import asdict, astuple +from typing import Optional, Dict, Any from typing_extensions import Protocol from gn3.db.metadata_audit import MetadataAudit @@ -11,8 +11,8 @@ from gn3.db.phenotypes import Publication from gn3.db.metadata_audit import metadata_audit_mapping from gn3.db.phenotypes import phenotype_mapping -from gn3.db.phenotypes import publish_x_ref_mapping from gn3.db.phenotypes import publication_mapping +from gn3.db.phenotypes import publish_x_ref_mapping TABLEMAP = { -- cgit v1.2.3 From 3abef0f7a14860c0babbf59bd99b4c5a88387693 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 7 Jun 2021 11:13:50 +0300 Subject: gn3: db: Use correct DATACLASSMAP entry from metadata_audit --- gn3/db/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gn3') diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index ed23c97..997a230 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -23,7 +23,7 @@ TABLEMAP = { } DATACLASSMAP = { - 
"MetadataAudit": MetadataAudit, + "metadata_audit": MetadataAudit, "Phenotype": Phenotype, "PublishXRef": PublishXRef, "Publication": Publication, -- cgit v1.2.3 From 3b53e42162fbcbdba782016f7b63604081f2b6b1 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 7 Jun 2021 11:14:30 +0300 Subject: gn3: db: Make "WHERE" clause optional * gn3/db/__init__.py (fetchone): Make "WHERE" an Optional arg. --- gn3/db/__init__.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'gn3') diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index 997a230..d311dea 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -61,16 +61,17 @@ def update(conn: Any, def fetchone(conn: Any, table: str, - where: Dataclass) -> Optional[Dataclass]: + where: Optional[Dataclass]) -> Optional[Dataclass]: """Run a SELECT on a table. Returns only one result!""" if not any(astuple(where)): return None where_ = {k: v for k, v in asdict(where).items() if v is not None and k in TABLEMAP[table]} sql = f"SELECT * FROM {table} " - sql += "WHERE " - sql += " AND ".join(f"{TABLEMAP[table].get(k)} = " - "%s" for k in where_.keys()) + if where: + sql += "WHERE " + sql += " AND ".join(f"{TABLEMAP[table].get(k)} = " + "%s" for k in where_.keys()) with conn.cursor() as cursor: cursor.execute(sql, tuple(where_.values())) return DATACLASSMAP[table](*cursor.fetchone()) -- cgit v1.2.3 From cf8e058682702e7203ec5eb019717cba09887272 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 7 Jun 2021 11:15:45 +0300 Subject: gn3: metadata_audit: Make props for MetadataAudit class optional --- gn3/db/metadata_audit.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'gn3') diff --git a/gn3/db/metadata_audit.py b/gn3/db/metadata_audit.py index 6e22b32..e73e988 100644 --- a/gn3/db/metadata_audit.py +++ b/gn3/db/metadata_audit.py @@ -10,9 +10,9 @@ from typing import Optional @dataclass(frozen=True) class MetadataAudit: """Data Type that represents a Phenotype""" - dataset_id: int 
- editor: str - json_data: str + dataset_id: Optional[int] = None + editor: Optional[str] = None + json_data: Optional[str] = None time_stamp: Optional[str] = None -- cgit v1.2.3 From f5d3828b965bdc5ce98dd8f4714c5bf264c23f9c Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 7 Jun 2021 11:16:25 +0300 Subject: gn3: db: Add "fetchall" method. --- gn3/db/__init__.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'gn3') diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index d311dea..25ecfd6 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -1,7 +1,7 @@ # pylint: disable=[R0902, R0903] """Module that exposes common db operations""" from dataclasses import asdict, astuple -from typing import Optional, Dict, Any +from typing import Any, Dict, Optional, Generator from typing_extensions import Protocol from gn3.db.metadata_audit import MetadataAudit @@ -77,6 +77,24 @@ def fetchone(conn: Any, return DATACLASSMAP[table](*cursor.fetchone()) +def fetchall(conn: Any, + table: str, + where: Optional[Dataclass]) -> Optional[Generator]: + """Run a SELECT on a table. 
Returns all the results as a tuple!""" + if not any(astuple(where)): + return None + where_ = {k: v for k, v in asdict(where).items() + if v is not None and k in TABLEMAP[table]} + sql = f"SELECT * FROM {table} " + if where: + sql += "WHERE " + sql += " AND ".join(f"{TABLEMAP[table].get(k)} = " + "%s" for k in where_.keys()) + with conn.cursor() as cursor: + cursor.execute(sql, tuple(where_.values())) + return (DATACLASSMAP[table](*record) for record in cursor.fetchall()) + + def insert(conn: Any, table: str, data: Dataclass) -> Optional[int]: -- cgit v1.2.3 From 712a4f7235cc1167fe1c4e591737a045b9a60ac3 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 7 Jun 2021 18:53:59 +0300 Subject: gn3: db: Add "id_" property to metadata_audit class and mapping --- gn3/db/metadata_audit.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'gn3') diff --git a/gn3/db/metadata_audit.py b/gn3/db/metadata_audit.py index e73e988..8765738 100644 --- a/gn3/db/metadata_audit.py +++ b/gn3/db/metadata_audit.py @@ -10,6 +10,7 @@ from typing import Optional @dataclass(frozen=True) class MetadataAudit: """Data Type that represents a Phenotype""" + id_: Optional[int] = None dataset_id: Optional[int] = None editor: Optional[str] = None json_data: Optional[str] = None @@ -19,6 +20,7 @@ class MetadataAudit: # Mapping from the MetadataAudit dataclass to the actual column names in the # database metadata_audit_mapping = { + "id_": "id", "dataset_id": "dataset_id", "editor": "editor", "json_data": "json_data", -- cgit v1.2.3 From e67316601be4b43da6f60322fef1f4ce15ed5905 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 7 Jun 2021 19:43:17 +0300 Subject: gn3: db: Fix how columns from tables is resolved --- gn3/db/__init__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'gn3') diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index 25ecfd6..34a0236 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -65,12 +65,12 @@ def fetchone(conn: Any, """Run a 
SELECT on a table. Returns only one result!""" if not any(astuple(where)): return None - where_ = {k: v for k, v in asdict(where).items() + where_ = {TABLEMAP[table].get(k): v for k, v in asdict(where).items() if v is not None and k in TABLEMAP[table]} sql = f"SELECT * FROM {table} " if where: sql += "WHERE " - sql += " AND ".join(f"{TABLEMAP[table].get(k)} = " + sql += " AND ".join(f"{k} = " "%s" for k in where_.keys()) with conn.cursor() as cursor: cursor.execute(sql, tuple(where_.values())) @@ -83,12 +83,12 @@ def fetchall(conn: Any, """Run a SELECT on a table. Returns all the results as a tuple!""" if not any(astuple(where)): return None - where_ = {k: v for k, v in asdict(where).items() + where_ = {TABLEMAP[table].get(k): v for k, v in asdict(where).items() if v is not None and k in TABLEMAP[table]} sql = f"SELECT * FROM {table} " if where: sql += "WHERE " - sql += " AND ".join(f"{TABLEMAP[table].get(k)} = " + sql += " AND ".join(f"{k} = " "%s" for k in where_.keys()) with conn.cursor() as cursor: cursor.execute(sql, tuple(where_.values())) @@ -99,7 +99,7 @@ def insert(conn: Any, table: str, data: Dataclass) -> Optional[int]: """Run an INSERT into a table""" - dict_ = {k: v for k, v in asdict(data).items() + dict_ = {TABLEMAP[table].get(k): v for k, v in asdict(data).items() if v is not None and k in TABLEMAP[table]} sql = f"INSERT INTO {table} (" sql += ", ".join(f"{k}" for k in dict_.keys()) -- cgit v1.2.3 From bb55cf948974d85fb31d2f424f7ee94f7ab5e3d6 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 7 Jun 2021 19:44:00 +0300 Subject: Rename json_data column to json_diff_data --- gn3/db/metadata_audit.py | 2 +- sql/metadata_audit.sql | 12 ++++++------ tests/unit/db/test_audit.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'gn3') diff --git a/gn3/db/metadata_audit.py b/gn3/db/metadata_audit.py index 8765738..9c4474d 100644 --- a/gn3/db/metadata_audit.py +++ b/gn3/db/metadata_audit.py @@ -23,6 +23,6 @@ metadata_audit_mapping = { 
"id_": "id", "dataset_id": "dataset_id", "editor": "editor", - "json_data": "json_data", + "json_data": "json_diff_data", "time_stamp": "time_stamp", } diff --git a/sql/metadata_audit.sql b/sql/metadata_audit.sql index 9771e74..514a2fc 100644 --- a/sql/metadata_audit.sql +++ b/sql/metadata_audit.sql @@ -20,10 +20,10 @@ -- This table stores data on diffs when editing a Published dataset's data CREATE TABLE metadata_audit ( PRIMARY KEY (id), - id INTEGER AUTO_INCREMENT NOT NULL, - dataset_id INTEGER NOT NULL, - editor VARCHAR(255) NOT NULL, - json_data VARCHAR(2048) NOT NULL, - time_stamp timestamp DEFAULT CURRENT_TIMESTAMP NOT NULL, - CHECK (JSON_VALID(json_data)) + id INTEGER AUTO_INCREMENT NOT NULL, + dataset_id INTEGER NOT NULL, + editor VARCHAR(255) NOT NULL, + json_diff_data VARCHAR(2048) NOT NULL, + time_stamp timestamp DEFAULT CURRENT_TIMESTAMP NOT NULL, + CHECK (JSON_VALID(json_diff_data)) ); diff --git a/tests/unit/db/test_audit.py b/tests/unit/db/test_audit.py index 1449281..7480169 100644 --- a/tests/unit/db/test_audit.py +++ b/tests/unit/db/test_audit.py @@ -24,5 +24,5 @@ class TestMetadatAudit(TestCase): json_data=json.dumps({"a": "b"}))), 1) cursor.execute.assert_called_once_with( "INSERT INTO metadata_audit (dataset_id, " - "editor, json_data) VALUES (%s, %s, %s)", + "editor, json_diff_data) VALUES (%s, %s, %s)", (35, 'Bonface', '{"a": "b"}')) -- cgit v1.2.3 From cd921a4778d141b6dbbf9f60c178a4f681d47860 Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 16 Jun 2021 19:40:03 +0000 Subject: Fixed spelling of coeffient to coefficient --- gn3/computations/correlations.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'gn3') diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py index f0ce502..0fe46ab 100644 --- a/gn3/computations/correlations.py +++ b/gn3/computations/correlations.py @@ -68,8 +68,8 @@ pearson,spearman and biweight mid correlation return value is rho and p_value 
"spearman": scipy.stats.spearmanr } use_corr_method = corr_mapping.get(corr_method, "spearman") - corr_coeffient, p_val = use_corr_method(primary_values, target_values) - return (corr_coeffient, p_val) + corr_coefficient, p_val = use_corr_method(primary_values, target_values) + return (corr_coefficient, p_val) def compute_sample_r_correlation(trait_name, corr_method, trait_vals, @@ -84,13 +84,13 @@ def compute_sample_r_correlation(trait_name, corr_method, trait_vals, if num_overlap > 5: - (corr_coeffient, p_value) =\ + (corr_coefficient, p_value) =\ compute_corr_coeff_p_value(primary_values=sanitized_traits_vals, target_values=sanitized_target_vals, corr_method=corr_method) - if corr_coeffient is not None: - return (trait_name, corr_coeffient, p_value, num_overlap) + if corr_coefficient is not None: + return (trait_name, corr_coefficient, p_value, num_overlap) return None @@ -140,10 +140,10 @@ def compute_all_sample_correlation(this_trait, for sample_correlation in results: if sample_correlation is not None: - (trait_name, corr_coeffient, p_value, + (trait_name, corr_coefficient, p_value, num_overlap) = sample_correlation corr_result = { - "corr_coeffient": corr_coeffient, + "corr_coefficient": corr_coefficient, "p_value": p_value, "num_overlap": num_overlap } @@ -151,7 +151,7 @@ def compute_all_sample_correlation(this_trait, corr_results.append({trait_name: corr_result}) return sorted( corr_results, - key=lambda trait_name: -abs(list(trait_name.values())[0]["corr_coeffient"])) + key=lambda trait_name: -abs(list(trait_name.values())[0]["corr_coefficient"])) def benchmark_compute_all_sample(this_trait, @@ -174,12 +174,12 @@ def benchmark_compute_all_sample(this_trait, trait_vals=this_vals, target_samples_vals=target_vals) if sample_correlation is not None: - (trait_name, corr_coeffient, + (trait_name, corr_coefficient, p_value, num_overlap) = sample_correlation else: continue corr_result = { - "corr_coeffient": corr_coeffient, + "corr_coefficient": corr_coefficient, 
"p_value": p_value, "num_overlap": num_overlap } @@ -195,20 +195,20 @@ def tissue_correlation_for_trait( compute_corr_p_value: Callable = compute_corr_coeff_p_value) -> dict: """Given a primary tissue values for a trait and the target tissues values compute the correlation_cooeff and p value the input required are arrays - output -> List containing Dicts with corr_coefficient value,P_value and + output -> List containing Dicts with corr_coefficient value, P_value and also the tissue numbers is len(primary) == len(target) """ # ax :todo assertion that length one one target tissue ==primary_tissue - (tissue_corr_coeffient, + (tissue_corr_coefficient, p_value) = compute_corr_p_value(primary_values=primary_tissue_vals, target_values=target_tissues_values, corr_method=corr_method) tiss_corr_result = {trait_id: { - "tissue_corr": tissue_corr_coeffient, + "tissue_corr": tissue_corr_coefficient, "tissue_number": len(primary_tissue_vals), "tissue_p_val": p_value}} -- cgit v1.2.3 From 476b146562070cee6a55c55c03c37ef3e19a1474 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 18 Jun 2021 20:54:18 +0000 Subject: Resolve mypy errors in computations/rqtl.py --- gn3/computations/rqtl.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'gn3') diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 22d9faf..7b1a35c 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,8 +1,7 @@ """Procedures related rqtl computations""" import os import numpy as np -from typing import Dict -from typing import List +from typing import Dict, List, Union from flask import current_app @@ -55,7 +54,7 @@ def process_rqtl_output(file_name: str) -> List: marker_obs = [] # Later I should probably redo this using csv.read to avoid the # awkwardness with removing quotes with [1:-1] - with open(os.path.join(current_app.config.get("TMPDIR"), "output", file_name), "r") as the_file: + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), 
"output", file_name), "r") as the_file: for line in the_file: line_items = line.split(",") if line_items[1][1:-1] == "chr" or not line_items: @@ -63,6 +62,7 @@ def process_rqtl_output(file_name: str) -> List: continue else: # Convert chr to int if possible + the_chr: Union[int, str] try: the_chr = int(line_items[1][1:-1]) except: @@ -85,7 +85,7 @@ def process_perm_output(file_name: str): """ perm_results = [] - with open(os.path.join(current_app.config.get("TMPDIR"), "output", "PERM_" + file_name), "r") as the_file: + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", "PERM_" + file_name), "r") as the_file: for i, line in enumerate(the_file): if i == 0: # Skip header line -- cgit v1.2.3 From d42b85ae5fcea1b71a7165fd6e64745a228c48f9 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 18 Jun 2021 21:13:03 +0000 Subject: Fixed pylint issues --- gn3/api/rqtl.py | 7 ++++--- gn3/computations/rqtl.py | 45 ++++++++++++++++++++++++--------------------- 2 files changed, 28 insertions(+), 24 deletions(-) (limited to 'gn3') diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 38f4c1e..ebb746c 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -6,7 +6,6 @@ from flask import current_app from flask import jsonify from flask import request -from gn3.commands import run_cmd from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output from gn3.computations.gemma import do_paths_exist @@ -45,13 +44,15 @@ run the rqtl_wrapper script and return the results as JSON ) rqtl_output = {} - if not os.path.isfile(os.path.join(current_app.config.get("TMPDIR"), "output", rqtl_cmd.get('output_file'))): + if not os.path.isfile(os.path.join(current_app.config.get("TMPDIR"), + "output", rqtl_cmd.get('output_file'))): os.system(rqtl_cmd.get('rqtl_cmd')) rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) if int(rqtl_kwargs['nperm']) > 0: - 
rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = process_perm_output(rqtl_cmd.get('output_file')) + rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = \ + process_perm_output(rqtl_cmd.get('output_file')) return jsonify(rqtl_output) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 7b1a35c..0433b3f 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,8 +1,9 @@ """Procedures related rqtl computations""" import os -import numpy as np from typing import Dict, List, Union +import numpy as np + from flask import current_app from gn3.commands import compose_rqtl_cmd @@ -54,27 +55,28 @@ def process_rqtl_output(file_name: str) -> List: marker_obs = [] # Later I should probably redo this using csv.read to avoid the # awkwardness with removing quotes with [1:-1] - with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", file_name), "r") as the_file: + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", file_name), "r") as the_file: for line in the_file: line_items = line.split(",") if line_items[1][1:-1] == "chr" or not line_items: # Skip header line continue - else: - # Convert chr to int if possible - the_chr: Union[int, str] - try: - the_chr = int(line_items[1][1:-1]) - except: - the_chr = line_items[1][1:-1] - this_marker = { - "name": line_items[0][1:-1], - "chr": the_chr, - "cM": float(line_items[2]), - "Mb": float(line_items[2]), - "lod_score": float(line_items[3]) - } - marker_obs.append(this_marker) + + # Convert chr to int if possible + the_chr: Union[int, str] + try: + the_chr = int(line_items[1][1:-1]) + except ValueError: + the_chr = line_items[1][1:-1] + this_marker = { + "name": line_items[0][1:-1], + "chr": the_chr, + "cM": float(line_items[2]), + "Mb": float(line_items[2]), + "lod_score": float(line_items[3]) + } + marker_obs.append(this_marker) return marker_obs @@ -85,14 +87,15 @@ def 
process_perm_output(file_name: str): """ perm_results = [] - with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", "PERM_" + file_name), "r") as the_file: + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", "PERM_" + file_name), "r") as the_file: for i, line in enumerate(the_file): if i == 0: # Skip header line continue - else: - line_items = line.split(",") - perm_results.append(float(line_items[1])) + + line_items = line.split(",") + perm_results.append(float(line_items[1])) suggestive = np.percentile(np.array(perm_results), 67) significant = np.percentile(np.array(perm_results), 95) -- cgit v1.2.3