From 6c8ab5537c7fb7eb94c62415269e193516283f6d Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 17 May 2021 20:10:30 +0000 Subject: Register rqtl blueprint --- gn3/app.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gn3/app.py b/gn3/app.py index dc89f55..046b5de 100644 --- a/gn3/app.py +++ b/gn3/app.py @@ -5,6 +5,7 @@ from typing import Dict from typing import Union from flask import Flask from gn3.api.gemma import gemma +from gn3.api.rqtl import rqtl from gn3.api.general import general from gn3.api.correlation import correlation from gn3.api.data_entry import data_entry @@ -28,6 +29,7 @@ def create_app(config: Union[Dict, str, None] = None) -> Flask: app.config.from_pyfile(config) app.register_blueprint(general, url_prefix="/api/") app.register_blueprint(gemma, url_prefix="/api/gemma") + app.register_blueprint(rqtl, url_prefix="/api/rqtl") app.register_blueprint(correlation, url_prefix="/api/correlation") app.register_blueprint(data_entry, url_prefix="/api/dataentry") return app -- cgit v1.2.3 From 9b628abcee86ef29e869b92a66b6034c1b63359d Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 17 May 2021 20:11:15 +0000 Subject: Add rqtl.py for rqtl endpoints --- gn3/api/rqtl.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 gn3/api/rqtl.py diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py new file mode 100644 index 0000000..8dd4bb7 --- /dev/null +++ b/gn3/api/rqtl.py @@ -0,0 +1,19 @@ +import os + +from flask import Blueprint +from flask import current_app +from flask import request + +rqtl = Blueprint("rqtl", __name__) + +@rqtl.route("/compute", methods=["POST"]) +def compute(): + working_dir = os.path.join(current_app.config.get("TMPDIR")) + + genofile = request.form['geno_file'] + phenofile = request.form['pheno_file'] + + if not do_paths_exist([genofile, phenofile]): + raise FileNotFoundError + + return current_app.config.get("RQTL_WRAPPER_CMD") \ No newline at end of file -- cgit v1.2.3 From ea9d9d6454783a6af07384a66f204e199035e5bd Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 17 May 2021 21:52:43 +0000 Subject: Added RQTL_WRAPPER_CMD (which is fetched from environment) in settings.py --- gn3/settings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gn3/settings.py b/gn3/settings.py index 2057ce1..ecfd502 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -6,6 +6,7 @@ import os BCRYPT_SALT = "$2b$12$mxLvu9XRLlIaaSeDxt8Sle" # Change this! 
DATA_DIR = "" GEMMA_WRAPPER_CMD = os.environ.get("GEMMA_WRAPPER", "gemma-wrapper") +RQTL_WRAPPER_CMD = os.environ.get("RQTL_WRAPPER") CACHEDIR = "" REDIS_URI = "redis://localhost:6379/0" REDIS_JOB_QUEUE = "GN3::job-queue" -- cgit v1.2.3 From 7ed84670c0d13de38b578a4e4177b2529ff3fb40 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 17 May 2021 21:54:42 +0000 Subject: Read in kwargs fromrequest and pass the command, tmpdir, and kwargs to generate_rqtl_cmd which returns the actual command and output path --- gn3/api/rqtl.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 8dd4bb7..82cf34f 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -2,8 +2,12 @@ import os from flask import Blueprint from flask import current_app +from flask import jsonify from flask import request +from gn3.computations.rqtl import generate_rqtl_cmd +from gn3.computations.gemma import do_paths_exist + rqtl = Blueprint("rqtl", __name__) @rqtl.route("/compute", methods=["POST"]) @@ -16,4 +20,18 @@ def compute(): if not do_paths_exist([genofile, phenofile]): raise FileNotFoundError - return current_app.config.get("RQTL_WRAPPER_CMD") \ No newline at end of file + kwarg_list = ["addcovar", "model", "method", "interval", "nperm", "scale", "control_marker"] + + rqtl_kwargs = {"geno": genofile, "pheno": phenofile} + for kwarg in kwarg_list: + if kwarg in request.form: + rqtl_kwargs[kwarg] = request.form[kwarg] + + results = generate_rqtl_cmd( + rqtl_wrapper_cmd = current_app.config.get("RQTL_WRAPPER_CMD"), + output_dir = current_app.config.get('TMPDIR'), + rqtl_wrapper_kwargs = rqtl_kwargs + ) + + return jsonify(results) + -- cgit v1.2.3 From 624ece086d026da9150cd35b2404874ccf607b07 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 17 May 2021 21:56:51 +0000 Subject: Created compose_rqtl_command and generate_rqtl_command to create the actual command to be run from the command line; used the same pattern as for GEMMA for consistency --- gn3/commands.py | 7 +++++++ gn3/computations/rqtl.py | 25 +++++++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 gn3/computations/rqtl.py diff --git a/gn3/commands.py b/gn3/commands.py index 4b0d62d..db32d1f 100644 --- a/gn3/commands.py +++ b/gn3/commands.py @@ -30,6 +30,13 @@ def compose_gemma_cmd(gemma_wrapper_cmd: str = "gemma-wrapper", cmd += " ".join([f"{arg}" for arg in gemma_args]) return cmd +def compose_rqtl_cmd(rqtl_wrapper_cmd: str, + rqtl_wrapper_kwargs: Dict) -> str: + """Compose a valid R/qtl command given the correct input""" + cmd = rqtl_wrapper_cmd + " " + " ".join( + [f"--{key} {val}" for key, val in rqtl_wrapper_kwargs.items()]) + + return cmd def queue_cmd(conn: Redis, job_queue: str, diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py new file mode 100644 index 0000000..087a99f --- /dev/null +++ b/gn3/computations/rqtl.py @@ -0,0 +1,25 @@ +"""Procedures related rqtl computations""" +import os + +from typing import Dict +from gn3.commands import compose_rqtl_cmd +from gn3.fs_helpers import get_hash_of_files + +def generate_rqtl_cmd(rqtl_wrapper_cmd: str, + output_dir: str, + rqtl_wrapper_kwargs: Dict) -> Dict: + + _hash = get_hash_of_files( + [v for k, v in rqtl_wrapper_kwargs.items() if k in ["g", "p", "addcovar", + "model", "method", + "interval", "nperm", + "scale", "control"]]) + + _output_filename = f"{_hash}-output.json" + return { + "output_file": + _output_filename, + "rqtl_cmd": + compose_rqtl_cmd(rqtl_wrapper_cmd=rqtl_wrapper_cmd, + 
rqtl_wrapper_kwargs=rqtl_wrapper_kwargs) + } \ No newline at end of file -- cgit v1.2.3 From 0a8754a582f057bd335441eab15da3f629df9ad7 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 17 May 2021 23:56:10 +0000 Subject: Fixed variety of issues detected by pylint --- gn3/api/rqtl.py | 13 ++++++------- gn3/computations/rqtl.py | 50 ++++++++++++++++++++++++------------------------ 2 files changed, 31 insertions(+), 32 deletions(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 82cf34f..7756310 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -1,5 +1,4 @@ -import os - +"""Endpoints for running the rqtl cmd""" from flask import Blueprint from flask import current_app from flask import jsonify @@ -12,8 +11,10 @@ rqtl = Blueprint("rqtl", __name__) @rqtl.route("/compute", methods=["POST"]) def compute(): - working_dir = os.path.join(current_app.config.get("TMPDIR")) + """Given at least a geno_file and pheno_file, generate and +run the rqtl_wrapper script and return the results as JSON + """ genofile = request.form['geno_file'] phenofile = request.form['pheno_file'] @@ -28,10 +29,8 @@ def compute(): rqtl_kwargs[kwarg] = request.form[kwarg] results = generate_rqtl_cmd( - rqtl_wrapper_cmd = current_app.config.get("RQTL_WRAPPER_CMD"), - output_dir = current_app.config.get('TMPDIR'), - rqtl_wrapper_kwargs = rqtl_kwargs + rqtl_wrapper_cmd=current_app.config.get("RQTL_WRAPPER_CMD"), + rqtl_wrapper_kwargs=rqtl_kwargs ) return jsonify(results) - diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 087a99f..0e8cd1f 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,25 +1,25 @@ -"""Procedures related rqtl computations""" -import os - -from typing import Dict -from gn3.commands import compose_rqtl_cmd -from gn3.fs_helpers import get_hash_of_files - -def generate_rqtl_cmd(rqtl_wrapper_cmd: str, - output_dir: str, - rqtl_wrapper_kwargs: Dict) -> Dict: - - _hash = get_hash_of_files( - [v for k, v in rqtl_wrapper_kwargs.items() if k in ["g", "p", "addcovar", - "model", "method", - "interval", "nperm", - "scale", "control"]]) - - _output_filename = f"{_hash}-output.json" - return { - "output_file": - _output_filename, - "rqtl_cmd": - compose_rqtl_cmd(rqtl_wrapper_cmd=rqtl_wrapper_cmd, - rqtl_wrapper_kwargs=rqtl_wrapper_kwargs) - } \ No newline at end of file +"""Procedures related rqtl computations""" + +from typing import Dict +from gn3.commands import compose_rqtl_cmd +from gn3.fs_helpers import get_hash_of_files + +def generate_rqtl_cmd(rqtl_wrapper_cmd: str, + rqtl_wrapper_kwargs: Dict) -> Dict: + """Given the base rqtl_wrapper command and +dict of keyword arguments, return the full rqtl_wrapper command and an +output filename generated from a hash of the genotype and phenotype files + + """ + + _hash = get_hash_of_files( + [v for k, v in rqtl_wrapper_kwargs.items() if k in ["g", "p"]]) + + _output_filename = f"{_hash}-output.json" + return { + "output_file": + _output_filename, + "rqtl_cmd": + compose_rqtl_cmd(rqtl_wrapper_cmd=rqtl_wrapper_cmd, + rqtl_wrapper_kwargs=rqtl_wrapper_kwargs) + } -- cgit v1.2.3 From 74bc179807e80c1ee0f89cd98953263f68a05661 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 18 May 2021 19:40:46 +0000 Subject: Fixed generate_rqtl_cmd to make the kwarg hash from a combination of keywords and arguments + account for boolean kwargs without values (like --interval or --covar) --- gn3/computations/rqtl.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py 
index 0e8cd1f..855a819 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -2,24 +2,36 @@ from typing import Dict from gn3.commands import compose_rqtl_cmd +from gn3.computations.gemma import generate_hash_of_string from gn3.fs_helpers import get_hash_of_files def generate_rqtl_cmd(rqtl_wrapper_cmd: str, - rqtl_wrapper_kwargs: Dict) -> Dict: + rqtl_wrapper_kwargs: Dict, + rqtl_wrapper_bool_kwargs: list) -> Dict: """Given the base rqtl_wrapper command and dict of keyword arguments, return the full rqtl_wrapper command and an output filename generated from a hash of the genotype and phenotype files """ + # Generate a hash from contents of the genotype and phenotype files _hash = get_hash_of_files( [v for k, v in rqtl_wrapper_kwargs.items() if k in ["g", "p"]]) + # Append to hash a hash of keyword arguments + _hash += generate_hash_of_string( + ",".join([f"{k}:{v}" for k, v in rqtl_wrapper_kwargs.items() if k not in ["g", "p"]])) + + # Append to hash a hash of boolean keyword arguments + _hash += generate_hash_of_string( + ",".join(rqtl_wrapper_bool_kwargs)) + _output_filename = f"{_hash}-output.json" return { "output_file": _output_filename, "rqtl_cmd": compose_rqtl_cmd(rqtl_wrapper_cmd=rqtl_wrapper_cmd, - rqtl_wrapper_kwargs=rqtl_wrapper_kwargs) + rqtl_wrapper_kwargs=rqtl_wrapper_kwargs, + rqtl_wrapper_bool_kwargs=rqtl_wrapper_bool_kwargs) } -- cgit v1.2.3 From e61aa16f0bc3dd282060585e655e497fa3d06b49 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 18 May 2021 19:41:01 +0000 Subject: Account for boolean kwargs in compose_rqtl_cmd --- gn3/commands.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/gn3/commands.py b/gn3/commands.py index db32d1f..add715c 100644 --- a/gn3/commands.py +++ b/gn3/commands.py @@ -31,11 +31,18 @@ def compose_gemma_cmd(gemma_wrapper_cmd: str = "gemma-wrapper", return cmd def compose_rqtl_cmd(rqtl_wrapper_cmd: str, - rqtl_wrapper_kwargs: Dict) -> str: + rqtl_wrapper_kwargs: Dict, + rqtl_wrapper_bool_kwargs: list) -> str: """Compose a valid R/qtl command given the correct input""" + # Add kwargs with values cmd = rqtl_wrapper_cmd + " " + " ".join( [f"--{key} {val}" for key, val in rqtl_wrapper_kwargs.items()]) + # Add boolean kwargs (kwargs without values) + if len(rqtl_wrapper_bool_kwargs): + cmd += " " + cmd += " ".join([f"--{val}" for val in rqtl_wrapper_bool_kwargs]) + return cmd def queue_cmd(conn: Redis, -- cgit v1.2.3 From 33d3a518e378c56b080a2a15b264ee5d031537e8 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 18 May 2021 19:42:27 +0000 Subject: Account for boolean kwargs in compute() by storing them in a list, since they don't have corresponding values --- gn3/api/rqtl.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 7756310..de620f7 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -21,16 +21,24 @@ run the rqtl_wrapper script and return the results as JSON if not do_paths_exist([genofile, phenofile]): raise FileNotFoundError - kwarg_list = ["addcovar", "model", "method", "interval", "nperm", "scale", "control_marker"] + # Split kwargs by those with values and boolean ones that just convert to True/False + kwargs = ["model", "method", "nperm", "scale", "control_marker"] + boolean_kwargs = ["addcovar", "interval"] + all_kwargs = kwargs + boolean_kwargs rqtl_kwargs = {"geno": genofile, "pheno": phenofile} - for kwarg in kwarg_list: + rqtl_bool_kwargs = [] + for kwarg in all_kwargs: if kwarg in request.form: - rqtl_kwargs[kwarg] = 
request.form[kwarg] + if kwarg in kwargs: + rqtl_kwargs[kwarg] = request.form[kwarg] + if kwarg in boolean_kwargs: + rqtl_bool_kwargs.append(kwarg) results = generate_rqtl_cmd( rqtl_wrapper_cmd=current_app.config.get("RQTL_WRAPPER_CMD"), - rqtl_wrapper_kwargs=rqtl_kwargs + rqtl_wrapper_kwargs=rqtl_kwargs, + rqtl_wrapper_bool_kwargs=boolean_kwargs ) return jsonify(results) -- cgit v1.2.3 From 5745c3bdd086f7c499ee63a580df822db5af2826 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 18 May 2021 19:43:50 +0000 Subject: Added unit test for computations/rqtl.py --- tests/unit/computations/test_rqtl.py | 41 ++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 tests/unit/computations/test_rqtl.py diff --git a/tests/unit/computations/test_rqtl.py b/tests/unit/computations/test_rqtl.py new file mode 100644 index 0000000..b16f136 --- /dev/null +++ b/tests/unit/computations/test_rqtl.py @@ -0,0 +1,41 @@ +"""Test cases for procedures defined in computations.rqtl""" +import unittest + +from unittest import mock +from gn3.computations.rqtl import generate_rqtl_cmd + +class TestRqtl(unittest.TestCase): + """Test cases for computations.rqtl module""" + @mock.patch("gn3.computations.rqtl.generate_hash_of_string") + @mock.patch("gn3.computations.rqtl.get_hash_of_files") + def test_generate_rqtl_command(self, mock_get_hash_files, mock_generate_hash_string): + """Test computing mapping results with R/qtl""" + mock_get_hash_files.return_value = "my-hash1" + mock_generate_hash_string.return_value = "my-hash2" + + self.assertEqual( + generate_rqtl_cmd(rqtl_wrapper_cmd="rqtl-wrapper", + rqtl_wrapper_kwargs={ + "g": "genofile", + "p": "phenofile", + "model": "normal", + "method": "hk", + "nperm": 1000, + "scale": "Mb", + "control": "rs123456" + }, + rqtl_wrapper_bool_kwargs=[ + "addcovar", + "interval" + ]), { + "output_file": + "my-hash1my-hash2my-hash2-output.json", + "rqtl_cmd": ( + "rqtl-wrapper " + "--g genofile --p phenofile " + "--model normal --method hk " + "--nperm 1000 --scale Mb " + "--control rs123456 " + "--addcovar --interval" + ) + }) -- cgit v1.2.3 From 36365588d95d96458da02090ebef21a02366784a Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 18 May 2021 19:49:44 +0000 Subject: Removed len from this if statement, since an empty list evaluates to False by itself --- gn3/commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/commands.py b/gn3/commands.py index add715c..255ea1d 100644 --- a/gn3/commands.py +++ b/gn3/commands.py @@ -39,7 +39,7 @@ def compose_rqtl_cmd(rqtl_wrapper_cmd: str, [f"--{key} {val}" for key, val in rqtl_wrapper_kwargs.items()]) # Add boolean kwargs (kwargs without values) - if len(rqtl_wrapper_bool_kwargs): + if rqtl_wrapper_bool_kwargs: cmd += " " cmd += " ".join([f"--{val}" for val in rqtl_wrapper_bool_kwargs]) -- cgit v1.2.3 From d66b71a1e149ccddbbcc66e439067250827e0b6f Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 18 May 2021 20:06:57 +0000 Subject: Added test for compose_rqtl_cmd in tests/unit/test_commands.py --- tests/unit/test_commands.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/unit/test_commands.py b/tests/unit/test_commands.py index aafb3a2..a3d0273 100644 --- a/tests/unit/test_commands.py +++ b/tests/unit/test_commands.py @@ -6,6 +6,7 @@ from datetime import datetime from typing import Callable from unittest import mock from gn3.commands import compose_gemma_cmd +from gn3.commands import compose_rqtl_cmd from gn3.commands import queue_cmd from gn3.commands 
import run_cmd from gn3.exceptions import RedisConnectionError @@ -53,6 +54,31 @@ class TestCommands(unittest.TestCase): "-p /tmp/gf13Ad0tRX/phenofile.txt" " -gk")) + def test_compose_rqtl_cmd(self): + """Test that the R/qtl cmd is composed correctly""" + self.assertEqual( + compose_rqtl_cmd(rqtl_wrapper_cmd="rqtl-wrapper", + rqtl_wrapper_kwargs={ + "g": "genofile", + "p": "phenofile", + "model": "normal", + "method": "hk", + "nperm": 1000, + "scale": "Mb", + "control": "rs123456" + }, + rqtl_wrapper_bool_kwargs=[ + "addcovar", + "interval" + ]), + ("rqtl-wrapper " + "--g genofile --p phenofile " + "--model normal --method hk " + "--nperm 1000 --scale Mb " + "--control rs123456 " + "--addcovar --interval") + ) + def test_queue_cmd_exception_raised_when_redis_is_down(self): """Test that the correct error is raised when Redis is unavailable""" self.assertRaises(RedisConnectionError, -- cgit v1.2.3 From e29a349b46d932411879a810fb0be3a0042bf540 Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 19 May 2021 20:03:01 +0000 Subject: Temporarily replace forward-slashes with underscores, since they can be included in the hashes used for filenames --- gn3/computations/rqtl.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 855a819..605e0e1 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -26,6 +26,9 @@ output filename generated from a hash of the genotype and phenotype files _hash += generate_hash_of_string( ",".join(rqtl_wrapper_bool_kwargs)) + # Temporarily substitute forward-slashes in hash with underscores + _hash = _hash.replace("/", "_") + _output_filename = f"{_hash}-output.json" return { "output_file": -- cgit v1.2.3 From d3a4146fd38fc1d372091cecadfcf7c8fb377f3b Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 25 May 2021 20:27:05 +0000 Subject: Include code that processes rqtl output files and returns actual results instead of just the output filename --- gn3/api/rqtl.py | 16 +++++++++--- gn3/computations/rqtl.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 75 insertions(+), 5 deletions(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index de620f7..0194b6f 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -1,10 +1,13 @@ """Endpoints for running the rqtl cmd""" +import os + from flask import Blueprint from flask import current_app from flask import jsonify from flask import request -from gn3.computations.rqtl import generate_rqtl_cmd +from gn3.commands import run_cmd +from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output from gn3.computations.gemma import do_paths_exist rqtl = Blueprint("rqtl", __name__) @@ -35,10 +38,17 @@ run the rqtl_wrapper script and return the results as JSON if kwarg in boolean_kwargs: rqtl_bool_kwargs.append(kwarg) - results = generate_rqtl_cmd( + rqtl_cmd = generate_rqtl_cmd( rqtl_wrapper_cmd=current_app.config.get("RQTL_WRAPPER_CMD"), rqtl_wrapper_kwargs=rqtl_kwargs, rqtl_wrapper_bool_kwargs=boolean_kwargs ) - return jsonify(results) + os.system(rqtl_cmd.get('rqtl_cmd')) + + rqtl_output = {} + rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) + if int(rqtl_kwargs['nperm']) > 0: + rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = process_perm_output(rqtl_cmd.get('output_file')) + + return jsonify(rqtl_output) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 605e0e1..22d9faf 100644 --- a/gn3/computations/rqtl.py +++ 
b/gn3/computations/rqtl.py @@ -1,6 +1,11 @@ """Procedures related rqtl computations""" - +import os +import numpy as np from typing import Dict +from typing import List + +from flask import current_app + from gn3.commands import compose_rqtl_cmd from gn3.computations.gemma import generate_hash_of_string from gn3.fs_helpers import get_hash_of_files @@ -29,7 +34,9 @@ output filename generated from a hash of the genotype and phenotype files # Temporarily substitute forward-slashes in hash with underscores _hash = _hash.replace("/", "_") - _output_filename = f"{_hash}-output.json" + _output_filename = f"{_hash}-output.csv" + rqtl_wrapper_kwargs["filename"] = _output_filename + return { "output_file": _output_filename, @@ -38,3 +45,56 @@ output filename generated from a hash of the genotype and phenotype files rqtl_wrapper_kwargs=rqtl_wrapper_kwargs, rqtl_wrapper_bool_kwargs=rqtl_wrapper_bool_kwargs) } + + +def process_rqtl_output(file_name: str) -> List: + """Given an output file name, read in R/qtl results and return + a List of marker objects + + """ + marker_obs = [] + # Later I should probably redo this using csv.read to avoid the + # awkwardness with removing quotes with [1:-1] + with open(os.path.join(current_app.config.get("TMPDIR"), "output", file_name), "r") as the_file: + for line in the_file: + line_items = line.split(",") + if line_items[1][1:-1] == "chr" or not line_items: + # Skip header line + continue + else: + # Convert chr to int if possible + try: + the_chr = int(line_items[1][1:-1]) + except: + the_chr = line_items[1][1:-1] + this_marker = { + "name": line_items[0][1:-1], + "chr": the_chr, + "cM": float(line_items[2]), + "Mb": float(line_items[2]), + "lod_score": float(line_items[3]) + } + marker_obs.append(this_marker) + + return marker_obs + + +def process_perm_output(file_name: str): + """Given base filename, read in R/qtl permutation output and calculate + suggestive and significant thresholds + + """ + perm_results = [] + with open(os.path.join(current_app.config.get("TMPDIR"), "output", "PERM_" + file_name), "r") as the_file: + for i, line in enumerate(the_file): + if i == 0: + # Skip header line + continue + else: + line_items = line.split(",") + perm_results.append(float(line_items[1])) + + suggestive = np.percentile(np.array(perm_results), 67) + significant = np.percentile(np.array(perm_results), 95) + + return perm_results, suggestive, significant -- cgit v1.2.3 From 1c32cb1df09475ef70dbe2d7310ba33026baea22 Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 16 Jun 2021 19:40:03 +0000 Subject: Fixed spelling of coeffient to coefficient --- gn3/computations/correlations.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py index f0ce502..0fe46ab 100644 --- a/gn3/computations/correlations.py +++ b/gn3/computations/correlations.py @@ -68,8 +68,8 @@ pearson,spearman and biweight mid correlation return value is rho and p_value "spearman": scipy.stats.spearmanr } use_corr_method = corr_mapping.get(corr_method, "spearman") - corr_coeffient, p_val = use_corr_method(primary_values, target_values) - return (corr_coeffient, p_val) + corr_coefficient, p_val = use_corr_method(primary_values, target_values) + return (corr_coefficient, p_val) def compute_sample_r_correlation(trait_name, corr_method, trait_vals, @@ -84,13 +84,13 @@ def compute_sample_r_correlation(trait_name, corr_method, trait_vals, if num_overlap > 5: - (corr_coeffient, p_value) =\ + (corr_coefficient, 
p_value) =\ compute_corr_coeff_p_value(primary_values=sanitized_traits_vals, target_values=sanitized_target_vals, corr_method=corr_method) - if corr_coeffient is not None: - return (trait_name, corr_coeffient, p_value, num_overlap) + if corr_coefficient is not None: + return (trait_name, corr_coefficient, p_value, num_overlap) return None @@ -140,10 +140,10 @@ def compute_all_sample_correlation(this_trait, for sample_correlation in results: if sample_correlation is not None: - (trait_name, corr_coeffient, p_value, + (trait_name, corr_coefficient, p_value, num_overlap) = sample_correlation corr_result = { - "corr_coeffient": corr_coeffient, + "corr_coefficient": corr_coefficient, "p_value": p_value, "num_overlap": num_overlap } @@ -151,7 +151,7 @@ def compute_all_sample_correlation(this_trait, corr_results.append({trait_name: corr_result}) return sorted( corr_results, - key=lambda trait_name: -abs(list(trait_name.values())[0]["corr_coeffient"])) + key=lambda trait_name: -abs(list(trait_name.values())[0]["corr_coefficient"])) def benchmark_compute_all_sample(this_trait, @@ -174,12 +174,12 @@ def benchmark_compute_all_sample(this_trait, trait_vals=this_vals, target_samples_vals=target_vals) if sample_correlation is not None: - (trait_name, corr_coeffient, + (trait_name, corr_coefficient, p_value, num_overlap) = sample_correlation else: continue corr_result = { - "corr_coeffient": corr_coeffient, + "corr_coefficient": corr_coefficient, "p_value": p_value, "num_overlap": num_overlap } @@ -195,20 +195,20 @@ def tissue_correlation_for_trait( compute_corr_p_value: Callable = compute_corr_coeff_p_value) -> dict: """Given a primary tissue values for a trait and the target tissues values compute the correlation_cooeff and p value the input required are arrays - output -> List containing Dicts with corr_coefficient value,P_value and + output -> List containing Dicts with corr_coefficient value, P_value and also the tissue numbers is len(primary) == len(target) """ # ax :todo assertion that length one one target tissue ==primary_tissue - (tissue_corr_coeffient, + (tissue_corr_coefficient, p_value) = compute_corr_p_value(primary_values=primary_tissue_vals, target_values=target_tissues_values, corr_method=corr_method) tiss_corr_result = {trait_id: { - "tissue_corr": tissue_corr_coeffient, + "tissue_corr": tissue_corr_coefficient, "tissue_number": len(primary_tissue_vals), "tissue_p_val": p_value}} -- cgit v1.2.3 From bf0a8cb22c0cc0c1dfe25740f88a4cb159dd0064 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 25 May 2021 20:27:33 +0000 Subject: Fix R/qtl command and the way keyword arguments are passed --- gn3/commands.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gn3/commands.py b/gn3/commands.py index 255ea1d..14bd295 100644 --- a/gn3/commands.py +++ b/gn3/commands.py @@ -35,10 +35,10 @@ def compose_rqtl_cmd(rqtl_wrapper_cmd: str, rqtl_wrapper_bool_kwargs: list) -> str: """Compose a valid R/qtl command given the correct input""" # Add kwargs with values - cmd = rqtl_wrapper_cmd + " " + " ".join( + cmd = f"Rscript { rqtl_wrapper_cmd } " + " ".join( [f"--{key} {val}" for key, val in rqtl_wrapper_kwargs.items()]) - # Add boolean kwargs (kwargs without values) + # Add boolean kwargs (kwargs that are either on or off, like --interval) if rqtl_wrapper_bool_kwargs: cmd += " " cmd += " ".join([f"--{val}" for val in rqtl_wrapper_bool_kwargs]) -- cgit v1.2.3 From 0bfe2990ce31fbd808b680a883619575d864aef5 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 25 May 2021 20:28:02 +0000 Subject: 
Add rqtl_wrapper.R script and necessary imports to guix.scm --- guix.scm | 2 + scripts/rqtl_wrapper.R | 204 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+) create mode 100644 scripts/rqtl_wrapper.R diff --git a/guix.scm b/guix.scm index 75084db..d20c7f0 100644 --- a/guix.scm +++ b/guix.scm @@ -87,6 +87,8 @@ ; ("r-ctl" ,r-ctl) ("r-qtl" ,r-qtl) ("r-optparse" ,r-optparse) + ("r-stringi" ,r-stringi) + ("r-stringr" ,r-stringr) ; ("r-wgcna" ,r-wgcna) )) (build-system python-build-system) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R new file mode 100644 index 0000000..ae970d4 --- /dev/null +++ b/scripts/rqtl_wrapper.R @@ -0,0 +1,204 @@ +library(optparse) +library(qtl) +library(stringi) +library(stringr) + +tmp_dir = Sys.getenv("TMPDIR") + +option_list = list( + make_option(c("-g", "--geno"), type="character", help=".geno file containing a dataset's genotypes"), + make_option(c("-p", "--pheno"), type="character", help="File containing two columns - sample names and values"), + make_option(c("-c", "--addcovar"), action="store_true", default=NULL, help="Use covariates (included as extra columns in the phenotype input file)"), + make_option(c("--model"), type="character", default="normal", help="Mapping Model - Normal or Non-Parametric"), + make_option(c("--method"), type="character", default="hk", help="Mapping Method - hk (Haley Knott), ehk (Extended Haley Knott), mr (Marker Regression), em (Expectation-Maximization), imp (Imputation)"), + make_option(c("-i", "--interval"), action="store_true", default=NULL, help="Use interval mapping"), + make_option(c("--nperm"), type="integer", default=0, help="Number of permutations"), + make_option(c("-s", "--scale"), type="character", default="mb", help="Mapping scale - Megabases (Mb) or Centimorgans (cM)"), + make_option(c("--control"), type="character", default=NULL, help="Name of marker (contained in genotype file) to be used as a control"), + make_option(c("-o", "--outdir"), type="character", default=file.path(tmp_dir, "output"), help="Directory in which to write result file"), + make_option(c("-f", "--filename"), type="character", default=NULL, help="Name to use for result file"), + make_option(c("-v", "--verbose"), action="store_true", default=NULL, help="Show extra information") +); + +opt_parser = OptionParser(option_list=option_list); +opt = parse_args(opt_parser); + +verbose_print <- function(...){ + if (!is.null(opt$verbose)) { + for(item in list(...)){ + cat(item) + } + cat("\n") + } +} + +if (is.null(opt$geno) || is.null(opt$pheno)){ + print_help(opt_parser) + stop("Both a genotype and phenotype file must be provided.", call.=FALSE) +} + +geno_file = opt$geno +pheno_file = opt$pheno + +# Generate randomized filename for cross object +cross_file = file.path(tmp_dir, "cross", paste(stri_rand_strings(1, 8), ".cross", sep = "")) + +trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) } + +get_geno_code <- function(header, name = 'unk'){ + mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1) + return(trim(strsplit(header[mat],':')[[1]][2])) +} + +geno_to_csvr <- function(genotypes, trait_names, trait_vals, out, sex = NULL, + mapping_scale = "Mb", verbose = FALSE){ + # Assume a geno header is not longer than 40 lines + header = readLines(genotypes, 40) + + # Major hack to skip the geno headers + toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1) - 1 + + type <- get_geno_code(header, 'type') + + # Get the 
genotype codes + if(type == '4-way'){ + genocodes <- NULL + } else { + genocodes <- c(get_geno_code(header, 'mat'), get_geno_code(header, 'het'), + get_geno_code(header, 'pat')) + } + genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, + na.strings=get_geno_code(header,'unk'), + colClasses='character', comment.char = '#') + + verbose_print('Genodata:', toskip, " ", dim(genodata), genocodes, '\n') + + # If there isn't a sex phenotype, treat all as males + if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4)) + + cross_items = list() + + # Add trait and covar phenotypes + for (i in 1:length(trait_names)){ + cross_items[[i]] <- c(trait_names[i], '', '', unlist(trait_vals[[i]])) + } + + # Sex phenotype for the mice + cross_items[[length(trait_names) + 1]] <- c('sex', '', '', sex) + # Genotypes + cross_items[[length(trait_names) + 2]] <- cbind(genodata[,c('Locus','Chr', mapping_scale)], + genodata[, 5:ncol(genodata)]) + + out_csvr <- do.call(rbind, cross_items) + + # Save it to a file + write.table(out_csvr, file=out, row.names=FALSE, col.names=FALSE, quote=FALSE, sep=',') + + # Load the created cross file using R/qtl read.cross + if (type == '4-way') { + verbose_print('Loading in as 4-WAY\n') + cross = read.cross(file=out, 'csvr', genotypes=NULL, crosstype="4way") + } else if(type == 'f2') { + verbose_print('Loading in as F2\n') + cross = read.cross(file=out, 'csvr', genotypes=genocodes, crosstype="f2") + } else { + verbose_print('Loading in as normal\n') + cross = read.cross(file=out, 'csvr', genotypes=genocodes) + } + if (type == 'riset') { + # If its a RIL, convert to a RIL in R/qtl + verbose_print('Converting to RISELF\n') + cross <- convert2riself(cross) + } + + return(cross) +} + +create_marker_covars <- function(the_cross, control_marker){ + #' Given a string of one or more marker names (comma separated), fetch + #' the markers' values from the genotypes and return them as vectors/a vector + #' of values + + # In case spaces are added in the string of marker names + covariate_names <- strsplit(str_replace(control_marker, " ", ""), ",") + + genotypes <- pull.geno(the_cross) + covariates_in_geno <- which(covariate_names %in% colnames(genotypes)) + covariate_names <- covariate_names[covariates_in_geno] + marker_covars <- genotypes[, unlist(covariate_names)] + + return(marker_covars) +} + +# Get phenotype vector from input file +df <- read.table(pheno_file, na.strings = "x", header=TRUE, check.names=FALSE) +sample_names <- df$Sample +trait_names <- colnames(df)[2:length(colnames(df))] + +trait_vals <- vector(mode = "list", length = length(trait_names)) +for (i in 1:length(trait_names)) { + this_trait <- trait_names[i] + this_vals <- df[this_trait] + trait_vals[[i]] <- this_vals +} + +# Since there will always only be one non-covar phenotype, its name will be in the first column +pheno_name = unlist(trait_names)[1] + +verbose_print('Generating cross object\n') +cross_object = geno_to_csvr(geno_file, trait_names, trait_vals, cross_file) + +# Calculate genotype probabilities +if (!is.null(opt$interval)) { + verbose_print('Calculating genotype probabilities with interval mapping\n') + cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") +} else { + verbose_print('Calculating genotype probabilities\n') + cross_object <- calc.genoprob(cross_object) +} + +# Pull covariates out of cross object, if they exist +covars = vector() +if (!is.null(opt$addcovar)) { + covar_names = trait_names[2:length(trait_names)] + covars <- pull.pheno(cross_object, covar_names) +} + +# If 
a marker name is supplied as covariate, get its vector of values and add them as a covariate +if (!is.null(opt$control)) { + marker_covars = create_marker_covars(cross_object, opt$control) + covars <- cbind(covars, marker_covars) +} + +# Calculate permutations +if (opt$nperm > 0) { + if (!is.null(opt$filename)){ + perm_out_file = file.path(opt$outdir, paste("PERM_", opt$filename, sep = "")) + } else { + perm_out_file = file.path(opt$outdir, paste(pheno_name, "_PERM_", stri_rand_strings(1, 8), ".csv", sep = "")) + } + + if (!is.null(opt$addcovar) || !is.null(opt$control)){ + verbose_print('Running permutations with cofactors\n') + perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method) + } else { + verbose_print('Running permutations\n') + perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method) + } + write.csv(perm_results, perm_out_file) +} + +if (!is.null(opt$filename)){ + out_file = file.path(opt$outdir, opt$filename) +} else { + out_file = file.path(opt$outdir, paste(pheno_name, "_", stri_rand_strings(1, 8), ".csv", sep = "")) +} + +if (!is.null(opt$addcovar) || !is.null(opt$control)){ + verbose_print('Running scanone with cofactors\n') + qtl_results = scanone(cross_object, pheno.col=1, addcovar=covars, model=opt$model, method=opt$method) +} else { + verbose_print('Running scanone\n') + qtl_results = scanone(cross_object, pheno.col=1, model=opt$model, method=opt$method) +} +write.csv(qtl_results, out_file) \ No newline at end of file -- cgit v1.2.3 From 9b8fb0f63114119e58663a3e7fee352ce7375fb4 Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 26 May 2021 15:09:00 +0000 Subject: Added option for enabling permutation strata Fixed issue where covariates with numerical names were being ignored Fixed issue where the hash filename wasn't being used for the permutation output --- scripts/rqtl_wrapper.R | 51 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index ae970d4..f7e0406 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -13,6 +13,7 @@ option_list = list( make_option(c("--method"), type="character", default="hk", help="Mapping Method - hk (Haley Knott), ehk (Extended Haley Knott), mr (Marker Regression), em (Expectation-Maximization), imp (Imputation)"), make_option(c("-i", "--interval"), action="store_true", default=NULL, help="Use interval mapping"), make_option(c("--nperm"), type="integer", default=0, help="Number of permutations"), + make_option(c("--pstrata"), action="store_true", default=NULL, help="Use permutation strata (stored as final column/vector in phenotype input file)"), make_option(c("-s", "--scale"), type="character", default="mb", help="Mapping scale - Megabases (Mb) or Centimorgans (cM)"), make_option(c("--control"), type="character", default=NULL, help="Name of marker (contained in genotype file) to be used as a control"), make_option(c("-o", "--outdir"), type="character", default=file.path(tmp_dir, "output"), help="Directory in which to write result file"), @@ -135,15 +136,17 @@ df <- read.table(pheno_file, na.strings = "x", header=TRUE, check.names=FALSE) sample_names <- df$Sample trait_names <- colnames(df)[2:length(colnames(df))] +# Since there will always only be one non-covar phenotype, its name will be in the first column +pheno_name = unlist(trait_names)[1] + trait_vals <- vector(mode = "list", length = 
length(trait_names)) for (i in 1:length(trait_names)) { this_trait <- trait_names[i] this_vals <- df[this_trait] trait_vals[[i]] <- this_vals -} -# Since there will always only be one non-covar phenotype, its name will be in the first column -pheno_name = unlist(trait_names)[1] + trait_names[i] = paste("T_", this_trait, sep = "") +} verbose_print('Generating cross object\n') cross_object = geno_to_csvr(geno_file, trait_names, trait_vals, cross_file) @@ -158,12 +161,24 @@ if (!is.null(opt$interval)) { } # Pull covariates out of cross object, if they exist -covars = vector() +covars = vector(mode = "list", length = length(trait_names) - 1) if (!is.null(opt$addcovar)) { - covar_names = trait_names[2:length(trait_names)] + #If perm strata are being used, it'll be included as the final column in the phenotype file + if (!is.null(opt$pstrata)) { + covar_names = trait_names[3:length(trait_names) - 1] + } else { + covar_names = trait_names[3:length(trait_names)] + } covars <- pull.pheno(cross_object, covar_names) } +# Pull permutation strata out of cross object, if it is being used +perm_strata = vector() +if (!is.null(opt$pstrata)) { + strata_col = trait_names[length(trait_names)] + perm_strata <- pull.pheno(cross_object, strata_col) +} + # If a marker name is supplied as covariate, get its vector of values and add them as a covariate if (!is.null(opt$control)) { marker_covars = create_marker_covars(cross_object, opt$control) @@ -173,17 +188,27 @@ if (!is.null(opt$control)) { # Calculate permutations if (opt$nperm > 0) { if (!is.null(opt$filename)){ - perm_out_file = file.path(opt$outdir, paste("PERM_", opt$filename, sep = "")) + perm_out_file = file.path(opt$outdir, paste("PERM_", opt$filename, sep = "" )) } else { - perm_out_file = file.path(opt$outdir, paste(pheno_name, "_PERM_", stri_rand_strings(1, 8), ".csv", sep = "")) + perm_out_file = file.path(opt$outdir, paste(pheno_name, "_PERM_", stri_rand_strings(1, 8), sep = "")) } if (!is.null(opt$addcovar) || !is.null(opt$control)){ - verbose_print('Running permutations with cofactors\n') - perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method) + if (!is.null(opt$pstrata)) { + verbose_print('Running permutations with cofactors and strata\n') + perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) + } else { + verbose_print('Running permutations with cofactors\n') + perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method) + } } else { - verbose_print('Running permutations\n') - perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method) + if (!is.null(opt$pstrata)) { + verbose_print('Running permutations with strata\n') + perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) + } else { + verbose_print('Running permutations\n') + perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method) + } } write.csv(perm_results, perm_out_file) } @@ -191,7 +216,7 @@ if (opt$nperm > 0) { if (!is.null(opt$filename)){ out_file = file.path(opt$outdir, opt$filename) } else { - out_file = file.path(opt$outdir, paste(pheno_name, "_", stri_rand_strings(1, 8), ".csv", sep = "")) + out_file = file.path(opt$outdir, paste(pheno_name, "_", stri_rand_strings(1, 8), sep = "")) } if 
(!is.null(opt$addcovar) || !is.null(opt$control)){ @@ -201,4 +226,4 @@ if (!is.null(opt$addcovar) || !is.null(opt$control)){ verbose_print('Running scanone\n') qtl_results = scanone(cross_object, pheno.col=1, model=opt$model, method=opt$method) } -write.csv(qtl_results, out_file) \ No newline at end of file +write.csv(qtl_results, out_file) -- cgit v1.2.3 From 84790de8bcc51c00a92b71878088345cd58ccc51 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 28 May 2021 17:44:07 +0000 Subject: Fixed issue where all bool kwargs were always being passed to generate_rqtl_cmd and also made code check if output file already exists (so caching works) --- gn3/api/rqtl.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 0194b6f..38f4c1e 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -26,7 +26,7 @@ run the rqtl_wrapper script and return the results as JSON # Split kwargs by those with values and boolean ones that just convert to True/False kwargs = ["model", "method", "nperm", "scale", "control_marker"] - boolean_kwargs = ["addcovar", "interval"] + boolean_kwargs = ["addcovar", "interval", "pstrata"] all_kwargs = kwargs + boolean_kwargs rqtl_kwargs = {"geno": genofile, "pheno": phenofile} @@ -41,12 +41,15 @@ run the rqtl_wrapper script and return the results as JSON rqtl_cmd = generate_rqtl_cmd( rqtl_wrapper_cmd=current_app.config.get("RQTL_WRAPPER_CMD"), rqtl_wrapper_kwargs=rqtl_kwargs, - rqtl_wrapper_bool_kwargs=boolean_kwargs + rqtl_wrapper_bool_kwargs=rqtl_bool_kwargs ) - os.system(rqtl_cmd.get('rqtl_cmd')) - rqtl_output = {} + if not os.path.isfile(os.path.join(current_app.config.get("TMPDIR"), "output", rqtl_cmd.get('output_file'))): + os.system(rqtl_cmd.get('rqtl_cmd')) + + rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) + rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) if int(rqtl_kwargs['nperm']) > 0: rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = process_perm_output(rqtl_cmd.get('output_file')) -- cgit v1.2.3 From c5488d5c4556e84397a36c3dbc27dab377749bb3 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 17 May 2021 14:33:10 +0300 Subject: db: phenotypes: Add a way of updating the Phenotype table * gn3/db/phenotypes.py (Phenotype): New dataclass. (update_phenotype): New function. [phenotype_column_mapping]: New variable. 
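As a rough usage sketch of the new helper (the connection details and the id below are made up for illustration, not part of this change):

    import MySQLdb
    from gn3.db.phenotypes import Phenotype, update_phenotype

    # Illustrative connection; database name and credentials are placeholders.
    conn = MySQLdb.connect(db="db_webqtl", user="webqtlout", passwd="webqtlout")

    # Only the non-None fields of `data` end up in the SET clause, and only
    # the non-None fields of `where` end up in the WHERE clause.
    rows_changed = update_phenotype(conn,
                                    data=Phenotype(submitter="Rob"),
                                    where=Phenotype(id_=1))
    # Composes roughly: UPDATE Phenotype SET Submitter = 'Rob' WHERE id = '1'
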
--- gn3/db/phenotypes.py | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 gn3/db/phenotypes.py diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py new file mode 100644 index 0000000..46a54bd --- /dev/null +++ b/gn3/db/phenotypes.py @@ -0,0 +1,61 @@ +"""This contains all the necessary functions that access the phenotypes from +the db""" +from dataclasses import dataclass, asdict, astuple + +from typing import Any, Optional +from MySQLdb import escape_string + + +# pylint: disable=[R0902] +@dataclass(frozen=True) +class Phenotype: + """Data Type that represents a Phenotype""" + id_: Optional[int] = None + pre_pub_description: Optional[str] = None + post_pub_description: Optional[str] = None + original_description: Optional[str] = None + units: Optional[str] = None + pre_pub_abbrevition: Optional[str] = None + post_pub_abbreviation: Optional[str] = None + lab_code: Optional[str] = None + submitter: Optional[str] = None + owner: Optional[str] = None + authorized_users: Optional[str] = None + + +# Mapping from the Phenotype dataclass to the actual column names in the +# database +phenotype_column_mapping = { + "id_": "id", + "pre_pub_description": "Pre_publication_description", + "post_pub_description": "Post_publication_description", + "original_description": "Original_description", + "units": "Units", + "pre_pub_abbrevition": "Pre_publication_abbreviation", + "post_pub_abbreviation": "Post_publication_abbreviation", + "lab_code": "Lab_code", + "submitter": "Submitter", + "owner": "Owner", + "authorized_users": "Authorized_Users", +} + + +def update_phenotype(conn: Any, + data: Phenotype, + where: Phenotype) -> Optional[int]: + """Update phenotype metadata with DATA that depends on the WHERE clause""" + if not any(astuple(data) + astuple(where)): + return None + sql = "UPDATE Phenotype SET " + sql += ", ".join(f"{phenotype_column_mapping.get(k)} " + f"= '{escape_string(str(v)).decode('utf-8')}'" for + k, v in asdict(data).items() + if v is not None and k in phenotype_column_mapping) + sql += " WHERE " + sql += "AND ".join(f"{phenotype_column_mapping.get(k)} = " + f"'{escape_string(str(v)).decode('utf-8')}'" for + k, v in asdict(where).items() + if v is not None and k in phenotype_column_mapping) + with conn.cursor() as cursor: + cursor.execute(sql) + return cursor.rowcount -- cgit v1.2.3 From c73c4d4edf88d2af5636962f1e4710be17516ea1 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 17 May 2021 14:34:25 +0300 Subject: tests: test_phenotypes: New test cases for loading phenotypes --- tests/unit/db/test_phenotypes.py | 41 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 tests/unit/db/test_phenotypes.py diff --git a/tests/unit/db/test_phenotypes.py b/tests/unit/db/test_phenotypes.py new file mode 100644 index 0000000..8b810fe --- /dev/null +++ b/tests/unit/db/test_phenotypes.py @@ -0,0 +1,41 @@ +"""Tests for db/phenotypes.py""" +from unittest import TestCase +from unittest import mock + +from gn3.db.phenotypes import Phenotype +from gn3.db.phenotypes import update_phenotype + + +class TestPhenotypes(TestCase): + """Test cases for fetching chromosomes""" + def test_update_phenotype_with_no_data(self): + """ + Test that a phenotype is updated correctly if an empty Phenotype dataclass + is provided + """ + db_mock = mock.MagicMock() + self.assertEqual(update_phenotype( + db_mock, data=Phenotype(), where=Phenotype()), None) + + def test_update_phenotype_with_data(self): + """ + Test that a 
phenotype is updated correctly if some + data is provided + """ + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + type(cursor).rowcount = 1 + self.assertEqual(update_phenotype( + db_mock, data=Phenotype( + pre_pub_description="Test Pre Pub", + submitter="Rob", + post_pub_description="Test Post Pub"), + where=Phenotype(id_=1)), 1) + cursor.execute.assert_called_once_with( + "UPDATE Phenotype SET " + "Pre_publication_description = " + "'Test Pre Pub', " + "Post_publication_description = " + "'Test Post Pub', Submitter = 'Rob' " + "WHERE id = '1'" + ) -- cgit v1.2.3 From 18826b348149f1233bcf3e2e4cda5da5297ecb6b Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 19 May 2021 21:32:24 +0300 Subject: db: phenotype: Make "pylint: disable=[R0902]" global for file --- gn3/db/phenotypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index 46a54bd..355c2e2 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -1,3 +1,4 @@ +# pylint: disable=[R0902] """This contains all the necessary functions that access the phenotypes from the db""" from dataclasses import dataclass, asdict, astuple @@ -6,7 +7,6 @@ from typing import Any, Optional from MySQLdb import escape_string -# pylint: disable=[R0902] @dataclass(frozen=True) class Phenotype: """Data Type that represents a Phenotype""" -- cgit v1.2.3 From 58ebaba0a9249e6fa39d8059b32c17623113a205 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 19 May 2021 21:33:00 +0300 Subject: db: phenotypes: Add dataclass to represent PublishXRef --- gn3/db/phenotypes.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index 355c2e2..11d67db 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -23,6 +23,22 @@ class Phenotype: authorized_users: Optional[str] = None +@dataclass(frozen=True) +class PublishXRef: + """Data Type that represents the table PublishXRef""" + id_: Optional[int] = None + inbred_set_id: Optional[str] = None + phenotype_id: Optional[int] = None + publication_id: Optional[str] = None + data_id: Optional[int] = None + mean: Optional[float] = None + locus: Optional[str] = None + lrs: Optional[float] = None + additive: Optional[float] = None + sequence: Optional[int] = None + comments: Optional[str] = None + + # Mapping from the Phenotype dataclass to the actual column names in the # database phenotype_column_mapping = { -- cgit v1.2.3 From dfafb7c11cb4f57c7e1a61de2ce2f1b62e1018be Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 19 May 2021 21:44:34 +0300 Subject: db: phenotypes: Add phenotype table mapping --- gn3/db/phenotypes.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index 11d67db..645c0af 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -23,6 +23,23 @@ class Phenotype: authorized_users: Optional[str] = None +# Mapping from the Phenotype dataclass to the actual column names in the +# database +phenotype_column_mapping = { + "id_": "id", + "pre_pub_description": "Pre_publication_description", + "post_pub_description": "Post_publication_description", + "original_description": "Original_description", + "units": "Units", + "pre_pub_abbrevition": "Pre_publication_abbreviation", + "post_pub_abbreviation": "Post_publication_abbreviation", + "lab_code": "Lab_code", + "submitter": "Submitter", + "owner": "Owner", + "authorized_users": "Authorized_Users", +} + + @dataclass(frozen=True) class PublishXRef: 
"""Data Type that represents the table PublishXRef""" -- cgit v1.2.3 From a1f48d95f17e939512fa9c276caf39d9f75878f9 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 19 May 2021 21:44:58 +0300 Subject: db: phenotypes: Put mapping def after dataclass --- gn3/db/phenotypes.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index 645c0af..be5fb7b 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -56,20 +56,20 @@ class PublishXRef: comments: Optional[str] = None -# Mapping from the Phenotype dataclass to the actual column names in the +# Mapping from the PublishXRef dataclass to the actual column names in the # database -phenotype_column_mapping = { - "id_": "id", - "pre_pub_description": "Pre_publication_description", - "post_pub_description": "Post_publication_description", - "original_description": "Original_description", - "units": "Units", - "pre_pub_abbrevition": "Pre_publication_abbreviation", - "post_pub_abbreviation": "Post_publication_abbreviation", - "lab_code": "Lab_code", - "submitter": "Submitter", - "owner": "Owner", - "authorized_users": "Authorized_Users", +publish_x_ref_mapping = { + "id_": "Id", + "inbred_set_id": "InbredSetId", + "phenotype_id": "PhenotypeId", + "publication_id": "PublicationId", + "data_id": "DataId", + "mean": "mean", + "locus": "locus", + "lrs": "lrs", + "additive": "additive", + "sequence": "sequence", + "comments": "comments", } -- cgit v1.2.3 From 5c48d14d95b46caa10bcdbd80aec1ae04ec7f225 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 19 May 2021 22:00:02 +0300 Subject: db: phenotypes: Add type for Dataclass See: https://www.py4u.net/discuss/188952 --- gn3/db/phenotypes.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index be5fb7b..e97322a 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -3,9 +3,16 @@ the db""" from dataclasses import dataclass, asdict, astuple -from typing import Any, Optional +from typing import Any, Dict, Optional from MySQLdb import escape_string +from typing_extensions import Protocol + + +class Dataclass(Protocol): + """Type Definition for a Dataclass""" + __dataclass_fields__: Dict + @dataclass(frozen=True) class Phenotype: -- cgit v1.2.3 From d7571e3c30aa7b1f312fcf975e7336b9f2912709 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 19 May 2021 22:00:30 +0300 Subject: db: phenotypes: Map a table to it's relevant dict mapping --- gn3/db/phenotypes.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index e97322a..f6ca944 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -79,6 +79,11 @@ publish_x_ref_mapping = { "comments": "comments", } +TABLEMAP = { + "Phenotype": phenotype_column_mapping, + "PublishXRef": publish_x_ref_mapping, +} + def update_phenotype(conn: Any, data: Phenotype, -- cgit v1.2.3 From b30a029cc3c0771a5f5ecaeed663cca24c88f534 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 19 May 2021 22:09:46 +0300 Subject: db: phenotypes: Generalise the update function * gn3/db/phenotypes.py (update_phenotype): Delete it. (update): New, more general function. 
--- gn3/db/phenotypes.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index f6ca944..fdb148b 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -1,4 +1,4 @@ -# pylint: disable=[R0902] +# pylint: disable=[R0902, R0903] """This contains all the necessary functions that access the phenotypes from the db""" from dataclasses import dataclass, asdict, astuple @@ -85,22 +85,23 @@ TABLEMAP = { } -def update_phenotype(conn: Any, - data: Phenotype, - where: Phenotype) -> Optional[int]: - """Update phenotype metadata with DATA that depends on the WHERE clause""" +def update(conn: Any, + table: str, + data: Dataclass, + where: Dataclass) -> Optional[int]: + """Run an UPDATE on a table""" if not any(astuple(data) + astuple(where)): return None - sql = "UPDATE Phenotype SET " - sql += ", ".join(f"{phenotype_column_mapping.get(k)} " + sql = f"UPDATE {table} SET " + sql += ", ".join(f"{TABLEMAP[table].get(k)} " f"= '{escape_string(str(v)).decode('utf-8')}'" for k, v in asdict(data).items() - if v is not None and k in phenotype_column_mapping) + if v is not None and k in TABLEMAP[table]) sql += " WHERE " - sql += "AND ".join(f"{phenotype_column_mapping.get(k)} = " + sql += "AND ".join(f"{TABLEMAP[table].get(k)} = " f"'{escape_string(str(v)).decode('utf-8')}'" for k, v in asdict(where).items() - if v is not None and k in phenotype_column_mapping) + if v is not None and k in TABLEMAP[table]) with conn.cursor() as cursor: cursor.execute(sql) return cursor.rowcount -- cgit v1.2.3 From 856d4a72ace3584b731a7c45470e3df2704be02f Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 19 May 2021 22:12:45 +0300 Subject: db: phenotypes: Update failing tests --- tests/unit/db/test_phenotypes.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/unit/db/test_phenotypes.py b/tests/unit/db/test_phenotypes.py index 8b810fe..6b394d7 100644 --- a/tests/unit/db/test_phenotypes.py +++ b/tests/unit/db/test_phenotypes.py @@ -3,19 +3,20 @@ from unittest import TestCase from unittest import mock from gn3.db.phenotypes import Phenotype -from gn3.db.phenotypes import update_phenotype +from gn3.db.phenotypes import update class TestPhenotypes(TestCase): """Test cases for fetching chromosomes""" def test_update_phenotype_with_no_data(self): - """ - Test that a phenotype is updated correctly if an empty Phenotype dataclass + """Test that a phenotype is updated correctly if an empty Phenotype dataclass is provided + """ db_mock = mock.MagicMock() - self.assertEqual(update_phenotype( - db_mock, data=Phenotype(), where=Phenotype()), None) + self.assertEqual(update( + conn=db_mock, table="Phenotype", + data=Phenotype(), where=Phenotype()), None) def test_update_phenotype_with_data(self): """ @@ -25,8 +26,9 @@ class TestPhenotypes(TestCase): db_mock = mock.MagicMock() with db_mock.cursor() as cursor: type(cursor).rowcount = 1 - self.assertEqual(update_phenotype( - db_mock, data=Phenotype( + self.assertEqual(update( + conn=db_mock, table="Phenotype", + data=Phenotype( pre_pub_description="Test Pre Pub", submitter="Rob", post_pub_description="Test Post Pub"), -- cgit v1.2.3 From 1965fef7170a5de14988bc672424be2be6884ec2 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 19 May 2021 22:23:56 +0300 Subject: db: phenotypes: Add Publication dataclass and mapping --- gn3/db/phenotypes.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/gn3/db/phenotypes.py 
b/gn3/db/phenotypes.py index fdb148b..92d8e84 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -79,6 +79,36 @@ publish_x_ref_mapping = { "comments": "comments", } + +@dataclass(frozen=True) +class Publication: + """Data Type that represents the table Publication""" + id_: Optional[int] = None + pubmed_id: Optional[int] = None + abstract: Optional[str] = None + authors: Optional[str] = None + title: Optional[str] = None + journal: Optional[str] = None + volume: Optional[str] = None + pages: Optional[str] = None + month: Optional[str] = None + year: Optional[str] = None + + +publication_mapping = { + "id_": "id", + "PubMed_ID": "pubmed_id", + "Abstract": "abstract", + "Authors": "authors", + "Title": "title", + "Journal": "journal", + "Volume": "volume", + "Pages": "pages", + "Month": "month", + "Year": "year", +} + + TABLEMAP = { "Phenotype": phenotype_column_mapping, "PublishXRef": publish_x_ref_mapping, -- cgit v1.2.3 From c0b617858f36de8e1c15172b36f2a17742c1658c Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 20 May 2021 12:46:37 +0300 Subject: db: phenotypes: Rename phenotype_column_mapping --- gn3/db/phenotypes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index 92d8e84..9e40692 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -32,7 +32,7 @@ class Phenotype: # Mapping from the Phenotype dataclass to the actual column names in the # database -phenotype_column_mapping = { +phenotype_mapping = { "id_": "id", "pre_pub_description": "Pre_publication_description", "post_pub_description": "Post_publication_description", @@ -110,7 +110,7 @@ publication_mapping = { TABLEMAP = { - "Phenotype": phenotype_column_mapping, + "Phenotype": phenotype_mapping, "PublishXRef": publish_x_ref_mapping, } -- cgit v1.2.3 From c237b50a13299dc5b4e8bfa3d719f2668f699b6c Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 20 May 2021 18:51:48 +0300 Subject: db: phenotypes: Add Publication table mapping --- gn3/db/phenotypes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index 9e40692..514037d 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -112,6 +112,8 @@ publication_mapping = { TABLEMAP = { "Phenotype": phenotype_mapping, "PublishXRef": publish_x_ref_mapping, + "Publication": publication_mapping, +} } -- cgit v1.2.3 From 1b6ed578ac251daf19ca2299870a0e7e9a3eb6cc Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 20 May 2021 21:25:10 +0300 Subject: db: phenotypes: Add a dataclass map Maps a string to it's dataclass. --- gn3/db/phenotypes.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index 514037d..c3ad683 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -114,6 +114,11 @@ TABLEMAP = { "PublishXRef": publish_x_ref_mapping, "Publication": publication_mapping, } + +DATACLASSMAP = { + "Phenotype": Phenotype, + "PublishXRef": PublishXRef, + "Publication": Publication, } -- cgit v1.2.3 From 2face963fcba5b3231e9cc26a38f1370b00eb7b0 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 20 May 2021 21:25:48 +0300 Subject: db: phenotypes: Add function for fetching a single result * gn3/db/phenotypes.py (fetchone): New function. 
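A minimal call sketch, mirroring the unit test added in the following commit
(the connection is mocked here so the snippet is self-contained; with a live
MySQLdb connection fetchone runs the SELECT and re-packs the returned row via
DATACLASSMAP):

    from unittest import mock
    from gn3.db.phenotypes import Phenotype, fetchone

    conn = mock.MagicMock()  # stand-in for a real MySQLdb connection
    phenotype = fetchone(conn, "Phenotype", where=Phenotype(id_=35))
    # Against a real database this issues:
    #   SELECT * FROM Phenotype WHERE id = '35'
    # and returns the row as a Phenotype dataclass.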
--- gn3/db/phenotypes.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index c3ad683..96cb275 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -142,3 +142,20 @@ def update(conn: Any, with conn.cursor() as cursor: cursor.execute(sql) return cursor.rowcount + + +def fetchone(conn: Any, + table: str, + where: Dataclass) -> Optional[Dataclass]: + """Run a SELECT on a table. Returns only one result!""" + if not any(astuple(where)): + return None + sql = f"SELECT * FROM {table} " + sql += "WHERE " + sql += "AND ".join(f"{TABLEMAP[table].get(k)} = " + f"'{escape_string(str(v)).decode('utf-8')}'" for + k, v in asdict(where).items() + if v is not None and k in TABLEMAP[table]) + with conn.cursor() as cursor: + cursor.execute(sql) + return DATACLASSMAP[table](*cursor.fetchone()) -- cgit v1.2.3 From 786d48ede007f0134081495f3f63be3a33b8f71e Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 20 May 2021 21:26:34 +0300 Subject: tests: test_phenotype: Add function that tests "fetchone" --- tests/unit/db/test_phenotypes.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/unit/db/test_phenotypes.py b/tests/unit/db/test_phenotypes.py index 6b394d7..9fed524 100644 --- a/tests/unit/db/test_phenotypes.py +++ b/tests/unit/db/test_phenotypes.py @@ -2,6 +2,7 @@ from unittest import TestCase from unittest import mock +from gn3.db.phenotypes import fetchone from gn3.db.phenotypes import Phenotype from gn3.db.phenotypes import update @@ -41,3 +42,24 @@ class TestPhenotypes(TestCase): "'Test Post Pub', Submitter = 'Rob' " "WHERE id = '1'" ) + + def test_fetch_phenotype(self): + """Test that a single phenotype is fetched properly + + """ + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + test_data = ( + 35, "Test pre-publication", "Test post-publication", + "Original description A", "cm^2", "pre-abbrev", + "post-abbrev", "LAB001", "R. W.", "R. W.", "R. W." 
+ ) + cursor.fetchone.return_value = test_data + phenotype = fetchone(db_mock, + "Phenotype", + where=Phenotype(id_=35)) + self.assertEqual(phenotype.id_, 35) + self.assertEqual(phenotype.pre_pub_description, + "Test pre-publication") + cursor.execute.assert_called_once_with( + "SELECT * FROM Phenotype WHERE id = '35'") -- cgit v1.2.3 From 12e8ca8eeefa2daea25f7dbc2fc9c99310766d1b Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 20 May 2021 22:41:54 +0300 Subject: db: phenotypes: Fix typo --- gn3/db/phenotypes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index 96cb275..ddcd11e 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -22,7 +22,7 @@ class Phenotype: post_pub_description: Optional[str] = None original_description: Optional[str] = None units: Optional[str] = None - pre_pub_abbrevition: Optional[str] = None + pre_pub_abbreviation: Optional[str] = None post_pub_abbreviation: Optional[str] = None lab_code: Optional[str] = None submitter: Optional[str] = None @@ -38,7 +38,7 @@ phenotype_mapping = { "post_pub_description": "Post_publication_description", "original_description": "Original_description", "units": "Units", - "pre_pub_abbrevition": "Pre_publication_abbreviation", + "pre_pub_abbreviation": "Pre_publication_abbreviation", "post_pub_abbreviation": "Post_publication_abbreviation", "lab_code": "Lab_code", "submitter": "Submitter", -- cgit v1.2.3 From 57a1194a12fe1a8565d8abd6b833da57f12898b4 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 25 May 2021 14:09:47 +0300 Subject: db: phenotypes: Fix publication_mapping --- gn3/db/phenotypes.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index ddcd11e..ee523ad 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -97,15 +97,15 @@ class Publication: publication_mapping = { "id_": "id", - "PubMed_ID": "pubmed_id", - "Abstract": "abstract", - "Authors": "authors", - "Title": "title", - "Journal": "journal", - "Volume": "volume", - "Pages": "pages", - "Month": "month", - "Year": "year", + "pubmed_id": "PubMed_ID", + "abstract": "Abstract", + "authors": "Authors", + "title": "Title", + "journal": "Journal", + "volume": "Volume", + "pages": "Pages", + "month": "Month", + "year": "Year", } -- cgit v1.2.3 From 93ab68fe650eed0bb53d77225f47f72e527e48c4 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 26 May 2021 12:05:08 +0300 Subject: Move the methods, "update" and "fetch", to gn3.db --- gn3/db/__init__.py | 70 ++++++++++++++++++++++++++++++++++++++++ gn3/db/phenotypes.py | 64 ++---------------------------------- tests/unit/db/test_phenotypes.py | 4 +-- 3 files changed, 74 insertions(+), 64 deletions(-) diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index e69de29..fae4d29 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -0,0 +1,70 @@ +# pylint: disable=[R0902, R0903] +"""Module that exposes common db operations""" +from typing import Optional, Dict, Any +from dataclasses import dataclass, asdict, astuple +from typing_extensions import Protocol +from MySQLdb import escape_string + +from gn3.db.phenotypes import Phenotype +from gn3.db.phenotypes import PublishXRef +from gn3.db.phenotypes import Publication + +from gn3.db.phenotypes import phenotype_mapping +from gn3.db.phenotypes import publish_x_ref_mapping +from gn3.db.phenotypes import publication_mapping + +TABLEMAP = { + "Phenotype": phenotype_mapping, + "PublishXRef": 
publish_x_ref_mapping, + "Publication": publication_mapping, +} + +DATACLASSMAP = { + "Phenotype": Phenotype, + "PublishXRef": PublishXRef, + "Publication": Publication, +} + + +class Dataclass(Protocol): + """Type Definition for a Dataclass""" + __dataclass_fields__: Dict + + +def update(conn: Any, + table: str, + data: Dataclass, + where: Dataclass) -> Optional[int]: + """Run an UPDATE on a table""" + if not any(astuple(data) + astuple(where)): + return None + sql = f"UPDATE {table} SET " + sql += ", ".join(f"{TABLEMAP[table].get(k)} " + f"= '{escape_string(str(v)).decode('utf-8')}'" for + k, v in asdict(data).items() + if v is not None and k in TABLEMAP[table]) + sql += " WHERE " + sql += "AND ".join(f"{TABLEMAP[table].get(k)} = " + f"'{escape_string(str(v)).decode('utf-8')}'" for + k, v in asdict(where).items() + if v is not None and k in TABLEMAP[table]) + with conn.cursor() as cursor: + cursor.execute(sql) + return cursor.rowcount + + +def fetchone(conn: Any, + table: str, + where: Dataclass) -> Optional[Dataclass]: + """Run a SELECT on a table. Returns only one result!""" + if not any(astuple(where)): + return None + sql = f"SELECT * FROM {table} " + sql += "WHERE " + sql += "AND ".join(f"{TABLEMAP[table].get(k)} = " + f"'{escape_string(str(v)).decode('utf-8')}'" for + k, v in asdict(where).items() + if v is not None and k in TABLEMAP[table]) + with conn.cursor() as cursor: + cursor.execute(sql) + return DATACLASSMAP[table](*cursor.fetchone()) diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index ee523ad..2b93c85 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -1,17 +1,9 @@ # pylint: disable=[R0902, R0903] """This contains all the necessary functions that access the phenotypes from the db""" -from dataclasses import dataclass, asdict, astuple +from dataclasses import dataclass -from typing import Any, Dict, Optional -from MySQLdb import escape_string - -from typing_extensions import Protocol - - -class Dataclass(Protocol): - """Type Definition for a Dataclass""" - __dataclass_fields__: Dict +from typing import Optional @dataclass(frozen=True) @@ -107,55 +99,3 @@ publication_mapping = { "month": "Month", "year": "Year", } - - -TABLEMAP = { - "Phenotype": phenotype_mapping, - "PublishXRef": publish_x_ref_mapping, - "Publication": publication_mapping, -} - -DATACLASSMAP = { - "Phenotype": Phenotype, - "PublishXRef": PublishXRef, - "Publication": Publication, -} - - -def update(conn: Any, - table: str, - data: Dataclass, - where: Dataclass) -> Optional[int]: - """Run an UPDATE on a table""" - if not any(astuple(data) + astuple(where)): - return None - sql = f"UPDATE {table} SET " - sql += ", ".join(f"{TABLEMAP[table].get(k)} " - f"= '{escape_string(str(v)).decode('utf-8')}'" for - k, v in asdict(data).items() - if v is not None and k in TABLEMAP[table]) - sql += " WHERE " - sql += "AND ".join(f"{TABLEMAP[table].get(k)} = " - f"'{escape_string(str(v)).decode('utf-8')}'" for - k, v in asdict(where).items() - if v is not None and k in TABLEMAP[table]) - with conn.cursor() as cursor: - cursor.execute(sql) - return cursor.rowcount - - -def fetchone(conn: Any, - table: str, - where: Dataclass) -> Optional[Dataclass]: - """Run a SELECT on a table. 
Returns only one result!""" - if not any(astuple(where)): - return None - sql = f"SELECT * FROM {table} " - sql += "WHERE " - sql += "AND ".join(f"{TABLEMAP[table].get(k)} = " - f"'{escape_string(str(v)).decode('utf-8')}'" for - k, v in asdict(where).items() - if v is not None and k in TABLEMAP[table]) - with conn.cursor() as cursor: - cursor.execute(sql) - return DATACLASSMAP[table](*cursor.fetchone()) diff --git a/tests/unit/db/test_phenotypes.py b/tests/unit/db/test_phenotypes.py index 9fed524..505714a 100644 --- a/tests/unit/db/test_phenotypes.py +++ b/tests/unit/db/test_phenotypes.py @@ -2,9 +2,9 @@ from unittest import TestCase from unittest import mock -from gn3.db.phenotypes import fetchone +from gn3.db import fetchone +from gn3.db import update from gn3.db.phenotypes import Phenotype -from gn3.db.phenotypes import update class TestPhenotypes(TestCase): -- cgit v1.2.3 From 07464f44f48895cc31ba2b088d6125e7777e1073 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Sun, 30 May 2021 13:26:15 +0300 Subject: fix index error (#16) --- gn3/computations/correlations.py | 2 +- tests/unit/computations/test_correlation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py index 25dd26d..f0ce502 100644 --- a/gn3/computations/correlations.py +++ b/gn3/computations/correlations.py @@ -247,7 +247,7 @@ def fetch_lit_correlation_data( cursor.execute(query_formatter(query, *tuple(reversed(query_values)))) lit_corr_results = cursor.fetchone() - lit_results = (gene_id, lit_corr_results[1])\ + lit_results = (gene_id, lit_corr_results[0])\ if lit_corr_results else (gene_id, 0) return lit_results return (gene_id, 0) diff --git a/tests/unit/computations/test_correlation.py b/tests/unit/computations/test_correlation.py index d264738..5746adf 100644 --- a/tests/unit/computations/test_correlation.py +++ b/tests/unit/computations/test_correlation.py @@ -276,7 +276,7 @@ class TestCorrelation(TestCase): input trait mouse gene id and mouse gene id """ - expected_db_results = [("val", x*0.1) + expected_db_results = [[x*0.1] for x in range(1, 4)] conn = DataBase(expected_results=expected_db_results) expected_results = ("1", 0.1) -- cgit v1.2.3 From 515ac34950db419bd6440afd1393cf41310d1814 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 2 Jun 2021 07:38:53 +0300 Subject: gn3: db: Return None if data and where are empty --- gn3/db/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index fae4d29..1eb7b12 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -36,7 +36,7 @@ def update(conn: Any, data: Dataclass, where: Dataclass) -> Optional[int]: """Run an UPDATE on a table""" - if not any(astuple(data) + astuple(where)): + if not (any(astuple(data)) and any(astuple(where))): return None sql = f"UPDATE {table} SET " sql += ", ".join(f"{TABLEMAP[table].get(k)} " -- cgit v1.2.3 From ece41f5f971595c5d005c4beaa984c45471a6647 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 2 Jun 2021 07:54:35 +0300 Subject: Get the diff between 2 dicts and return that as a dict --- gn3/db/__init__.py | 16 ++++++++++++++++ tests/unit/db/test_phenotypes.py | 8 ++++++++ 2 files changed, 24 insertions(+) diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index 1eb7b12..19135fc 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -68,3 +68,19 @@ def fetchone(conn: Any, with conn.cursor() as cursor: cursor.execute(sql) return DATACLASSMAP[table](*cursor.fetchone()) + 
+ +def diff_from_dict(old: Dict, new: Dict) -> Dict: + """Construct a new dict with a specific structure that contains the difference +between the 2 dicts in the structure: + +diff_from_dict({"id": 1, "data": "a"}, {"id": 2, "data": "b"}) + +Should return: + +{"id": {"old": 1, "new": 2}, "data": {"old": "a", "new": "b"}} + """ + dict_ = {} + for key, value in old.items(): + dict_[key] = {"old": old[key], "new": new[key]} + return dict_ diff --git a/tests/unit/db/test_phenotypes.py b/tests/unit/db/test_phenotypes.py index 505714a..b53db23 100644 --- a/tests/unit/db/test_phenotypes.py +++ b/tests/unit/db/test_phenotypes.py @@ -4,6 +4,7 @@ from unittest import mock from gn3.db import fetchone from gn3.db import update +from gn3.db import diff_from_dict from gn3.db.phenotypes import Phenotype @@ -63,3 +64,10 @@ class TestPhenotypes(TestCase): "Test pre-publication") cursor.execute.assert_called_once_with( "SELECT * FROM Phenotype WHERE id = '35'") + + def test_diff_from_dict(self): + """Test that a correct diff is generated""" + self.assertEqual(diff_from_dict({"id": 1, "data": "a"}, + {"id": 2, "data": "b"}), + {"id": {"old": 1, "new": 2}, + "data": {"old": "a", "new": "b"}}) -- cgit v1.2.3 From a2ef9618dbba2d3f0416cbe8a527ed12070aa67e Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 2 Jun 2021 08:22:11 +0300 Subject: unit: test_phenotypes: Test for multiple "WHERE" clauses --- tests/unit/db/test_phenotypes.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/db/test_phenotypes.py b/tests/unit/db/test_phenotypes.py index b53db23..fdeca5e 100644 --- a/tests/unit/db/test_phenotypes.py +++ b/tests/unit/db/test_phenotypes.py @@ -34,14 +34,14 @@ class TestPhenotypes(TestCase): pre_pub_description="Test Pre Pub", submitter="Rob", post_pub_description="Test Post Pub"), - where=Phenotype(id_=1)), 1) + where=Phenotype(id_=1, owner="Rob")), 1) cursor.execute.assert_called_once_with( "UPDATE Phenotype SET " "Pre_publication_description = " "'Test Pre Pub', " "Post_publication_description = " "'Test Post Pub', Submitter = 'Rob' " - "WHERE id = '1'" + "WHERE id = '1' AND Owner = 'Rob'" ) def test_fetch_phenotype(self): @@ -58,12 +58,12 @@ class TestPhenotypes(TestCase): cursor.fetchone.return_value = test_data phenotype = fetchone(db_mock, "Phenotype", - where=Phenotype(id_=35)) + where=Phenotype(id_=35, owner="Rob")) self.assertEqual(phenotype.id_, 35) self.assertEqual(phenotype.pre_pub_description, "Test pre-publication") cursor.execute.assert_called_once_with( - "SELECT * FROM Phenotype WHERE id = '35'") + "SELECT * FROM Phenotype WHERE id = '35' AND Owner = 'Rob'") def test_diff_from_dict(self): """Test that a correct diff is generated""" -- cgit v1.2.3 From 9d46f943894e15b4a70c64ecba6a3b684863a81f Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 2 Jun 2021 08:23:59 +0300 Subject: gn3: db: Add spacing before around "AND" in sql clause --- gn3/db/__init__.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index 19135fc..d89dbf4 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -44,10 +44,10 @@ def update(conn: Any, k, v in asdict(data).items() if v is not None and k in TABLEMAP[table]) sql += " WHERE " - sql += "AND ".join(f"{TABLEMAP[table].get(k)} = " - f"'{escape_string(str(v)).decode('utf-8')}'" for - k, v in asdict(where).items() - if v is not None and k in TABLEMAP[table]) + sql += " AND ".join(f"{TABLEMAP[table].get(k)} = " + 
f"'{escape_string(str(v)).decode('utf-8')}'" for + k, v in asdict(where).items() + if v is not None and k in TABLEMAP[table]) with conn.cursor() as cursor: cursor.execute(sql) return cursor.rowcount @@ -61,10 +61,10 @@ def fetchone(conn: Any, return None sql = f"SELECT * FROM {table} " sql += "WHERE " - sql += "AND ".join(f"{TABLEMAP[table].get(k)} = " - f"'{escape_string(str(v)).decode('utf-8')}'" for - k, v in asdict(where).items() - if v is not None and k in TABLEMAP[table]) + sql += " AND ".join(f"{TABLEMAP[table].get(k)} = " + f"'{escape_string(str(v)).decode('utf-8')}'" for + k, v in asdict(where).items() + if v is not None and k in TABLEMAP[table]) with conn.cursor() as cursor: cursor.execute(sql) return DATACLASSMAP[table](*cursor.fetchone()) -- cgit v1.2.3 From c96b29e63577f7189afd02df2ced26b150830341 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 3 Jun 2021 11:08:58 +0300 Subject: Add data structures for the table metadata_audit --- gn3/db/__init__.py | 5 +++++ gn3/db/metadata_audit.py | 26 ++++++++++++++++++++++++++ tests/unit/db/test_audit.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+) create mode 100644 gn3/db/metadata_audit.py create mode 100644 tests/unit/db/test_audit.py diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index d89dbf4..175a640 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -5,21 +5,26 @@ from dataclasses import dataclass, asdict, astuple from typing_extensions import Protocol from MySQLdb import escape_string +from gn3.db.metadata_audit import MetadataAudit from gn3.db.phenotypes import Phenotype from gn3.db.phenotypes import PublishXRef from gn3.db.phenotypes import Publication +from gn3.db.metadata_audit import metadata_audit_mapping from gn3.db.phenotypes import phenotype_mapping from gn3.db.phenotypes import publish_x_ref_mapping from gn3.db.phenotypes import publication_mapping + TABLEMAP = { + "metadata_audit": metadata_audit_mapping, "Phenotype": phenotype_mapping, "PublishXRef": publish_x_ref_mapping, "Publication": publication_mapping, } DATACLASSMAP = { + "MetadataAudit": MetadataAudit, "Phenotype": Phenotype, "PublishXRef": PublishXRef, "Publication": Publication, diff --git a/gn3/db/metadata_audit.py b/gn3/db/metadata_audit.py new file mode 100644 index 0000000..6e22b32 --- /dev/null +++ b/gn3/db/metadata_audit.py @@ -0,0 +1,26 @@ +# pylint: disable=[R0902, R0903] +"""This contains all the necessary functions that access the metadata_audit +table from the db + +""" +from dataclasses import dataclass +from typing import Optional + + +@dataclass(frozen=True) +class MetadataAudit: + """Data Type that represents a Phenotype""" + dataset_id: int + editor: str + json_data: str + time_stamp: Optional[str] = None + + +# Mapping from the MetadataAudit dataclass to the actual column names in the +# database +metadata_audit_mapping = { + "dataset_id": "dataset_id", + "editor": "editor", + "json_data": "json_data", + "time_stamp": "time_stamp", +} diff --git a/tests/unit/db/test_audit.py b/tests/unit/db/test_audit.py new file mode 100644 index 0000000..22787bb --- /dev/null +++ b/tests/unit/db/test_audit.py @@ -0,0 +1,28 @@ +"""Tests for db/phenotypes.py""" +import json +from unittest import TestCase +from unittest import mock + +from gn3.db import insert +from gn3.db.metadata_audit import MetadataAudit + + +class TestMetadatAudit(TestCase): + """Test cases for fetching chromosomes""" + + def test_insert_into_metadata_audit(self): + """Test that data is inserted correctly in the audit table + + """ + db_mock = 
mock.MagicMock() + with db_mock.cursor() as cursor: + type(cursor).rowcount = 1 + self.assertEqual(insert( + conn=db_mock, table="metadata_audit", + data=MetadataAudit(dataset_id=35, + editor="Bonface", + json_data=json.dumps({"a": "b"}))), 1) + cursor.execute.assert_called_once_with( + "INSERT INTO metadata_audit ('dataset_id', " + "'editor', 'json_data') " + 'VALUES (\'35\', \'Bonface\', \'{\\"a\\": \\"b\\"}\')') -- cgit v1.2.3 From 1c7709af72e1826da830e2897d937a0706753b89 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 3 Jun 2021 11:30:48 +0300 Subject: sql: Add a new folder that houses sql statements for new tables --- sql/README.md | 9 +++++++++ sql/metadata_audit.sql | 29 +++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 sql/README.md create mode 100644 sql/metadata_audit.sql diff --git a/sql/README.md b/sql/README.md new file mode 100644 index 0000000..2f93d1c --- /dev/null +++ b/sql/README.md @@ -0,0 +1,9 @@ +# MariaDB setup + +This directory hosts any new SQL table added to genenetwork3. When preparing +statements, be sure to follow [this](https://www.sqlstyle.guide/) style guide. + +Also, not that in some tables, we use the json data format; therefore make +sure you have MariaDB version 10.2 and later which include a range of JSON +supporting functions. Read about that +[here](https://mariadb.com/resources/blog/json-with-mariadb-10-2/) diff --git a/sql/metadata_audit.sql b/sql/metadata_audit.sql new file mode 100644 index 0000000..9771e74 --- /dev/null +++ b/sql/metadata_audit.sql @@ -0,0 +1,29 @@ +-- metadata_audit.sql --- + +-- Copyright (C) 2021 Bonface Munyoki K. + +-- Author: Bonface Munyoki K. + +-- This program is free software; you can redistribute it and/or +-- modify it under the terms of the GNU General Public License +-- as published by the Free Software Foundation; either version 3 +-- of the License, or (at your option) any later version. + +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU General Public License for more details. + +-- You should have received a copy of the GNU General Public License +-- along with this program. If not, see . 
+ +-- This table stores data on diffs when editing a Published dataset's data +CREATE TABLE metadata_audit ( + PRIMARY KEY (id), + id INTEGER AUTO_INCREMENT NOT NULL, + dataset_id INTEGER NOT NULL, + editor VARCHAR(255) NOT NULL, + json_data VARCHAR(2048) NOT NULL, + time_stamp timestamp DEFAULT CURRENT_TIMESTAMP NOT NULL, + CHECK (JSON_VALID(json_data)) +); -- cgit v1.2.3 From 511cde13c8f18c2e2be3a29eee3fa7366fce81a3 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 3 Jun 2021 21:11:01 +0300 Subject: gn3: db: Add new function for doing sql INSERT --- gn3/db/__init__.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index 175a640..8b6bf73 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -1,7 +1,7 @@ # pylint: disable=[R0902, R0903] """Module that exposes common db operations""" from typing import Optional, Dict, Any -from dataclasses import dataclass, asdict, astuple +from dataclasses import asdict, astuple from typing_extensions import Protocol from MySQLdb import escape_string @@ -75,6 +75,22 @@ def fetchone(conn: Any, return DATACLASSMAP[table](*cursor.fetchone()) +def insert(conn: Any, + table: str, + data: Dataclass) -> Optional[int]: + """Run an INSERT into a table""" + dict_ = {k: v for k, v in asdict(data).items() + if v is not None and k in TABLEMAP[table]} + sql = f"INSERT INTO {table} (" + sql += ", ".join(f"{k}" for k in dict_.keys()) + sql += ") VALUES (" + sql += ", ".join("%s" for _ in dict_.keys()) + sql += ")" + with conn.cursor() as cursor: + cursor.execute(sql, tuple(dict_.values())) + conn.commit() + return cursor.rowcount + def diff_from_dict(old: Dict, new: Dict) -> Dict: """Construct a new dict with a specific structure that contains the difference between the 2 dicts in the structure: -- cgit v1.2.3 From de834809dbf5f054a5f75c35dbee653cac8311f3 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 3 Jun 2021 21:11:33 +0300 Subject: tests: test_audit: Update test case to check for prepared statements --- tests/unit/db/test_audit.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/db/test_audit.py b/tests/unit/db/test_audit.py index 22787bb..1449281 100644 --- a/tests/unit/db/test_audit.py +++ b/tests/unit/db/test_audit.py @@ -23,6 +23,6 @@ class TestMetadatAudit(TestCase): editor="Bonface", json_data=json.dumps({"a": "b"}))), 1) cursor.execute.assert_called_once_with( - "INSERT INTO metadata_audit ('dataset_id', " - "'editor', 'json_data') " - 'VALUES (\'35\', \'Bonface\', \'{\\"a\\": \\"b\\"}\')') + "INSERT INTO metadata_audit (dataset_id, " + "editor, json_data) VALUES (%s, %s, %s)", + (35, 'Bonface', '{"a": "b"}')) -- cgit v1.2.3 From 4c9bbe6d4229b79a1bc62cf2f641fbc4c4f00abc Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 3 Jun 2021 21:38:58 +0300 Subject: Use prepared statements for UPDATE sql function --- gn3/db/__init__.py | 16 +++++++++------- tests/unit/db/test_phenotypes.py | 10 ++++------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index 8b6bf73..ce92a7d 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -43,18 +43,20 @@ def update(conn: Any, """Run an UPDATE on a table""" if not (any(astuple(data)) and any(astuple(where))): return None + data_ = {k: v for k, v in asdict(data).items() + if v is not None and k in TABLEMAP[table]} + where_ = {k: v for k, v in asdict(where).items() + if v is not None and k in TABLEMAP[table]} sql = f"UPDATE {table} SET " sql += 
", ".join(f"{TABLEMAP[table].get(k)} " - f"= '{escape_string(str(v)).decode('utf-8')}'" for - k, v in asdict(data).items() - if v is not None and k in TABLEMAP[table]) + "= %s" for k in data_.keys()) sql += " WHERE " sql += " AND ".join(f"{TABLEMAP[table].get(k)} = " - f"'{escape_string(str(v)).decode('utf-8')}'" for - k, v in asdict(where).items() - if v is not None and k in TABLEMAP[table]) + "%s" for k in where_.keys()) with conn.cursor() as cursor: - cursor.execute(sql) + cursor.execute(sql, + tuple(data_.values()) + tuple(where_.values())) + conn.commit() return cursor.rowcount diff --git a/tests/unit/db/test_phenotypes.py b/tests/unit/db/test_phenotypes.py index fdeca5e..21eb757 100644 --- a/tests/unit/db/test_phenotypes.py +++ b/tests/unit/db/test_phenotypes.py @@ -37,12 +37,10 @@ class TestPhenotypes(TestCase): where=Phenotype(id_=1, owner="Rob")), 1) cursor.execute.assert_called_once_with( "UPDATE Phenotype SET " - "Pre_publication_description = " - "'Test Pre Pub', " - "Post_publication_description = " - "'Test Post Pub', Submitter = 'Rob' " - "WHERE id = '1' AND Owner = 'Rob'" - ) + "Pre_publication_description = %s, " + "Post_publication_description = %s, " + "Submitter = %s WHERE id = %s AND Owner = %s", + ('Test Pre Pub', 'Test Post Pub', 'Rob', 1, 'Rob')) def test_fetch_phenotype(self): """Test that a single phenotype is fetched properly -- cgit v1.2.3 From d769bfcc38a14720fa888e2b7c0ff874cc91f6a2 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 3 Jun 2021 21:39:25 +0300 Subject: gn3: db: Replace items() with keys() * gn3/db/__init__.py (diff_from_dict): We only use the keys of the dict! --- gn3/db/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index ce92a7d..d62b575 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -93,6 +93,7 @@ def insert(conn: Any, conn.commit() return cursor.rowcount + def diff_from_dict(old: Dict, new: Dict) -> Dict: """Construct a new dict with a specific structure that contains the difference between the 2 dicts in the structure: @@ -104,6 +105,6 @@ Should return: {"id": {"old": 1, "new": 2}, "data": {"old": "a", "new": "b"}} """ dict_ = {} - for key, value in old.items(): + for key in old.keys(): dict_[key] = {"old": old[key], "new": new[key]} return dict_ -- cgit v1.2.3 From 8210c46fde908b8815ab97f2f91039f87365369b Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 3 Jun 2021 21:45:25 +0300 Subject: Use prepared statements for FETCH sql function --- gn3/db/__init__.py | 8 ++++---- tests/unit/db/test_phenotypes.py | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index d62b575..fea43ec 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -66,14 +66,14 @@ def fetchone(conn: Any, """Run a SELECT on a table. 
Returns only one result!""" if not any(astuple(where)): return None + where_ = {k: v for k, v in asdict(where).items() + if v is not None and k in TABLEMAP[table]} sql = f"SELECT * FROM {table} " sql += "WHERE " sql += " AND ".join(f"{TABLEMAP[table].get(k)} = " - f"'{escape_string(str(v)).decode('utf-8')}'" for - k, v in asdict(where).items() - if v is not None and k in TABLEMAP[table]) + "%s" for k in where_.keys()) with conn.cursor() as cursor: - cursor.execute(sql) + cursor.execute(sql, tuple(where_.values())) return DATACLASSMAP[table](*cursor.fetchone()) diff --git a/tests/unit/db/test_phenotypes.py b/tests/unit/db/test_phenotypes.py index 21eb757..824d186 100644 --- a/tests/unit/db/test_phenotypes.py +++ b/tests/unit/db/test_phenotypes.py @@ -61,7 +61,8 @@ class TestPhenotypes(TestCase): self.assertEqual(phenotype.pre_pub_description, "Test pre-publication") cursor.execute.assert_called_once_with( - "SELECT * FROM Phenotype WHERE id = '35' AND Owner = 'Rob'") + "SELECT * FROM Phenotype WHERE id = %s AND Owner = %s", + (35, 'Rob')) def test_diff_from_dict(self): """Test that a correct diff is generated""" -- cgit v1.2.3 From d55adb4e549bce790522d1a311bbcd53627c3259 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 3 Jun 2021 21:50:52 +0300 Subject: gn3: db: Remove "escape_string" from imports We use prepared statements, so no need to have this. --- gn3/db/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index fea43ec..824e5b2 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -3,7 +3,6 @@ from typing import Optional, Dict, Any from dataclasses import asdict, astuple from typing_extensions import Protocol -from MySQLdb import escape_string from gn3.db.metadata_audit import MetadataAudit from gn3.db.phenotypes import Phenotype -- cgit v1.2.3 From 49f9d977fc9bd9165392c21f7311b6fe72d2d83a Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 7 Jun 2021 10:09:00 +0300 Subject: gn3: db: sort imports --- gn3/db/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index 824e5b2..ed23c97 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -1,7 +1,7 @@ # pylint: disable=[R0902, R0903] """Module that exposes common db operations""" -from typing import Optional, Dict, Any from dataclasses import asdict, astuple +from typing import Optional, Dict, Any from typing_extensions import Protocol from gn3.db.metadata_audit import MetadataAudit @@ -11,8 +11,8 @@ from gn3.db.phenotypes import Publication from gn3.db.metadata_audit import metadata_audit_mapping from gn3.db.phenotypes import phenotype_mapping -from gn3.db.phenotypes import publish_x_ref_mapping from gn3.db.phenotypes import publication_mapping +from gn3.db.phenotypes import publish_x_ref_mapping TABLEMAP = { -- cgit v1.2.3 From 77e7fb0d6a8090770460cc05e1707056b09e6802 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 7 Jun 2021 10:18:22 +0300 Subject: tests: test_phenotype: Rename to test_db --- tests/unit/db/test_db.py | 73 ++++++++++++++++++++++++++++++++++++++++ tests/unit/db/test_phenotypes.py | 72 --------------------------------------- 2 files changed, 73 insertions(+), 72 deletions(-) create mode 100644 tests/unit/db/test_db.py delete mode 100644 tests/unit/db/test_phenotypes.py diff --git a/tests/unit/db/test_db.py b/tests/unit/db/test_db.py new file mode 100644 index 0000000..a3b1d71 --- /dev/null +++ b/tests/unit/db/test_db.py @@ -0,0 +1,73 @@ +"""Tests for db/phenotypes.py""" +from 
unittest import TestCase +from unittest import mock + +from gn3.db import fetchone +from gn3.db import update +from gn3.db import diff_from_dict +from gn3.db.phenotypes import Phenotype + + +class TestPhenotypes(TestCase): + """Test cases for fetching chromosomes""" + + def test_update_phenotype_with_no_data(self): + """Test that a phenotype is updated correctly if an empty Phenotype dataclass + is provided + + """ + db_mock = mock.MagicMock() + self.assertEqual(update( + conn=db_mock, table="Phenotype", + data=Phenotype(), where=Phenotype()), None) + + def test_update_phenotype_with_data(self): + """ + Test that a phenotype is updated correctly if some + data is provided + """ + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + type(cursor).rowcount = 1 + self.assertEqual(update( + conn=db_mock, table="Phenotype", + data=Phenotype( + pre_pub_description="Test Pre Pub", + submitter="Rob", + post_pub_description="Test Post Pub"), + where=Phenotype(id_=1, owner="Rob")), 1) + cursor.execute.assert_called_once_with( + "UPDATE Phenotype SET " + "Pre_publication_description = %s, " + "Post_publication_description = %s, " + "Submitter = %s WHERE id = %s AND Owner = %s", + ('Test Pre Pub', 'Test Post Pub', 'Rob', 1, 'Rob')) + + def test_fetch_phenotype(self): + """Test that a single phenotype is fetched properly + + """ + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + test_data = ( + 35, "Test pre-publication", "Test post-publication", + "Original description A", "cm^2", "pre-abbrev", + "post-abbrev", "LAB001", "R. W.", "R. W.", "R. W." + ) + cursor.fetchone.return_value = test_data + phenotype = fetchone(db_mock, + "Phenotype", + where=Phenotype(id_=35, owner="Rob")) + self.assertEqual(phenotype.id_, 35) + self.assertEqual(phenotype.pre_pub_description, + "Test pre-publication") + cursor.execute.assert_called_once_with( + "SELECT * FROM Phenotype WHERE id = %s AND Owner = %s", + (35, 'Rob')) + + def test_diff_from_dict(self): + """Test that a correct diff is generated""" + self.assertEqual(diff_from_dict({"id": 1, "data": "a"}, + {"id": 2, "data": "b"}), + {"id": {"old": 1, "new": 2}, + "data": {"old": "a", "new": "b"}}) diff --git a/tests/unit/db/test_phenotypes.py b/tests/unit/db/test_phenotypes.py deleted file mode 100644 index 824d186..0000000 --- a/tests/unit/db/test_phenotypes.py +++ /dev/null @@ -1,72 +0,0 @@ -"""Tests for db/phenotypes.py""" -from unittest import TestCase -from unittest import mock - -from gn3.db import fetchone -from gn3.db import update -from gn3.db import diff_from_dict -from gn3.db.phenotypes import Phenotype - - -class TestPhenotypes(TestCase): - """Test cases for fetching chromosomes""" - def test_update_phenotype_with_no_data(self): - """Test that a phenotype is updated correctly if an empty Phenotype dataclass - is provided - - """ - db_mock = mock.MagicMock() - self.assertEqual(update( - conn=db_mock, table="Phenotype", - data=Phenotype(), where=Phenotype()), None) - - def test_update_phenotype_with_data(self): - """ - Test that a phenotype is updated correctly if some - data is provided - """ - db_mock = mock.MagicMock() - with db_mock.cursor() as cursor: - type(cursor).rowcount = 1 - self.assertEqual(update( - conn=db_mock, table="Phenotype", - data=Phenotype( - pre_pub_description="Test Pre Pub", - submitter="Rob", - post_pub_description="Test Post Pub"), - where=Phenotype(id_=1, owner="Rob")), 1) - cursor.execute.assert_called_once_with( - "UPDATE Phenotype SET " - "Pre_publication_description = %s, " - 
"Post_publication_description = %s, " - "Submitter = %s WHERE id = %s AND Owner = %s", - ('Test Pre Pub', 'Test Post Pub', 'Rob', 1, 'Rob')) - - def test_fetch_phenotype(self): - """Test that a single phenotype is fetched properly - - """ - db_mock = mock.MagicMock() - with db_mock.cursor() as cursor: - test_data = ( - 35, "Test pre-publication", "Test post-publication", - "Original description A", "cm^2", "pre-abbrev", - "post-abbrev", "LAB001", "R. W.", "R. W.", "R. W." - ) - cursor.fetchone.return_value = test_data - phenotype = fetchone(db_mock, - "Phenotype", - where=Phenotype(id_=35, owner="Rob")) - self.assertEqual(phenotype.id_, 35) - self.assertEqual(phenotype.pre_pub_description, - "Test pre-publication") - cursor.execute.assert_called_once_with( - "SELECT * FROM Phenotype WHERE id = %s AND Owner = %s", - (35, 'Rob')) - - def test_diff_from_dict(self): - """Test that a correct diff is generated""" - self.assertEqual(diff_from_dict({"id": 1, "data": "a"}, - {"id": 2, "data": "b"}), - {"id": {"old": 1, "new": 2}, - "data": {"old": "a", "new": "b"}}) -- cgit v1.2.3 From 92a6ed26a6990fd37b86efe4caa9f6cd2365f476 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 7 Jun 2021 10:21:33 +0300 Subject: tests: test_db: Rename TestPhenotype --- tests/unit/db/test_db.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/db/test_db.py b/tests/unit/db/test_db.py index a3b1d71..2e38d4c 100644 --- a/tests/unit/db/test_db.py +++ b/tests/unit/db/test_db.py @@ -8,8 +8,8 @@ from gn3.db import diff_from_dict from gn3.db.phenotypes import Phenotype -class TestPhenotypes(TestCase): - """Test cases for fetching chromosomes""" +class TestCrudMethods(TestCase): + """Test cases for CRUD methods""" def test_update_phenotype_with_no_data(self): """Test that a phenotype is updated correctly if an empty Phenotype dataclass -- cgit v1.2.3 From 3abef0f7a14860c0babbf59bd99b4c5a88387693 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 7 Jun 2021 11:13:50 +0300 Subject: gn3: db: Use correct DATACLASSMAP entry from metadata_audit --- gn3/db/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index ed23c97..997a230 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -23,7 +23,7 @@ TABLEMAP = { } DATACLASSMAP = { - "MetadataAudit": MetadataAudit, + "metadata_audit": MetadataAudit, "Phenotype": Phenotype, "PublishXRef": PublishXRef, "Publication": Publication, -- cgit v1.2.3 From 3b53e42162fbcbdba782016f7b63604081f2b6b1 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 7 Jun 2021 11:14:30 +0300 Subject: gn3: db: Make "WHERE" clause optional * gn3/db/__init__.py (fetchone): Make "WHERE" an Optional arg. --- gn3/db/__init__.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index 997a230..d311dea 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -61,16 +61,17 @@ def update(conn: Any, def fetchone(conn: Any, table: str, - where: Dataclass) -> Optional[Dataclass]: + where: Optional[Dataclass]) -> Optional[Dataclass]: """Run a SELECT on a table. 
Returns only one result!""" if not any(astuple(where)): return None where_ = {k: v for k, v in asdict(where).items() if v is not None and k in TABLEMAP[table]} sql = f"SELECT * FROM {table} " - sql += "WHERE " - sql += " AND ".join(f"{TABLEMAP[table].get(k)} = " - "%s" for k in where_.keys()) + if where: + sql += "WHERE " + sql += " AND ".join(f"{TABLEMAP[table].get(k)} = " + "%s" for k in where_.keys()) with conn.cursor() as cursor: cursor.execute(sql, tuple(where_.values())) return DATACLASSMAP[table](*cursor.fetchone()) -- cgit v1.2.3 From cf8e058682702e7203ec5eb019717cba09887272 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 7 Jun 2021 11:15:45 +0300 Subject: gn3: metadata_audit: Make props for MetadataAudit class optional --- gn3/db/metadata_audit.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gn3/db/metadata_audit.py b/gn3/db/metadata_audit.py index 6e22b32..e73e988 100644 --- a/gn3/db/metadata_audit.py +++ b/gn3/db/metadata_audit.py @@ -10,9 +10,9 @@ from typing import Optional @dataclass(frozen=True) class MetadataAudit: """Data Type that represents a Phenotype""" - dataset_id: int - editor: str - json_data: str + dataset_id: Optional[int] = None + editor: Optional[str] = None + json_data: Optional[str] = None time_stamp: Optional[str] = None -- cgit v1.2.3 From f5d3828b965bdc5ce98dd8f4714c5bf264c23f9c Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 7 Jun 2021 11:16:25 +0300 Subject: gn3: db: Add "fetchall" method. --- gn3/db/__init__.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index d311dea..25ecfd6 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -1,7 +1,7 @@ # pylint: disable=[R0902, R0903] """Module that exposes common db operations""" from dataclasses import asdict, astuple -from typing import Optional, Dict, Any +from typing import Any, Dict, Optional, Generator from typing_extensions import Protocol from gn3.db.metadata_audit import MetadataAudit @@ -77,6 +77,24 @@ def fetchone(conn: Any, return DATACLASSMAP[table](*cursor.fetchone()) +def fetchall(conn: Any, + table: str, + where: Optional[Dataclass]) -> Optional[Generator]: + """Run a SELECT on a table. 
Returns all the results as a tuple!""" + if not any(astuple(where)): + return None + where_ = {k: v for k, v in asdict(where).items() + if v is not None and k in TABLEMAP[table]} + sql = f"SELECT * FROM {table} " + if where: + sql += "WHERE " + sql += " AND ".join(f"{TABLEMAP[table].get(k)} = " + "%s" for k in where_.keys()) + with conn.cursor() as cursor: + cursor.execute(sql, tuple(where_.values())) + return (DATACLASSMAP[table](*record) for record in cursor.fetchall()) + + def insert(conn: Any, table: str, data: Dataclass) -> Optional[int]: -- cgit v1.2.3 From ea90f7d8276ec9df583b94d7d00c128e63f147ef Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 7 Jun 2021 11:16:45 +0300 Subject: tests: test_db: Add a test-case for fetchall --- tests/unit/db/test_db.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tests/unit/db/test_db.py b/tests/unit/db/test_db.py index 2e38d4c..485678f 100644 --- a/tests/unit/db/test_db.py +++ b/tests/unit/db/test_db.py @@ -2,10 +2,12 @@ from unittest import TestCase from unittest import mock +from gn3.db import fetchall from gn3.db import fetchone from gn3.db import update from gn3.db import diff_from_dict from gn3.db.phenotypes import Phenotype +from gn3.db.metadata_audit import MetadataAudit class TestCrudMethods(TestCase): @@ -65,6 +67,35 @@ class TestCrudMethods(TestCase): "SELECT * FROM Phenotype WHERE id = %s AND Owner = %s", (35, 'Rob')) + def test_fetchall_metadataaudit(self): + """Test that multiple metadata_audit entries are fetched properly + + """ + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + test_data = ( + 35, "Rob", ('{"pages": ' + '{"old": "5099-5109", ' + '"new": "5099-5110"}, ' + '"month": {"old": "July", ' + '"new": "June"}, ' + '"year": {"old": "2001", ' + '"new": "2002"}}'), + "2021-06-04 09:01:05" + ) + cursor.fetchall.return_value = (test_data,) + metadata = list(fetchall(db_mock, + "metadata_audit", + where=MetadataAudit(dataset_id=35, + editor="Rob")))[0] + self.assertEqual(metadata.dataset_id, 35) + self.assertEqual(metadata.time_stamp, + "2021-06-04 09:01:05") + cursor.execute.assert_called_once_with( + ("SELECT * FROM metadata_audit WHERE " + "dataset_id = %s AND editor = %s"), + (35, 'Rob')) + def test_diff_from_dict(self): """Test that a correct diff is generated""" self.assertEqual(diff_from_dict({"id": 1, "data": "a"}, -- cgit v1.2.3 From 712a4f7235cc1167fe1c4e591737a045b9a60ac3 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 7 Jun 2021 18:53:59 +0300 Subject: gn3: db: Add "id_" property to metadata_audit class and mapping --- gn3/db/metadata_audit.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gn3/db/metadata_audit.py b/gn3/db/metadata_audit.py index e73e988..8765738 100644 --- a/gn3/db/metadata_audit.py +++ b/gn3/db/metadata_audit.py @@ -10,6 +10,7 @@ from typing import Optional @dataclass(frozen=True) class MetadataAudit: """Data Type that represents a Phenotype""" + id_: Optional[int] = None dataset_id: Optional[int] = None editor: Optional[str] = None json_data: Optional[str] = None @@ -19,6 +20,7 @@ class MetadataAudit: # Mapping from the MetadataAudit dataclass to the actual column names in the # database metadata_audit_mapping = { + "id_": "id", "dataset_id": "dataset_id", "editor": "editor", "json_data": "json_data", -- cgit v1.2.3 From e67316601be4b43da6f60322fef1f4ce15ed5905 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 7 Jun 2021 19:43:17 +0300 Subject: gn3: db: Fix how columns from tables is resolved --- gn3/db/__init__.py | 10 
+++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index 25ecfd6..34a0236 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -65,12 +65,12 @@ def fetchone(conn: Any, """Run a SELECT on a table. Returns only one result!""" if not any(astuple(where)): return None - where_ = {k: v for k, v in asdict(where).items() + where_ = {TABLEMAP[table].get(k): v for k, v in asdict(where).items() if v is not None and k in TABLEMAP[table]} sql = f"SELECT * FROM {table} " if where: sql += "WHERE " - sql += " AND ".join(f"{TABLEMAP[table].get(k)} = " + sql += " AND ".join(f"{k} = " "%s" for k in where_.keys()) with conn.cursor() as cursor: cursor.execute(sql, tuple(where_.values())) @@ -83,12 +83,12 @@ def fetchall(conn: Any, """Run a SELECT on a table. Returns all the results as a tuple!""" if not any(astuple(where)): return None - where_ = {k: v for k, v in asdict(where).items() + where_ = {TABLEMAP[table].get(k): v for k, v in asdict(where).items() if v is not None and k in TABLEMAP[table]} sql = f"SELECT * FROM {table} " if where: sql += "WHERE " - sql += " AND ".join(f"{TABLEMAP[table].get(k)} = " + sql += " AND ".join(f"{k} = " "%s" for k in where_.keys()) with conn.cursor() as cursor: cursor.execute(sql, tuple(where_.values())) @@ -99,7 +99,7 @@ def insert(conn: Any, table: str, data: Dataclass) -> Optional[int]: """Run an INSERT into a table""" - dict_ = {k: v for k, v in asdict(data).items() + dict_ = {TABLEMAP[table].get(k): v for k, v in asdict(data).items() if v is not None and k in TABLEMAP[table]} sql = f"INSERT INTO {table} (" sql += ", ".join(f"{k}" for k in dict_.keys()) -- cgit v1.2.3 From bb55cf948974d85fb31d2f424f7ee94f7ab5e3d6 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 7 Jun 2021 19:44:00 +0300 Subject: Rename json_data column to json_diff_data --- gn3/db/metadata_audit.py | 2 +- sql/metadata_audit.sql | 12 ++++++------ tests/unit/db/test_audit.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/gn3/db/metadata_audit.py b/gn3/db/metadata_audit.py index 8765738..9c4474d 100644 --- a/gn3/db/metadata_audit.py +++ b/gn3/db/metadata_audit.py @@ -23,6 +23,6 @@ metadata_audit_mapping = { "id_": "id", "dataset_id": "dataset_id", "editor": "editor", - "json_data": "json_data", + "json_data": "json_diff_data", "time_stamp": "time_stamp", } diff --git a/sql/metadata_audit.sql b/sql/metadata_audit.sql index 9771e74..514a2fc 100644 --- a/sql/metadata_audit.sql +++ b/sql/metadata_audit.sql @@ -20,10 +20,10 @@ -- This table stores data on diffs when editing a Published dataset's data CREATE TABLE metadata_audit ( PRIMARY KEY (id), - id INTEGER AUTO_INCREMENT NOT NULL, - dataset_id INTEGER NOT NULL, - editor VARCHAR(255) NOT NULL, - json_data VARCHAR(2048) NOT NULL, - time_stamp timestamp DEFAULT CURRENT_TIMESTAMP NOT NULL, - CHECK (JSON_VALID(json_data)) + id INTEGER AUTO_INCREMENT NOT NULL, + dataset_id INTEGER NOT NULL, + editor VARCHAR(255) NOT NULL, + json_diff_data VARCHAR(2048) NOT NULL, + time_stamp timestamp DEFAULT CURRENT_TIMESTAMP NOT NULL, + CHECK (JSON_VALID(json_diff_data)) ); diff --git a/tests/unit/db/test_audit.py b/tests/unit/db/test_audit.py index 1449281..7480169 100644 --- a/tests/unit/db/test_audit.py +++ b/tests/unit/db/test_audit.py @@ -24,5 +24,5 @@ class TestMetadatAudit(TestCase): json_data=json.dumps({"a": "b"}))), 1) cursor.execute.assert_called_once_with( "INSERT INTO metadata_audit (dataset_id, " - "editor, json_data) VALUES (%s, %s, %s)", + "editor, json_diff_data) 
VALUES (%s, %s, %s)", (35, 'Bonface', '{"a": "b"}')) -- cgit v1.2.3 From b05c273eae3f19387ab0a6afea4277afd52a9378 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 7 Jun 2021 19:44:31 +0300 Subject: db: test_db: Add extra variable in test data for "id" --- tests/unit/db/test_db.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tests/unit/db/test_db.py b/tests/unit/db/test_db.py index 485678f..520e781 100644 --- a/tests/unit/db/test_db.py +++ b/tests/unit/db/test_db.py @@ -74,15 +74,14 @@ class TestCrudMethods(TestCase): db_mock = mock.MagicMock() with db_mock.cursor() as cursor: test_data = ( - 35, "Rob", ('{"pages": ' - '{"old": "5099-5109", ' - '"new": "5099-5110"}, ' - '"month": {"old": "July", ' - '"new": "June"}, ' - '"year": {"old": "2001", ' - '"new": "2002"}}'), - "2021-06-04 09:01:05" - ) + 1, 35, "Rob", ('{"pages": ' + '{"old": "5099-5109", ' + '"new": "5099-5110"}, ' + '"month": {"old": "July", ' + '"new": "June"}, ' + '"year": {"old": "2001", ' + '"new": "2002"}}'), + "2021-06-04 09:01:05") cursor.fetchall.return_value = (test_data,) metadata = list(fetchall(db_mock, "metadata_audit", -- cgit v1.2.3 From f3d62f491dedbf0fe824446ced5d1f12d5837a52 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Tue, 8 Jun 2021 01:40:47 +0300 Subject: initial commit for queries perfomance tests --- tests/performance/__init__.py | 0 tests/performance/test_query.py | 119 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100644 tests/performance/__init__.py create mode 100644 tests/performance/test_query.py diff --git a/tests/performance/__init__.py b/tests/performance/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/performance/test_query.py b/tests/performance/test_query.py new file mode 100644 index 0000000..c0a3210 --- /dev/null +++ b/tests/performance/test_query.py @@ -0,0 +1,119 @@ +"""module contains performance tests for queries""" + +import time +import sys + +from inspect import getmembers +from inspect import isfunction + +from functools import wraps +from gn3.db_utils import database_connector + + +def timer(func): + """time function""" + @wraps(func) + def wrapper_time(*args, **kwargs): + """time wrapper""" + start_time = time.perf_counter() + results = func(*args, **kwargs) + end_time = time.perf_counter() + run_time = end_time - start_time + print(f"the time taken is {run_time:.3f} seconds") + return results + + return wrapper_time + + +def query_executor(query, fetch_all=True): + """function to execute a query""" + conn, _ = database_connector() + + with conn: + cursor = conn.cursor() + cursor.execute(query) + + if fetch_all: + return cursor.fetchall() + return cursor.fetchone() + + +def fetch_probeset_query(dataset_name): + """contains queries for datasets""" + + query = """SELECT * from ProbeSetData + where StrainID in (4, 5, 6, 7, 8, 9, 10, 11, 12, + 14, 15, 17, 18, 19, 20, 21, 22, 24, 25, 26, 28, + 29, 30, 31, 35, 36, 37, 39, 98, 99, 100, 103, + 487, 105, 106, 110, 115,116, 117, 118, 119, + 120, 919, 147, 121, 40, 41, 124, 125, 128, 135, + 129, 130, 131, 132, 134, 138, 139, 140, 141, 142, + 144, 145, 148, 149, 920, 922, 2, 3, 1, 1100) + and id in (SELECT ProbeSetXRef.DataId + FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) + WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id + and ProbeSetFreeze.Name = '{}' + and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(dataset_name) + + return query + + +@timer +def perf_simple_query(): + """initial simple query test""" + + 
query = """select * from ProbeSetData limit 1""" + + _results = query_executor(query) + + return {} + + +@timer +def perf_hc_m2_dataset(): + """test the default dataset HC_M2_0606_P""" + + query = fetch_probeset_query("HC_M2_0606_P") + + _results = query_executor(query) + + return {} + + +@timer +def perf_umutaffyexon_dataset(): + """largest dataset in gn""" + + query = fetch_probeset_query("UMUTAffyExon_0209_RMA") + _results = query_executor(query) + return {} + + +def fetch_perf_functions(): + """function to filter all functions strwith perf_""" + name_func_dict = {name: obj for name, obj in + getmembers(sys.modules[__name__], isfunction)if isfunction( + obj) and obj.__module__ == __name__ and name.startswith('perf_')} + + return name_func_dict + + +def fetch_cmd_args(): + """function to fetch cmd args""" + cmd_args = sys.argv[1:] + + name_func_dict = fetch_perf_functions() + + if len(cmd_args) > 0: + callables = [func_call for name, + func_call in name_func_dict.items() if name in cmd_args] + + return callables + + return list(name_func_dict.values()) + + +if __name__ == '__main__': + func_list = fetch_cmd_args() + for call_func in func_list: + call_func() -- cgit v1.2.3 From 70888fb5fb78e7d77ca19c1a015d6c0ae494c722 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Tue, 8 Jun 2021 03:00:35 +0300 Subject: add types and dataset names --- tests/performance/test_query.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/tests/performance/test_query.py b/tests/performance/test_query.py index c0a3210..222e8d6 100644 --- a/tests/performance/test_query.py +++ b/tests/performance/test_query.py @@ -6,6 +6,7 @@ import sys from inspect import getmembers from inspect import isfunction +from typing import Optional from functools import wraps from gn3.db_utils import database_connector @@ -25,9 +26,12 @@ def timer(func): return wrapper_time -def query_executor(query, fetch_all=True): +def query_executor(query: str, + dataset_name: Optional[str] = "dataset_name", + fetch_all: bool = True): """function to execute a query""" conn, _ = database_connector() + print(f"Performance tests for {dataset_name}") with conn: cursor = conn.cursor() @@ -38,7 +42,7 @@ def query_executor(query, fetch_all=True): return cursor.fetchone() -def fetch_probeset_query(dataset_name): +def fetch_probeset_query(dataset_name: str): """contains queries for datasets""" query = """SELECT * from ProbeSetData @@ -75,7 +79,7 @@ def perf_hc_m2_dataset(): query = fetch_probeset_query("HC_M2_0606_P") - _results = query_executor(query) + _results = query_executor(query, "HC_M2_0606_P") return {} @@ -85,21 +89,24 @@ def perf_umutaffyexon_dataset(): """largest dataset in gn""" query = fetch_probeset_query("UMUTAffyExon_0209_RMA") - _results = query_executor(query) + _results = query_executor(query, "UMUTAffyExon_0209_RMA") return {} def fetch_perf_functions(): """function to filter all functions strwith perf_""" - name_func_dict = {name: obj for name, obj in + name_func_dict = {name: func_obj for name, func_obj in getmembers(sys.modules[__name__], isfunction)if isfunction( - obj) and obj.__module__ == __name__ and name.startswith('perf_')} + func_obj) + and func_obj.__module__ == __name__ and name.startswith('perf_')} return name_func_dict def fetch_cmd_args(): - """function to fetch cmd args""" + """function to fetch cmd args\ + for example python file.py perf_hc_m2_dataset\ + output [perf_hc_m2_dataset obj]""" cmd_args = sys.argv[1:] name_func_dict = fetch_perf_functions() @@ -115,5 +122,5 @@ 
def fetch_cmd_args(): if __name__ == '__main__': func_list = fetch_cmd_args() - for call_func in func_list: - call_func() + for func in func_list: + func() -- cgit v1.2.3 From 5348003fdf3628d574ca95911fecebbfd81ba8ee Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Tue, 8 Jun 2021 03:40:18 +0300 Subject: add tests to setup packages --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index e43ab0a..3f0922b 100644 --- a/setup.py +++ b/setup.py @@ -30,6 +30,7 @@ setup(author='Bonface M. K.', 'gn3.api', 'gn3.computations', 'gn3.db', + 'tests' ], url='https://github.com/genenetwork/genenetwork3', version='0.1') -- cgit v1.2.3 From c8b5a6f407c00847ee28984f7cc18bc391c45120 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Tue, 8 Jun 2021 03:48:04 +0300 Subject: pep8 formatting --- tests/performance/test_query.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/performance/test_query.py b/tests/performance/test_query.py index 222e8d6..2d05d26 100644 --- a/tests/performance/test_query.py +++ b/tests/performance/test_query.py @@ -31,7 +31,7 @@ def query_executor(query: str, fetch_all: bool = True): """function to execute a query""" conn, _ = database_connector() - print(f"Performance tests for {dataset_name}") + print(f"Performance test for {dataset_name}") with conn: cursor = conn.cursor() @@ -122,5 +122,5 @@ def fetch_cmd_args(): if __name__ == '__main__': func_list = fetch_cmd_args() - for func in func_list: - func() + for func_obj in func_list: + func_obj() -- cgit v1.2.3 From e176297bbca642f5125b5b01ae07141ebc96425d Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 9 Jun 2021 20:23:58 +0300 Subject: rename perf query file --- tests/performance/test_query.py | 126 ---------------------------------------- 1 file changed, 126 deletions(-) delete mode 100644 tests/performance/test_query.py diff --git a/tests/performance/test_query.py b/tests/performance/test_query.py deleted file mode 100644 index 2d05d26..0000000 --- a/tests/performance/test_query.py +++ /dev/null @@ -1,126 +0,0 @@ -"""module contains performance tests for queries""" - -import time -import sys - -from inspect import getmembers -from inspect import isfunction - -from typing import Optional -from functools import wraps -from gn3.db_utils import database_connector - - -def timer(func): - """time function""" - @wraps(func) - def wrapper_time(*args, **kwargs): - """time wrapper""" - start_time = time.perf_counter() - results = func(*args, **kwargs) - end_time = time.perf_counter() - run_time = end_time - start_time - print(f"the time taken is {run_time:.3f} seconds") - return results - - return wrapper_time - - -def query_executor(query: str, - dataset_name: Optional[str] = "dataset_name", - fetch_all: bool = True): - """function to execute a query""" - conn, _ = database_connector() - print(f"Performance test for {dataset_name}") - - with conn: - cursor = conn.cursor() - cursor.execute(query) - - if fetch_all: - return cursor.fetchall() - return cursor.fetchone() - - -def fetch_probeset_query(dataset_name: str): - """contains queries for datasets""" - - query = """SELECT * from ProbeSetData - where StrainID in (4, 5, 6, 7, 8, 9, 10, 11, 12, - 14, 15, 17, 18, 19, 20, 21, 22, 24, 25, 26, 28, - 29, 30, 31, 35, 36, 37, 39, 98, 99, 100, 103, - 487, 105, 106, 110, 115,116, 117, 118, 119, - 120, 919, 147, 121, 40, 41, 124, 125, 128, 135, - 129, 130, 131, 132, 134, 138, 139, 140, 141, 142, - 144, 145, 148, 149, 920, 922, 2, 3, 1, 1100) - and id in (SELECT 
ProbeSetXRef.DataId - FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) - WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id - and ProbeSetFreeze.Name = '{}' - and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(dataset_name) - - return query - - -@timer -def perf_simple_query(): - """initial simple query test""" - - query = """select * from ProbeSetData limit 1""" - - _results = query_executor(query) - - return {} - - -@timer -def perf_hc_m2_dataset(): - """test the default dataset HC_M2_0606_P""" - - query = fetch_probeset_query("HC_M2_0606_P") - - _results = query_executor(query, "HC_M2_0606_P") - - return {} - - -@timer -def perf_umutaffyexon_dataset(): - """largest dataset in gn""" - - query = fetch_probeset_query("UMUTAffyExon_0209_RMA") - _results = query_executor(query, "UMUTAffyExon_0209_RMA") - return {} - - -def fetch_perf_functions(): - """function to filter all functions strwith perf_""" - name_func_dict = {name: func_obj for name, func_obj in - getmembers(sys.modules[__name__], isfunction)if isfunction( - func_obj) - and func_obj.__module__ == __name__ and name.startswith('perf_')} - - return name_func_dict - - -def fetch_cmd_args(): - """function to fetch cmd args\ - for example python file.py perf_hc_m2_dataset\ - output [perf_hc_m2_dataset obj]""" - cmd_args = sys.argv[1:] - - name_func_dict = fetch_perf_functions() - - if len(cmd_args) > 0: - callables = [func_call for name, - func_call in name_func_dict.items() if name in cmd_args] - - return callables - - return list(name_func_dict.values()) - - -if __name__ == '__main__': - func_list = fetch_cmd_args() - for func_obj in func_list: - func_obj() -- cgit v1.2.3 From 5d4b24f0f2bbe72c2b2f57cc0e8596399b412199 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 9 Jun 2021 20:24:36 +0300 Subject: refactor perf query functions --- tests/performance/perf_query.py | 113 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 tests/performance/perf_query.py diff --git a/tests/performance/perf_query.py b/tests/performance/perf_query.py new file mode 100644 index 0000000..594b9ea --- /dev/null +++ b/tests/performance/perf_query.py @@ -0,0 +1,113 @@ +"""module contains performance tests for queries""" + +import time +import sys + +from inspect import getmembers +from inspect import isfunction + +from typing import Optional +from functools import wraps +from gn3.db_utils import database_connector + + +def timer(func): + """time function""" + @wraps(func) + def wrapper_time(*args, **kwargs): + """time wrapper""" + start_time = time.perf_counter() + results = func(*args, **kwargs) + end_time = time.perf_counter() + run_time = end_time - start_time + print(f"the time taken is {run_time:.3f} seconds") + return results + + return wrapper_time + + +def query_executor(query: str, + dataset_name: Optional[str] = "dataset_name", + fetch_all: bool = True): + """function to execute a query""" + conn, _ = database_connector() + + with conn: + cursor = conn.cursor() + cursor.execute(query) + + if fetch_all: + return cursor.fetchall() + return cursor.fetchone() + + +def fetch_probeset_query(dataset_name: str): + """contains queries for datasets""" + + query = """SELECT * from ProbeSetData + where StrainID in (4, 5, 6, 7, 8, 9, 10, 11, 12, + 14, 15, 17, 18, 19, 20, 21, 22, 24, 25, 26, 28, + 29, 30, 31, 35, 36, 37, 39, 98, 99, 100, 103, + 487, 105, 106, 110, 115,116, 117, 118, 119, + 120, 919, 147, 121, 40, 41, 124, 125, 128, 135, + 129, 130, 131, 132, 134, 138, 139, 140, 141, 142, + 144, 145, 148, 149, 
920, 922, 2, 3, 1, 1100) + and id in (SELECT ProbeSetXRef.DataId + FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) + WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id + and ProbeSetFreeze.Name = '{}' + and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(dataset_name) + + return query + + +@timer +def perf_hc_m2_dataset(): + """test the default dataset HC_M2_0606_P""" + + dataset_name = "HC_M2_0606_P" + print(f"Performance test for {dataset_name}") + + query_executor(fetch_probeset_query(dataset_name=dataset_name), + dataset_name=dataset_name) + + +@timer +def perf_umutaffyexon_dataset(): + """largest dataset in gn""" + + dataset_name = "UMUTAffyExon_0209_RMA" + print(f"Performance test for {dataset_name}") + query_executor(fetch_probeset_query(dataset_name=dataset_name), + dataset_name=dataset_name) + + +def fetch_perf_functions(): + """function to filter all functions strwith perf_""" + name_func_dict = {name: func_obj for name, func_obj in + getmembers(sys.modules[__name__], isfunction)if isfunction( + func_obj) + and func_obj.__module__ == __name__ and name.startswith('perf_')} + + return name_func_dict + + +def fetch_cmd_args(): + """function to fetch cmd args""" + cmd_args = sys.argv[1:] + + name_func_dict = fetch_perf_functions() + + if len(cmd_args) > 0: + callables = [func_call for name, + func_call in name_func_dict.items() if name in cmd_args] + + return callables + + return list(name_func_dict.values()) + + +if __name__ == '__main__': + func_list = fetch_cmd_args() + for func_obj in func_list: + func_obj() -- cgit v1.2.3 From 78e90b41c0577f724121ae2156b9c1759d30812d Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 9 Jun 2021 20:27:20 +0300 Subject: remove ununsed variables --- tests/performance/perf_query.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/performance/perf_query.py b/tests/performance/perf_query.py index 594b9ea..93bb3b2 100644 --- a/tests/performance/perf_query.py +++ b/tests/performance/perf_query.py @@ -27,7 +27,6 @@ def timer(func): def query_executor(query: str, - dataset_name: Optional[str] = "dataset_name", fetch_all: bool = True): """function to execute a query""" conn, _ = database_connector() -- cgit v1.2.3 From 14be11dea2844750eee5029197f82d9e0b67123e Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 9 Jun 2021 20:31:43 +0300 Subject: minor fixes --- tests/performance/perf_query.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/performance/perf_query.py b/tests/performance/perf_query.py index 93bb3b2..12cb944 100644 --- a/tests/performance/perf_query.py +++ b/tests/performance/perf_query.py @@ -6,7 +6,6 @@ import sys from inspect import getmembers from inspect import isfunction -from typing import Optional from functools import wraps from gn3.db_utils import database_connector @@ -67,8 +66,7 @@ def perf_hc_m2_dataset(): dataset_name = "HC_M2_0606_P" print(f"Performance test for {dataset_name}") - query_executor(fetch_probeset_query(dataset_name=dataset_name), - dataset_name=dataset_name) + query_executor(fetch_probeset_query(dataset_name=dataset_name)) @timer @@ -77,8 +75,7 @@ def perf_umutaffyexon_dataset(): dataset_name = "UMUTAffyExon_0209_RMA" print(f"Performance test for {dataset_name}") - query_executor(fetch_probeset_query(dataset_name=dataset_name), - dataset_name=dataset_name) + query_executor(fetch_probeset_query(dataset_name=dataset_name)) def fetch_perf_functions(): -- cgit v1.2.3 From cd921a4778d141b6dbbf9f60c178a4f681d47860 Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 16 Jun 
2021 19:40:03 +0000 Subject: Fixed spelling of coeffient to coefficient --- gn3/computations/correlations.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py index f0ce502..0fe46ab 100644 --- a/gn3/computations/correlations.py +++ b/gn3/computations/correlations.py @@ -68,8 +68,8 @@ pearson,spearman and biweight mid correlation return value is rho and p_value "spearman": scipy.stats.spearmanr } use_corr_method = corr_mapping.get(corr_method, "spearman") - corr_coeffient, p_val = use_corr_method(primary_values, target_values) - return (corr_coeffient, p_val) + corr_coefficient, p_val = use_corr_method(primary_values, target_values) + return (corr_coefficient, p_val) def compute_sample_r_correlation(trait_name, corr_method, trait_vals, @@ -84,13 +84,13 @@ def compute_sample_r_correlation(trait_name, corr_method, trait_vals, if num_overlap > 5: - (corr_coeffient, p_value) =\ + (corr_coefficient, p_value) =\ compute_corr_coeff_p_value(primary_values=sanitized_traits_vals, target_values=sanitized_target_vals, corr_method=corr_method) - if corr_coeffient is not None: - return (trait_name, corr_coeffient, p_value, num_overlap) + if corr_coefficient is not None: + return (trait_name, corr_coefficient, p_value, num_overlap) return None @@ -140,10 +140,10 @@ def compute_all_sample_correlation(this_trait, for sample_correlation in results: if sample_correlation is not None: - (trait_name, corr_coeffient, p_value, + (trait_name, corr_coefficient, p_value, num_overlap) = sample_correlation corr_result = { - "corr_coeffient": corr_coeffient, + "corr_coefficient": corr_coefficient, "p_value": p_value, "num_overlap": num_overlap } @@ -151,7 +151,7 @@ def compute_all_sample_correlation(this_trait, corr_results.append({trait_name: corr_result}) return sorted( corr_results, - key=lambda trait_name: -abs(list(trait_name.values())[0]["corr_coeffient"])) + key=lambda trait_name: -abs(list(trait_name.values())[0]["corr_coefficient"])) def benchmark_compute_all_sample(this_trait, @@ -174,12 +174,12 @@ def benchmark_compute_all_sample(this_trait, trait_vals=this_vals, target_samples_vals=target_vals) if sample_correlation is not None: - (trait_name, corr_coeffient, + (trait_name, corr_coefficient, p_value, num_overlap) = sample_correlation else: continue corr_result = { - "corr_coeffient": corr_coeffient, + "corr_coefficient": corr_coefficient, "p_value": p_value, "num_overlap": num_overlap } @@ -195,20 +195,20 @@ def tissue_correlation_for_trait( compute_corr_p_value: Callable = compute_corr_coeff_p_value) -> dict: """Given a primary tissue values for a trait and the target tissues values compute the correlation_cooeff and p value the input required are arrays - output -> List containing Dicts with corr_coefficient value,P_value and + output -> List containing Dicts with corr_coefficient value, P_value and also the tissue numbers is len(primary) == len(target) """ # ax :todo assertion that length one one target tissue ==primary_tissue - (tissue_corr_coeffient, + (tissue_corr_coefficient, p_value) = compute_corr_p_value(primary_values=primary_tissue_vals, target_values=target_tissues_values, corr_method=corr_method) tiss_corr_result = {trait_id: { - "tissue_corr": tissue_corr_coeffient, + "tissue_corr": tissue_corr_coefficient, "tissue_number": len(primary_tissue_vals), "tissue_p_val": p_value}} -- cgit v1.2.3 From 476b146562070cee6a55c55c03c37ef3e19a1474 Mon Sep 17 00:00:00 2001 From: zsloan Date: 
Fri, 18 Jun 2021 20:54:18 +0000 Subject: Resolve mypy errors in computations/rqtl.py --- gn3/computations/rqtl.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 22d9faf..7b1a35c 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,8 +1,7 @@ """Procedures related rqtl computations""" import os import numpy as np -from typing import Dict -from typing import List +from typing import Dict, List, Union from flask import current_app @@ -55,7 +54,7 @@ def process_rqtl_output(file_name: str) -> List: marker_obs = [] # Later I should probably redo this using csv.read to avoid the # awkwardness with removing quotes with [1:-1] - with open(os.path.join(current_app.config.get("TMPDIR"), "output", file_name), "r") as the_file: + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", file_name), "r") as the_file: for line in the_file: line_items = line.split(",") if line_items[1][1:-1] == "chr" or not line_items: @@ -63,6 +62,7 @@ def process_rqtl_output(file_name: str) -> List: continue else: # Convert chr to int if possible + the_chr: Union[int, str] try: the_chr = int(line_items[1][1:-1]) except: @@ -85,7 +85,7 @@ def process_perm_output(file_name: str): """ perm_results = [] - with open(os.path.join(current_app.config.get("TMPDIR"), "output", "PERM_" + file_name), "r") as the_file: + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", "PERM_" + file_name), "r") as the_file: for i, line in enumerate(the_file): if i == 0: # Skip header line -- cgit v1.2.3 From f172f7cbe0e5fd44b452bf3f3be094ba8b6b2c0e Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 18 Jun 2021 20:54:52 +0000 Subject: add ignore_missing_imports for numpy in mypy.ini since it complains about that and we seem to have done the same thing for scipy --- mypy.ini | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mypy.ini b/mypy.ini index 5e1af13..5d66812 100644 --- a/mypy.ini +++ b/mypy.ini @@ -3,6 +3,9 @@ [mypy-scipy.*] ignore_missing_imports = True +[mypy-numpy.*] +ignore_missing_imports = True + [mypy-MySQLdb.*] ignore_missing_imports = True -- cgit v1.2.3 From d42b85ae5fcea1b71a7165fd6e64745a228c48f9 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 18 Jun 2021 21:13:03 +0000 Subject: Fixed pylint issues --- gn3/api/rqtl.py | 7 ++++--- gn3/computations/rqtl.py | 45 ++++++++++++++++++++++++--------------------- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 38f4c1e..ebb746c 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -6,7 +6,6 @@ from flask import current_app from flask import jsonify from flask import request -from gn3.commands import run_cmd from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output from gn3.computations.gemma import do_paths_exist @@ -45,13 +44,15 @@ run the rqtl_wrapper script and return the results as JSON ) rqtl_output = {} - if not os.path.isfile(os.path.join(current_app.config.get("TMPDIR"), "output", rqtl_cmd.get('output_file'))): + if not os.path.isfile(os.path.join(current_app.config.get("TMPDIR"), + "output", rqtl_cmd.get('output_file'))): os.system(rqtl_cmd.get('rqtl_cmd')) rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) if int(rqtl_kwargs['nperm']) > 0: - rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = 
process_perm_output(rqtl_cmd.get('output_file')) + rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = \ + process_perm_output(rqtl_cmd.get('output_file')) return jsonify(rqtl_output) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 7b1a35c..0433b3f 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,8 +1,9 @@ """Procedures related rqtl computations""" import os -import numpy as np from typing import Dict, List, Union +import numpy as np + from flask import current_app from gn3.commands import compose_rqtl_cmd @@ -54,27 +55,28 @@ def process_rqtl_output(file_name: str) -> List: marker_obs = [] # Later I should probably redo this using csv.read to avoid the # awkwardness with removing quotes with [1:-1] - with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", file_name), "r") as the_file: + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", file_name), "r") as the_file: for line in the_file: line_items = line.split(",") if line_items[1][1:-1] == "chr" or not line_items: # Skip header line continue - else: - # Convert chr to int if possible - the_chr: Union[int, str] - try: - the_chr = int(line_items[1][1:-1]) - except: - the_chr = line_items[1][1:-1] - this_marker = { - "name": line_items[0][1:-1], - "chr": the_chr, - "cM": float(line_items[2]), - "Mb": float(line_items[2]), - "lod_score": float(line_items[3]) - } - marker_obs.append(this_marker) + + # Convert chr to int if possible + the_chr: Union[int, str] + try: + the_chr = int(line_items[1][1:-1]) + except ValueError: + the_chr = line_items[1][1:-1] + this_marker = { + "name": line_items[0][1:-1], + "chr": the_chr, + "cM": float(line_items[2]), + "Mb": float(line_items[2]), + "lod_score": float(line_items[3]) + } + marker_obs.append(this_marker) return marker_obs @@ -85,14 +87,15 @@ def process_perm_output(file_name: str): """ perm_results = [] - with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", "PERM_" + file_name), "r") as the_file: + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", "PERM_" + file_name), "r") as the_file: for i, line in enumerate(the_file): if i == 0: # Skip header line continue - else: - line_items = line.split(",") - perm_results.append(float(line_items[1])) + + line_items = line.split(",") + perm_results.append(float(line_items[1])) suggestive = np.percentile(np.array(perm_results), 67) significant = np.percentile(np.array(perm_results), 95) -- cgit v1.2.3 From 12089cb2eae2201d3b348d21dd0e93a61e16e7c7 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 18 Jun 2021 21:27:38 +0000 Subject: Change test_compose_rqtl_command in test_commands.py to have different arguments to hopefully avoid github's pylint 'duplicate code' complaint Changed some parameters in test_compose_rqtl_cmd to avoid pylint complaining about duplicate code --- tests/unit/test_commands.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/tests/unit/test_commands.py b/tests/unit/test_commands.py index a3d0273..aaefb23 100644 --- a/tests/unit/test_commands.py +++ b/tests/unit/test_commands.py @@ -59,24 +59,23 @@ class TestCommands(unittest.TestCase): self.assertEqual( compose_rqtl_cmd(rqtl_wrapper_cmd="rqtl-wrapper", rqtl_wrapper_kwargs={ - "g": "genofile", - "p": "phenofile", - "model": "normal", - "method": "hk", - "nperm": 1000, + "g": "the_genofile", + "p": "the_phenofile", + "model": "np", + "method": "ehk", + "nperm": 2000, 
"scale": "Mb", - "control": "rs123456" + "control": "rs234567" }, rqtl_wrapper_bool_kwargs=[ - "addcovar", - "interval" + "addcovar" ]), ("rqtl-wrapper " - "--g genofile --p phenofile " - "--model normal --method hk " - "--nperm 1000 --scale Mb " - "--control rs123456 " - "--addcovar --interval") + "--g the_genofile --p the_phenofile " + "--model np --method ehk " + "--nperm 2000 --scale Mb " + "--control rs234567 " + "--addcovar") ) def test_queue_cmd_exception_raised_when_redis_is_down(self): -- cgit v1.2.3 From efe73f2f142bd2a12b89ad638c9387cf9c6ded18 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 18 Jun 2021 22:01:21 +0000 Subject: Fixed test_compose_rqtl_command test; forgot to include Rscript in the command --- tests/unit/test_commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_commands.py b/tests/unit/test_commands.py index aaefb23..f36ba55 100644 --- a/tests/unit/test_commands.py +++ b/tests/unit/test_commands.py @@ -70,7 +70,7 @@ class TestCommands(unittest.TestCase): rqtl_wrapper_bool_kwargs=[ "addcovar" ]), - ("rqtl-wrapper " + ("Rscript rqtl-wrapper " "--g the_genofile --p the_phenofile " "--model np --method ehk " "--nperm 2000 --scale Mb " -- cgit v1.2.3 From d8c4b1a0188574cdd70638a99e2984b54bec6033 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 18 Jun 2021 22:16:26 +0000 Subject: Fixed test_rqtl.py to include Rscript in the command --- tests/unit/computations/test_rqtl.py | 82 ++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/tests/unit/computations/test_rqtl.py b/tests/unit/computations/test_rqtl.py index b16f136..3bf8f81 100644 --- a/tests/unit/computations/test_rqtl.py +++ b/tests/unit/computations/test_rqtl.py @@ -1,41 +1,41 @@ -"""Test cases for procedures defined in computations.rqtl""" -import unittest - -from unittest import mock -from gn3.computations.rqtl import generate_rqtl_cmd - -class TestRqtl(unittest.TestCase): - """Test cases for computations.rqtl module""" - @mock.patch("gn3.computations.rqtl.generate_hash_of_string") - @mock.patch("gn3.computations.rqtl.get_hash_of_files") - def test_generate_rqtl_command(self, mock_get_hash_files, mock_generate_hash_string): - """Test computing mapping results with R/qtl""" - mock_get_hash_files.return_value = "my-hash1" - mock_generate_hash_string.return_value = "my-hash2" - - self.assertEqual( - generate_rqtl_cmd(rqtl_wrapper_cmd="rqtl-wrapper", - rqtl_wrapper_kwargs={ - "g": "genofile", - "p": "phenofile", - "model": "normal", - "method": "hk", - "nperm": 1000, - "scale": "Mb", - "control": "rs123456" - }, - rqtl_wrapper_bool_kwargs=[ - "addcovar", - "interval" - ]), { - "output_file": - "my-hash1my-hash2my-hash2-output.json", - "rqtl_cmd": ( - "rqtl-wrapper " - "--g genofile --p phenofile " - "--model normal --method hk " - "--nperm 1000 --scale Mb " - "--control rs123456 " - "--addcovar --interval" - ) - }) +"""Test cases for procedures defined in computations.rqtl""" +import unittest + +from unittest import mock +from gn3.computations.rqtl import generate_rqtl_cmd + +class TestRqtl(unittest.TestCase): + """Test cases for computations.rqtl module""" + @mock.patch("gn3.computations.rqtl.generate_hash_of_string") + @mock.patch("gn3.computations.rqtl.get_hash_of_files") + def test_generate_rqtl_command(self, mock_get_hash_files, mock_generate_hash_string): + """Test computing mapping results with R/qtl""" + mock_get_hash_files.return_value = "my-hash1" + mock_generate_hash_string.return_value = "my-hash2" + + 
self.assertEqual( + generate_rqtl_cmd(rqtl_wrapper_cmd="rqtl-wrapper", + rqtl_wrapper_kwargs={ + "g": "genofile", + "p": "phenofile", + "model": "normal", + "method": "hk", + "nperm": 1000, + "scale": "Mb", + "control": "rs123456" + }, + rqtl_wrapper_bool_kwargs=[ + "addcovar", + "interval" + ]), { + "output_file": + "my-hash1my-hash2my-hash2-output.json", + "rqtl_cmd": ( + "Rscript rqtl-wrapper " + "--g genofile --p phenofile " + "--model normal --method hk " + "--nperm 1000 --scale Mb " + "--control rs123456 " + "--addcovar --interval" + ) + }) -- cgit v1.2.3 From 9dca7551ff6c1fa8c3b26ded2c1b63cfab001eca Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 18 Jun 2021 22:21:46 +0000 Subject: Fixed file type from json to csv for test_generate_rqtl_command --- tests/unit/computations/test_rqtl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/computations/test_rqtl.py b/tests/unit/computations/test_rqtl.py index 3bf8f81..09790b7 100644 --- a/tests/unit/computations/test_rqtl.py +++ b/tests/unit/computations/test_rqtl.py @@ -29,7 +29,7 @@ class TestRqtl(unittest.TestCase): "interval" ]), { "output_file": - "my-hash1my-hash2my-hash2-output.json", + "my-hash1my-hash2my-hash2-output.csv", "rqtl_cmd": ( "Rscript rqtl-wrapper " "--g genofile --p phenofile " -- cgit v1.2.3 From f7becfa11ca857104ecc1b668b4bd3d0a721083c Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 18 Jun 2021 22:28:37 +0000 Subject: Fixed another error where test_generate_rqtl_command didn't include the filename argument (not sure why running unit tests locally doesn't detect this) --- tests/unit/computations/test_rqtl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/computations/test_rqtl.py b/tests/unit/computations/test_rqtl.py index 09790b7..955d0ab 100644 --- a/tests/unit/computations/test_rqtl.py +++ b/tests/unit/computations/test_rqtl.py @@ -36,6 +36,7 @@ class TestRqtl(unittest.TestCase): "--model normal --method hk " "--nperm 1000 --scale Mb " "--control rs123456 " + "--filename my-hash1my-hash2my-hash2-output.csv " "--addcovar --interval" ) }) -- cgit v1.2.3
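Taken together, the patches above compose the rqtl_wrapper command, run it, and post-process its marker and permutation output into a JSON response. As a rough usage sketch only — not part of the series — the snippet below shows how a client might exercise that compute endpoint once a GN3 instance is running with RQTL_WRAPPER configured. The base URL and both file paths are placeholders; the form fields and the response keys ("results", "perm_results", "suggestive", "significant") are the ones handled in gn3/api/rqtl.py as of these commits.

import requests

# Minimal client-side sketch, assuming a locally running GN3 instance with
# RQTL_WRAPPER configured. Base URL and file paths below are placeholders.
GN3_BASE_URL = "http://localhost:8080"

form_data = {
    "geno_file": "/tmp/example.geno",     # placeholder path to a genotype file
    "pheno_file": "/tmp/example.pheno",   # placeholder path to a phenotype file
    # optional R/qtl arguments read from the request form:
    "method": "hk",
    "nperm": "1000",
}

response = requests.post(f"{GN3_BASE_URL}/api/rqtl/compute", data=form_data)
payload = response.json()

# Each entry in "results" is a marker dict with name, chr, cM, Mb and lod_score.
for marker in payload["results"][:5]:
    print(marker["name"], marker["chr"], marker["lod_score"])

# When nperm > 0 the response also carries the permutation summary: the raw
# permutation LODs plus the 67th/95th percentile thresholds computed in
# process_perm_output.
if "perm_results" in payload:
    print("suggestive threshold:", payload["suggestive"])
    print("significant threshold:", payload["significant"])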