From 529e72eb1aed89e018428cabdf63e98983c508ed Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Fri, 5 Mar 2021 16:00:25 +0300 Subject: Replace "compute_k_values" with "generate_gemma_cmd" --- gn3/api/gemma.py | 140 +++++++++++++------------------ tests/integration/test_gemma.py | 150 +++++++++++++++++----------------- tests/unit/computations/test_gemma.py | 70 ++++++++-------- 3 files changed, 169 insertions(+), 191 deletions(-) diff --git a/gn3/api/gemma.py b/gn3/api/gemma.py index 1751530..a69b73e 100644 --- a/gn3/api/gemma.py +++ b/gn3/api/gemma.py @@ -7,12 +7,10 @@ from flask import current_app from flask import jsonify from flask import request -from gn3.commands import compose_gemma_cmd from gn3.commands import queue_cmd from gn3.commands import run_cmd -from gn3.file_utils import get_hash_of_files from gn3.file_utils import jsonfile_to_dict -from gn3.computations.gemma import compute_k_values +from gn3.computations.gemma import generate_gemma_cmd from gn3.computations.gemma import do_paths_exist from gn3.computations.gemma import generate_hash_of_string from gn3.computations.gemma import generate_pheno_txt_file @@ -116,12 +114,12 @@ traitfile, and snpsfile are extracted from a metadata.json file. if not do_paths_exist([genofile, phenofile, snpsfile]): raise FileNotFoundError gemma_kwargs = {"g": genofile, "p": phenofile, "a": snpsfile} - results = compute_k_values(gemma_cmd=current_app.config.get( - "GEMMA_" - "WRAPPER_CMD"), - output_dir=current_app.config.get('TMPDIR'), - token=token, - gemma_kwargs=gemma_kwargs) + results = generate_gemma_cmd( + gemma_cmd=current_app.config.get("GEMMA_" + "WRAPPER_CMD"), + output_dir=current_app.config.get('TMPDIR'), + token=token, + gemma_kwargs=gemma_kwargs) return jsonify(unique_id=queue_cmd( conn=redis.Redis(), email=(request.get_json() or {}).get('email'), @@ -153,13 +151,13 @@ values. if not do_paths_exist([genofile, phenofile, snpsfile]): raise FileNotFoundError gemma_kwargs = {"g": genofile, "p": phenofile, "a": snpsfile} - results = compute_k_values(gemma_cmd=current_app.config.get( - "GEMMA_" - "WRAPPER_CMD"), - output_dir=current_app.config.get('TMPDIR'), - token=token, - gemma_kwargs=gemma_kwargs, - chromosomes=chromosomes) + results = generate_gemma_cmd( + gemma_cmd=current_app.config.get("GEMMA_" + "WRAPPER_CMD"), + output_dir=current_app.config.get('TMPDIR'), + token=token, + gemma_kwargs=gemma_kwargs, + chromosomes=chromosomes) return jsonify(unique_id=queue_cmd( conn=redis.Redis(), email=(request.get_json() or {}).get('email'), @@ -193,27 +191,22 @@ def compute_gwa(k_filename, token): "a": snpsfile, "lmm": _dict.get("lmm", 9) } - _hash = get_hash_of_files([genofile, phenofile, snpsfile]) - _output_filename = f"{_hash}-gwa-output.json" + results = generate_gemma_cmd( + gemma_cmd=current_app.config.get("GEMMA_" + "WRAPPER_CMD"), + output_dir=current_app.config.get('TMPDIR'), + token=token, + gemma_kwargs=gemma_kwargs, + gemma_wrapper_kwargs={ + "input": os.path.join(working_dir, k_filename) + }) return jsonify(unique_id=queue_cmd( conn=redis.Redis(), email=(request.get_json() or {}).get('email'), job_queue=current_app.config.get("REDIS_JOB_QUEUE"), - cmd=compose_gemma_cmd(gemma_wrapper_cmd=current_app.config.get( - "GEMMA_" - "WRAPPER_CMD"), - gemma_wrapper_kwargs={ - "input": - os.path.join(working_dir, k_filename) - }, - gemma_kwargs=gemma_kwargs, - gemma_args=[ - "gk", ">", - (f"{current_app.config.get('TMPDIR')}/" - f"{token}/{_output_filename}") - ])), + cmd=results.get("gemma_cmd")), status="queued", - output_file=_output_filename) + output_file=results.get("output_file")) # pylint: disable=W0703 except Exception: return jsonify( @@ -224,7 +217,7 @@ def compute_gwa(k_filename, token): @gemma.route("/gwa-compute/covars//", methods=["POST"]) def compute_gwa_with_covar(k_filename, token): - """Compute GWA values. Covariates provided. + """Compute GWA values. No Loco; Covariates provided. """ working_dir = os.path.join(current_app.config.get("TMPDIR"), token) @@ -241,27 +234,22 @@ def compute_gwa_with_covar(k_filename, token): "c": covarfile, "lmm": _dict.get("lmm", 9) } - _hash = get_hash_of_files([genofile, phenofile, snpsfile, covarfile]) - _output_filename = f"{_hash}-gwa-output.json" + results = generate_gemma_cmd( + gemma_cmd=current_app.config.get("GEMMA_" + "WRAPPER_CMD"), + output_dir=current_app.config.get('TMPDIR'), + token=token, + gemma_kwargs=gemma_kwargs, + gemma_wrapper_kwargs={ + "input": os.path.join(working_dir, k_filename) + }) return jsonify(unique_id=queue_cmd( conn=redis.Redis(), email=(request.get_json() or {}).get('email'), job_queue=current_app.config.get("REDIS_JOB_QUEUE"), - cmd=compose_gemma_cmd(gemma_wrapper_cmd=current_app.config.get( - "GEMMA_" - "WRAPPER_CMD"), - gemma_wrapper_kwargs={ - "input": - os.path.join(working_dir, k_filename) - }, - gemma_kwargs=gemma_kwargs, - gemma_args=[ - "-gk", ">", - (f"{current_app.config.get('TMPDIR')}/" - f"{token}/{_output_filename}") - ])), + cmd=results.get("gemma_cmd")), status="queued", - output_file=_output_filename) + output_file=results.get("output_file")) # pylint: disable=W0703 except Exception: return jsonify( @@ -292,27 +280,22 @@ def compute_gwa_with_loco_maf(k_filename, maf, token): "lmm": _dict.get("lmm", 9), 'maf': float(maf) } - _hash = get_hash_of_files([genofile, phenofile, snpsfile]) - _output_filename = f"{_hash}-gwa-output.json" + results = generate_gemma_cmd( + gemma_cmd=current_app.config.get("GEMMA_" + "WRAPPER_CMD"), + output_dir=current_app.config.get('TMPDIR'), + token=token, + gemma_kwargs=gemma_kwargs, + gemma_wrapper_kwargs={ + "loco": f"--input {os.path.join(working_dir, k_filename)}" + }) return jsonify(unique_id=queue_cmd( conn=redis.Redis(), email=(request.get_json() or {}).get('email'), job_queue=current_app.config.get("REDIS_JOB_QUEUE"), - cmd=compose_gemma_cmd( - gemma_wrapper_cmd=current_app.config.get("GEMMA_" - "WRAPPER_CMD"), - gemma_wrapper_kwargs={ - "loco": ("--input " - f"{os.path.join(working_dir, k_filename)}") - }, - gemma_kwargs=gemma_kwargs, - gemma_args=[ - "-gk", ">", - (f"{current_app.config.get('TMPDIR')}/" - f"{token}/{_output_filename}") - ])), + cmd=results.get("gemma_cmd")), status="queued", - output_file=_output_filename) + output_file=results.get("output_file")) # pylint: disable=W0703 except Exception: return jsonify( @@ -344,27 +327,22 @@ def compute_gwa_with_loco_covar(k_filename, maf, token): "lmm": _dict.get("lmm", 9), "maf": float(maf) } - _hash = get_hash_of_files([genofile, phenofile, snpsfile, covarfile]) - _output_filename = f"{_hash}-gwa-output.json" + results = generate_gemma_cmd( + gemma_cmd=current_app.config.get("GEMMA_" + "WRAPPER_CMD"), + output_dir=current_app.config.get('TMPDIR'), + token=token, + gemma_kwargs=gemma_kwargs, + gemma_wrapper_kwargs={ + "loco": f"--input {os.path.join(working_dir, k_filename)}" + }) return jsonify(unique_id=queue_cmd( conn=redis.Redis(), email=(request.get_json() or {}).get('email'), job_queue=current_app.config.get("REDIS_JOB_QUEUE"), - cmd=compose_gemma_cmd( - gemma_wrapper_cmd=current_app.config.get("GEMMA_" - "WRAPPER_CMD"), - gemma_wrapper_kwargs={ - "loco": ("--input " - f"{os.path.join(working_dir, k_filename)}") - }, - gemma_kwargs=gemma_kwargs, - gemma_args=[ - "-gk", ">", - (f"{current_app.config.get('TMPDIR')}/" - f"{token}/{_output_filename}") - ])), + cmd=results.get("gemma_cmd")), status="queued", - output_file=_output_filename) + output_file=results.get("output_file")) # pylint: disable=W0703 except Exception: return jsonify( diff --git a/tests/integration/test_gemma.py b/tests/integration/test_gemma.py index f4ac685..4573de1 100644 --- a/tests/integration/test_gemma.py +++ b/tests/integration/test_gemma.py @@ -187,21 +187,21 @@ class GemmaAPITest(unittest.TestCase): "-g /tmp/test-data/genofile.txt " "-p /tmp/test-data/phenofile.txt " "-a /tmp/test-data/snpfile.txt " - "-gk > /tmp/test-data/hash-k-output.json")) - self.assertEqual(response.get_json(), { - "output_file": "hash-k-output.json", - "status": "queued", - "unique_id": "my-unique-id" - }) + "-gk > /tmp/test-data/hash-output.json")) + self.assertEqual( + response.get_json(), { + "output_file": "hash-output.json", + "status": "queued", + "unique_id": "my-unique-id" + }) @mock.patch("gn3.api.gemma.queue_cmd") @mock.patch("gn3.computations.gemma.get_hash_of_files") @mock.patch("gn3.api.gemma.jsonfile_to_dict") @mock.patch("gn3.api.gemma.do_paths_exist") @mock.patch("gn3.api.gemma.redis.Redis") - def test_k_compute_loco(self, mock_redis, mock_path_exist, - mock_json, mock_hash, - mock_queue_cmd): + def test_k_compute_loco(self, mock_redis, mock_path_exist, mock_json, + mock_hash, mock_queue_cmd): """Test /gemma/k-compute/loco//""" mock_path_exist.return_value, _redis_conn = True, mock.MagicMock() mock_redis.return_value = _redis_conn @@ -223,21 +223,21 @@ class GemmaAPITest(unittest.TestCase): "-g /tmp/test-data/genofile.txt " "-p /tmp/test-data/phenofile.txt " "-a /tmp/test-data/snpfile.txt " - "-gk > /tmp/test-data/hash-3R77Mz-k-output.json")) - self.assertEqual(response.get_json(), { - "output_file": "hash-3R77Mz-k-output.json", - "status": "queued", - "unique_id": "my-unique-id" - }) + "-gk > /tmp/test-data/hash-3R77Mz-output.json")) + self.assertEqual( + response.get_json(), { + "output_file": "hash-3R77Mz-output.json", + "status": "queued", + "unique_id": "my-unique-id" + }) @mock.patch("gn3.api.gemma.queue_cmd") - @mock.patch("gn3.api.gemma.get_hash_of_files") + @mock.patch("gn3.computations.gemma.get_hash_of_files") @mock.patch("gn3.api.gemma.jsonfile_to_dict") @mock.patch("gn3.api.gemma.do_paths_exist") @mock.patch("gn3.api.gemma.redis.Redis") - def test_gwa_compute(self, mock_redis, mock_path_exist, - mock_json, mock_hash, - mock_queue_cmd): + def test_gwa_compute(self, mock_redis, mock_path_exist, mock_json, + mock_hash, mock_queue_cmd): """Test /gemma/gwa-compute//""" mock_path_exist.return_value, _redis_conn = True, mock.MagicMock() mock_redis.return_value = _redis_conn @@ -250,9 +250,10 @@ class GemmaAPITest(unittest.TestCase): mock_hash.return_value = "hash" response = self.app.post(("/api/gemma/gwa-compute/hash-k-output.json/" "my-token")) - mock_hash.assert_called_once_with(['/tmp/my-token/genofile.txt', - '/tmp/my-token/phenofile.txt', - '/tmp/my-token/snpfile.txt']) + mock_hash.assert_called_once_with([ + '/tmp/my-token/genofile.txt', '/tmp/my-token/phenofile.txt', + '/tmp/my-token/snpfile.txt' + ]) mock_queue_cmd.assert_called_once_with( conn=_redis_conn, email=None, @@ -262,22 +263,21 @@ class GemmaAPITest(unittest.TestCase): " -- -g /tmp/my-token/genofile.txt " "-p /tmp/my-token/phenofile.txt " "-a /tmp/my-token/snpfile.txt " - "-lmm 9 gk > /tmp/my-token/hash-gwa-output.json")) - self.assertEqual(response.get_json(), { - "unique_id": "my-unique-id", - "status": "queued", - "output_file": "hash-gwa-output.json" - }) + "-lmm 9 -gk > /tmp/my-token/hash-output.json")) + self.assertEqual( + response.get_json(), { + "unique_id": "my-unique-id", + "status": "queued", + "output_file": "hash-output.json" + }) @mock.patch("gn3.api.gemma.queue_cmd") - @mock.patch("gn3.api.gemma.get_hash_of_files") + @mock.patch("gn3.computations.gemma.get_hash_of_files") @mock.patch("gn3.api.gemma.jsonfile_to_dict") @mock.patch("gn3.api.gemma.do_paths_exist") @mock.patch("gn3.api.gemma.redis.Redis") - def test_gwa_compute_with_covars(self, mock_redis, - mock_path_exist, - mock_json, mock_hash, - mock_queue_cmd): + def test_gwa_compute_with_covars(self, mock_redis, mock_path_exist, + mock_json, mock_hash, mock_queue_cmd): """Test /gemma/gwa-compute/covars//""" mock_path_exist.return_value, _redis_conn = True, mock.MagicMock() mock_redis.return_value = _redis_conn @@ -293,10 +293,10 @@ class GemmaAPITest(unittest.TestCase): response = self.app.post(("/api/gemma/gwa-compute/" "covars/hash-k-output.json/" "my-token")) - mock_hash.assert_called_once_with(['/tmp/my-token/genofile.txt', - '/tmp/my-token/phenofile.txt', - '/tmp/my-token/snpfile.txt', - '/tmp/my-token/covarfile.txt']) + mock_hash.assert_called_once_with([ + '/tmp/my-token/genofile.txt', '/tmp/my-token/phenofile.txt', + '/tmp/my-token/snpfile.txt', '/tmp/my-token/covarfile.txt' + ]) mock_queue_cmd.assert_called_once_with( conn=_redis_conn, email=None, @@ -307,22 +307,21 @@ class GemmaAPITest(unittest.TestCase): "-p /tmp/my-token/phenofile.txt " "-a /tmp/my-token/snpfile.txt " "-c /tmp/my-token/covarfile.txt -lmm 9 " - "-gk > /tmp/my-token/hash-gwa-output.json")) - self.assertEqual(response.get_json(), { - "unique_id": "my-unique-id", - "status": "queued", - "output_file": "hash-gwa-output.json" - }) + "-gk > /tmp/my-token/hash-output.json")) + self.assertEqual( + response.get_json(), { + "unique_id": "my-unique-id", + "status": "queued", + "output_file": "hash-output.json" + }) @mock.patch("gn3.api.gemma.queue_cmd") - @mock.patch("gn3.api.gemma.get_hash_of_files") + @mock.patch("gn3.computations.gemma.get_hash_of_files") @mock.patch("gn3.api.gemma.jsonfile_to_dict") @mock.patch("gn3.api.gemma.do_paths_exist") @mock.patch("gn3.api.gemma.redis.Redis") - def test_gwa_compute_with_loco_only(self, mock_redis, - mock_path_exist, - mock_json, mock_hash, - mock_queue_cmd): + def test_gwa_compute_with_loco_only(self, mock_redis, mock_path_exist, + mock_json, mock_hash, mock_queue_cmd): """Test /gemma/gwa-compute//loco/maf// """ @@ -336,35 +335,36 @@ class GemmaAPITest(unittest.TestCase): } mock_hash.return_value = "hash" response = self.app.post(("/api/gemma/gwa-compute/" - "hash-k-output.json/loco/" + "hash-output.json/loco/" "maf/21/my-token")) - mock_hash.assert_called_once_with(['/tmp/my-token/genofile.txt', - '/tmp/my-token/phenofile.txt', - '/tmp/my-token/snpfile.txt']) + mock_hash.assert_called_once_with([ + '/tmp/my-token/genofile.txt', '/tmp/my-token/phenofile.txt', + '/tmp/my-token/snpfile.txt' + ]) mock_queue_cmd.assert_called_once_with( conn=_redis_conn, email=None, job_queue='GN3::job-queue', cmd=("gemma-wrapper --json --loco --input " - "/tmp/my-token/hash-k-output.json -- " + "/tmp/my-token/hash-output.json -- " "-g /tmp/my-token/genofile.txt " "-p /tmp/my-token/phenofile.txt " "-a /tmp/my-token/snpfile.txt " "-lmm 9 -maf 21.0 " - "-gk > /tmp/my-token/hash-gwa-output.json")) - self.assertEqual(response.get_json(), { - "unique_id": "my-unique-id", - "status": "queued", - "output_file": "hash-gwa-output.json" - }) + "-gk > /tmp/my-token/hash-output.json")) + self.assertEqual( + response.get_json(), { + "unique_id": "my-unique-id", + "status": "queued", + "output_file": "hash-output.json" + }) @mock.patch("gn3.api.gemma.queue_cmd") - @mock.patch("gn3.api.gemma.get_hash_of_files") + @mock.patch("gn3.computations.gemma.get_hash_of_files") @mock.patch("gn3.api.gemma.jsonfile_to_dict") @mock.patch("gn3.api.gemma.do_paths_exist") @mock.patch("gn3.api.gemma.redis.Redis") - def test_gwa_compute_with_loco_covars(self, mock_redis, - mock_path_exist, + def test_gwa_compute_with_loco_covars(self, mock_redis, mock_path_exist, mock_json, mock_hash, mock_queue_cmd): """Test /gemma/gwa-compute//loco/covariates/maf/ @@ -381,27 +381,27 @@ class GemmaAPITest(unittest.TestCase): } mock_hash.return_value = "hash" response = self.app.post(("/api/gemma/gwa-compute/" - "hash-k-output.json/loco/" + "hash-output.json/loco/" "covariates/maf/21/my-token")) - mock_hash.assert_called_once_with(['/tmp/my-token/genofile.txt', - '/tmp/my-token/phenofile.txt', - '/tmp/my-token/snpfile.txt', - "/tmp/my-token/covarfile.txt" - ]) + mock_hash.assert_called_once_with([ + '/tmp/my-token/genofile.txt', '/tmp/my-token/phenofile.txt', + '/tmp/my-token/snpfile.txt', "/tmp/my-token/covarfile.txt" + ]) mock_queue_cmd.assert_called_once_with( conn=_redis_conn, email=None, job_queue='GN3::job-queue', cmd=("gemma-wrapper --json --loco --input " - "/tmp/my-token/hash-k-output.json -- " + "/tmp/my-token/hash-output.json -- " "-g /tmp/my-token/genofile.txt " "-p /tmp/my-token/phenofile.txt " "-a /tmp/my-token/snpfile.txt " "-c /tmp/my-token/covarfile.txt " "-lmm 9 -maf 21.0 " - "-gk > /tmp/my-token/hash-gwa-output.json")) - self.assertEqual(response.get_json(), { - "unique_id": "my-unique-id", - "status": "queued", - "output_file": "hash-gwa-output.json" - }) + "-gk > /tmp/my-token/hash-output.json")) + self.assertEqual( + response.get_json(), { + "unique_id": "my-unique-id", + "status": "queued", + "output_file": "hash-output.json" + }) diff --git a/tests/unit/computations/test_gemma.py b/tests/unit/computations/test_gemma.py index d3fb7aa..4a76bef 100644 --- a/tests/unit/computations/test_gemma.py +++ b/tests/unit/computations/test_gemma.py @@ -2,7 +2,7 @@ import unittest from unittest import mock -from gn3.computations.gemma import compute_k_values +from gn3.computations.gemma import generate_gemma_cmd from gn3.computations.gemma import generate_hash_of_string from gn3.computations.gemma import generate_pheno_txt_file from gn3.computations.gemma import generate_gemma_computation_cmd @@ -57,42 +57,42 @@ class TestGemma(unittest.TestCase): """Test computing k valuse without loco""" mock_get_hash.return_value = "my-hash" self.assertEqual( - compute_k_values(gemma_cmd="gemma-wrapper", - output_dir="/tmp", - token="my-token", - gemma_kwargs={ - "g": "genofile", - "p": "phenofile", - "a": "snpsfile" - }), { - "output_file": - "my-hash-k-output.json", - "gemma_cmd": - ("gemma-wrapper --json -- -g genofile " - "-p phenofile -a snpsfile " - "-gk > /tmp/my-token/my-hash-k-output.json") - }) + generate_gemma_cmd(gemma_cmd="gemma-wrapper", + output_dir="/tmp", + token="my-token", + gemma_kwargs={ + "g": "genofile", + "p": "phenofile", + "a": "snpsfile" + }), { + "output_file": + "my-hash-output.json", + "gemma_cmd": + ("gemma-wrapper --json -- -g genofile " + "-p phenofile -a snpsfile " + "-gk > /tmp/my-token/my-hash-output.json") + }) @mock.patch("gn3.computations.gemma.get_hash_of_files") - def test_compute_k_values_with_loco(self, mock_get_hash): + def test_generate_gemma_cmd_with_loco(self, mock_get_hash): """Test computing k valuse with loco""" mock_get_hash.return_value = "my-hash" self.assertEqual( - compute_k_values(gemma_cmd="gemma-wrapper", - output_dir="/tmp", - token="my-token", - chromosomes="1,2,3,4,5", - gemma_kwargs={ - "g": "genofile", - "p": "phenofile", - "a": "snpsfile" - }), { - "output_file": - "my-hash-r+gF5a-k-output.json", - "gemma_cmd": ("gemma-wrapper --json " - "--loco --input 1,2,3,4,5 " - "-- -g genofile " - "-p phenofile -a snpsfile " - "-gk > /tmp/my-token/" - "my-hash-r+gF5a-k-output.json") - }) + generate_gemma_cmd(gemma_cmd="gemma-wrapper", + output_dir="/tmp", + token="my-token", + chromosomes="1,2,3,4,5", + gemma_kwargs={ + "g": "genofile", + "p": "phenofile", + "a": "snpsfile" + }), { + "output_file": + "my-hash-r+gF5a-output.json", + "gemma_cmd": ("gemma-wrapper --json " + "--loco --input 1,2,3,4,5 " + "-- -g genofile " + "-p phenofile -a snpsfile " + "-gk > /tmp/my-token/" + "my-hash-r+gF5a-output.json") + }) -- cgit v1.2.3