From 53f27b547e7220d46bdc2e92debb38a8739e511c Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 11 May 2021 16:47:38 +0300 Subject: computations: correlations: Apply pep-8 --- gn3/computations/correlations.py | 142 +++++++++++++++------------------------ 1 file changed, 54 insertions(+), 88 deletions(-) diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py index 0d15d9b..cd7d604 100644 --- a/gn3/computations/correlations.py +++ b/gn3/computations/correlations.py @@ -9,12 +9,17 @@ from typing import Callable import scipy.stats -def map_shared_keys_to_values(target_sample_keys: List, target_sample_vals: dict)-> List: - """Function to construct target dataset data items given commoned shared\ - keys and trait samplelist values for example given keys >>>>>>>>>>\ - ["BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9"] and value object as\ - "HCMA:_AT": [4.1, 5.6, 3.2, 1.1, 4.4, 2.2],TXD_AT": [6.2, 5.7, 3.6, 1.5, 4.2, 2.3]}\ - return results should be a list of dicts mapping the shared keys to the trait values""" +def map_shared_keys_to_values(target_sample_keys: List, + target_sample_vals: dict) -> List: + """Function to construct target dataset data items given common shared keys + and trait sample-list values for example given keys + + >>>>>>>>>> ["BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9"] and value + object as "HCMA:_AT": [4.1, 5.6, 3.2, 1.1, 4.4, 2.2],TXD_AT": [6.2, 5.7, + 3.6, 1.5, 4.2, 2.3]} return results should be a list of dicts mapping the + shared keys to the trait values + + """ target_dataset_data = [] for trait_id, sample_values in target_sample_vals.items(): @@ -32,9 +37,9 @@ def map_shared_keys_to_values(target_sample_keys: List, target_sample_vals: dict def normalize_values(a_values: List, b_values: List) -> Tuple[List[float], List[float], int]: - """Trim two lists of values to contain only the values they both share - Given two lists of sample values, trim each list so that it contains only - the samples that contain a value in both lists. Also returns the number of + """Trim two lists of values to contain only the values they both share Given + two lists of sample values, trim each list so that it contains only the + samples that contain a value in both lists. Also returns the number of such samples. 
>>> normalize_values([2.3, None, None, 3.2, 4.1, 5], @@ -62,16 +67,14 @@ pearson,spearman and biweight mid correlation return value is rho and p_value "pearson": scipy.stats.pearsonr, "spearman": scipy.stats.spearmanr } - use_corr_method = corr_mapping.get(corr_method, "spearman") - corr_coeffient, p_val = use_corr_method(primary_values, target_values) - return (corr_coeffient, p_val) def compute_sample_r_correlation(trait_name, corr_method, trait_vals, - target_samples_vals) -> Optional[Tuple[str, float, float, int]]: + target_samples_vals) -> Optional[ + Tuple[str, float, float, int]]: """Given a primary trait values and target trait values calculate the correlation coeff and p value @@ -90,7 +93,6 @@ def compute_sample_r_correlation(trait_name, corr_method, trait_vals, # should use numpy.isNan scipy.isNan is deprecated if corr_coeffient is not None: return (trait_name, corr_coeffient, p_value, num_overlap) - return None @@ -99,15 +101,16 @@ def do_bicor(x_val, y_val) -> Tuple[float, float]: package :not packaged in guix """ - _corr_input = (x_val, y_val) - return (0.0, 0.0) + x_val, y_val = 0, 0 + return (x_val, y_val) def filter_shared_sample_keys(this_samplelist, target_samplelist) -> Tuple[List, List]: - """Given primary and target samplelist\ - for two base and target trait select\ - filter the values using the shared keys""" + """Given primary and target sample-list for two base and target trait select + filter the values using the shared keys + + """ this_vals = [] target_vals = [] for key, value in target_samplelist.items(): @@ -120,21 +123,18 @@ def filter_shared_sample_keys(this_samplelist, def compute_all_sample_correlation(this_trait, target_dataset, corr_method="pearson") -> List: - """Given a trait data samplelist and\ - target__datasets compute all sample correlation + """Given a trait data sample-list and target__datasets compute all sample + correlation + """ # xtodo fix trait_name currently returning single one # pylint: disable-msg=too-many-locals - this_trait_samples = this_trait["trait_sample_data"] corr_results = [] processed_values = [] for target_trait in target_dataset: trait_name = target_trait.get("trait_id") target_trait_data = target_trait["trait_sample_data"] - # this_vals, target_vals = filter_shared_sample_keys( - # this_trait_samples, target_trait_data) - processed_values.append((trait_name, corr_method, *filter_shared_sample_keys( this_trait_samples, target_trait_data))) with multiprocessing.Pool(4) as pool: @@ -144,7 +144,6 @@ def compute_all_sample_correlation(this_trait, if sample_correlation is not None: (trait_name, corr_coeffient, p_value, num_overlap) = sample_correlation - corr_result = { "corr_coeffient": corr_coeffient, "p_value": p_value, @@ -152,7 +151,6 @@ def compute_all_sample_correlation(this_trait, } corr_results.append({trait_name: corr_result}) - return sorted( corr_results, key=lambda trait_name: -abs(list(trait_name.values())[0]["corr_coeffient"])) @@ -160,42 +158,34 @@ def compute_all_sample_correlation(this_trait, def benchmark_compute_all_sample(this_trait, target_dataset, - corr_method="pearson") ->List: - """Temp function to benchmark with compute_all_sample_r\ - alternative to compute_all_sample_r where we use \ - multiprocessing - """ + corr_method="pearson") -> List: + """Temp function to benchmark with compute_all_sample_r alternative to + compute_all_sample_r where we use multiprocessing + """ this_trait_samples = this_trait["trait_sample_data"] - corr_results = [] - for target_trait in target_dataset: trait_name = 
target_trait.get("trait_id") target_trait_data = target_trait["trait_sample_data"] this_vals, target_vals = filter_shared_sample_keys( this_trait_samples, target_trait_data) - sample_correlation = compute_sample_r_correlation( trait_name=trait_name, corr_method=corr_method, trait_vals=this_vals, target_samples_vals=target_vals) - if sample_correlation is not None: - (trait_name, corr_coeffient, p_value, num_overlap) = sample_correlation - + (trait_name, corr_coeffient, + p_value, num_overlap) = sample_correlation else: continue - corr_result = { "corr_coeffient": corr_coeffient, "p_value": p_value, "num_overlap": num_overlap } - corr_results.append({trait_name: corr_result}) - return corr_results @@ -205,11 +195,8 @@ list depending on whether both dataset and target_dataset are both set to probet """ - corr_results = {"lit": 1} - if corr_type not in ("lit", "literature"): - corr_results["top_corr_results"] = top_corr_results # run lit_correlation for the given top_corr_results if corr_type == "tissue": @@ -255,8 +242,10 @@ def fetch_lit_correlation_data( input_mouse_gene_id: Optional[str], gene_id: str, mouse_gene_id: Optional[str] = None) -> Tuple[str, float]: - """Given input trait mouse gene id and mouse gene id fetch the lit\ - corr_data""" + """Given input trait mouse gene id and mouse gene id fetch the lit + corr_data + + """ if mouse_gene_id is not None and ";" not in mouse_gene_id: query = """ SELECT VALUE @@ -283,7 +272,6 @@ def fetch_lit_correlation_data( lit_results = (gene_id, lit_corr_results.val)\ if lit_corr_results else (gene_id, 0) return lit_results - return (gene_id, 0) @@ -295,11 +283,9 @@ def lit_correlation_for_trait_list( """given species,base trait gene id fetch the lit corr results from the db\ output is float for lit corr results """ fetched_lit_corr_results = [] - this_trait_mouse_gene_id = map_to_mouse_gene_id(conn=conn, species=species, gene_id=trait_gene_id) - for (trait_name, target_trait_gene_id) in target_trait_lists: corr_results = {} if target_trait_gene_id: @@ -307,29 +293,26 @@ def lit_correlation_for_trait_list( conn=conn, species=species, gene_id=target_trait_gene_id) - fetched_corr_data = fetch_lit_correlation_data( conn=conn, input_mouse_gene_id=this_trait_mouse_gene_id, gene_id=target_trait_gene_id, mouse_gene_id=target_mouse_gene_id) - dict_results = dict(zip(("gene_id", "lit_corr"), fetched_corr_data)) corr_results[trait_name] = dict_results fetched_lit_corr_results.append(corr_results) - return fetched_lit_corr_results def query_formatter(query_string: str, *query_values): - """Formatter query string given the unformatted query string\ - and the respectibe values.Assumes number of placeholders is - equal to the number of query values """ - # xtodo escape sql queries - results = query_string % (query_values) + """Formatter query string given the unformatted query string and the + respectibe values.Assumes number of placeholders is equal to the number of + query values - return results + """ + # xtodo escape sql queries + return query_string % (query_values) def map_to_mouse_gene_id(conn, species: Optional[str], @@ -342,26 +325,23 @@ def map_to_mouse_gene_id(conn, species: Optional[str], return None if species == "mouse": return gene_id - cursor = conn.cursor() query = """SELECT mouse FROM GeneIDXRef WHERE '%s' = '%s'""" - query_values = (species, gene_id) cursor.execute(query_formatter(query, *query_values)) results = cursor.fetchone() - mouse_gene_id = results.mouse if results is not None else None - return mouse_gene_id def 
compute_all_lit_correlation(conn, trait_lists: List, species: str, gene_id): - """Function that acts as an abstraction for - lit_correlation_for_trait_list""" + """Function that acts as an abstraction for lit_correlation_for_trait_list + + """ lit_results = lit_correlation_for_trait_list( conn=conn, @@ -378,47 +358,37 @@ def compute_all_lit_correlation(conn, trait_lists: List, def compute_all_tissue_correlation(primary_tissue_dict: dict, target_tissues_data: dict, corr_method: str): - """Function acts as an abstraction for tissue_correlation_for_trait_list\ - required input are target tissue object and primary tissue trait\ - target tissues data contains the trait_symbol_dict and symbol_tissue_vals + """Function acts as an abstraction for tissue_correlation_for_trait_list + required input are target tissue object and primary tissue trait target + tissues data contains the trait_symbol_dict and symbol_tissue_vals """ - tissues_results = [] - primary_tissue_vals = primary_tissue_dict["tissue_values"] traits_symbol_dict = target_tissues_data["trait_symbol_dict"] symbol_tissue_vals_dict = target_tissues_data["symbol_tissue_vals_dict"] - target_tissues_list = process_trait_symbol_dict( traits_symbol_dict, symbol_tissue_vals_dict) - for target_tissue_obj in target_tissues_list: trait_id = target_tissue_obj.get("trait_id") - target_tissue_vals = target_tissue_obj.get("tissue_values") - tissue_result = tissue_correlation_for_trait_list( primary_tissue_vals=primary_tissue_vals, target_tissues_values=target_tissue_vals, trait_id=trait_id, corr_method=corr_method) - tissue_result_dict = {trait_id: tissue_result} tissues_results.append(tissue_result_dict) - - sorted_tissues_results = sorted( + return sorted( tissues_results, key=lambda trait_name: -abs(list(trait_name.values())[0]["tissue_corr"])) - return sorted_tissues_results - def process_trait_symbol_dict(trait_symbol_dict, symbol_tissue_vals_dict) -> List: - """Method for processing trait symbol\ - dict given the symbol tissue values """ - traits_tissue_vals = [] + """Method for processing trait symbol dict given the symbol tissue values + """ + traits_tissue_vals = [] for (trait, symbol) in trait_symbol_dict.items(): if symbol is not None: target_symbol = symbol.lower() @@ -427,25 +397,21 @@ def process_trait_symbol_dict(trait_symbol_dict, symbol_tissue_vals_dict) -> Lis target_tissue_dict = {"trait_id": trait, "symbol": target_symbol, "tissue_values": trait_tissue_val} - traits_tissue_vals.append(target_tissue_dict) - return traits_tissue_vals def compute_tissue_correlation(primary_tissue_dict: dict, target_tissues_data: dict, corr_method: str): - """Experimental function that uses multiprocessing\ - for computing tissue correlation - """ + """Experimental function that uses multiprocessing for computing tissue + correlation + """ tissues_results = [] - primary_tissue_vals = primary_tissue_dict["tissue_values"] traits_symbol_dict = target_tissues_data["trait_symbol_dict"] symbol_tissue_vals_dict = target_tissues_data["symbol_tissue_vals_dict"] - target_tissues_list = process_trait_symbol_dict( traits_symbol_dict, symbol_tissue_vals_dict) processed_values = [] -- cgit v1.2.3 From 378d0fc7f4ff5df5e8e77617c37bcef2b26ddf02 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 11 May 2021 17:00:26 +0300 Subject: Rename file_utils to fs_helpers Generally avoid naming things with a "utils" prefix/ suffix since it encourages contributors to dump any new functions there; and over time, as the code grows, things get messy... 
--- gn3/api/gemma.py | 4 +- gn3/api/general.py | 2 +- gn3/computations/gemma.py | 2 +- gn3/file_utils.py | 98 ------------------------------------------- gn3/fs_helpers.py | 98 +++++++++++++++++++++++++++++++++++++++++++ tests/unit/test_file_utils.py | 20 ++++----- 6 files changed, 112 insertions(+), 112 deletions(-) delete mode 100644 gn3/file_utils.py create mode 100644 gn3/fs_helpers.py diff --git a/gn3/api/gemma.py b/gn3/api/gemma.py index 81e185d..6b0b20e 100644 --- a/gn3/api/gemma.py +++ b/gn3/api/gemma.py @@ -9,8 +9,8 @@ from flask import request from gn3.commands import queue_cmd from gn3.commands import run_cmd -from gn3.file_utils import cache_ipfs_file -from gn3.file_utils import jsonfile_to_dict +from gn3.fs_helpers import cache_ipfs_file +from gn3.fs_helpers import jsonfile_to_dict from gn3.computations.gemma import generate_gemma_cmd from gn3.computations.gemma import do_paths_exist diff --git a/gn3/api/general.py b/gn3/api/general.py index 38e6154..a9a8da2 100644 --- a/gn3/api/general.py +++ b/gn3/api/general.py @@ -5,7 +5,7 @@ from flask import current_app from flask import jsonify from flask import request -from gn3.file_utils import extract_uploaded_file +from gn3.fs_helpers import extract_uploaded_file general = Blueprint("general", __name__) diff --git a/gn3/computations/gemma.py b/gn3/computations/gemma.py index 5f9d5a3..0b22d3c 100644 --- a/gn3/computations/gemma.py +++ b/gn3/computations/gemma.py @@ -7,7 +7,7 @@ from typing import Dict from typing import List from typing import ValuesView from gn3.commands import compose_gemma_cmd -from gn3.file_utils import get_hash_of_files +from gn3.fs_helpers import get_hash_of_files def generate_hash_of_string(unhashed_str: str) -> str: diff --git a/gn3/file_utils.py b/gn3/file_utils.py deleted file mode 100644 index 73f6567..0000000 --- a/gn3/file_utils.py +++ /dev/null @@ -1,98 +0,0 @@ -"""Procedures that operate on files/ directories""" -import hashlib -import json -import os -import random -import string -import tarfile -import pathlib - -from functools import partial -from typing import Dict -from typing import List -from werkzeug.utils import secure_filename - -import ipfshttpclient - - -def get_hash_of_files(files: List[str]) -> str: - """Given a list of valid of FILES, return their hash as a string""" - md5hash = hashlib.md5() - for file_path in sorted(files): - if not os.path.exists(file_path): - raise FileNotFoundError - with open(file_path, "rb") as file_: - for buf in iter(partial(file_.read, 4096), b''): - md5hash.update(bytearray( - hashlib.md5(buf).hexdigest(), "utf-8")) - return md5hash.hexdigest() - - -def get_dir_hash(directory: str) -> str: - """Return the hash of a DIRECTORY""" - if not os.path.exists(directory): - raise FileNotFoundError - all_files = [ - os.path.join(root, names) for root, _, files in os.walk(directory) - for names in sorted(files) - ] - return get_hash_of_files(all_files) - - -def jsonfile_to_dict(json_file: str) -> Dict: - """Give a JSON_FILE, return a python dict""" - with open(json_file) as _file: - data = json.load(_file) - return data - raise FileNotFoundError - - -def generate_random_n_string(n_length: int) -> str: - """Generate a random string that is N chars long""" - return ''.join( - random.choice(string.ascii_uppercase + string.digits) - for _ in range(n_length)) - - -def extract_uploaded_file(gzipped_file, - target_dir: str, - token: str = "") -> Dict: - """Get the (directory) hash of extracted contents of GZIPPED_FILE; and move -contents to TARGET_DIR/. 
- - """ - if not token: - token = (f"{generate_random_n_string(6)}-" - f"{generate_random_n_string(6)}") - tar_target_loc = os.path.join(target_dir, token, - secure_filename(gzipped_file.filename)) - try: - if not os.path.exists(os.path.join(target_dir, token)): - os.mkdir(os.path.join(target_dir, token)) - gzipped_file.save(tar_target_loc) - # Extract to "tar_target_loc/token" - tar = tarfile.open(tar_target_loc) - tar.extractall(path=os.path.join(target_dir, token)) - tar.close() - # pylint: disable=W0703 - except Exception: - return {"status": 128, "error": "gzip failed to unpack file"} - return {"status": 0, "token": token} - - -def cache_ipfs_file(ipfs_file: str, - cache_dir: str, - ipfs_addr: str = "/ip4/127.0.0.1/tcp/5001") -> str: - """Check if a file exists in cache; if it doesn't, cache it. Return the - cached file location - - """ - file_loc = os.path.join(cache_dir, ipfs_file.split("ipfs/")[-1]) - if not os.path.exists(file_loc): - client = ipfshttpclient.connect(ipfs_addr) - client.get(ipfs_file, - target=str( - pathlib.Path - (os.path.join(cache_dir, - ipfs_file.split("ipfs/")[-1])).parent)) - return file_loc diff --git a/gn3/fs_helpers.py b/gn3/fs_helpers.py new file mode 100644 index 0000000..73f6567 --- /dev/null +++ b/gn3/fs_helpers.py @@ -0,0 +1,98 @@ +"""Procedures that operate on files/ directories""" +import hashlib +import json +import os +import random +import string +import tarfile +import pathlib + +from functools import partial +from typing import Dict +from typing import List +from werkzeug.utils import secure_filename + +import ipfshttpclient + + +def get_hash_of_files(files: List[str]) -> str: + """Given a list of valid of FILES, return their hash as a string""" + md5hash = hashlib.md5() + for file_path in sorted(files): + if not os.path.exists(file_path): + raise FileNotFoundError + with open(file_path, "rb") as file_: + for buf in iter(partial(file_.read, 4096), b''): + md5hash.update(bytearray( + hashlib.md5(buf).hexdigest(), "utf-8")) + return md5hash.hexdigest() + + +def get_dir_hash(directory: str) -> str: + """Return the hash of a DIRECTORY""" + if not os.path.exists(directory): + raise FileNotFoundError + all_files = [ + os.path.join(root, names) for root, _, files in os.walk(directory) + for names in sorted(files) + ] + return get_hash_of_files(all_files) + + +def jsonfile_to_dict(json_file: str) -> Dict: + """Give a JSON_FILE, return a python dict""" + with open(json_file) as _file: + data = json.load(_file) + return data + raise FileNotFoundError + + +def generate_random_n_string(n_length: int) -> str: + """Generate a random string that is N chars long""" + return ''.join( + random.choice(string.ascii_uppercase + string.digits) + for _ in range(n_length)) + + +def extract_uploaded_file(gzipped_file, + target_dir: str, + token: str = "") -> Dict: + """Get the (directory) hash of extracted contents of GZIPPED_FILE; and move +contents to TARGET_DIR/. 
+ + """ + if not token: + token = (f"{generate_random_n_string(6)}-" + f"{generate_random_n_string(6)}") + tar_target_loc = os.path.join(target_dir, token, + secure_filename(gzipped_file.filename)) + try: + if not os.path.exists(os.path.join(target_dir, token)): + os.mkdir(os.path.join(target_dir, token)) + gzipped_file.save(tar_target_loc) + # Extract to "tar_target_loc/token" + tar = tarfile.open(tar_target_loc) + tar.extractall(path=os.path.join(target_dir, token)) + tar.close() + # pylint: disable=W0703 + except Exception: + return {"status": 128, "error": "gzip failed to unpack file"} + return {"status": 0, "token": token} + + +def cache_ipfs_file(ipfs_file: str, + cache_dir: str, + ipfs_addr: str = "/ip4/127.0.0.1/tcp/5001") -> str: + """Check if a file exists in cache; if it doesn't, cache it. Return the + cached file location + + """ + file_loc = os.path.join(cache_dir, ipfs_file.split("ipfs/")[-1]) + if not os.path.exists(file_loc): + client = ipfshttpclient.connect(ipfs_addr) + client.get(ipfs_file, + target=str( + pathlib.Path + (os.path.join(cache_dir, + ipfs_file.split("ipfs/")[-1])).parent)) + return file_loc diff --git a/tests/unit/test_file_utils.py b/tests/unit/test_file_utils.py index cc842d5..75be4f6 100644 --- a/tests/unit/test_file_utils.py +++ b/tests/unit/test_file_utils.py @@ -1,14 +1,14 @@ -"""Test cases for procedures defined in file_utils.py""" +"""Test cases for procedures defined in fs_helpers.py""" import os import unittest from dataclasses import dataclass from typing import Callable from unittest import mock -from gn3.file_utils import extract_uploaded_file -from gn3.file_utils import get_dir_hash -from gn3.file_utils import jsonfile_to_dict -from gn3.file_utils import cache_ipfs_file +from gn3.fs_helpers import extract_uploaded_file +from gn3.fs_helpers import get_dir_hash +from gn3.fs_helpers import jsonfile_to_dict +from gn3.fs_helpers import cache_ipfs_file @dataclass @@ -19,7 +19,7 @@ class MockFile: class TestFileUtils(unittest.TestCase): - """Test cases for procedures defined in file_utils.py""" + """Test cases for procedures defined in fs_helpers.py""" def test_get_dir_hash(self): """Test that a directory is hashed correctly""" @@ -45,8 +45,8 @@ non-existent""" self.assertRaises(FileNotFoundError, jsonfile_to_dict, "/non-existent-dir") - @mock.patch("gn3.file_utils.tarfile") - @mock.patch("gn3.file_utils.secure_filename") + @mock.patch("gn3.fs_helpers.tarfile") + @mock.patch("gn3.fs_helpers.secure_filename") def test_extract_uploaded_file(self, mock_file, mock_tarfile): """Test that the gzip file is extracted to the right location""" mock_file.return_value = "upload-data.tar.gz" @@ -65,7 +65,7 @@ non-existent""" mock_file.assert_called_once_with("upload-data.tar.gz") self.assertEqual(result, {"status": 0, "token": "abcdef-abcdef"}) - @mock.patch("gn3.file_utils.secure_filename") + @mock.patch("gn3.fs_helpers.secure_filename") def test_extract_uploaded_file_non_existent_gzip(self, mock_file): """Test that the right error message is returned when there is a problem extracting the file""" @@ -96,7 +96,7 @@ extracting the file""" os.rmdir(test_dir) self.assertEqual(file_loc, f"{test_dir}/genotype.txt") - @mock.patch("gn3.file_utils.ipfshttpclient") + @mock.patch("gn3.fs_helpers.ipfshttpclient") def test_cache_ipfs_file_cache_miss(self, mock_ipfs): """Test that a file is cached if there's a cache miss""" -- cgit v1.2.3 From 7ab86429219a2ed5ad7b94f0ce9667dd4c38c56d Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 13 May 2021 10:58:19 +0300 Subject: 
Add end-point for running an rQTL program * gn3/api/general.py (run_r_qtl): New function. * gn3/settings.py: New variable. --- gn3/api/general.py | 14 ++++++++++++++ gn3/settings.py | 1 + 2 files changed, 15 insertions(+) diff --git a/gn3/api/general.py b/gn3/api/general.py index a9a8da2..cebb2e3 100644 --- a/gn3/api/general.py +++ b/gn3/api/general.py @@ -6,6 +6,7 @@ from flask import jsonify from flask import request from gn3.fs_helpers import extract_uploaded_file +from gn3.commands import run_cmd general = Blueprint("general", __name__) @@ -50,3 +51,16 @@ TTL is set in the metadata file. If none is provided, the default is 1 week. if results.get("status") > 0: status = 500 return jsonify(results), status + + +@general.route("/qtl/run//", + methods=["POST"], + strict_slashes=False) +def run_r_qtl(geno_filestr, pheno_filestr): + """Run r_qtl command using the written rqtl_wrapper program + + """ + rqtl_wrapper = current_app.config["RQTL_WRAPPER"] + cmd = (f"Rscript {rqtl_wrapper} " + f"{geno_filestr} {pheno_filestr}") + return jsonify(run_cmd(cmd)), 201 diff --git a/gn3/settings.py b/gn3/settings.py index 7b3ffb7..2057ce1 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -10,6 +10,7 @@ CACHEDIR = "" REDIS_URI = "redis://localhost:6379/0" REDIS_JOB_QUEUE = "GN3::job-queue" TMPDIR = os.environ.get("TMPDIR", tempfile.gettempdir()) +RQTL_WRAPPER = "rqtl_wrapper.R" # SQL confs SQL_URI = os.environ.get("SQL_URI", "mysql://webqtlout:webqtlout@localhost/db_webqtl") -- cgit v1.2.3 From 09f699253400a807e2390e6515b204a1b9f4c3a9 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 13 May 2021 11:16:32 +0300 Subject: tests: test_general: Add test case for run_r_qtl endpoint --- tests/integration/test_general.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/integration/test_general.py b/tests/integration/test_general.py index 99c4824..8fc2b43 100644 --- a/tests/integration/test_general.py +++ b/tests/integration/test_general.py @@ -46,3 +46,14 @@ class GeneralAPITest(unittest.TestCase): self.assertEqual(response.get_json(), {"status": 128, "error": "gzip failed to unpack file"}) + + @mock.patch("gn3.api.general.run_cmd") + def test_run_r_qtl(self, mock_run_cmd): + """Test correct upload of file""" + mock_run_cmd.return_value = "Random results from STDOUT" + response = self.app.post("/api/qtl/run/" + "geno_file_test/" + "pheno_file_test") + self.assertEqual(response.status_code, 201) + self.assertEqual(response.get_json(), + "Random results from STDOUT") -- cgit v1.2.3 From 46a96ec0b89620eed4874ada565a9643ac19a042 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 14 May 2021 14:09:48 -0500 Subject: README --- README.md | 49 +++++++++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index b18fdf1..c1acba1 100644 --- a/README.md +++ b/README.md @@ -3,34 +3,27 @@ GeneNetwork3 REST API for data science and machine learning ## Installation -##### Using python-pip +#### Using guix -1. Prepare your system. You need to make you have python > 3.8, and - the ability to install modules. -2. Create and enter your virtualenv: +Simply load up the environment (for development purposes): ```bash -virtualenv --python python3 venv -. venv/bin/activate +guix environment --load=guix.scm ``` -3. Install the required packages + +Also, make sure you have the *guix-bioinformatics* channel set up. 
```bash -# The --ignore-installed flag forces packages to -# get installed in the venv even if they existed -# in the global env -pip install -r requirements.txt --ignore-installed +env GUIX_PACKAGE_PATH=~/guix-bioinformatics/ ~/.config/guix/current/bin/guix environment --load=guix.scm +python3 + import redis ``` -#### Using guix +Better run a proper container -Simply load up the environment (for development purposes): - -```bash -guix environment --load=guix.scm ``` - -Also, make sure you have the *guix-bioinformatics* channel set up. +env GUIX_PACKAGE_PATH=~/guix-bioinformatics/ ~/.config/guix/current/bin/guix environment -C --network --load=guix.scm +``` #### Running Tests @@ -62,6 +55,26 @@ To spin up the server: env FLASK_DEBUG=1 FLASK_APP="main.py" flask run --port=8080 ``` +##### Using python-pip + +IMPORTANT NOTE: we do not recommend using pip tools, use Guix instead + +1. Prepare your system. You need to make you have python > 3.8, and + the ability to install modules. +2. Create and enter your virtualenv: + +```bash +virtualenv --python python3 venv +. venv/bin/activate +``` +3. Install the required packages + +```bash +# The --ignore-installed flag forces packages to +# get installed in the venv even if they existed +# in the global env +pip install -r requirements.txt --ignore-installed +``` #### A note on dependencies -- cgit v1.2.3
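
A possible smoke test for the `/api/qtl/run` endpoint introduced in the rQTL patch above. This is only a sketch: it assumes the development server is running locally on port 8080 (as shown in the README patch), that the `general` blueprint is mounted under `/api` (as the integration test implies), and that `geno_file_test`/`pheno_file_test` are placeholder file names rather than real data files.

```python
# Sketch: exercise POST /api/qtl/run/<geno_filestr>/<pheno_filestr>
# Assumes a local dev server started with `flask run --port=8080`
# and placeholder geno/pheno file names (hypothetical inputs).
import json
import urllib.request

geno, pheno = "geno_file_test", "pheno_file_test"
url = f"http://localhost:8080/api/qtl/run/{geno}/{pheno}"

req = urllib.request.Request(url, data=b"", method="POST")  # empty POST body
with urllib.request.urlopen(req) as resp:
    print(resp.status)                       # the endpoint returns 201 on success
    print(json.loads(resp.read().decode()))  # whatever run_cmd captured from rqtl_wrapper.R
```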