From 61c13a09dba95958f183dc55f3d59c8856b5f753 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 13 Feb 2019 12:41:45 +0000 Subject: Removed pylmm references and related functions --- bin/genenetwork2 | 1 - etc/default_settings.py | 1 - test/requests/mapping_tests.py | 10 -- wqflask/base/data_set.py | 2 +- wqflask/utility/tools.py | 4 - wqflask/wqflask/heatmap/heatmap.py | 5 +- wqflask/wqflask/marker_regression/run_mapping.py | 204 +---------------------- 7 files changed, 4 insertions(+), 223 deletions(-) diff --git a/bin/genenetwork2 b/bin/genenetwork2 index 21f0db13..7c875274 100755 --- a/bin/genenetwork2 +++ b/bin/genenetwork2 @@ -131,7 +131,6 @@ else export LC_ALL=C # FIXME export GUIX_GENENETWORK_FILES="$GN2_PROFILE/share/genenetwork2" export PLINK_COMMAND="$GN2_PROFILE/bin/plink2" - export PYLMM_COMMAND="$GN2_PROFILE/bin/pylmm_redis" export GEMMA_COMMAND="$GN2_PROFILE/bin/gemma" if [ -z $GEMMA_WRAPPER_COMMAND ]; then export GEMMA_WRAPPER_COMMAND="$GN2_PROFILE/bin/gemma-wrapper" diff --git a/etc/default_settings.py b/etc/default_settings.py index 3e54ad1f..a1fe81e5 100644 --- a/etc/default_settings.py +++ b/etc/default_settings.py @@ -89,7 +89,6 @@ GENENETWORK_FILES = HOME+"/genotype_files" # base dir for all static data fil JS_GN_PATH = os.environ['HOME']+"/genenetwork/javascript" # ---- GN2 Executables (overwrite for testing only) -# PYLMM_COMMAND = str.strip(os.popen("which pylmm_redis").read()) # PLINK_COMMAND = str.strip(os.popen("which plink2").read()) # GEMMA_COMMAND = str.strip(os.popen("which gemma").read()) # GEMMA_WRAPPER_COMMAND = str.strip(os.popen("which gemma-wrapper").read()) diff --git a/test/requests/mapping_tests.py b/test/requests/mapping_tests.py index 8eb19de7..6de81bfe 100644 --- a/test/requests/mapping_tests.py +++ b/test/requests/mapping_tests.py @@ -11,16 +11,6 @@ def load_data_from_file(): file_data = json.loads(file_handle.read().encode("utf-8")) return file_data -def check_pylmm_tool_selection(host, data): - print("") - print("pylmm mapping tool selection") - data["method"] = "pylmm" - page = requests.post(host+"/marker_regression", data=data) - doc = fromstring(page.text) - form = doc.forms[1] - assert form.fields["dataset"] == "HC_M2_0606_P" - assert form.fields["value:BXD1"] == "15.034" # Check value in the file - def check_R_qtl_tool_selection(host, data): print("") print("R/qtl mapping tool selection") diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 79f72390..ca6621e9 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -317,7 +317,7 @@ class DatasetGroup(object): mapping_id = g.db.execute("select MappingMethodId from InbredSet where Name= '%s'" % self.name).fetchone()[0] if mapping_id == "1": - mapping_names = ["QTLReaper", "PYLMM", "R/qtl"] + mapping_names = ["QTLReaper", "R/qtl"] elif mapping_id == "2": mapping_names = ["GEMMA"] elif mapping_id == "4": diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py index ea216a35..86ef2e1e 100644 --- a/wqflask/utility/tools.py +++ b/wqflask/utility/tools.py @@ -107,9 +107,6 @@ def js_path(module=None): return try_guix raise "No JS path found for "+module+" (if not in Guix check JS_GN_PATH)" -def pylmm_command(guess=None): - return assert_bin(get_setting("PYLMM_COMMAND",guess)) - def gemma_command(guess=None): return assert_bin(get_setting("GEMMA_COMMAND",guess)) @@ -276,7 +273,6 @@ SMTP_CONNECT = get_setting('SMTP_CONNECT') SMTP_USERNAME = get_setting('SMTP_USERNAME') SMTP_PASSWORD = get_setting('SMTP_PASSWORD') -PYLMM_COMMAND = app_set("PYLMM_COMMAND",pylmm_command()) GEMMA_COMMAND = app_set("GEMMA_COMMAND",gemma_command()) assert(GEMMA_COMMAND is not None) PLINK_COMMAND = app_set("PLINK_COMMAND",plink_command()) diff --git a/wqflask/wqflask/heatmap/heatmap.py b/wqflask/wqflask/heatmap/heatmap.py index ff589693..1bdf252b 100644 --- a/wqflask/wqflask/heatmap/heatmap.py +++ b/wqflask/wqflask/heatmap/heatmap.py @@ -24,12 +24,9 @@ import reaper from base.trait import GeneralTrait from base import data_set from base import species -# from wqflask.my_pylmm.pyLMM import lmm -# from wqflask.my_pylmm.pyLMM import input from utility import helper_functions from utility import Plot, Bunch from utility import temp_data -from utility.tools import PYLMM_COMMAND from MySQLdb import escape_string as escape @@ -144,4 +141,4 @@ class Heatmap(object): if qtl.additive > 0: self.trait_results[this_trait.name].append(-float(qtl.lrs)) else: - self.trait_results[this_trait.name].append(float(qtl.lrs)) \ No newline at end of file + self.trait_results[this_trait.name].append(float(qtl.lrs)) diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py index 3057e340..73d985b8 100644 --- a/wqflask/wqflask/marker_regression/run_mapping.py +++ b/wqflask/wqflask/marker_regression/run_mapping.py @@ -38,7 +38,7 @@ from utility import temp_data from utility.benchmark import Bench from wqflask.marker_regression import gemma_mapping, rqtl_mapping, qtlreaper_mapping, plink_mapping -from utility.tools import locate, locate_ignore_error, PYLMM_COMMAND, GEMMA_COMMAND, PLINK_COMMAND, TEMPDIR +from utility.tools import locate, locate_ignore_error, GEMMA_COMMAND, PLINK_COMMAND, TEMPDIR from utility.external import shell from base.webqtlConfig import TMPDIR, GENERATED_TEXT_DIR @@ -239,11 +239,6 @@ class RunMapping(object): self.manhattan_plot = True results = plink_mapping.run_plink(self.this_trait, self.dataset, self.species, self.vals, self.maf) #results = self.run_plink() - elif self.mapping_method == "pylmm": - logger.debug("RUNNING PYLMM") - if self.num_perm > 0: - self.run_permutations(str(temp_uuid)) - results = self.gen_data(str(temp_uuid)) else: logger.debug("RUNNING NOTHING") @@ -354,201 +349,6 @@ class RunMapping(object): count, p_values = self.parse_rqtl_output(plink_output_filename) - def run_permutations(self, temp_uuid): - """Runs permutations and gets significant and suggestive LOD scores""" - - top_lod_scores = [] - - #logger.debug("self.num_perm:", self.num_perm) - - for permutation in range(self.num_perm): - - pheno_vector = np.array([val == "x" and np.nan or float(val) for val in self.vals]) - np.random.shuffle(pheno_vector) - - key = "pylmm:input:" + temp_uuid - - if self.dataset.group.species == "human": - p_values, t_stats = self.gen_human_results(pheno_vector, key, temp_uuid) - else: - genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] - - no_val_samples = self.identify_empty_samples() - trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) - - genotype_matrix = np.array(trimmed_genotype_data).T - - params = dict(pheno_vector = pheno_vector.tolist(), - genotype_matrix = genotype_matrix.tolist(), - restricted_max_likelihood = True, - refit = False, - temp_uuid = temp_uuid, - - # meta data - timestamp = datetime.datetime.now().isoformat(), - ) - - json_params = json.dumps(params) - Redis.set(key, json_params) - Redis.expire(key, 60*60) - - command = PYLMM_COMMAND+' --key {} --species {}'.format(key,"other") - shell(command) - - json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60) - results = json.loads(json_results[1]) - p_values = [float(result) for result in results['p_values']] - - lowest_p_value = 1 - for p_value in p_values: - if p_value < lowest_p_value: - lowest_p_value = p_value - - #logger.debug("lowest_p_value:", lowest_p_value) - top_lod_scores.append(-math.log10(lowest_p_value)) - - #logger.debug("top_lod_scores:", top_lod_scores) - - self.suggestive = np.percentile(top_lod_scores, 67) - self.significant = np.percentile(top_lod_scores, 95) - - def gen_data(self, temp_uuid): - """Generates p-values for each marker""" - - logger.debug("self.vals is:", self.vals) - pheno_vector = np.array([(val == "x" or val == "") and np.nan or float(val) for val in self.vals]) - - #lmm_uuid = str(uuid.uuid4()) - - key = "pylmm:input:" + temp_uuid - logger.debug("key is:", pf(key)) - #with Bench("Loading cache"): - # result = Redis.get(key) - - if self.dataset.group.species == "human": - p_values, t_stats = self.gen_human_results(pheno_vector, key, temp_uuid) - - else: - logger.debug("NOW CWD IS:", os.getcwd()) - genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] - - no_val_samples = self.identify_empty_samples() - trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) - - genotype_matrix = np.array(genotype_data).T - - #logger.debug("pheno_vector: ", pf(pheno_vector)) - #logger.debug("genotype_matrix: ", pf(genotype_matrix)) - #logger.debug("genotype_matrix.shape: ", pf(genotype_matrix.shape)) - - #params = {"pheno_vector": pheno_vector, - # "genotype_matrix": genotype_matrix, - # "restricted_max_likelihood": True, - # "refit": False, - # "temp_data": tempdata} - - # logger.debug("genotype_matrix:", str(genotype_matrix.tolist())) - # logger.debug("pheno_vector:", str(pheno_vector.tolist())) - - params = dict(pheno_vector = pheno_vector.tolist(), - genotype_matrix = genotype_matrix.tolist(), - restricted_max_likelihood = True, - refit = False, - temp_uuid = temp_uuid, - - # meta data - timestamp = datetime.datetime.now().isoformat(), - ) - - json_params = json.dumps(params) - #logger.debug("json_params:", json_params) - Redis.set(key, json_params) - Redis.expire(key, 60*60) - logger.debug("before printing command") - - command = PYLMM_COMMAND + ' --key {} --species {}'.format(key, "other") - logger.debug("command is:", command) - logger.debug("after printing command") - - shell(command) - - #t_stats, p_values = lmm.run(key) - #lmm.run(key) - - json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60) - results = json.loads(json_results[1]) - p_values = [float(result) for result in results['p_values']] - t_stats = results['t_stats'] - - #t_stats, p_values = lmm.run( - # pheno_vector, - # genotype_matrix, - # restricted_max_likelihood=True, - # refit=False, - # temp_data=tempdata - #) - #logger.debug("p_values:", p_values) - - self.dataset.group.markers.add_pvalues(p_values) - - return self.dataset.group.markers.markers - - def gen_human_results(self, pheno_vector, key, temp_uuid): - file_base = locate(self.dataset.group.name,"mapping") - - plink_input = input.plink(file_base, type='b') - input_file_name = os.path.join(webqtlConfig.SNP_PATH, self.dataset.group.name + ".snps.gz") - - pheno_vector = pheno_vector.reshape((len(pheno_vector), 1)) - covariate_matrix = np.ones((pheno_vector.shape[0],1)) - kinship_matrix = np.fromfile(open(file_base + '.kin','r'),sep=" ") - kinship_matrix.resize((len(plink_input.indivs),len(plink_input.indivs))) - - logger.debug("Before creating params") - - params = dict(pheno_vector = pheno_vector.tolist(), - covariate_matrix = covariate_matrix.tolist(), - input_file_name = input_file_name, - kinship_matrix = kinship_matrix.tolist(), - refit = False, - temp_uuid = temp_uuid, - - # meta data - timestamp = datetime.datetime.now().isoformat(), - ) - - logger.debug("After creating params") - - json_params = json.dumps(params) - Redis.set(key, json_params) - Redis.expire(key, 60*60) - - logger.debug("Before creating the command") - - command = PYLMM_COMMAND+' --key {} --species {}'.format(key, "human") - - logger.debug("command is:", command) - - os.system(command) - - json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60) - results = json.loads(json_results[1]) - t_stats = results['t_stats'] - p_values = results['p_values'] - - - #p_values, t_stats = lmm.run_human(key) - - #p_values, t_stats = lmm.run_human( - # pheno_vector, - # covariate_matrix, - # input_file_name, - # kinship_matrix, - # loading_progress=tempdata - # ) - - return p_values, t_stats - def identify_empty_samples(self): no_val_samples = [] for sample_count, val in enumerate(self.vals): @@ -657,4 +457,4 @@ def trim_markers_for_table(markers): trimmed_sorted_markers = sorted_markers[:2000] return trimmed_sorted_markers else: - return sorted_markers \ No newline at end of file + return sorted_markers -- cgit v1.2.3