From 9e892146abeeeff5ef4d08f6773a2272fe752e0c Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 6 Jun 2019 15:34:07 -0500 Subject: Made some changes to REST API, including adding limit_to option to mapping --- wqflask/wqflask/api/mapping.py | 263 ++++++++++++++++++++++------------------- wqflask/wqflask/api/router.py | 63 +++++++--- 2 files changed, 188 insertions(+), 138 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/api/mapping.py b/wqflask/wqflask/api/mapping.py index 83c61796..d830cefc 100644 --- a/wqflask/wqflask/api/mapping.py +++ b/wqflask/wqflask/api/mapping.py @@ -1,122 +1,141 @@ -from __future__ import absolute_import, division, print_function - -import string - -from base import data_set -from base import webqtlConfig -from base.trait import GeneralTrait, retrieve_sample_data - -from utility import helper_functions -from wqflask.marker_regression import gemma_mapping, rqtl_mapping, qtlreaper_mapping, plink_mapping - -import utility.logger -logger = utility.logger.getLogger(__name__ ) - -def do_mapping_for_api(start_vars): - assert('db' in start_vars) - assert('trait_id' in start_vars) - - dataset = data_set.create_dataset(dataset_name = start_vars['db']) - dataset.group.get_markers() - this_trait = GeneralTrait(dataset = dataset, name = start_vars['trait_id']) - this_trait = retrieve_sample_data(this_trait, dataset) - - samples = [] - vals = [] - - for sample in dataset.group.samplelist: - in_trait_data = False - for item in this_trait.data: - if this_trait.data[item].name == sample: - value = str(this_trait.data[item].value) - samples.append(item) - vals.append(value) - in_trait_data = True - break - if not in_trait_data: - vals.append("x") - - mapping_params = initialize_parameters(start_vars, dataset, this_trait) - - covariates = "" #ZS: It seems to take an empty string as default. This should probably be changed. - - if mapping_params['mapping_method'] == "gemma": - header_row = ["name", "chr", "Mb", "lod_score", "p_value"] - if mapping_params['use_loco'] == "True": #ZS: gemma_mapping returns both results and the filename for LOCO, so need to only grab the former for api - result_markers = gemma_mapping.run_gemma(this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])[0] - else: - result_markers = gemma_mapping.run_gemma(this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf']) - elif mapping_params['mapping_method'] == "rqtl": - header_row = ["name", "chr", "Mb", "lod_score"] - if mapping_params['num_perm'] > 0: - _sperm_output, _suggestive, _significant, result_markers = rqtl_mapping.run_rqtl_geno(vals, dataset, mapping_params['rqtl_method'], mapping_params['rqtl_model'], - mapping_params['perm_check'], mapping_params['num_perm'], - mapping_params['do_control'], mapping_params['control_marker'], - mapping_params['manhattan_plot'], mapping_params['pair_scan']) - else: - result_markers = rqtl_mapping.run_rqtl_geno(vals, dataset, mapping_params['rqtl_method'], mapping_params['rqtl_model'], - mapping_params['perm_check'], mapping_params['num_perm'], - mapping_params['do_control'], mapping_params['control_marker'], - mapping_params['manhattan_plot'], mapping_params['pair_scan']) - - output_rows = [] - output_rows.append(header_row) - for marker in result_markers: - this_row = [marker[header] for header in header_row] - output_rows.append(this_row) - - return output_rows - - -def initialize_parameters(start_vars, dataset, this_trait): - mapping_params = {} - mapping_params['mapping_method'] = "gemma" - if 'method' in start_vars: - mapping_params['mapping_method'] = start_vars['method'] - - if mapping_params['mapping_method'] == "rqtl": - mapping_params['rqtl_method'] = "hk" - mapping_params['rqtl_model'] = "normal" - mapping_params['do_control'] = False - mapping_params['control_marker'] = "" - mapping_params['manhattan_plot'] = True - mapping_params['pair_scan'] = False - if 'rqtl_method' in start_vars: - mapping_params['rqtl_method'] = start_vars['rqtl_method'] - if 'rqtl_model' in start_vars: - mapping_params['rqtl_model'] = start_vars['rqtl_model'] - if 'control_marker' in start_vars: - mapping_params['control_marker'] = start_vars['control_marker'] - mapping_params['do_control'] = True - if 'pair_scan' in start_vars: - if start_vars['pair_scan'].lower() == "true": - mapping_params['pair_scan'] = True - - if 'interval_mapping' in start_vars: - if start_vars['interval_mapping'].lower() == "true": - mapping_params['manhattan_plot'] = False - elif 'manhattan_plot' in start_vars: - if start_vars['manhattan_plot'].lower() != "true": - mapping_params['manhattan_plot'] = False - - mapping_params['maf'] = 0.01 - if 'maf' in start_vars: - mapping_params['maf'] = start_vars['maf'] # Minor allele frequency - - mapping_params['use_loco'] = False - if 'use_loco' in start_vars: - if start_vars['use_loco'].lower() != "false": - mapping_params['use_loco'] = start_vars['use_loco'] - - mapping_params['num_perm'] = 0 - mapping_params['perm_check'] = False - if 'num_perm' in start_vars: - try: - mapping_params['num_perm'] = int(start_vars['num_perm']) - mapping_params['perm_check'] = "ON" - except: - mapping_params['perm_check'] = False - - return mapping_params - - +from __future__ import absolute_import, division, print_function + +import string + +from base import data_set +from base import webqtlConfig +from base.trait import GeneralTrait, retrieve_sample_data + +from utility import helper_functions +from wqflask.marker_regression import gemma_mapping, rqtl_mapping, qtlreaper_mapping, plink_mapping + +import utility.logger +logger = utility.logger.getLogger(__name__ ) + +def do_mapping_for_api(start_vars): + assert('db' in start_vars) + assert('trait_id' in start_vars) + + dataset = data_set.create_dataset(dataset_name = start_vars['db']) + dataset.group.get_markers() + this_trait = GeneralTrait(dataset = dataset, name = start_vars['trait_id']) + this_trait = retrieve_sample_data(this_trait, dataset) + + samples = [] + vals = [] + + for sample in dataset.group.samplelist: + in_trait_data = False + for item in this_trait.data: + if this_trait.data[item].name == sample: + value = str(this_trait.data[item].value) + samples.append(item) + vals.append(value) + in_trait_data = True + break + if not in_trait_data: + vals.append("x") + + mapping_params = initialize_parameters(start_vars, dataset, this_trait) + + covariates = "" #ZS: It seems to take an empty string as default. This should probably be changed. + + if mapping_params['mapping_method'] == "gemma": + header_row = ["name", "chr", "Mb", "lod_score", "p_value"] + if mapping_params['use_loco'] == "True": #ZS: gemma_mapping returns both results and the filename for LOCO, so need to only grab the former for api + result_markers = gemma_mapping.run_gemma(this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])[0] + else: + result_markers = gemma_mapping.run_gemma(this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf']) + elif mapping_params['mapping_method'] == "rqtl": + header_row = ["name", "chr", "cM", "lod_score"] + if mapping_params['num_perm'] > 0: + _sperm_output, _suggestive, _significant, result_markers = rqtl_mapping.run_rqtl_geno(vals, dataset, mapping_params['rqtl_method'], mapping_params['rqtl_model'], + mapping_params['perm_check'], mapping_params['num_perm'], + mapping_params['do_control'], mapping_params['control_marker'], + mapping_params['manhattan_plot'], mapping_params['pair_scan']) + else: + result_markers = rqtl_mapping.run_rqtl_geno(vals, dataset, mapping_params['rqtl_method'], mapping_params['rqtl_model'], + mapping_params['perm_check'], mapping_params['num_perm'], + mapping_params['do_control'], mapping_params['control_marker'], + mapping_params['manhattan_plot'], mapping_params['pair_scan']) + + if mapping_params['limit_to']: + result_markers = result_markers[:mapping_params['limit_to']] + + if mapping_params['format'] == "csv": + output_rows = [] + output_rows.append(header_row) + for marker in result_markers: + this_row = [marker[header] for header in header_row] + output_rows.append(this_row) + + return output_rows, mapping_params['format'] + elif mapping_params['format'] == "json": + return result_markers, mapping_params['format'] + else: + return result_markers, None + + + +def initialize_parameters(start_vars, dataset, this_trait): + mapping_params = {} + + mapping_params['format'] = "json" + if 'format' in start_vars: + mapping_params['format'] = start_vars['format'] + + mapping_params['limit_to'] = False + if 'limit_to' in start_vars: + if start_vars['limit_to'].isdigit(): + mapping_params['limit_to'] = int(start_vars['limit_to']) + + mapping_params['mapping_method'] = "gemma" + if 'method' in start_vars: + mapping_params['mapping_method'] = start_vars['method'] + + if mapping_params['mapping_method'] == "rqtl": + mapping_params['rqtl_method'] = "hk" + mapping_params['rqtl_model'] = "normal" + mapping_params['do_control'] = False + mapping_params['control_marker'] = "" + mapping_params['manhattan_plot'] = True + mapping_params['pair_scan'] = False + if 'rqtl_method' in start_vars: + mapping_params['rqtl_method'] = start_vars['rqtl_method'] + if 'rqtl_model' in start_vars: + mapping_params['rqtl_model'] = start_vars['rqtl_model'] + if 'control_marker' in start_vars: + mapping_params['control_marker'] = start_vars['control_marker'] + mapping_params['do_control'] = True + if 'pair_scan' in start_vars: + if start_vars['pair_scan'].lower() == "true": + mapping_params['pair_scan'] = True + + if 'interval_mapping' in start_vars: + if start_vars['interval_mapping'].lower() == "true": + mapping_params['manhattan_plot'] = False + elif 'manhattan_plot' in start_vars: + if start_vars['manhattan_plot'].lower() != "true": + mapping_params['manhattan_plot'] = False + + mapping_params['maf'] = 0.01 + if 'maf' in start_vars: + mapping_params['maf'] = start_vars['maf'] # Minor allele frequency + + mapping_params['use_loco'] = True + if 'use_loco' in start_vars: + if (start_vars['use_loco'].lower() == "false") or (start_vars['use_loco'].lower() == "no"): + mapping_params['use_loco'] = False + + mapping_params['num_perm'] = 0 + mapping_params['perm_check'] = False + if 'num_perm' in start_vars: + try: + mapping_params['num_perm'] = int(start_vars['num_perm']) + mapping_params['perm_check'] = "ON" + except: + mapping_params['perm_check'] = False + + return mapping_params + + diff --git a/wqflask/wqflask/api/router.py b/wqflask/wqflask/api/router.py index 8e2cbbdc..cbff6e83 100644 --- a/wqflask/wqflask/api/router.py +++ b/wqflask/wqflask/api/router.py @@ -293,7 +293,7 @@ def get_dataset_info(dataset_name, group_name = None, file_format="json"): @app.route("/api/v_{}/traits/".format(version), methods=("GET",)) @app.route("/api/v_{}/traits/.".format(version), methods=("GET",)) def fetch_traits(dataset_name, file_format = "json"): - trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids(dataset_name) + trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids(dataset_name, request.args) if ("ids_only" in request.args) and (len(trait_ids) > 0): if file_format == "json": filename = dataset_name + "_trait_ids.json" @@ -367,6 +367,10 @@ def fetch_traits(dataset_name, file_format = "json"): field_list = ["Id", "PhenotypeId", "PublicationId", "Locus", "LRS", "Additive", "Sequence"] + if 'limit_to' in request.args: + limit_number = request.args['limit_to'] + query += "LIMIT " + str(limit_number) + if file_format == "json": filename = dataset_name + "_traits.json" @@ -408,7 +412,7 @@ def fetch_traits(dataset_name, file_format = "json"): @app.route("/api/v_{}/sample_data/".format(version)) @app.route("/api/v_{}/sample_data/.".format(version)) def all_sample_data(dataset_name, file_format = "csv"): - trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids(dataset_name) + trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids(dataset_name, request.args) if len(trait_ids) > 0: sample_list = get_samplelist(dataset_name) @@ -496,7 +500,7 @@ def all_sample_data(dataset_name, file_format = "csv"): output.headers["Content-type"] = "text/csv" return output else: - return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + return return_error(code=415, source=request.url_rule.rule, title="Unsupported file format", details="") else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") @@ -669,25 +673,35 @@ def get_corr_results(): @app.route("/api/v_{}/mapping".format(version), methods=("GET",)) def get_mapping_results(): - results = mapping.do_mapping_for_api(request.args) + results, format = mapping.do_mapping_for_api(request.args) if len(results) > 0: - filename = "mapping_" + datetime.datetime.utcnow().strftime("%b_%d_%Y_%I:%M%p") + ".csv" + if format == "csv": + filename = "mapping_" + datetime.datetime.utcnow().strftime("%b_%d_%Y_%I:%M%p") + ".csv" - si = StringIO.StringIO() - csv_writer = csv.writer(si) - csv_writer.writerows(results) - output = make_response(si.getvalue()) - output.headers["Content-Disposition"] = "attachment; filename=" + filename - output.headers["Content-type"] = "text/csv" + si = StringIO.StringIO() + csv_writer = csv.writer(si) + csv_writer.writerows(results) + output = make_response(si.getvalue()) + output.headers["Content-Disposition"] = "attachment; filename=" + filename + output.headers["Content-type"] = "text/csv" - return output + return output + elif format == "json": + return flask.jsonify(results) + else: + return return_error(code=415, source=request.url_rule.rule, title="Unsupported Format", details="") else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") @app.route("/api/v_{}/genotypes/".format(version)) @app.route("/api/v_{}/genotypes/.".format(version)) def get_genotypes(group_name, file_format="csv"): + limit_num = None + if 'limit_to' in request.args: + if request.args['limit_to'].isdigit(): + limit_num = int(request.args['limit_to']) + si = StringIO.StringIO() if file_format == "csv" or file_format == "geno": filename = group_name + ".geno" @@ -695,11 +709,15 @@ def get_genotypes(group_name, file_format="csv"): if os.path.isfile("{0}/{1}.geno".format(flat_files("genotype"), group_name)): output_lines = [] with open("{0}/{1}.geno".format(flat_files("genotype"), group_name)) as genofile: + i = 0 for line in genofile: if line[0] == "#" or line[0] == "@": output_lines.append([line.strip()]) else: + if i >= limit_num: + break output_lines.append(line.split()) + i += 1 csv_writer = csv.writer(si, delimiter = "\t", escapechar = "\\", quoting = csv.QUOTE_NONE) else: @@ -710,8 +728,12 @@ def get_genotypes(group_name, file_format="csv"): if os.path.isfile("{0}/{1}.geno".format(flat_files("genotype"), group_name)): output_lines = [] with open("{0}/{1}_geno.txt".format(flat_files("genotype/bimbam"), group_name)) as genofile: + i = 0 for line in genofile: + if i >= limit_num: + break output_lines.append([line.strip() for line in line.split(",")]) + i += 1 csv_writer = csv.writer(si, delimiter = ",") else: @@ -736,7 +758,13 @@ def return_error(code, source, title, details): return flask.jsonify(json_ob) -def get_dataset_trait_ids(dataset_name): +def get_dataset_trait_ids(dataset_name, start_vars): + + if 'limit_to' in start_vars: + limit_string = "LIMIT " + str(start_vars['limit_to']) + else: + limit_string = "" + if "Geno" in dataset_name: data_type = "Geno" #ZS: Need to pass back the dataset type query = """ @@ -748,7 +776,8 @@ def get_dataset_trait_ids(dataset_name): Geno.Id = GenoXRef.GenoId AND GenoXRef.GenoFreezeId = GenoFreeze.Id AND GenoFreeze.Name = "{0}" - """.format(dataset_name) + {1} + """.format(dataset_name, limit_string) results = g.db.execute(query).fetchall() @@ -769,7 +798,8 @@ def get_dataset_trait_ids(dataset_name): PublishXRef WHERE PublishXRef.InbredSetId = "{0}" - """.format(dataset_id) + {1} + """.format(dataset_id, limit_string) results = g.db.execute(query).fetchall() @@ -788,7 +818,8 @@ def get_dataset_trait_ids(dataset_name): ProbeSet.Id = ProbeSetXRef.ProbeSetId AND ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND ProbeSetFreeze.Name = "{0}" - """.format(dataset_name) + {1} + """.format(dataset_name, limit_string) results = g.db.execute(query).fetchall() -- cgit v1.2.3