From 1217563ede4aef48b124613ea852c4db8803e6b4 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 24 May 2019 11:36:08 -0500 Subject: Added currently api progress, which will henceforth be appropriately committed in this branch --- wqflask/wqflask/__init__.py | 1 + wqflask/wqflask/api/__init__.py | 0 wqflask/wqflask/api/correlation.py | 237 ++++++++++++ wqflask/wqflask/api/mapping.py | 122 ++++++ wqflask/wqflask/api/router.py | 747 +++++++++++++++++++++++++++++++++++++ 5 files changed, 1107 insertions(+) create mode 100644 wqflask/wqflask/api/__init__.py create mode 100644 wqflask/wqflask/api/correlation.py create mode 100644 wqflask/wqflask/api/mapping.py create mode 100644 wqflask/wqflask/api/router.py diff --git a/wqflask/wqflask/__init__.py b/wqflask/wqflask/__init__.py index bc8e9900..399e794d 100644 --- a/wqflask/wqflask/__init__.py +++ b/wqflask/wqflask/__init__.py @@ -22,3 +22,4 @@ app.jinja_env.globals.update( ) import wqflask.views +from wqflask.api import router \ No newline at end of file diff --git a/wqflask/wqflask/api/__init__.py b/wqflask/wqflask/api/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/wqflask/wqflask/api/correlation.py b/wqflask/wqflask/api/correlation.py new file mode 100644 index 00000000..66eb94ac --- /dev/null +++ b/wqflask/wqflask/api/correlation.py @@ -0,0 +1,237 @@ +from __future__ import absolute_import, division, print_function + +import collections + +import scipy + +from MySQLdb import escape_string as escape + +from flask import g + +from base import data_set +from base.trait import GeneralTrait, retrieve_sample_data + +from wqflask.correlation.show_corr_results import generate_corr_json +from wqflask.correlation import correlation_functions + +from utility import webqtlUtil, helper_functions, corr_result_helpers +from utility.benchmark import Bench + +import utility.logger +logger = utility.logger.getLogger(__name__ ) + +def do_correlation(start_vars): + assert('db' in start_vars) + assert('target_db' in start_vars) + assert('trait_id' in start_vars) + + this_dataset = data_set.create_dataset(dataset_name = start_vars['db']) + target_dataset = data_set.create_dataset(dataset_name = start_vars['target_db']) + this_trait = GeneralTrait(dataset = this_dataset, name = start_vars['trait_id']) + this_trait = retrieve_sample_data(this_trait, this_dataset) + + corr_params = init_corr_params(start_vars) + + corr_results = calculate_results(this_trait, this_dataset, target_dataset, corr_params) + #corr_results = collections.OrderedDict(sorted(corr_results.items(), key=lambda t: -abs(t[1][0]))) + + final_results = [] + for _trait_counter, trait in enumerate(corr_results.keys()[:corr_params['return_count']]): + if corr_params['type'] == "tissue": + [sample_r, num_overlap, sample_p, symbol] = corr_results[trait] + result_dict = { + "trait" : trait, + "sample_r" : sample_r, + "#_strains" : num_overlap, + "p_value" : sample_p, + "symbol" : symbol + } + elif corr_params['type'] == "literature" or corr_params['type'] == "lit": + [gene_id, sample_r] = corr_results[trait] + result_dict = { + "trait" : trait, + "sample_r" : sample_r, + "gene_id" : gene_id + } + else: + [sample_r, sample_p, num_overlap] = corr_results[trait] + result_dict = { + "trait" : trait, + "sample_r" : sample_r, + "#_strains" : num_overlap, + "p_value" : sample_p + } + + final_results.append(result_dict) + + # json_corr_results = generate_corr_json(final_corr_results, this_trait, this_dataset, target_dataset, for_api = True) + + return final_results + +def 
calculate_results(this_trait, this_dataset, target_dataset, corr_params): + corr_results = {} + + target_dataset.get_trait_data() + + if corr_params['type'] == "tissue": + trait_symbol_dict = this_dataset.retrieve_genes("Symbol") + corr_results = do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params) + sorted_results = collections.OrderedDict(sorted(corr_results.items(), + key=lambda t: -abs(t[1][1]))) + elif corr_params['type'] == "literature" or corr_params['type'] == "lit": #ZS: Just so a user can use either "lit" or "literature" + trait_geneid_dict = this_dataset.retrieve_genes("GeneId") + corr_results = do_literature_correlation_for_all_traits(this_trait, this_dataset, trait_geneid_dict, corr_params) + sorted_results = collections.OrderedDict(sorted(corr_results.items(), + key=lambda t: -abs(t[1][1]))) + else: + for target_trait, target_vals in target_dataset.trait_data.iteritems(): + result = get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, corr_params['type']) + if result is not None: + corr_results[target_trait] = result + + sorted_results = collections.OrderedDict(sorted(corr_results.items(), key=lambda t: -abs(t[1][0]))) + + return sorted_results + +def do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params, tissue_dataset_id=1): + #Gets tissue expression values for the primary trait + primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list = [this_trait.symbol]) + + if this_trait.symbol.lower() in primary_trait_tissue_vals_dict: + primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower()] + + corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list=trait_symbol_dict.values()) + + tissue_corr_data = {} + for trait, symbol in trait_symbol_dict.iteritems(): + if symbol and symbol.lower() in corr_result_tissue_vals_dict: + this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()] + + result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, + this_trait_tissue_values, + corr_params['method']) + + tissue_corr_data[trait] = [result[0], result[1], result[2], symbol] + + return tissue_corr_data + +def do_literature_correlation_for_all_traits(this_trait, target_dataset, trait_geneid_dict, corr_params): + input_trait_mouse_gene_id = convert_to_mouse_gene_id(target_dataset.group.species.lower(), this_trait.geneid) + + lit_corr_data = {} + for trait, gene_id in trait_geneid_dict.iteritems(): + mouse_gene_id = convert_to_mouse_gene_id(target_dataset.group.species.lower(), gene_id) + + if mouse_gene_id and str(mouse_gene_id).find(";") == -1: + result = g.db.execute( + """SELECT value + FROM LCorrRamin3 + WHERE GeneId1='%s' and + GeneId2='%s' + """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id)) + ).fetchone() + if not result: + result = g.db.execute("""SELECT value + FROM LCorrRamin3 + WHERE GeneId2='%s' and + GeneId1='%s' + """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id)) + ).fetchone() + if result: + lit_corr = result.value + lit_corr_data[trait] = [gene_id, lit_corr] + else: + lit_corr_data[trait] = [gene_id, 0] + else: + lit_corr_data[trait] = [gene_id, 0] + + return lit_corr_data + +def get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, type): + """ + Calculates the sample r (or rho) and p-value + + Given a primary trait and a target trait's sample values, + calculates either the pearson 
r or spearman rho and the p-value + using the corresponding scipy functions. + """ + + this_trait_vals = [] + shared_target_vals = [] + for i, sample in enumerate(target_dataset.group.samplelist): + if sample in this_trait.data: + this_sample_value = this_trait.data[sample].value + target_sample_value = target_vals[i] + this_trait_vals.append(this_sample_value) + shared_target_vals.append(target_sample_value) + + this_trait_vals, shared_target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, shared_target_vals) + + if type == 'pearson': + sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, shared_target_vals) + else: + sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, shared_target_vals) + + if num_overlap > 5: + if scipy.isnan(sample_r): + return None + else: + return [sample_r, sample_p, num_overlap] + +def convert_to_mouse_gene_id(species=None, gene_id=None): + """If the species is rat or human, translate the gene_id to the mouse geneid + + If there is no input gene_id or there's no corresponding mouse gene_id, return None + + """ + if not gene_id: + return None + + mouse_gene_id = None + + if species == 'mouse': + mouse_gene_id = gene_id + + elif species == 'rat': + + query = """SELECT mouse + FROM GeneIDXRef + WHERE rat='%s'""" % escape(gene_id) + + result = g.db.execute(query).fetchone() + if result != None: + mouse_gene_id = result.mouse + + elif species == 'human': + + query = """SELECT mouse + FROM GeneIDXRef + WHERE human='%s'""" % escape(gene_id) + + result = g.db.execute(query).fetchone() + if result != None: + mouse_gene_id = result.mouse + + return mouse_gene_id + +def init_corr_params(start_vars): + method = "pearson" + if 'method' in start_vars: + method = start_vars['method'] + + type = "sample" + if 'type' in start_vars: + type = start_vars['type'] + + return_count = 500 + if 'return_count' in start_vars: + assert(start_vars['return_count'].isdigit()) + return_count = int(start_vars['return_count']) + + corr_params = { + 'method' : method, + 'type' : type, + 'return_count' : return_count + } + + return corr_params \ No newline at end of file diff --git a/wqflask/wqflask/api/mapping.py b/wqflask/wqflask/api/mapping.py new file mode 100644 index 00000000..83c61796 --- /dev/null +++ b/wqflask/wqflask/api/mapping.py @@ -0,0 +1,122 @@ +from __future__ import absolute_import, division, print_function + +import string + +from base import data_set +from base import webqtlConfig +from base.trait import GeneralTrait, retrieve_sample_data + +from utility import helper_functions +from wqflask.marker_regression import gemma_mapping, rqtl_mapping, qtlreaper_mapping, plink_mapping + +import utility.logger +logger = utility.logger.getLogger(__name__ ) + +def do_mapping_for_api(start_vars): + assert('db' in start_vars) + assert('trait_id' in start_vars) + + dataset = data_set.create_dataset(dataset_name = start_vars['db']) + dataset.group.get_markers() + this_trait = GeneralTrait(dataset = dataset, name = start_vars['trait_id']) + this_trait = retrieve_sample_data(this_trait, dataset) + + samples = [] + vals = [] + + for sample in dataset.group.samplelist: + in_trait_data = False + for item in this_trait.data: + if this_trait.data[item].name == sample: + value = str(this_trait.data[item].value) + samples.append(item) + vals.append(value) + in_trait_data = True + break + if not in_trait_data: + vals.append("x") + + mapping_params = initialize_parameters(start_vars, dataset, this_trait) + + covariates = "" #ZS: It seems to take an empty string as 
default. This should probably be changed. + + if mapping_params['mapping_method'] == "gemma": + header_row = ["name", "chr", "Mb", "lod_score", "p_value"] + if mapping_params['use_loco'] == "True": #ZS: gemma_mapping returns both results and the filename for LOCO, so need to only grab the former for api + result_markers = gemma_mapping.run_gemma(this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])[0] + else: + result_markers = gemma_mapping.run_gemma(this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf']) + elif mapping_params['mapping_method'] == "rqtl": + header_row = ["name", "chr", "Mb", "lod_score"] + if mapping_params['num_perm'] > 0: + _sperm_output, _suggestive, _significant, result_markers = rqtl_mapping.run_rqtl_geno(vals, dataset, mapping_params['rqtl_method'], mapping_params['rqtl_model'], + mapping_params['perm_check'], mapping_params['num_perm'], + mapping_params['do_control'], mapping_params['control_marker'], + mapping_params['manhattan_plot'], mapping_params['pair_scan']) + else: + result_markers = rqtl_mapping.run_rqtl_geno(vals, dataset, mapping_params['rqtl_method'], mapping_params['rqtl_model'], + mapping_params['perm_check'], mapping_params['num_perm'], + mapping_params['do_control'], mapping_params['control_marker'], + mapping_params['manhattan_plot'], mapping_params['pair_scan']) + + output_rows = [] + output_rows.append(header_row) + for marker in result_markers: + this_row = [marker[header] for header in header_row] + output_rows.append(this_row) + + return output_rows + + +def initialize_parameters(start_vars, dataset, this_trait): + mapping_params = {} + mapping_params['mapping_method'] = "gemma" + if 'method' in start_vars: + mapping_params['mapping_method'] = start_vars['method'] + + if mapping_params['mapping_method'] == "rqtl": + mapping_params['rqtl_method'] = "hk" + mapping_params['rqtl_model'] = "normal" + mapping_params['do_control'] = False + mapping_params['control_marker'] = "" + mapping_params['manhattan_plot'] = True + mapping_params['pair_scan'] = False + if 'rqtl_method' in start_vars: + mapping_params['rqtl_method'] = start_vars['rqtl_method'] + if 'rqtl_model' in start_vars: + mapping_params['rqtl_model'] = start_vars['rqtl_model'] + if 'control_marker' in start_vars: + mapping_params['control_marker'] = start_vars['control_marker'] + mapping_params['do_control'] = True + if 'pair_scan' in start_vars: + if start_vars['pair_scan'].lower() == "true": + mapping_params['pair_scan'] = True + + if 'interval_mapping' in start_vars: + if start_vars['interval_mapping'].lower() == "true": + mapping_params['manhattan_plot'] = False + elif 'manhattan_plot' in start_vars: + if start_vars['manhattan_plot'].lower() != "true": + mapping_params['manhattan_plot'] = False + + mapping_params['maf'] = 0.01 + if 'maf' in start_vars: + mapping_params['maf'] = start_vars['maf'] # Minor allele frequency + + mapping_params['use_loco'] = False + if 'use_loco' in start_vars: + if start_vars['use_loco'].lower() != "false": + mapping_params['use_loco'] = start_vars['use_loco'] + + mapping_params['num_perm'] = 0 + mapping_params['perm_check'] = False + if 'num_perm' in start_vars: + try: + mapping_params['num_perm'] = int(start_vars['num_perm']) + mapping_params['perm_check'] = "ON" + except: + mapping_params['perm_check'] = False + + return mapping_params + + diff --git a/wqflask/wqflask/api/router.py b/wqflask/wqflask/api/router.py new file mode 100644 index 00000000..efc817ea --- 
/dev/null +++ b/wqflask/wqflask/api/router.py @@ -0,0 +1,747 @@ +# GN2 API + +from __future__ import absolute_import, division, print_function + +import os, io, csv, json, datetime + +import StringIO + +import flask +from flask import g, Response, request, make_response, render_template, send_from_directory, jsonify, redirect +import sqlalchemy +from wqflask import app + +from wqflask.api import correlation, mapping + +from utility.tools import flat_files + +import utility.logger +logger = utility.logger.getLogger(__name__ ) + +version = "pre1" + +@app.route("/api/v_{}/".format(version)) +def hello_world(): + return flask.jsonify({'hello':'world'}) + +@app.route("/api/v_{}/species".format(version)) +def get_species_list(): + results = g.db.execute("SELECT SpeciesId, Name, FullName, TaxonomyId FROM Species;") + the_species = results.fetchall() + species_list = [] + for species in the_species: + species_dict = { + 'Id' : species[0], + 'Name' : species[1], + 'FullName' : species[2], + 'TaxonomyId' : species[3] + } + species_list.append(species_dict) + + return flask.jsonify(species_list) + +@app.route("/api/v_{}/species/".format(version)) +@app.route("/api/v_{}/species/.".format(version)) +def get_species_info(species_name, file_format = "json"): + results = g.db.execute("""SELECT SpeciesId, Name, FullName, TaxonomyId + FROM Species + WHERE (Name='{0}' OR FullName='{0}' OR SpeciesName='{0}');""".format(species_name)) + + the_species = results.fetchone() + species_dict = { + 'Id' : the_species[0], + 'Name' : the_species[1], + 'FullName' : the_species[2], + 'TaxonomyId' : the_species[3] + } + + return flask.jsonify(species_dict) + +@app.route("/api/v_{}/groups".format(version)) +@app.route("/api/v_{}//groups".format(version)) +def get_groups_list(species_name=None): + if species_name: + results = g.db.execute("""SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, InbredSet.InbredSetName, + InbredSet.Name, InbredSet.FullName, InbredSet.public, + InbredSet.MappingMethodId, InbredSet.GeneticType + FROM InbredSet, Species + WHERE InbredSet.SpeciesId = Species.Id AND + (Species.Name = '{0}' OR + Species.FullName='{0}' OR + Species.SpeciesName='{0}');""".format(species_name)) + else: + results = g.db.execute("""SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, InbredSet.InbredSetName, + InbredSet.Name, InbredSet.FullName, InbredSet.public, + InbredSet.MappingMethodId, InbredSet.GeneticType + FROM InbredSet;""") + + the_groups = results.fetchall() + if the_groups: + groups_list = [] + for group in the_groups: + group_dict = { + 'Id' : group[0], + 'SpeciesId' : group[1], + 'DisplayName' : group[2], + 'Name' : group[3], + 'FullName' : group[4], + 'public' : group[5], + 'MappingMethodId' : group[6], + 'GeneticType' : group[7] + } + groups_list.append(group_dict) + + return flask.jsonify(groups_list) + else: + return render_template("/api/no_results.html") + +@app.route("/api/v_{}/group/".format(version)) +@app.route("/api/v_{}/group/.".format(version)) +@app.route("/api/v_{}/group//".format(version)) +@app.route("/api/v_{}/group//.".format(version)) +def get_group_info(group_name, species_name = None, file_format = "json"): + if species_name: + results = g.db.execute("""SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, InbredSet.InbredSetName, + InbredSet.Name, InbredSet.FullName, InbredSet.public, + InbredSet.MappingMethodId, InbredSet.GeneticType + FROM InbredSet, Species + WHERE InbredSet.SpeciesId = Species.Id AND + (InbredSet.InbredSetName = '{0}' OR + InbredSet.Name = '{0}' OR + InbredSet.FullName 
= '{0}') AND + (Species.Name = '{1}' OR + Species.FullName='{1}' OR + Species.SpeciesName='{1}');""".format(group_name, species_name)) + else: + results = g.db.execute("""SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, InbredSet.InbredSetName, + InbredSet.Name, InbredSet.FullName, InbredSet.public, + InbredSet.MappingMethodId, InbredSet.GeneticType + FROM InbredSet + WHERE (InbredSet.InbredSetName = '{0}' OR + InbredSet.Name = '{0}' OR + InbredSet.FullName = '{0}');""".format(group_name)) + + group = results.fetchone() + if group: + group_dict = { + 'Id' : group[0], + 'SpeciesId' : group[1], + 'DisplayName' : group[2], + 'Name' : group[3], + 'FullName' : group[4], + 'public' : group[5], + 'MappingMethodId' : group[6], + 'GeneticType' : group[7] + } + + return flask.jsonify(group_dict) + else: + return render_template("/api/no_results.html") + +@app.route("/api/v_{}/datasets/".format(version)) +@app.route("/api/v_{}/datasets//".format(version)) +def get_datasets_for_group(group_name, species_name=None): + if species_name: + results = g.db.execute(""" + SELECT ProbeSetFreeze.Id, ProbeSetFreeze.ProbeFreezeId, ProbeSetFreeze.AvgID, + ProbeSetFreeze.Name, ProbeSetFreeze.Name2, ProbeSetFreeze.FullName, + ProbeSetFreeze.ShortName, ProbeSetFreeze.CreateTime, ProbeSetFreeze.public, + ProbeSetFreeze.confidentiality, ProbeSetFreeze.DataScale + FROM ProbeSetFreeze, ProbeFreeze, InbredSet, Species + WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND + ProbeFreeze.InbredSetId = InbredSet.Id AND + (InbredSet.Name = '{0}' OR InbredSet.InbredSetName = '{0}' OR InbredSet.FullName = '{0}') AND + InbredSet.SpeciesId = Species.Id AND + (Species.SpeciesName = '{1}' OR Species.MenuName = '{1}' OR Species.FullName = '{1}'); + """.format(group_name, species_name)) + else: + results = g.db.execute(""" + SELECT ProbeSetFreeze.Id, ProbeSetFreeze.ProbeFreezeId, ProbeSetFreeze.AvgID, + ProbeSetFreeze.Name, ProbeSetFreeze.Name2, ProbeSetFreeze.FullName, + ProbeSetFreeze.ShortName, ProbeSetFreeze.CreateTime, ProbeSetFreeze.public, + ProbeSetFreeze.confidentiality, ProbeSetFreeze.DataScale + FROM ProbeSetFreeze, ProbeFreeze, InbredSet + WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND + ProbeFreeze.InbredSetId = InbredSet.Id AND + (InbredSet.Name = '{0}' OR InbredSet.InbredSetName = '{0}' OR InbredSet.FullName = '{0}'); + """.format(group_name)) + + the_datasets = results.fetchall() + + if the_datasets: + datasets_list = [] + for dataset in the_datasets: + dataset_dict = { + 'Id' : dataset[0], + 'ProbeFreezeId' : dataset[1], + 'AvgID' : dataset[2], + 'Short_Abbreviation' : dataset[3], + 'Long_Abbreviation' : dataset[4], + 'FullName' : dataset[5], + 'ShortName' : dataset[6], + 'CreateTime' : dataset[7], + 'public' : dataset[8], + 'confidentiality' : dataset[9], + 'DataScale' : dataset[10] + } + datasets_list.append(dataset_dict) + + return flask.jsonify(datasets_list) + else: + return render_template("/api/no_results.html") + +@app.route("/api/v_{}/dataset/".format(version)) +@app.route("/api/v_{}/dataset/.".format(version)) +@app.route("/api/v_{}/dataset//".format(version)) +@app.route("/api/v_{}/dataset//.".format(version)) +def get_dataset_info(dataset_name, group_name = None, file_format="json"): + #ZS: First get ProbeSet (mRNA expression) datasets and then get Phenotype datasets + + datasets_list = [] #ZS: I figure I might as well return a list if there are multiple matches, though I don't know if this will actually happen in practice + + probeset_query = """ + SELECT ProbeSetFreeze.Id, 
ProbeSetFreeze.Name, ProbeSetFreeze.FullName, + ProbeSetFreeze.ShortName, ProbeSetFreeze.DataScale, ProbeFreeze.TissueId, + Tissue.Name, ProbeSetFreeze.public, ProbeSetFreeze.confidentiality + FROM ProbeSetFreeze, ProbeFreeze, Tissue + """ + + where_statement = """ + WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND + ProbeFreeze.TissueId = Tissue.Id AND + """ + if dataset_name.isdigit(): + where_statement += """ + ProbeSetFreeze.Id = '{}' + """.format(dataset_name) + else: + where_statement += """ + (ProbeSetFreeze.Name = '{0}' OR ProbeSetFreeze.Name2 = '{0}' OR + ProbeSetFreeze.FullName = '{0}' OR ProbeSetFreeze.ShortName = '{0}') + """.format(dataset_name) + + probeset_query += where_statement + probeset_results = g.db.execute(probeset_query) + dataset = probeset_results.fetchone() + + if dataset: + dataset_dict = { + 'dataset_type' : "mRNA expression", + 'id' : dataset[0], + 'name' : dataset[1], + 'full_name' : dataset[2], + 'short_name' : dataset[3], + 'data_scale' : dataset[4], + 'tissue_id' : dataset[5], + 'tissue' : dataset[6], + 'public' : dataset[7], + 'confidential' : dataset[8] + } + + datasets_list.append(dataset_dict) + + if group_name: + pheno_query = """ + SELECT PublishXRef.Id, Phenotype.Post_publication_abbreviation, Phenotype.Post_publication_description, + Phenotype.Pre_publication_abbreviation, Phenotype.Pre_publication_description, + Publication.PubMed_ID, Publication.Title, Publication.Year + FROM PublishXRef, Phenotype, Publication, InbredSet + WHERE PublishXRef.InbredSetId = InbredSet.Id AND + PublishXRef.PhenotypeId = Phenotype.Id AND + PublishXRef.PublicationId = Publication.Id AND + InbredSet.Name = '{0}' AND PublishXRef.Id = '{1}' + """.format(group_name, dataset_name) + + logger.debug("QUERY:", pheno_query) + + pheno_results = g.db.execute(pheno_query) + dataset = pheno_results.fetchone() + + if dataset: + if dataset[5]: + dataset_dict = { + 'dataset_type' : "phenotype", + 'id' : dataset[0], + 'name' : dataset[1], + 'description' : dataset[2], + 'pubmed_id' : dataset[5], + 'title' : dataset[6], + 'year' : dataset[7] + } + elif dataset[4]: + dataset_dict = { + 'dataset_type' : "phenotype", + 'id' : dataset[0], + 'name' : dataset[3], + 'description' : dataset[4] + } + else: + dataset_dict = { + 'dataset_type' : "phenotype", + 'id' : dataset[0] + } + + datasets_list.append(dataset_dict) + + if len(datasets_list) > 1: + return flask.jsonify(datasets_list) + elif len(datasets_list) == 1: + return flask.jsonify(dataset_dict) + else: + return render_template("/api/no_results.html") + + +@app.route("/api/v_{}/sample_data/".format(version)) +@app.route("/api/v_{}/sample_data/.".format(version)) +def all_sample_data(dataset_name, file_format = "csv"): + trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids(dataset_name) + + if len(trait_ids) > 0: + sample_list = get_samplelist(dataset_name) + + if data_type == "ProbeSet": + query = """ + SELECT + Strain.Name, Strain.Name2, ProbeSetData.value, ProbeSetData.Id, ProbeSetSE.error + FROM + (ProbeSetData, Strain, ProbeSetXRef) + LEFT JOIN ProbeSetSE ON + (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId) + WHERE + ProbeSetXRef.ProbeSetFreezeId = '{0}' AND + ProbeSetXRef.ProbeSetId = '{1}' AND + ProbeSetXRef.DataId = ProbeSetData.Id AND + ProbeSetData.StrainId = Strain.Id + ORDER BY + Strain.Name + """ + elif data_type == "Geno": + query = """ + SELECT + Strain.Name, Strain.Name2, GenoData.value, GenoData.Id, GenoSE.error + FROM + (GenoData, Strain, GenoXRef) + LEFT JOIN 
GenoSE ON + (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId) + WHERE + GenoXRef.GenoFreezeId = '{0}' AND + GenoXRef.GenoId = '{1}' AND + GenoXRef.DataId = GenoData.Id AND + GenoData.StrainId = Strain.Id + ORDER BY + Strain.Name + """ + else: + query = """ + SELECT + Strain.Name, Strain.Name2, PublishData.value, PublishData.Id, PublishSE.error, NStrain.count + FROM + (PublishData, Strain, PublishXRef) + LEFT JOIN PublishSE ON + (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId) + LEFT JOIN NStrain ON + (NStrain.DataId = PublishData.Id AND + NStrain.StrainId = PublishData.StrainId) + WHERE + PublishXRef.InbredSetId = '{0}' AND + PublishXRef.PhenotypeId = '{1}' AND + PublishData.Id = PublishXRef.DataId AND + PublishData.StrainId = Strain.Id + ORDER BY + Strain.Name + """ + + if file_format == "csv": + filename = dataset_name + "_sample_data.csv" + + results_list = [] + header_list = [] + header_list.append("Trait ID") + header_list += sample_list + results_list.append(header_list) + for i, trait_id in enumerate(trait_ids): + line_list = [] + line_list.append(str(trait_names[i])) + final_query = query.format(dataset_id, trait_id) + results = g.db.execute(final_query).fetchall() + results_dict = {} + for item in results: + results_dict[item[0]] = item[2] + for sample in sample_list: + if sample in results_dict: + line_list.append(results_dict[sample]) + else: + line_list.append("x") + results_list.append(line_list) + + si = StringIO.StringIO() + csv_writer = csv.writer(si) + csv_writer.writerows(results_list) + output = make_response(si.getvalue()) + output.headers["Content-Disposition"] = "attachment; filename=" + filename + output.headers["Content-type"] = "text/csv" + return output + else: + return render_template("/api/no_results.html") + else: + return render_template("/api/no_results.html") + +@app.route("/api/v_{}/sample_data//".format(version)) +@app.route("/api/v_{}/sample_data//.".format(version)) +def trait_sample_data(dataset_name, trait_name, file_format = "json"): + probeset_query = """ + SELECT + Strain.Name, Strain.Name2, ProbeSetData.value, ProbeSetData.Id, ProbeSetSE.error + FROM + (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef) + LEFT JOIN ProbeSetSE ON + (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId) + WHERE + ProbeSet.Name = '{0}' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND + ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND + ProbeSetFreeze.Name = '{1}' AND + ProbeSetXRef.DataId = ProbeSetData.Id AND + ProbeSetData.StrainId = Strain.Id + ORDER BY + Strain.Name + """.format(trait_name, dataset_name) + + probeset_results = g.db.execute(probeset_query) + + sample_data = probeset_results.fetchall() + if len(sample_data) > 0: + sample_list = [] + for sample in sample_data: + sample_dict = { + 'sample_name' : sample[0], + 'sample_name_2' : sample[1], + 'value' : sample[2], + 'data_id' : sample[3], + } + if sample[4]: + sample_dict['se'] = sample[4] + sample_list.append(sample_dict) + + return flask.jsonify(sample_list) + else: + if not dataset_name.isdigit(): + group_id = get_group_id(dataset_name) + if group_id: + dataset_or_group = group_id + else: + dataset_or_group = dataset_name + else: + dataset_or_group = dataset_name + + pheno_query = """ + SELECT + Strain.Name, Strain.Name2, PublishData.value, PublishData.Id, PublishSE.error, NStrain.count + FROM + (PublishData, Strain, PublishXRef, PublishFreeze) + LEFT JOIN PublishSE ON + (PublishSE.DataId = PublishData.Id 
AND PublishSE.StrainId = PublishData.StrainId) + LEFT JOIN NStrain ON + (NStrain.DataId = PublishData.Id AND + NStrain.StrainId = PublishData.StrainId) + WHERE + PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND + PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = '{1}' AND + (PublishFreeze.Id = '{0}' OR PublishFreeze.Name = '{0}' OR + PublishFreeze.ShortName = '{0}' OR PublishXRef.InbredSetId = '{0}') AND + PublishData.StrainId = Strain.Id + ORDER BY + Strain.Name + """.format(dataset_or_group, trait_name) + + pheno_results = g.db.execute(pheno_query) + + sample_data = pheno_results.fetchall() + if len(sample_data) > 0: + sample_list = [] + for sample in sample_data: + sample_dict = { + 'sample_name' : sample[0], + 'sample_name_2' : sample[1], + 'value' : sample[2], + 'data_id' : sample[3] + } + if sample[4]: + sample_dict['se'] = sample[4] + if sample[5]: + sample_dict['n_cases'] = sample[5] + sample_list.append(sample_dict) + + return flask.jsonify(sample_list) + else: + return render_template("/api/no_results.html") + +@app.route("/api/v_{}/trait//".format(version)) +@app.route("/api/v_{}/trait//.".format(version)) +@app.route("/api/v_{}/trait_info//".format(version)) +@app.route("/api/v_{}/trait_info//.".format(version)) +def get_trait_info(dataset_name, trait_name, file_format = "json"): + probeset_query = """ + SELECT + ProbeSet.Id, ProbeSet.Name, ProbeSet.Symbol, ProbeSet.description, ProbeSet.Chr, ProbeSet.Mb, ProbeSet.alias, + ProbeSetXRef.mean, ProbeSetXRef.se, ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.additive + FROM + ProbeSet, ProbeSetXRef, ProbeSetFreeze + WHERE + ProbeSet.Name = '{0}' AND + ProbeSetXRef.ProbeSetId = ProbeSet.Id AND + ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND + ProbeSetFreeze.Name = '{1}' + """.format(trait_name, dataset_name) + + probeset_results = g.db.execute(probeset_query) + + trait_info = probeset_results.fetchone() + if trait_info: + trait_dict = { + 'id' : trait_info[0], + 'name' : trait_info[1], + 'symbol' : trait_info[2], + 'description' : trait_info[3], + 'chr' : trait_info[4], + 'mb' : trait_info[5], + 'alias' :trait_info[6], + 'mean' : trait_info[7], + 'se' : trait_info[8], + 'locus' : trait_info[9], + 'lrs' : trait_info[10], + 'p_value' : trait_info[11], + 'additive' : trait_info[12] + } + + return flask.jsonify(trait_dict) + else: + if "Publish" in dataset_name: #ZS: Check if the user input the dataset_name as BXDPublish, etc (which is always going to be the group name + "Publish" + dataset_name = dataset_name.replace("Publish", "") + + group_id = get_group_id(dataset_name) + pheno_query = """ + SELECT + PublishXRef.PhenotypeId, PublishXRef.Locus, PublishXRef.LRS, PublishXRef.additive + FROM + PublishXRef + WHERE + PublishXRef.Id = '{0}' AND + PublishXRef.InbredSetId = '{1}' + """.format(trait_name, group_id) + + logger.debug("QUERY:", pheno_query) + + pheno_results = g.db.execute(pheno_query) + + trait_info = pheno_results.fetchone() + if trait_info: + trait_dict = { + 'id' : trait_info[0], + 'locus' : trait_info[1], + 'lrs' : trait_info[2], + 'additive' : trait_info[3] + } + + return flask.jsonify(trait_dict) + else: + return render_template("/api/no_results.html") + +@app.route("/api/v_{}/correlation".format(version), methods=('GET',)) +def get_corr_results(): + results = correlation.do_correlation(request.args) + + if len(results) > 0: + return flask.jsonify(results) #ZS: I think flask.jsonify expects a dict/list instead of JSON + else: + return render_template("/api/no_results.html") + 
+@app.route("/api/v_{}/mapping".format(version), methods=('GET',)) +def get_mapping_results(): + results = mapping.do_mapping_for_api(request.args) + + if len(results) > 0: + filename = "mapping_" + datetime.datetime.utcnow().strftime('%b_%d_%Y_%I:%M%p') + ".csv" + + si = StringIO.StringIO() + csv_writer = csv.writer(si) + csv_writer.writerows(results) + output = make_response(si.getvalue()) + output.headers["Content-Disposition"] = "attachment; filename=" + filename + output.headers["Content-type"] = "text/csv" + + return output + else: + return render_template("/api/no_results.html") + +@app.route("/api/v_{}/genotypes/".format(version)) +@app.route("/api/v_{}/genotypes/.".format(version)) +def get_genotypes(group_name, file_format="csv"): + si = StringIO.StringIO() + if file_format == "csv" or file_format == "geno": + filename = group_name + ".geno" + + if os.path.isfile('{0}/{1}.geno'.format(flat_files('genotype'), group_name)): + output_lines = [] + with open('{0}/{1}.geno'.format(flat_files('genotype'), group_name)) as genofile: + for line in genofile: + if line[0] == "#" or line[0] == "@": + output_lines.append([line.strip()]) + else: + output_lines.append(line.split()) + + csv_writer = csv.writer(si, delimiter = '\t', escapechar = "\\", quoting = csv.QUOTE_NONE) + else: + return render_template("/api/no_results.html") + else: + filename = group_name + ".bimbam" + + if os.path.isfile('{0}/{1}.geno'.format(flat_files('genotype'), group_name)): + output_lines = [] + with open('{0}/{1}_geno.txt'.format(flat_files('genotype/bimbam'), group_name)) as genofile: + for line in genofile: + output_lines.append([line.strip() for line in line.split(",")]) + + csv_writer = csv.writer(si, delimiter = ',') + else: + return render_template("/api/no_results.html") + + csv_writer.writerows(output_lines) + output = make_response(si.getvalue()) + output.headers["Content-Disposition"] = "attachment; filename=" + filename + output.headers["Content-type"] = "text/csv" + + return output + +@app.route("/api/v_{}/traits/".format(version), methods=('GET',)) +@app.route("/api/v_{}/traits/.".format(version), methods=('GET',)) +def get_traits(dataset_name, file_format = "json"): + #ZS: Need to check about the "start" and "stop" stuff since it seems to just limit the number of results to stop - start + 1 in Pjotr's elixir code + + NotImplemented + +def get_dataset_trait_ids(dataset_name): + if "Geno" in dataset_name: + data_type = "Geno" #ZS: Need to pass back the dataset type + query = """ + SELECT + GenoXRef.GenoId, Geno.Name, GenoXRef.GenoFreezeId + FROM + Geno, GenoXRef, GenoFreeze + WHERE + Geno.Id = GenoXRef.GenoId AND + GenoXRef.GenoFreezeId = GenoFreeze.Id AND + GenoFreeze.Name = '{0}' + """.format(dataset_name) + + results = g.db.execute(query).fetchall() + + trait_ids = [result[0] for result in results] + trait_names = [result[1] for result in results] + dataset_id = results[0][2] + return trait_ids, trait_names, data_type, dataset_id + + elif "Publish" in dataset_name: + data_type = "Publish" + dataset_name = dataset_name.replace("Publish", "") + dataset_id = get_group_id(dataset_name) + + query = """ + SELECT + PublishXRef.PhenotypeId + FROM + PublishXRef + WHERE + PublishXRef.InbredSetId = '{0}' + """.format(dataset_id) + + results = g.db.execute(query).fetchall() + + trait_ids = [result[0] for result in results] + trait_names = trait_ids + return trait_ids, trait_names, data_type, dataset_id + + else: + data_type = "ProbeSet" + query = """ + SELECT + ProbeSetXRef.ProbeSetId, ProbeSet.Name, 
ProbeSetXRef.ProbeSetFreezeId + FROM + ProbeSet, ProbeSetXRef, ProbeSetFreeze + WHERE + ProbeSet.Id = ProbeSetXRef.ProbeSetId AND + ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND + ProbeSetFreeze.Name = '{0}' + """.format(dataset_name) + + results = g.db.execute(query).fetchall() + + trait_ids = [result[0] for result in results] + trait_names = [result[1] for result in results] + dataset_id = results[0][2] + return trait_ids, trait_names, data_type, dataset_id + +def get_samplelist(dataset_name): + group_id = get_group_id_from_dataset(dataset_name) + + query = """ + SELECT Strain.Name + FROM Strain, StrainXRef + WHERE StrainXRef.StrainId = Strain.Id AND + StrainXRef.InbredSetId = {} + """.format(group_id) + + results = g.db.execute(query).fetchall() + + samplelist = [result[0] for result in results] + + return samplelist + +def get_group_id_from_dataset(dataset_name): + if "Publish" in dataset_name: + query = """ + SELECT + InbredSet.Id + FROM + InbredSet, PublishFreeze + WHERE + PublishFreeze.InbredSetId = InbredSet.Id AND + PublishFreeze.Name = "{}" + """.format(dataset_name) + elif "Geno" in dataset_name: + query = """ + SELECT + InbredSet.Id + FROM + InbredSet, GenoFreeze + WHERE + GenoFreeze.InbredSetId = InbredSet.Id AND + GenoFreeze.Name = "{}" + """.format(dataset_name) + else: + query = """ + SELECT + InbredSet.Id + FROM + InbredSet, ProbeSetFreeze, ProbeFreeze + WHERE + ProbeFreeze.InbredSetId = InbredSet.Id AND + ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND + ProbeSetFreeze.Name = "{}" + """.format(dataset_name) + + result = g.db.execute(query).fetchone() + + return result[0] + +def get_group_id(group_name): + query = """ + SELECT InbredSet.Id + FROM InbredSet + WHERE InbredSet.Name = '{}' + """.format(group_name) + + group_id = g.db.execute(query).fetchone() + if group_id: + return group_id[0] + else: + return None \ No newline at end of file -- cgit v1.2.3
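
A minimal client-side sketch (not part of the patch itself) showing how the new endpoints added above could be exercised over HTTP. The endpoint paths and query-parameter names are taken from the route decorators, assert statements, and init_corr_params/initialize_parameters in the patch (API version "pre1"); the base URL and the example dataset/trait names are placeholder assumptions, not values defined by this commit.

# Sketch only: assumes a locally running GN2 instance and placeholder dataset/trait names.
import requests

BASE = "http://127.0.0.1:5003/api/v_pre1"   # assumed host/port for a local GN2 server

# List all species; the router returns a JSON list of dicts.
species = requests.get("{}/species".format(BASE)).json()
print(species)

# Correlation request: 'db', 'target_db' and 'trait_id' are required by
# do_correlation(); 'type', 'method' and 'return_count' are optional and
# default to "sample", "pearson" and 500 (see init_corr_params).
corr = requests.get("{}/correlation".format(BASE), params={
    "trait_id": "1427571_at",      # placeholder trait name
    "db": "HC_M2_0606_P",          # placeholder primary dataset
    "target_db": "HC_M2_0606_P",   # placeholder target dataset
    "type": "sample",
    "method": "pearson",
    "return_count": 100,
}).json()

# Mapping request: 'db' and 'trait_id' are required by do_mapping_for_api();
# the response is CSV rather than JSON (see get_mapping_results).
mapping_csv = requests.get("{}/mapping".format(BASE), params={
    "trait_id": "1427571_at",      # placeholder trait name
    "db": "BXDGeno",               # placeholder dataset
    "method": "gemma",
}).text
print(mapping_csv.splitlines()[0])  # header row for gemma: name, chr, Mb, lod_score, p_value

The same pattern applies to the other GET routes registered in router.py (groups, datasets, sample_data, trait_info, genotypes); only the path segment and, where relevant, the file-format suffix change.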