aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/API_readme.md155
-rw-r--r--wqflask/base/data_set.py7
-rw-r--r--wqflask/utility/gen_geno_ob.py135
-rw-r--r--wqflask/wqflask/__init__.py1
-rw-r--r--wqflask/wqflask/api/__init__.py0
-rw-r--r--wqflask/wqflask/api/correlation.py237
-rw-r--r--wqflask/wqflask/api/mapping.py122
-rw-r--r--wqflask/wqflask/api/router.py759
8 files changed, 1414 insertions, 2 deletions
diff --git a/doc/API_readme.md b/doc/API_readme.md
new file mode 100644
index 00000000..96e8b246
--- /dev/null
+++ b/doc/API_readme.md
@@ -0,0 +1,155 @@
+# API Query Documentation #
+---
+# Fetching Dataset/Trait info/data #
+---
+## Fetch Species List ##
+
+To get a list of species with data available in GN (and their associated names and ids):
+```
+curl http://gn2-zach.genenetwork.org/api/v_pre1/species
+[ { "FullName": "Mus musculus", "Id": 1, "Name": "mouse", "TaxonomyId": 10090 }, ... { "FullName": "Populus trichocarpa", "Id": 10, "Name": "poplar", "TaxonomyId": 3689 } ]
+```
+
+Or to get a single species info:
+```
+curl http://gn2-zach.genenetwork.org/api/v_pre1/species/mouse
+```
+OR
+```
+curl http://gn2-zach.genenetwork.org/api/v_pre1/species/mouse.json
+```
+
+*For all queries where the last field is a user-specified name/ID, there will be the option to append a file format type. Currently there is only JSON (and it will default to JSON if none is provided), but other formats will be added later*
+
+## Fetch Groups/RISets ##
+
+This query can optionally filter by species:
+
+```
+curl http://gn2-zach.genenetwork.org/api/v_pre1/groups (for all species)
+```
+OR
+```
+curl http://gn2-zach.genenetwork.org/api/v_pre1/mouse/groups (for just mouse groups/RISets)
+[ { "DisplayName": "BXD", "FullName": "BXD RI Family", "GeneticType": "riset", "Id": 1, "MappingMethodId": "1", "Name": "BXD", "SpeciesId": 1, "public": 2 }, ... { "DisplayName": "AIL LGSM F34 and F39-43 (GBS)", "FullName": "AIL LGSM F34 and F39-43 (GBS)", "GeneticType": "intercross", "Id": 72, "MappingMethodId": "2", "Name": "AIL-LGSM-F34-F39-43-GBS", "SpeciesId": 1, "public": 2 } ]
+```
+
+## Fetch Genotypes for Group/RISet ##
+```
+curl http://gn2-zach.genenetwork.org/api/v_pre1/genotypes/BXD
+```
+Returns a CSV file with metadata in the first few rows, sample/strain names as columns, and markers as rows. Currently only works for genotypes we have stored in .geno files; I'll add the option to download BIMBAM files soon.
+
+## Fetch Datasets ##
+```
+curl http://gn2-zach.genenetwork.org/api/v_pre1/datasets/bxd
+```
+OR
+```
+curl http://gn2-zach.genenetwork.org/api/v_pre1/datasets/mouse/bxd
+[ { "AvgID": 1, "CreateTime": "Fri, 01 Aug 2003 00:00:00 GMT", "DataScale": "log2", "FullName": "UTHSC/ETHZ/EPFL BXD Liver Polar Metabolites Extraction A, CD Cohorts (Mar 2017) log2", "Id": 1, "Long_Abbreviation": "BXDMicroArray_ProbeSet_August03", "ProbeFreezeId": 3, "ShortName": "Brain U74Av2 08/03 MAS5", "Short_Abbreviation": "Br_U_0803_M", "confidentiality": 0, "public": 0 }, ... { "AvgID": 3, "CreateTime": "Tue, 14 Aug 2018 00:00:00 GMT", "DataScale": "log2", "FullName": "EPFL/LISP BXD CD Liver Affy Mouse Gene 1.0 ST (Aug18) RMA", "Id": 859, "Long_Abbreviation": "EPFLMouseLiverCDRMAApr18", "ProbeFreezeId": 181, "ShortName": "EPFL/LISP BXD CD Liver Affy Mouse Gene 1.0 ST (Aug18) RMA", "Short_Abbreviation": "EPFLMouseLiverCDRMA0818", "confidentiality": 0, "public": 1 } ]
+```
+(I added the option to specify species just in case we end up with the same group name across multiple species at some point, though it's currently unnecessary)
+
+## Fetch Sample Data for Dataset ##
+```
+curl http://gn2-zach.genenetwork.org/api/v_pre1/sample_data/HSNIH-PalmerPublish.csv
+```
+
+Returns a CSV file with sample/strain names as the columns and trait IDs as rows
+
+## Fetch Individual Dataset Info ##
+### For mRNA Assay/"ProbeSet" ###
+
+```
+curl http://gn2-zach.genenetwork.org/api/v_pre1/dataset/HC_M2_0606_P
+```
+OR
+```
+curl http://gn2-zach.genenetwork.org/api/v_pre1/dataset/bxd/HC_M2_0606_P
+{ "confidential": 0, "data_scale": "log2", "dataset_type": "mRNA expression", "full_name": "Hippocampus Consortium M430v2 (Jun06) PDNN", "id": 112, "name": "HC_M2_0606_P", "public": 2, "short_name": "Hippocampus M430v2 BXD 06/06 PDNN", "tissue": "Hippocampus mRNA", "tissue_id": 9 }
+```
+(This also has the option to specify group/riset)
+
+### For "Phenotypes" (basically non-mRNA Expression; stuff like weight, sex, etc) ###
+For these traits, the query fetches publication info and takes the group and phenotype 'ID' as input. For example:
+```
+curl http://gn2-zach.genenetwork.org/api/v_pre1/dataset/bxd/10001
+{ "dataset_type": "phenotype", "description": "Central nervous system, morphology: Cerebellum weight, whole, bilateral in adults of both sexes [mg]", "id": 10001, "name": "CBLWT2", "pubmed_id": 11438585, "title": "Genetic control of the mouse cerebellum: identification of quantitative trait loci modulating size and architecture", "year": "2001" }
+```
+
+## Fetch Sample Data for Single Trait ##
+```
+curl http://gn2-zach.genenetwork.org/api/v_pre1/sample_data/HC_M2_0606_P/1436869_at
+[ { "data_id": 23415463, "sample_name": "129S1/SvImJ", "sample_name_2": "129S1/SvImJ", "se": 0.123, "value": 8.201 }, { "data_id": 23415463, "sample_name": "A/J", "sample_name_2": "A/J", "se": 0.046, "value": 8.413 }, { "data_id": 23415463, "sample_name": "AKR/J", "sample_name_2": "AKR/J", "se": 0.134, "value": 8.856 }, ... ]
+```
+
+## Fetch Trait Info (Name, Description, Location, etc) ##
+### For mRNA Expression/"ProbeSet" ###
+```
+curl http://gn2-zach.genenetwork.org/api/v_pre1/trait/HC_M2_0606_P/1436869_at
+{ "additive": -0.214087568058076, "alias": "HHG1; HLP3; HPE3; SMMCI; Dsh; Hhg1", "chr": "5", "description": "sonic hedgehog (hedgehog)", "id": 99602, "locus": "rs8253327", "lrs": 12.7711275309832, "mb": 28.457155, "mean": 9.27909090909091, "name": "1436869_at", "p_value": 0.306, "se": null, "symbol": "Shh" }
+```
+
+### For "Phenotypes" ###
+For phenotypes this just gets the max LRS, its location, and additive effect (as calculated by qtlreaper)
+
+Since each group/riset only has one phenotype "dataset", this query takes either the group/riset name or the group/riset name + "Publish" (for example "BXDPublish", which is the dataset name in the DB) as input
+```
+curl http://gn2-zach.genenetwork.org/api/v_pre1/trait/BXD/10001
+{ "additive": 2.39444435069444, "id": 4, "locus": "rs48756159", "lrs": 13.4974911471087 }
+```
+
+---
+
+# Analyses #
+---
+## Mapping ##
+Currently two mapping tools can be used - GEMMA and R/qtl. qtlreaper will be added later with Christian Fischer's RUST implementation - https://github.com/chfi/rust-qtlreaper
+
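+Each method's query takes the following parameters respectively (more will be added). The mapping tool itself is selected with the `method` parameter (`gemma`, which is the default, or `rqtl`):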
+### GEMMA ###
+* trait_id (*required*) - ID for trait being mapped
+* db (*required*) - DB name for trait above (Short_Abbreviation listed when you query for datasets)
+* use_loco - Whether to use LOCO (leave one chromosome out) method (default = false)
+* maf - minor allele frequency (default = 0.01)
+
+Example query:
+```
+curl "http://gn2-zach.genenetwork.org/api/v_pre1/mapping?trait_id=10015&db=BXDPublish&method=gemma&use_loco=true"
+```
+
+### R/qtl ###
+(See the R/qtl guide for information on some of these options - http://www.rqtl.org/manual/qtl-manual.pdf)
+* trait_id (*required*) - ID for trait being mapped
+* db (*required*) - DB name for trait above (Short_Abbreviation listed when you query for datasets)
+* rqtl_method - hk (default) | ehk | em | imp | mr | mr-imp | mr-argmax ; Corresponds to the "method" option for the R/qtl scanone function.
+* rqtl_model - normal (default) | binary | 2-part | np ; corresponds to the "model" option for the R/qtl scanone function
+* num_perm - number of permutations; 0 by default
+* control_marker - Name of marker to use as control; this relies on the user knowing the name of the marker they want to use as a covariate
+* interval_mapping - Whether to use interval mapping; "false" by default
+* pair_scan - *NYI*
+
+Example query:
+```
+curl "http://gn2-zach.genenetwork.org/api/v_pre1/mapping?trait_id=1418701_at&db=HC_M2_0606_P&method=rqtl&num_perm=100"
+```
+
+Some combinations of methods/models may not make sense. The R/qtl manual should be referred to for any questions on its use (specifically the scanone function in this case)
+
+## Calculate Correlation ##
+Currently only Sample and Tissue correlations are implemented
+
+This query currently takes the following parameters (though more will be added):
+* trait_id (*required*) - ID for trait used for correlation
+* db (*required*) - DB name for the trait above (this is the Short_Abbreviation listed when you query for datasets)
+* target_db (*required*) - Target DB name to be correlated against
+* type - sample (default) | tissue
+* method - pearson (default) | spearman
+* return_count - Number of results to return (default = 500)
+
+Example query:
+```
+curl "http://gn2-zach.genenetwork.org/api/v_pre1/correlation?trait_id=1427571_at&db=HC_M2_0606_P&target_db=BXDPublish&type=sample&return_count=100"
+[ { "#_strains": 6, "p_value": 0.004804664723032055, "sample_r": -0.942857142857143, "trait": 20511 }, { "#_strains": 6, "p_value": 0.004804664723032055, "sample_r": -0.942857142857143, "trait": 20724 }, { "#_strains": 12, "p_value": 1.8288943424888848e-05, "sample_r": -0.9233615170820528, "trait": 13536 }, { "#_strains": 7, "p_value": 0.006807187408935392, "sample_r": 0.8928571428571429, "trait": 10157 }, { "#_strains": 7, "p_value": 0.006807187408935392, "sample_r": -0.8928571428571429, "trait": 20392 }, ... ]
+```
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index beb2a8a2..b324ac74 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -43,6 +43,7 @@ from db import webqtlDatabaseFunction
from utility import webqtlUtil
from utility.benchmark import Bench
from utility import chunks
+from utility import gen_geno_ob
from utility.tools import locate, locate_ignore_error, flat_files
from maintenance import get_group_samplelists
@@ -388,14 +389,16 @@ class DatasetGroup(object):
#genotype_1 is Dataset Object without parents and f1
#genotype_2 is Dataset Object with parents and f1 (not for intercross)
- genotype_1 = reaper.Dataset()
+ #genotype_1 = reaper.Dataset()
# reaper barfs on unicode filenames, so here we ensure it's a string
if self.genofile:
full_filename = str(locate(self.genofile, 'genotype'))
else:
full_filename = str(locate(self.name + '.geno', 'genotype'))
- genotype_1.read(full_filename)
+ #genotype_1.read(full_filename)
+
+ genotype_1 = gen_geno_ob.genotype(full_filename)
if genotype_1.type == "group" and self.parlist:
genotype_2 = genotype_1.add(Mat=self.parlist[0], Pat=self.parlist[1]) #, F1=_f1)
diff --git a/wqflask/utility/gen_geno_ob.py b/wqflask/utility/gen_geno_ob.py
new file mode 100644
index 00000000..5824b0b3
--- /dev/null
+++ b/wqflask/utility/gen_geno_ob.py
@@ -0,0 +1,135 @@
+from __future__ import absolute_import, division, print_function
+
class genotype(object):
    """
    Replacement for reaper.Dataset so we can remove qtlreaper use while still generating mapping output figure

    Reads a tab-delimited .geno file and stores its chromosomes as a list of
    Chr objects (each holding Locus objects in file order). Iterating over,
    indexing and calling len() on a genotype object all operate on that list.
    """

    # Maps the "@label" used in the file's metadata lines to the attribute it sets
    _METADATA_ATTRIBUTES = {
        "name": "group",
        "type": "type",
        "mat": "mat",
        "pat": "pat",
        "het": "het",
        "unk": "unk"
    }

    def __init__(self, filename):
        self.group = None
        self.type = "riset"
        self.prgy = []
        self.nprgy = 0
        self.mat = -1
        self.pat = 1
        self.het = 0
        self.unk = "U"
        self.filler = False
        self.mb_exists = False

        #ZS: This is because I'm not sure if some files switch the column that contains Mb/cM positions; might be unnecessary
        self.cm_column = 2
        self.mb_column = 3

        self.chromosomes = []

        self.read_file(filename)

    def __iter__(self):
        return iter(self.chromosomes)

    def __getitem__(self, index):
        return self.chromosomes[index]

    def __len__(self):
        return len(self.chromosomes)

    def read_file(self, filename):
        """
        Parse filename and fill in the metadata, sample names and chromosomes

        Lines starting with "#" are skipped, lines starting with "@" are
        "@label:value" metadata, the line starting with "Chr" is the column
        header, and every other non-blank line is treated as a marker row.
        """
        this_chr = "" #ZS: This is so it can track when the chromosome changes as it iterates through markers
        chr_ob = None
        with open(filename, 'r') as geno_file:
            for raw_line in geno_file:
                # Remove the line terminator first; otherwise it ends up inside metadata
                # values, the last sample name and the last allele of every row
                line = raw_line.rstrip("\r\n")
                if not line.strip() or line[0] == "#":
                    continue

                if line[0] == "@":
                    self._read_metadata(line)
                elif line[:3] == "Chr":
                    self._read_header(line.split("\t"))
                else:
                    marker_row = line.split("\t")
                    if marker_row[0] != this_chr:
                        if chr_ob is not None:
                            self.chromosomes.append(chr_ob)
                        this_chr = marker_row[0]
                        chr_ob = Chr(this_chr, self)
                    chr_ob.add_marker(marker_row)

        # A chromosome is only stored when the next one starts, so the final one is stored here
        if chr_ob is not None:
            self.chromosomes.append(chr_ob)

    def _read_metadata(self, line):
        """Apply a single "@label:value" line; unknown labels and lines without a value are ignored"""
        fields = line[1:].split(":")
        if len(fields) < 2:
            return

        label = fields[0]
        value = fields[1].strip()
        if label == "filler":
            if value == "yes":
                self.filler = True
        elif label in self._METADATA_ATTRIBUTES:
            setattr(self, self._METADATA_ATTRIBUTES[label], value)

    def _read_header(self, header_row):
        """Work out which columns hold cM/Mb positions and collect the sample names that follow them"""
        if header_row[2] == "Mb":
            self.mb_exists = True
            self.mb_column = 2
            self.cm_column = 3
        elif len(header_row) > 3 and header_row[3] == "Mb":
            self.mb_exists = True
            self.mb_column = 3
        elif header_row[2] == "cM":
            self.cm_column = 2

        # Sample columns start after Chr, Locus, cM and (when present) Mb
        first_sample_column = 4 if self.mb_exists else 3
        self.prgy = header_row[first_sample_column:]
        self.nprgy = len(self.prgy)

class Chr(object):
    """One chromosome of a genotype object; behaves like a sequence of its Locus objects"""

    def __init__(self, name, geno_ob):
        self.name = name
        self.loci = []
        self.mb_exists = geno_ob.mb_exists
        self.cm_column = geno_ob.cm_column
        self.mb_column = geno_ob.mb_column
        self.geno_ob = geno_ob

    def __iter__(self):
        return iter(self.loci)

    def __getitem__(self, index):
        return self.loci[index]

    def __len__(self):
        return len(self.loci)

    def add_marker(self, marker_row):
        """Append the marker described by marker_row (the tab-split fields of one file row)"""
        self.loci.append(Locus(marker_row, self.geno_ob))

class Locus(object):
    """
    A single marker: chromosome, name, cM position, Mb position (None when the
    file has no Mb column) and one genotype code per sample

    Genotype codes are -1 (maternal), 1 (paternal), 0 (heterozygous) and "U" (unknown).
    """

    def __init__(self, marker_row, geno_ob):
        self.chr = marker_row[0]
        self.name = marker_row[1]
        self.cM = float(marker_row[geno_ob.cm_column])
        self.Mb = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else None

        geno_table = {
            geno_ob.mat: -1,
            geno_ob.pat: 1,
            geno_ob.het: 0,
            geno_ob.unk: "U"
        }

        start_pos = 4 if geno_ob.mb_exists else 3

        #ZS: Some genotype appears that isn't specified in the metadata, make it unknown
        self.genotype = [geno_table.get(allele.strip(), "U") for allele in marker_row[start_pos:]]
diff --git a/wqflask/wqflask/__init__.py b/wqflask/wqflask/__init__.py
index bc8e9900..399e794d 100644
--- a/wqflask/wqflask/__init__.py
+++ b/wqflask/wqflask/__init__.py
@@ -22,3 +22,4 @@ app.jinja_env.globals.update(
)
import wqflask.views
+from wqflask.api import router \ No newline at end of file
diff --git a/wqflask/wqflask/api/__init__.py b/wqflask/wqflask/api/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/wqflask/wqflask/api/__init__.py
diff --git a/wqflask/wqflask/api/correlation.py b/wqflask/wqflask/api/correlation.py
new file mode 100644
index 00000000..66eb94ac
--- /dev/null
+++ b/wqflask/wqflask/api/correlation.py
@@ -0,0 +1,237 @@
+from __future__ import absolute_import, division, print_function
+
+import collections
+
+import scipy
+
+from MySQLdb import escape_string as escape
+
+from flask import g
+
+from base import data_set
+from base.trait import GeneralTrait, retrieve_sample_data
+
+from wqflask.correlation.show_corr_results import generate_corr_json
+from wqflask.correlation import correlation_functions
+
+from utility import webqtlUtil, helper_functions, corr_result_helpers
+from utility.benchmark import Bench
+
+import utility.logger
+logger = utility.logger.getLogger(__name__ )
+
def do_correlation(start_vars):
    """
    Run a correlation for the API and return the top results as a list of dicts

    start_vars must contain 'db', 'target_db' and 'trait_id'; the optional
    'method', 'type' and 'return_count' entries are read by init_corr_params.
    The keys of each result dict depend on the correlation type.
    """
    for required_key in ('db', 'target_db', 'trait_id'):
        assert(required_key in start_vars)

    this_dataset = data_set.create_dataset(dataset_name = start_vars['db'])
    target_dataset = data_set.create_dataset(dataset_name = start_vars['target_db'])
    this_trait = retrieve_sample_data(GeneralTrait(dataset = this_dataset, name = start_vars['trait_id']),
                                      this_dataset)

    corr_params = init_corr_params(start_vars)
    corr_type = corr_params['type']

    corr_results = calculate_results(this_trait, this_dataset, target_dataset, corr_params)

    # Results come back already sorted, so the first return_count entries are the ones to report
    top_traits = list(corr_results.keys())[:corr_params['return_count']]

    final_results = []
    for trait in top_traits:
        trait_result = corr_results[trait]
        if corr_type == "tissue":
            sample_r, num_overlap, sample_p, symbol = trait_result
            final_results.append({
                "trait" : trait,
                "sample_r" : sample_r,
                "#_strains" : num_overlap,
                "p_value" : sample_p,
                "symbol" : symbol
            })
        elif corr_type in ("literature", "lit"):
            gene_id, sample_r = trait_result
            final_results.append({
                "trait" : trait,
                "sample_r" : sample_r,
                "gene_id" : gene_id
            })
        else:
            sample_r, sample_p, num_overlap = trait_result
            final_results.append({
                "trait" : trait,
                "sample_r" : sample_r,
                "#_strains" : num_overlap,
                "p_value" : sample_p
            })

    return final_results
+
def calculate_results(this_trait, this_dataset, target_dataset, corr_params):
    """
    Compute the correlations selected by corr_params['type']

    Returns an OrderedDict keyed by trait, sorted by descending absolute
    correlation. The value layout depends on the type:
      tissue            - [r, number of overlapping values, p-value, symbol]
      literature / lit  - [gene id, literature correlation]
      anything else     - [r, p-value, number of overlapping samples]
    """
    corr_results = {}

    target_dataset.get_trait_data()

    if corr_params['type'] == "tissue":
        trait_symbol_dict = this_dataset.retrieve_genes("Symbol")
        corr_results = do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params)
        # The correlation is the first element of a tissue result (index 1 is the overlap
        # count, see the unpacking in do_correlation), so sort on index 0
        correlation_index = 0
    elif corr_params['type'] == "literature" or corr_params['type'] == "lit": #ZS: Just so a user can use either "lit" or "literature"
        trait_geneid_dict = this_dataset.retrieve_genes("GeneId")
        corr_results = do_literature_correlation_for_all_traits(this_trait, this_dataset, trait_geneid_dict, corr_params)
        correlation_index = 1
    else:
        for target_trait, target_vals in target_dataset.trait_data.items():
            # get_sample_r_and_p_values picks pearson vs. spearman from its last argument, so it
            # has to receive the method; passing the type ("sample") always selected spearman
            result = get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, corr_params['method'])
            if result is not None:
                corr_results[target_trait] = result
        correlation_index = 0

    return collections.OrderedDict(sorted(corr_results.items(),
                                          key=lambda t: -abs(t[1][correlation_index])))
+
def do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params, tissue_dataset_id=1):
    """
    Correlate the tissue expression values of this_trait's gene symbol against
    those of every symbol in trait_symbol_dict

    Returns a dict mapping trait -> [result[0], result[1], result[2], symbol],
    where result is what correlation_functions.cal_zero_order_corr_for_tiss
    returned. Traits without a symbol or without tissue values are left out,
    and an empty dict is returned when the primary trait itself has no symbol
    or no tissue values. tissue_dataset_id is currently unused.
    """
    tissue_corr_data = {}

    # Without a symbol there is nothing to look up (and .lower() below would fail on None)
    if not this_trait.symbol:
        return tissue_corr_data

    #Gets tissue expression values for the primary trait
    primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list = [this_trait.symbol])

    primary_symbol = this_trait.symbol.lower()
    if primary_symbol not in primary_trait_tissue_vals_dict:
        return tissue_corr_data

    primary_trait_tissue_values = primary_trait_tissue_vals_dict[primary_symbol]

    corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list=trait_symbol_dict.values())

    for trait, symbol in trait_symbol_dict.items():
        if symbol and symbol.lower() in corr_result_tissue_vals_dict:
            this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()]

            result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values,
                                                                        this_trait_tissue_values,
                                                                        corr_params['method'])

            tissue_corr_data[trait] = [result[0], result[1], result[2], symbol]

    return tissue_corr_data
+
def do_literature_correlation_for_all_traits(this_trait, target_dataset, trait_geneid_dict, corr_params):
    """
    Look up the literature correlation between this_trait's gene and every gene
    in trait_geneid_dict

    Gene ids are first translated to mouse gene ids using the species of
    target_dataset. Returns a dict mapping trait -> [gene_id, correlation];
    the correlation is 0 when either gene has no single mouse gene id or when
    the LCorrRamin3 table has no row for the pair. corr_params is not used.
    """
    species = target_dataset.group.species.lower()
    input_trait_mouse_gene_id = convert_to_mouse_gene_id(species, this_trait.geneid)

    # No gene id for the primary trait means no pair can be looked up
    if not input_trait_mouse_gene_id:
        return {trait: [gene_id, 0] for trait, gene_id in trait_geneid_dict.items()}

    # escape() needs a string; the ids may be numeric
    escaped_input_id = escape(str(input_trait_mouse_gene_id))

    lit_corr_data = {}
    for trait, gene_id in trait_geneid_dict.items():
        lit_corr = 0
        mouse_gene_id = convert_to_mouse_gene_id(species, gene_id)

        if mouse_gene_id and str(mouse_gene_id).find(";") == -1:
            escaped_target_id = escape(str(mouse_gene_id))
            # A pair may be stored in either column order, so try both
            for target_column, input_column in (("GeneId1", "GeneId2"), ("GeneId2", "GeneId1")):
                result = g.db.execute(
                    """SELECT value
                       FROM LCorrRamin3
                       WHERE %s='%s' and
                             %s='%s'
                    """ % (target_column, escaped_target_id, input_column, escaped_input_id)
                ).fetchone()
                if result:
                    lit_corr = result.value
                    break

        lit_corr_data[trait] = [gene_id, lit_corr]

    return lit_corr_data
+
def get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, type):
    """
    Calculates the sample r (or rho) and p-value

    Given a primary trait and a target trait's sample values,
    calculates either the pearson r or spearman rho and the p-value
    using the corresponding scipy functions.

    type is the correlation method: 'pearson' selects scipy.stats.pearsonr and
    any other value selects scipy.stats.spearmanr. target_vals is indexed in
    the same order as target_dataset.group.samplelist. this_dataset is not used.

    Returns [r, p-value, number of overlapping samples], or None when there are
    5 or fewer overlapping samples or the correlation is NaN.
    """
    import math
    import scipy.stats

    this_trait_vals = []
    shared_target_vals = []
    for i, sample in enumerate(target_dataset.group.samplelist):
        if sample in this_trait.data:
            this_trait_vals.append(this_trait.data[sample].value)
            shared_target_vals.append(target_vals[i])

    this_trait_vals, shared_target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, shared_target_vals)

    # Check the overlap before correlating so that too-short inputs are never handed to scipy
    if num_overlap <= 5:
        return None

    if type == 'pearson':
        sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, shared_target_vals)
    else:
        sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, shared_target_vals)

    if math.isnan(sample_r):
        return None

    return [sample_r, sample_p, num_overlap]
+
def convert_to_mouse_gene_id(species=None, gene_id=None):
    """If the species is rat or human, translate the gene_id to the mouse geneid

    If there is no input gene_id or there's no corresponding mouse gene_id, return None

    A mouse gene_id is returned unchanged; any species other than mouse, rat
    or human also yields None.
    """
    if not gene_id:
        return None

    if species == 'mouse':
        return gene_id

    # Only these two literal values are ever used as a column name in the query below
    if species not in ('rat', 'human'):
        return None

    # escape() needs a string; the id may be numeric
    query = """SELECT mouse
               FROM GeneIDXRef
               WHERE %s='%s'""" % (species, escape(str(gene_id)))

    result = g.db.execute(query).fetchone()
    if result is not None:
        return result.mouse

    return None
+
def init_corr_params(start_vars):
    """
    Build the correlation parameter dict from the request variables

    Defaults are method "pearson", type "sample" and return_count 500; each is
    replaced by the entry of the same name in start_vars when present. A
    supplied return_count must consist of digits only and is converted to int.
    """
    chosen_method = start_vars['method'] if 'method' in start_vars else "pearson"
    chosen_type = start_vars['type'] if 'type' in start_vars else "sample"

    if 'return_count' in start_vars:
        assert(start_vars['return_count'].isdigit())
        result_limit = int(start_vars['return_count'])
    else:
        result_limit = 500

    return {
        'method' : chosen_method,
        'type' : chosen_type,
        'return_count' : result_limit
    }
diff --git a/wqflask/wqflask/api/mapping.py b/wqflask/wqflask/api/mapping.py
new file mode 100644
index 00000000..83c61796
--- /dev/null
+++ b/wqflask/wqflask/api/mapping.py
@@ -0,0 +1,122 @@
+from __future__ import absolute_import, division, print_function
+
+import string
+
+from base import data_set
+from base import webqtlConfig
+from base.trait import GeneralTrait, retrieve_sample_data
+
+from utility import helper_functions
+from wqflask.marker_regression import gemma_mapping, rqtl_mapping, qtlreaper_mapping, plink_mapping
+
+import utility.logger
+logger = utility.logger.getLogger(__name__ )
+
def do_mapping_for_api(start_vars):
    """
    Run GEMMA or R/qtl mapping for the API

    start_vars must contain 'db' and 'trait_id'; the remaining options are
    read by initialize_parameters. Returns a list of rows whose first row is
    the header (column names) and whose other rows hold those columns for each
    result marker.

    Raises ValueError when the requested mapping method is neither "gemma"
    nor "rqtl".
    """
    assert('db' in start_vars)
    assert('trait_id' in start_vars)

    dataset = data_set.create_dataset(dataset_name = start_vars['db'])
    dataset.group.get_markers()
    this_trait = GeneralTrait(dataset = dataset, name = start_vars['trait_id'])
    this_trait = retrieve_sample_data(this_trait, dataset)

    mapping_params = initialize_parameters(start_vars, dataset, this_trait)
    mapping_method = mapping_params['mapping_method']
    if mapping_method not in ("gemma", "rqtl"):
        raise ValueError("Unsupported mapping method: {}".format(mapping_method))

    # Index the trait data by sample name once (keeping the first entry for each name)
    # instead of scanning all of it for every sample in the group
    item_by_sample_name = {}
    for item in this_trait.data:
        item_by_sample_name.setdefault(this_trait.data[item].name, item)

    samples = []
    vals = []
    for sample in dataset.group.samplelist:
        if sample in item_by_sample_name:
            item = item_by_sample_name[sample]
            samples.append(item)
            vals.append(str(this_trait.data[item].value))
        else:
            # "x" marks a sample that has no value for this trait
            vals.append("x")

    covariates = "" #ZS: It seems to take an empty string as default. This should probably be changed.

    if mapping_method == "gemma":
        header_row = ["name", "chr", "Mb", "lod_score", "p_value"]
        gemma_output = gemma_mapping.run_gemma(this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])
        if mapping_params['use_loco'] == "True": #ZS: gemma_mapping returns both results and the filename for LOCO, so need to only grab the former for api
            result_markers = gemma_output[0]
        else:
            result_markers = gemma_output
    else:
        header_row = ["name", "chr", "Mb", "lod_score"]
        rqtl_output = rqtl_mapping.run_rqtl_geno(vals, dataset, mapping_params['rqtl_method'], mapping_params['rqtl_model'],
                                                 mapping_params['perm_check'], mapping_params['num_perm'],
                                                 mapping_params['do_control'], mapping_params['control_marker'],
                                                 mapping_params['manhattan_plot'], mapping_params['pair_scan'])
        if mapping_params['num_perm'] > 0:
            # With permutations the markers are the last of four returned values
            _perm_output, _suggestive, _significant, result_markers = rqtl_output
        else:
            result_markers = rqtl_output

    output_rows = [header_row]
    for marker in result_markers:
        output_rows.append([marker[header] for header in header_row])

    return output_rows
+
+
def initialize_parameters(start_vars, dataset, this_trait):
    """
    Build the mapping parameter dict from the request variables

    Always sets 'mapping_method' (default "gemma"), 'maf' (default 0.01),
    'use_loco' (default False), 'num_perm' (default 0) and 'perm_check'
    (False, or "ON" when a valid num_perm was supplied). When the method is
    "rqtl" it also sets 'rqtl_method', 'rqtl_model', 'do_control',
    'control_marker', 'manhattan_plot' and 'pair_scan'.

    dataset and this_trait are accepted but not used.
    """
    mapping_params = {}
    mapping_params['mapping_method'] = "gemma"
    if 'method' in start_vars:
        mapping_params['mapping_method'] = start_vars['method']

    if mapping_params['mapping_method'] == "rqtl":
        mapping_params['rqtl_method'] = "hk"
        mapping_params['rqtl_model'] = "normal"
        mapping_params['do_control'] = False
        mapping_params['control_marker'] = ""
        mapping_params['manhattan_plot'] = True
        mapping_params['pair_scan'] = False
        if 'rqtl_method' in start_vars:
            mapping_params['rqtl_method'] = start_vars['rqtl_method']
        if 'rqtl_model' in start_vars:
            mapping_params['rqtl_model'] = start_vars['rqtl_model']
        if 'control_marker' in start_vars:
            mapping_params['control_marker'] = start_vars['control_marker']
            mapping_params['do_control'] = True
        if 'pair_scan' in start_vars:
            if start_vars['pair_scan'].lower() == "true":
                mapping_params['pair_scan'] = True

        # interval_mapping takes precedence; manhattan_plot is only looked at when it is absent
        if 'interval_mapping' in start_vars:
            if start_vars['interval_mapping'].lower() == "true":
                mapping_params['manhattan_plot'] = False
        elif 'manhattan_plot' in start_vars:
            if start_vars['manhattan_plot'].lower() != "true":
                mapping_params['manhattan_plot'] = False

    mapping_params['maf'] = 0.01
    if 'maf' in start_vars:
        mapping_params['maf'] = start_vars['maf'] # Minor allele frequency

    mapping_params['use_loco'] = False
    if 'use_loco' in start_vars:
        requested_loco = start_vars['use_loco']
        if requested_loco.lower() == "true":
            # do_mapping_for_api compares this value with the exact string "True", so any
            # capitalisation of "true" (the API readme uses use_loco=true) is normalised
            mapping_params['use_loco'] = "True"
        elif requested_loco.lower() != "false":
            mapping_params['use_loco'] = requested_loco

    mapping_params['num_perm'] = 0
    mapping_params['perm_check'] = False
    if 'num_perm' in start_vars:
        try:
            mapping_params['num_perm'] = int(start_vars['num_perm'])
        except (TypeError, ValueError):
            # Not a number: keep the defaults (no permutations)
            mapping_params['perm_check'] = False
        else:
            mapping_params['perm_check'] = "ON"

    return mapping_params
+
+
diff --git a/wqflask/wqflask/api/router.py b/wqflask/wqflask/api/router.py
new file mode 100644
index 00000000..845873a0
--- /dev/null
+++ b/wqflask/wqflask/api/router.py
@@ -0,0 +1,759 @@
+# GN2 API
+
+from __future__ import absolute_import, division, print_function
+
+import os, io, csv, json, datetime
+
+import StringIO
+
+import flask
+from flask import g, Response, request, make_response, render_template, send_from_directory, jsonify, redirect
+import sqlalchemy
+from wqflask import app
+
+from wqflask.api import correlation, mapping
+
+from utility.tools import flat_files
+
import utility.logger
# Module-level logger, named after this module
logger = utility.logger.getLogger(__name__ )

# Version tag interpolated into every route below as "/api/v_<version>/..."
version = "pre1"
+
@app.route("/api/v_{}/".format(version))
def hello_world():
    """Return a fixed ``{"hello": "world"}`` JSON payload for the API root."""
    greeting = {'hello': 'world'}
    return flask.jsonify(greeting)
+
@app.route("/api/v_{}/species".format(version))
def get_species_list():
    """Return every row of the Species table as a JSON list of dicts.

    Each entry carries the keys 'Id', 'Name', 'FullName' and 'TaxonomyId'.
    """
    rows = g.db.execute("SELECT SpeciesId, Name, FullName, TaxonomyId FROM Species;").fetchall()

    species_list = [
        {
            'Id': row[0],
            'Name': row[1],
            'FullName': row[2],
            'TaxonomyId': row[3],
        }
        for row in rows
    ]

    return flask.jsonify(species_list)
+
@app.route("/api/v_{}/species/<path:species_name>".format(version))
@app.route("/api/v_{}/species/<path:species_name>.<path:file_format>".format(version))
def get_species_info(species_name, file_format = "json"):
    """Return one species, matched on Name, FullName or SpeciesName, as JSON.

    Args:
        species_name: Species identifier taken from the URL.
        file_format: Accepted from the URL but not currently used; the
            response is always JSON.

    Returns:
        A JSON object with 'Id', 'Name', 'FullName' and 'TaxonomyId', or the
        "No Results" error document when nothing matches.
    """
    # The value comes straight from the URL, so bind it as a parameter rather
    # than formatting it into the SQL text.
    # assumes g.db is a SQLAlchemy engine/connection - TODO confirm
    query = sqlalchemy.text("""
        SELECT SpeciesId, Name, FullName, TaxonomyId
        FROM Species
        WHERE (Name = :species OR FullName = :species OR SpeciesName = :species);
    """)

    the_species = g.db.execute(query, {"species": species_name}).fetchone()
    if not the_species:
        # Previously an unknown species crashed on indexing None
        return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")

    species_dict = {
        'Id' : the_species[0],
        'Name' : the_species[1],
        'FullName' : the_species[2],
        'TaxonomyId' : the_species[3]
    }

    return flask.jsonify(species_dict)
+
@app.route("/api/v_{}/groups".format(version))
@app.route("/api/v_{}/<path:species_name>/groups".format(version))
def get_groups_list(species_name=None):
    """Return all groups (InbredSet rows) as JSON, optionally limited to one species.

    Args:
        species_name: Optional species identifier from the URL; matched
            against Species.Name, Species.FullName and Species.SpeciesName.

    Returns:
        A JSON list of group dicts, or the "No Results" error document when
        the query returns no rows.
    """
    columns = """InbredSet.InbredSetId, InbredSet.SpeciesId, InbredSet.InbredSetName,
                 InbredSet.Name, InbredSet.FullName, InbredSet.public,
                 InbredSet.MappingMethodId, InbredSet.GeneticType"""

    if species_name:
        # URL-supplied value: bound as a parameter, never formatted into the SQL.
        # assumes g.db is a SQLAlchemy engine/connection - TODO confirm
        query = sqlalchemy.text("""SELECT """ + columns + """
                                   FROM InbredSet, Species
                                   WHERE InbredSet.SpeciesId = Species.Id AND
                                         (Species.Name = :species OR
                                          Species.FullName = :species OR
                                          Species.SpeciesName = :species);""")
        results = g.db.execute(query, {"species": species_name})
    else:
        results = g.db.execute("""SELECT """ + columns + """
                                  FROM InbredSet;""")

    the_groups = results.fetchall()
    if not the_groups:
        return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")

    groups_list = [
        {
            'Id' : group[0],
            'SpeciesId' : group[1],
            'DisplayName' : group[2],
            'Name' : group[3],
            'FullName' : group[4],
            'public' : group[5],
            'MappingMethodId' : group[6],
            'GeneticType' : group[7]
        }
        for group in the_groups
    ]

    return flask.jsonify(groups_list)
+
@app.route("/api/v_{}/group/<path:group_name>".format(version))
@app.route("/api/v_{}/group/<path:group_name>.<path:file_format>".format(version))
@app.route("/api/v_{}/group/<path:species_name>/<path:group_name>".format(version))
@app.route("/api/v_{}/group/<path:species_name>/<path:group_name>.<path:file_format>".format(version))
def get_group_info(group_name, species_name = None, file_format = "json"):
    """Return a single group (InbredSet row) as JSON.

    Args:
        group_name: Matched against InbredSet.InbredSetName, Name and FullName.
        species_name: Optional; when given, the group must also belong to a
            species matching this value on Name, FullName or SpeciesName.
        file_format: Accepted from the URL but not currently used.

    Returns:
        A JSON object describing the first matching group, or the
        "No Results" error document when nothing matches.
    """
    columns = """InbredSet.InbredSetId, InbredSet.SpeciesId, InbredSet.InbredSetName,
                 InbredSet.Name, InbredSet.FullName, InbredSet.public,
                 InbredSet.MappingMethodId, InbredSet.GeneticType"""
    group_filter = """(InbredSet.InbredSetName = :group OR
                       InbredSet.Name = :group OR
                       InbredSet.FullName = :group)"""

    # Both values come from the URL, so they are bound as parameters.
    # assumes g.db is a SQLAlchemy engine/connection - TODO confirm
    if species_name:
        query = sqlalchemy.text("""SELECT """ + columns + """
                                   FROM InbredSet, Species
                                   WHERE InbredSet.SpeciesId = Species.Id AND
                                         """ + group_filter + """ AND
                                         (Species.Name = :species OR
                                          Species.FullName = :species OR
                                          Species.SpeciesName = :species);""")
        results = g.db.execute(query, {"group": group_name, "species": species_name})
    else:
        query = sqlalchemy.text("""SELECT """ + columns + """
                                   FROM InbredSet
                                   WHERE """ + group_filter + """;""")
        results = g.db.execute(query, {"group": group_name})

    group = results.fetchone()
    if not group:
        return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")

    group_dict = {
        'Id' : group[0],
        'SpeciesId' : group[1],
        'DisplayName' : group[2],
        'Name' : group[3],
        'FullName' : group[4],
        'public' : group[5],
        'MappingMethodId' : group[6],
        'GeneticType' : group[7]
    }

    return flask.jsonify(group_dict)
+
@app.route("/api/v_{}/datasets/<path:group_name>".format(version))
@app.route("/api/v_{}/datasets/<path:species_name>/<path:group_name>".format(version))
def get_datasets_for_group(group_name, species_name=None):
    """Return the ProbeSetFreeze datasets belonging to a group as JSON.

    Args:
        group_name: Matched against InbredSet.Name, InbredSetName and FullName.
        species_name: Optional; when given, matched against
            Species.SpeciesName, MenuName and FullName.

    Returns:
        A JSON list of dataset dicts, or the "No Results" error document when
        the query returns no rows.
    """
    columns = """ProbeSetFreeze.Id, ProbeSetFreeze.ProbeFreezeId, ProbeSetFreeze.AvgID,
                 ProbeSetFreeze.Name, ProbeSetFreeze.Name2, ProbeSetFreeze.FullName,
                 ProbeSetFreeze.ShortName, ProbeSetFreeze.CreateTime, ProbeSetFreeze.public,
                 ProbeSetFreeze.confidentiality, ProbeSetFreeze.DataScale"""
    group_filter = """(InbredSet.Name = :group OR InbredSet.InbredSetName = :group OR
                       InbredSet.FullName = :group)"""

    # URL-supplied values are bound as parameters instead of being formatted in.
    # assumes g.db is a SQLAlchemy engine/connection - TODO confirm
    if species_name:
        query = sqlalchemy.text("""
            SELECT """ + columns + """
            FROM ProbeSetFreeze, ProbeFreeze, InbredSet, Species
            WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND
                  ProbeFreeze.InbredSetId = InbredSet.Id AND
                  """ + group_filter + """ AND
                  InbredSet.SpeciesId = Species.Id AND
                  (Species.SpeciesName = :species OR Species.MenuName = :species OR
                   Species.FullName = :species);
        """)
        results = g.db.execute(query, {"group": group_name, "species": species_name})
    else:
        query = sqlalchemy.text("""
            SELECT """ + columns + """
            FROM ProbeSetFreeze, ProbeFreeze, InbredSet
            WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND
                  ProbeFreeze.InbredSetId = InbredSet.Id AND
                  """ + group_filter + """;
        """)
        results = g.db.execute(query, {"group": group_name})

    the_datasets = results.fetchall()
    if not the_datasets:
        return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")

    datasets_list = [
        {
            'Id' : dataset[0],
            'ProbeFreezeId' : dataset[1],
            'AvgID' : dataset[2],
            'Short_Abbreviation' : dataset[3],
            'Long_Abbreviation' : dataset[4],
            'FullName' : dataset[5],
            'ShortName' : dataset[6],
            'CreateTime' : dataset[7],
            'public' : dataset[8],
            'confidentiality' : dataset[9],
            'DataScale' : dataset[10]
        }
        for dataset in the_datasets
    ]

    return flask.jsonify(datasets_list)
+
@app.route("/api/v_{}/dataset/<path:dataset_name>".format(version))
@app.route("/api/v_{}/dataset/<path:dataset_name>.<path:file_format>".format(version))
@app.route("/api/v_{}/dataset/<path:group_name>/<path:dataset_name>".format(version))
@app.route("/api/v_{}/dataset/<path:group_name>/<path:dataset_name>.<path:file_format>".format(version))
def get_dataset_info(dataset_name, group_name = None, file_format="json"):
    """Return metadata for a dataset, looking in ProbeSet and then phenotype tables.

    Args:
        dataset_name: A ProbeSetFreeze id (all digits) or one of its names;
            also used as the PublishXRef.Id for the phenotype lookup.
        group_name: Optional InbredSet.Name; the phenotype lookup only runs
            when it is given.
        file_format: Accepted from the URL but not currently used.

    Returns:
        A JSON list when both lookups match, a single JSON object when exactly
        one matches, or the "No Results" error document otherwise.
    """
    #ZS: First get ProbeSet (mRNA expression) datasets and then get Phenotype datasets

    datasets_list = [] #ZS: I figure I might as well return a list if there are multiple matches, though I don't know if this will actually happen in practice

    if dataset_name.isdigit():
        dataset_filter = "ProbeSetFreeze.Id = :dataset"
    else:
        dataset_filter = """(ProbeSetFreeze.Name = :dataset OR ProbeSetFreeze.Name2 = :dataset OR
                             ProbeSetFreeze.FullName = :dataset OR ProbeSetFreeze.ShortName = :dataset)"""

    probeset_query = """
        SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName,
               ProbeSetFreeze.ShortName, ProbeSetFreeze.DataScale, ProbeFreeze.TissueId,
               Tissue.Name, ProbeSetFreeze.public, ProbeSetFreeze.confidentiality
        FROM ProbeSetFreeze, ProbeFreeze, Tissue
        WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND
              ProbeFreeze.TissueId = Tissue.Id AND
              """ + dataset_filter

    # URL-supplied values are bound as parameters instead of being formatted in.
    # assumes g.db is a SQLAlchemy engine/connection - TODO confirm
    probeset_results = g.db.execute(sqlalchemy.text(probeset_query), {"dataset": dataset_name})
    dataset = probeset_results.fetchone()

    if dataset:
        dataset_dict = {
            'dataset_type' : "mRNA expression",
            'id' : dataset[0],
            'name' : dataset[1],
            'full_name' : dataset[2],
            'short_name' : dataset[3],
            'data_scale' : dataset[4],
            'tissue_id' : dataset[5],
            'tissue' : dataset[6],
            'public' : dataset[7],
            'confidential' : dataset[8]
        }

        datasets_list.append(dataset_dict)

    if group_name:
        pheno_query = """
            SELECT PublishXRef.Id, Phenotype.Post_publication_abbreviation, Phenotype.Post_publication_description,
                   Phenotype.Pre_publication_abbreviation, Phenotype.Pre_publication_description,
                   Publication.PubMed_ID, Publication.Title, Publication.Year
            FROM PublishXRef, Phenotype, Publication, InbredSet
            WHERE PublishXRef.InbredSetId = InbredSet.Id AND
                  PublishXRef.PhenotypeId = Phenotype.Id AND
                  PublishXRef.PublicationId = Publication.Id AND
                  InbredSet.Name = :group AND PublishXRef.Id = :dataset
        """

        logger.debug("QUERY:", pheno_query)

        pheno_results = g.db.execute(sqlalchemy.text(pheno_query),
                                     {"group": group_name, "dataset": dataset_name})
        dataset = pheno_results.fetchone()

        if dataset:
            if dataset[5]:
                # Has a PubMed id: report the post-publication fields
                dataset_dict = {
                    'dataset_type' : "phenotype",
                    'id' : dataset[0],
                    'name' : dataset[1],
                    'description' : dataset[2],
                    'pubmed_id' : dataset[5],
                    'title' : dataset[6],
                    'year' : dataset[7]
                }
            elif dataset[4]:
                # No PubMed id: fall back to the pre-publication fields
                dataset_dict = {
                    'dataset_type' : "phenotype",
                    'id' : dataset[0],
                    'name' : dataset[3],
                    'description' : dataset[4]
                }
            else:
                dataset_dict = {
                    'dataset_type' : "phenotype",
                    'id' : dataset[0]
                }

            datasets_list.append(dataset_dict)

    if len(datasets_list) > 1:
        return flask.jsonify(datasets_list)
    elif len(datasets_list) == 1:
        # Single match is returned as a bare object, not a one-element list
        return flask.jsonify(datasets_list[0])
    else:
        return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
+
+
@app.route("/api/v_{}/sample_data/<path:dataset_name>".format(version))
@app.route("/api/v_{}/sample_data/<path:dataset_name>.<path:file_format>".format(version))
def all_sample_data(dataset_name, file_format = "csv"):
    """Return the sample values of every trait in a dataset as a CSV download.

    The CSV has a header row ("Trait ID" followed by the sample names) and one
    row per trait; samples with no value for a trait are written as "x".

    Args:
        dataset_name: Dataset name from the URL.
        file_format: Only "csv" is supported; any other value yields the
            "No Results" error document.

    Returns:
        A text/csv attachment response, or the "No Results" error document
        when the dataset has no traits or the format is not "csv".
    """
    trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids(dataset_name)

    # Bail out before doing any further queries when there is nothing to emit
    if len(trait_ids) == 0 or file_format != "csv":
        return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")

    sample_list = get_samplelist(dataset_name)

    if data_type == "ProbeSet":
        query = """
            SELECT
                Strain.Name, Strain.Name2, ProbeSetData.value, ProbeSetData.Id, ProbeSetSE.error
            FROM
                (ProbeSetData, Strain, ProbeSetXRef)
            LEFT JOIN ProbeSetSE ON
                (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
            WHERE
                ProbeSetXRef.ProbeSetFreezeId = :dataset_id AND
                ProbeSetXRef.ProbeSetId = :trait_id AND
                ProbeSetXRef.DataId = ProbeSetData.Id AND
                ProbeSetData.StrainId = Strain.Id
            ORDER BY
                Strain.Name
        """
    elif data_type == "Geno":
        query = """
            SELECT
                Strain.Name, Strain.Name2, GenoData.value, GenoData.Id, GenoSE.error
            FROM
                (GenoData, Strain, GenoXRef)
            LEFT JOIN GenoSE ON
                (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)
            WHERE
                GenoXRef.GenoFreezeId = :dataset_id AND
                GenoXRef.GenoId = :trait_id AND
                GenoXRef.DataId = GenoData.Id AND
                GenoData.StrainId = Strain.Id
            ORDER BY
                Strain.Name
        """
    else:
        query = """
            SELECT
                Strain.Name, Strain.Name2, PublishData.value, PublishData.Id, PublishSE.error, NStrain.count
            FROM
                (PublishData, Strain, PublishXRef)
            LEFT JOIN PublishSE ON
                (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId)
            LEFT JOIN NStrain ON
                (NStrain.DataId = PublishData.Id AND
                 NStrain.StrainId = PublishData.StrainId)
            WHERE
                PublishXRef.InbredSetId = :dataset_id AND
                PublishXRef.PhenotypeId = :trait_id AND
                PublishData.Id = PublishXRef.DataId AND
                PublishData.StrainId = Strain.Id
            ORDER BY
                Strain.Name
        """

    # Build the statement once; only the bound ids change per trait.
    # assumes g.db is a SQLAlchemy engine/connection - TODO confirm
    statement = sqlalchemy.text(query)

    filename = dataset_name + "_sample_data.csv"

    results_list = [["Trait ID"] + list(sample_list)]
    for trait_id, trait_name in zip(trait_ids, trait_names):
        rows = g.db.execute(statement, {"dataset_id": dataset_id, "trait_id": trait_id}).fetchall()
        # Map sample name -> value for this trait
        values_by_sample = {row[0]: row[2] for row in rows}

        line_list = [str(trait_name)]
        line_list.extend(values_by_sample.get(sample, "x") for sample in sample_list)
        results_list.append(line_list)

    si = StringIO.StringIO()
    csv_writer = csv.writer(si)
    csv_writer.writerows(results_list)
    output = make_response(si.getvalue())
    output.headers["Content-Disposition"] = "attachment; filename=" + filename
    output.headers["Content-type"] = "text/csv"
    return output
+
@app.route("/api/v_{}/sample_data/<path:dataset_name>/<path:trait_name>".format(version))
@app.route("/api/v_{}/sample_data/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version))
def trait_sample_data(dataset_name, trait_name, file_format = "json"):
    """Return the per-sample values of one trait as a JSON list.

    The trait is first looked up as a ProbeSet trait; if that yields no rows,
    it is looked up as a phenotype (Publish) trait.

    Args:
        dataset_name: ProbeSetFreeze name, or for phenotypes a group name,
            PublishFreeze id/name/short name, or InbredSet id.
        trait_name: ProbeSet name, or PublishXRef id for phenotypes.
        file_format: Accepted from the URL but not currently used.

    Returns:
        A JSON list of dicts with 'sample_name', 'sample_name_2', 'value',
        'data_id' and, when present, 'se' and 'n_cases'; or the "No Results"
        error document when neither lookup returns rows.
    """
    def sample_row_to_dict(row):
        # Columns: name, name2, value, data id, SE[, n cases]
        entry = {
            'sample_name' : row[0],
            'sample_name_2' : row[1],
            'value' : row[2],
            'data_id' : row[3]
        }
        if row[4]:
            entry['se'] = row[4]
        # Only the phenotype query selects the sixth (NStrain.count) column
        if len(row) > 5 and row[5]:
            entry['n_cases'] = row[5]
        return entry

    # URL-supplied values are bound as parameters instead of being formatted in.
    # assumes g.db is a SQLAlchemy engine/connection - TODO confirm
    probeset_query = sqlalchemy.text("""
        SELECT
            Strain.Name, Strain.Name2, ProbeSetData.value, ProbeSetData.Id, ProbeSetSE.error
        FROM
            (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
        LEFT JOIN ProbeSetSE ON
            (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
        WHERE
            ProbeSet.Name = :trait AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
            ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
            ProbeSetFreeze.Name = :dataset AND
            ProbeSetXRef.DataId = ProbeSetData.Id AND
            ProbeSetData.StrainId = Strain.Id
        ORDER BY
            Strain.Name
    """)

    sample_data = g.db.execute(probeset_query, {"trait": trait_name, "dataset": dataset_name}).fetchall()
    if len(sample_data) > 0:
        return flask.jsonify([sample_row_to_dict(sample) for sample in sample_data])

    # Not a ProbeSet trait: try the phenotype tables. A non-numeric name that
    # resolves to a group is replaced by that group's id.
    dataset_or_group = dataset_name
    if not dataset_name.isdigit():
        group_id = get_group_id(dataset_name)
        if group_id:
            # Bound as text, matching the quoted comparison used previously
            dataset_or_group = str(group_id)

    pheno_query = sqlalchemy.text("""
        SELECT
            Strain.Name, Strain.Name2, PublishData.value, PublishData.Id, PublishSE.error, NStrain.count
        FROM
            (PublishData, Strain, PublishXRef, PublishFreeze)
        LEFT JOIN PublishSE ON
            (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId)
        LEFT JOIN NStrain ON
            (NStrain.DataId = PublishData.Id AND
             NStrain.StrainId = PublishData.StrainId)
        WHERE
            PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
            PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = :trait AND
            (PublishFreeze.Id = :dataset OR PublishFreeze.Name = :dataset OR
             PublishFreeze.ShortName = :dataset OR PublishXRef.InbredSetId = :dataset) AND
            PublishData.StrainId = Strain.Id
        ORDER BY
            Strain.Name
    """)

    sample_data = g.db.execute(pheno_query, {"trait": trait_name, "dataset": dataset_or_group}).fetchall()
    if len(sample_data) > 0:
        return flask.jsonify([sample_row_to_dict(sample) for sample in sample_data])

    return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
+
@app.route("/api/v_{}/trait/<path:dataset_name>/<path:trait_name>".format(version))
@app.route("/api/v_{}/trait/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version))
@app.route("/api/v_{}/trait_info/<path:dataset_name>/<path:trait_name>".format(version))
@app.route("/api/v_{}/trait_info/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version))
def get_trait_info(dataset_name, trait_name, file_format = "json"):
    """Return summary information for one trait as JSON.

    The trait is first looked up as a ProbeSet trait in the named
    ProbeSetFreeze dataset; if that finds nothing, it is looked up as a
    phenotype trait (PublishXRef.Id) within the group named by dataset_name.

    Args:
        dataset_name: ProbeSetFreeze name, or a group name optionally
            containing "Publish" (which is stripped before the group lookup).
        trait_name: ProbeSet name, or PublishXRef id for phenotypes.
        file_format: Accepted from the URL but not currently used.

    Returns:
        A JSON object describing the trait, or the "No Results" error
        document when neither lookup matches.
    """
    # URL-supplied values are bound as parameters instead of being formatted in.
    # assumes g.db is a SQLAlchemy engine/connection - TODO confirm
    probeset_query = sqlalchemy.text("""
        SELECT
            ProbeSet.Id, ProbeSet.Name, ProbeSet.Symbol, ProbeSet.description, ProbeSet.Chr, ProbeSet.Mb, ProbeSet.alias,
            ProbeSetXRef.mean, ProbeSetXRef.se, ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.additive
        FROM
            ProbeSet, ProbeSetXRef, ProbeSetFreeze
        WHERE
            ProbeSet.Name = :trait AND
            ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
            ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
            ProbeSetFreeze.Name = :dataset
    """)

    trait_info = g.db.execute(probeset_query, {"trait": trait_name, "dataset": dataset_name}).fetchone()
    if trait_info:
        trait_dict = {
            'id' : trait_info[0],
            'name' : trait_info[1],
            'symbol' : trait_info[2],
            'description' : trait_info[3],
            'chr' : trait_info[4],
            'mb' : trait_info[5],
            'alias' : trait_info[6],
            'mean' : trait_info[7],
            'se' : trait_info[8],
            'locus' : trait_info[9],
            'lrs' : trait_info[10],
            'p_value' : trait_info[11],
            'additive' : trait_info[12]
        }

        return flask.jsonify(trait_dict)

    if "Publish" in dataset_name: #ZS: Check if the user input the dataset_name as BXDPublish, etc (which is always going to be the group name + "Publish"
        dataset_name = dataset_name.replace("Publish", "")

    # group_id is None for an unknown group; bound as NULL it matches no rows
    group_id = get_group_id(dataset_name)
    pheno_query = """
        SELECT
            PublishXRef.PhenotypeId, PublishXRef.Locus, PublishXRef.LRS, PublishXRef.additive
        FROM
            PublishXRef
        WHERE
            PublishXRef.Id = :trait AND
            PublishXRef.InbredSetId = :group_id
    """

    logger.debug("QUERY:", pheno_query)

    pheno_results = g.db.execute(sqlalchemy.text(pheno_query),
                                 {"trait": trait_name, "group_id": group_id})

    trait_info = pheno_results.fetchone()
    if trait_info:
        trait_dict = {
            'id' : trait_info[0],
            'locus' : trait_info[1],
            'lrs' : trait_info[2],
            'additive' : trait_info[3]
        }

        return flask.jsonify(trait_dict)

    return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
+
@app.route("/api/v_{}/correlation".format(version), methods=('GET',))
def get_corr_results():
    """Run a correlation from the request's query arguments and return it as JSON.

    Returns the "No Results" error document when the result is empty.
    """
    corr_results = correlation.do_correlation(request.args)

    if len(corr_results) == 0:
        return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")

    #ZS: I think flask.jsonify expects a dict/list instead of JSON
    return flask.jsonify(corr_results)
+
@app.route("/api/v_{}/mapping".format(version), methods=('GET',))
def get_mapping_results():
    """Run a mapping from the request's query arguments and return it as a CSV download.

    The attachment is named "mapping_<UTC timestamp>.csv". Returns the
    "No Results" error document when the mapping result is empty.
    """
    mapping_rows = mapping.do_mapping_for_api(request.args)

    if len(mapping_rows) == 0:
        return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")

    timestamp = datetime.datetime.utcnow().strftime('%b_%d_%Y_%I:%M%p')
    filename = "mapping_" + timestamp + ".csv"

    # Serialise the rows into an in-memory buffer
    buf = StringIO.StringIO()
    csv.writer(buf).writerows(mapping_rows)

    output = make_response(buf.getvalue())
    output.headers["Content-Disposition"] = "attachment; filename=" + filename
    output.headers["Content-type"] = "text/csv"

    return output
+
@app.route("/api/v_{}/genotypes/<path:group_name>".format(version))
@app.route("/api/v_{}/genotypes/<path:group_name>.<path:file_format>".format(version))
def get_genotypes(group_name, file_format="csv"):
    """Return a group's genotype file as a download.

    Args:
        group_name: Group name from the URL; used to build the file name.
        file_format: "csv" or "geno" serves the tab-delimited .geno file;
            any other value serves the comma-delimited BIMBAM file.

    Returns:
        An attachment response with the file contents, or the "No Results"
        error document when the requested file does not exist.
    """
    si = StringIO.StringIO()
    if file_format == "csv" or file_format == "geno":
        filename = group_name + ".geno"

        geno_path = _resolve_genotype_file(flat_files('genotype'), group_name + ".geno")
        if geno_path is None:
            return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")

        output_lines = []
        with open(geno_path) as genofile:
            for line in genofile:
                if line.startswith(("#", "@")):
                    # Comment/metadata lines are kept whole as a single cell
                    output_lines.append([line.strip()])
                else:
                    output_lines.append(line.split())

        csv_writer = csv.writer(si, delimiter = '\t', escapechar = "\\", quoting = csv.QUOTE_NONE)
    else:
        filename = group_name + ".bimbam"

        # Check for the BIMBAM file that is actually opened; the old code
        # tested for the .geno file and then opened a different path.
        bimbam_path = _resolve_genotype_file(flat_files('genotype/bimbam'), group_name + "_geno.txt")
        if bimbam_path is None:
            return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")

        output_lines = []
        with open(bimbam_path) as genofile:
            for line in genofile:
                output_lines.append([field.strip() for field in line.split(",")])

        csv_writer = csv.writer(si, delimiter = ',')

    csv_writer.writerows(output_lines)
    output = make_response(si.getvalue())
    output.headers["Content-Disposition"] = "attachment; filename=" + filename
    output.headers["Content-type"] = "text/csv"

    return output


def _resolve_genotype_file(directory, file_name):
    """Return the real path of file_name inside directory, or None.

    None is returned when the resolved path falls outside directory (e.g. a
    name containing "..") or does not point at a regular file.
    """
    base_dir = os.path.realpath(directory)
    candidate = os.path.realpath(os.path.join(base_dir, file_name))
    if not candidate.startswith(base_dir + os.sep):
        return None
    if not os.path.isfile(candidate):
        return None
    return candidate
+
@app.route("/api/v_{}/traits/<path:dataset_name>".format(version), methods=('GET',))
@app.route("/api/v_{}/traits/<path:dataset_name>.<path:file_format>".format(version), methods=('GET',))
def get_traits(dataset_name, file_format = "json"):
    """Placeholder for listing the traits of a dataset; not implemented yet.

    Args:
        dataset_name: Dataset name from the URL (currently unused).
        file_format: Format suffix from the URL (currently unused).

    Returns:
        An error document stating that the endpoint is not implemented. The
        previous bare ``NotImplemented`` expression did nothing, so the view
        returned None, which Flask does not accept as a response.
    """
    #ZS: Need to check about the "start" and "stop" stuff since it seems to just limit the number of results to stop - start + 1 in Pjotr's elixir code

    return return_error(code=501, source=request.url_rule.rule, title="Not Implemented",
                        details="This endpoint has not been implemented yet.")
+
def return_error(code, source, title, details):
    """Wrap a single error description in an ``{"errors": [...]}`` JSON response.

    Args:
        code: Value reported under "status" in the body. It is only embedded
            in the JSON; this function does not set the HTTP status itself.
        source: Value reported under "source" -> "pointer".
        title: Short error title.
        details: Longer description, reported under "detail".

    Returns:
        The response produced by flask.jsonify for the error document.
    """
    error_entry = {
        "status": code,
        "source": {"pointer": source},
        "title": title,
        "detail": details,
    }

    return flask.jsonify({"errors": [error_entry]})
+
def get_dataset_trait_ids(dataset_name):
    """Look up the trait ids and names belonging to a dataset.

    The dataset type is inferred from the name: names containing "Geno" are
    genotype datasets, names containing "Publish" are phenotype datasets, and
    everything else is treated as a ProbeSet dataset.

    Args:
        dataset_name: Dataset name.

    Returns:
        A tuple ``(trait_ids, trait_names, data_type, dataset_id)``. For
        phenotype datasets trait_names is the same list as trait_ids and
        dataset_id is the group id. For the other two types dataset_id is
        None when the dataset has no traits (previously an IndexError).
    """
    # assumes g.db is a SQLAlchemy engine/connection - TODO confirm
    if "Geno" in dataset_name:
        data_type = "Geno" #ZS: Need to pass back the dataset type
        query = sqlalchemy.text("""
            SELECT
                GenoXRef.GenoId, Geno.Name, GenoXRef.GenoFreezeId
            FROM
                Geno, GenoXRef, GenoFreeze
            WHERE
                Geno.Id = GenoXRef.GenoId AND
                GenoXRef.GenoFreezeId = GenoFreeze.Id AND
                GenoFreeze.Name = :dataset
        """)

        results = g.db.execute(query, {"dataset": dataset_name}).fetchall()

        trait_ids = [result[0] for result in results]
        trait_names = [result[1] for result in results]
        dataset_id = results[0][2] if results else None
        return trait_ids, trait_names, data_type, dataset_id

    elif "Publish" in dataset_name:
        data_type = "Publish"
        # Phenotype dataset names are the group name plus "Publish"
        dataset_name = dataset_name.replace("Publish", "")
        dataset_id = get_group_id(dataset_name)

        query = sqlalchemy.text("""
            SELECT
                PublishXRef.PhenotypeId
            FROM
                PublishXRef
            WHERE
                PublishXRef.InbredSetId = :group_id
        """)

        # A None group id is bound as NULL and therefore matches no rows
        results = g.db.execute(query, {"group_id": dataset_id}).fetchall()

        trait_ids = [result[0] for result in results]
        trait_names = trait_ids
        return trait_ids, trait_names, data_type, dataset_id

    else:
        data_type = "ProbeSet"
        query = sqlalchemy.text("""
            SELECT
                ProbeSetXRef.ProbeSetId, ProbeSet.Name, ProbeSetXRef.ProbeSetFreezeId
            FROM
                ProbeSet, ProbeSetXRef, ProbeSetFreeze
            WHERE
                ProbeSet.Id = ProbeSetXRef.ProbeSetId AND
                ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
                ProbeSetFreeze.Name = :dataset
        """)

        results = g.db.execute(query, {"dataset": dataset_name}).fetchall()

        trait_ids = [result[0] for result in results]
        trait_names = [result[1] for result in results]
        dataset_id = results[0][2] if results else None
        return trait_ids, trait_names, data_type, dataset_id
+
def get_samplelist(dataset_name):
    """Return the names of the strains belonging to a dataset's group.

    Args:
        dataset_name: Dataset name, resolved to a group id through
            get_group_id_from_dataset.

    Returns:
        A list of Strain.Name values; empty when no group id is found.
    """
    group_id = get_group_id_from_dataset(dataset_name)
    if group_id is None:
        return []

    # Bind the id as a parameter rather than formatting it into the SQL.
    # assumes g.db is a SQLAlchemy engine/connection - TODO confirm
    query = sqlalchemy.text("""
        SELECT Strain.Name
        FROM Strain, StrainXRef
        WHERE StrainXRef.StrainId = Strain.Id AND
              StrainXRef.InbredSetId = :group_id
    """)

    results = g.db.execute(query, {"group_id": group_id}).fetchall()

    return [result[0] for result in results]
+
def get_group_id_from_dataset(dataset_name):
    """Return the InbredSet id of the group that a dataset belongs to.

    The table consulted depends on the name: PublishFreeze when it contains
    "Publish", GenoFreeze when it contains "Geno", otherwise ProbeSetFreeze.

    Args:
        dataset_name: Dataset name.

    Returns:
        The InbredSet.Id, or None when no matching dataset is found
        (previously this raised a TypeError).
    """
    if "Publish" in dataset_name:
        query = """
            SELECT
                InbredSet.Id
            FROM
                InbredSet, PublishFreeze
            WHERE
                PublishFreeze.InbredSetId = InbredSet.Id AND
                PublishFreeze.Name = :dataset
        """
    elif "Geno" in dataset_name:
        query = """
            SELECT
                InbredSet.Id
            FROM
                InbredSet, GenoFreeze
            WHERE
                GenoFreeze.InbredSetId = InbredSet.Id AND
                GenoFreeze.Name = :dataset
        """
    else:
        query = """
            SELECT
                InbredSet.Id
            FROM
                InbredSet, ProbeSetFreeze, ProbeFreeze
            WHERE
                ProbeFreeze.InbredSetId = InbredSet.Id AND
                ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND
                ProbeSetFreeze.Name = :dataset
        """

    # The name is bound as a parameter rather than formatted into the SQL.
    # assumes g.db is a SQLAlchemy engine/connection - TODO confirm
    result = g.db.execute(sqlalchemy.text(query), {"dataset": dataset_name}).fetchone()

    if result:
        return result[0]
    return None
+
def get_group_id(group_name):
    """Return the InbredSet id for a group name, or None if there is no match.

    Args:
        group_name: Compared against InbredSet.Name.
    """
    # The name is bound as a parameter rather than formatted into the SQL.
    # assumes g.db is a SQLAlchemy engine/connection - TODO confirm
    query = sqlalchemy.text("""
        SELECT InbredSet.Id
        FROM InbredSet
        WHERE InbredSet.Name = :group
    """)

    group_id = g.db.execute(query, {"group": group_name}).fetchone()
    if group_id:
        return group_id[0]
    else:
        return None