aboutsummaryrefslogtreecommitdiff
path: root/wqflask/wqflask/api/mapping.py
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/wqflask/api/mapping.py')
-rw-r--r--wqflask/wqflask/api/mapping.py105
1 files changed, 75 insertions, 30 deletions
diff --git a/wqflask/wqflask/api/mapping.py b/wqflask/wqflask/api/mapping.py
index d1cc368e..5eacc83a 100644
--- a/wqflask/wqflask/api/mapping.py
+++ b/wqflask/wqflask/api/mapping.py
@@ -1,19 +1,12 @@
-import string
-
from base import data_set
-from base import webqtlConfig
from base.trait import create_trait, retrieve_sample_data
-from utility import helper_functions
-from wqflask.marker_regression import gemma_mapping, rqtl_mapping, qtlreaper_mapping, plink_mapping
-
-import utility.logger
-logger = utility.logger.getLogger(__name__)
-
+from wqflask.marker_regression import gemma_mapping, rqtl_mapping
+from wqflask.show_trait.show_trait import normf
def do_mapping_for_api(start_vars):
- assert('db' in start_vars)
- assert('trait_id' in start_vars)
+ if ('db' not in start_vars) or ("trait_id" not in start_vars):
+ raise ValueError("Mapping: db and trait_id are not in start_vars")
dataset = data_set.create_dataset(dataset_name=start_vars['db'])
dataset.group.get_markers()
@@ -23,33 +16,67 @@ def do_mapping_for_api(start_vars):
samples = []
vals = []
- for sample in dataset.group.samplelist:
- in_trait_data = False
- for item in this_trait.data:
- if this_trait.data[item].name == sample:
- value = str(this_trait.data[item].value)
- samples.append(item)
- vals.append(value)
- in_trait_data = True
- break
- if not in_trait_data:
- vals.append("x")
-
mapping_params = initialize_parameters(start_vars, dataset, this_trait)
- # ZS: It seems to take an empty string as default. This should probably be changed.
+ genofile_samplelist = []
+ if mapping_params.get('genofile'):
+ dataset.group.genofile = mapping_params['genofile']
+ genofile_samplelist = get_genofile_samplelist(dataset)
+
+ if (len(genofile_samplelist) > 0):
+ samplelist = genofile_samplelist
+ for sample in samplelist:
+ in_trait_data = False
+ for item in this_trait.data:
+ if this_trait.data[item].name == sample:
+ value = str(this_trait.data[item].value)
+ samples.append(item)
+ vals.append(value)
+ in_trait_data = True
+ break
+ if not in_trait_data:
+ vals.append("x")
+ else:
+ samplelist = dataset.group.samplelist
+ for sample in samplelist:
+ in_trait_data = False
+ for item in this_trait.data:
+ if this_trait.data[item].name == sample:
+ value = str(this_trait.data[item].value)
+ samples.append(item)
+ vals.append(value)
+ in_trait_data = True
+ break
+ if not in_trait_data:
+ vals.append("x")
+
+ if mapping_params.get('transform') == "qnorm":
+ vals_minus_x = [float(val) for val in vals if val != "x"]
+ qnorm_vals = normf(vals_minus_x)
+ qnorm_vals_with_x = []
+ counter = 0
+ for val in vals:
+ if val == "x":
+ qnorm_vals_with_x.append("x")
+ else:
+ qnorm_vals_with_x.append(qnorm_vals[counter])
+ counter += 1
+
+ vals = qnorm_vals_with_x
+
+ # It seems to take an empty string as default. This should probably be changed.
covariates = ""
- if mapping_params['mapping_method'] == "gemma":
+ if mapping_params.get('mapping_method') == "gemma":
header_row = ["name", "chr", "Mb", "lod_score", "p_value"]
- # ZS: gemma_mapping returns both results and the filename for LOCO, so need to only grab the former for api
- if mapping_params['use_loco'] == "True":
+ # gemma_mapping returns both results and the filename for LOCO, so need to only grab the former for api
+ if mapping_params.get('use_loco') == "True":
result_markers = gemma_mapping.run_gemma(
this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])[0]
else:
result_markers = gemma_mapping.run_gemma(
this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])
- elif mapping_params['mapping_method'] == "rqtl":
+ elif mapping_params.get('mapping_method') == "rqtl":
header_row = ["name", "chr", "cM", "lod_score"]
if mapping_params['num_perm'] > 0:
_sperm_output, _suggestive, _significant, result_markers = rqtl_mapping.run_rqtl(this_trait.name, vals, samples, dataset, None, "Mb", mapping_params['rqtl_model'],
@@ -62,10 +89,10 @@ def do_mapping_for_api(start_vars):
mapping_params['do_control'], mapping_params['control_marker'],
mapping_params['manhattan_plot'], None)
- if mapping_params['limit_to']:
+ if mapping_params.get('limit_to'):
result_markers = result_markers[:mapping_params['limit_to']]
- if mapping_params['format'] == "csv":
+ if mapping_params.get('format') == "csv":
output_rows = []
output_rows.append(header_row)
for marker in result_markers:
@@ -138,4 +165,22 @@ def initialize_parameters(start_vars, dataset, this_trait):
except:
mapping_params['perm_check'] = False
+ mapping_params['transform'] = False
+ if 'transform' in start_vars:
+ mapping_params['transform'] = start_vars['transform']
+
+ mapping_params['genofile'] = False
+ if 'genofile' in start_vars:
+ mapping_params['genofile'] = start_vars['genofile']
+
return mapping_params
+
+def get_genofile_samplelist(dataset):
+ genofile_samplelist = []
+
+ genofile_json = dataset.group.get_genofiles()
+ for genofile in genofile_json:
+ if genofile['location'] == dataset.group.genofile and 'sample_list' in genofile:
+ genofile_samplelist = genofile['sample_list']
+
+ return genofile_samplelist