aboutsummaryrefslogtreecommitdiff
path: root/wqflask
diff options
context:
space:
mode:
authorzsloan2020-02-07 12:44:15 -0600
committerzsloan2020-02-07 12:44:15 -0600
commit7d9ead16f45cc68d1543c673487891b03c990efe (patch)
treecbd7a7cef91e86085ac7fec4aab11b80b5be20b7 /wqflask
parent2668dabc88add8485e9b1b8216c82c2545c1d8e4 (diff)
downloadgenenetwork2-7d9ead16f45cc68d1543c673487891b03c990efe.tar.gz
Users can now add and do mapping on genotype files that only include a subset of samples/strains
Also filtered the results used by Christian's genome browser, so now it should have an easier time loading when dealing with larger numbers of markers
Diffstat (limited to 'wqflask')
-rw-r--r--wqflask/base/data_set.py9
-rw-r--r--wqflask/wqflask/marker_regression/gemma_mapping.py15
-rw-r--r--wqflask/wqflask/marker_regression/run_mapping.py101
-rw-r--r--wqflask/wqflask/show_trait/show_trait.py11
4 files changed, 89 insertions, 47 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 49600c64..7fe9a8ac 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -347,6 +347,15 @@ class DatasetGroup(object):
if maternal and paternal:
self.parlist = [maternal, paternal]
+ def get_genofiles(self):
+ jsonfile = "%s/%s.json" % (webqtlConfig.GENODIR, self.name)
+ try:
+ f = open(jsonfile)
+ except:
+ return None
+ jsondata = json.load(f)
+ return jsondata['genofile']
+
def get_samplelist(self):
result = None
key = "samplelist:v3:" + self.name
diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py
index 3d3f26f3..e2b15c26 100644
--- a/wqflask/wqflask/marker_regression/gemma_mapping.py
+++ b/wqflask/wqflask/marker_regression/gemma_mapping.py
@@ -39,7 +39,7 @@ def run_gemma(this_trait, this_dataset, samples, vals, covariates, use_loco, maf
chr_list_string += this_chromosomes[i+1].name
if covariates != "":
- gen_covariates_file(this_dataset, covariates)
+ gen_covariates_file(this_dataset, covariates, samples)
if use_loco == "True":
generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
@@ -122,7 +122,7 @@ def gen_pheno_txt_file(this_dataset, genofile_name, vals, trait_filename):
else:
outfile.write(value + "\n")
-def gen_covariates_file(this_dataset, covariates):
+def gen_covariates_file(this_dataset, covariates, samples):
covariate_list = covariates.split(",")
covariate_data_object = []
for covariate in covariate_list:
@@ -140,11 +140,12 @@ def gen_covariates_file(this_dataset, covariates):
trait_sample_data = trait_ob.data
logger.debug("SAMPLE DATA:", trait_sample_data)
for index, sample in enumerate(trait_samples):
- if sample in trait_sample_data:
- sample_value = trait_sample_data[sample].value
- this_covariate_data.append(sample_value)
- else:
- this_covariate_data.append("-9")
+ if sample in samples:
+ if sample in trait_sample_data:
+ sample_value = trait_sample_data[sample].value
+ this_covariate_data.append(sample_value)
+ else:
+ this_covariate_data.append("-9")
covariate_data_object.append(this_covariate_data)
with open("{}/{}_covariates.txt".format(flat_files('mapping'), this_dataset.group.name), "w") as outfile:
diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py
index a65dafd5..ac05c4ad 100644
--- a/wqflask/wqflask/marker_regression/run_mapping.py
+++ b/wqflask/wqflask/marker_regression/run_mapping.py
@@ -61,33 +61,42 @@ class RunMapping(object):
self.json_data = {}
self.json_data['lodnames'] = ['lod.hk']
+ #ZS: Sometimes a group may have a genofile that only includes a subset of samples
+ genofile_samplelist = []
+ if 'genofile' in start_vars:
+ if start_vars['genofile'] != "":
+ self.genofile_string = start_vars['genofile']
+ self.dataset.group.genofile = self.genofile_string.split(":")[0]
+ genofile_samplelist = get_genofile_samplelist(self.dataset)
+
all_samples_ordered = self.dataset.group.all_samples_ordered()
self.vals = []
if 'samples' in start_vars:
self.samples = start_vars['samples'].split(",")
for sample in self.samples:
- value = start_vars.get('value:' + sample)
- if value:
- self.vals.append(value)
- else:
- self.samples = []
-
- for sample in self.dataset.group.samplelist:
- # sample is actually the name of an individual
- in_trait_data = False
- for item in self.this_trait.data:
- if self.this_trait.data[item].name == sample:
- value = start_vars['value:' + self.this_trait.data[item].name]
- self.samples.append(self.this_trait.data[item].name)
- self.vals.append(value)
- in_trait_data = True
- break
- if not in_trait_data:
+ if (len(genofile_samplelist) == 0) or (sample in genofile_samplelist):
value = start_vars.get('value:' + sample)
if value:
- self.samples.append(sample)
self.vals.append(value)
+ else:
+ self.samples = []
+
+ for sample in self.dataset.group.samplelist: # sample is actually the name of an individual
+ if (len(genofile_samplelist) == 0) or (sample in genofile_samplelist):
+ in_trait_data = False
+ for item in self.this_trait.data:
+ if self.this_trait.data[item].name == sample:
+ value = start_vars['value:' + self.this_trait.data[item].name]
+ self.samples.append(self.this_trait.data[item].name)
+ self.vals.append(value)
+ in_trait_data = True
+ break
+ if not in_trait_data:
+ value = start_vars.get('value:' + sample)
+ if value:
+ self.samples.append(sample)
+ self.vals.append(value)
self.num_vals = start_vars['num_vals']
@@ -177,10 +186,6 @@ class RunMapping(object):
self.showGenes = "ON"
self.viewLegend = "ON"
- if 'genofile' in start_vars:
- if start_vars['genofile'] != "":
- self.genofile_string = start_vars['genofile']
- self.dataset.group.genofile = self.genofile_string.split(":")[0]
self.dataset.group.get_markers()
if self.mapping_method == "gemma":
self.first_run = True
@@ -321,7 +326,7 @@ class RunMapping(object):
else:
self.qtl_results = []
- self.qtl_results_for_browser = []
+ self.results_for_browser = []
self.annotations_for_browser = []
highest_chr = 1 #This is needed in order to convert the highest chr to X/Y
for marker in results:
@@ -357,7 +362,7 @@ class RunMapping(object):
else:
browser_marker['p_wald'] = 0
- self.qtl_results_for_browser.append(browser_marker)
+ self.results_for_browser.append(browser_marker)
self.annotations_for_browser.append(annot_marker)
if marker['chr'] > 0 or marker['chr'] == "X" or marker['chr'] == "X/Y":
if marker['chr'] > highest_chr or marker['chr'] == "X" or marker['chr'] == "X/Y":
@@ -365,19 +370,28 @@ class RunMapping(object):
if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()):
self.qtl_results.append(marker)
- browser_files = write_input_for_browser(self.dataset, self.qtl_results_for_browser, self.annotations_for_browser)
-
with Bench("Exporting Results"):
export_mapping_results(self.dataset, self.this_trait, self.qtl_results, self.mapping_results_path, self.mapping_scale, self.score_type)
with Bench("Trimming Markers for Figure"):
if len(self.qtl_results) > 30000:
self.qtl_results = trim_markers_for_figure(self.qtl_results)
+ self.results_for_browser = trim_markers_for_figure(self.results_for_browser)
+ filtered_annotations = []
+ for marker in self.results_for_browser:
+ for annot_marker in self.annotations_for_browser:
+ if annot_marker['rs'] == marker['rs']:
+ filtered_annotations.append(annot_marker)
+ break
+ self.annotations_for_browser = filtered_annotations
+ browser_files = write_input_for_browser(self.dataset, self.results_for_browser, self.annotations_for_browser)
+ else:
+ browser_files = write_input_for_browser(self.dataset, self.results_for_browser, self.annotations_for_browser)
with Bench("Trimming Markers for Table"):
self.trimmed_markers = trim_markers_for_table(results)
- chr_lengths = get_chr_lengths(self.mapping_scale, self.dataset, self.qtl_results_for_browser)
+ chr_lengths = get_chr_lengths(self.mapping_scale, self.dataset, self.qtl_results)
#ZS: For zooming into genome browser, need to pass chromosome name instead of number
if self.dataset.group.species == "mouse":
@@ -498,7 +512,9 @@ def export_mapping_results(dataset, trait, markers, results_path, mapping_scale,
output_file.write("\n")
def trim_markers_for_figure(markers):
- if 'lod_score' in markers[0].keys():
+ if 'p_wald' in markers[0].keys():
+ score_type = 'p_wald'
+ elif 'lod_score' in markers[0].keys():
score_type = 'lod_score'
else:
score_type = 'lrs_value'
@@ -508,7 +524,22 @@ def trim_markers_for_figure(markers):
med_counter = 0
high_counter = 0
for marker in markers:
- if score_type == 'lod_score':
+ if score_type == 'p_wald':
+ if marker[score_type] > 0.1:
+ if low_counter % 20 == 0:
+ filtered_markers.append(marker)
+ low_counter += 1
+ elif 0.1 >= marker[score_type] > 0.01:
+ if med_counter % 10 == 0:
+ filtered_markers.append(marker)
+ med_counter += 1
+ elif 0.01 >= marker[score_type] > 0.001:
+ if high_counter % 2 == 0:
+ filtered_markers.append(marker)
+ high_counter += 1
+ else:
+ filtered_markers.append(marker)
+ elif score_type == 'lod_score':
if marker[score_type] < 1:
if low_counter % 20 == 0:
filtered_markers.append(marker)
@@ -595,4 +626,14 @@ def get_chr_lengths(mapping_scale, dataset, qtl_results):
if float(result['ps']) > highest_pos:
highest_pos = float(result['ps'])
- return chr_lengths \ No newline at end of file
+ return chr_lengths
+
+def get_genofile_samplelist(dataset):
+ genofile_samplelist = []
+
+ genofile_json = dataset.group.get_genofiles()
+ for genofile in genofile_json:
+ if genofile['location'] == dataset.group.genofile and 'sample_list' in genofile:
+ genofile_samplelist = genofile['sample_list']
+
+ return genofile_samplelist \ No newline at end of file
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index c81d68d0..4efaa968 100644
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -161,7 +161,7 @@ class ShowTrait(object):
for i, this_chr in enumerate(self.dataset.species.chromosomes.chromosomes):
self.chr_list.append([self.dataset.species.chromosomes.chromosomes[this_chr].name, i])
- self.genofiles = get_genofiles(self.dataset)
+ self.genofiles = self.dataset.group.get_genofiles()
self.has_num_cases = has_num_cases(self.this_trait)
@@ -508,15 +508,6 @@ def get_nearest_marker(this_trait, this_db):
else:
return result[0][0]
-def get_genofiles(this_dataset):
- jsonfile = "%s/%s.json" % (webqtlConfig.GENODIR, this_dataset.group.name)
- try:
- f = open(jsonfile)
- except:
- return None
- jsondata = json.load(f)
- return jsondata['genofile']
-
def get_table_widths(sample_groups, has_num_cases=False):
stats_table_width = 250
if len(sample_groups) > 1: