aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzsloan2018-04-23 16:19:24 +0000
committerzsloan2018-04-23 16:19:24 +0000
commit36ceda09d76c898c5f818236122f5d431261a5b1 (patch)
tree996b174a11e0e8d27a8b404216f7a4a23de15933
parent72db6b91baf15de4fbd64cd4aef022cf32067b0c (diff)
downloadgenenetwork2-36ceda09d76c898c5f818236122f5d431261a5b1.tar.gz
Changed GEMMA mapping to use -lmm 2 (Likelihood ratio test) as a parameter instead of -lmm 1 (Wald test)
Added script to convert .geno files to JSON to maintenance folder (geno_to_json.py)
-rw-r--r--wqflask/maintenance/geno_to_json.py195
-rw-r--r--wqflask/wqflask/marker_regression/gemma_mapping.py10
-rw-r--r--wqflask/wqflask/marker_regression/marker_regression_gn1.py8
-rw-r--r--wqflask/wqflask/templates/show_trait_mapping_tools.html13
4 files changed, 214 insertions, 12 deletions
diff --git a/wqflask/maintenance/geno_to_json.py b/wqflask/maintenance/geno_to_json.py
new file mode 100644
index 00000000..789a1691
--- /dev/null
+++ b/wqflask/maintenance/geno_to_json.py
@@ -0,0 +1,195 @@
+#!/usr/bin/python
+
+"""
+Convert .geno files to json
+
+This file goes through all of the genofiles in the genofile directory (.geno)
+and converts them to json files that are used when running the marker regression
+code
+
+"""
+
+from __future__ import print_function, division, absolute_import
+import sys
+sys.path.append("..")
+import os
+import glob
+import traceback
+import gzip
+
+#import numpy as np
+#from pyLMM import lmm
+
+import simplejson as json
+
+from pprint import pformat as pf
+
+class EmptyConfigurations(Exception): pass
+
+
+
+class Marker(object):
+ def __init__(self):
+ self.name = None
+ self.chr = None
+ self.cM = None
+ self.Mb = None
+ self.genotypes = []
+
+class ConvertGenoFile(object):
+
+ def __init__(self, input_file, output_file):
+
+ self.input_file = input_file
+ self.output_file = output_file
+
+ self.mb_exists = False
+ self.cm_exists = False
+ self.markers = []
+
+ self.latest_row_pos = None
+ self.latest_col_pos = None
+
+ self.latest_row_value = None
+ self.latest_col_value = None
+
+ def convert(self):
+
+ self.haplotype_notation = {
+ '@mat': "1",
+ '@pat': "0",
+ '@het': "0.5",
+ '@unk': "NA"
+ }
+
+ self.configurations = {}
+ #self.skipped_cols = 3
+
+ #if self.input_file.endswith(".geno.gz"):
+ # print("self.input_file: ", self.input_file)
+ # self.input_fh = gzip.open(self.input_file)
+ #else:
+ self.input_fh = open(self.input_file)
+
+ with open(self.output_file, "w") as self.output_fh:
+ #if self.file_type == "geno":
+ self.process_csv()
+ #elif self.file_type == "snps":
+ # self.process_snps_file()
+
+
+ def process_csv(self):
+ for row_count, row in enumerate(self.process_rows()):
+ row_items = row.split("\t")
+
+ this_marker = Marker()
+ this_marker.name = row_items[1]
+ this_marker.chr = row_items[0]
+ if self.cm_exists and self.mb_exists:
+ this_marker.cM = row_items[2]
+ this_marker.Mb = row_items[3]
+ genotypes = row_items[4:]
+ elif self.cm_exists:
+ this_marker.cM = row_items[2]
+ genotypes = row_items[3:]
+ elif self.mb_exists:
+ this_marker.Mb = row_items[2]
+ genotypes = row_items[3:]
+ else:
+ genotypes = row_items[2:]
+ for item_count, genotype in enumerate(genotypes):
+ if genotype.upper() in self.configurations:
+ this_marker.genotypes.append(self.configurations[genotype.upper()])
+ else:
+ this_marker.genotypes.append("NA")
+
+ #print("this_marker is:", pf(this_marker.__dict__))
+ #if this_marker.chr == "14":
+ self.markers.append(this_marker.__dict__)
+
+ with open(self.output_file, 'w') as fh:
+ json.dump(self.markers, fh, indent=" ", sort_keys=True)
+
+ # print('configurations:', str(configurations))
+ #self.latest_col_pos = item_count + self.skipped_cols
+ #self.latest_col_value = item
+
+ #if item_count != 0:
+ # self.output_fh.write(" ")
+ #self.output_fh.write(self.configurations[item.upper()])
+
+ #self.output_fh.write("\n")
+
+
+ def process_rows(self):
+ for self.latest_row_pos, row in enumerate(self.input_fh):
+ #if self.input_file.endswith(".geno.gz"):
+ # print("row: ", row)
+ self.latest_row_value = row
+ # Take care of headers
+ if not row.strip():
+ continue
+ if row.startswith('#'):
+ continue
+ if row.startswith('Chr'):
+ if 'Mb' in row.split():
+ self.mb_exists = True
+ if 'cM' in row.split():
+ self.cm_exists = True
+ continue
+ if row.startswith('@'):
+ key, _separater, value = row.partition(':')
+ key = key.strip()
+ value = value.strip()
+ if key in self.haplotype_notation:
+ self.configurations[value] = self.haplotype_notation[key]
+ continue
+ if not len(self.configurations):
+ raise EmptyConfigurations
+ yield row
+
+ @classmethod
+ def process_all(cls, old_directory, new_directory):
+ os.chdir(old_directory)
+ for input_file in glob.glob("*"):
+ if not input_file.endswith(('geno', '.geno.gz')):
+ continue
+ group_name = ".".join(input_file.split('.')[:-1])
+ output_file = os.path.join(new_directory, group_name + ".json")
+ print("%s -> %s" % (
+ os.path.join(old_directory, input_file), output_file))
+ convertob = ConvertGenoFile(input_file, output_file)
+ try:
+ convertob.convert()
+ except EmptyConfigurations as why:
+ print(" No config info? Continuing...")
+ #excepted = True
+ continue
+ except Exception as why:
+
+ print(" Exception:", why)
+ print(traceback.print_exc())
+ print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos,
+ convertob.latest_col_pos))
+ print(" Column is:", convertob.latest_col_value)
+ print(" Row is:", convertob.latest_row_value)
+ break
+
+ #def process_snps_file(cls, snps_file, new_directory):
+ # output_file = os.path.join(new_directory, "mouse_families.json")
+ # print("%s -> %s" % (snps_file, output_file))
+ # convertob = ConvertGenoFile(input_file, output_file)
+
+
+
+if __name__=="__main__":
+ Old_Geno_Directory = """/home/zas1024/genotype_files/genotype/"""
+ New_Geno_Directory = """/home/zas1024/genotype_files/genotype/json/"""
+ #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno"""
+ #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps"""
+ #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json")
+ #convertob.convert()
+ ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory)
+ #ConvertGenoFiles(Geno_Directory)
+
+ #process_csv(Input_File, Output_File) \ No newline at end of file
diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py
index 157e4f33..0e31e73e 100644
--- a/wqflask/wqflask/marker_regression/gemma_mapping.py
+++ b/wqflask/wqflask/marker_regression/gemma_mapping.py
@@ -33,7 +33,7 @@ def run_gemma(this_dataset, samples, vals, covariates, method, use_loco):
gen_covariates_file(this_dataset, covariates)
if method == "gemma_plink":
- gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (flat_files('mapping'),
+ gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.cXX.txt -lmm 2 -maf 0.1' % (flat_files('mapping'),
this_dataset.group.name,
flat_files('mapping'),
this_dataset.group.name)
@@ -43,7 +43,7 @@ def run_gemma(this_dataset, samples, vals, covariates, method, use_loco):
webqtlConfig.GENERATED_IMAGE_DIR,
this_dataset.group.name)
else:
- #gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.sXX.txt -lmm 1 -maf 0.1 -o %s_output' % (flat_files('mapping'),
+ #gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.sXX.txt -lmm 2 -maf 0.1 -o %s_output' % (flat_files('mapping'),
gemma_command += ' -outdir %s -o %s_output' % (webqtlConfig.GENERATED_IMAGE_DIR,
this_dataset.group.name)
else:
@@ -69,20 +69,20 @@ def run_gemma(this_dataset, samples, vals, covariates, method, use_loco):
gwa_output_filename = this_dataset.group.name + "_GWA_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
if covariates != "":
- gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > %s/gn2/%s.json' % (flat_files('mapping'),
+ gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 2 -maf 0.1 -debug > %s/gn2/%s.json' % (flat_files('mapping'),
this_dataset.group.name,
flat_files('genotype/bimbam'),
genofile_name,
TEMPDIR,
gwa_output_filename)
else:
- gemma_command += ' -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
+ gemma_command += ' -a %s/%s_snps.txt -lmm 2 -maf 0.1 -debug > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
genofile_name,
TEMPDIR,
gwa_output_filename)
else:
- gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (flat_files('genotype/bimbam'),
+ gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 2 -maf 0.1' % (flat_files('genotype/bimbam'),
genofile_name,
flat_files('genotype/bimbam'),
genofile_name,
diff --git a/wqflask/wqflask/marker_regression/marker_regression_gn1.py b/wqflask/wqflask/marker_regression/marker_regression_gn1.py
index da713325..8e9c5b4c 100644
--- a/wqflask/wqflask/marker_regression/marker_regression_gn1.py
+++ b/wqflask/wqflask/marker_regression/marker_regression_gn1.py
@@ -1062,10 +1062,6 @@ class MarkerRegression(object):
#draw gray blocks for 3' and 5' UTR blocks
if cdsStart and cdsEnd:
- logger.debug("txStart:", txStart)
- logger.debug("cdsStart:", cdsStart)
- logger.debug("txEnd:", txEnd)
- logger.debug("cdsEnd:", cdsEnd)
utrStartPix = (txStart-startMb)*plotXScale + xLeftOffset
utrEndPix = (cdsStart-startMb)*plotXScale + xLeftOffset
if (utrStartPix < xLeftOffset):
@@ -1721,8 +1717,8 @@ class MarkerRegression(object):
for i, qtlresult in enumerate(self.qtlresults):
m = 0
thisLRSColor = self.colorCollection[0]
-
if qtlresult['chr'] != previous_chr and self.selectedChr == -1:
+
if self.manhattan_plot != True:
canvas.drawPolygon(LRSCoordXY,edgeColor=thisLRSColor,closed=0, edgeWidth=lrsEdgeWidth, clipX=(xLeftOffset, xLeftOffset + plotWidth))
@@ -1760,7 +1756,6 @@ class MarkerRegression(object):
AdditiveCoordXY = []
previous_chr = qtlresult['chr']
previous_chr_as_int += 1
-
newStartPosX = (self.ChrLengthDistList[previous_chr_as_int - 1]+self.GraphInterval)*plotXScale
if newStartPosX != oldStartPosX:
startPosX += newStartPosX
@@ -1773,7 +1768,6 @@ class MarkerRegression(object):
this_chr = str(self.ChrList[self.selectedChr][1]+1)
if self.selectedChr == -1 or str(qtlresult['chr']) == this_chr:
Xc = startPosX + (qtlresult['Mb']-startMb)*plotXScale
-
# updated by NL 06-18-2011:
# fix the over limit LRS graph issue since genotype trait may give infinite LRS;
# for any lrs is over than 460(LRS max in this system), it will be reset to 460
diff --git a/wqflask/wqflask/templates/show_trait_mapping_tools.html b/wqflask/wqflask/templates/show_trait_mapping_tools.html
index 0ecf1eb9..d40a7bd6 100644
--- a/wqflask/wqflask/templates/show_trait_mapping_tools.html
+++ b/wqflask/wqflask/templates/show_trait_mapping_tools.html
@@ -343,6 +343,19 @@
</div>
</div>
<div class="mapping_method_fields form-group">
+ <label style="text-align: right;" class="col-xs-3 control-label">Use LOCO</label>
+ <div style="margin-left:20px;" class="col-xs-6 controls">
+ <label class="radio-inline">
+ <input type="radio" name="use_loco" value="True" checked="">
+ Yes
+ </label>
+ <label class="radio-inline">
+ <input type="radio" name="use_loco" value="False">
+ No
+ </label>
+ </div>
+ </div>
+ <div class="mapping_method_fields form-group">
<label style="text-align: right;" class="col-xs-3 control-label">Covariates</label>
<div style="margin-left:20px;" class="col-xs-7">
{% if g.user_session.user_ob and (g.user_session.user_ob.display_num_collections() == "") %}