-
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 83496000..2d4fd0f2 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -514,6 +514,7 @@ def loading_page():
'bootCheck',
'bootstrap_results',
'LRSCheck',
+ 'covariates',
'maf',
'manhattan_plot',
'control_marker',
@@ -569,6 +570,7 @@ def marker_regression_page():
'bootCheck',
'bootstrap_results',
'LRSCheck',
+ 'covariates',
'maf',
'manhattan_plot',
'control_marker',
--
cgit v1.2.3
From 4f06516befcb16b802afcb81bae28b55279a3c1e Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 4 Sep 2017 02:15:11 -0500
Subject: gemma-wrapper: find binary in profile
---
bin/genenetwork2 | 1 +
etc/default_settings.py | 1 +
wqflask/utility/tools.py | 12 ++++++++----
3 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/bin/genenetwork2 b/bin/genenetwork2
index 236d8f63..e07a4e32 100755
--- a/bin/genenetwork2
+++ b/bin/genenetwork2
@@ -78,6 +78,7 @@ else
export PLINK_COMMAND="$GN2_PROFILE/bin/plink2"
export PYLMM_COMMAND="$GN2_PROFILE/bin/pylmm_redis"
export GEMMA_COMMAND="$GN2_PROFILE/bin/gemma"
+ export GEMMA_WRAPPER_COMMAND="$GN2_PROFILE/bin/gemma-wrapper"
fi
if [ -z $PYTHONPATH ] ; then
echo "ERROR PYTHONPATH has not been set - use GN2_PROFILE!"
diff --git a/etc/default_settings.py b/etc/default_settings.py
index 1b609414..c00f6c8f 100644
--- a/etc/default_settings.py
+++ b/etc/default_settings.py
@@ -69,3 +69,4 @@ JS_GN_PATH = os.environ['HOME']+"/genenetwork/javascript"
# PYLMM_COMMAND = str.strip(os.popen("which pylmm_redis").read())
# PLINK_COMMAND = str.strip(os.popen("which plink2").read())
# GEMMA_COMMAND = str.strip(os.popen("which gemma").read())
+# GEMMA_WRAPPER_COMMAND = str.strip(os.popen("which gemma-wrapper").read())
diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py
index 867bc5c8..c5685cdd 100644
--- a/wqflask/utility/tools.py
+++ b/wqflask/utility/tools.py
@@ -113,6 +113,9 @@ def pylmm_command(guess=None):
def gemma_command(guess=None):
return assert_bin(get_setting("GEMMA_COMMAND",guess))
+def gemma_wrapper_command(guess=None):
+ return assert_bin(get_setting("GEMMA_WRAPPER_COMMAND",guess))
+
def plink_command(guess=None):
return assert_bin(get_setting("PLINK_COMMAND",guess))
@@ -240,10 +243,11 @@ JS_GUIX_PATH = get_setting('JS_GUIX_PATH')
JS_GN_PATH = get_setting('JS_GN_PATH')
# assert_dir(JS_GN_PATH)
-PYLMM_COMMAND = pylmm_command()
-GEMMA_COMMAND = gemma_command()
-PLINK_COMMAND = plink_command()
-TEMPDIR = tempdir() # defaults to UNIX TMPDIR
+PYLMM_COMMAND = pylmm_command()
+GEMMA_COMMAND = gemma_command()
+GEMMA_WRAPPER_COMMAND = gemma_wrapper_command()
+PLINK_COMMAND = plink_command()
+TEMPDIR = tempdir() # defaults to UNIX TMPDIR
# ---- Handle specific JS modules
JS_TWITTER_POST_FETCHER_PATH = get_setting("JS_TWITTER_POST_FETCHER_PATH",js_path("Twitter-Post-Fetcher"))
--
cgit v1.2.3
From 03e2facd6279f5413667dfcca4b2d223a54e4f4e Mon Sep 17 00:00:00 2001
From: zsloan
Date: Tue, 5 Sep 2017 16:48:55 +0000
Subject: Added file converting genofiles to bimbam
---
wqflask/maintenance/convert_geno_to_bimbam.py | 239 ++++++++++++++++++++++++++
1 file changed, 239 insertions(+)
create mode 100644 wqflask/maintenance/convert_geno_to_bimbam.py
diff --git a/wqflask/maintenance/convert_geno_to_bimbam.py b/wqflask/maintenance/convert_geno_to_bimbam.py
new file mode 100644
index 00000000..05006d5c
--- /dev/null
+++ b/wqflask/maintenance/convert_geno_to_bimbam.py
@@ -0,0 +1,239 @@
+#!/usr/bin/python
+
+"""
+Convert .geno files to json
+
+This file goes through all of the genofiles in the genofile directory (.geno)
+and converts them to json files that are used when running the marker regression
+code
+
+"""
+
+from __future__ import print_function, division, absolute_import
+import sys
+sys.path.append("..")
+import os
+import glob
+import traceback
+import gzip
+
+#import numpy as np
+#from pyLMM import lmm
+
+import simplejson as json
+
+from pprint import pformat as pf
+
+class EmptyConfigurations(Exception): pass
+
+
+
+class Marker(object):
+ def __init__(self):
+ self.name = None
+ self.chr = None
+ self.cM = None
+ self.Mb = None
+ self.genotypes = []
+
+class ConvertGenoFile(object):
+
+ def __init__(self, input_file, output_files):
+
+ self.input_file = input_file
+ self.output_files = output_files
+
+ self.mb_exists = False
+ self.cm_exists = False
+ self.markers = []
+
+ self.latest_row_pos = None
+ self.latest_col_pos = None
+
+ self.latest_row_value = None
+ self.latest_col_value = None
+
+ def convert(self):
+
+ self.haplotype_notation = {
+ '@mat': "1",
+ '@pat': "0",
+ '@het': "0.5",
+ '@unk': "NA"
+ }
+
+ self.configurations = {}
+ #self.skipped_cols = 3
+
+ #if self.input_file.endswith(".geno.gz"):
+ # print("self.input_file: ", self.input_file)
+ # self.input_fh = gzip.open(self.input_file)
+ #else:
+ self.input_fh = open(self.input_file)
+
+ with open(self.output_files[0], "w") as self.geno_fh:
+ #if self.file_type == "geno":
+ self.process_csv()
+ #elif self.file_type == "snps":
+ # self.process_snps_file()
+
+
+ def process_csv(self):
+ for row_count, row in enumerate(self.process_rows()):
+ row_items = row.split("\t")
+
+ this_marker = Marker()
+ this_marker.name = row_items[1]
+ this_marker.chr = row_items[0]
+ if self.cm_exists and self.mb_exists:
+ this_marker.cM = row_items[2]
+ this_marker.Mb = row_items[3]
+ genotypes = row_items[4:]
+ elif self.cm_exists:
+ this_marker.cM = row_items[2]
+ genotypes = row_items[3:]
+ elif self.mb_exists:
+ this_marker.Mb = row_items[2]
+ genotypes = row_items[3:]
+ else:
+ genotypes = row_items[2:]
+ for item_count, genotype in enumerate(genotypes):
+ if genotype.upper().strip() in self.configurations:
+ this_marker.genotypes.append(self.configurations[genotype.upper().strip()])
+ else:
+ this_marker.genotypes.append("NA")
+
+ #print("this_marker is:", pf(this_marker.__dict__))
+ #if this_marker.chr == "14":
+ self.markers.append(this_marker.__dict__)
+
+ self.write_to_bimbam()
+
+ # with open(self.output_file, 'w') as fh:
+ # json.dump(self.markers, fh, indent=" ", sort_keys=True)
+
+ # print('configurations:', str(configurations))
+ #self.latest_col_pos = item_count + self.skipped_cols
+ #self.latest_col_value = item
+
+ #if item_count != 0:
+ # self.output_fh.write(" ")
+ #self.output_fh.write(self.configurations[item.upper()])
+
+ #self.output_fh.write("\n")
+
+ def write_to_bimbam(self):
+ with open(self.output_files[0], "w") as geno_fh:
+ # geno_fh.write(str(len(self.sample_list)) + "\n")
+ # geno_fh.write("2\n")
+ # geno_fh.write("IND")
+ # for sample in self.sample_list:
+ # geno_fh.write(" " + sample)
+ # geno_fh.write("\n")
+ for marker in self.markers:
+ geno_fh.write(marker['name'])
+ geno_fh.write(", X, Y")
+ geno_fh.write(", " + ", ".join(marker['genotypes']))
+ geno_fh.write("\n")
+
+ #pheno_fh = open(self.output_files[1], 'w')
+ with open(self.output_files[1], "w") as pheno_fh:
+ for sample in self.sample_list:
+ pheno_fh.write("1\n")
+
+ with open(self.output_files[2], "w") as snp_fh:
+ for marker in self.markers:
+ if self.mb_exists:
+ snp_fh.write(marker['name'] +", " + str(int(float(marker['Mb'])*1000000)) + ", " + marker['chr'] + "\n")
+ else:
+ snp_fh.write(marker['name'] +", " + str(int(float(marker['cM'])*1000000)) + ", " + marker['chr'] + "\n")
+
+
+ def get_sample_list(self, row_contents):
+ self.sample_list = []
+ if self.mb_exists:
+ if self.cm_exists:
+ self.sample_list = row_contents[4:]
+ else:
+ self.sample_list = row_contents[3:]
+ else:
+ if self.cm_exists:
+ self.sample_list = row_contents[3:]
+ else:
+ self.sample_list = row_contents[2:]
+
+ def process_rows(self):
+ for self.latest_row_pos, row in enumerate(self.input_fh):
+ #if self.input_file.endswith(".geno.gz"):
+ # print("row: ", row)
+ self.latest_row_value = row
+ # Take care of headers
+ if not row.strip():
+ continue
+ if row.startswith('#'):
+ continue
+ if row.startswith('Chr'):
+ if 'Mb' in row.split():
+ self.mb_exists = True
+ if 'cM' in row.split():
+ self.cm_exists = True
+ self.get_sample_list(row.split())
+ continue
+ if row.startswith('@'):
+ key, _separater, value = row.partition(':')
+ key = key.strip()
+ value = value.strip()
+ if key in self.haplotype_notation:
+ self.configurations[value] = self.haplotype_notation[key]
+ continue
+ if not len(self.configurations):
+ raise EmptyConfigurations
+ yield row
+
+ @classmethod
+ def process_all(cls, old_directory, new_directory):
+ os.chdir(old_directory)
+ for input_file in glob.glob("*"):
+ if not input_file.endswith(('geno', '.geno.gz')):
+ continue
+ group_name = ".".join(input_file.split('.')[:-1])
+ geno_output_file = os.path.join(new_directory, group_name + "_geno.txt")
+ pheno_output_file = os.path.join(new_directory, group_name + "_pheno.txt")
+ snp_output_file = os.path.join(new_directory, group_name + "_snps.txt")
+ output_files = [geno_output_file, pheno_output_file, snp_output_file]
+ print("%s -> %s" % (
+ os.path.join(old_directory, input_file), geno_output_file))
+ convertob = ConvertGenoFile(input_file, output_files)
+ try:
+ convertob.convert()
+ except EmptyConfigurations as why:
+ print(" No config info? Continuing...")
+ #excepted = True
+ continue
+ except Exception as why:
+
+ print(" Exception:", why)
+ print(traceback.print_exc())
+ print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos,
+ convertob.latest_col_pos))
+ print(" Column is:", convertob.latest_col_value)
+ print(" Row is:", convertob.latest_row_value)
+ break
+
+ #def process_snps_file(cls, snps_file, new_directory):
+ # output_file = os.path.join(new_directory, "mouse_families.json")
+ # print("%s -> %s" % (snps_file, output_file))
+ # convertob = ConvertGenoFile(input_file, output_file)
+
+
+if __name__=="__main__":
+ Old_Geno_Directory = """/home/zas1024/genotype_files/genotype/"""
+ New_Geno_Directory = """/home/zas1024/genotype_files/genotype/bimbam/"""
+ #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno"""
+ #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps"""
+ #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json")
+ #convertob.convert()
+ ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory)
+ #ConvertGenoFiles(Geno_Directory)
+
+ #process_csv(Input_File, Output_File)
\ No newline at end of file
--
cgit v1.2.3
From 425f0e9d8977f8cf741f596315a56c91b750988a Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 7 Sep 2017 16:13:42 +0000
Subject: Added the script to convert bimbam to kinship matrices
---
.../maintenance/generate_kinship_from_bimbam.py | 59 ++++++++++++++++++++++
1 file changed, 59 insertions(+)
create mode 100644 wqflask/maintenance/generate_kinship_from_bimbam.py
diff --git a/wqflask/maintenance/generate_kinship_from_bimbam.py b/wqflask/maintenance/generate_kinship_from_bimbam.py
new file mode 100644
index 00000000..f322341d
--- /dev/null
+++ b/wqflask/maintenance/generate_kinship_from_bimbam.py
@@ -0,0 +1,59 @@
+#!/usr/bin/python
+
+"""
+Generate relatedness matrix files for GEMMA from BIMBAM genotype/phenotype files
+
+This file goes through all of the BIMBAM files in the bimbam diretory
+and uses GEMMA to generate their corresponding kinship/relatedness matrix file
+
+"""
+
+from __future__ import print_function, division, absolute_import
+import sys
+sys.path.append("..")
+import os
+import glob
+
+class GenerateKinshipMatrices(object):
+ def __init__(self, group_name, geno_file, pheno_file):
+ self.group_name = group_name
+ self.geno_file = geno_file
+ self.pheno_file = pheno_file
+
+ def generate_kinship(self):
+ gemma_command = "/gnu/store/xhzgjr0jvakxv6h3blj8z496xjig69b0-profile/bin/gemma -g " + self.geno_file + " -p " + self.pheno_file + " -gk 1 -outdir /home/zas1024/genotype_files/genotype/bimbam/ -o " + self.group_name
+ print("command:", gemma_command)
+ os.system(gemma_command)
+
+ @classmethod
+ def process_all(self, geno_dir, bimbam_dir):
+ os.chdir(geno_dir)
+ for input_file in glob.glob("*"):
+ if not input_file.endswith(('geno', '.geno.gz')):
+ continue
+ group_name = ".".join(input_file.split('.')[:-1])
+ geno_input_file = os.path.join(bimbam_dir, group_name + "_geno.txt")
+ pheno_input_file = os.path.join(bimbam_dir, group_name + "_pheno.txt")
+ convertob = GenerateKinshipMatrices(group_name, geno_input_file, pheno_input_file)
+ try:
+ convertob.generate_kinship()
+ except EmptyConfigurations as why:
+ print(" No config info? Continuing...")
+ continue
+ except Exception as why:
+
+ print(" Exception:", why)
+ print(traceback.print_exc())
+ print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos,
+ convertob.latest_col_pos))
+ print(" Column is:", convertob.latest_col_value)
+ print(" Row is:", convertob.latest_row_value)
+ break
+
+
+if __name__=="__main__":
+ Geno_Directory = """/home/zas1024/genotype_files/genotype/"""
+ Bimbam_Directory = """/home/zas1024/genotype_files/genotype/bimbam/"""
+ GenerateKinshipMatrices.process_all(Geno_Directory, Bimbam_Directory)
+
+ #./gemma -g /home/zas1024/genotype_files/genotype/bimbam/BXD_geno.txt -p /home/zas1024/genotype_files/genotype/bimbam/BXD_pheno.txt -gk 1 -o BXD
\ No newline at end of file
--
cgit v1.2.3