aboutsummaryrefslogtreecommitdiff
path: root/wqflask/maintenance
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r--wqflask/maintenance/convert_dryad_to_bimbam.py70
-rw-r--r--wqflask/maintenance/gen_select_dataset.py2
-rw-r--r--wqflask/maintenance/quantile_normalize.py4
3 files changed, 73 insertions, 3 deletions
diff --git a/wqflask/maintenance/convert_dryad_to_bimbam.py b/wqflask/maintenance/convert_dryad_to_bimbam.py
new file mode 100644
index 00000000..e833b395
--- /dev/null
+++ b/wqflask/maintenance/convert_dryad_to_bimbam.py
@@ -0,0 +1,70 @@
+#!/usr/bin/python
+
+"""
+Convert data dryad files to a BIMBAM _geno and _snps file
+
+
+"""
+
+from __future__ import print_function, division, absolute_import
+import sys
+sys.path.append("..")
+
+
def read_dryad_file(filename):
    """Read a Data Dryad genotype table and pivot it into per-marker rows.

    The file is parsed as single-space-delimited text:

    * The first line is the header; its fields from the third onward are
      used as the marker names.
    * On every later line the first field is the sample name, the second
      field is a flag, and the remaining fields are that sample's values,
      one per marker, in header order.

    Only samples whose flag is exactly "no" are kept. The number of samples
    with any other flag is printed to stdout. Blank or single-field lines
    are ignored.

    Returns a list with one row (a list of strings) per marker:
    ``[marker, "X", "Y", <value for 1st kept sample>, <2nd>, ...]``,
    with the kept samples in file order.

    Raises IndexError if a kept sample has fewer values than there are
    markers in the header.
    """
    exclude_count = 0
    marker_list = []
    sample_list = []
    sample_dict = {}

    with open(filename, 'r') as the_file:
        for i, line in enumerate(the_file):
            # Strip the line terminator first; otherwise it stays glued to
            # the last field and ends up inside the output rows.
            fields = line.rstrip("\r\n").split(" ")

            if i == 0:
                marker_list = fields[2:]
                continue

            if len(fields) < 2:
                # Blank or truncated line (e.g. a trailing empty line).
                continue

            if fields[1] != "no":
                exclude_count += 1
                continue

            sample_name = fields[0]
            sample_list.append(sample_name)
            sample_dict[sample_name] = fields[2:]

    geno_rows = []
    for i, marker in enumerate(marker_list):
        # NOTE(review): "X" and "Y" are presumably placeholders for the two
        # allele columns of a BIMBAM genotype file -- confirm.
        this_row = [marker, "X", "Y"]
        this_row.extend(sample_dict[sample][i] for sample in sample_list)
        geno_rows.append(this_row)

    print(exclude_count)

    return geno_rows
+
def write_bimbam_files(geno_rows, geno_filename='/home/zas1024/cfw_data/CFW_geno.txt'):
    """Write genotype rows to a text file, one row per line.

    geno_rows is an iterable of rows, each a sequence of strings; the
    fields of a row are joined with ", " and terminated with a newline.

    geno_filename is the output path. It defaults to the location this
    script has always written to, so existing callers are unaffected. The
    file is overwritten if it already exists.
    """
    with open(geno_filename, 'w') as geno_fh:
        geno_fh.writelines(", ".join(row) + "\n" for row in geno_rows)
+
def convert_dryad_to_bimbam(filename):
    """Convert the Dryad file at *filename* into the BIMBAM genotype file.

    Reads the input with read_dryad_file() and hands the resulting rows
    straight to write_bimbam_files().
    """
    write_bimbam_files(read_dryad_file(filename))
+
if __name__ == "__main__":
    # The first command-line argument is the input file's base name; the
    # file itself is looked up as <name>.txt in the cfw_data directory.
    dataset_name = sys.argv[1]
    convert_dryad_to_bimbam("/home/zas1024/cfw_data/" + dataset_name + ".txt")
diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py
index 2825c6ea..18b2dac9 100644
--- a/wqflask/maintenance/gen_select_dataset.py
+++ b/wqflask/maintenance/gen_select_dataset.py
@@ -63,7 +63,7 @@ from pprint import pformat as pf
#conn = Engine.connect()
-def parse_db_uri(db_uri):
+def parse_db_uri():
"""Converts a database URI to the db name, host name, user name, and password"""
parsed_uri = urlparse.urlparse(SQL_URI)
diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py
index c11073fb..41a3aad8 100644
--- a/wqflask/maintenance/quantile_normalize.py
+++ b/wqflask/maintenance/quantile_normalize.py
@@ -59,7 +59,7 @@ def set_data(dataset_name):
orig_file = "/home/zas1024/cfw_data/" + dataset_name + ".txt"
sample_list = []
- with open(orig_file, 'r') as orig_fh, open('quant_norm.csv', 'r') as quant_fh:
+ with open(orig_file, 'r') as orig_fh, open('/home/zas1024/cfw_data/quant_norm.csv', 'r') as quant_fh:
for i, (line1, line2) in enumerate(izip(orig_fh, quant_fh)):
trait_dict = {}
sample_list = []
@@ -118,7 +118,7 @@ if __name__ == '__main__':
#out_filename = sys.argv[1][:-4] + '_quantnorm.txt'
- #success, _ = bulk(es, set_data(sys.argv[1]))
+ success, _ = bulk(es, set_data(sys.argv[1]))
response = es.search(
index = "traits", doc_type = "trait", body = {