aboutsummaryrefslogtreecommitdiff
path: root/wqflask/maintenance
diff options
context:
space:
mode:
authorzsloan2018-05-25 15:52:40 +0000
committerzsloan2018-05-25 15:52:40 +0000
commit9bb60bb18ae5ac70fe480095554796b7c18f1b6c (patch)
tree58e1882cdb742e7d4a8e793a2545902fec803ece /wqflask/maintenance
parent67e8f12e103f48329d8b3e38125c0e84b9dc089d (diff)
downloadgenenetwork2-9bb60bb18ae5ac70fe480095554796b7c18f1b6c.tar.gz
Fixed issue causing anonymous collections to not work on my branch and staging, though still not sure why it's working on production without that change
Added a script to convert the Dryad-format genotype files to BIMBAM. Removed db_uri from the parameters of parse_db_uri in gen_select_dataset.py, since it can now just pull it from settings as a global variable.
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r--wqflask/maintenance/convert_dryad_to_bimbam.py70
-rw-r--r--wqflask/maintenance/gen_select_dataset.py2
-rw-r--r--wqflask/maintenance/quantile_normalize.py4
3 files changed, 73 insertions, 3 deletions
diff --git a/wqflask/maintenance/convert_dryad_to_bimbam.py b/wqflask/maintenance/convert_dryad_to_bimbam.py
new file mode 100644
index 00000000..e833b395
--- /dev/null
+++ b/wqflask/maintenance/convert_dryad_to_bimbam.py
@@ -0,0 +1,70 @@
+#!/usr/bin/python
+
+"""
+Convert data dryad files to a BIMBAM _geno and _snps file
+
+
+"""
+
+from __future__ import print_function, division, absolute_import
+import sys
+sys.path.append("..")
+
+
def read_dryad_file(filename):
    """Read a space-delimited dryad genotype file and transpose it to marker rows.

    The first line is treated as the header: everything after its first two
    columns is taken as the list of marker names.  Every following line is
    one sample: column 0 is the sample name, column 1 is a flag, and the
    remaining columns are that sample's values, one per marker.  Only samples
    whose flag is exactly "no" are kept (presumably a "discard" column --
    TODO confirm against the dryad file description); the number of skipped
    samples is printed to stdout.

    Parameters:
        filename -- path of the dryad text file to read.

    Returns:
        A list with one row per marker, in header order.  Each row is
        [marker_name, "X", "Y", value_for_sample_1, value_for_sample_2, ...]
        with samples in the order they appear in the file.  "X" and "Y" are
        fixed placeholder strings.

    Raises:
        IndexError if a kept sample has fewer values than there are markers.
    """
    exclude_count = 0
    marker_list = []
    sample_dict = {}
    sample_list = []

    with open(filename, 'r') as the_file:
        for i, line in enumerate(the_file):
            # Drop the line terminator before splitting; otherwise it stays
            # attached to the last marker name / last genotype value and ends
            # up embedded in the output rows.
            fields = line.rstrip("\r\n").split(" ")

            if i == 0:
                marker_list = fields[2:]
                continue

            if len(fields) < 2:
                # Blank (e.g. trailing) or truncated line: nothing to read.
                continue

            if fields[1] == "no":
                sample_name = fields[0]
                sample_list.append(sample_name)
                sample_dict[sample_name] = fields[2:]
            else:
                exclude_count += 1

    # Transpose: the input has one line per sample, the output one row per
    # marker.
    geno_rows = []
    for i, marker in enumerate(marker_list):
        this_row = [marker, "X", "Y"]
        this_row.extend(sample_dict[sample][i] for sample in sample_list)
        geno_rows.append(this_row)

    print(exclude_count)

    return geno_rows
+
+ #for i, marker in enumerate(marker_list):
+ # this_row = []
+ # this_row.append(marker)
+ # this_row.append("X")
+ # this_row.append("Y")
+ # with open(filename, 'r') as the_file:
+ # for j, line in enumerate(the_file):
+ # if j > 0:
+ # this_row.append(line.split(" ")[i+2])
+ # print("row: " + str(i))
+ # geno_rows.append(this_row)
+ #
+ #return geno_rows
+
def write_bimbam_files(geno_rows, geno_filename='/home/zas1024/cfw_data/CFW_geno.txt'):
    """Write genotype rows to a text file, one ", "-joined line per row.

    Parameters:
        geno_rows -- iterable of rows; each row is a sequence of strings
            (as produced by read_dryad_file).
        geno_filename -- path of the output file.  Defaults to the CFW
            location this script was originally written for, so existing
            callers that pass only geno_rows behave as before.

    The output file is created or truncated.  Nothing is returned.
    """
    with open(geno_filename, 'w') as geno_fh:
        geno_fh.writelines(", ".join(row) + "\n" for row in geno_rows)
+
def convert_dryad_to_bimbam(filename):
    """Run the full conversion: read the dryad file, then write the rows out.

    filename is passed straight to read_dryad_file; the rows it returns are
    handed to write_bimbam_files.
    """
    write_bimbam_files(read_dryad_file(filename))
+
if __name__ == "__main__":
    # The single required argument is the dataset name; the input file is
    # looked up as /home/zas1024/cfw_data/<name>.txt.  Exit with a usage
    # message instead of a bare IndexError when it is missing.
    if len(sys.argv) < 2:
        sys.exit("Usage: convert_dryad_to_bimbam.py <dataset_name>")
    input_filename = "/home/zas1024/cfw_data/" + sys.argv[1] + ".txt"
    convert_dryad_to_bimbam(input_filename)
diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py
index 2825c6ea..18b2dac9 100644
--- a/wqflask/maintenance/gen_select_dataset.py
+++ b/wqflask/maintenance/gen_select_dataset.py
@@ -63,7 +63,7 @@ from pprint import pformat as pf
#conn = Engine.connect()
-def parse_db_uri(db_uri):
+def parse_db_uri():
"""Converts a database URI to the db name, host name, user name, and password"""
parsed_uri = urlparse.urlparse(SQL_URI)
diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py
index c11073fb..41a3aad8 100644
--- a/wqflask/maintenance/quantile_normalize.py
+++ b/wqflask/maintenance/quantile_normalize.py
@@ -59,7 +59,7 @@ def set_data(dataset_name):
orig_file = "/home/zas1024/cfw_data/" + dataset_name + ".txt"
sample_list = []
- with open(orig_file, 'r') as orig_fh, open('quant_norm.csv', 'r') as quant_fh:
+ with open(orig_file, 'r') as orig_fh, open('/home/zas1024/cfw_data/quant_norm.csv', 'r') as quant_fh:
for i, (line1, line2) in enumerate(izip(orig_fh, quant_fh)):
trait_dict = {}
sample_list = []
@@ -118,7 +118,7 @@ if __name__ == '__main__':
#out_filename = sys.argv[1][:-4] + '_quantnorm.txt'
- #success, _ = bulk(es, set_data(sys.argv[1]))
+ success, _ = bulk(es, set_data(sys.argv[1]))
response = es.search(
index = "traits", doc_type = "trait", body = {