about summary refs log tree commit diff
path: root/wqflask/maintenance
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r--wqflask/maintenance/convert_dryad_to_bimbam.py70
-rw-r--r--wqflask/maintenance/gen_select_dataset.py2
-rw-r--r--wqflask/maintenance/quantile_normalize.py4
3 files changed, 73 insertions, 3 deletions
diff --git a/wqflask/maintenance/convert_dryad_to_bimbam.py b/wqflask/maintenance/convert_dryad_to_bimbam.py
new file mode 100644
index 00000000..e833b395
--- /dev/null
+++ b/wqflask/maintenance/convert_dryad_to_bimbam.py
@@ -0,0 +1,70 @@
+#!/usr/bin/python
+
+"""
+Convert a Data Dryad file to a BIMBAM _geno file (no _snps file is written yet)
+
+
+"""
+
+from __future__ import print_function, division, absolute_import
+import sys
+sys.path.append("..")
+
+
+def read_dryad_file(filename):
+    """Read a space-delimited Dryad genotype file into BIMBAM geno rows.
+
+    The first non-blank line is the header; its fields from the third on
+    are marker names.  On later lines field 0 is the sample name, field 1
+    must equal "no" for the sample to be kept (others are only counted),
+    and the remaining fields are that sample's values, one per marker.
+    Prints the number of skipped samples and returns one row per marker:
+    [marker, "X", "Y", value of each kept sample in file order].
+    """
+    exclude_count = 0
+    marker_list = None
+    sample_dict = {}
+    sample_list = []
+    with open(filename, 'r') as the_file:
+        for line in the_file:
+            if not line.strip():
+                continue  # a blank line has no flag field to inspect
+            # Drop the terminator, or the last field of each line keeps "\n".
+            fields = line.rstrip("\r\n").split(" ")
+            if marker_list is None:
+                marker_list = fields[2:]
+            elif fields[1] == "no":
+                sample_list.append(fields[0])
+                sample_dict[fields[0]] = fields[2:]
+            else:
+                exclude_count += 1
+    print(exclude_count)
+    return [[marker, "X", "Y"] + [sample_dict[name][i] for name in sample_list]
+            for i, marker in enumerate(marker_list or [])]
+
+    #for i, marker in enumerate(marker_list):
+    #    this_row = []
+    #    this_row.append(marker)
+    #    this_row.append("X")
+    #    this_row.append("Y")
+    #    with open(filename, 'r') as the_file:
+    #        for j, line in enumerate(the_file):
+    #            if j > 0:
+    #                this_row.append(line.split(" ")[i+2])
+    #        print("row: " + str(i))
+    #        geno_rows.append(this_row)
+    #            
+    #return geno_rows
+
+def write_bimbam_files(geno_rows, geno_path='/home/zas1024/cfw_data/CFW_geno.txt'):
+    """Write each row of geno_rows to geno_path as one ", "-joined line."""
+    with open(geno_path, 'w') as geno_fh:
+        geno_fh.writelines(", ".join(row) + "\n" for row in geno_rows)
+
+def convert_dryad_to_bimbam(filename):
+    """Read the Dryad file at filename and write its rows as BIMBAM geno."""
+    write_bimbam_files(read_dryad_file(filename))
+
+if __name__=="__main__":
+    # The first CLI argument is the input file's stem under cfw_data.
+    convert_dryad_to_bimbam("/home/zas1024/cfw_data/" + sys.argv[1] + ".txt")
\ No newline at end of file
diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py
index 2825c6ea..18b2dac9 100644
--- a/wqflask/maintenance/gen_select_dataset.py
+++ b/wqflask/maintenance/gen_select_dataset.py
@@ -63,7 +63,7 @@ from pprint import pformat as pf
 
 #conn = Engine.connect()
 
-def parse_db_uri(db_uri):
+def parse_db_uri():
     """Converts a database URI to the db name, host name, user name, and password"""
 
     parsed_uri = urlparse.urlparse(SQL_URI)
diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py
index c11073fb..41a3aad8 100644
--- a/wqflask/maintenance/quantile_normalize.py
+++ b/wqflask/maintenance/quantile_normalize.py
@@ -59,7 +59,7 @@ def set_data(dataset_name):
     orig_file = "/home/zas1024/cfw_data/" + dataset_name + ".txt"
 
     sample_list = []
-    with open(orig_file, 'r') as orig_fh, open('quant_norm.csv', 'r') as quant_fh:
+    with open(orig_file, 'r') as orig_fh, open('/home/zas1024/cfw_data/quant_norm.csv', 'r') as quant_fh:
         for i, (line1, line2) in enumerate(izip(orig_fh, quant_fh)):
             trait_dict = {}
             sample_list = []
@@ -118,7 +118,7 @@ if __name__ == '__main__':
 
     #out_filename = sys.argv[1][:-4] + '_quantnorm.txt'
 
-    #success, _ = bulk(es, set_data(sys.argv[1]))
+    success, _ = bulk(es, set_data(sys.argv[1]))
 
     response = es.search(
         index = "traits", doc_type = "trait", body = {