about summary refs log tree commit diff
path: root/wqflask/maintenance
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r-- wqflask/maintenance/__init__.py 0
-rw-r--r-- wqflask/maintenance/get_group_samplelists.py 26
2 files changed, 12 insertions, 14 deletions
diff --git a/wqflask/maintenance/__init__.py b/wqflask/maintenance/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/wqflask/maintenance/__init__.py
diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py
index 2434038e..99e22904 100644
--- a/wqflask/maintenance/get_group_samplelists.py
+++ b/wqflask/maintenance/get_group_samplelists.py
@@ -7,37 +7,35 @@ import gzip
 from base import webqtlConfig
 
 
-def get_sample_list_dir(geno_dir="/home/zas1024/gene/web/genotypes/"):
+def process_genofiles(geno_dir=webqtlConfig.GENODIR):
     os.chdir(geno_dir)
-    
-    for group_file in glob.glob("*"):
-        if group_file.lower().endswith(('.geno', '.geno.gz')):
+    for geno_file in glob.glob("*"):
+        if geno_file.lower().endswith(('.geno', '.geno.gz')):
             #group_name = genofilename.split('.')[0]
-            sample_list = get_sample_list(group_file)
-            print("\n\n{}\n\n".format(sample_list))
+            sample_list = get_samplelist(geno_file)
 
 
-def get_sample_list(group_file):
-    print(group_file)
-    genofilename = str(os.path.join(webqtlConfig.GENODIR, group_file))
-    if genofilename.lower().endswith('.geno.gz'):
+def get_samplelist(geno_file):
+    genofilename = os.path.join(webqtlConfig.GENODIR, geno_file)
+    if os.path.isfile(genofilename + '.gz'):
+        genofilename += '.gz'
         genofile = gzip.open(genofilename)
     else:
         genofile = open(genofilename)
+        
     for line in genofile:
         line = line.strip()
         if not line:
             continue
         if line.startswith(("#", "@")):
             continue
-        headline = line
         break
-    headers = headline.split("\t")
+    
+    headers = line.split()
+    
     if headers[3] == "Mb":
         samplelist = headers[4:]
     else:
         samplelist = headers[3:]
     return samplelist
 
-if __name__ == '__main__':
-    get_sample_list_dir()