From 6aaefdaae3a9fb068278d9b94d8cdf25d4f8d852 Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Fri, 19 Jul 2013 16:13:47 -0500
Subject: Created file gen_group_samplelists that iterates through all
 genofiles and builds each group's samplelist

---
 wqflask/base/data_set.py     | 50 ++++++++++++++++++++++++++++++++++++++------
 wqflask/base/webqtlConfig.py |  8 +++----
 2 files changed, 48 insertions(+), 10 deletions(-)

(limited to 'wqflask/base')

diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 30221503..cf219fda 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -16,8 +16,6 @@
 # Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
 # at rwilliams@uthsc.edu and xzhou15@uthsc.edu
 #
-#we
-#
 # This module is used by GeneNetwork project (www.genenetwork.org)
 
 from __future__ import absolute_import, print_function, division
@@ -27,6 +25,7 @@ import string
 import collections
 
 import json
+import gzip
 import cPickle as pickle
 import itertools
 
@@ -52,8 +51,6 @@ from pprint import pformat as pf
 DS_NAME_MAP = {}
 
 def create_dataset(dataset_name, dataset_type = None):
-    
-    print("dataset_type:", dataset_type)
     if not dataset_type:
         dataset_type = Dataset_Getter(dataset_name)
         #dataset_type = get_dataset_type_from_json(dataset_name)
@@ -129,7 +126,7 @@ def create_datasets_list():
                 for result in g.db.execute(query).fetchall():
                     #The query at the beginning of this function isn't necessary here, but still would
                     #rather just reuse it
-                    print("type: {}\tname: {}".format(dataset_type, result.Name))
+                    #print("type: {}\tname: {}".format(dataset_type, result.Name))
                     dataset = create_dataset(result.Name, dataset_type)
                     datasets.append(dataset)
             
@@ -261,6 +258,36 @@ class DatasetGroup(object):
         if maternal and paternal:
             self.parlist = [maternal, paternal]
 
+    def get_sample_list(self):
+        '''Set self.samplelist from the header row of this group's .geno file.
+
+        Opens <GENODIR>/<self.name>.geno, skips blank lines and lines that
+        start with "#" or "@", and treats the first remaining line as the
+        tab-separated header. Sample names start at the fifth column when the
+        fourth column is "Mb", otherwise at the fourth column.
+
+        Raises:
+            IOError: if the .geno file cannot be opened.
+            ValueError: if the file contains no header line.
+        '''
+        genofilename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno'))
+        headline = None
+        # "with" guarantees the file handle is closed even if reading fails
+        with open(genofilename, "r") as genofile:
+            for line in genofile:
+                line = line.strip()
+                if not line or line.startswith(("#", "@")):
+                    continue
+                headline = line
+                break
+        if headline is None:
+            raise ValueError("No header line found in " + genofilename)
+        headers = headline.split("\t")
+        # The length check avoids an IndexError on headers with fewer than four columns
+        first_sample = 4 if len(headers) > 3 and headers[3] == "Mb" else 3
+        self.samplelist = headers[first_sample:]
+    
+
     def read_genotype_file(self):
         '''Read genotype from .geno file instead of database'''
         #if self.group == 'BXD300':
@@ -275,7 +302,18 @@ class DatasetGroup(object):
 
         # reaper barfs on unicode filenames, so here we ensure it's a string
         full_filename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno'))
-        genotype_1.read(full_filename)
+        if os.path.isfile(full_filename):
+            print("Reading file: ", full_filename)
+            genotype_1.read(full_filename)
+            print("File read")
+        else:
+            try:
+                full_filename = str(os.path.join(webqtlConfig.TMPDIR, self.name + '.geno'))
+                #print("Reading file")
+                genotype_1.read(full_filename)
+                #print("File read")
+            except IOError:
+                print("File doesn't exist!")
 
         if genotype_1.type == "group" and self.parlist:
             genotype_2 = genotype_1.add(Mat=self.parlist[0], Pat=self.parlist[1])       #, F1=_f1)
diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py
index d4511212..67a9c63f 100755
--- a/wqflask/base/webqtlConfig.py
+++ b/wqflask/base/webqtlConfig.py
@@ -35,7 +35,7 @@ NCBI_LOCUSID = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrie
 UCSC_REFSEQ = "http://genome.cse.ucsc.edu/cgi-bin/hgGene?db=%s&hgg_gene=%s&hgg_chrom=chr%s&hgg_start=%s&hgg_end=%s"
 GENBANK_ID = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=search&doptcmdl=DocSum&term=%s"
 OMIM_ID = "http://www.ncbi.nlm.nih.gov/omim/%s"
-UNIGEN_ID = "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=%s&CID=%s"
+UNIGEN_ID = "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=%s&CID=%s";
 HOMOLOGENE_ID = "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=homologene&Cmd=DetailsSearch&Term=%s"
 PUBMEDLINK_URL = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=%s&dopt=Abstract"
 UCSC_POS = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=mammal&org=%s&db=%s&position=chr%s:%s-%s&pix=800&Submit=submit"
@@ -53,12 +53,12 @@ GNROOT = "/home/zas1024/gene/" # Will remove this and dependent items later
 SECUREDIR = GNROOT + 'secure/'
 COMMON_LIB = GNROOT + 'support/admin'
 HTMLPATH = GNROOT + 'web/'
-PYLMM_PATH = '/home/zas1024/plink/'
-SNP_PATH = '/home/zas1024/snps/' 
+PYLMM_PATH = '/home/zas1024/'
+SNP_PATH = '/mnt/xvdf1/snps/' 
 IMGDIR = HTMLPATH +'image/'
 IMAGESPATH = HTMLPATH + 'images/'
 UPLOADPATH = IMAGESPATH + 'upload/'
-TMPDIR = HTMLPATH + 'tmp/'
+TMPDIR = '/tmp/'
 GENODIR = HTMLPATH + 'genotypes/'
 NEWGENODIR = HTMLPATH + 'new_genotypes/'
 GENO_ARCHIVE_DIR = GENODIR + 'archive/'
-- 
cgit 1.4.1