about summary refs log tree commit diff
path: root/wqflask/base/data_set.py
diff options
context:
space:
mode:
authorPjotr Prins2019-02-13 12:19:49 +0000
committerPjotr Prins2019-02-13 12:19:49 +0000
commitb6350b0d4cf6eb8002d40e86378d71e2823dacfb (patch)
tree6fa6716af822e613cb81171471819a469394cc6f /wqflask/base/data_set.py
parent5162d1484f01a51d65e7e70143146f018d96edb4 (diff)
parent27a08a81107705f213d13fb7334a2b8eb4fd68d2 (diff)
downloadgenenetwork2-b6350b0d4cf6eb8002d40e86378d71e2823dacfb.tar.gz
Merge branch 'testing' of github.com:genenetwork/genenetwork2 into gn-testing
Diffstat (limited to 'wqflask/base/data_set.py')
-rw-r--r--wqflask/base/data_set.py56
1 files changed, 22 insertions, 34 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 4a422ee4..79f72390 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -109,6 +109,7 @@ Publish or ProbeSet. E.g.
                         else:
                             new_type = "ProbeSet"
                         self.datasets[short_dataset_name] = new_type
+
         # Set LOG_LEVEL_DEBUG=5 to see the following:
         logger.debugf(5, "datasets",self.datasets)
 
@@ -170,28 +171,22 @@ class Markers(object):
     def __init__(self, name):
         json_data_fh = open(locate(name + ".json",'genotype/json'))
 
-        try:
-            markers = []
-            with open(locate(name + "_snps.txt", 'r')) as bimbam_fh:
+        markers = []
+        with open("%s/%s_snps.txt" % (flat_files('genotype/bimbam'), name), 'r') as bimbam_fh:
+            if len(bimbam_fh.readline().split(", ")) > 2:
+                delimiter = ", "
+            elif len(bimbam_fh.readline().split(",")) > 2:
+                delimiter = ","
+            elif len(bimbam_fh.readline().split("\t")) > 2:
+                delimiter = "\t"
+            else:
+                delimiter = " "
+            for line in bimbam_fh:
                 marker = {}
-                if len(bimbam_fh[0].split(", ")) > 2:
-                    delimiter = ", "
-                elif len(bimbam_fh[0].split(",")) > 2:
-                    delimiter = ","
-                elif len(bimbam_fh[0].split("\t")) > 2:
-                    delimiter = "\t"
-                else:
-                    delimiter = " "
-                for line in bimbam_fh:
-                    marker['name'] = line.split(delimiter)[0]
-                    marker['Mb']
-                    marker['chr'] = line.split(delimiter)[2]
-                    marker['cM']
-                    markers.append(marker)
-        #try:
-        #    markers = json.load(json_data_fh)
-        except:
-            markers = []
+                marker['name'] = line.split(delimiter)[0].rstrip()
+                marker['Mb'] = float(line.split(delimiter)[1].rstrip())/1000000
+                marker['chr'] = line.split(delimiter)[2].rstrip()
+                markers.append(marker)
 
         for marker in markers:
             if (marker['chr'] != "X") and (marker['chr'] != "Y"):
@@ -333,8 +328,6 @@ class DatasetGroup(object):
         return mapping_id, mapping_names
 
     def get_markers(self):
-        logger.debug("self.species is:", self.species)
-
         def check_plink_gemma():
             if flat_file_exists("mapping"):
                 MAPPING_PATH = flat_files("mapping")+"/"
@@ -371,23 +364,16 @@ class DatasetGroup(object):
             result = Redis.get(key)
 
         if result is not None:
-            #logger.debug("Sample List Cache hit!!!")
-            #logger.debug("Before unjsonifying {}: {}".format(type(result), result))
             self.samplelist = json.loads(result)
-            #logger.debug("  type: ", type(self.samplelist))
-            #logger.debug("  self.samplelist: ", self.samplelist)
         else:
             logger.debug("Cache not hit")
 
             genotype_fn = locate_ignore_error(self.name+".geno",'genotype')
-            mapping_fn = locate_ignore_error(self.name+".fam",'mapping')
-            if mapping_fn:
-                self.samplelist = get_group_samplelists.get_samplelist("plink", mapping_fn)
-            elif genotype_fn:
+            if genotype_fn:
                 self.samplelist = get_group_samplelists.get_samplelist("geno", genotype_fn)
             else:
                 self.samplelist = None
-            logger.debug("Sample list: ",self.samplelist)
+
             if USE_REDIS:
                 Redis.set(key, json.dumps(self.samplelist))
                 Redis.expire(key, 60*5)
@@ -457,12 +443,14 @@ def datasets(group_name, this_group = None):
             and InbredSet.Name like %s
             and ProbeSetFreeze.public > %s
             and ProbeSetFreeze.confidentiality < 1
-          ORDER BY Tissue.Name, ProbeSetFreeze.CreateTime desc, ProbeSetFreeze.AvgId)
+          ORDER BY Tissue.Name)
         ''' % (group_name, webqtlConfig.PUBLICTHRESH,
               group_name, webqtlConfig.PUBLICTHRESH,
               "'" + group_name + "'", webqtlConfig.PUBLICTHRESH))
 
-    for dataset_item in the_results:
+    sorted_results = sorted(the_results, key=lambda kv: kv[0])
+
+    for dataset_item in sorted_results:
         tissue_name = dataset_item[0]
         dataset = dataset_item[1]
         dataset_short = dataset_item[2]