diff options
author | Pjotr Prins | 2019-02-13 12:19:49 +0000 |
---|---|---|
committer | Pjotr Prins | 2019-02-13 12:19:49 +0000 |
commit | b6350b0d4cf6eb8002d40e86378d71e2823dacfb (patch) | |
tree | 6fa6716af822e613cb81171471819a469394cc6f /wqflask/base/data_set.py | |
parent | 5162d1484f01a51d65e7e70143146f018d96edb4 (diff) | |
parent | 27a08a81107705f213d13fb7334a2b8eb4fd68d2 (diff) | |
download | genenetwork2-b6350b0d4cf6eb8002d40e86378d71e2823dacfb.tar.gz |
Merge branch 'testing' of github.com:genenetwork/genenetwork2 into gn-testing
Diffstat (limited to 'wqflask/base/data_set.py')
-rw-r--r-- | wqflask/base/data_set.py | 56 |
1 files changed, 22 insertions, 34 deletions
```diff
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 4a422ee4..79f72390 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -109,6 +109,7 @@ Publish or ProbeSet. E.g.
                 else:
                     new_type = "ProbeSet"
                 self.datasets[short_dataset_name] = new_type
+
         # Set LOG_LEVEL_DEBUG=5 to see the following:
         logger.debugf(5, "datasets",self.datasets)

@@ -170,28 +171,22 @@ class Markers(object):
     def __init__(self, name):
         json_data_fh = open(locate(name + ".json",'genotype/json'))

-        try:
-            markers = []
-            with open(locate(name + "_snps.txt", 'r')) as bimbam_fh:
+        markers = []
+        with open("%s/%s_snps.txt" % (flat_files('genotype/bimbam'), name), 'r') as bimbam_fh:
+            if len(bimbam_fh.readline().split(", ")) > 2:
+                delimiter = ", "
+            elif len(bimbam_fh.readline().split(",")) > 2:
+                delimiter = ","
+            elif len(bimbam_fh.readline().split("\t")) > 2:
+                delimiter = "\t"
+            else:
+                delimiter = " "
+            for line in bimbam_fh:
                 marker = {}
-                if len(bimbam_fh[0].split(", ")) > 2:
-                    delimiter = ", "
-                elif len(bimbam_fh[0].split(",")) > 2:
-                    delimiter = ","
-                elif len(bimbam_fh[0].split("\t")) > 2:
-                    delimiter = "\t"
-                else:
-                    delimiter = " "
-                for line in bimbam_fh:
-                    marker['name'] = line.split(delimiter)[0]
-                    marker['Mb']
-                    marker['chr'] = line.split(delimiter)[2]
-                    marker['cM']
-                    markers.append(marker)
-            #try:
-            # markers = json.load(json_data_fh)
-        except:
-            markers = []
+                marker['name'] = line.split(delimiter)[0].rstrip()
+                marker['Mb'] = float(line.split(delimiter)[1].rstrip())/1000000
+                marker['chr'] = line.split(delimiter)[2].rstrip()
+                markers.append(marker)

         for marker in markers:
             if (marker['chr'] != "X") and (marker['chr'] != "Y"):
@@ -333,8 +328,6 @@ class DatasetGroup(object):
         return mapping_id, mapping_names

     def get_markers(self):
-        logger.debug("self.species is:", self.species)
-
         def check_plink_gemma():
             if flat_file_exists("mapping"):
                 MAPPING_PATH = flat_files("mapping")+"/"
@@ -371,23 +364,16 @@ class DatasetGroup(object):
             result = Redis.get(key)

         if result is not None:
-            #logger.debug("Sample List Cache hit!!!")
-            #logger.debug("Before unjsonifying {}: {}".format(type(result), result))
             self.samplelist = json.loads(result)
-            #logger.debug(" type: ", type(self.samplelist))
-            #logger.debug(" self.samplelist: ", self.samplelist)
         else:
             logger.debug("Cache not hit")

             genotype_fn = locate_ignore_error(self.name+".geno",'genotype')
-            mapping_fn = locate_ignore_error(self.name+".fam",'mapping')
-            if mapping_fn:
-                self.samplelist = get_group_samplelists.get_samplelist("plink", mapping_fn)
-            elif genotype_fn:
+            if genotype_fn:
                 self.samplelist = get_group_samplelists.get_samplelist("geno", genotype_fn)
             else:
                 self.samplelist = None
-            logger.debug("Sample list: ",self.samplelist)
+
             if USE_REDIS:
                 Redis.set(key, json.dumps(self.samplelist))
                 Redis.expire(key, 60*5)
@@ -457,12 +443,14 @@ def datasets(group_name, this_group = None):
             and InbredSet.Name like %s
             and ProbeSetFreeze.public > %s
             and ProbeSetFreeze.confidentiality < 1
-          ORDER BY Tissue.Name, ProbeSetFreeze.CreateTime desc, ProbeSetFreeze.AvgId)
+          ORDER BY Tissue.Name)
         ''' % (group_name, webqtlConfig.PUBLICTHRESH,
                group_name, webqtlConfig.PUBLICTHRESH,
                "'" + group_name + "'", webqtlConfig.PUBLICTHRESH))

-    for dataset_item in the_results:
+    sorted_results = sorted(the_results, key=lambda kv: kv[0])
+
+    for dataset_item in sorted_results:
         tissue_name = dataset_item[0]
         dataset = dataset_item[1]
         dataset_short = dataset_item[2]
```