From 6aaefdaae3a9fb068278d9b94d8cdf25d4f8d852 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Fri, 19 Jul 2013 16:13:47 -0500 Subject: Created file gen_group_samplelists that iterates through all genofiles and builds each groups' samplelist --- wqflask/base/data_set.py | 50 ++++- wqflask/base/webqtlConfig.py | 8 +- wqflask/maintenance/gen_select_dataset.py | 4 +- wqflask/maintenance/get_group_samplelists.py | 43 ++++ .../new/javascript/dataset_menu_structure.json | 224 ++++++++++----------- 5 files changed, 206 insertions(+), 123 deletions(-) create mode 100644 wqflask/maintenance/get_group_samplelists.py (limited to 'wqflask') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 30221503..cf219fda 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -16,8 +16,6 @@ # Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) # at rwilliams@uthsc.edu and xzhou15@uthsc.edu # -#we -# # This module is used by GeneNetwork project (www.genenetwork.org) from __future__ import absolute_import, print_function, division @@ -27,6 +25,7 @@ import string import collections import json +import gzip import cPickle as pickle import itertools @@ -52,8 +51,6 @@ from pprint import pformat as pf DS_NAME_MAP = {} def create_dataset(dataset_name, dataset_type = None): - - print("dataset_type:", dataset_type) if not dataset_type: dataset_type = Dataset_Getter(dataset_name) #dataset_type = get_dataset_type_from_json(dataset_name) @@ -129,7 +126,7 @@ def create_datasets_list(): for result in g.db.execute(query).fetchall(): #The query at the beginning of this function isn't necessary here, but still would #rather just reuse it - print("type: {}\tname: {}".format(dataset_type, result.Name)) + #print("type: {}\tname: {}".format(dataset_type, result.Name)) dataset = create_dataset(result.Name, dataset_type) datasets.append(dataset) @@ -261,6 +258,36 @@ class DatasetGroup(object): if maternal and paternal: self.parlist = [maternal, paternal] + def get_sample_list(self): + genofilename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno')) + genofile = open(genofilename, "r") + for line in genofile: + line = line.strip() + if line.startswith(("#", "@")): + continue + headline = line + break + headers = headline.split("\t") + if headers[3] == "Mb": + self.samplelist = headers[4:] + else: + self.samplelist = headers[3:] + + #if genotype_1.type == "group" and self.parlist: + # genotype_2 = genotype_1.add(Mat=self.parlist[0], Pat=self.parlist[1]) #, F1=_f1) + #else: + # genotype_2 = genotype_1 + + #determine default genotype object + #if self.incparentsf1 and genotype_1.type != "intercross": + # genotype = genotype_2 + #else: + # self.incparentsf1 = 0 + # genotype = genotype_1 + + #self.samplelist = list(genotype.prgy) + + def read_genotype_file(self): '''Read genotype from .geno file instead of database''' #if self.group == 'BXD300': @@ -275,7 +302,18 @@ class DatasetGroup(object): # reaper barfs on unicode filenames, so here we ensure it's a string full_filename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno')) - genotype_1.read(full_filename) + if os.path.isfile(full_filename): + print("Reading file: ", full_filename) + genotype_1.read(full_filename) + print("File read") + else: + try: + full_filename = str(os.path.join(webqtlConfig.TMPDIR, self.name + '.geno')) + #print("Reading file") + genotype_1.read(full_filename) + #print("File read") + except IOError: + print("File doesn't exist!") if genotype_1.type == "group" and self.parlist: genotype_2 = genotype_1.add(Mat=self.parlist[0], Pat=self.parlist[1]) #, F1=_f1) diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py index d4511212..67a9c63f 100755 --- a/wqflask/base/webqtlConfig.py +++ b/wqflask/base/webqtlConfig.py @@ -35,7 +35,7 @@ NCBI_LOCUSID = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrie UCSC_REFSEQ = "http://genome.cse.ucsc.edu/cgi-bin/hgGene?db=%s&hgg_gene=%s&hgg_chrom=chr%s&hgg_start=%s&hgg_end=%s" GENBANK_ID = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=search&doptcmdl=DocSum&term=%s" OMIM_ID = "http://www.ncbi.nlm.nih.gov/omim/%s" -UNIGEN_ID = "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=%s&CID=%s" +UNIGEN_ID = "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=%s&CID=%s"; HOMOLOGENE_ID = "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=homologene&Cmd=DetailsSearch&Term=%s" PUBMEDLINK_URL = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=%s&dopt=Abstract" UCSC_POS = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=mammal&org=%s&db=%s&position=chr%s:%s-%s&pix=800&Submit=submit" @@ -53,12 +53,12 @@ GNROOT = "/home/zas1024/gene/" # Will remove this and dependent items later SECUREDIR = GNROOT + 'secure/' COMMON_LIB = GNROOT + 'support/admin' HTMLPATH = GNROOT + 'web/' -PYLMM_PATH = '/home/zas1024/plink/' -SNP_PATH = '/home/zas1024/snps/' +PYLMM_PATH = '/home/zas1024/' +SNP_PATH = '/mnt/xvdf1/snps/' IMGDIR = HTMLPATH +'image/' IMAGESPATH = HTMLPATH + 'images/' UPLOADPATH = IMAGESPATH + 'upload/' -TMPDIR = HTMLPATH + 'tmp/' +TMPDIR = '/tmp/' GENODIR = HTMLPATH + 'genotypes/' NEWGENODIR = HTMLPATH + 'new_genotypes/' GENO_ARCHIVE_DIR = GENODIR + 'archive/' diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py index 8cb94f20..d4e47327 100644 --- a/wqflask/maintenance/gen_select_dataset.py +++ b/wqflask/maintenance/gen_select_dataset.py @@ -115,7 +115,7 @@ def build_types(species, group): (all types except phenotype/genotype are tissues) """ - Cursor.execute("""select distinct Tissue.Name, Tissue.Name + Cursor.execute("""select distinct Tissue.Name, concat(Tissue.Name, ' mRNA') from ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species where Species.Name = %s and Species.Id = InbredSet.SpeciesId and InbredSet.Name = %s and @@ -192,6 +192,8 @@ def main(): datasets=datasets, ) + print("data:", data) + output_file = """../wqflask/static/new/javascript/dataset_menu_structure.json""" with open(output_file, 'w') as fh: diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py new file mode 100644 index 00000000..2434038e --- /dev/null +++ b/wqflask/maintenance/get_group_samplelists.py @@ -0,0 +1,43 @@ +from __future__ import absolute_import, print_function, division + +import os +import glob +import gzip + +from base import webqtlConfig + + +def get_sample_list_dir(geno_dir="/home/zas1024/gene/web/genotypes/"): + os.chdir(geno_dir) + + for group_file in glob.glob("*"): + if group_file.lower().endswith(('.geno', '.geno.gz')): + #group_name = genofilename.split('.')[0] + sample_list = get_sample_list(group_file) + print("\n\n{}\n\n".format(sample_list)) + + +def get_sample_list(group_file): + print(group_file) + genofilename = str(os.path.join(webqtlConfig.GENODIR, group_file)) + if genofilename.lower().endswith('.geno.gz'): + genofile = gzip.open(genofilename) + else: + genofile = open(genofilename) + for line in genofile: + line = line.strip() + if not line: + continue + if line.startswith(("#", "@")): + continue + headline = line + break + headers = headline.split("\t") + if headers[3] == "Mb": + samplelist = headers[4:] + else: + samplelist = headers[3:] + return samplelist + +if __name__ == '__main__': + get_sample_list_dir() diff --git a/wqflask/wqflask/static/new/javascript/dataset_menu_structure.json b/wqflask/wqflask/static/new/javascript/dataset_menu_structure.json index 49c44fbd..4aae20ba 100644 --- a/wqflask/wqflask/static/new/javascript/dataset_menu_structure.json +++ b/wqflask/wqflask/static/new/javascript/dataset_menu_structure.json @@ -2556,7 +2556,7 @@ ], [ "Leaf mRNA", - "Leaf mRNA" + "Leaf mRNA mRNA" ] ], "SXM": [ @@ -2570,11 +2570,11 @@ ], [ "Embryo mRNA", - "Embryo mRNA" + "Embryo mRNA mRNA" ], [ "Leaf mRNA", - "Leaf mRNA" + "Leaf mRNA mRNA" ] ] }, @@ -2590,7 +2590,7 @@ ], [ "Whole Body mRNA", - "Whole Body mRNA" + "Whole Body mRNA mRNA" ] ], "Oregon-R_x_2b3": [ @@ -2604,7 +2604,7 @@ ], [ "Whole Body mRNA", - "Whole Body mRNA" + "Whole Body mRNA mRNA" ] ] }, @@ -2620,7 +2620,7 @@ ], [ "Brain mRNA", - "Brain mRNA" + "Brain mRNA mRNA" ] ], "AD-cases-controls-Myers": [ @@ -2634,7 +2634,7 @@ ], [ "Brain mRNA", - "Brain mRNA" + "Brain mRNA mRNA" ] ], "CANDLE": [ @@ -2648,11 +2648,11 @@ ], [ "Methylation", - "Methylation" + "Methylation mRNA" ], [ "Newborn Cord Blood mRNA", - "Newborn Cord Blood mRNA" + "Newborn Cord Blood mRNA mRNA" ] ], "CEPH-2004": [ @@ -2666,7 +2666,7 @@ ], [ "Lymphoblast B-cell mRNA", - "Lymphoblast B-cell mRNA" + "Lymphoblast B-cell mRNA mRNA" ] ], "HB": [ @@ -2680,15 +2680,15 @@ ], [ "Cerebellum mRNA", - "Cerebellum mRNA" + "Cerebellum mRNA mRNA" ], [ "Prefrontal Cortex mRNA", - "Prefrontal Cortex mRNA" + "Prefrontal Cortex mRNA mRNA" ], [ "Primary Visual Cortex mRNA", - "Primary Visual Cortex mRNA" + "Primary Visual Cortex mRNA mRNA" ] ], "HLC": [ @@ -2702,7 +2702,7 @@ ], [ "Liver mRNA", - "Liver mRNA" + "Liver mRNA mRNA" ] ], "HLT": [ @@ -2716,7 +2716,7 @@ ], [ "Lung mRNA", - "Lung mRNA" + "Lung mRNA mRNA" ] ], "HSB": [ @@ -2730,67 +2730,67 @@ ], [ "Amygdala mRNA", - "Amygdala mRNA" + "Amygdala mRNA mRNA" ], [ "Cerebellar Cortex mRNA", - "Cerebellar Cortex mRNA" + "Cerebellar Cortex mRNA mRNA" ], [ "Dorsolateral Prefrontal Cortex mRNA", - "Dorsolateral Prefrontal Cortex mRNA" + "Dorsolateral Prefrontal Cortex mRNA mRNA" ], [ "Hippocampus mRNA", - "Hippocampus mRNA" + "Hippocampus mRNA mRNA" ], [ "Inferior Temporal Cortex mRNA", - "Inferior Temporal Cortex mRNA" + "Inferior Temporal Cortex mRNA mRNA" ], [ "Medial Prefrontal Cortex mRNA", - "Medial Prefrontal Cortex mRNA" + "Medial Prefrontal Cortex mRNA mRNA" ], [ "Mediodorsal Nucleus of Thalamus mRNA", - "Mediodorsal Nucleus of Thalamus mRNA" + "Mediodorsal Nucleus of Thalamus mRNA mRNA" ], [ "Orbital Prefrontal Cortex mRNA", - "Orbital Prefrontal Cortex mRNA" + "Orbital Prefrontal Cortex mRNA mRNA" ], [ "Posterior Inferior Parietal Cortex mRNA", - "Posterior Inferior Parietal Cortex mRNA" + "Posterior Inferior Parietal Cortex mRNA mRNA" ], [ "Posterior Superior Temporal Cortex mRNA", - "Posterior Superior Temporal Cortex mRNA" + "Posterior Superior Temporal Cortex mRNA mRNA" ], [ "Primary Auditory (A1) Cortex mRNA", - "Primary Auditory (A1) Cortex mRNA" + "Primary Auditory (A1) Cortex mRNA mRNA" ], [ "Primary Motor (M1) Cortex mRNA", - "Primary Motor (M1) Cortex mRNA" + "Primary Motor (M1) Cortex mRNA mRNA" ], [ "Primary Somatosensory (S1) Cortex mRNA", - "Primary Somatosensory (S1) Cortex mRNA" + "Primary Somatosensory (S1) Cortex mRNA mRNA" ], [ "Primary Visual Cortex mRNA", - "Primary Visual Cortex mRNA" + "Primary Visual Cortex mRNA mRNA" ], [ "Striatum mRNA", - "Striatum mRNA" + "Striatum mRNA mRNA" ], [ "Ventrolateral Prefrontal Cortex mRNA", - "Ventrolateral Prefrontal Cortex mRNA" + "Ventrolateral Prefrontal Cortex mRNA mRNA" ] ] }, @@ -2806,23 +2806,23 @@ ], [ "Amygdala mRNA", - "Amygdala mRNA" + "Amygdala mRNA mRNA" ], [ "Brain mRNA", - "Brain mRNA" + "Brain mRNA mRNA" ], [ "Hippocampus mRNA", - "Hippocampus mRNA" + "Hippocampus mRNA mRNA" ], [ "Nucleus Accumbens mRNA", - "Nucleus Accumbens mRNA" + "Nucleus Accumbens mRNA mRNA" ], [ "Prefrontal Cortex mRNA", - "Prefrontal Cortex mRNA" + "Prefrontal Cortex mRNA mRNA" ] ] }, @@ -2838,7 +2838,7 @@ ], [ "Mammary Tumors mRNA", - "Mammary Tumors mRNA" + "Mammary Tumors mRNA mRNA" ] ], "AXBXA": [ @@ -2852,19 +2852,19 @@ ], [ "Bone Femur mRNA", - "Bone Femur mRNA" + "Bone Femur mRNA mRNA" ], [ "Eye mRNA", - "Eye mRNA" + "Eye mRNA mRNA" ], [ "Heart mRNA", - "Heart mRNA" + "Heart mRNA mRNA" ], [ "Liver mRNA", - "Liver mRNA" + "Liver mRNA mRNA" ] ], "B6BTBRF2": [ @@ -2878,7 +2878,7 @@ ], [ "Liver mRNA", - "Liver mRNA" + "Liver mRNA mRNA" ] ], "B6D2F2": [ @@ -2892,7 +2892,7 @@ ], [ "Brain mRNA", - "Brain mRNA" + "Brain mRNA mRNA" ] ], "B6D2F2-PSU": [ @@ -2906,7 +2906,7 @@ ], [ "Muscle mRNA", - "Muscle mRNA" + "Muscle mRNA mRNA" ] ], "B6D2RI": [ @@ -2920,7 +2920,7 @@ ], [ "Hippocampus mRNA", - "Hippocampus mRNA" + "Hippocampus mRNA mRNA" ] ], "BDF2-1999": [ @@ -2934,7 +2934,7 @@ ], [ "Liver mRNA", - "Liver mRNA" + "Liver mRNA mRNA" ] ], "BDF2-2005": [ @@ -2948,7 +2948,7 @@ ], [ "Striatum mRNA", - "Striatum mRNA" + "Striatum mRNA mRNA" ] ], "BHF2": [ @@ -2962,19 +2962,19 @@ ], [ "Adipose mRNA", - "Adipose mRNA" + "Adipose mRNA mRNA" ], [ "Brain mRNA", - "Brain mRNA" + "Brain mRNA mRNA" ], [ "Liver mRNA", - "Liver mRNA" + "Liver mRNA mRNA" ], [ "Muscle mRNA", - "Muscle mRNA" + "Muscle mRNA mRNA" ] ], "BHHBF2": [ @@ -2988,19 +2988,19 @@ ], [ "Adipose mRNA", - "Adipose mRNA" + "Adipose mRNA mRNA" ], [ "Brain mRNA", - "Brain mRNA" + "Brain mRNA mRNA" ], [ "Liver mRNA", - "Liver mRNA" + "Liver mRNA mRNA" ], [ "Muscle mRNA", - "Muscle mRNA" + "Muscle mRNA mRNA" ] ], "BXD": [ @@ -3014,119 +3014,119 @@ ], [ "Adrenal Gland mRNA", - "Adrenal Gland mRNA" + "Adrenal Gland mRNA mRNA" ], [ "Amygdala mRNA", - "Amygdala mRNA" + "Amygdala mRNA mRNA" ], [ "Bone Femur mRNA", - "Bone Femur mRNA" + "Bone Femur mRNA mRNA" ], [ "Brain mRNA", - "Brain mRNA" + "Brain mRNA mRNA" ], [ "Cartilage mRNA", - "Cartilage mRNA" + "Cartilage mRNA mRNA" ], [ "Cerebellum mRNA", - "Cerebellum mRNA" + "Cerebellum mRNA mRNA" ], [ "Eye mRNA", - "Eye mRNA" + "Eye mRNA mRNA" ], [ "Hematopoietic Cells mRNA", - "Hematopoietic Cells mRNA" + "Hematopoietic Cells mRNA mRNA" ], [ "Hippocampus mRNA", - "Hippocampus mRNA" + "Hippocampus mRNA mRNA" ], [ "Hypothalamus mRNA", - "Hypothalamus mRNA" + "Hypothalamus mRNA mRNA" ], [ "Kidney mRNA", - "Kidney mRNA" + "Kidney mRNA mRNA" ], [ "Leucocytes mRNA", - "Leucocytes mRNA" + "Leucocytes mRNA mRNA" ], [ "Liver mRNA", - "Liver mRNA" + "Liver mRNA mRNA" ], [ "Liver Proteome", - "Liver Proteome" + "Liver Proteome mRNA" ], [ "Lung mRNA", - "Lung mRNA" + "Lung mRNA mRNA" ], [ "Midbrain mRNA", - "Midbrain mRNA" + "Midbrain mRNA mRNA" ], [ "Muscle mRNA", - "Muscle mRNA" + "Muscle mRNA mRNA" ], [ "Neocortex mRNA", - "Neocortex mRNA" + "Neocortex mRNA mRNA" ], [ "Nucleus Accumbens mRNA", - "Nucleus Accumbens mRNA" + "Nucleus Accumbens mRNA mRNA" ], [ "Pituitary Gland mRNA", - "Pituitary Gland mRNA" + "Pituitary Gland mRNA mRNA" ], [ "Popliteal Lymph Node mRNA", - "Popliteal Lymph Node mRNA" + "Popliteal Lymph Node mRNA mRNA" ], [ "Prefrontal Cortex mRNA", - "Prefrontal Cortex mRNA" + "Prefrontal Cortex mRNA mRNA" ], [ "Retina mRNA", - "Retina mRNA" + "Retina mRNA mRNA" ], [ "Spleen mRNA", - "Spleen mRNA" + "Spleen mRNA mRNA" ], [ "Striatum mRNA", - "Striatum mRNA" + "Striatum mRNA mRNA" ], [ "T Cell (helper) mRNA", - "T Cell (helper) mRNA" + "T Cell (helper) mRNA mRNA" ], [ "T Cell (regulatory) mRNA", - "T Cell (regulatory) mRNA" + "T Cell (regulatory) mRNA mRNA" ], [ "Thymus mRNA", - "Thymus mRNA" + "Thymus mRNA mRNA" ], [ "Ventral Tegmental Area mRNA", - "Ventral Tegmental Area mRNA" + "Ventral Tegmental Area mRNA mRNA" ] ], "BXH": [ @@ -3140,15 +3140,15 @@ ], [ "Bone Femur mRNA", - "Bone Femur mRNA" + "Bone Femur mRNA mRNA" ], [ "Cartilage mRNA", - "Cartilage mRNA" + "Cartilage mRNA mRNA" ], [ "Liver mRNA", - "Liver mRNA" + "Liver mRNA mRNA" ] ], "C57BL-6JxC57BL-6NJF2": [ @@ -3172,19 +3172,19 @@ ], [ "Adipose mRNA", - "Adipose mRNA" + "Adipose mRNA mRNA" ], [ "Brain mRNA", - "Brain mRNA" + "Brain mRNA mRNA" ], [ "Liver mRNA", - "Liver mRNA" + "Liver mRNA mRNA" ], [ "Muscle mRNA", - "Muscle mRNA" + "Muscle mRNA mRNA" ] ], "CXB": [ @@ -3198,11 +3198,11 @@ ], [ "Hippocampus mRNA", - "Hippocampus mRNA" + "Hippocampus mRNA mRNA" ], [ "Spleen mRNA", - "Spleen mRNA" + "Spleen mRNA mRNA" ] ], "HS": [ @@ -3216,15 +3216,15 @@ ], [ "Hippocampus mRNA", - "Hippocampus mRNA" + "Hippocampus mRNA mRNA" ], [ "Liver mRNA", - "Liver mRNA" + "Liver mRNA mRNA" ], [ "Lung mRNA", - "Lung mRNA" + "Lung mRNA mRNA" ] ], "HS-CC": [ @@ -3238,7 +3238,7 @@ ], [ "Striatum mRNA", - "Striatum mRNA" + "Striatum mRNA mRNA" ] ], "LXS": [ @@ -3252,11 +3252,11 @@ ], [ "Hippocampus mRNA", - "Hippocampus mRNA" + "Hippocampus mRNA mRNA" ], [ "Prefrontal Cortex mRNA", - "Prefrontal Cortex mRNA" + "Prefrontal Cortex mRNA mRNA" ] ], "MDP": [ @@ -3270,19 +3270,19 @@ ], [ "Bone Femur mRNA", - "Bone Femur mRNA" + "Bone Femur mRNA mRNA" ], [ "Dorsal Root Ganglia mRNA", - "Dorsal Root Ganglia mRNA" + "Dorsal Root Ganglia mRNA mRNA" ], [ "Hippocampus mRNA", - "Hippocampus mRNA" + "Hippocampus mRNA mRNA" ], [ "Liver mRNA", - "Liver mRNA" + "Liver mRNA mRNA" ] ], "NZBXFVB-N2": [ @@ -3296,7 +3296,7 @@ ], [ "Mammary Tumors mRNA", - "Mammary Tumors mRNA" + "Mammary Tumors mRNA mRNA" ] ], "SOTNOT-OHSU": [ @@ -3320,7 +3320,7 @@ ], [ "Anterior Cingulate Cortex mRNA", - "Anterior Cingulate Cortex mRNA" + "Anterior Cingulate Cortex mRNA mRNA" ] ] }, @@ -3346,27 +3346,27 @@ ], [ "Adrenal Gland mRNA", - "Adrenal Gland mRNA" + "Adrenal Gland mRNA mRNA" ], [ "Heart mRNA", - "Heart mRNA" + "Heart mRNA mRNA" ], [ "Hippocampus mRNA", - "Hippocampus mRNA" + "Hippocampus mRNA mRNA" ], [ "Kidney mRNA", - "Kidney mRNA" + "Kidney mRNA mRNA" ], [ "Liver mRNA", - "Liver mRNA" + "Liver mRNA mRNA" ], [ "Peritoneal Fat mRNA", - "Peritoneal Fat mRNA" + "Peritoneal Fat mRNA mRNA" ] ], "SRxSHRSPF2": [ @@ -3380,7 +3380,7 @@ ], [ "Eye mRNA", - "Eye mRNA" + "Eye mRNA mRNA" ] ] }, -- cgit v1.2.3