diff options
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r-- | wqflask/maintenance/convert_geno_to_bimbam.py | 77 | ||||
-rw-r--r-- | wqflask/maintenance/gen_select_dataset.py | 7 | ||||
-rw-r--r-- | wqflask/maintenance/get_group_samplelists.py | 10 |
3 files changed, 15 insertions, 79 deletions
diff --git a/wqflask/maintenance/convert_geno_to_bimbam.py b/wqflask/maintenance/convert_geno_to_bimbam.py index 05006d5c..45522705 100644 --- a/wqflask/maintenance/convert_geno_to_bimbam.py +++ b/wqflask/maintenance/convert_geno_to_bimbam.py @@ -17,17 +17,12 @@ import glob import traceback import gzip -#import numpy as np -#from pyLMM import lmm - import simplejson as json from pprint import pformat as pf class EmptyConfigurations(Exception): pass - - class Marker(object): def __init__(self): self.name = None @@ -39,47 +34,34 @@ class Marker(object): class ConvertGenoFile(object): def __init__(self, input_file, output_files): - self.input_file = input_file self.output_files = output_files - + self.mb_exists = False self.cm_exists = False self.markers = [] - + self.latest_row_pos = None self.latest_col_pos = None - + self.latest_row_value = None self.latest_col_value = None - - def convert(self): + def convert(self): self.haplotype_notation = { '@mat': "1", '@pat': "0", '@het': "0.5", '@unk': "NA" } - + self.configurations = {} - #self.skipped_cols = 3 - - #if self.input_file.endswith(".geno.gz"): - # print("self.input_file: ", self.input_file) - # self.input_fh = gzip.open(self.input_file) - #else: self.input_fh = open(self.input_file) - - with open(self.output_files[0], "w") as self.geno_fh: - #if self.file_type == "geno": - self.process_csv() - #elif self.file_type == "snps": - # self.process_snps_file() + self.process_csv() def process_csv(self): - for row_count, row in enumerate(self.process_rows()): + for row in self.process_rows(): row_items = row.split("\t") this_marker = Marker() @@ -102,53 +84,30 @@ class ConvertGenoFile(object): this_marker.genotypes.append(self.configurations[genotype.upper().strip()]) else: this_marker.genotypes.append("NA") - - #print("this_marker is:", pf(this_marker.__dict__)) - #if this_marker.chr == "14": + self.markers.append(this_marker.__dict__) self.write_to_bimbam() - - # with open(self.output_file, 'w') as fh: - # json.dump(self.markers, fh, indent=" ", sort_keys=True) - - # print('configurations:', str(configurations)) - #self.latest_col_pos = item_count + self.skipped_cols - #self.latest_col_value = item - - #if item_count != 0: - # self.output_fh.write(" ") - #self.output_fh.write(self.configurations[item.upper()]) - - #self.output_fh.write("\n") def write_to_bimbam(self): with open(self.output_files[0], "w") as geno_fh: - # geno_fh.write(str(len(self.sample_list)) + "\n") - # geno_fh.write("2\n") - # geno_fh.write("IND") - # for sample in self.sample_list: - # geno_fh.write(" " + sample) - # geno_fh.write("\n") for marker in self.markers: geno_fh.write(marker['name']) geno_fh.write(", X, Y") geno_fh.write(", " + ", ".join(marker['genotypes'])) geno_fh.write("\n") - - #pheno_fh = open(self.output_files[1], 'w') + with open(self.output_files[1], "w") as pheno_fh: for sample in self.sample_list: pheno_fh.write("1\n") - + with open(self.output_files[2], "w") as snp_fh: for marker in self.markers: if self.mb_exists: snp_fh.write(marker['name'] +", " + str(int(float(marker['Mb'])*1000000)) + ", " + marker['chr'] + "\n") else: snp_fh.write(marker['name'] +", " + str(int(float(marker['cM'])*1000000)) + ", " + marker['chr'] + "\n") - - + def get_sample_list(self, row_contents): self.sample_list = [] if self.mb_exists: @@ -164,8 +123,6 @@ class ConvertGenoFile(object): def process_rows(self): for self.latest_row_pos, row in enumerate(self.input_fh): - #if self.input_file.endswith(".geno.gz"): - # print("row: ", row) self.latest_row_value = row # Take care of headers if not row.strip(): @@ -208,10 +165,8 @@ class ConvertGenoFile(object): convertob.convert() except EmptyConfigurations as why: print(" No config info? Continuing...") - #excepted = True continue except Exception as why: - print(" Exception:", why) print(traceback.print_exc()) print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos, @@ -219,12 +174,6 @@ class ConvertGenoFile(object): print(" Column is:", convertob.latest_col_value) print(" Row is:", convertob.latest_row_value) break - - #def process_snps_file(cls, snps_file, new_directory): - # output_file = os.path.join(new_directory, "mouse_families.json") - # print("%s -> %s" % (snps_file, output_file)) - # convertob = ConvertGenoFile(input_file, output_file) - if __name__=="__main__": Old_Geno_Directory = """/home/zas1024/genotype_files/genotype/""" @@ -234,6 +183,4 @@ if __name__=="__main__": #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json") #convertob.convert() ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory) - #ConvertGenoFiles(Geno_Directory) - - #process_csv(Input_File, Output_File)
\ No newline at end of file + #ConvertGenoFiles(Geno_Directory)
\ No newline at end of file diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py index 79242661..2825c6ea 100644 --- a/wqflask/maintenance/gen_select_dataset.py +++ b/wqflask/maintenance/gen_select_dataset.py @@ -78,7 +78,6 @@ def parse_db_uri(db_uri): return db_conn_info - def get_species(): """Build species list""" Cursor.execute("select Name, MenuName from Species where Species.Name != 'macaque monkey' order by OrderId") @@ -265,7 +264,7 @@ def build_datasets(species, group, type_name): def main(): """Generates and outputs (as json file) the data for the main dropdown menus on the home page""" - parse_db_uri(SQL_URI) + parse_db_uri() species = get_species() groups = get_groups(species) @@ -304,6 +303,6 @@ def _test_it(): #print("build_datasets:", pf(datasets)) if __name__ == '__main__': - Conn = MySQLdb.Connect(**parse_db_uri(SQL_URI)) + Conn = MySQLdb.Connect(**parse_db_uri()) Cursor = Conn.cursor() - main() + main()
\ No newline at end of file diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py index 04e94886..1dc6c46c 100644 --- a/wqflask/maintenance/get_group_samplelists.py +++ b/wqflask/maintenance/get_group_samplelists.py @@ -6,16 +6,6 @@ import gzip from base import webqtlConfig -def process_genofiles(geno_dir=webqtlConfig.GENODIR): - print("Yabba") - #sys.exit("Dabba") - os.chdir(geno_dir) - for geno_file in glob.glob("*"): - if geno_file.lower().endswith(('.geno', '.geno.gz')): - #group_name = genofilename.split('.')[0] - sample_list = get_samplelist(geno_file) - - def get_samplelist(file_type, geno_file): if file_type == "geno": return get_samplelist_from_geno(geno_file) |