diff options
64 files changed, 1431 insertions, 705 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index c14808d6..8ec0aaad 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -168,7 +168,8 @@ class DatasetType: results = g.db.execute(sql_query_mapping[t] % group_name).fetchone() if results: self.datasets[name] = dataset_name_mapping[t] - self.redis_instance.set("dataset_structure", json.dumps(self.datasets)) + self.redis_instance.set( + "dataset_structure", json.dumps(self.datasets)) return True return None @@ -239,7 +240,8 @@ class Markers: for line in bimbam_fh: marker = {} marker['name'] = line.split(delimiter)[0].rstrip() - marker['Mb'] = float(line.split(delimiter)[1].rstrip()) / 1000000 + marker['Mb'] = float(line.split(delimiter)[ + 1].rstrip()) / 1000000 marker['chr'] = line.split(delimiter)[2].rstrip() markers.append(marker) diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py index cbc05738..9bb29664 100644 --- a/wqflask/base/mrna_assay_tissue_data.py +++ b/wqflask/base/mrna_assay_tissue_data.py @@ -88,6 +88,7 @@ class MrnaAssayTissueData: if result.Symbol.lower() not in symbol_values_dict: symbol_values_dict[result.Symbol.lower()] = [result.value] else: - symbol_values_dict[result.Symbol.lower()].append(result.value) + symbol_values_dict[result.Symbol.lower()].append( + result.value) return symbol_values_dict diff --git a/wqflask/base/species.py b/wqflask/base/species.py index 44f133b5..e3c29916 100644 --- a/wqflask/base/species.py +++ b/wqflask/base/species.py @@ -55,4 +55,5 @@ class Chromosomes: results = g.db.execute(query).fetchall() for item in results: - self.chromosomes[item.OrderId] = IndChromosome(item.Name, item.Length) + self.chromosomes[item.OrderId] = IndChromosome( + item.Name, item.Length) diff --git a/wqflask/base/webqtlCaseData.py b/wqflask/base/webqtlCaseData.py index 2d07ab9d..25b6cb8a 100644 --- a/wqflask/base/webqtlCaseData.py +++ b/wqflask/base/webqtlCaseData.py @@ -34,12 +34,14 @@ class webqtlCaseData: def __init__(self, name, value=None, variance=None, num_cases=None, name2=None): self.name = name - self.name2 = name2 # Other name (for traits like BXD65a) + # Other name (for traits like BXD65a) + self.name2 = name2 self.value = value # Trait Value self.variance = variance # Trait Variance self.num_cases = num_cases # Number of individuals/cases self.extra_attributes = None - self.this_id = None # Set a sane default (can't be just "id" cause that's a reserved word) + # Set a sane default (can't be just "id" cause that's a reserved word) + self.this_id = None self.outlier = None # Not set to True/False until later def __repr__(self): diff --git a/wqflask/db/webqtlDatabaseFunction.py b/wqflask/db/webqtlDatabaseFunction.py index 50ac06fd..29112949 100644 --- a/wqflask/db/webqtlDatabaseFunction.py +++ b/wqflask/db/webqtlDatabaseFunction.py @@ -36,13 +36,15 @@ def retrieve_species(group): """Get the species of a group (e.g. returns string "mouse" on "BXD" """ - result = fetch1("select Species.Name from Species, InbredSet where InbredSet.Name = '%s' and InbredSet.SpeciesId = Species.Id" % (group), "/cross/" + group + ".json", lambda r: (r["species"],))[0] + result = fetch1("select Species.Name from Species, InbredSet where InbredSet.Name = '%s' and InbredSet.SpeciesId = Species.Id" % ( + group), "/cross/" + group + ".json", lambda r: (r["species"],))[0] logger.debug("retrieve_species result:", result) return result def retrieve_species_id(group): - result = fetch1("select SpeciesId from InbredSet where Name = '%s'" % (group), "/cross/" + group + ".json", lambda r: (r["species_id"],))[0] + result = fetch1("select SpeciesId from InbredSet where Name = '%s'" % ( + group), "/cross/" + group + ".json", lambda r: (r["species_id"],))[0] logger.debug("retrieve_species_id result:", result) return result diff --git a/wqflask/maintenance/convert_geno_to_bimbam.py b/wqflask/maintenance/convert_geno_to_bimbam.py index 5b2369c9..a1712500 100644 --- a/wqflask/maintenance/convert_geno_to_bimbam.py +++ b/wqflask/maintenance/convert_geno_to_bimbam.py @@ -83,7 +83,8 @@ class ConvertGenoFile: genotypes = row_items[2:] for item_count, genotype in enumerate(genotypes): if genotype.upper().strip() in self.configurations: - this_marker.genotypes.append(self.configurations[genotype.upper().strip()]) + this_marker.genotypes.append( + self.configurations[genotype.upper().strip()]) else: this_marker.genotypes.append("NA") @@ -106,9 +107,11 @@ class ConvertGenoFile: with open(self.output_files[2], "w") as snp_fh: for marker in self.markers: if self.mb_exists: - snp_fh.write(marker['name'] + ", " + str(int(float(marker['Mb']) * 1000000)) + ", " + marker['chr'] + "\n") + snp_fh.write( + marker['name'] + ", " + str(int(float(marker['Mb']) * 1000000)) + ", " + marker['chr'] + "\n") else: - snp_fh.write(marker['name'] + ", " + str(int(float(marker['cM']) * 1000000)) + ", " + marker['chr'] + "\n") + snp_fh.write( + marker['name'] + ", " + str(int(float(marker['cM']) * 1000000)) + ", " + marker['chr'] + "\n") def get_sample_list(self, row_contents): self.sample_list = [] @@ -160,10 +163,14 @@ class ConvertGenoFile: group_name = ".".join(input_file.split('.')[:-1]) if group_name == "HSNIH-Palmer": continue - geno_output_file = os.path.join(new_directory, group_name + "_geno.txt") - pheno_output_file = os.path.join(new_directory, group_name + "_pheno.txt") - snp_output_file = os.path.join(new_directory, group_name + "_snps.txt") - output_files = [geno_output_file, pheno_output_file, snp_output_file] + geno_output_file = os.path.join( + new_directory, group_name + "_geno.txt") + pheno_output_file = os.path.join( + new_directory, group_name + "_pheno.txt") + snp_output_file = os.path.join( + new_directory, group_name + "_snps.txt") + output_files = [geno_output_file, + pheno_output_file, snp_output_file] print("%s -> %s" % ( os.path.join(old_directory, input_file), geno_output_file)) convertob = ConvertGenoFile(input_file, output_files) diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py index 583a06e1..484336a6 100644 --- a/wqflask/maintenance/gen_select_dataset.py +++ b/wqflask/maintenance/gen_select_dataset.py @@ -120,14 +120,16 @@ def get_types(groups): else: if not phenotypes_exist(group_name) and not genotypes_exist(group_name): types[species].pop(group_name, None) - groups[species] = tuple(group for group in groups[species] if group[0] != group_name) + groups[species] = tuple( + group for group in groups[species] if group[0] != group_name) else: # ZS: This whole else statement might be unnecessary, need to check types_list = build_types(species, group_name) if len(types_list) > 0: types[species][group_name] = types_list else: types[species].pop(group_name, None) - groups[species] = tuple(group for group in groups[species] if group[0] != group_name) + groups[species] = tuple( + group for group in groups[species] if group[0] != group_name) return types diff --git a/wqflask/maintenance/generate_kinship_from_bimbam.py b/wqflask/maintenance/generate_kinship_from_bimbam.py index 7cc60c9e..bed634fa 100644 --- a/wqflask/maintenance/generate_kinship_from_bimbam.py +++ b/wqflask/maintenance/generate_kinship_from_bimbam.py @@ -21,7 +21,9 @@ class GenerateKinshipMatrices: self.pheno_file = pheno_file def generate_kinship(self): - gemma_command = "/gnu/store/xhzgjr0jvakxv6h3blj8z496xjig69b0-profile/bin/gemma -g " + self.geno_file + " -p " + self.pheno_file + " -gk 1 -outdir /home/zas1024/genotype_files/genotype/bimbam/ -o " + self.group_name + gemma_command = "/gnu/store/xhzgjr0jvakxv6h3blj8z496xjig69b0-profile/bin/gemma -g " + self.geno_file + \ + " -p " + self.pheno_file + \ + " -gk 1 -outdir /home/zas1024/genotype_files/genotype/bimbam/ -o " + self.group_name print("command:", gemma_command) os.system(gemma_command) @@ -34,9 +36,12 @@ class GenerateKinshipMatrices: group_name = ".".join(input_file.split('.')[:-1]) if group_name == "HSNIH-Palmer": continue - geno_input_file = os.path.join(bimbam_dir, group_name + "_geno.txt") - pheno_input_file = os.path.join(bimbam_dir, group_name + "_pheno.txt") - convertob = GenerateKinshipMatrices(group_name, geno_input_file, pheno_input_file) + geno_input_file = os.path.join( + bimbam_dir, group_name + "_geno.txt") + pheno_input_file = os.path.join( + bimbam_dir, group_name + "_pheno.txt") + convertob = GenerateKinshipMatrices( + group_name, geno_input_file, pheno_input_file) try: convertob.generate_kinship() except EmptyConfigurations as why: diff --git a/wqflask/maintenance/geno_to_json.py b/wqflask/maintenance/geno_to_json.py index ad3f2b72..7bdf2b53 100644 --- a/wqflask/maintenance/geno_to_json.py +++ b/wqflask/maintenance/geno_to_json.py @@ -100,7 +100,8 @@ class ConvertGenoFile: genotypes = row_items[2:] for item_count, genotype in enumerate(genotypes): if genotype.upper() in self.configurations: - this_marker.genotypes.append(self.configurations[genotype.upper()]) + this_marker.genotypes.append( + self.configurations[genotype.upper()]) else: this_marker.genotypes.append("NA") diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py index 1896bc52..ac7689f5 100644 --- a/wqflask/maintenance/quantile_normalize.py +++ b/wqflask/maintenance/quantile_normalize.py @@ -34,7 +34,8 @@ def create_dataframe(input_file): with open(input_file) as f: ncols = len(f.readline().split("\t")) - input_array = np.loadtxt(open(input_file, "rb"), delimiter="\t", skiprows=1, usecols=list(range(1, ncols))) + input_array = np.loadtxt(open( + input_file, "rb"), delimiter="\t", skiprows=1, usecols=list(range(1, ncols))) return pd.DataFrame(input_array) # This function taken from https://github.com/ShawnLYU/Quantile_Normalize diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_display_mapping_results.py b/wqflask/tests/unit/wqflask/marker_regression/test_display_mapping_results.py index 219a6a29..f4869c45 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_display_mapping_results.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_display_mapping_results.py @@ -39,7 +39,8 @@ class TestHtmlGenWrapper(unittest.TestCase): cgi="/testing/", enctype='multipart/form-data', name="formName", - submit=HtmlGenWrapper.create_input_tag(type_='hidden', name='Default_Name') + submit=HtmlGenWrapper.create_input_tag( + type_='hidden', name='Default_Name') ) test_image = HtmlGenWrapper.create_image_tag( src="test.png", diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_gemma_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_gemma_mapping.py index f194c6c9..5cbaf0e0 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_gemma_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_gemma_mapping.py @@ -70,7 +70,8 @@ class TestGemmaMapping(unittest.TestCase): ], vals=[], covariates="", use_loco=True) self.assertEqual(mock_os.system.call_count, 2) mock_gen_pheno_txt.assert_called_once() - mock_parse_loco.assert_called_once_with(dataset, "GP1_GWA_RRRRRR", True) + mock_parse_loco.assert_called_once_with( + dataset, "GP1_GWA_RRRRRR", True) mock_os.path.isfile.assert_called_once_with( ('/home/user/imgfile_output.assoc.txt')) self.assertEqual(mock_flat_files.call_count, 4) @@ -102,7 +103,8 @@ class TestGemmaMapping(unittest.TestCase): create_trait_side_effect = [] for i in range(4): - create_dataset_side_effect.append(AttributeSetter({"name": f'name_{i}'})) + create_dataset_side_effect.append( + AttributeSetter({"name": f'name_{i}'})) create_trait_side_effect.append( AttributeSetter({"data": [f'data_{i}']})) @@ -160,9 +162,12 @@ X\tM5\t12\tQ\tE\tMMB\tR\t21.1\tW\t0.65\t0.6""" results = parse_loco_output( this_dataset={}, gwa_output_filename=".xw/") expected_results = [ - {'name': 'M1', 'chr': 'X/Y', 'Mb': 2.8457155e-05, 'p_value': 0.85, 'additive': 23.3, 'lod_score': 0.07058107428570727}, - {'name': 'M2', 'chr': 4, 'Mb': 1.2e-05, 'p_value': 0.5, 'additive': 24.0, 'lod_score': 0.3010299956639812}, - {'name': 'M4', 'chr': 'Y', 'Mb': 1.2e-05, 'p_value': 0.7, 'additive': 11.6, 'lod_score': 0.1549019599857432}, + {'name': 'M1', 'chr': 'X/Y', 'Mb': 2.8457155e-05, 'p_value': 0.85, + 'additive': 23.3, 'lod_score': 0.07058107428570727}, + {'name': 'M2', 'chr': 4, 'Mb': 1.2e-05, 'p_value': 0.5, + 'additive': 24.0, 'lod_score': 0.3010299956639812}, + {'name': 'M4', 'chr': 'Y', 'Mb': 1.2e-05, 'p_value': 0.7, + 'additive': 11.6, 'lod_score': 0.1549019599857432}, {'name': 'M5', 'chr': 'X', 'Mb': 1.2e-05, 'p_value': 0.6, 'additive': 21.1, 'lod_score': 0.22184874961635637}] self.assertEqual(expected_results, results) diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_qtlreaper_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_qtlreaper_mapping.py index 5cc8fd0f..c762982b 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_qtlreaper_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_qtlreaper_mapping.py @@ -13,10 +13,12 @@ class TestQtlReaperMapping(unittest.TestCase): samples = ["S1", "S2", "S3", "S4","S5"] trait_filename = "trait_file" with mock.patch("builtins.open", mock.mock_open())as mock_open: - gen_pheno_txt_file(samples=samples, vals=vals, trait_filename=trait_filename) + gen_pheno_txt_file(samples=samples, vals=vals, + trait_filename=trait_filename) mock_open.assert_called_once_with("/home/user/data/gn2/trait_file.txt", "w") filehandler = mock_open() - write_calls = [mock.call('Trait\t'), mock.call('S1\tS3\tS4\n'), mock.call('T1\t'), mock.call('V1\tV4\tV3')] + write_calls = [mock.call('Trait\t'), mock.call( + 'S1\tS3\tS4\n'), mock.call('T1\t'), mock.call('V1\tV4\tV3')] filehandler.write.assert_has_calls(write_calls) diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py index 6267ce9a..6996c275 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py @@ -20,7 +20,8 @@ class TestRqtlMapping(unittest.TestCase): def test_get_trait_data(self, mock_logger, mock_db): """test for getting trait data_type return True""" query_value = """SELECT value FROM TraitMetadata WHERE type='trait_data_type'""" - mock_db.db.execute.return_value.fetchone.return_value = ["""{"type":"trait_data_type","name":"T1","traid_id":"fer434f"}"""] + mock_db.db.execute.return_value.fetchone.return_value = [ + """{"type":"trait_data_type","name":"T1","traid_id":"fer434f"}"""] results = get_trait_data_type("traid_id") mock_db.db.execute.assert_called_with(query_value) self.assertEqual(results, "fer434f") diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py index a29d8cfb..78cd3be9 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py @@ -187,7 +187,8 @@ class TestRunMapping(unittest.TestCase): mock.call('Time/Date: 09/01/19 / 10:12:12\n'), mock.call('Population: Human GP1_\n'), mock.call( 'Data Set: dataser_1\n'), - mock.call('N Samples: 100\n'), mock.call('Transform - Quantile Normalized\n'), + mock.call('N Samples: 100\n'), mock.call( + 'Transform - Quantile Normalized\n'), mock.call('Gene Symbol: IGFI\n'), mock.call( 'Location: X1 @ 123313 Mb\n'), mock.call('Cofactors (dataset - trait):\n'), diff --git a/wqflask/tests/unit/wqflask/test_server_side.py b/wqflask/tests/unit/wqflask/test_server_side.py index 4f91d8ca..69977146 100644 --- a/wqflask/tests/unit/wqflask/test_server_side.py +++ b/wqflask/tests/unit/wqflask/test_server_side.py @@ -22,10 +22,13 @@ class TestServerSideTableTests(unittest.TestCase): {'first': 'c', 'second': 1, 'third': 'ss'}, ] headers = ['first', 'second', 'third'] - request_args = {'sEcho': '1', 'iSortCol_0': '1', 'iSortingCols': '1', 'sSortDir_0': 'asc', 'iDisplayStart': '0', 'iDisplayLength': '3'} + request_args = {'sEcho': '1', 'iSortCol_0': '1', 'iSortingCols': '1', + 'sSortDir_0': 'asc', 'iDisplayStart': '0', 'iDisplayLength': '3'} - test_page = ServerSideTable(rows_count, table_rows, headers, request_args).get_page() + test_page = ServerSideTable( + rows_count, table_rows, headers, request_args).get_page() self.assertEqual(test_page['sEcho'], '1') self.assertEqual(test_page['iTotalRecords'], 'nan') self.assertEqual(test_page['iTotalDisplayRecords'], '3') - self.assertEqual(test_page['data'], [{'first': 'b', 'second': 2, 'third': 'aa'}, {'first': 'c', 'second': 1, 'third': 'ss'}, {'first': 'd', 'second': 4, 'third': 'zz'}]) + self.assertEqual(test_page['data'], [{'first': 'b', 'second': 2, 'third': 'aa'}, { + 'first': 'c', 'second': 1, 'third': 'ss'}, {'first': 'd', 'second': 4, 'third': 'zz'}]) diff --git a/wqflask/tests/wqflask/show_trait/testSampleList.py b/wqflask/tests/wqflask/show_trait/testSampleList.py index 441a88a7..305586ce 100644 --- a/wqflask/tests/wqflask/show_trait/testSampleList.py +++ b/wqflask/tests/wqflask/show_trait/testSampleList.py @@ -13,4 +13,5 @@ class TestSampleList(unittest.TestCase): sorted_list_a = natural_sort(characters_list) sorted_list_b = natural_sort(names_list) self.assertEqual(sorted_list_a, ["a", "f", "g", "q", "s", "t", "z"]) - self.assertEqual(sorted_list_b, ["Dataset", "Sample", "publish", "temp1"]) + self.assertEqual( + sorted_list_b, ["Dataset", "Sample", "publish", "temp1"]) diff --git a/wqflask/tests/wqflask/show_trait/test_show_trait.py b/wqflask/tests/wqflask/show_trait/test_show_trait.py index 24c3923e..63df2ba5 100644 --- a/wqflask/tests/wqflask/show_trait/test_show_trait.py +++ b/wqflask/tests/wqflask/show_trait/test_show_trait.py @@ -72,7 +72,8 @@ class TestTraits(unittest.TestCase): mock_get.return_value = get_return_obj results = get_ncbi_summary(trait) mock_exists.assert_called_once() - mock_get.assert_called_once_with(f"http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=gene&id={trait.geneid}&retmode=json") + mock_get.assert_called_once_with( + f"http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=gene&id={trait.geneid}&retmode=json") self.assertEqual(results, "this is a summary of the geneid") diff --git a/wqflask/utility/Plot.py b/wqflask/utility/Plot.py index d35b2089..4f5691c1 100644 --- a/wqflask/utility/Plot.py +++ b/wqflask/utility/Plot.py @@ -178,7 +178,8 @@ def plotBar(canvas, data, barColor=BLUE, axesColor=BLACK, labelColor=BLACK, XLab # draw drawing region im_drawer.rectangle( - xy=((xLeftOffset, yTopOffset), (xLeftOffset + plotWidth, yTopOffset + plotHeight)) + xy=((xLeftOffset, yTopOffset), + (xLeftOffset + plotWidth, yTopOffset + plotHeight)) ) # draw scale @@ -199,11 +200,13 @@ def plotBar(canvas, data, barColor=BLUE, axesColor=BLACK, labelColor=BLACK, XLab y = yLow for i in range(int(stepY) + 1): yc = yTopOffset + plotHeight - (y - yLow) * yScale - im_drawer.line(xy=((xLeftOffset, yc), (xLeftOffset - 5, yc)), fill=axesColor) + im_drawer.line( + xy=((xLeftOffset, yc), (xLeftOffset - 5, yc)), fill=axesColor) strY = "%d" % y im_drawer.text( text=strY, - xy=(xLeftOffset - im_drawer.textsize(strY, font=scaleFont)[0] - 6, yc + 5), + xy=(xLeftOffset - im_drawer.textsize(strY, + font=scaleFont)[0] - 6, yc + 5), font=scaleFont) y += (yTop - yLow) / stepY diff --git a/wqflask/utility/benchmark.py b/wqflask/utility/benchmark.py index 48ab1dc0..6ece2f21 100644 --- a/wqflask/utility/benchmark.py +++ b/wqflask/utility/benchmark.py @@ -19,7 +19,8 @@ class Bench: if self.name: logger.debug("Starting benchmark: %s" % (self.name)) else: - logger.debug("Starting benchmark at: %s [%i]" % (inspect.stack()[1][3], inspect.stack()[1][2])) + logger.debug("Starting benchmark at: %s [%i]" % ( + inspect.stack()[1][3], inspect.stack()[1][2])) self.start_time = time.time() def __exit__(self, type, value, traceback): @@ -33,11 +34,13 @@ class Bench: logger.info(" %s took: %f seconds" % (name, (time_taken))) if self.name: - Bench.entries[self.name] = Bench.entries.get(self.name, 0) + time_taken + Bench.entries[self.name] = Bench.entries.get( + self.name, 0) + time_taken @classmethod def report(cls): - total_time = sum((time_taken for time_taken in list(cls.entries.values()))) + total_time = sum( + (time_taken for time_taken in list(cls.entries.values()))) print("\nTiming report\n") for name, time_taken in list(cls.entries.items()): percent = int(round((time_taken / total_time) * 100)) diff --git a/wqflask/utility/corestats.py b/wqflask/utility/corestats.py index 523280a1..da0a21db 100644 --- a/wqflask/utility/corestats.py +++ b/wqflask/utility/corestats.py @@ -65,7 +65,8 @@ class Stats: if len(self.sequence) < 1: value = None elif (percentile >= 100): - sys.stderr.write('ERROR: percentile must be < 100. you supplied: %s\n' % percentile) + sys.stderr.write( + 'ERROR: percentile must be < 100. you supplied: %s\n' % percentile) value = None else: element_idx = int(len(self.sequence) * (percentile / 100.0)) diff --git a/wqflask/utility/elasticsearch_tools.py b/wqflask/utility/elasticsearch_tools.py index 9415cef0..55907dd5 100644 --- a/wqflask/utility/elasticsearch_tools.py +++ b/wqflask/utility/elasticsearch_tools.py @@ -49,7 +49,8 @@ from utility.tools import ELASTICSEARCH_HOST, ELASTICSEARCH_PORT def test_elasticsearch_connection(): - es = Elasticsearch(['http://' + ELASTICSEARCH_HOST + ":" + str(ELASTICSEARCH_PORT) + '/'], verify_certs=True) + es = Elasticsearch(['http://' + ELASTICSEARCH_HOST + \ + ":" + str(ELASTICSEARCH_PORT) + '/'], verify_certs=True) if not es.ping(): logger.warning("Elasticsearch is DOWN") @@ -88,7 +89,8 @@ def setup_users_index(es_connection): "type": "keyword"}}} es_connection.indices.create(index='users', ignore=400) - es_connection.indices.put_mapping(body=index_settings, index="users", doc_type="local") + es_connection.indices.put_mapping( + body=index_settings, index="users", doc_type="local") def get_user_by_unique_column(es, column_name, column_value, index="users", doc_type="local"): diff --git a/wqflask/utility/gen_geno_ob.py b/wqflask/utility/gen_geno_ob.py index 24604e58..e619b7b6 100644 --- a/wqflask/utility/gen_geno_ob.py +++ b/wqflask/utility/gen_geno_ob.py @@ -38,13 +38,15 @@ class genotype: def read_rdata_output(self, qtl_results): # ZS: This is necessary because R/qtl requires centimorgan marker positions, which it normally gets from the .geno file, but that doesn't exist for HET3-ITP (which only has RData), so it needs to read in the marker cM positions from the results - self.chromosomes = [] # ZS: Overwriting since the .geno file's contents are just placeholders + # ZS: Overwriting since the .geno file's contents are just placeholders + self.chromosomes = [] this_chr = "" # ZS: This is so it can track when the chromosome changes as it iterates through markers chr_ob = None for marker in qtl_results: locus = Locus(self) - if (str(marker['chr']) != this_chr) and this_chr != "X": # ZS: This is really awkward but works as a temporary fix + # ZS: This is really awkward but works as a temporary fix + if (str(marker['chr']) != this_chr) and this_chr != "X": if this_chr != "": self.chromosomes.append(chr_ob) this_chr = str(marker['chr']) @@ -156,9 +158,11 @@ class Locus: try: self.cM = float(marker_row[geno_ob.cm_column]) except: - self.cM = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else 0 + self.cM = float( + marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else 0 try: - self.Mb = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else None + self.Mb = float( + marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else None except: self.Mb = self.cM diff --git a/wqflask/utility/genofile_parser.py b/wqflask/utility/genofile_parser.py index 94a08c17..09100bd9 100644 --- a/wqflask/utility/genofile_parser.py +++ b/wqflask/utility/genofile_parser.py @@ -92,7 +92,8 @@ class ConvertGenoFile: genotypes = row_items[2:] for item_count, genotype in enumerate(genotypes): if genotype.upper().strip() in self.configurations: - this_marker.genotypes.append(self.configurations[genotype.upper().strip()]) + this_marker.genotypes.append( + self.configurations[genotype.upper().strip()]) else: print("WARNING:", genotype.upper()) this_marker.genotypes.append("NA") diff --git a/wqflask/utility/logger.py b/wqflask/utility/logger.py index 47079818..d706e32a 100644 --- a/wqflask/utility/logger.py +++ b/wqflask/utility/logger.py @@ -151,5 +151,6 @@ def getLogger(name, level=None): else: logger.setLevel(LOG_LEVEL) - logger.info("Log level of " + name + " set to " + logging.getLevelName(logger.getEffectiveLevel())) + logger.info("Log level of " + name + " set to " + \ + logging.getLevelName(logger.getEffectiveLevel())) return gnlogger diff --git a/wqflask/utility/redis_tools.py b/wqflask/utility/redis_tools.py index 8052035f..96a4be12 100644 --- a/wqflask/utility/redis_tools.py +++ b/wqflask/utility/redis_tools.py @@ -133,8 +133,10 @@ def get_user_groups(user_id): for key in groups_list: try: group_ob = json.loads(groups_list[key]) - group_admins = set([this_admin.encode('utf-8') if this_admin else None for this_admin in group_ob['admins']]) - group_members = set([this_member.encode('utf-8') if this_member else None for this_member in group_ob['members']]) + group_admins = set([this_admin.encode( + 'utf-8') if this_admin else None for this_admin in group_ob['admins']]) + group_members = set([this_member.encode( + 'utf-8') if this_member else None for this_member in group_ob['members']]) if user_id in group_admins: admin_group_ids.append(group_ob['id']) elif user_id in group_members: @@ -203,7 +205,8 @@ def get_groups_like_unique_column(column_name, column_value): if column_value in group_info[column_name]: matched_groups.append(group_info) else: - matched_groups.append(load_json_from_redis(group_list, column_value)) + matched_groups.append( + load_json_from_redis(group_list, column_value)) return matched_groups diff --git a/wqflask/utility/startup_config.py b/wqflask/utility/startup_config.py index 92f944bc..05f8a2b0 100644 --- a/wqflask/utility/startup_config.py +++ b/wqflask/utility/startup_config.py @@ -28,7 +28,8 @@ def app_config(): port = get_setting_int("SERVER_PORT") if get_setting_bool("USE_GN_SERVER"): - print(("GN2 API server URL is [" + BLUE + get_setting("GN_SERVER_URL") + ENDC + "]")) + print( + ("GN2 API server URL is [" + BLUE + get_setting("GN_SERVER_URL") + ENDC + "]")) import requests page = requests.get(get_setting("GN_SERVER_URL")) if page.status_code != 200: @@ -37,4 +38,5 @@ def app_config(): # import utility.elasticsearch_tools as es # es.test_elasticsearch_connection() - print(("GN2 is running. Visit %s[http://localhost:%s/%s](%s)" % (BLUE, str(port), ENDC, get_setting("WEBSERVER_URL")))) + print(("GN2 is running. Visit %s[http://localhost:%s/%s](%s)" % + (BLUE, str(port), ENDC, get_setting("WEBSERVER_URL")))) diff --git a/wqflask/utility/svg.py b/wqflask/utility/svg.py index f5ef81e1..8d2e13ab 100644 --- a/wqflask/utility/svg.py +++ b/wqflask/utility/svg.py @@ -239,19 +239,23 @@ class pathdata: def smbezier(self, x2, y2, x, y): """smooth bezier with xy2 to xy absolut""" - self.path.append('S' + str(x2) + ',' + str(y2) + ' ' + str(x) + ',' + str(y)) + self.path.append('S' + str(x2) + ',' + str(y2) + \ + ' ' + str(x) + ',' + str(y)) def relsmbezier(self, x2, y2, x, y): """smooth bezier with xy2 to xy relative""" - self.path.append('s' + str(x2) + ',' + str(y2) + ' ' + str(x) + ',' + str(y)) + self.path.append('s' + str(x2) + ',' + str(y2) + \ + ' ' + str(x) + ',' + str(y)) def qbezier(self, x1, y1, x, y): """quadratic bezier with xy1 to xy absolut""" - self.path.append('Q' + str(x1) + ',' + str(y1) + ' ' + str(x) + ',' + str(y)) + self.path.append('Q' + str(x1) + ',' + str(y1) + \ + ' ' + str(x) + ',' + str(y)) def relqbezier(self, x1, y1, x, y): """quadratic bezier with xy1 to xy relative""" - self.path.append('q' + str(x1) + ',' + str(y1) + ' ' + str(x) + ',' + str(y)) + self.path.append('q' + str(x1) + ',' + str(y1) + \ + ' ' + str(x) + ',' + str(y)) def smqbezier(self, x, y): """smooth quadratic bezier to xy absolut""" @@ -447,7 +451,8 @@ class rect(SVGelement): if width == None or height == None: raise ValueError('both height and width are required') - SVGelement.__init__(self, 'rect', {'width': width, 'height': height}, **args) + SVGelement.__init__( + self, 'rect', {'width': width, 'height': height}, **args) if x != None: self.attributes['x'] = x if y != None: @@ -545,7 +550,8 @@ class polyline(SVGelement): """ def __init__(self, points, fill=None, stroke=None, stroke_width=None,**args): - SVGelement.__init__(self, 'polyline', {'points': _xypointlist(points)}, **args) + SVGelement.__init__(self, 'polyline', { + 'points': _xypointlist(points)}, **args) if fill != None: self.attributes['fill'] = fill if stroke_width != None: @@ -561,7 +567,8 @@ class polygon(SVGelement): """ def __init__(self, points, fill=None, stroke=None, stroke_width=None,**args): - SVGelement.__init__(self, 'polygon', {'points': _xypointlist(points)}, **args) + SVGelement.__init__( + self, 'polygon', {'points': _xypointlist(points)}, **args) if fill != None: self.attributes['fill'] = fill if stroke_width != None: @@ -745,7 +752,8 @@ class image(SVGelement): def __init__(self, url, x=None, y=None, width=None,height=None,**args): if width == None or height == None: raise ValueError('both height and width are required') - SVGelement.__init__(self, 'image', {'xlink:href': url, 'width': width, 'height':height}, **args) + SVGelement.__init__( + self, 'image', {'xlink:href': url, 'width': width, 'height':height}, **args) if x != None: self.attributes['x'] = x if y != None: @@ -886,7 +894,8 @@ class script(SVGelement): """ def __init__(self, type, cdata=None, **args): - SVGelement.__init__(self, 'script', {'type': type}, cdata=cdata, **args) + SVGelement.__init__( + self, 'script', {'type': type}, cdata=cdata, **args) class animate(SVGelement): @@ -896,7 +905,8 @@ class animate(SVGelement): """ def __init__(self, attribute, fr=None, to=None, dur=None,**args): - SVGelement.__init__(self, 'animate', {'attributeName': attribute}, **args) + SVGelement.__init__( + self, 'animate', {'attributeName': attribute}, **args) if fr != None: self.attributes['from'] = fr if to != None: @@ -926,7 +936,8 @@ class animateTransform(SVGelement): """ def __init__(self, type=None, fr=None, to=None, dur=None,**args): - SVGelement.__init__(self, 'animateTransform', {'attributeName': 'transform'}, **args) + SVGelement.__init__(self, 'animateTransform', { + 'attributeName': 'transform'}, **args) # As far as I know the attributeName is always transform if type != None: self.attributes['type'] = type @@ -945,7 +956,8 @@ class animateColor(SVGelement): """ def __init__(self, attribute, type=None, fr=None, to=None,dur=None,**args): - SVGelement.__init__(self, 'animateColor', {'attributeName': attribute}, **args) + SVGelement.__init__(self, 'animateColor', { + 'attributeName': attribute}, **args) if type != None: self.attributes['type'] = type if fr != None: @@ -1020,11 +1032,13 @@ class drawing: import io xml = io.StringIO() xml.write("<?xml version='1.0' encoding='UTF-8'?>\n") - xml.write("<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.0//EN\" \"http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd\"") + xml.write( + "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.0//EN\" \"http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd\"") if self.entity: xml.write(" [\n") for item in list(self.entity.keys()): - xml.write("<!ENTITY %s \"%s\">\n" % (item, self.entity[item])) + xml.write("<!ENTITY %s \"%s\">\n" % + (item, self.entity[item])) xml.write("]") xml.write(">\n") self.svg.toXml(0, xml) @@ -1042,7 +1056,8 @@ class drawing: else: if filename[-4:] == 'svgz': import gzip - f = gzip.GzipFile(filename=filename, mode="wb", compresslevel=9) + f = gzip.GzipFile(filename=filename, + mode="wb", compresslevel=9) f.write(xml.getvalue()) f.close() else: @@ -1057,7 +1072,8 @@ class drawing: writes a svg drawing to the screen or to a file compresses if filename ends with svgz or if compress is true """ - doctype = implementation.createDocumentType('svg', "-//W3C//DTD SVG 1.0//EN""", 'http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd ') + doctype = implementation.createDocumentType( + 'svg', "-//W3C//DTD SVG 1.0//EN""", 'http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd ') global root # root is defined global so it can be used by the appender. Its also possible to use it as an arugument but @@ -1076,8 +1092,10 @@ class drawing: if element.text: textnode = root.createTextNode(element.text) e.appendChild(textnode) - for attribute in list(element.attributes.keys()): # in element.attributes is supported from python 2.2 - e.setAttribute(attribute, str(element.attributes[attribute])) + # in element.attributes is supported from python 2.2 + for attribute in list(element.attributes.keys()): + e.setAttribute(attribute, str( + element.attributes[attribute])) if element.elements: for el in element.elements: e = appender(el, e) @@ -1105,7 +1123,8 @@ class drawing: import io xml = io.StringIO() PrettyPrint(root, xml) - f = gzip.GzipFile(filename=filename, mode='wb', compresslevel=9) + f = gzip.GzipFile(filename=filename, + mode='wb', compresslevel=9) f.write(xml.getvalue()) f.close() else: @@ -1119,7 +1138,8 @@ class drawing: try: import xml.parsers.xmlproc.xmlval except: - raise exceptions.ImportError('PyXml is required for validating SVG') + raise exceptions.ImportError( + 'PyXml is required for validating SVG') svg = self.toXml() xv = xml.parsers.xmlproc.xmlval.XMLValidator() try: diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py index 4fe4db08..263c3948 100644 --- a/wqflask/utility/tools.py +++ b/wqflask/utility/tools.py @@ -64,7 +64,8 @@ def get_setting(command_id, guess=None): command = value(guess) if command is None or command == "": # print command - raise Exception(command_id + ' setting unknown or faulty (update default_settings.py?).') + raise Exception( + command_id + ' setting unknown or faulty (update default_settings.py?).') # print("Set "+command_id+"="+str(command)) return command @@ -113,7 +114,8 @@ def js_path(module=None): try_guix = get_setting("JS_GUIX_PATH") + "/" + module if valid_path(try_guix): return try_guix - raise "No JS path found for " + module + " (if not in Guix check JS_GN_PATH)" + raise "No JS path found for " + module + \ + " (if not in Guix check JS_GN_PATH)" def reaper_command(guess=None): @@ -292,7 +294,8 @@ ORCID_CLIENT_SECRET = get_setting('ORCID_CLIENT_SECRET') ORCID_AUTH_URL = None if ORCID_CLIENT_ID != 'UNKNOWN' and ORCID_CLIENT_SECRET: ORCID_AUTH_URL = "https://orcid.org/oauth/authorize?response_type=code&scope=/authenticate&show_login=true&client_id=" + \ - ORCID_CLIENT_ID + "&client_secret=" + ORCID_CLIENT_SECRET + "&redirect_uri=" + GN2_BRANCH_URL + "n/login/orcid_oauth2" + ORCID_CLIENT_ID + "&client_secret=" + ORCID_CLIENT_SECRET + \ + "&redirect_uri=" + GN2_BRANCH_URL + "n/login/orcid_oauth2" ORCID_TOKEN_URL = get_setting('ORCID_TOKEN_URL') ELASTICSEARCH_HOST = get_setting('ELASTICSEARCH_HOST') @@ -320,7 +323,8 @@ assert_dir(JS_GUIX_PATH + '/cytoscape-panzoom') CSS_PATH = JS_GUIX_PATH # The CSS is bundled together with the JS # assert_dir(JS_PATH) -JS_TWITTER_POST_FETCHER_PATH = get_setting("JS_TWITTER_POST_FETCHER_PATH", js_path("javascript-twitter-post-fetcher")) +JS_TWITTER_POST_FETCHER_PATH = get_setting( + "JS_TWITTER_POST_FETCHER_PATH", js_path("javascript-twitter-post-fetcher")) assert_dir(JS_TWITTER_POST_FETCHER_PATH) assert_file(JS_TWITTER_POST_FETCHER_PATH + "/js/twitterFetcher_min.js") diff --git a/wqflask/wqflask/api/correlation.py b/wqflask/wqflask/api/correlation.py index f1dd148f..52026a82 100644 --- a/wqflask/wqflask/api/correlation.py +++ b/wqflask/wqflask/api/correlation.py @@ -25,13 +25,16 @@ def do_correlation(start_vars): assert('trait_id' in start_vars) this_dataset = data_set.create_dataset(dataset_name=start_vars['db']) - target_dataset = data_set.create_dataset(dataset_name=start_vars['target_db']) - this_trait = create_trait(dataset=this_dataset, name=start_vars['trait_id']) + target_dataset = data_set.create_dataset( + dataset_name=start_vars['target_db']) + this_trait = create_trait(dataset=this_dataset, + name=start_vars['trait_id']) this_trait = retrieve_sample_data(this_trait, this_dataset) corr_params = init_corr_params(start_vars) - corr_results = calculate_results(this_trait, this_dataset, target_dataset, corr_params) + corr_results = calculate_results( + this_trait, this_dataset, target_dataset, corr_params) #corr_results = collections.OrderedDict(sorted(corr_results.items(), key=lambda t: -abs(t[1][0]))) final_results = [] @@ -75,54 +78,66 @@ def calculate_results(this_trait, this_dataset, target_dataset, corr_params): if corr_params['type'] == "tissue": trait_symbol_dict = this_dataset.retrieve_genes("Symbol") - corr_results = do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params) + corr_results = do_tissue_correlation_for_all_traits( + this_trait, trait_symbol_dict, corr_params) sorted_results = collections.OrderedDict(sorted(list(corr_results.items()), key=lambda t: -abs(t[1][1]))) - elif corr_params['type'] == "literature" or corr_params['type'] == "lit": # ZS: Just so a user can use either "lit" or "literature" + # ZS: Just so a user can use either "lit" or "literature" + elif corr_params['type'] == "literature" or corr_params['type'] == "lit": trait_geneid_dict = this_dataset.retrieve_genes("GeneId") - corr_results = do_literature_correlation_for_all_traits(this_trait, this_dataset, trait_geneid_dict, corr_params) + corr_results = do_literature_correlation_for_all_traits( + this_trait, this_dataset, trait_geneid_dict, corr_params) sorted_results = collections.OrderedDict(sorted(list(corr_results.items()), key=lambda t: -abs(t[1][1]))) else: for target_trait, target_vals in list(target_dataset.trait_data.items()): - result = get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, corr_params['type']) + result = get_sample_r_and_p_values( + this_trait, this_dataset, target_vals, target_dataset, corr_params['type']) if result is not None: corr_results[target_trait] = result - sorted_results = collections.OrderedDict(sorted(list(corr_results.items()), key=lambda t: -abs(t[1][0]))) + sorted_results = collections.OrderedDict( + sorted(list(corr_results.items()), key=lambda t: -abs(t[1][0]))) return sorted_results def do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params, tissue_dataset_id=1): # Gets tissue expression values for the primary trait - primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list=[this_trait.symbol]) + primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=[this_trait.symbol]) if this_trait.symbol.lower() in primary_trait_tissue_vals_dict: - primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower()] + primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower( + )] - corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list=list(trait_symbol_dict.values())) + corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=list(trait_symbol_dict.values())) tissue_corr_data = {} for trait, symbol in list(trait_symbol_dict.items()): if symbol and symbol.lower() in corr_result_tissue_vals_dict: - this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()] + this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower( + )] result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, this_trait_tissue_values, corr_params['method']) - tissue_corr_data[trait] = [result[0], result[1], result[2], symbol] + tissue_corr_data[trait] = [ + result[0], result[1], result[2], symbol] return tissue_corr_data def do_literature_correlation_for_all_traits(this_trait, target_dataset, trait_geneid_dict, corr_params): - input_trait_mouse_gene_id = convert_to_mouse_gene_id(target_dataset.group.species.lower(), this_trait.geneid) + input_trait_mouse_gene_id = convert_to_mouse_gene_id( + target_dataset.group.species.lower(), this_trait.geneid) lit_corr_data = {} for trait, gene_id in list(trait_geneid_dict.items()): - mouse_gene_id = convert_to_mouse_gene_id(target_dataset.group.species.lower(), gene_id) + mouse_gene_id = convert_to_mouse_gene_id( + target_dataset.group.species.lower(), gene_id) if mouse_gene_id and str(mouse_gene_id).find(";") == -1: result = g.db.execute( @@ -168,12 +183,15 @@ def get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_data this_trait_vals.append(this_sample_value) shared_target_vals.append(target_sample_value) - this_trait_vals, shared_target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, shared_target_vals) + this_trait_vals, shared_target_vals, num_overlap = corr_result_helpers.normalize_values( + this_trait_vals, shared_target_vals) if type == 'pearson': - sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, shared_target_vals) + sample_r, sample_p = scipy.stats.pearsonr( + this_trait_vals, shared_target_vals) else: - sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, shared_target_vals) + sample_r, sample_p = scipy.stats.spearmanr( + this_trait_vals, shared_target_vals) if num_overlap > 5: if scipy.isnan(sample_r): diff --git a/wqflask/wqflask/api/mapping.py b/wqflask/wqflask/api/mapping.py index 662090d5..c22b44a9 100644 --- a/wqflask/wqflask/api/mapping.py +++ b/wqflask/wqflask/api/mapping.py @@ -37,20 +37,25 @@ def do_mapping_for_api(start_vars): mapping_params = initialize_parameters(start_vars, dataset, this_trait) - covariates = "" # ZS: It seems to take an empty string as default. This should probably be changed. + # ZS: It seems to take an empty string as default. This should probably be changed. + covariates = "" if mapping_params['mapping_method'] == "gemma": header_row = ["name", "chr", "Mb", "lod_score", "p_value"] - if mapping_params['use_loco'] == "True": # ZS: gemma_mapping returns both results and the filename for LOCO, so need to only grab the former for api - result_markers = gemma_mapping.run_gemma(this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])[0] + # ZS: gemma_mapping returns both results and the filename for LOCO, so need to only grab the former for api + if mapping_params['use_loco'] == "True": + result_markers = gemma_mapping.run_gemma( + this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])[0] else: - result_markers = gemma_mapping.run_gemma(this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf']) + result_markers = gemma_mapping.run_gemma( + this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf']) elif mapping_params['mapping_method'] == "rqtl": header_row = ["name", "chr", "cM", "lod_score"] if mapping_params['num_perm'] > 0: _sperm_output, _suggestive, _significant, result_markers = rqtl_mapping.run_rqtl_geno(vals, dataset, mapping_params['rqtl_method'], mapping_params['rqtl_model'], mapping_params['perm_check'], mapping_params['num_perm'], - mapping_params['do_control'], mapping_params['control_marker'], + mapping_params['do_control'], mapping_params[ + 'control_marker'], mapping_params['manhattan_plot'], mapping_params['pair_scan']) else: result_markers = rqtl_mapping.run_rqtl_geno(vals, dataset, mapping_params['rqtl_method'], mapping_params['rqtl_model'], diff --git a/wqflask/wqflask/api/router.py b/wqflask/wqflask/api/router.py index 4f9cc6e5..e7dfa4e0 100644 --- a/wqflask/wqflask/api/router.py +++ b/wqflask/wqflask/api/router.py @@ -35,7 +35,8 @@ def hello_world(): @app.route("/api/v_{}/species".format(version)) def get_species_list(): - results = g.db.execute("SELECT SpeciesId, Name, FullName, TaxonomyId FROM Species;") + results = g.db.execute( + "SELECT SpeciesId, Name, FullName, TaxonomyId FROM Species;") the_species = results.fetchall() species_list = [] for species in the_species: @@ -313,7 +314,8 @@ def get_dataset_info(dataset_name, group_name=None, file_format="json"): @app.route("/api/v_{}/traits/<path:dataset_name>".format(version), methods=("GET",)) @app.route("/api/v_{}/traits/<path:dataset_name>.<path:file_format>".format(version), methods=("GET",)) def fetch_traits(dataset_name, file_format="json"): - trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids(dataset_name, request.args) + trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids( + dataset_name, request.args) if ("ids_only" in request.args) and (len(trait_ids) > 0): if file_format == "json": filename = dataset_name + "_trait_ids.json" @@ -361,7 +363,8 @@ def fetch_traits(dataset_name, file_format="json"): ProbeSet.Id """ - field_list = ["Id", "Name", "Symbol", "Description", "Chr", "Mb", "Aliases", "Mean", "SE", "Locus", "LRS", "P-Value", "Additive", "h2"] + field_list = ["Id", "Name", "Symbol", "Description", "Chr", "Mb", + "Aliases", "Mean", "SE", "Locus", "LRS", "P-Value", "Additive", "h2"] elif data_type == "Geno": query = """ SELECT @@ -378,7 +381,8 @@ def fetch_traits(dataset_name, file_format="json"): Geno.Id """ - field_list = ["Id", "Name", "Marker_Name", "Chr", "Mb", "Sequence", "Source"] + field_list = ["Id", "Name", "Marker_Name", + "Chr", "Mb", "Sequence", "Source"] else: query = """ SELECT @@ -394,7 +398,8 @@ def fetch_traits(dataset_name, file_format="json"): PublishXRef.Id """ - field_list = ["Id", "PhenotypeId", "PublicationId", "Locus", "LRS", "Additive", "Sequence"] + field_list = ["Id", "PhenotypeId", "PublicationId", + "Locus", "LRS", "Additive", "Sequence"] if 'limit_to' in request.args: limit_number = request.args['limit_to'] @@ -442,7 +447,8 @@ def fetch_traits(dataset_name, file_format="json"): @app.route("/api/v_{}/sample_data/<path:dataset_name>".format(version)) @app.route("/api/v_{}/sample_data/<path:dataset_name>.<path:file_format>".format(version)) def all_sample_data(dataset_name, file_format="csv"): - trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids(dataset_name, request.args) + trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids( + dataset_name, request.args) if len(trait_ids) > 0: sample_list = get_samplelist(dataset_name) @@ -676,7 +682,8 @@ def get_trait_info(dataset_name, trait_name, file_format="json"): return flask.jsonify(trait_dict) else: - if "Publish" in dataset_name: # ZS: Check if the user input the dataset_name as BXDPublish, etc (which is always going to be the group name + "Publish" + # ZS: Check if the user input the dataset_name as BXDPublish, etc (which is always going to be the group name + "Publish" + if "Publish" in dataset_name: dataset_name = dataset_name.replace("Publish", "") group_id = get_group_id(dataset_name) @@ -711,7 +718,8 @@ def get_corr_results(): results = correlation.do_correlation(request.args) if len(results) > 0: - return flask.jsonify(results) # ZS: I think flask.jsonify expects a dict/list instead of JSON + # ZS: I think flask.jsonify expects a dict/list instead of JSON + return flask.jsonify(results) else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") @@ -768,7 +776,8 @@ def get_genotypes(group_name, file_format="csv", dataset_name=None): output_lines.append(line.split()) i += 1 - csv_writer = csv.writer(si, delimiter="\t", escapechar="\\", quoting = csv.QUOTE_NONE) + csv_writer = csv.writer( + si, delimiter="\t", escapechar="\\", quoting = csv.QUOTE_NONE) else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") elif file_format == "rqtl2": @@ -779,18 +788,23 @@ def get_genotypes(group_name, file_format="csv", dataset_name=None): filename = group_name if os.path.isfile("{0}/{1}_geno.csv".format(flat_files("genotype/rqtl2"), group_name)): - yaml_file = json.load(open("{0}/{1}.json".format(flat_files("genotype/rqtl2"), group_name))) + yaml_file = json.load( + open("{0}/{1}.json".format(flat_files("genotype/rqtl2"), group_name))) yaml_file["geno"] = filename + "_geno.csv" yaml_file["gmap"] = filename + "_gmap.csv" yaml_file["pheno"] = filename + "_pheno.csv" config_file = [filename + ".json", json.dumps(yaml_file)] #config_file = [filename + ".yaml", open("{0}/{1}.yaml".format(flat_files("genotype/rqtl2"), group_name))] - geno_file = [filename + "_geno.csv", open("{0}/{1}_geno.csv".format(flat_files("genotype/rqtl2"), group_name))] - gmap_file = [filename + "_gmap.csv", open("{0}/{1}_gmap.csv".format(flat_files("genotype/rqtl2"), group_name))] + geno_file = [filename + "_geno.csv", + open("{0}/{1}_geno.csv".format(flat_files("genotype/rqtl2"), group_name))] + gmap_file = [filename + "_gmap.csv", + open("{0}/{1}_gmap.csv".format(flat_files("genotype/rqtl2"), group_name))] if dataset_name: - phenotypes = requests.get("http://gn2.genenetwork.org/api/v_pre1/sample_data/" + dataset_name) + phenotypes = requests.get( + "http://gn2.genenetwork.org/api/v_pre1/sample_data/" + dataset_name) else: - phenotypes = requests.get("http://gn2.genenetwork.org/api/v_pre1/sample_data/" + group_name + "Publish") + phenotypes = requests.get( + "http://gn2.genenetwork.org/api/v_pre1/sample_data/" + group_name + "Publish") with ZipFile(memory_file, 'w', compression=ZIP_DEFLATED) as zf: zf.writestr(config_file[0], config_file[1]) @@ -813,7 +827,8 @@ def get_genotypes(group_name, file_format="csv", dataset_name=None): for line in genofile: if limit_num and i >= limit_num: break - output_lines.append([line.strip() for line in line.split(",")]) + output_lines.append([line.strip() + for line in line.split(",")]) i += 1 csv_writer = csv.writer(si, delimiter=",") @@ -898,7 +913,8 @@ def get_dataset_trait_ids(dataset_name, start_vars): results = g.db.execute(query).fetchall() trait_ids = [result[0] for result in results] - trait_names = [str(result[2]) + "_" + str(result[1]) for result in results] + trait_names = [str(result[2]) + "_" + str(result[1]) + for result in results] return trait_ids, trait_names, data_type, dataset_id diff --git a/wqflask/wqflask/collect.py b/wqflask/wqflask/collect.py index 61f73106..1fcf15f0 100644 --- a/wqflask/wqflask/collect.py +++ b/wqflask/wqflask/collect.py @@ -158,7 +158,8 @@ def remove_traits(): traits_to_remove = params['trait_list'] traits_to_remove = process_traits(traits_to_remove) - members_now = g.user_session.remove_traits_from_collection(uc_id, traits_to_remove) + members_now = g.user_session.remove_traits_from_collection( + uc_id, traits_to_remove) return redirect(url_for("view_collection", uc_id=uc_id)) @@ -181,7 +182,8 @@ def delete_collection(): else: flash("We've deleted the selected collection.", "alert-info") else: - flash("We've deleted the collection: {}.".format(collection_name), "alert-info") + flash("We've deleted the collection: {}.".format( + collection_name), "alert-info") return redirect(url_for('list_collections')) @@ -191,7 +193,8 @@ def view_collection(): params = request.args uc_id = params['uc_id'] - uc = next((collection for collection in g.user_session.user_collections if collection["id"] == uc_id)) + uc = next( + (collection for collection in g.user_session.user_collections if collection["id"] == uc_id)) traits = uc["members"] trait_obs = [] @@ -203,12 +206,14 @@ def view_collection(): name, dataset_name = atrait.split(':') if dataset_name == "Temp": group = name.split("_")[2] - dataset = create_dataset(dataset_name, dataset_type="Temp", group_name=group) + dataset = create_dataset( + dataset_name, dataset_type="Temp", group_name=group) trait_ob = create_trait(name=name, dataset=dataset) else: dataset = create_dataset(dataset_name) trait_ob = create_trait(name=name, dataset=dataset) - trait_ob = retrieve_trait_info(trait_ob, dataset, get_qtl_info=True) + trait_ob = retrieve_trait_info( + trait_ob, dataset, get_qtl_info=True) trait_obs.append(trait_ob) json_version.append(jsonable(trait_ob)) diff --git a/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py b/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py index c135faa3..0fabb833 100644 --- a/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py +++ b/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py @@ -34,14 +34,16 @@ from flask import Flask, g class ComparisonBarChart: def __init__(self, start_vars): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) self.all_sample_list = [] self.traits = [] self.insufficient_shared_samples = False - this_group = self.trait_list[0][1].group.name # ZS: Getting initial group name before verifying all traits are in the same group in the following loop + # ZS: Getting initial group name before verifying all traits are in the same group in the following loop + this_group = self.trait_list[0][1].group.name for trait_db in self.trait_list: if trait_db[1].group.name != this_group: diff --git a/wqflask/wqflask/correlation/corr_scatter_plot.py b/wqflask/wqflask/correlation/corr_scatter_plot.py index 6afea715..f9a0ea11 100644 --- a/wqflask/wqflask/correlation/corr_scatter_plot.py +++ b/wqflask/wqflask/correlation/corr_scatter_plot.py @@ -17,17 +17,21 @@ class CorrScatterPlot: def __init__(self, params): if "Temp" in params['dataset_1']: - self.dataset_1 = data_set.create_dataset(dataset_name="Temp", dataset_type="Temp", group_name = params['dataset_1'].split("_")[1]) + self.dataset_1 = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name = params['dataset_1'].split("_")[1]) else: self.dataset_1 = data_set.create_dataset(params['dataset_1']) if "Temp" in params['dataset_2']: - self.dataset_2 = data_set.create_dataset(dataset_name="Temp", dataset_type="Temp", group_name = params['dataset_2'].split("_")[1]) + self.dataset_2 = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name = params['dataset_2'].split("_")[1]) else: self.dataset_2 = data_set.create_dataset(params['dataset_2']) #self.dataset_3 = data_set.create_dataset(params['dataset_3']) - self.trait_1 = create_trait(name=params['trait_1'], dataset=self.dataset_1) - self.trait_2 = create_trait(name=params['trait_2'], dataset=self.dataset_2) + self.trait_1 = create_trait( + name=params['trait_1'], dataset=self.dataset_1) + self.trait_2 = create_trait( + name=params['trait_2'], dataset=self.dataset_2) #self.trait_3 = create_trait(name=params['trait_3'], dataset=self.dataset_3) self.method = params['method'] @@ -38,10 +42,13 @@ class CorrScatterPlot: if self.dataset_1.group.f1list != None: primary_samples += self.dataset_1.group.f1list - self.trait_1 = retrieve_sample_data(self.trait_1, self.dataset_1, primary_samples) - self.trait_2 = retrieve_sample_data(self.trait_2, self.dataset_2, primary_samples) + self.trait_1 = retrieve_sample_data( + self.trait_1, self.dataset_1, primary_samples) + self.trait_2 = retrieve_sample_data( + self.trait_2, self.dataset_2, primary_samples) - samples_1, samples_2, num_overlap = corr_result_helpers.normalize_values_with_samples(self.trait_1.data, self.trait_2.data) + samples_1, samples_2, num_overlap = corr_result_helpers.normalize_values_with_samples( + self.trait_1.data, self.trait_2.data) self.data = [] self.indIDs = list(samples_1.keys()) @@ -54,7 +61,8 @@ class CorrScatterPlot: vals_2.append(samples_2[sample].value) self.data.append(vals_2) - slope, intercept, r_value, p_value, std_err = stats.linregress(vals_1, vals_2) + slope, intercept, r_value, p_value, std_err = stats.linregress( + vals_1, vals_2) if slope < 0.001: slope_string = '%.3E' % slope @@ -67,14 +75,16 @@ class CorrScatterPlot: x_range = [min(vals_1) - x_buffer, max(vals_1) + x_buffer] y_range = [min(vals_2) - y_buffer, max(vals_2) + y_buffer] - intercept_coords = get_intercept_coords(slope, intercept, x_range, y_range) + intercept_coords = get_intercept_coords( + slope, intercept, x_range, y_range) rx = stats.rankdata(vals_1) ry = stats.rankdata(vals_2) self.rdata = [] self.rdata.append(rx.tolist()) self.rdata.append(ry.tolist()) - srslope, srintercept, srr_value, srp_value, srstd_err = stats.linregress(rx, ry) + srslope, srintercept, srr_value, srp_value, srstd_err = stats.linregress( + rx, ry) if srslope < 0.001: srslope_string = '%.3E' % srslope @@ -86,7 +96,8 @@ class CorrScatterPlot: sr_range = [min(rx) - x_buffer, max(rx) + x_buffer] - sr_intercept_coords = get_intercept_coords(srslope, srintercept, sr_range, sr_range) + sr_intercept_coords = get_intercept_coords( + srslope, srintercept, sr_range, sr_range) self.collections_exist = "False" if g.user_session.num_collections > 0: diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index e2fe1ff4..e75c4a85 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -78,7 +78,8 @@ class CorrelationResults: with Bench("Doing correlations"): if start_vars['dataset'] == "Temp": - self.dataset = data_set.create_dataset(dataset_name="Temp", dataset_type="Temp", group_name = start_vars['group']) + self.dataset = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name = start_vars['group']) self.trait_id = start_vars['trait_id'] self.this_trait = create_trait(dataset=self.dataset, name=self.trait_id, @@ -129,12 +130,15 @@ class CorrelationResults: if corr_samples_group == 'samples_other': primary_samples = [x for x in primary_samples if x not in ( self.dataset.group.parlist + self.dataset.group.f1list)] - self.process_samples(start_vars, list(self.this_trait.data.keys()), primary_samples) + self.process_samples(start_vars, list( + self.this_trait.data.keys()), primary_samples) - self.target_dataset = data_set.create_dataset(start_vars['corr_dataset']) + self.target_dataset = data_set.create_dataset( + start_vars['corr_dataset']) self.target_dataset.get_trait_data(list(self.sample_data.keys())) - self.header_fields = get_header_fields(self.target_dataset.type, self.corr_method) + self.header_fields = get_header_fields( + self.target_dataset.type, self.corr_method) if self.target_dataset.type == "ProbeSet": self.filter_cols = [7, 6] @@ -153,7 +157,8 @@ class CorrelationResults: tissue_corr_data = self.do_tissue_correlation_for_all_traits() if tissue_corr_data != None: for trait in list(tissue_corr_data.keys())[:self.return_number]: - self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait]) + self.get_sample_r_and_p_values( + trait, self.target_dataset.trait_data[trait]) else: for trait, values in list(self.target_dataset.trait_data.items()): self.get_sample_r_and_p_values(trait, values) @@ -163,7 +168,8 @@ class CorrelationResults: lit_corr_data = self.do_lit_correlation_for_all_traits() for trait in list(lit_corr_data.keys())[:self.return_number]: - self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait]) + self.get_sample_r_and_p_values( + trait, self.target_dataset.trait_data[trait]) elif self.corr_type == "sample": for trait, values in list(self.target_dataset.trait_data.items()): @@ -180,7 +186,8 @@ class CorrelationResults: range_chr_as_int = order_id for _trait_counter, trait in enumerate(list(self.correlation_data.keys())[:self.return_number]): - trait_object = create_trait(dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False) + trait_object = create_trait( + dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False) if not trait_object: continue @@ -235,7 +242,8 @@ class CorrelationResults: if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet": self.do_tissue_correlation_for_trait_list() - self.json_results = generate_corr_json(self.correlation_results, self.this_trait, self.dataset, self.target_dataset) + self.json_results = generate_corr_json( + self.correlation_results, self.this_trait, self.dataset, self.target_dataset) ############################################################################################################################################ @@ -263,15 +271,18 @@ class CorrelationResults: symbol_list=[self.this_trait.symbol]) if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict: - primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()] - gene_symbol_list = [trait.symbol for trait in self.correlation_results if trait.symbol] + primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower( + )] + gene_symbol_list = [ + trait.symbol for trait in self.correlation_results if trait.symbol] corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( symbol_list=gene_symbol_list) for trait in self.correlation_results: if trait.symbol and trait.symbol.lower() in corr_result_tissue_vals_dict: - this_trait_tissue_values = corr_result_tissue_vals_dict[trait.symbol.lower()] + this_trait_tissue_values = corr_result_tissue_vals_dict[trait.symbol.lower( + )] result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, this_trait_tissue_values, @@ -286,7 +297,8 @@ class CorrelationResults: symbol_list=[self.this_trait.symbol]) if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict: - primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()] + primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower( + )] #print("trait_gene_symbols: ", pf(trait_gene_symbols.values())) corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( @@ -299,7 +311,8 @@ class CorrelationResults: tissue_corr_data = {} for trait, symbol in list(self.trait_symbol_dict.items()): if symbol and symbol.lower() in corr_result_tissue_vals_dict: - this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()] + this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower( + )] result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, this_trait_tissue_values, @@ -314,12 +327,14 @@ class CorrelationResults: def do_lit_correlation_for_trait_list(self): - input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid) + input_trait_mouse_gene_id = self.convert_to_mouse_gene_id( + self.dataset.group.species.lower(), self.this_trait.geneid) for trait in self.correlation_results: if trait.geneid: - trait.mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), trait.geneid) + trait.mouse_gene_id = self.convert_to_mouse_gene_id( + self.dataset.group.species.lower(), trait.geneid) else: trait.mouse_gene_id = None @@ -348,11 +363,13 @@ class CorrelationResults: trait.lit_corr = 0 def do_lit_correlation_for_all_traits(self): - input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid) + input_trait_mouse_gene_id = self.convert_to_mouse_gene_id( + self.dataset.group.species.lower(), self.this_trait.geneid) lit_corr_data = {} for trait, gene_id in list(self.trait_geneid_dict.items()): - mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), gene_id) + mouse_gene_id = self.convert_to_mouse_gene_id( + self.dataset.group.species.lower(), gene_id) if mouse_gene_id and str(mouse_gene_id).find(";") == -1: #print("gene_symbols:", input_trait_mouse_gene_id + " / " + mouse_gene_id) @@ -438,21 +455,26 @@ class CorrelationResults: self.this_trait_vals.append(sample_value) target_vals.append(target_sample_value) - self.this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(self.this_trait_vals, target_vals) + self.this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values( + self.this_trait_vals, target_vals) if num_overlap > 5: # ZS: 2015 could add biweight correlation, see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/ if self.corr_method == 'bicor': - sample_r, sample_p = do_bicor(self.this_trait_vals, target_vals) + sample_r, sample_p = do_bicor( + self.this_trait_vals, target_vals) elif self.corr_method == 'pearson': - sample_r, sample_p = scipy.stats.pearsonr(self.this_trait_vals, target_vals) + sample_r, sample_p = scipy.stats.pearsonr( + self.this_trait_vals, target_vals) else: - sample_r, sample_p = scipy.stats.spearmanr(self.this_trait_vals, target_vals) + sample_r, sample_p = scipy.stats.spearmanr( + self.this_trait_vals, target_vals) if numpy.isnan(sample_r): pass else: - self.correlation_data[trait] = [sample_r, sample_p, num_overlap] + self.correlation_data[trait] = [ + sample_r, sample_p, num_overlap] def process_samples(self, start_vars, sample_names, excluded_samples=None): if not excluded_samples: @@ -478,7 +500,8 @@ def do_bicor(this_trait_vals, target_trait_vals): this_vals = ro.Vector(this_trait_vals) target_vals = ro.Vector(target_trait_vals) - the_r, the_p, _fisher_transform, _the_t, _n_obs = [numpy.asarray(x) for x in r_bicor(x=this_vals, y=target_vals)] + the_r, the_p, _fisher_transform, _the_t, _n_obs = [ + numpy.asarray(x) for x in r_bicor(x=this_vals, y=target_vals)] return the_r, the_p @@ -492,7 +515,8 @@ def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_ap results_dict['index'] = i + 1 results_dict['trait_id'] = trait.name results_dict['dataset'] = trait.dataset.name - results_dict['hmac'] = hmac.data_hmac('{}:{}'.format(trait.name, trait.dataset.name)) + results_dict['hmac'] = hmac.data_hmac( + '{}:{}'.format(trait.name, trait.dataset.name)) if target_dataset.type == "ProbeSet": results_dict['symbol'] = trait.symbol results_dict['description'] = "N/A" @@ -543,7 +567,8 @@ def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_ap if bool(trait.authors): authors_list = trait.authors.split(',') if len(authors_list) > 6: - results_dict['authors_display'] = ", ".join(authors_list[:6]) + ", et al." + results_dict['authors_display'] = ", ".join( + authors_list[:6]) + ", et al." else: results_dict['authors_display'] = trait.authors if bool(trait.pubmed_id): diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index 331cb1dc..aefb4453 100644 --- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -44,7 +44,8 @@ THIRTY_DAYS = 60 * 60 * 24 * 30 class CorrelationMatrix: def __init__(self, start_vars): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) @@ -52,7 +53,8 @@ class CorrelationMatrix: self.traits = [] self.insufficient_shared_samples = False self.do_PCA = True - this_group = self.trait_list[0][1].group.name # ZS: Getting initial group name before verifying all traits are in the same group in the following loop + # ZS: Getting initial group name before verifying all traits are in the same group in the following loop + this_group = self.trait_list[0][1].group.name for trait_db in self.trait_list: this_group = trait_db[1].group.name this_trait = trait_db[0] @@ -76,10 +78,12 @@ class CorrelationMatrix: this_trait_vals.append('') self.sample_data.append(this_trait_vals) - if len(this_trait_vals) < len(self.trait_list): # Shouldn't do PCA if there are more traits than observations/samples + # Shouldn't do PCA if there are more traits than observations/samples + if len(this_trait_vals) < len(self.trait_list): self.do_PCA = False - self.lowest_overlap = 8 # ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning) + # ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning) + self.lowest_overlap = 8 self.corr_results = [] self.pca_corr_results = [] @@ -112,7 +116,8 @@ class CorrelationMatrix: if sample in self.shared_samples_list: self.shared_samples_list.remove(sample) - this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals) + this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values( + this_trait_vals, target_vals) if num_overlap < self.lowest_overlap: self.lowest_overlap = num_overlap @@ -120,21 +125,25 @@ class CorrelationMatrix: corr_result_row.append([target_trait, 0, num_overlap]) pca_corr_result_row.append(0) else: - pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals) + pearson_r, pearson_p = scipy.stats.pearsonr( + this_trait_vals, target_vals) if is_spearman == False: sample_r, sample_p = pearson_r, pearson_p if sample_r == 1: is_spearman = True else: - sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) + sample_r, sample_p = scipy.stats.spearmanr( + this_trait_vals, target_vals) - corr_result_row.append([target_trait, sample_r, num_overlap]) + corr_result_row.append( + [target_trait, sample_r, num_overlap]) pca_corr_result_row.append(pearson_r) self.corr_results.append(corr_result_row) self.pca_corr_results.append(pca_corr_result_row) - self.export_filename, self.export_filepath = export_corr_matrix(self.corr_results) + self.export_filename, self.export_filepath = export_corr_matrix( + self.corr_results) self.trait_data_array = [] for trait_db in self.trait_list: @@ -156,12 +165,14 @@ class CorrelationMatrix: try: corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results)) - corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen) + corr_eigen_value, corr_eigen_vectors = sortEigenVectors( + corr_result_eigen) if self.do_PCA == True: self.pca_works = "True" self.pca_trait_ids = [] - pca = self.calculate_pca(list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) + pca = self.calculate_pca( + list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) self.loadings_array = self.process_loadings() else: self.pca_works = "False" @@ -179,7 +190,8 @@ class CorrelationMatrix: base = importr('base') stats = importr('stats') - corr_results_to_list = robjects.FloatVector([item for sublist in self.pca_corr_results for item in sublist]) + corr_results_to_list = robjects.FloatVector( + [item for sublist in self.pca_corr_results for item in sublist]) m = robjects.r.matrix(corr_results_to_list, nrow=len(cols)) eigen = base.eigen(m) @@ -198,10 +210,12 @@ class CorrelationMatrix: pca_traits.append((vector * -1.0).tolist()) this_group_name = self.trait_list[0][1].group.name - temp_dataset = data_set.create_dataset(dataset_name="Temp", dataset_type="Temp", group_name = this_group_name) + temp_dataset = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name = this_group_name) temp_dataset.group.get_samplelist() for i, pca_trait in enumerate(pca_traits): - trait_id = "PCA" + str(i + 1) + "_" + temp_dataset.group.species + "_" + this_group_name + "_" + datetime.datetime.now().strftime("%m%d%H%M%S") + trait_id = "PCA" + str(i + 1) + "_" + temp_dataset.group.species + "_" + \ + this_group_name + "_" + datetime.datetime.now().strftime("%m%d%H%M%S") this_vals_string = "" position = 0 for sample in temp_dataset.group.all_samples_ordered(): @@ -235,17 +249,23 @@ class CorrelationMatrix: def export_corr_matrix(corr_results): - corr_matrix_filename = "corr_matrix_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - matrix_export_path = "{}{}.csv".format(GENERATED_TEXT_DIR, corr_matrix_filename) + corr_matrix_filename = "corr_matrix_" + \ + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + matrix_export_path = "{}{}.csv".format( + GENERATED_TEXT_DIR, corr_matrix_filename) with open(matrix_export_path, "w+") as output_file: - output_file.write("Time/Date: " + datetime.datetime.now().strftime("%x / %X") + "\n") + output_file.write( + "Time/Date: " + datetime.datetime.now().strftime("%x / %X") + "\n") output_file.write("\n") output_file.write("Correlation ") for i, item in enumerate(corr_results[0]): - output_file.write("Trait" + str(i + 1) + ": " + str(item[0].dataset.name) + "::" + str(item[0].name) + "\t") + output_file.write("Trait" + str(i + 1) + ": " + \ + str(item[0].dataset.name) + "::" + str(item[0].name) + "\t") output_file.write("\n") for i, row in enumerate(corr_results): - output_file.write("Trait" + str(i + 1) + ": " + str(row[0][0].dataset.name) + "::" + str(row[0][0].name) + "\t") + output_file.write("Trait" + str(i + 1) + ": " + \ + str(row[0][0].dataset.name) + "::" + str(row[0][0].name) + "\t") for item in row: output_file.write(str(item[1]) + "\t") output_file.write("\n") @@ -254,10 +274,12 @@ def export_corr_matrix(corr_results): output_file.write("\n") output_file.write("N ") for i, item in enumerate(corr_results[0]): - output_file.write("Trait" + str(i) + ": " + str(item[0].dataset.name) + "::" + str(item[0].name) + "\t") + output_file.write("Trait" + str(i) + ": " + \ + str(item[0].dataset.name) + "::" + str(item[0].name) + "\t") output_file.write("\n") for i, row in enumerate(corr_results): - output_file.write("Trait" + str(i) + ": " + str(row[0][0].dataset.name) + "::" + str(row[0][0].name) + "\t") + output_file.write("Trait" + str(i) + ": " + \ + str(row[0][0].dataset.name) + "::" + str(row[0][0].name) + "\t") for item in row: output_file.write(str(item[2]) + "\t") output_file.write("\n") diff --git a/wqflask/wqflask/ctl/ctl_analysis.py b/wqflask/wqflask/ctl/ctl_analysis.py index ec66e59f..48a82435 100644 --- a/wqflask/wqflask/ctl/ctl_analysis.py +++ b/wqflask/wqflask/ctl/ctl_analysis.py @@ -46,13 +46,18 @@ class CTL: #log = r_file("/tmp/genenetwork_ctl.log", open = "wt") # r_sink(log) # Uncomment the r_sink() commands to log output from stdout/stderr to a file #r_sink(log, type = "message") - r_library("ctl") # Load CTL - Should only be done once, since it is quite expensive + # Load CTL - Should only be done once, since it is quite expensive + r_library("ctl") r_options(stringsAsFactors=False) logger.info("Initialization of CTL done, package loaded in R session") - self.r_CTLscan = ro.r["CTLscan"] # Map the CTLscan function - self.r_CTLsignificant = ro.r["CTLsignificant"] # Map the CTLsignificant function - self.r_lineplot = ro.r["ctl.lineplot"] # Map the ctl.lineplot function - self.r_plotCTLobject = ro.r["plot.CTLobject"] # Map the CTLsignificant function + # Map the CTLscan function + self.r_CTLscan = ro.r["CTLscan"] + # Map the CTLsignificant function + self.r_CTLsignificant = ro.r["CTLsignificant"] + # Map the ctl.lineplot function + self.r_lineplot = ro.r["ctl.lineplot"] + # Map the CTLsignificant function + self.r_plotCTLobject = ro.r["plot.CTLobject"] self.nodes_list = [] self.edges_list = [] logger.info("Obtained pointers to CTL functions") @@ -81,7 +86,8 @@ class CTL: def run_analysis(self, requestform): logger.info("Starting CTL analysis on dataset") - self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')] + self.trait_db_list = [trait.strip() + for trait in requestform['trait_list'].split(',')] self.trait_db_list = [x for x in self.trait_db_list if x] logger.debug("strategy:", requestform.get("strategy")) @@ -113,9 +119,11 @@ class CTL: markers.append(marker["genotypes"]) genotypes = list(itertools.chain(*markers)) - logger.debug(len(genotypes) / len(individuals), "==", len(parser.markers)) + logger.debug(len(genotypes) / len(individuals), + "==", len(parser.markers)) - rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len(individuals), dimnames=r_list(markernames, individuals), byrow=True)) + rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len( + individuals), dimnames=r_list(markernames, individuals), byrow=True)) # Create a phenotype matrix traits = [] @@ -131,7 +139,8 @@ class CTL: else: traits.append("-999") - rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len(individuals), dimnames=r_list(self.trait_db_list, individuals), byrow=True)) + rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len( + individuals), dimnames=r_list(self.trait_db_list, individuals), byrow=True)) logger.debug(rPheno) @@ -144,7 +153,8 @@ class CTL: #r_write_table(rPheno, "~/outputGN/pheno.csv") # Perform the CTL scan - res = self.r_CTLscan(rGeno, rPheno, strategy=strategy, nperm=nperm, parametric = parametric, nthreads=6) + res = self.r_CTLscan(rGeno, rPheno, strategy=strategy, + nperm=nperm, parametric = parametric, nthreads=6) # Get significant interactions significant = self.r_CTLsignificant(res, significance=significance) @@ -155,20 +165,27 @@ class CTL: self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1'] self.results['ctlresult'] = significant - self.results['requestform'] = requestform # Store the user specified parameters for the output page + # Store the user specified parameters for the output page + self.results['requestform'] = requestform # Create the lineplot - r_png(self.results['imgloc1'], width=1000, height=600, type='cairo-png') + r_png(self.results['imgloc1'], width=1000, + height=600, type='cairo-png') self.r_lineplot(res, significance=significance) r_dev_off() - n = 2 # We start from 2, since R starts from 1 :) + # We start from 2, since R starts from 1 :) + n = 2 for trait in self.trait_db_list: # Create the QTL like CTL plots - self.results['imgurl' + str(n)] = webqtlUtil.genRandStr("CTL_") + ".png" - self.results['imgloc' + str(n)] = GENERATED_IMAGE_DIR + self.results['imgurl' + str(n)] - r_png(self.results['imgloc' + str(n)], width=1000, height=600, type='cairo-png') - self.r_plotCTLobject(res, (n - 1), significance=significance, main='Phenotype ' + trait) + self.results['imgurl' + \ + str(n)] = webqtlUtil.genRandStr("CTL_") + ".png" + self.results['imgloc' + str(n)] = GENERATED_IMAGE_DIR + \ + self.results['imgurl' + str(n)] + r_png(self.results['imgloc' + str(n)], + width=1000, height=600, type='cairo-png') + self.r_plotCTLobject( + res, (n - 1), significance=significance, main='Phenotype ' + trait) r_dev_off() n = n + 1 @@ -178,17 +195,24 @@ class CTL: # Create the interactive graph for cytoscape visualization (Nodes and Edges) if not isinstance(significant, ri.RNULLType): for x in range(len(significant[0])): - logger.debug(significant[0][x], significant[1][x], significant[2][x]) # Debug to console - tsS = significant[0][x].split(':') # Source - tsT = significant[2][x].split(':') # Target - gtS = create_trait(name=tsS[0], dataset_name=tsS[1]) # Retrieve Source info from the DB - gtT = create_trait(name=tsT[0], dataset_name=tsT[1]) # Retrieve Target info from the DB + logger.debug(significant[0][x], significant[1] + [x], significant[2][x]) # Debug to console + # Source + tsS = significant[0][x].split(':') + # Target + tsT = significant[2][x].split(':') + # Retrieve Source info from the DB + gtS = create_trait(name=tsS[0], dataset_name=tsS[1]) + # Retrieve Target info from the DB + gtT = create_trait(name=tsT[0], dataset_name=tsT[1]) self.addNode(gtS) self.addNode(gtT) self.addEdge(gtS, gtT, significant, x) - significant[0][x] = "{} ({})".format(gtS.symbol, gtS.name) # Update the trait name for the displayed table - significant[2][x] = "{} ({})".format(gtT.symbol, gtT.name) # Update the trait name for the displayed table + # Update the trait name for the displayed table + significant[0][x] = "{} ({})".format(gtS.symbol, gtS.name) + # Update the trait name for the displayed table + significant[2][x] = "{} ({})".format(gtT.symbol, gtT.name) self.elements = json.dumps(self.nodes_list + self.edges_list) diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index 7442dc72..e50ff50b 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -26,14 +26,16 @@ class DoSearch: def __init__(self, search_term, search_operator=None, dataset=None, search_type=None): self.search_term = search_term # Make sure search_operator is something we expect - assert search_operator in (None, "=", "<", ">", "<=", ">="), "Bad search operator" + assert search_operator in ( + None, "=", "<", ">", "<=", ">="), "Bad search operator" self.search_operator = search_operator self.dataset = dataset self.search_type = search_type if self.dataset: # Get group information for dataset and the species id - self.species_id = webqtlDatabaseFunction.retrieve_species_id(self.dataset.group.name) + self.species_id = webqtlDatabaseFunction.retrieve_species_id( + self.dataset.group.name) def execute(self, query): """Executes query and returns results""" @@ -104,7 +106,8 @@ class MrnaAssaySearch(DoSearch): search_string = escape(self.search_term[0]) if self.search_term[0] != "*": - match_clause = """((MATCH (ProbeSet.symbol) AGAINST ('%s' IN BOOLEAN MODE))) and """ % (search_string) + match_clause = """((MATCH (ProbeSet.symbol) AGAINST ('%s' IN BOOLEAN MODE))) and """ % ( + search_string) else: match_clause = "" @@ -223,16 +226,19 @@ class PhenotypeSearch(DoSearch): # and comment here # if "'" not in self.search_term[0]: - search_term = "[[:<:]]" + self.handle_wildcard(self.search_term[0]) + "[[:>:]]" + search_term = "[[:<:]]" + \ + self.handle_wildcard(self.search_term[0]) + "[[:>:]]" if "_" in self.search_term[0]: if len(self.search_term[0].split("_")[0]) == 3: - search_term = "[[:<:]]" + self.handle_wildcard(self.search_term[0].split("_")[1]) + "[[:>:]]" + search_term = "[[:<:]]" + self.handle_wildcard( + self.search_term[0].split("_")[1]) + "[[:>:]]" # This adds a clause to the query that matches the search term # against each field in the search_fields tuple where_clause_list = [] for field in self.search_fields: - where_clause_list.append('''%s REGEXP "%s"''' % (field, search_term)) + where_clause_list.append('''%s REGEXP "%s"''' % + (field, search_term)) where_clause = "(%s) " % ' OR '.join(where_clause_list) return where_clause @@ -364,7 +370,8 @@ class GenotypeSearch(DoSearch): if self.search_term[0] == "*": self.query = self.compile_final_query() else: - self.query = self.compile_final_query(where_clause=self.get_where_clause()) + self.query = self.compile_final_query( + where_clause=self.get_where_clause()) return self.execute(self.query) @@ -497,7 +504,8 @@ class LrsSearch(DoSearch): where_clause = """ %sXRef.LRS > %s and %sXRef.LRS < %s """ % self.mescape(self.dataset.type, - min(lrs_min, lrs_max), + min(lrs_min, + lrs_max), self.dataset.type, max(lrs_min, lrs_max)) @@ -537,7 +545,8 @@ class LrsSearch(DoSearch): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(self.from_clause, self.where_clause) + self.query = self.compile_final_query( + self.from_clause, self.where_clause) return self.execute(self.query) @@ -551,7 +560,8 @@ class MrnaLrsSearch(LrsSearch, MrnaAssaySearch): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(from_clause=self.from_clause, where_clause=self.where_clause) + self.query = self.compile_final_query( + from_clause=self.from_clause, where_clause=self.where_clause) return self.execute(self.query) @@ -566,7 +576,8 @@ class PhenotypeLrsSearch(LrsSearch, PhenotypeSearch): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(from_clause=self.from_clause, where_clause=self.where_clause) + self.query = self.compile_final_query( + from_clause=self.from_clause, where_clause=self.where_clause) return self.execute(self.query) @@ -593,7 +604,8 @@ class CisTransLrsSearch(DoSearch): elif len(self.search_term) == 3: lrs_min, lrs_max, self.mb_buffer = self.search_term elif len(self.search_term) == 4: - lrs_min, lrs_max, self.mb_buffer = [float(value) for value in self.search_term[:3]] + lrs_min, lrs_max, self.mb_buffer = [ + float(value) for value in self.search_term[:3]] chromosome = self.search_term[3] if "Chr" in chromosome or "chr" in chromosome: chromosome = int(chromosome[3:]) @@ -636,14 +648,19 @@ class CisTransLrsSearch(DoSearch): if chromosome: location_clause = "(%s.Chr = '%s' and %s.Chr = Geno.Chr and ABS(%s.Mb-Geno.Mb) %s %s) or (%s.Chr != Geno.Chr and Geno.Chr = '%s')" % (escape(self.dataset.type), chromosome, - escape(self.dataset.type), - escape(self.dataset.type), + escape( + self.dataset.type), + escape( + self.dataset.type), the_operator, - escape(str(self.mb_buffer)), - escape(self.dataset.type), + escape( + str(self.mb_buffer)), + escape( + self.dataset.type), chromosome) else: - location_clause = "(ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or (%s.Chr != Geno.Chr)" % (escape(self.dataset.type), the_operator, escape(str(self.mb_buffer)), escape(self.dataset.type), escape(self.dataset.type)) + location_clause = "(ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or (%s.Chr != Geno.Chr)" % (escape( + self.dataset.type), the_operator, escape(str(self.mb_buffer)), escape(self.dataset.type), escape(self.dataset.type)) where_clause = sub_clause + """ %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and @@ -683,7 +700,8 @@ class CisLrsSearch(CisTransLrsSearch, MrnaAssaySearch): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(self.from_clause, self.where_clause) + self.query = self.compile_final_query( + self.from_clause, self.where_clause) return self.execute(self.query) @@ -714,7 +732,8 @@ class TransLrsSearch(CisTransLrsSearch, MrnaAssaySearch): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(self.from_clause, self.where_clause) + self.query = self.compile_final_query( + self.from_clause, self.where_clause) return self.execute(self.query) @@ -733,7 +752,8 @@ class MeanSearch(MrnaAssaySearch): where_clause = """ %sXRef.mean > %s and %sXRef.mean < %s """ % self.mescape(self.dataset.type, - min(self.mean_min, self.mean_max), + min(self.mean_min, + self.mean_max), self.dataset.type, max(self.mean_min, self.mean_max)) else: @@ -796,7 +816,8 @@ class PositionSearch(DoSearch): DoSearch.search_types[search_key] = "PositionSearch" def get_where_clause(self): - self.search_term = [float(value) if is_number(value) else value for value in self.search_term] + self.search_term = [float(value) if is_number( + value) else value for value in self.search_term] chr, self.mb_min, self.mb_max = self.search_term[:3] self.chr = str(chr).lower() self.get_chr() @@ -806,7 +827,8 @@ class PositionSearch(DoSearch): %s.Mb < %s """ % self.mescape(self.dataset.type, self.chr, self.dataset.type, - min(self.mb_min, self.mb_max), + min(self.mb_min, + self.mb_max), self.dataset.type, max(self.mb_min, self.mb_max)) @@ -923,7 +945,8 @@ def get_aliases(symbol, species): return [] filtered_aliases = [] - response = requests.get(GN2_BASE_URL + "/gn3/gene/aliases/" + symbol_string) + response = requests.get( + GN2_BASE_URL + "/gn3/gene/aliases/" + symbol_string) if response: alias_list = json.loads(response.content) diff --git a/wqflask/wqflask/docs.py b/wqflask/wqflask/docs.py index 81424b9c..fc93248a 100644 --- a/wqflask/wqflask/docs.py +++ b/wqflask/wqflask/docs.py @@ -35,11 +35,13 @@ class Docs: def update_text(start_vars): content = start_vars['ckcontent'] - content = content.replace('%', '%%').replace('"', '\\"').replace("'", "\\'") + content = content.replace('%', '%%').replace( + '"', '\\"').replace("'", "\\'") try: if g.user_session.record['user_email_address'] == "zachary.a.sloan@gmail.com" or g.user_session.record['user_email_address'] == "labwilliams@gmail.com": - sql = "UPDATE Docs SET content='{0}' WHERE entry='{1}';".format(content, start_vars['entry_type']) + sql = "UPDATE Docs SET content='{0}' WHERE entry='{1}';".format( + content, start_vars['entry_type']) g.db.execute(sql) except: pass diff --git a/wqflask/wqflask/export_traits.py b/wqflask/wqflask/export_traits.py index d0745ef7..5bd54f9d 100644 --- a/wqflask/wqflask/export_traits.py +++ b/wqflask/wqflask/export_traits.py @@ -35,9 +35,12 @@ def export_search_results_csv(targs): metadata.append(["Data Set: " + targs['database_name']]) if 'accession_id' in targs: if targs['accession_id'] != "None": - metadata.append(["Metadata Link: http://genenetwork.org/webqtl/main.py?FormID=sharinginfo&GN_AccessionId=" + targs['accession_id']]) - metadata.append(["Export Date: " + datetime.datetime.now().strftime("%B %d, %Y")]) - metadata.append(["Export Time: " + datetime.datetime.now().strftime("%H:%M GMT")]) + metadata.append( + ["Metadata Link: http://genenetwork.org/webqtl/main.py?FormID=sharinginfo&GN_AccessionId=" + targs['accession_id']]) + metadata.append( + ["Export Date: " + datetime.datetime.now().strftime("%B %d, %Y")]) + metadata.append( + ["Export Time: " + datetime.datetime.now().strftime("%H:%M GMT")]) if 'search_string' in targs: if targs['search_string'] != "None": metadata.append(["Search Query: " + targs['search_string']]) @@ -52,10 +55,12 @@ def export_search_results_csv(targs): for trait in table_rows: trait_name, dataset_name, _hash = trait.split(":") trait_ob = create_trait(name=trait_name, dataset_name=dataset_name) - trait_ob = retrieve_trait_info(trait_ob, trait_ob.dataset, get_qtl_info=True) + trait_ob = retrieve_trait_info( + trait_ob, trait_ob.dataset, get_qtl_info=True) trait_list.append(trait_ob) - table_headers = ['Index', 'URL', 'Species', 'Group', 'Dataset', 'Record ID', 'Symbol', 'Description', 'ProbeTarget', 'PubMed_ID', 'Chr', 'Mb', 'Alias', 'Gene_ID', 'Homologene_ID', 'UniGene_ID', 'Strand_Probe', 'Probe_set_specificity', 'Probe_set_BLAT_score', 'Probe_set_BLAT_Mb_start', 'Probe_set_BLAT_Mb_end', 'QTL_Chr', 'QTL_Mb', 'Locus_at_Peak', 'Max_LRS', 'P_value_of_MAX', 'Mean_Expression'] + table_headers = ['Index', 'URL', 'Species', 'Group', 'Dataset', 'Record ID', 'Symbol', 'Description', 'ProbeTarget', 'PubMed_ID', 'Chr', 'Mb', 'Alias', 'Gene_ID', 'Homologene_ID', 'UniGene_ID', + 'Strand_Probe', 'Probe_set_specificity', 'Probe_set_BLAT_score', 'Probe_set_BLAT_Mb_start', 'Probe_set_BLAT_Mb_end', 'QTL_Chr', 'QTL_Mb', 'Locus_at_Peak', 'Max_LRS', 'P_value_of_MAX', 'Mean_Expression'] traits_by_group = sort_traits_by_group(trait_list) @@ -87,7 +92,8 @@ def export_search_results_csv(targs): trait_symbol = "N/A" row_contents = [ i + 1, - "https://genenetwork.org/show_trait?trait_id=" + str(trait.name) + "&dataset=" + str(trait.dataset.name), + "https://genenetwork.org/show_trait?trait_id=" + \ + str(trait.name) + "&dataset=" + str(trait.dataset.name), trait.dataset.group.species, trait.dataset.group.name, trait.dataset.name, @@ -117,13 +123,15 @@ def export_search_results_csv(targs): for sample in trait.dataset.group.samplelist: if sample in trait.data: - row_contents += [trait.data[sample].value, trait.data[sample].variance] + row_contents += [trait.data[sample].value, + trait.data[sample].variance] else: row_contents += ["x", "x"] csv_rows.append(row_contents) - csv_rows = list(map(list, itertools.zip_longest(*[row for row in csv_rows]))) + csv_rows = list( + map(list, itertools.zip_longest(*[row for row in csv_rows]))) writer.writerows(csv_rows) csv_data = buff.getvalue() buff.close() diff --git a/wqflask/wqflask/external_tools/send_to_bnw.py b/wqflask/wqflask/external_tools/send_to_bnw.py index 1556c6a0..3c0f2ca7 100644 --- a/wqflask/wqflask/external_tools/send_to_bnw.py +++ b/wqflask/wqflask/external_tools/send_to_bnw.py @@ -27,7 +27,8 @@ logger = utility.logger.getLogger(__name__) class SendToBNW: def __init__(self, start_vars): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) trait_samples_list = [] @@ -39,7 +40,8 @@ class SendToBNW: trait1_samples = list(this_sample_data.keys()) trait_samples_list.append(trait1_samples) - shared_samples = list(set(trait_samples_list[0]).intersection(*trait_samples_list)) + shared_samples = list( + set(trait_samples_list[0]).intersection(*trait_samples_list)) self.form_value = "" # ZS: string that is passed to BNW through form values_list = [] diff --git a/wqflask/wqflask/external_tools/send_to_geneweaver.py b/wqflask/wqflask/external_tools/send_to_geneweaver.py index c55c43e6..8af9bee9 100644 --- a/wqflask/wqflask/external_tools/send_to_geneweaver.py +++ b/wqflask/wqflask/external_tools/send_to_geneweaver.py @@ -32,7 +32,8 @@ logger = utility.logger.getLogger(__name__) class SendToGeneWeaver: def __init__(self, start_vars): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) self.chip_name = test_chip(self.trait_list) diff --git a/wqflask/wqflask/external_tools/send_to_webgestalt.py b/wqflask/wqflask/external_tools/send_to_webgestalt.py index 6b78725c..fd12562f 100644 --- a/wqflask/wqflask/external_tools/send_to_webgestalt.py +++ b/wqflask/wqflask/external_tools/send_to_webgestalt.py @@ -32,7 +32,8 @@ logger = utility.logger.getLogger(__name__) class SendToWebGestalt: def __init__(self, start_vars): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) self.chip_name = test_chip(self.trait_list) diff --git a/wqflask/wqflask/group_manager.py b/wqflask/wqflask/group_manager.py index 69ce3e7a..55a7da0e 100644 --- a/wqflask/wqflask/group_manager.py +++ b/wqflask/wqflask/group_manager.py @@ -77,8 +77,10 @@ def remove_users(): admin_ids_to_remove = request.form['selected_admin_ids'] member_ids_to_remove = request.form['selected_member_ids'] - remove_users_from_group(g.user_session.user_id, admin_ids_to_remove.split(":"), group_id, user_type="admins") - remove_users_from_group(g.user_session.user_id, member_ids_to_remove.split(":"), group_id, user_type="members") + remove_users_from_group(g.user_session.user_id, admin_ids_to_remove.split( + ":"), group_id, user_type="admins") + remove_users_from_group(g.user_session.user_id, member_ids_to_remove.split( + ":"), group_id, user_type="members") return redirect(url_for('view_group', id=group_id)) @@ -88,10 +90,12 @@ def add_users(user_type='members'): group_id = request.form['group_id'] if user_type == "admins": user_emails = request.form['admin_emails_to_add'].split(",") - add_users_to_group(g.user_session.user_id, group_id, user_emails, admins=True) + add_users_to_group(g.user_session.user_id, group_id, + user_emails, admins=True) elif user_type == "members": user_emails = request.form['member_emails_to_add'].split(",") - add_users_to_group(g.user_session.user_id, group_id, user_emails, admins=False) + add_users_to_group(g.user_session.user_id, group_id, + user_emails, admins=False) return redirect(url_for('view_group', id=group_id)) @@ -111,7 +115,8 @@ def add_or_edit_group(): if "group_name" in params: member_user_ids = set() admin_user_ids = set() - admin_user_ids.add(g.user_session.user_id) # ZS: Always add the user creating the group as an admin + # ZS: Always add the user creating the group as an admin + admin_user_ids.add(g.user_session.user_id) if "admin_emails_to_add" in params: admin_emails = params['admin_emails_to_add'].split(",") for email in admin_emails: @@ -127,7 +132,8 @@ def add_or_edit_group(): member_user_ids.add(user_details['user_id']) #send_group_invites(params['group_id'], user_email_list = user_emails, user_type="members") - create_group(list(admin_user_ids), list(member_user_ids), params['group_name']) + create_group(list(admin_user_ids), list( + member_user_ids), params['group_name']) return redirect(url_for('manage_groups')) else: return render_template("admin/create_group.html") @@ -149,9 +155,11 @@ def send_group_invites(group_id, user_email_list=[], user_type="members"): ((user_type == "members") and (user_details['user_id'] in group_info['members'])): continue else: - send_verification_email(user_details, template_name="email/group_verification.txt", key_prefix="verification_code", subject = "You've been invited to join a GeneNetwork user group") + send_verification_email(user_details, template_name="email/group_verification.txt", + key_prefix="verification_code", subject = "You've been invited to join a GeneNetwork user group") else: - temp_password = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) + temp_password = ''.join(random.choice( + string.ascii_uppercase + string.digits) for _ in range(6)) user_details = { 'user_id': str(uuid.uuid4()), 'email_address': user_email, diff --git a/wqflask/wqflask/gsearch.py b/wqflask/wqflask/gsearch.py index a21dae84..9548d130 100644 --- a/wqflask/wqflask/gsearch.py +++ b/wqflask/wqflask/gsearch.py @@ -77,18 +77,21 @@ class GSearch: this_trait['name'] = line[5] this_trait['dataset'] = line[3] this_trait['dataset_fullname'] = line[4] - this_trait['hmac'] = hmac.data_hmac('{}:{}'.format(line[5], line[3])) + this_trait['hmac'] = hmac.data_hmac( + '{}:{}'.format(line[5], line[3])) this_trait['species'] = line[0] this_trait['group'] = line[1] this_trait['tissue'] = line[2] this_trait['symbol'] = line[6] if line[7]: - this_trait['description'] = line[7].decode('utf-8', 'replace') + this_trait['description'] = line[7].decode( + 'utf-8', 'replace') else: this_trait['description'] = "N/A" this_trait['location_repr'] = 'N/A' if (line[8] != "NULL" and line[8] != "") and (line[9] != 0): - this_trait['location_repr'] = 'Chr%s: %.6f' % (line[8], float(line[9])) + this_trait['location_repr'] = 'Chr%s: %.6f' % ( + line[8], float(line[9])) try: this_trait['mean'] = '%.3f' % line[10] except: @@ -103,7 +106,8 @@ class GSearch: this_trait['locus_chr'] = line[16] this_trait['locus_mb'] = line[17] - dataset_ob = SimpleNamespace(id=this_trait["dataset_id"], type="ProbeSet", species=this_trait["species"]) + dataset_ob = SimpleNamespace( + id=this_trait["dataset_id"], type="ProbeSet", species=this_trait["species"]) if dataset_ob.id not in dataset_to_permissions: permissions = check_resource_availability(dataset_ob) dataset_to_permissions[dataset_ob.id] = permissions @@ -118,7 +122,9 @@ class GSearch: max_lrs_text = "N/A" if this_trait['locus_chr'] != None and this_trait['locus_mb'] != None: - max_lrs_text = "Chr" + str(this_trait['locus_chr']) + ": " + str(this_trait['locus_mb']) + max_lrs_text = "Chr" + \ + str(this_trait['locus_chr']) + \ + ": " + str(this_trait['locus_mb']) this_trait['max_lrs_text'] = max_lrs_text trait_list.append(this_trait) @@ -146,7 +152,8 @@ class GSearch: if "_" in self.terms: if len(self.terms.split("_")[0]) == 3: search_term = self.terms.split("_")[1] - group_clause = "AND InbredSet.`InbredSetCode` = '{}'".format(self.terms.split("_")[0]) + group_clause = "AND InbredSet.`InbredSetCode` = '{}'".format( + self.terms.split("_")[0]) sql = """ SELECT Species.`Name`, @@ -192,18 +199,22 @@ class GSearch: this_trait['index'] = i + 1 this_trait['name'] = str(line[4]) if len(str(line[12])) == 3: - this_trait['display_name'] = str(line[12]) + "_" + this_trait['name'] + this_trait['display_name'] = str( + line[12]) + "_" + this_trait['name'] else: this_trait['display_name'] = this_trait['name'] this_trait['dataset'] = line[2] this_trait['dataset_fullname'] = line[3] - this_trait['hmac'] = hmac.data_hmac('{}:{}'.format(line[4], line[2])) + this_trait['hmac'] = hmac.data_hmac( + '{}:{}'.format(line[4], line[2])) this_trait['species'] = line[0] this_trait['group'] = line[1] if line[9] != None and line[6] != None: - this_trait['description'] = line[6].decode('utf-8', 'replace') + this_trait['description'] = line[6].decode( + 'utf-8', 'replace') elif line[5] != None: - this_trait['description'] = line[5].decode('utf-8', 'replace') + this_trait['description'] = line[5].decode( + 'utf-8', 'replace') else: this_trait['description'] = "N/A" if line[13] != None and line[13] != "": @@ -221,7 +232,8 @@ class GSearch: else: this_trait['pubmed_link'] = "N/A" if line[12]: - this_trait['display_name'] = line[12] + "_" + str(this_trait['name']) + this_trait['display_name'] = line[12] + \ + "_" + str(this_trait['name']) this_trait['LRS_score_repr'] = "N/A" if line[10] != "" and line[10] != None: this_trait['LRS_score_repr'] = '%3.1f' % line[10] @@ -230,13 +242,16 @@ class GSearch: this_trait['additive'] = '%.3f' % line[11] this_trait['max_lrs_text'] = "N/A" - trait_ob = create_trait(dataset_name=this_trait['dataset'], name=this_trait['name'], get_qtl_info=True, get_sample_info=False) + trait_ob = create_trait( + dataset_name=this_trait['dataset'], name=this_trait['name'], get_qtl_info=True, get_sample_info=False) if not trait_ob: continue if this_trait['dataset'] == this_trait['group'] + "Publish": try: if trait_ob.locus_chr != "" and trait_ob.locus_mb != "": - this_trait['max_lrs_text'] = "Chr" + str(trait_ob.locus_chr) + ": " + str(trait_ob.locus_mb) + this_trait['max_lrs_text'] = "Chr" + \ + str(trait_ob.locus_chr) + \ + ": " + str(trait_ob.locus_mb) except: this_trait['max_lrs_text'] = "N/A" diff --git a/wqflask/wqflask/heatmap/heatmap.py b/wqflask/wqflask/heatmap/heatmap.py index 0b477446..aa11caa8 100644 --- a/wqflask/wqflask/heatmap/heatmap.py +++ b/wqflask/wqflask/heatmap/heatmap.py @@ -18,7 +18,8 @@ logger = getLogger(__name__) class Heatmap: def __init__(self, start_vars, temp_uuid): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) self.temp_uuid = temp_uuid @@ -33,7 +34,8 @@ class Heatmap: chrnames = [] self.species = species.TheSpecies(dataset=self.trait_list[0][1]) for key in list(self.species.chromosomes.chromosomes.keys()): - chrnames.append([self.species.chromosomes.chromosomes[key].name, self.species.chromosomes.chromosomes[key].mb_length]) + chrnames.append([self.species.chromosomes.chromosomes[key].name, + self.species.chromosomes.chromosomes[key].mb_length]) for trait_db in self.trait_list: @@ -108,10 +110,13 @@ class Heatmap: trimmed_samples.append(str(samples[i])) trimmed_values.append(values[i]) - trait_filename = str(this_trait.name) + "_" + str(self.dataset.name) + "_pheno" + trait_filename = str(this_trait.name) + "_" + \ + str(self.dataset.name) + "_pheno" gen_pheno_txt_file(trimmed_samples, trimmed_values, trait_filename) - output_filename = self.dataset.group.name + "_GWA_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) + output_filename = self.dataset.group.name + "_GWA_" + \ + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) reaper_command = REAPER_COMMAND + ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt -n 1000 -o {4}{5}.txt'.format(flat_files('genotype'), genofile_name, @@ -129,9 +134,11 @@ class Heatmap: self.trait_results[this_trait.name] = [] for qtl in reaper_results: if qtl['additive'] > 0: - self.trait_results[this_trait.name].append(-float(qtl['lrs_value'])) + self.trait_results[this_trait.name].append( + -float(qtl['lrs_value'])) else: - self.trait_results[this_trait.name].append(float(qtl['lrs_value'])) + self.trait_results[this_trait.name].append( + float(qtl['lrs_value'])) def gen_pheno_txt_file(samples, vals, filename): diff --git a/wqflask/wqflask/interval_analyst/GeneUtil.py b/wqflask/wqflask/interval_analyst/GeneUtil.py index 8dd1c7c0..cadff080 100644 --- a/wqflask/wqflask/interval_analyst/GeneUtil.py +++ b/wqflask/wqflask/interval_analyst/GeneUtil.py @@ -54,7 +54,8 @@ def loadGenes(chrName, diffCol, startMb, endMb, species='mouse'): Mb >= %2.6f AND Mb < %2.6f AND StrainId1 = %d AND StrainId2 = %d """ % (chrName, newdict["TxStart"], newdict["TxEnd"], diffCol[0], diffCol[1])).fetchone()[0] - newdict["snpDensity"] = newdict["snpCount"] / (newdict["TxEnd"] - newdict["TxStart"]) / 1000.0 + newdict["snpDensity"] = newdict["snpCount"] / \ + (newdict["TxEnd"] - newdict["TxStart"]) / 1000.0 else: newdict["snpDensity"] = newdict["snpCount"] = 0 @@ -86,12 +87,14 @@ def loadGenes(chrName, diffCol, startMb, endMb, species='mouse'): StrainId1 = %d AND StrainId2 = %d """ % (chrName, newdict["TxStart"], newdict["TxEnd"], diffCol[0], diffCol[1])).fetchone()[0] - newdict2["snpDensity"] = newdict2["snpCount"] / (newdict2["TxEnd"] - newdict2["TxStart"]) / 1000.0 + newdict2["snpDensity"] = newdict2["snpCount"] / \ + (newdict2["TxEnd"] - newdict2["TxStart"]) / 1000.0 else: newdict2["snpDensity"] = newdict2["snpCount"] = 0 try: - newdict2['GeneLength'] = 1000.0 * (newdict2['TxEnd'] - newdict2['TxStart']) + newdict2['GeneLength'] = 1000.0 * \ + (newdict2['TxEnd'] - newdict2['TxStart']) except: pass diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py index 5c7b81dd..cde822e8 100644 --- a/wqflask/wqflask/marker_regression/display_mapping_results.py +++ b/wqflask/wqflask/marker_regression/display_mapping_results.py @@ -307,7 +307,8 @@ class DisplayMappingResults: if 'color_scheme' in start_vars: self.color_scheme = start_vars['color_scheme'] if self.color_scheme == "single": - self.manhattan_single_color = ImageColor.getrgb("#" + start_vars['manhattan_single_color']) + self.manhattan_single_color = ImageColor.getrgb( + "#" + start_vars['manhattan_single_color']) if 'permCheck' in list(start_vars.keys()): self.permChecked = start_vars['permCheck'] @@ -357,7 +358,8 @@ class DisplayMappingResults: if 'reaper_version' in list(start_vars.keys()) and self.mapping_method == "reaper": self.reaper_version = start_vars['reaper_version'] if 'output_files' in start_vars: - self.output_files = ",".join([(the_file if the_file is not None else "") for the_file in start_vars['output_files']]) + self.output_files = ",".join( + [(the_file if the_file is not None else "") for the_file in start_vars['output_files']]) self.categorical_vars = "" self.perm_strata = "" @@ -386,16 +388,19 @@ class DisplayMappingResults: self.dataset.group.genofile = self.genofile_string.split(":")[0] if self.mapping_method == "reaper" and self.manhattan_plot != True: - self.genotype = self.dataset.group.read_genotype_file(use_reaper=True) + self.genotype = self.dataset.group.read_genotype_file( + use_reaper=True) else: self.genotype = self.dataset.group.read_genotype_file() # Darwing Options try: if self.selectedChr > -1: - self.graphWidth = min(self.GRAPH_MAX_WIDTH, max(self.GRAPH_MIN_WIDTH, int(start_vars['graphWidth']))) + self.graphWidth = min(self.GRAPH_MAX_WIDTH, max( + self.GRAPH_MIN_WIDTH, int(start_vars['graphWidth']))) else: - self.graphWidth = min(self.GRAPH_MAX_WIDTH, max(self.MULT_GRAPH_MIN_WIDTH, int(start_vars['graphWidth']))) + self.graphWidth = min(self.GRAPH_MAX_WIDTH, max( + self.MULT_GRAPH_MIN_WIDTH, int(start_vars['graphWidth']))) except: if self.selectedChr > -1: self.graphWidth = self.GRAPH_DEFAULT_WIDTH @@ -472,9 +477,11 @@ class DisplayMappingResults: """ % (self.dataset.group.name, ", ".join(["'%s'" % X[0] for X in self.ChrList[1:]]))) self.ChrLengthMbList = [x[0] / 1000000.0 for x in self.ChrLengthMbList] - self.ChrLengthMbSum = reduce(lambda x, y: x + y, self.ChrLengthMbList, 0.0) + self.ChrLengthMbSum = reduce( + lambda x, y: x + y, self.ChrLengthMbList, 0.0) if self.ChrLengthMbList: - self.MbGraphInterval = self.ChrLengthMbSum / (len(self.ChrLengthMbList) * 12) # Empirical Mb interval + self.MbGraphInterval = self.ChrLengthMbSum / \ + (len(self.ChrLengthMbList) * 12) # Empirical Mb interval else: self.MbGraphInterval = 1 @@ -482,7 +489,8 @@ class DisplayMappingResults: for i, _chr in enumerate(self.genotype): self.ChrLengthCMList.append(_chr[-1].cM - _chr[0].cM) - self.ChrLengthCMSum = reduce(lambda x, y: x + y, self.ChrLengthCMList, 0.0) + self.ChrLengthCMSum = reduce( + lambda x, y: x + y, self.ChrLengthCMList, 0.0) if self.plotScale == 'physic': self.GraphInterval = self.MbGraphInterval # Mb @@ -496,7 +504,8 @@ class DisplayMappingResults: smd = [] for sample in self.sample_vals_dict.keys(): if self.sample_vals_dict[sample] != "x": - temp = GeneralObject(name=sample, value=float(self.sample_vals_dict[sample])) + temp = GeneralObject(name=sample, value=float( + self.sample_vals_dict[sample])) smd.append(temp) else: continue @@ -506,7 +515,8 @@ class DisplayMappingResults: if item.name == samplelist[j]: self.NR_INDIVIDUALS = self.NR_INDIVIDUALS + 1 # default: - self.graphHeight = self.graphHeight + 2 * (self.NR_INDIVIDUALS + 10) * self.EACH_GENE_HEIGHT + self.graphHeight = self.graphHeight + 2 * \ + (self.NR_INDIVIDUALS + 10) * self.EACH_GENE_HEIGHT # END HaplotypeAnalyst ######################### @@ -529,7 +539,8 @@ class DisplayMappingResults: self.diffCol = [] for i, strain in enumerate(self.diffCol): - self.diffCol[i] = g.db.execute("select Id from Strain where Symbol = %s", strain).fetchone()[0] + self.diffCol[i] = g.db.execute( + "select Id from Strain where Symbol = %s", strain).fetchone()[0] ################################################################ # GeneCollection goes here @@ -552,13 +563,15 @@ class DisplayMappingResults: chrName = "X" else: chrName = self.selectedChr - self.geneCol = GeneUtil.loadGenes(chrName, self.diffCol, self.startMb, self.endMb, "mouse") + self.geneCol = GeneUtil.loadGenes( + chrName, self.diffCol, self.startMb, self.endMb, "mouse") elif self.dataset.group.species == "rat": if self.selectedChr == 21: chrName = "X" else: chrName = self.selectedChr - self.geneCol = GeneUtil.loadGenes(chrName, self.diffCol, self.startMb, self.endMb, "rat") + self.geneCol = GeneUtil.loadGenes( + chrName, self.diffCol, self.startMb, self.endMb, "rat") if self.geneCol and self.intervalAnalystChecked: ####################################################################### @@ -577,7 +590,8 @@ class DisplayMappingResults: showLocusForm = "" intCanvas = Image.new("RGBA", size=(self.graphWidth, self.graphHeight)) with Bench("Drawing Plot"): - gifmap = self.plotIntMapping(intCanvas, startMb=self.startMb, endMb=self.endMb, showLocusForm= showLocusForm) + gifmap = self.plotIntMapping( + intCanvas, startMb=self.startMb, endMb=self.endMb, showLocusForm= showLocusForm) self.gifmap = gifmap.__str__() @@ -593,8 +607,10 @@ class DisplayMappingResults: # Scales plot differently for high resolution if self.draw2X: - intCanvasX2 = Image.new("RGBA", size=(self.graphWidth * 2, self.graphHeight * 2)) - gifmapX2 = self.plotIntMapping(intCanvasX2, startMb=self.startMb, endMb=self.endMb, showLocusForm= showLocusForm, zoom=2) + intCanvasX2 = Image.new("RGBA", size=( + self.graphWidth * 2, self.graphHeight * 2)) + gifmapX2 = self.plotIntMapping( + intCanvasX2, startMb=self.startMb, endMb=self.endMb, showLocusForm= showLocusForm, zoom=2) intCanvasX2.save( "{}.png".format( os.path.join(webqtlConfig.GENERATED_IMAGE_DIR, @@ -612,7 +628,8 @@ class DisplayMappingResults: name=showLocusForm, submit=HtmlGenWrapper.create_input_tag(type_='hidden')) - hddn = {'FormID': 'showDatabase', 'ProbeSetID': '_', 'database': fd.RISet+"Geno",'CellID':'_', 'RISet':fd.RISet, 'incparentsf1':'ON'} + hddn = {'FormID': 'showDatabase', 'ProbeSetID': '_', 'database': fd.RISet+ \ + "Geno",'CellID':'_', 'RISet':fd.RISet, 'incparentsf1':'ON'} for key in hddn.keys(): showLocusForm.append(HtmlGenWrapper.create_input_tag( name=key, value=hddn[key], type_='hidden')) @@ -631,7 +648,8 @@ class DisplayMappingResults: if self.traitList and self.traitList[0].dataset and self.traitList[0].dataset.type == 'Geno': btminfo.append(HtmlGenWrapper.create_br_tag()) - btminfo.append('Mapping using genotype data as a trait will result in infinity LRS at one locus. In order to display the result properly, all LRSs higher than 100 are capped at 100.') + btminfo.append( + 'Mapping using genotype data as a trait will result in infinity LRS at one locus. In order to display the result properly, all LRSs higher than 100 are capped at 100.') def plotIntMapping(self, canvas, offset=(80, 120, 90, 100), zoom=1, startMb= None, endMb = None, showLocusForm = ""): im_drawer = ImageDraw.Draw(canvas) @@ -673,7 +691,8 @@ class DisplayMappingResults: else: drawAreaHeight -= 3 * self.BAND_HEIGHT + 3 * self.BAND_SPACING + 10 * zoom if self.geneChecked: - drawAreaHeight -= self.NUM_GENE_ROWS * self.EACH_GENE_HEIGHT + 3 * self.BAND_SPACING + 10 * zoom + drawAreaHeight -= self.NUM_GENE_ROWS * \ + self.EACH_GENE_HEIGHT + 3 * self.BAND_SPACING + 10 * zoom else: if self.selectedChr > -1: drawAreaHeight -= 20 @@ -682,7 +701,8 @@ class DisplayMappingResults: # BEGIN HaplotypeAnalyst if self.haplotypeAnalystChecked and self.selectedChr > -1: - drawAreaHeight -= self.EACH_GENE_HEIGHT * (self.NR_INDIVIDUALS + 10) * 2 * zoom + drawAreaHeight -= self.EACH_GENE_HEIGHT * \ + (self.NR_INDIVIDUALS + 10) * 2 * zoom # END HaplotypeAnalyst if zoom == 2: @@ -693,38 +713,48 @@ class DisplayMappingResults: newoffset = (xLeftOffset, xRightOffset, yTopOffset, yBottomOffset) # Draw the alternating-color background first and get plotXScale - plotXScale = self.drawGraphBackground(canvas, gifmap, offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) + plotXScale = self.drawGraphBackground( + canvas, gifmap, offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) # draw bootstap if self.bootChecked and not self.multipleInterval: - self.drawBootStrapResult(canvas, self.nboot, drawAreaHeight, plotXScale, offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) + self.drawBootStrapResult(canvas, self.nboot, drawAreaHeight, plotXScale, + offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) # Draw clickable region and gene band if selected if self.plotScale == 'physic' and self.selectedChr > -1: - self.drawClickBand(canvas, gifmap, plotXScale, offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) + self.drawClickBand(canvas, gifmap, plotXScale, offset=newoffset, + zoom=zoom, startMb=startMb, endMb=endMb) if self.geneChecked and self.geneCol: - self.drawGeneBand(canvas, gifmap, plotXScale, offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) + self.drawGeneBand(canvas, gifmap, plotXScale, offset=newoffset, + zoom=zoom, startMb=startMb, endMb=endMb) if self.SNPChecked: - self.drawSNPTrackNew(canvas, offset=newoffset, zoom=2 * zoom, startMb=startMb, endMb = endMb) + self.drawSNPTrackNew( + canvas, offset=newoffset, zoom=2 * zoom, startMb=startMb, endMb = endMb) # BEGIN HaplotypeAnalyst if self.haplotypeAnalystChecked: - self.drawHaplotypeBand(canvas, gifmap, plotXScale, offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) + self.drawHaplotypeBand( + canvas, gifmap, plotXScale, offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) # END HaplotypeAnalyst # Draw X axis - self.drawXAxis(canvas, drawAreaHeight, gifmap, plotXScale, showLocusForm, offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) + self.drawXAxis(canvas, drawAreaHeight, gifmap, plotXScale, showLocusForm, + offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) # Draw QTL curve - self.drawQTL(canvas, drawAreaHeight, gifmap, plotXScale, offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) + self.drawQTL(canvas, drawAreaHeight, gifmap, plotXScale, + offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) # draw legend if self.multipleInterval: - self.drawMultiTraitName(fd, canvas, gifmap, showLocusForm, offset=newoffset) + self.drawMultiTraitName( + fd, canvas, gifmap, showLocusForm, offset=newoffset) elif self.legendChecked: self.drawLegendPanel(canvas, offset=newoffset, zoom=zoom) else: pass # draw position, no need to use a separate function - self.drawProbeSetPosition(canvas, plotXScale, offset=newoffset, zoom=zoom) + self.drawProbeSetPosition( + canvas, plotXScale, offset=newoffset, zoom=zoom) return gifmap @@ -756,19 +786,24 @@ class DisplayMappingResults: if previous_chr_as_int != 1: BootCoord.append(BootChrCoord) BootChrCoord = [] - startX += (self.ChrLengthDistList[previous_chr_as_int - 2] + self.GraphInterval) * plotXScale + startX += ( + self.ChrLengthDistList[previous_chr_as_int - 2] + self.GraphInterval) * plotXScale if self.plotScale == 'physic': Xc = startX + (result['Mb'] - self.startMb) * plotXScale else: - Xc = startX + (result['cM'] - self.qtlresults[0]['cM']) * plotXScale + Xc = startX + \ + (result['cM'] - self.qtlresults[0]['cM']) * plotXScale BootChrCoord.append([Xc, self.bootResult[i]]) else: for i, result in enumerate(self.qtlresults): if str(result['chr']) == str(self.ChrList[self.selectedChr][0]): if self.plotScale == 'physic': - Xc = startX + (result['Mb'] - self.startMb) * plotXScale + Xc = startX + (result['Mb'] - \ + self.startMb) * plotXScale else: - Xc = startX + (result['cM'] - self.qtlresults[0]['cM']) * plotXScale + Xc = startX + \ + (result['cM'] - self.qtlresults[0] + ['cM']) * plotXScale BootChrCoord.append([Xc, self.bootResult[i]]) BootCoord = [BootChrCoord] @@ -793,14 +828,16 @@ class DisplayMappingResults: if maxBootCount < bootCount: maxBootCount = bootCount # end if - reducedBootCoord.append([bootStartPixX, BootChrCoord[i][0], bootCount]) + reducedBootCoord.append( + [bootStartPixX, BootChrCoord[i][0], bootCount]) bootStartPixX = BootChrCoord[i][0] bootCount = BootChrCoord[i][1] # end else # end for # add last piece if BootChrCoord[-1][0] - bootStartPixX > stepBootStrap / 2.0: - reducedBootCoord.append([bootStartPixX, BootChrCoord[-1][0], bootCount]) + reducedBootCoord.append( + [bootStartPixX, BootChrCoord[-1][0], bootCount]) else: reducedBootCoord[-1][2] += bootCount reducedBootCoord[-1][1] = BootChrCoord[-1][0] @@ -827,11 +864,13 @@ class DisplayMappingResults: # draw boot scale highestPercent = (maxBootCount * 100.0) / nboot bootScale = Plot.detScale(0, highestPercent) - bootScale = Plot.frange(bootScale[0], bootScale[1], bootScale[1] / bootScale[2]) + bootScale = Plot.frange( + bootScale[0], bootScale[1], bootScale[1] / bootScale[2]) bootScale = bootScale[:-1] + [highestPercent] bootOffset = 50 * fontZoom - bootScaleFont = ImageFont.truetype(font=VERDANA_FILE, size=13 * fontZoom) + bootScaleFont = ImageFont.truetype( + font=VERDANA_FILE, size=13 * fontZoom) im_drawer.rectangle( xy=((canvas.size[0] - bootOffset, yZero - bootHeightThresh), (canvas.size[0] - bootOffset - 15*zoom, yZero)), @@ -860,10 +899,12 @@ class DisplayMappingResults: startPosY = 30 else: startPosY = 15 - smallLabelFont = ImageFont.truetype(font=TREBUC_FILE, size=12 * fontZoom) + smallLabelFont = ImageFont.truetype( + font=TREBUC_FILE, size=12 * fontZoom) leftOffset = canvas.size[0] - xRightOffset - 190 im_drawer.rectangle( - xy=((leftOffset, startPosY - 6), (leftOffset + 12, startPosY + 6)), + xy=((leftOffset, startPosY - 6), + (leftOffset + 12, startPosY + 6)), fill=YELLOW, outline=BLACK) im_drawer.text(xy=(canvas.size[0] - xRightOffset - 170, startPosY + TEXT_Y_DISPLACEMENT), text='Frequency of the Peak LRS', @@ -905,7 +946,8 @@ class DisplayMappingResults: locPixel = xLeftOffset for i, _chr in enumerate(self.ChrList[1:]): if _chr[0] != Chr: - locPixel += (self.ChrLengthDistList[i] + self.GraphInterval) * plotXScale + locPixel += (self.ChrLengthDistList[i] + \ + self.GraphInterval) * plotXScale else: locPixel += Mb * plotXScale break @@ -921,7 +963,8 @@ class DisplayMappingResults: # the trait's position is between two traits if i > 0 and self.qtlresults[i - 1]['Mb'] < Mb and qtlresult['Mb'] >= Mb: - locPixel = xLeftOffset + plotXScale * (self.qtlresults[i - 1]['Mb'] + (qtlresult['Mb'] - self.qtlresults[i - 1]['Mb']) * (Mb - self.qtlresults[i - 1]['Mb']) / (qtlresult['Mb'] - self.qtlresults[i - 1]['Mb'])) + locPixel = xLeftOffset + plotXScale * (self.qtlresults[i - 1]['Mb'] + (qtlresult['Mb'] - self.qtlresults[i - 1]['Mb']) * ( + Mb - self.qtlresults[i - 1]['Mb']) / (qtlresult['Mb'] - self.qtlresults[i - 1]['Mb'])) break # the trait's position is on the right of the last genotype @@ -932,12 +975,15 @@ class DisplayMappingResults: for i, _chr in enumerate(self.ChrList): if i < (len(self.ChrList) - 1): if _chr != Chr: - locPixel += (self.ChrLengthDistList[i] + self.GraphInterval) * plotXScale + locPixel += (self.ChrLengthDistList[i] + \ + self.GraphInterval) * plotXScale else: - locPixel += (Mb * (_chr[-1].cM - _chr[0].cM) / self.ChrLengthCMList[i]) * plotXScale + locPixel += (Mb * (_chr[-1].cM - _chr[0].cM) / \ + self.ChrLengthCMList[i]) * plotXScale break if locPixel >= 0 and self.plotScale == 'physic': - traitPixel = ((locPixel, yZero), (locPixel - 7, yZero + 14), (locPixel + 7, yZero + 14)) + traitPixel = ((locPixel, yZero), (locPixel - 7, + yZero + 14), (locPixel + 7, yZero + 14)) draw_open_polygon(canvas, xy=traitPixel, outline=BLACK, fill=self.TRANSCRIPT_LOCATION_COLOR) @@ -979,7 +1025,8 @@ class DisplayMappingResults: maxCount = max(SNPCounts) if maxCount > 0: for i in range(xLeftOffset, xLeftOffset + plotWidth): - snpDensity = float(SNPCounts[i - xLeftOffset] * SNP_HEIGHT_MODIFIER / maxCount) + snpDensity = float( + SNPCounts[i - xLeftOffset] * SNP_HEIGHT_MODIFIER / maxCount) im_drawer.line( xy=((i, drawSNPLocationY + (snpDensity) * zoom), (i, drawSNPLocationY - (snpDensity) * zoom)), @@ -1015,12 +1062,16 @@ class DisplayMappingResults: (rectWidth + rightShift, yPaddingTop + 10+kstep*15)), fill=thisLRSColor, outline=BLACK) im_drawer.text( - text=name, xy=(rectWidth + 2 + rightShift, yPaddingTop + 10 + kstep * 15), + text=name, xy=(rectWidth + 2 + rightShift, + yPaddingTop + 10 + kstep * 15), font=colorFont, fill=BLACK) if thisTrait.db: - COORDS = "%d,%d,%d,%d" % (rectWidth + 2 + rightShift, yPaddingTop + kstep * 15, rectWidth + 2 + rightShift + nameWidth, yPaddingTop + 10 + kstep * 15,) - HREF = "javascript:showDatabase3('%s','%s','%s','');" % (showLocusForm, thisTrait.db.name, thisTrait.name) - Areas = HtmlGenWrapper.create_area_tag(shape='rect', coords=COORDS, href=HREF) + COORDS = "%d,%d,%d,%d" % (rectWidth + 2 + rightShift, yPaddingTop + kstep * \ + 15, rectWidth + 2 + rightShift + nameWidth, yPaddingTop + 10 + kstep * 15,) + HREF = "javascript:showDatabase3('%s','%s','%s','');" % ( + showLocusForm, thisTrait.db.name, thisTrait.name) + Areas = HtmlGenWrapper.create_area_tag( + shape='rect', coords=COORDS, href=HREF) gifmap.append(Areas) # TODO def drawLegendPanel(self, canvas, offset=(40, 120, 80, 10), zoom=1): @@ -1042,7 +1093,8 @@ class DisplayMappingResults: if hasattr(self.traitList[0], 'chr') and hasattr(self.traitList[0], 'mb'): startPosY = 15 nCol = 2 - smallLabelFont = ImageFont.truetype(font=TREBUC_FILE, size=12 * fontZoom) + smallLabelFont = ImageFont.truetype( + font=TREBUC_FILE, size=12 * fontZoom) leftOffset = canvas.size[0] - xRightOffset - 190 draw_open_polygon( @@ -1064,7 +1116,8 @@ class DisplayMappingResults: xy=((startPosX, startPosY), (startPosX + 32, startPosY)), fill=self.LRS_COLOR, width=2) im_drawer.text( - text=self.LRS_LOD, xy=(startPosX + 40, startPosY + TEXT_Y_DISPLACEMENT), + text=self.LRS_LOD, xy=( + startPosX + 40, startPosY + TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) startPosY += stepPosY @@ -1118,10 +1171,12 @@ class DisplayMappingResults: xy=((thisStartX, startPosY), (startPosX + 32, startPosY)), fill=self.SIGNIFICANT_COLOR, width=self.SIGNIFICANT_WIDTH) im_drawer.line( - xy=((thisStartX, startPosY + stepPosY), (startPosX + 32, startPosY + stepPosY)), + xy=((thisStartX, startPosY + stepPosY), + (startPosX + 32, startPosY + stepPosY)), fill=self.SUGGESTIVE_COLOR, width=self.SUGGESTIVE_WIDTH) im_drawer.text( - text='Significant %s = %2.2f' % (self.LRS_LOD, self.significant), + text='Significant %s = %2.2f' % ( + self.LRS_LOD, self.significant), xy=(thisStartX + 40, startPosY + TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) im_drawer.text( text='Suggestive %s = %2.2f' % (self.LRS_LOD, self.suggestive), @@ -1134,7 +1189,8 @@ class DisplayMappingResults: if self.dataset.type == "Publish" or self.dataset.type == "Geno": dataset_label = self.dataset.fullname else: - dataset_label = "%s - %s" % (self.dataset.group.name, self.dataset.fullname) + dataset_label = "%s - %s" % (self.dataset.group.name, + self.dataset.fullname) string1 = 'Dataset: %s' % (dataset_label) @@ -1151,7 +1207,8 @@ class DisplayMappingResults: string3 = 'Using GEMMA mapping method with ' if self.covariates != "": string3 += 'the cofactors below:' - cofactor_names = ", ".join([covar.split(":")[0] for covar in self.covariates.split(",")]) + cofactor_names = ", ".join( + [covar.split(":")[0] for covar in self.covariates.split(",")]) string4 = cofactor_names else: string3 += 'no cofactors' @@ -1159,7 +1216,8 @@ class DisplayMappingResults: string3 = 'Using R/qtl mapping method with ' if self.covariates != "": string3 += 'the cofactors below:' - cofactor_names = ", ".join([covar.split(":")[0] for covar in self.covariates.split(",")]) + cofactor_names = ", ".join( + [covar.split(":")[0] for covar in self.covariates.split(",")]) string4 = cofactor_names elif self.controlLocus and self.doControl != "false": string3 += '%s as control' % self.controlLocus @@ -1177,15 +1235,19 @@ class DisplayMappingResults: if self.selectedChr == -1: identification = "Mapping on All Chromosomes for " else: - identification = "Mapping on Chromosome %s for " % (self.ChrList[self.selectedChr][0]) + identification = "Mapping on Chromosome %s for " % ( + self.ChrList[self.selectedChr][0]) if self.this_trait.symbol: - identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.symbol) + identification += "Trait: %s - %s" % ( + self.this_trait.name, self.this_trait.symbol) elif self.dataset.type == "Publish": if self.this_trait.post_publication_abbreviation: - identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.post_publication_abbreviation) + identification += "Trait: %s - %s" % ( + self.this_trait.name, self.this_trait.post_publication_abbreviation) elif self.this_trait.pre_publication_abbreviation: - identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.pre_publication_abbreviation) + identification += "Trait: %s - %s" % ( + self.this_trait.name, self.this_trait.pre_publication_abbreviation) else: identification += "Trait: %s" % (self.this_trait.name) else: @@ -1265,7 +1327,8 @@ class DisplayMappingResults: tenPercentLength = geneLength * 0.0001 SNPdensity = theGO["snpCount"] / geneLength - exonStarts = list(map(float, theGO['exonStarts'].split(",")[:-1])) + exonStarts = list( + map(float, theGO['exonStarts'].split(",")[:-1])) exonEnds = list(map(float, theGO['exonEnds'].split(",")[:-1])) cdsStart = theGO['cdsStart'] cdsEnd = theGO['cdsEnd'] @@ -1274,8 +1337,10 @@ class DisplayMappingResults: strand = theGO["Strand"] exonCount = theGO["exonCount"] - geneStartPix = xLeftOffset + plotXScale * (float(txStart) - startMb) - geneEndPix = xLeftOffset + plotXScale * (float(txEnd) - startMb) # at least one pixel + geneStartPix = xLeftOffset + \ + plotXScale * (float(txStart) - startMb) + geneEndPix = xLeftOffset + plotXScale * \ + (float(txEnd) - startMb) # at least one pixel if (geneEndPix < xLeftOffset): return; # this gene is not on the screen @@ -1290,7 +1355,8 @@ class DisplayMappingResults: # found earlier, needs to be recomputed as snps are added # always apply colors now, even if SNP Track not checked - Zach 11/24/2010 - densities = [1.0000000000000001e-05, 0.094094033555233408, 0.3306166377816987, 0.88246026851027781, 2.6690084029581951, 4.1, 61.0] + densities = [1.0000000000000001e-05, 0.094094033555233408, + 0.3306166377816987, 0.88246026851027781, 2.6690084029581951, 4.1, 61.0] if SNPdensity < densities[0]: myColor = BLACK elif SNPdensity < densities[1]: @@ -1309,7 +1375,8 @@ class DisplayMappingResults: outlineColor = myColor fillColor = myColor - TITLE = "Gene: %s (%s)\nFrom %2.3f to %2.3f Mb (%s)\nNum. exons: %d." % (geneSymbol, accession, float(txStart), float(txEnd), strand, exonCount) + TITLE = "Gene: %s (%s)\nFrom %2.3f to %2.3f Mb (%s)\nNum. exons: %d." % ( + geneSymbol, accession, float(txStart), float(txEnd), strand, exonCount) # NL: 06-02-2011 Rob required to change this link for gene related HREF = geneNCBILink % geneSymbol @@ -1324,8 +1391,10 @@ class DisplayMappingResults: strand = theGO["Strand"] exonCount = 0 - geneStartPix = xLeftOffset + plotXScale * (float(txStart) - startMb) - geneEndPix = xLeftOffset + plotXScale * (float(txEnd) - startMb) # at least one pixel + geneStartPix = xLeftOffset + \ + plotXScale * (float(txStart) - startMb) + geneEndPix = xLeftOffset + plotXScale * \ + (float(txEnd) - startMb) # at least one pixel if (geneEndPix < xLeftOffset): return; # this gene is not on the screen @@ -1338,7 +1407,8 @@ class DisplayMappingResults: outlineColor = DARKBLUE fillColor = DARKBLUE - TITLE = "Gene: %s\nFrom %2.3f to %2.3f Mb (%s)" % (geneSymbol, float(txStart), float(txEnd), strand) + TITLE = "Gene: %s\nFrom %2.3f to %2.3f Mb (%s)" % ( + geneSymbol, float(txStart), float(txEnd), strand) # NL: 06-02-2011 Rob required to change this link for gene related HREF = geneNCBILink % geneSymbol else: @@ -1347,7 +1417,8 @@ class DisplayMappingResults: TITLE = "Gene: %s" % geneSymbol # Draw Genes - geneYLocation = yPaddingTop + (gIndex % self.NUM_GENE_ROWS) * self.EACH_GENE_HEIGHT * zoom + geneYLocation = yPaddingTop + \ + (gIndex % self.NUM_GENE_ROWS) * self.EACH_GENE_HEIGHT * zoom if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": geneYLocation += 4 * self.BAND_HEIGHT + 4 * self.BAND_SPACING else: @@ -1361,7 +1432,8 @@ class DisplayMappingResults: # draw the line that runs the entire length of the gene im_drawer.line( xy=( - (geneStartPix, geneYLocation + self.EACH_GENE_HEIGHT / 2 * zoom), + (geneStartPix, geneYLocation + \ + self.EACH_GENE_HEIGHT / 2 * zoom), (geneEndPix, geneYLocation + self.EACH_GENE_HEIGHT / 2 *zoom)), fill=outlineColor, width=1) @@ -1401,8 +1473,10 @@ class DisplayMappingResults: # draw the blocks for the exon regions for i in range(0, len(exonStarts)): - exonStartPix = (exonStarts[i] - startMb) * plotXScale + xLeftOffset - exonEndPix = (exonEnds[i] - startMb) * plotXScale + xLeftOffset + exonStartPix = ( + exonStarts[i] - startMb) * plotXScale + xLeftOffset + exonEndPix = (exonEnds[i] - startMb) * \ + plotXScale + xLeftOffset if (exonStartPix < xLeftOffset): exonStartPix = xLeftOffset if (exonEndPix < xLeftOffset): @@ -1418,7 +1492,8 @@ class DisplayMappingResults: # draw gray blocks for 3' and 5' UTR blocks if cdsStart and cdsEnd: - utrStartPix = (txStart - startMb) * plotXScale + xLeftOffset + utrStartPix = (txStart - startMb) * \ + plotXScale + xLeftOffset utrEndPix = (cdsStart - startMb) * plotXScale + xLeftOffset if (utrStartPix < xLeftOffset): utrStartPix = xLeftOffset @@ -1436,7 +1511,8 @@ class DisplayMappingResults: labelText = "5'" im_drawer.text( text=labelText, - xy=(utrStartPix - 9, geneYLocation + self.EACH_GENE_HEIGHT), + xy=(utrStartPix - 9, geneYLocation + \ + self.EACH_GENE_HEIGHT), font=ImageFont.truetype(font=ARIAL_FILE, size=2)) # the second UTR region @@ -1459,7 +1535,8 @@ class DisplayMappingResults: labelText = "3'" im_drawer.text( text=labelText, - xy=(utrEndPix + 2, geneYLocation + self.EACH_GENE_HEIGHT), + xy=(utrEndPix + 2, geneYLocation + \ + self.EACH_GENE_HEIGHT), font=ImageFont.truetype(font=ARIAL_FILE, size=2)) # draw the genes as rectangles @@ -1469,7 +1546,8 @@ class DisplayMappingResults: (geneEndPix, (geneYLocation + self.EACH_GENE_HEIGHT * zoom))), outline=outlineColor, fill=fillColor) - COORDS = "%d, %d, %d, %d" % (geneStartPix, geneYLocation, geneEndPix, (geneYLocation + self.EACH_GENE_HEIGHT)) + COORDS = "%d, %d, %d, %d" % ( + geneStartPix, geneYLocation, geneEndPix, (geneYLocation + self.EACH_GENE_HEIGHT)) # NL: 06-02-2011 Rob required to display NCBI info in a new window gifmap.append( HtmlGenWrapper.create_area_tag( @@ -1496,7 +1574,8 @@ class DisplayMappingResults: smd = [] for sample in self.sample_vals_dict.keys(): if self.sample_vals_dict[sample] != "x" and sample in samplelist: - temp = GeneralObject(name=sample, value=float(self.sample_vals_dict[sample])) + temp = GeneralObject(name=sample, value=float( + self.sample_vals_dict[sample])) smd.append(temp) else: continue @@ -1517,8 +1596,10 @@ class DisplayMappingResults: txStart = _chr[i].Mb txEnd = _chr[i].Mb - geneStartPix = xLeftOffset + plotXScale * (float(txStart) - startMb) - 0 - geneEndPix = xLeftOffset + plotXScale * (float(txEnd) - startMb) - 0 + geneStartPix = xLeftOffset + plotXScale * \ + (float(txStart) - startMb) - 0 + geneEndPix = xLeftOffset + plotXScale * \ + (float(txEnd) - startMb) - 0 drawit = 1 if (geneStartPix < xLeftOffset): @@ -1546,8 +1627,10 @@ class DisplayMappingResults: txStart = _chr[j].Mb txEnd = _chr[j].Mb - geneStartPix = xLeftOffset + plotXScale * (float(txStart) - startMb) - 0 - geneEndPix = xLeftOffset + plotXScale * (float(txEnd) - startMb) + 0 + geneStartPix = xLeftOffset + plotXScale * \ + (float(txStart) - startMb) - 0 + geneEndPix = xLeftOffset + plotXScale * \ + (float(txEnd) - startMb) + 0 if oldgeneEndPix >= xLeftOffset: drawStart = oldgeneEndPix + 4 @@ -1585,7 +1668,8 @@ class DisplayMappingResults: # Draw Genes - geneYLocation = yPaddingTop + self.NUM_GENE_ROWS * (self.EACH_GENE_HEIGHT) * zoom + geneYLocation = yPaddingTop + self.NUM_GENE_ROWS * \ + (self.EACH_GENE_HEIGHT) * zoom if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": geneYLocation += 4 * self.BAND_HEIGHT + 4 * self.BAND_SPACING else: @@ -1606,7 +1690,8 @@ class DisplayMappingResults: if (plotbxd == 1): ind = 0 if samplelist[k] in [item.name for item in smd]: - ind = [item.name for item in smd].index(samplelist[k]) + ind = [item.name for item in smd].index( + samplelist[k]) maxind = max(ind, maxind) @@ -1637,8 +1722,10 @@ class DisplayMappingResults: geneYLocation + 2 *ind*self.EACH_GENE_HEIGHT + 2*self.EACH_GENE_HEIGHT*zoom)), outline=outlineColor, fill=fillColor) - COORDS = "%d, %d, %d, %d" % (geneStartPix, geneYLocation + ind * self.EACH_GENE_HEIGHT, geneEndPix + 1, (geneYLocation + ind * self.EACH_GENE_HEIGHT)) - TITLE = "Strain: %s, marker (%s) \n Position %2.3f Mb." % (samplelist[k], _chr[j].name, float(txStart)) + COORDS = "%d, %d, %d, %d" % ( + geneStartPix, geneYLocation + ind * self.EACH_GENE_HEIGHT, geneEndPix + 1, (geneYLocation + ind * self.EACH_GENE_HEIGHT)) + TITLE = "Strain: %s, marker (%s) \n Position %2.3f Mb." % ( + samplelist[k], _chr[j].name, float(txStart)) HREF = '' gifmap.append( HtmlGenWrapper.create_area_tag( @@ -1683,7 +1770,8 @@ class DisplayMappingResults: plotbxd = 1 if (plotbxd == 1): - ind = [item.name for item in smd].index(samplelist[j]) - 1 + ind = [item.name for item in smd].index( + samplelist[j]) - 1 expr = smd[ind].value # Place where font is hardcoded @@ -1691,13 +1779,15 @@ class DisplayMappingResults: text="%s" % (samplelist[j]), xy=((xLeftOffset + plotWidth + 10), geneYLocation + 11 + 2*ind*self.EACH_GENE_HEIGHT*zoom), - font=ImageFont.truetype(font=VERDANA_FILE, size=12), + font=ImageFont.truetype( + font=VERDANA_FILE, size=12), fill=BLACK) im_drawer.text( text="%2.2f" % (expr), xy=((xLeftOffset + plotWidth + 60), geneYLocation + 11 + 2*ind*self.EACH_GENE_HEIGHT*zoom), - font=ImageFont.truetype(font=VERDANA_FILE, size=12), + font=ImageFont.truetype( + font=VERDANA_FILE, size=12), fill=BLACK) # END HaplotypeAnalyst @@ -1719,12 +1809,16 @@ class DisplayMappingResults: # but it makes the HTML huge, and takes forever to render the page in the first place) # Draw the bands that you can click on to go to UCSC / Ensembl MAX_CLICKABLE_REGION_DIVISIONS = 100 - clickableRegionLabelFont = ImageFont.truetype(font=VERDANA_FILE, size=9) - pixelStep = max(5, int(float(plotWidth) / MAX_CLICKABLE_REGION_DIVISIONS)) + clickableRegionLabelFont = ImageFont.truetype( + font=VERDANA_FILE, size=9) + pixelStep = max( + 5, int(float(plotWidth) / MAX_CLICKABLE_REGION_DIVISIONS)) # pixelStep: every N pixels, we make a new clickable area for the user to go to that area of the genome. - numBasesCurrentlyOnScreen = self.kONE_MILLION * abs(startMb - endMb) # Number of bases on screen now - flankingWidthInBases = int (min((float(numBasesCurrentlyOnScreen) / 2.0), (5*self.kONE_MILLION))) + numBasesCurrentlyOnScreen = self.kONE_MILLION * \ + abs(startMb - endMb) # Number of bases on screen now + flankingWidthInBases = int ( + min((float(numBasesCurrentlyOnScreen) / 2.0), (5*self.kONE_MILLION))) webqtlZoomWidth = numBasesCurrentlyOnScreen / 16.0 # Flanking width should be such that we either zoom in to a 10 million base region, or we show the clicked region at the same scale as we are currently seeing. @@ -1733,23 +1827,33 @@ class DisplayMappingResults: paddingTop = yTopOffset if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": - phenogenPaddingTop = paddingTop + (self.BAND_HEIGHT + self.BAND_SPACING) - ucscPaddingTop = paddingTop + 2 * (self.BAND_HEIGHT + self.BAND_SPACING) - ensemblPaddingTop = paddingTop + 3 * (self.BAND_HEIGHT + self.BAND_SPACING) + phenogenPaddingTop = paddingTop + \ + (self.BAND_HEIGHT + self.BAND_SPACING) + ucscPaddingTop = paddingTop + 2 * \ + (self.BAND_HEIGHT + self.BAND_SPACING) + ensemblPaddingTop = paddingTop + 3 * \ + (self.BAND_HEIGHT + self.BAND_SPACING) else: - ucscPaddingTop = paddingTop + (self.BAND_HEIGHT + self.BAND_SPACING) - ensemblPaddingTop = paddingTop + 2 * (self.BAND_HEIGHT + self.BAND_SPACING) + ucscPaddingTop = paddingTop + \ + (self.BAND_HEIGHT + self.BAND_SPACING) + ensemblPaddingTop = paddingTop + 2 * \ + (self.BAND_HEIGHT + self.BAND_SPACING) if zoom == 1: for pixel in range(xLeftOffset, xLeftOffset + plotWidth, pixelStep): - calBase = self.kONE_MILLION * (startMb + (endMb - startMb) * (pixel - xLeftOffset - 0.0) / plotWidth) + calBase = self.kONE_MILLION * \ + (startMb + (endMb - startMb) * \ + (pixel - xLeftOffset - 0.0) / plotWidth) xBrowse1 = pixel - xBrowse2 = min(xLeftOffset + plotWidth, (pixel + pixelStep - 1)) + xBrowse2 = min(xLeftOffset + plotWidth, + (pixel + pixelStep - 1)) - WEBQTL_COORDS = "%d, %d, %d, %d" % (xBrowse1, paddingTop, xBrowse2, (paddingTop + self.BAND_HEIGHT)) - WEBQTL_HREF = "javascript:rangeView('%s', %f, %f)" % (self.selectedChr - 1, max(0, (calBase - webqtlZoomWidth)) / 1000000.0, (calBase + webqtlZoomWidth) / 1000000.0) + WEBQTL_COORDS = "%d, %d, %d, %d" % ( + xBrowse1, paddingTop, xBrowse2, (paddingTop + self.BAND_HEIGHT)) + WEBQTL_HREF = "javascript:rangeView('%s', %f, %f)" % (self.selectedChr - 1, max( + 0, (calBase - webqtlZoomWidth)) / 1000000.0, (calBase + webqtlZoomWidth) / 1000000.0) WEBQTL_TITLE = "Click to view this section of the genome in WebQTL" gifmap.append( @@ -1764,15 +1868,19 @@ class DisplayMappingResults: outline=self.CLICKABLE_WEBQTL_REGION_COLOR, fill=self.CLICKABLE_WEBQTL_REGION_COLOR) im_drawer.line( - xy=((xBrowse1, paddingTop), (xBrowse1, (paddingTop + self.BAND_HEIGHT))), + xy=((xBrowse1, paddingTop), (xBrowse1, + (paddingTop + self.BAND_HEIGHT))), fill=self.CLICKABLE_WEBQTL_REGION_OUTLINE_COLOR) if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": - PHENOGEN_COORDS = "%d, %d, %d, %d" % (xBrowse1, phenogenPaddingTop, xBrowse2, (phenogenPaddingTop + self.BAND_HEIGHT)) + PHENOGEN_COORDS = "%d, %d, %d, %d" % ( + xBrowse1, phenogenPaddingTop, xBrowse2, (phenogenPaddingTop + self.BAND_HEIGHT)) if self.dataset.group.species == "mouse": - PHENOGEN_HREF = "https://phenogen.org/gene.jsp?speciesCB=Mm&auto=Y&geneTxt=chr%s:%d-%d&genomeVer=mm10" % (self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) + PHENOGEN_HREF = "https://phenogen.org/gene.jsp?speciesCB=Mm&auto=Y&geneTxt=chr%s:%d-%d&genomeVer=mm10" % ( + self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) else: - PHENOGEN_HREF = "https://phenogen.org/gene.jsp?speciesCB=Mm&auto=Y&geneTxt=chr%s:%d-%d&genomeVer=mm10" % (self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) + PHENOGEN_HREF = "https://phenogen.org/gene.jsp?speciesCB=Mm&auto=Y&geneTxt=chr%s:%d-%d&genomeVer=mm10" % ( + self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) PHENOGEN_TITLE = "Click to view this section of the genome in PhenoGen" gifmap.append( HtmlGenWrapper.create_area_tag( @@ -1786,14 +1894,18 @@ class DisplayMappingResults: outline=self.CLICKABLE_PHENOGEN_REGION_COLOR, fill=self.CLICKABLE_PHENOGEN_REGION_COLOR) im_drawer.line( - xy=((xBrowse1, phenogenPaddingTop), (xBrowse1, (phenogenPaddingTop + self.BAND_HEIGHT))), + xy=((xBrowse1, phenogenPaddingTop), (xBrowse1, + (phenogenPaddingTop + self.BAND_HEIGHT))), fill=self.CLICKABLE_PHENOGEN_REGION_OUTLINE_COLOR) - UCSC_COORDS = "%d, %d, %d, %d" % (xBrowse1, ucscPaddingTop, xBrowse2, (ucscPaddingTop + self.BAND_HEIGHT)) + UCSC_COORDS = "%d, %d, %d, %d" % ( + xBrowse1, ucscPaddingTop, xBrowse2, (ucscPaddingTop + self.BAND_HEIGHT)) if self.dataset.group.species == "mouse": - UCSC_HREF = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d&hgt.customText=%s/snp/chr%s" % (self._ucscDb, self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases, webqtlConfig.PORTADDR, self.selectedChr) + UCSC_HREF = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d&hgt.customText=%s/snp/chr%s" % ( + self._ucscDb, self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases, webqtlConfig.PORTADDR, self.selectedChr) else: - UCSC_HREF = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d" % (self._ucscDb, self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) + UCSC_HREF = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d" % ( + self._ucscDb, self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) UCSC_TITLE = "Click to view this section of the genome in the UCSC Genome Browser" gifmap.append( HtmlGenWrapper.create_area_tag( @@ -1811,11 +1923,14 @@ class DisplayMappingResults: (xBrowse1, (ucscPaddingTop + self.BAND_HEIGHT))), fill=self.CLICKABLE_UCSC_REGION_OUTLINE_COLOR) - ENSEMBL_COORDS = "%d, %d, %d, %d" % (xBrowse1, ensemblPaddingTop, xBrowse2, (ensemblPaddingTop + self.BAND_HEIGHT)) + ENSEMBL_COORDS = "%d, %d, %d, %d" % ( + xBrowse1, ensemblPaddingTop, xBrowse2, (ensemblPaddingTop + self.BAND_HEIGHT)) if self.dataset.group.species == "mouse": - ENSEMBL_HREF = "http://www.ensembl.org/Mus_musculus/contigview?highlight=&chr=%s&vc_start=%d&vc_end=%d&x=35&y=12" % (self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) + ENSEMBL_HREF = "http://www.ensembl.org/Mus_musculus/contigview?highlight=&chr=%s&vc_start=%d&vc_end=%d&x=35&y=12" % ( + self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) else: - ENSEMBL_HREF = "http://www.ensembl.org/Rattus_norvegicus/contigview?chr=%s&start=%d&end=%d" % (self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) + ENSEMBL_HREF = "http://www.ensembl.org/Rattus_norvegicus/contigview?chr=%s&start=%d&end=%d" % ( + self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) ENSEMBL_TITLE = "Click to view this section of the genome in the Ensembl Genome Browser" gifmap.append(HtmlGenWrapper.create_area_tag( shape='rect', @@ -1841,19 +1956,23 @@ class DisplayMappingResults: if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": im_drawer.text( text="Click to view the corresponding section of the genome in PhenoGen", - xy=((xLeftOffset + 10), phenogenPaddingTop), # + self.BAND_HEIGHT/2), + # + self.BAND_HEIGHT/2), + xy=((xLeftOffset + 10), phenogenPaddingTop), font=clickableRegionLabelFont, fill=self.CLICKABLE_PHENOGEN_TEXT_COLOR) im_drawer.text( text="Click to view the corresponding section of the genome in the UCSC Genome Browser", - xy=((xLeftOffset + 10), ucscPaddingTop), # + self.BAND_HEIGHT/2), + # + self.BAND_HEIGHT/2), + xy=((xLeftOffset + 10), ucscPaddingTop), font=clickableRegionLabelFont, fill=self.CLICKABLE_UCSC_TEXT_COLOR) im_drawer.text( text="Click to view the corresponding section of the genome in the Ensembl Genome Browser", - xy=((xLeftOffset + 10), ensemblPaddingTop), # + self.BAND_HEIGHT/2), + # + self.BAND_HEIGHT/2), + xy=((xLeftOffset + 10), ensemblPaddingTop), font=clickableRegionLabelFont, fill=self.CLICKABLE_ENSEMBL_TEXT_COLOR) # draw the gray text - chrFont = ImageFont.truetype(font=VERDANA_BOLD_FILE, size=26 * zoom) + chrFont = ImageFont.truetype( + font=VERDANA_BOLD_FILE, size=26 * zoom) chrX = xLeftOffset + plotWidth - 2 - im_drawer.textsize( "Chr %s" % self.ChrList[self.selectedChr][0], font=chrFont)[0] im_drawer.text( @@ -1922,8 +2041,10 @@ class DisplayMappingResults: (Xc, yZero + xMajorTickHeight)), fill=xAxisTickMarkColor, width=X_MAJOR_TICK_THICKNESS) # Draw the MAJOR tick mark - labelStr = str(formatStr % _Mb) # What Mbase location to put on the label - strWidth, strHeight = im_drawer.textsize(labelStr, font=MBLabelFont) + # What Mbase location to put on the label + labelStr = str(formatStr % _Mb) + strWidth, strHeight = im_drawer.textsize( + labelStr, font=MBLabelFont) drawStringXc = (Xc - (strWidth / 2.0)) im_drawer.text(xy=(drawStringXc, strYLoc), text=labelStr, font=MBLabelFont, @@ -1956,9 +2077,11 @@ class DisplayMappingResults: canvas, text=str(tickdists), font=MBLabelFont, xy=(startPosX + tickdists * plotXScale, yZero + 10 * zoom), fill=BLACK, angle=270) - startPosX += (self.ChrLengthDistList[i] + self.GraphInterval) * plotXScale + startPosX += (self.ChrLengthDistList[i] + \ + self.GraphInterval) * plotXScale - megabaseLabelFont = ImageFont.truetype(font=VERDANA_FILE, size=int(18 * zoom * 1.5)) + megabaseLabelFont = ImageFont.truetype( + font=VERDANA_FILE, size=int(18 * zoom * 1.5)) im_drawer.text( text="Megabases", xy=( @@ -1984,7 +2107,8 @@ class DisplayMappingResults: if _locus.cM != preLpos: distinctCount += 1 preLpos = _locus.cM - thisChr.append([_locus.name, _locus.cM - Locus0CM]) + thisChr.append( + [_locus.name, _locus.cM - Locus0CM]) else: for j in (0, nLoci / 4, nLoci / 2, nLoci*3/4, -1): while _chr[j].name == ' - ': @@ -1992,7 +2116,8 @@ class DisplayMappingResults: if _chr[j].cM != preLpos: distinctCount += 1 preLpos = _chr[j].cM - thisChr.append([_chr[j].name, _chr[j].cM - Locus0CM]) + thisChr.append( + [_chr[j].name, _chr[j].cM - Locus0CM]) ChrAInfo.append(thisChr) else: for i, _chr in enumerate(self.genotype): @@ -2004,7 +2129,8 @@ class DisplayMappingResults: if _locus.cM != preLpos: distinctCount += 1 preLpos = _locus.cM - thisChr.append([_locus.name, _locus.cM - Locus0CM]) + thisChr.append( + [_locus.name, _locus.cM - Locus0CM]) ChrAInfo.append(thisChr) stepA = (plotWidth + 0.0) / distinctCount @@ -2053,7 +2179,8 @@ class DisplayMappingResults: outline=rectColor, fill=rectColor, width=0) COORDS = "%d,%d,%d,%d" % (xLeftOffset+offsetA-LRectHeight, yZero+40+Zorder*(LRectWidth+3),\ xLeftOffset + offsetA, yZero +40+Zorder*(LRectWidth+3)+LRectWidth) - HREF = "/show_trait?trait_id=%s&dataset=%s" % (Lname, self.dataset.group.name + "Geno") + HREF = "/show_trait?trait_id=%s&dataset=%s" % ( + Lname, self.dataset.group.name + "Geno") #HREF="javascript:showDatabase3('%s','%s','%s','');" % (showLocusForm,fd.RISet+"Geno", Lname) Areas = HtmlGenWrapper.create_area_tag( shape='rect', @@ -2067,9 +2194,11 @@ class DisplayMappingResults: im_drawer.line( xy=((startPosX, yZero), (startPosX, yZero + 40)), fill=lineColor) - startPosX += (self.ChrLengthDistList[j] + self.GraphInterval) * plotXScale + startPosX += (self.ChrLengthDistList[j] + \ + self.GraphInterval) * plotXScale - centimorganLabelFont = ImageFont.truetype(font=VERDANA_FILE, size=int(18 * zoom * 1.5)) + centimorganLabelFont = ImageFont.truetype( + font=VERDANA_FILE, size=int(18 * zoom * 1.5)) im_drawer.text( text="Centimorgans", xy=(xLeftOffset + (plotWidth - im_drawer.textsize( @@ -2100,29 +2229,39 @@ class DisplayMappingResults: # ZS: This is a mess, but I don't know a better way to account for different mapping methods returning results in different formats + the option to change between LRS and LOD if self.lrsMax <= 0: # sliding scale if "lrs_value" in self.qtlresults[0]: - LRS_LOD_Max = max([result['lrs_value'] for result in self.qtlresults]) + LRS_LOD_Max = max([result['lrs_value'] + for result in self.qtlresults]) if self.LRS_LOD == "LOD" or self.LRS_LOD == "-logP": LRS_LOD_Max = LRS_LOD_Max / self.LODFACTOR if self.permChecked and self.nperm > 0 and not self.multipleInterval: - self.significant = min(self.significant / self.LODFACTOR, webqtlConfig.MAXLRS) - self.suggestive = min(self.suggestive / self.LODFACTOR, webqtlConfig.MAXLRS) + self.significant = min( + self.significant / self.LODFACTOR, webqtlConfig.MAXLRS) + self.suggestive = min( + self.suggestive / self.LODFACTOR, webqtlConfig.MAXLRS) else: if self.permChecked and self.nperm > 0 and not self.multipleInterval: - self.significant = min(self.significant, webqtlConfig.MAXLRS) - self.suggestive = min(self.suggestive, webqtlConfig.MAXLRS) + self.significant = min( + self.significant, webqtlConfig.MAXLRS) + self.suggestive = min( + self.suggestive, webqtlConfig.MAXLRS) else: pass else: - LRS_LOD_Max = max([result['lod_score'] for result in self.qtlresults]) + LRS_LOD_Max = max([result['lod_score'] + for result in self.qtlresults]) if self.LRS_LOD == "LRS": LRS_LOD_Max = LRS_LOD_Max * self.LODFACTOR if self.permChecked and self.nperm > 0 and not self.multipleInterval: - self.significant = min(self.significant * self.LODFACTOR, webqtlConfig.MAXLRS) - self.suggestive = min(self.suggestive * self.LODFACTOR, webqtlConfig.MAXLRS) + self.significant = min( + self.significant * self.LODFACTOR, webqtlConfig.MAXLRS) + self.suggestive = min( + self.suggestive * self.LODFACTOR, webqtlConfig.MAXLRS) else: if self.permChecked and self.nperm > 0 and not self.multipleInterval: - self.significant = min(self.significant, webqtlConfig.MAXLRS) - self.suggestive = min(self.suggestive, webqtlConfig.MAXLRS) + self.significant = min( + self.significant, webqtlConfig.MAXLRS) + self.suggestive = min( + self.suggestive, webqtlConfig.MAXLRS) else: pass @@ -2143,7 +2282,8 @@ class DisplayMappingResults: self.js_data = json.dumps(js_data) LRSScaleFont = ImageFont.truetype(font=VERDANA_FILE, size=16 * zoom) - LRSLODFont = ImageFont.truetype(font=VERDANA_FILE, size=int(18 * zoom * 1.5)) + LRSLODFont = ImageFont.truetype( + font=VERDANA_FILE, size=int(18 * zoom * 1.5)) yZero = yTopOffset + plotHeight # LRSHeightThresh = drawAreaHeight @@ -2188,7 +2328,8 @@ class DisplayMappingResults: # draw the "LRS" or "LOD" string to the left of the axis LRSScaleFont = ImageFont.truetype(font=VERDANA_FILE, size=16 * zoom) - LRSLODFont = ImageFont.truetype(font=VERDANA_FILE, size=int(18 * zoom * 1.5)) + LRSLODFont = ImageFont.truetype( + font=VERDANA_FILE, size=int(18 * zoom * 1.5)) yZero = yTopOffset + plotHeight # TEXT_X_DISPLACEMENT = -20 @@ -2239,7 +2380,8 @@ class DisplayMappingResults: # ZS: I don't know if what I did here with this inner function is clever or overly complicated, but it's the only way I could think of to avoid duplicating the code inside this function def add_suggestive_significant_lines_and_legend(start_pos_x, chr_length_dist): - rightEdge = int(start_pos_x + chr_length_dist * plotXScale - self.SUGGESTIVE_WIDTH / 1.5) + rightEdge = int(start_pos_x + chr_length_dist * \ + plotXScale - self.SUGGESTIVE_WIDTH / 1.5) im_drawer.line( xy=((start_pos_x + self.SUGGESTIVE_WIDTH / 1.5, suggestiveY), (rightEdge, suggestiveY)), @@ -2253,15 +2395,19 @@ class DisplayMappingResults: width=self.SIGNIFICANT_WIDTH * zoom # , clipX=(xLeftOffset, xLeftOffset + plotWidth-2) ) - sugg_coords = "%d, %d, %d, %d" % (start_pos_x, suggestiveY - 2, rightEdge + 2 * zoom, suggestiveY + 2) - sig_coords = "%d, %d, %d, %d" % (start_pos_x, significantY - 2, rightEdge + 2 * zoom, significantY + 2) + sugg_coords = "%d, %d, %d, %d" % ( + start_pos_x, suggestiveY - 2, rightEdge + 2 * zoom, suggestiveY + 2) + sig_coords = "%d, %d, %d, %d" % ( + start_pos_x, significantY - 2, rightEdge + 2 * zoom, significantY + 2) if self.LRS_LOD == 'LRS': sugg_title = "Suggestive LRS = %0.2f" % self.suggestive sig_title = "Significant LRS = %0.2f" % self.significant else: - sugg_title = "Suggestive LOD = %0.2f" % (self.suggestive / 4.61) - sig_title = "Significant LOD = %0.2f" % (self.significant / 4.61) + sugg_title = "Suggestive LOD = %0.2f" % ( + self.suggestive / 4.61) + sig_title = "Significant LOD = %0.2f" % ( + self.significant / 4.61) Areas1 = HtmlGenWrapper.create_area_tag( shape='rect', coords=sugg_coords, @@ -2273,24 +2419,28 @@ class DisplayMappingResults: gifmap.append(Areas1) gifmap.append(Areas2) - start_pos_x += (chr_length_dist + self.GraphInterval) * plotXScale + start_pos_x += (chr_length_dist + \ + self.GraphInterval) * plotXScale return start_pos_x for i, _chr in enumerate(self.genotype): if self.selectedChr != -1: if _chr.name == self.ChrList[self.selectedChr][0]: - startPosX = add_suggestive_significant_lines_and_legend(startPosX, self.ChrLengthDistList[0]) + startPosX = add_suggestive_significant_lines_and_legend( + startPosX, self.ChrLengthDistList[0]) break else: continue else: - startPosX = add_suggestive_significant_lines_and_legend(startPosX, self.ChrLengthDistList[i]) + startPosX = add_suggestive_significant_lines_and_legend( + startPosX, self.ChrLengthDistList[i]) if self.multipleInterval: lrsEdgeWidth = 1 else: if self.additiveChecked: - additiveMax = max([abs(X['additive']) for X in self.qtlresults]) + additiveMax = max([abs(X['additive']) + for X in self.qtlresults]) lrsEdgeWidth = 3 if zoom == 2: @@ -2300,7 +2450,8 @@ class DisplayMappingResults: AdditiveCoordXY = [] DominanceCoordXY = [] - symbolFont = ImageFont.truetype(font=FNT_BS_FILE, size=5) # ZS: For Manhattan Plot + symbolFont = ImageFont.truetype( + font=FNT_BS_FILE, size=5) # ZS: For Manhattan Plot previous_chr = 1 previous_chr_as_int = 0 @@ -2332,7 +2483,8 @@ class DisplayMappingResults: if Xc == Xc0: # genotype , locus distance is 0 Xcm = Xc else: - Xcm = (yZero - Yc0) / ((Yc - Yc0) / (Xc - Xc0)) + Xc0 + Xcm = (yZero - Yc0) / \ + ((Yc - Yc0) / (Xc - Xc0)) + Xc0 if Yc0 < yZero: im_drawer.line( xy=((Xc0, Yc0), (Xcm, yZero)), @@ -2340,7 +2492,8 @@ class DisplayMappingResults: # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) im_drawer.line( - xy=((Xcm, yZero), (Xc, yZero - (Yc - yZero))), + xy=((Xcm, yZero), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) @@ -2391,7 +2544,8 @@ class DisplayMappingResults: AdditiveCoordXY = [] previous_chr = qtlresult['chr'] previous_chr_as_int += 1 - newStartPosX = (self.ChrLengthDistList[previous_chr_as_int - 1] + self.GraphInterval) * plotXScale + newStartPosX = ( + self.ChrLengthDistList[previous_chr_as_int - 1] + self.GraphInterval) * plotXScale if newStartPosX != oldStartPosX: startPosX += newStartPosX oldStartPosX = newStartPosX @@ -2408,10 +2562,12 @@ class DisplayMappingResults: if self.genotype.filler: if self.selectedChr != -1: start_cm = self.genotype[self.selectedChr - 1][0].cM - Xc = startPosX + (qtlresult['Mb'] - start_cm) * plotXScale + Xc = startPosX + \ + (qtlresult['Mb'] - start_cm) * plotXScale else: start_cm = self.genotype[previous_chr_as_int][0].cM - Xc = startPosX + ((qtlresult['Mb'] - start_cm - startMb) * plotXScale) * (((qtlresult['Mb'] - start_cm - startMb) * plotXScale) / ((qtlresult['Mb'] - start_cm - startMb + self.GraphInterval) * plotXScale)) + Xc = startPosX + ((qtlresult['Mb'] - start_cm - startMb) * plotXScale) * ( + ((qtlresult['Mb'] - start_cm - startMb) * plotXScale) / ((qtlresult['Mb'] - start_cm - startMb + self.GraphInterval) * plotXScale)) else: Xc = startPosX + (qtlresult['Mb'] - startMb) * plotXScale @@ -2425,17 +2581,23 @@ class DisplayMappingResults: if self.LRS_LOD == "LOD" or self.LRS_LOD == "-logP": if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value'] == 'inf': #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/(LRSAxisList[-1]*self.LODFACTOR) - Yc = yZero - webqtlConfig.MAXLRS * LRSHeightThresh / (LRS_LOD_Max * self.LODFACTOR) + Yc = yZero - webqtlConfig.MAXLRS * \ + LRSHeightThresh / \ + (LRS_LOD_Max * self.LODFACTOR) else: #Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/(LRSAxisList[-1]*self.LODFACTOR) - Yc = yZero - qtlresult['lrs_value'] * LRSHeightThresh / (LRS_LOD_Max * self.LODFACTOR) + Yc = yZero - \ + qtlresult['lrs_value'] * LRSHeightThresh / \ + (LRS_LOD_Max * self.LODFACTOR) else: if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value'] == 'inf': #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRSAxisList[-1] Yc = yZero - webqtlConfig.MAXLRS * LRSHeightThresh / LRS_LOD_Max else: #Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/LRSAxisList[-1] - Yc = yZero - qtlresult['lrs_value'] * LRSHeightThresh / LRS_LOD_Max + Yc = yZero - \ + qtlresult['lrs_value'] * \ + LRSHeightThresh / LRS_LOD_Max else: if qtlresult['lod_score'] > 100 or qtlresult['lod_score'] == 'inf': #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRSAxisList[-1] @@ -2443,10 +2605,14 @@ class DisplayMappingResults: else: if self.LRS_LOD == "LRS": #Yc = yZero - qtlresult['lod_score']*self.LODFACTOR*LRSHeightThresh/LRSAxisList[-1] - Yc = yZero - qtlresult['lod_score'] * self.LODFACTOR * LRSHeightThresh / LRS_LOD_Max + Yc = yZero - \ + qtlresult['lod_score'] * self.LODFACTOR * \ + LRSHeightThresh / LRS_LOD_Max else: #Yc = yZero - qtlresult['lod_score']*LRSHeightThresh/LRSAxisList[-1] - Yc = yZero - qtlresult['lod_score'] * LRSHeightThresh / LRS_LOD_Max + Yc = yZero - \ + qtlresult['lod_score'] * \ + LRSHeightThresh / LRS_LOD_Max if self.manhattan_plot == True: if self.color_scheme == "single": @@ -2462,7 +2628,8 @@ class DisplayMappingResults: im_drawer.text( text="5", xy=( - Xc - im_drawer.textsize("5", font=symbolFont)[0] / 2 + 1, + Xc - im_drawer.textsize("5", + font=symbolFont)[0] / 2 + 1, Yc - 4), fill=point_color, font=symbolFont) else: @@ -2471,7 +2638,8 @@ class DisplayMappingResults: if not self.multipleInterval and self.additiveChecked: if additiveMax == 0.0: additiveMax = 0.000001 - Yc = yZero - qtlresult['additive'] * AdditiveHeightThresh / additiveMax + Yc = yZero - qtlresult['additive'] * \ + AdditiveHeightThresh / additiveMax AdditiveCoordXY.append((Xc, Yc)) m += 1 @@ -2496,7 +2664,8 @@ class DisplayMappingResults: if Xc == Xc0: # genotype , locus distance is 0 Xcm = Xc else: - Xcm = (yZero - Yc0) / ((Yc - Yc0) / (Xc - Xc0)) + Xc0 + Xcm = (yZero - Yc0) / \ + ((Yc - Yc0) / (Xc - Xc0)) + Xc0 if Yc0 < yZero: im_drawer.line( xy=((Xc0, Yc0), (Xcm, yZero)), @@ -2561,7 +2730,8 @@ class DisplayMappingResults: if Xc == Xc0: # genotype , locus distance is 0 Xcm = Xc else: - Xcm = (yZero - Yc0) / ((Yc - Yc0) / (Xc - Xc0)) + Xc0 + Xcm = (yZero - Yc0) / \ + ((Yc - Yc0) / (Xc - Xc0)) + Xc0 if Yc0 < yZero: im_drawer.line( xy=((Xc0, Yc0), (Xcm, yZero)), @@ -2616,9 +2786,11 @@ class DisplayMappingResults: # draw additive scale if not self.multipleInterval and self.additiveChecked: - additiveScaleFont = ImageFont.truetype(font=VERDANA_FILE, size=16 * zoom) + additiveScaleFont = ImageFont.truetype( + font=VERDANA_FILE, size=16 * zoom) additiveScale = Plot.detScaleOld(0, additiveMax) - additiveStep = (additiveScale[1] - additiveScale[0]) / additiveScale[2] + additiveStep = (additiveScale[1] - \ + additiveScale[0]) / additiveScale[2] additiveAxisList = Plot.frange(0, additiveScale[1], additiveStep) addPlotScale = AdditiveHeightThresh / additiveMax TEXT_Y_DISPLACEMENT = -8 @@ -2633,7 +2805,8 @@ class DisplayMappingResults: scaleStr = "%2.3f" % item im_drawer.text( text=scaleStr, - xy= (xLeftOffset + plotWidth + 6, additiveY + TEXT_Y_DISPLACEMENT), + xy= (xLeftOffset + plotWidth + 6, + additiveY + TEXT_Y_DISPLACEMENT), font=additiveScaleFont, fill=self.ADDITIVE_COLOR_POSITIVE) im_drawer.line( @@ -2690,14 +2863,16 @@ class DisplayMappingResults: plotXScale = plotWidth / drawRegionDistance else: # multiple chromosome view - plotXScale = plotWidth / ((len(self.genotype) - 1) * self.GraphInterval + drawRegionDistance) + plotXScale = plotWidth / \ + ((len(self.genotype) - 1) * self.GraphInterval + drawRegionDistance) startPosX = xLeftOffset if fontZoom == 1.5: chrFontZoom = 2 else: chrFontZoom = 1 - chrLabelFont = ImageFont.truetype(font=VERDANA_FILE, size=24 * chrFontZoom) + chrLabelFont = ImageFont.truetype( + font=VERDANA_FILE, size=24 * chrFontZoom) for i, _chr in enumerate(self.genotype): if (i % 2 == 0): @@ -2712,14 +2887,18 @@ class DisplayMappingResults: outline=GAINSBORO, fill=theBackColor) - chrNameWidth, chrNameHeight = im_drawer.textsize(_chr.name, font=chrLabelFont) - chrStartPix = startPosX + (self.ChrLengthDistList[i] * plotXScale - chrNameWidth) / 2 - chrEndPix = startPosX + (self.ChrLengthDistList[i] * plotXScale + chrNameWidth) / 2 + chrNameWidth, chrNameHeight = im_drawer.textsize( + _chr.name, font=chrLabelFont) + chrStartPix = startPosX + \ + (self.ChrLengthDistList[i] * plotXScale - chrNameWidth) / 2 + chrEndPix = startPosX + \ + (self.ChrLengthDistList[i] * plotXScale + chrNameWidth) / 2 TEXT_Y_DISPLACEMENT = 0 im_drawer.text(xy=(chrStartPix, yTopOffset + TEXT_Y_DISPLACEMENT), text=_chr.name, font=chrLabelFont, fill=BLACK) - COORDS = "%d,%d,%d,%d" % (chrStartPix, yTopOffset, chrEndPix, yTopOffset + 20) + COORDS = "%d,%d,%d,%d" % ( + chrStartPix, yTopOffset, chrEndPix, yTopOffset + 20) # add by NL 09-03-2010 HREF = "javascript:chrView(%d,%s);" % (i, self.ChrLengthMbList) @@ -2729,7 +2908,8 @@ class DisplayMappingResults: coords=COORDS, href=HREF) gifmap.append(Areas) - startPosX += (self.ChrLengthDistList[i] + self.GraphInterval) * plotXScale + startPosX += (self.ChrLengthDistList[i] + \ + self.GraphInterval) * plotXScale return plotXScale @@ -2746,7 +2926,8 @@ class DisplayMappingResults: perm_output = self.perm_output filename = webqtlUtil.genRandStr("Reg_") - Plot.plotBar(myCanvas, perm_output, XLabel=self.LRS_LOD, YLabel='Frequency', title=' Histogram of Permutation Test') + Plot.plotBar(myCanvas, perm_output, XLabel=self.LRS_LOD, + YLabel='Frequency', title=' Histogram of Permutation Test') myCanvas.save("{}.gif".format(GENERATED_IMAGE_DIR + filename), format='gif') @@ -2825,7 +3006,8 @@ class DisplayMappingResults: txEnd = theGO["TxEnd"] theGO["snpDensity"] = theGO["snpCount"] / geneLength if self.ALEX_DEBUG_BOOL_PRINT_GENE_LIST: - geneIdString = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s' % theGO["GeneID"] + geneIdString = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s' % theGO[ + "GeneID"] if theGO["snpCount"]: snpString = HT.Link( @@ -2842,7 +3024,9 @@ class DisplayMappingResults: else: snpString = 0 - mouseStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Mouse&db=mm9&position=chr" + theGO["Chromosome"] + "%3A" + str(int(theGO["TxStart"] * 1000000.0)) + "-" + str(int(theGO["TxEnd"] * 1000000.0)) + "&pix=620&Submit=submit" + mouseStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Mouse&db=mm9&position=chr" + \ + theGO["Chromosome"] + "%3A" + str(int(theGO["TxStart"] * 1000000.0)) + "-" + str( + int(theGO["TxEnd"] * 1000000.0)) + "&pix=620&Submit=submit" # the chromosomes for human 1 are 1qXX.XX if theGO['humanGene']: @@ -2854,7 +3038,8 @@ class DisplayMappingResults: humanChr = theGO['humanGene']["Chromosome"] humanTxStart = theGO['humanGene']["TxStart"] - humanStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Human&db=hg17&position=chr%s:%d-%d" % (humanChr, int(1000000 * theGO['humanGene']["TxStart"]), int(1000000 * theGO['humanGene']["TxEnd"])) + humanStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Human&db=hg17&position=chr%s:%d-%d" % ( + humanChr, int(1000000 * theGO['humanGene']["TxStart"]), int(1000000 * theGO['humanGene']["TxEnd"])) else: humanStartString = humanChr = humanStartDisplay = "--" @@ -2879,7 +3064,8 @@ class DisplayMappingResults: else: chr_as_int = int(theGO["Chromosome"]) - 1 if refGene: - literatureCorrelationString = str(self.getLiteratureCorrelation(self.cursor, refGene, theGO['GeneID']) or "N/A") + literatureCorrelationString = str(self.getLiteratureCorrelation( + self.cursor, refGene, theGO['GeneID']) or "N/A") this_row = [selectCheck.__str__(), str(tableIterationsCnt), @@ -2947,7 +3133,8 @@ class DisplayMappingResults: if theGO["GeneID"] != "": geneSymbolNCBI = str(HtmlGenWrapper.create_link_tag( - "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids={}".format(theGO["GeneID"]), + "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids={}".format( + theGO["GeneID"]), theGO["GeneSymbol"], Class="normalsize", target="_blank")) @@ -2960,7 +3147,8 @@ class DisplayMappingResults: chr_as_int = int(theGO["Chromosome"]) - 1 geneLength = (float(theGO["TxEnd"]) - float(theGO["TxStart"])) - geneLengthURL = "javascript:rangeView('%s', %f, %f)" % (theGO["Chromosome"], float(theGO["TxStart"]) - (geneLength * 0.1), float(theGO["TxEnd"]) + (geneLength * 0.1)) + geneLengthURL = "javascript:rangeView('%s', %f, %f)" % (theGO["Chromosome"], float( + theGO["TxStart"]) - (geneLength * 0.1), float(theGO["TxEnd"]) + (geneLength * 0.1)) avgExprVal = [] if avgExprVal != "" and avgExprVal: diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py index 68689104..289f1d5c 100644 --- a/wqflask/wqflask/marker_regression/gemma_mapping.py +++ b/wqflask/wqflask/marker_regression/gemma_mapping.py @@ -149,7 +149,8 @@ def gen_covariates_file(this_dataset, covariates, samples): dataset_name = covariate.split(":")[1] if dataset_name == "Temp": temp_group = trait_name.split("_")[2] - dataset_ob = create_dataset(dataset_name="Temp", dataset_type="Temp", group_name = temp_group) + dataset_ob = create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name = temp_group) else: dataset_ob = create_dataset(covariate.split(":")[1]) trait_ob = create_trait(dataset=dataset_ob, diff --git a/wqflask/wqflask/marker_regression/plink_mapping.py b/wqflask/wqflask/marker_regression/plink_mapping.py index 22a50bb8..2fa80841 100644 --- a/wqflask/wqflask/marker_regression/plink_mapping.py +++ b/wqflask/wqflask/marker_regression/plink_mapping.py @@ -10,7 +10,8 @@ logger = utility.logger.getLogger(__name__) def run_plink(this_trait, dataset, species, vals, maf): - plink_output_filename = webqtlUtil.genRandStr(f"{dataset.group.name}_{this_trait.name}_") + plink_output_filename = webqtlUtil.genRandStr( + f"{dataset.group.name}_{this_trait.name}_") gen_pheno_txt_file(dataset, vals) plink_command = f"{PLINK_COMMAND} --noweb --bfile {flat_files('mapping')}/{dataset.group.name} --no-pheno --no-fid --no-parents --no-sex --maf {maf} --out { TMPDIR}{plink_output_filename} --assoc " @@ -41,7 +42,8 @@ def gen_pheno_txt_file(this_dataset, vals): this_val = -9 else: this_val = vals[i] - outfile.write("0 " + line[1] + " " + line[2] + " " + line[3] + " " + line[4] + " " + str(this_val) + "\n") + outfile.write("0 " + line[1] + " " + line[2] + " " + \ + line[3] + " " + line[4] + " " + str(this_val) + "\n") def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename=''): @@ -162,7 +164,8 @@ def parse_plink_output(output_filename, species): def build_line_list(line=""): - line_list = line.strip().split(' ') # irregular number of whitespaces between columns + # irregular number of whitespaces between columns + line_list = line.strip().split(' ') line_list = [item for item in line_list if item != ''] line_list = [item.strip() for item in line_list] diff --git a/wqflask/wqflask/marker_regression/qtlreaper_mapping.py b/wqflask/wqflask/marker_regression/qtlreaper_mapping.py index b8fe2c37..f932498f 100644 --- a/wqflask/wqflask/marker_regression/qtlreaper_mapping.py +++ b/wqflask/wqflask/marker_regression/qtlreaper_mapping.py @@ -26,7 +26,8 @@ def run_reaper(this_trait, this_dataset, samples, vals, json_data, num_perm, boo gen_pheno_txt_file(samples, vals, trait_filename) output_filename = (f"{this_dataset.group.name}_GWA_" + - ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) ) bootstrap_filename = None permu_filename = None @@ -34,19 +35,22 @@ def run_reaper(this_trait, this_dataset, samples, vals, json_data, num_perm, boo opt_list = [] if boot_check and num_bootstrap > 0: bootstrap_filename = (f"{this_dataset.group.name}_BOOTSTRAP_" + - ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) ) opt_list.append("-b") opt_list.append(f"--n_bootstrap {str(num_bootstrap)}") - opt_list.append(f"--bootstrap_output {webqtlConfig.GENERATED_IMAGE_DIR}{bootstrap_filename}.txt") + opt_list.append( + f"--bootstrap_output {webqtlConfig.GENERATED_IMAGE_DIR}{bootstrap_filename}.txt") if num_perm > 0: permu_filename = ("{this_dataset.group.name}_PERM_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) ) opt_list.append("-n " + str(num_perm)) - opt_list.append("--permu_output " + webqtlConfig.GENERATED_IMAGE_DIR + permu_filename + ".txt") + opt_list.append( + "--permu_output " + webqtlConfig.GENERATED_IMAGE_DIR + permu_filename + ".txt") if control_marker != "" and do_control == "true": opt_list.append("-c " + control_marker) if manhattan_plot != True: @@ -58,7 +62,8 @@ def run_reaper(this_trait, this_dataset, samples, vals, json_data, num_perm, boo genofile_name, TEMPDIR, trait_filename, - " ".join(opt_list), + " ".join( + opt_list), webqtlConfig.GENERATED_IMAGE_DIR, output_filename)) @@ -67,7 +72,8 @@ def run_reaper(this_trait, this_dataset, samples, vals, json_data, num_perm, boo else: output_filename, permu_filename, bootstrap_filename = output_files - marker_obs, permu_vals, bootstrap_vals = parse_reaper_output(output_filename, permu_filename, bootstrap_filename) + marker_obs, permu_vals, bootstrap_vals = parse_reaper_output( + output_filename, permu_filename, bootstrap_filename) suggestive = 0 significant = 0 @@ -193,7 +199,8 @@ def run_original_reaper(this_trait, dataset, samples_before, trait_vals, json_da suggestive = 0 significant = 0 else: - perm_output = genotype.permutation(strains=trimmed_samples, trait=trimmed_values, nperm=num_perm) + perm_output = genotype.permutation( + strains=trimmed_samples, trait=trimmed_values, nperm=num_perm) suggestive = perm_output[int(num_perm * 0.37 - 1)] significant = perm_output[int(num_perm * 0.95 - 1)] # highly_significant = perm_output[int(num_perm*0.99-1)] #ZS: Currently not used, but leaving it here just in case @@ -257,7 +264,8 @@ def run_original_reaper(this_trait, dataset, samples_before, trait_vals, json_da json_data['markernames'].append(reaper_locus.name) # if self.additive: # self.json_data['additive'].append(qtl.additive) - locus = {"name": reaper_locus.name, "chr": reaper_locus.chr, "cM": reaper_locus.cM, "Mb": reaper_locus.Mb} + locus = {"name": reaper_locus.name, "chr": reaper_locus.chr, + "cM": reaper_locus.cM, "Mb": reaper_locus.Mb} qtl = {"lrs_value": qtl.lrs, "chr": converted_chr, "Mb": reaper_locus.Mb, "cM": reaper_locus.cM, "name": reaper_locus.name, "additive": qtl.additive, "dominance": qtl.dominance} qtl_results.append(qtl) @@ -270,5 +278,6 @@ def natural_sort(marker_list): Changed to return indices instead of values, though, since the same reordering needs to be applied to bootstrap results """ convert = lambda text: int(text) if text.isdigit() else text.lower() - alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', str(marker_list[key]['chr']))] + alphanum_key = lambda key: [convert(c) for c in re.split( + '([0-9]+)', str(marker_list[key]['chr']))] return sorted(list(range(len(marker_list))), key=alphanum_key) diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py index 588600f5..c2b165a4 100644 --- a/wqflask/wqflask/marker_regression/rqtl_mapping.py +++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py @@ -51,7 +51,8 @@ def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permChec # Get pointers to some R/qtl functions scanone = ro.r["scanone"] # Map the scanone function scantwo = ro.r["scantwo"] # Map the scantwo function - calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function + # Map the calc.genoprob function + calc_genoprob = ro.r["calc.genoprob"] crossname = dataset.group.name # try: @@ -67,14 +68,16 @@ def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permChec scale_units = "Mb" generate_cross_from_geno(dataset, scale_units) - GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function + # Map the local GENOtoCSVR function + GENOtoCSVR = ro.r["GENOtoCSVR"] crossfilelocation = TMPDIR + crossname + ".cross" if dataset.group.genofile: genofilelocation = locate(dataset.group.genofile, "genotype") else: genofilelocation = locate(dataset.group.name + ".geno", "genotype") logger.info("Going to create a cross from geno"); - cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) # TODO: Add the SEX if that is available + # TODO: Add the SEX if that is available + cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) logger.info("before calc_genoprob"); if manhattan_plot: cross_object = calc_genoprob(cross_object) @@ -85,14 +88,19 @@ def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permChec logger.info("phenostring done"); names_string = sanitize_rqtl_names(samples) logger.info("sanitized pheno and names"); - cross_object = add_phenotype(cross_object, pheno_string, "the_pheno") # Add the phenotype - cross_object = add_names(cross_object, names_string, "the_names") # Add the phenotype + # Add the phenotype + cross_object = add_phenotype(cross_object, pheno_string, "the_pheno") + # Add the phenotype + cross_object = add_names(cross_object, names_string, "the_names") logger.info("Added pheno and names"); - marker_covars = create_marker_covariates(control_marker, cross_object) # Create the additive covariate markers + # Create the additive covariate markers + marker_covars = create_marker_covariates(control_marker, cross_object) logger.info("Marker covars done"); if cofactors != "": logger.info("Cofactors: " + cofactors); - cross_object, trait_covars = add_cofactors(cross_object, dataset, cofactors, samples) # Create the covariates from selected traits + # Create the covariates from selected traits + cross_object, trait_covars = add_cofactors( + cross_object, dataset, cofactors, samples) ro.r('all_covars <- cbind(marker_covars, trait_covars)') else: ro.r('all_covars <- marker_covars') @@ -100,9 +108,11 @@ def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permChec # DEBUG to save the session object to file if pair_scan: if do_control == "true": - logger.info("Using covariate"); result_data_frame = scantwo(cross_object, pheno="the_pheno", addcovar=covars, model=model, method=method, n_cluster = 16) + logger.info("Using covariate"); result_data_frame = scantwo( + cross_object, pheno="the_pheno", addcovar=covars, model=model, method=method, n_cluster = 16) else: - logger.info("No covariates"); result_data_frame = scantwo(cross_object, pheno="the_pheno", model=model, method=method, n_cluster=16) + logger.info("No covariates"); result_data_frame = scantwo( + cross_object, pheno="the_pheno", model=model, method=method, n_cluster=16) pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png" png(file=TEMPDIR + pair_scan_filename) @@ -112,25 +122,36 @@ def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permChec return process_pair_scan_results(result_data_frame) else: if do_control == "true" or cofactors != "": - logger.info("Using covariate"); result_data_frame = scanone(cross_object, pheno="the_pheno", addcovar=covars, model=model, method=method) + logger.info("Using covariate"); result_data_frame = scanone( + cross_object, pheno="the_pheno", addcovar=covars, model=model, method=method) ro.r('save.image(file = "/home/zas1024/gn2-zach/itp_cofactor_test.RData")') else: - logger.info("No covariates"); result_data_frame = scanone(cross_object, pheno="the_pheno", model=model, method=method) - - if num_perm > 0 and permCheck == "ON": # Do permutation (if requested by user) - if len(perm_strata_list) > 0: # ZS: The strata list would only be populated if "Stratified" was checked on before mapping - cross_object, strata_ob = add_perm_strata(cross_object, perm_strata_list) + logger.info("No covariates"); result_data_frame = scanone( + cross_object, pheno="the_pheno", model=model, method=method) + + # Do permutation (if requested by user) + if num_perm > 0 and permCheck == "ON": + # ZS: The strata list would only be populated if "Stratified" was checked on before mapping + if len(perm_strata_list) > 0: + cross_object, strata_ob = add_perm_strata( + cross_object, perm_strata_list) if do_control == "true" or cofactors != "": - perm_data_frame = scanone(cross_object, pheno_col="the_pheno", addcovar=covars, n_perm = int(num_perm), perm_strata = strata_ob, model=model, method=method) + perm_data_frame = scanone(cross_object, pheno_col="the_pheno", addcovar=covars, n_perm = int( + num_perm), perm_strata = strata_ob, model=model, method=method) else: - perm_data_frame = scanone(cross_object, pheno_col="the_pheno", n_perm=num_perm, perm_strata = strata_ob, model=model, method=method) + perm_data_frame = scanone( + cross_object, pheno_col="the_pheno", n_perm=num_perm, perm_strata = strata_ob, model=model, method=method) else: if do_control == "true" or cofactors != "": - perm_data_frame = scanone(cross_object, pheno_col="the_pheno", addcovar=covars, n_perm = int(num_perm), model=model, method=method) + perm_data_frame = scanone(cross_object, pheno_col="the_pheno", addcovar=covars, n_perm = int( + num_perm), model=model, method=method) else: - perm_data_frame = scanone(cross_object, pheno_col="the_pheno", n_perm=num_perm, model=model, method=method) + perm_data_frame = scanone( + cross_object, pheno_col="the_pheno", n_perm=num_perm, model=model, method=method) - perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame) # Functions that sets the thresholds for the webinterface + # Functions that sets the thresholds for the webinterface + perm_output, suggestive, significant = process_rqtl_perm_results( + num_perm, perm_data_frame) return perm_output, suggestive, significant, process_rqtl_results(result_data_frame, dataset.group.species) else: return process_rqtl_results(result_data_frame, dataset.group.species) @@ -147,7 +168,8 @@ def generate_cross_from_rdata(dataset): """ % (rdata_location)) -def generate_cross_from_geno(dataset, scale_units): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly +# TODO: Need to figure out why some genofiles have the wrong format and don't convert properly +def generate_cross_from_geno(dataset, scale_units): ro.r(""" trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) } @@ -246,7 +268,8 @@ def sanitize_rqtl_names(vals): def add_phenotype(cross, pheno_as_string, col_name): ro.globalenv["the_cross"] = cross ro.r('pheno <- data.frame(pull.pheno(the_cross))') - ro.r('the_cross$pheno <- cbind(pheno, ' + col_name + ' = as.numeric(' + pheno_as_string + '))') + ro.r('the_cross$pheno <- cbind(pheno, ' + col_name + \ + ' = as.numeric(' + pheno_as_string + '))') return ro.r["the_cross"] @@ -270,7 +293,8 @@ def add_categorical_covar(cross, covar_as_string, i): #logger.info("loop" + str(x)); col_name = "covar_" + str(i) + "_" + str(x) #logger.info("col_name" + col_name); - ro.r('the_cross$pheno <- cbind(pheno, ' + col_name + ' = newcovar[,' + str(x) + '])') + ro.r('the_cross$pheno <- cbind(pheno, ' + \ + col_name + ' = newcovar[,' + str(x) + '])') col_names.append(col_name) #logger.info("loop" + str(x) + "done"); @@ -281,7 +305,8 @@ def add_categorical_covar(cross, covar_as_string, i): def add_names(cross, names_as_string, col_name): ro.globalenv["the_cross"] = cross ro.r('pheno <- data.frame(pull.pheno(the_cross))') - ro.r('the_cross$pheno <- cbind(pheno, ' + col_name + ' = ' + names_as_string + ')') + ro.r('the_cross$pheno <- cbind(pheno, ' + \ + col_name + ' = ' + names_as_string + ')') return ro.r["the_cross"] @@ -330,9 +355,11 @@ def add_cofactors(cross, this_dataset, covariates, samples): logger.info("Covariate: " + covariate + " is of type: " + datatype); if(datatype == "categorical"): # Cat variable logger.info("call of add_categorical_covar"); - cross, col_names = add_categorical_covar(cross, covar_as_string, i) # Expand and add it to the cross + cross, col_names = add_categorical_covar( + cross, covar_as_string, i) # Expand and add it to the cross logger.info("add_categorical_covar returned"); - for z, col_name in enumerate(col_names): # Go through the additional covar names + # Go through the additional covar names + for z, col_name in enumerate(col_names): if i < (len(covariate_list) - 1): covar_name_string += '"' + col_name + '", ' else: @@ -355,18 +382,22 @@ def add_cofactors(cross, this_dataset, covariates, samples): def create_marker_covariates(control_marker, cross): ro.globalenv["the_cross"] = cross - ro.r('genotypes <- pull.geno(the_cross)') # Get the genotype matrix - userinput_sanitized = control_marker.replace(" ", "").split(",") # TODO: sanitize user input, Never Ever trust a user + # Get the genotype matrix + ro.r('genotypes <- pull.geno(the_cross)') + # TODO: sanitize user input, Never Ever trust a user + userinput_sanitized = control_marker.replace(" ", "").split(",") logger.debug(userinput_sanitized) if len(userinput_sanitized) > 0: - covariate_names = ', '.join('"{0}"'.format(w) for w in userinput_sanitized) + covariate_names = ', '.join('"{0}"'.format(w) + for w in userinput_sanitized) ro.r('covnames <- c(' + covariate_names + ')') else: ro.r('covnames <- c()') ro.r('covInGeno <- which(covnames %in% colnames(genotypes))') ro.r('covnames <- covnames[covInGeno]') ro.r("cat('covnames (purged): ', covnames,'\n')") - ro.r('marker_covars <- genotypes[,covnames]') # Get the covariate matrix by using the marker name as index to the genotype file + # Get the covariate matrix by using the marker name as index to the genotype file + ro.r('marker_covars <- genotypes[,covnames]') # TODO: Create a design matrix from the marker covars for the markers in case of an F2, 4way, etc return ro.r["marker_covars"] @@ -375,7 +406,8 @@ def process_pair_scan_results(result): pair_scan_results = [] result = result[1] - output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)] + output = [tuple([result[j][i] for j in range(result.ncol)]) + for i in range(result.nrow)] for i, line in enumerate(result.iter_row()): marker = {} @@ -401,14 +433,17 @@ def process_rqtl_perm_results(num_perm, results): return perm_output, suggestive, significant -def process_rqtl_results(result, species_name): # TODO: how to make this a one liner and not copy the stuff in a loop +# TODO: how to make this a one liner and not copy the stuff in a loop +def process_rqtl_results(result, species_name): qtl_results = [] - output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)] + output = [tuple([result[j][i] for j in range(result.ncol)]) + for i in range(result.nrow)] for i, line in enumerate(result.iter_row()): marker = {} marker['name'] = result.rownames[i] - if species_name == "mouse" and output[i][0] == 20: # ZS: This is awkward, but I'm not sure how to change the 20s to Xs in the RData file + # ZS: This is awkward, but I'm not sure how to change the 20s to Xs in the RData file + if species_name == "mouse" and output[i][0] == 20: marker['chr'] = "X" else: marker['chr'] = output[i][0] diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py index 31c58083..d9b28fba 100644 --- a/wqflask/wqflask/marker_regression/run_mapping.py +++ b/wqflask/wqflask/marker_regression/run_mapping.py @@ -51,7 +51,8 @@ class RunMapping: def __init__(self, start_vars, temp_uuid): helper_functions.get_species_dataset_trait(self, start_vars) - self.temp_uuid = temp_uuid # needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py) + # needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py) + self.temp_uuid = temp_uuid # ZS: Needed to zoom in or remap temp traits like PCA traits if "temp_trait" in start_vars and start_vars['temp_trait'] != "False": @@ -102,8 +103,11 @@ class RunMapping: if "results_path" in start_vars: self.mapping_results_path = start_vars['results_path'] else: - mapping_results_filename = self.dataset.group.name + "_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - self.mapping_results_path = "{}{}.csv".format(webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename) + mapping_results_filename = self.dataset.group.name + "_" + \ + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + self.mapping_results_path = "{}{}.csv".format( + webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename) self.manhattan_plot = False if 'manhattan_plot' in start_vars: @@ -122,7 +126,8 @@ class RunMapping: self.use_loco = None self.suggestive = "" self.significant = "" - self.pair_scan = False # Initializing this since it is checked in views to determine which template to use + # Initializing this since it is checked in views to determine which template to use + self.pair_scan = False if 'transform' in start_vars: self.transform = start_vars['transform'] else: @@ -140,7 +145,8 @@ class RunMapping: # ZS: This is passed to GN1 code for single chr mapping self.selected_chr = -1 if "selected_chr" in start_vars: - if int(start_vars['selected_chr']) != -1: # ZS: Needs to be -1 if showing full map; there's probably a better way to fix this + # ZS: Needs to be -1 if showing full map; there's probably a better way to fix this + if int(start_vars['selected_chr']) != -1: self.selected_chr = int(start_vars['selected_chr']) + 1 else: self.selected_chr = int(start_vars['selected_chr']) @@ -198,28 +204,33 @@ class RunMapping: self.output_files = None if 'output_files' in start_vars: self.output_files = start_vars['output_files'] - if 'first_run' in start_vars: # ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc) + # ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc) + if 'first_run' in start_vars: self.first_run = False self.score_type = "-logP" self.manhattan_plot = True with Bench("Running GEMMA"): if self.use_loco == "True": - marker_obs, self.output_files = gemma_mapping.run_gemma(self.this_trait, self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf, self.first_run, self.output_files) + marker_obs, self.output_files = gemma_mapping.run_gemma( + self.this_trait, self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf, self.first_run, self.output_files) else: - marker_obs, self.output_files = gemma_mapping.run_gemma(self.this_trait, self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf, self.first_run, self.output_files) + marker_obs, self.output_files = gemma_mapping.run_gemma( + self.this_trait, self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf, self.first_run, self.output_files) results = marker_obs elif self.mapping_method == "rqtl_plink": results = self.run_rqtl_plink() elif self.mapping_method == "rqtl_geno": perm_strata = [] if "perm_strata" in start_vars and "categorical_vars" in start_vars: - self.categorical_vars = start_vars["categorical_vars"].split(",") + self.categorical_vars = start_vars["categorical_vars"].split( + ",") if len(self.categorical_vars) and start_vars["perm_strata"] == "True": primary_samples = SampleList(dataset=self.dataset, sample_names=self.samples, this_trait=self.this_trait) - perm_strata = get_perm_strata(self.this_trait, primary_samples, self.categorical_vars, self.samples) + perm_strata = get_perm_strata( + self.this_trait, primary_samples, self.categorical_vars, self.samples) self.score_type = "LOD" self.control_marker = start_vars['control_marker'] self.do_control = start_vars['do_control'] @@ -231,9 +242,11 @@ class RunMapping: # if start_vars['pair_scan'] == "true": # self.pair_scan = True if self.permCheck and self.num_perm > 0: - self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) + self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno( + self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) else: - results = rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) + results = rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, + self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) elif self.mapping_method == "reaper": if "startMb" in start_vars: # ZS: Check if first time page loaded, so it can default to ON if "additiveCheck" in start_vars: @@ -268,10 +281,12 @@ class RunMapping: if self.reaper_version == "new": self.first_run = True self.output_files = None - if 'first_run' in start_vars: # ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc) + # ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc) + if 'first_run' in start_vars: self.first_run = False if 'output_files' in start_vars: - self.output_files = start_vars['output_files'].split(",") + self.output_files = start_vars['output_files'].split( + ",") results, self.perm_output, self.suggestive, self.significant, self.bootstrap_results, self.output_files = qtlreaper_mapping.run_reaper(self.this_trait, self.dataset, @@ -301,7 +316,8 @@ class RunMapping: elif self.mapping_method == "plink": self.score_type = "-logP" self.manhattan_plot = True - results = plink_mapping.run_plink(self.this_trait, self.dataset, self.species, self.vals, self.maf) + results = plink_mapping.run_plink( + self.this_trait, self.dataset, self.species, self.vals, self.maf) #results = self.run_plink() else: logger.debug("RUNNING NOTHING") @@ -353,7 +369,9 @@ class RunMapping: chr=str(marker['chr']), rs=marker['name'], ps=this_ps, - url="/show_trait?trait_id=" + marker['name'] + "&dataset=" + self.dataset.group.name + "Geno" + url="/show_trait?trait_id=" + \ + marker['name'] + "&dataset=" + \ + self.dataset.group.name + "Geno" ) if self.geno_db_exists == "True": @@ -362,7 +380,9 @@ class RunMapping: chr=str(marker['chr']), rs=marker['name'], pos=this_ps, - url="/show_trait?trait_id=" + marker['name'] + "&dataset=" + self.dataset.group.name + "Geno" + url="/show_trait?trait_id=" + \ + marker['name'] + "&dataset=" + \ + self.dataset.group.name + "Geno" ) else: annot_marker = dict( @@ -373,7 +393,8 @@ class RunMapping: ) if 'lrs_value' in marker and marker['lrs_value'] > 0: - browser_marker['p_wald'] = 10**-(marker['lrs_value'] / 4.61) + browser_marker['p_wald'] = 10**- \ + (marker['lrs_value'] / 4.61) elif 'lod_score' in marker and marker['lod_score'] > 0: browser_marker['p_wald'] = 10**-(marker['lod_score']) else: @@ -386,9 +407,13 @@ class RunMapping: highest_chr = marker['chr'] if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()): if 'Mb' in marker.keys(): - marker['display_pos'] = "Chr" + str(marker['chr']) + ": " + "{:.6f}".format(marker['Mb']) + marker['display_pos'] = "Chr" + \ + str(marker['chr']) + ": " + \ + "{:.6f}".format(marker['Mb']) elif 'cM' in marker.keys(): - marker['display_pos'] = "Chr" + str(marker['chr']) + ": " + "{:.3f}".format(marker['cM']) + marker['display_pos'] = "Chr" + \ + str(marker['chr']) + ": " + \ + "{:.3f}".format(marker['cM']) else: marker['display_pos'] = "N/A" self.qtl_results.append(marker) @@ -396,12 +421,15 @@ class RunMapping: total_markers = len(self.qtl_results) with Bench("Exporting Results"): - export_mapping_results(self.dataset, self.this_trait, self.qtl_results, self.mapping_results_path, self.mapping_scale, self.score_type, self.transform, self.covariates, self.n_samples) + export_mapping_results(self.dataset, self.this_trait, self.qtl_results, self.mapping_results_path, + self.mapping_scale, self.score_type, self.transform, self.covariates, self.n_samples) with Bench("Trimming Markers for Figure"): if len(self.qtl_results) > 30000: - self.qtl_results = trim_markers_for_figure(self.qtl_results) - self.results_for_browser = trim_markers_for_figure(self.results_for_browser) + self.qtl_results = trim_markers_for_figure( + self.qtl_results) + self.results_for_browser = trim_markers_for_figure( + self.results_for_browser) filtered_annotations = [] for marker in self.results_for_browser: for annot_marker in self.annotations_for_browser: @@ -409,14 +437,17 @@ class RunMapping: filtered_annotations.append(annot_marker) break self.annotations_for_browser = filtered_annotations - browser_files = write_input_for_browser(self.dataset, self.results_for_browser, self.annotations_for_browser) + browser_files = write_input_for_browser( + self.dataset, self.results_for_browser, self.annotations_for_browser) else: - browser_files = write_input_for_browser(self.dataset, self.results_for_browser, self.annotations_for_browser) + browser_files = write_input_for_browser( + self.dataset, self.results_for_browser, self.annotations_for_browser) with Bench("Trimming Markers for Table"): self.trimmed_markers = trim_markers_for_table(results) - chr_lengths = get_chr_lengths(self.mapping_scale, self.mapping_method, self.dataset, self.qtl_results) + chr_lengths = get_chr_lengths( + self.mapping_scale, self.mapping_method, self.dataset, self.qtl_results) # ZS: For zooming into genome browser, need to pass chromosome name instead of number if self.dataset.group.species == "mouse": @@ -472,11 +503,14 @@ class RunMapping: def run_rqtl_plink(self): # os.chdir("") never do this inside a webserver!! - output_filename = webqtlUtil.genRandStr("%s_%s_" % (self.dataset.group.name, self.this_trait.name)) + output_filename = webqtlUtil.genRandStr("%s_%s_" % ( + self.dataset.group.name, self.this_trait.name)) - plink_mapping.gen_pheno_txt_file_plink(self.this_trait, self.dataset, self.vals, pheno_filename=output_filename) + plink_mapping.gen_pheno_txt_file_plink( + self.this_trait, self.dataset, self.vals, pheno_filename=output_filename) - rqtl_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (self.dataset.group.name, self.dataset.group.name, TMPDIR, plink_output_filename, self.this_trait.name, self.maf, TMPDIR, plink_output_filename) + rqtl_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % ( + self.dataset.group.name, self.dataset.group.name, TMPDIR, plink_output_filename, self.this_trait.name, self.maf, TMPDIR, plink_output_filename) os.system(rqtl_command) @@ -508,8 +542,10 @@ class RunMapping: def export_mapping_results(dataset, trait, markers, results_path, mapping_scale, score_type, transform, covariates, n_samples): with open(results_path, "w+") as output_file: - output_file.write("Time/Date: " + datetime.datetime.now().strftime("%x / %X") + "\n") - output_file.write("Population: " + dataset.group.species.title() + " " + dataset.group.name + "\n") + output_file.write( + "Time/Date: " + datetime.datetime.now().strftime("%x / %X") + "\n") + output_file.write( + "Population: " + dataset.group.species.title() + " " + dataset.group.name + "\n") output_file.write("Data Set: " + dataset.fullname + "\n") output_file.write("N Samples: " + str(n_samples) + "\n") if len(transform) > 0: @@ -529,7 +565,8 @@ def export_mapping_results(dataset, trait, markers, results_path, mapping_scale, output_file.write(transform_text + "\n") if dataset.type == "ProbeSet": output_file.write("Gene Symbol: " + trait.symbol + "\n") - output_file.write("Location: " + str(trait.chr) + " @ " + str(trait.mb) + " Mb\n") + output_file.write("Location: " + str(trait.chr) + \ + " @ " + str(trait.mb) + " Mb\n") if len(covariates) > 0: output_file.write("Cofactors (dataset - trait):\n") for covariate in covariates.split(","): @@ -630,9 +667,11 @@ def trim_markers_for_figure(markers): def trim_markers_for_table(markers): if 'lod_score' in list(markers[0].keys()): - sorted_markers = sorted(markers, key=lambda k: k['lod_score'], reverse=True) + sorted_markers = sorted( + markers, key=lambda k: k['lod_score'], reverse=True) else: - sorted_markers = sorted(markers, key=lambda k: k['lrs_value'], reverse=True) + sorted_markers = sorted( + markers, key=lambda k: k['lrs_value'], reverse=True) # ZS: So we end up with a list of just 2000 markers if len(sorted_markers) >= 2000: @@ -643,7 +682,9 @@ def trim_markers_for_table(markers): def write_input_for_browser(this_dataset, gwas_results, annotations): - file_base = this_dataset.group.name + "_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) + file_base = this_dataset.group.name + "_" + \ + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) gwas_filename = file_base + "_GWAS" annot_filename = file_base + "_ANNOT" gwas_path = "{}/gn2/".format(TEMPDIR) + gwas_filename @@ -659,7 +700,8 @@ def write_input_for_browser(this_dataset, gwas_results, annotations): def geno_db_exists(this_dataset): geno_db_name = this_dataset.group.name + "Geno" try: - geno_db = data_set.create_dataset(dataset_name=geno_db_name, get_samplelist=False) + geno_db = data_set.create_dataset( + dataset_name=geno_db_name, get_samplelist=False) return "True" except: return "False" @@ -689,9 +731,11 @@ def get_chr_lengths(mapping_scale, mapping_method, dataset, qtl_results): highest_pos = float(result['cM']) * 1000000 else: highest_pos = float(result['Mb']) * 1000000 - chr_lengths.append({"chr": str(this_chr), "size": str(highest_pos)}) + chr_lengths.append( + {"chr": str(this_chr), "size": str(highest_pos)}) else: - chr_lengths.append({"chr": str(this_chr), "size": str(highest_pos)}) + chr_lengths.append( + {"chr": str(this_chr), "size": str(highest_pos)}) this_chr = chr_as_num else: if mapping_method == "reaper": @@ -722,7 +766,8 @@ def get_perm_strata(this_trait, sample_list, categorical_vars, used_samples): combined_string = "" for var in categorical_vars: if var in list(sample_list.sample_attribute_values[sample].keys()): - combined_string += str(sample_list.sample_attribute_values[sample][var]) + combined_string += str( + sample_list.sample_attribute_values[sample][var]) else: combined_string += "NA" else: @@ -730,7 +775,8 @@ def get_perm_strata(this_trait, sample_list, categorical_vars, used_samples): perm_strata_strings.append(combined_string) - d = dict([(y, x + 1) for x, y in enumerate(sorted(set(perm_strata_strings)))]) + d = dict([(y, x + 1) + for x, y in enumerate(sorted(set(perm_strata_strings)))]) list_to_numbers = [d[x] for x in perm_strata_strings] perm_strata = list_to_numbers diff --git a/wqflask/wqflask/model.py b/wqflask/wqflask/model.py index d7c9ef95..7b9ff8fe 100644 --- a/wqflask/wqflask/model.py +++ b/wqflask/wqflask/model.py @@ -17,7 +17,8 @@ from wqflask.database import Base, init_db class User(Base): __tablename__ = "user" - id = Column(Unicode(36), primary_key=True, default=lambda: str(uuid.uuid4())) + id = Column(Unicode(36), primary_key=True, + default=lambda: str(uuid.uuid4())) email_address = Column(Unicode(50), unique=True, nullable=False) # Todo: Turn on strict mode for Mysql @@ -28,11 +29,13 @@ class User(Base): active = Column(Boolean(), nullable=False, default=True) - registration_info = Column(Text) # json detailing when they were registered, etc. + # json detailing when they were registered, etc. + registration_info = Column(Text) confirmed = Column(Text) # json detailing when they confirmed, etc. - superuser = Column(Text) # json detailing when they became a superuser, otherwise empty + # json detailing when they became a superuser, otherwise empty + superuser = Column(Text) # if not superuser logins = relationship("Login", @@ -66,7 +69,8 @@ class User(Base): def get_collection_by_name(self, collection_name): try: - collect = self.user_collections.filter_by(name=collection_name).first() + collect = self.user_collections.filter_by( + name=collection_name).first() except sqlalchemy.orm.exc.NoResultFound: collect = None return collect @@ -118,12 +122,15 @@ class User(Base): class Login(Base): __tablename__ = "login" - id = Column(Unicode(36), primary_key=True, default=lambda: str(uuid.uuid4())) + id = Column(Unicode(36), primary_key=True, + default=lambda: str(uuid.uuid4())) user = Column(Unicode(36), ForeignKey('user.id')) timestamp = Column(DateTime(), default=lambda: datetime.datetime.utcnow()) ip_address = Column(Unicode(39)) - successful = Column(Boolean(), nullable=False) # False if wrong password was entered - session_id = Column(Text) # Set only if successfully logged in, otherwise should be blank + # False if wrong password was entered + successful = Column(Boolean(), nullable=False) + # Set only if successfully logged in, otherwise should be blank + session_id = Column(Text) # Set to user who assumes identity if this was a login for debugging purposes by a superuser assumed_by = Column(Unicode(36), ForeignKey('user.id')) @@ -137,13 +144,16 @@ class Login(Base): class UserCollection(Base): __tablename__ = "user_collection" - id = Column(Unicode(36), primary_key=True, default=lambda: str(uuid.uuid4())) + id = Column(Unicode(36), primary_key=True, + default=lambda: str(uuid.uuid4())) user = Column(Unicode(36), ForeignKey('user.id')) # I'd prefer this to not have a length, but for the index below it needs one name = Column(Unicode(50)) - created_timestamp = Column(DateTime(), default=lambda: datetime.datetime.utcnow()) - changed_timestamp = Column(DateTime(), default=lambda: datetime.datetime.utcnow()) + created_timestamp = Column( + DateTime(), default=lambda: datetime.datetime.utcnow()) + changed_timestamp = Column( + DateTime(), default=lambda: datetime.datetime.utcnow()) members = Column(Text) # We're going to store them as a json list # This index ensures a user doesn't have more than one collection with the same name diff --git a/wqflask/wqflask/resource_manager.py b/wqflask/wqflask/resource_manager.py index 4591710c..36d4cd61 100644 --- a/wqflask/wqflask/resource_manager.py +++ b/wqflask/wqflask/resource_manager.py @@ -47,7 +47,8 @@ def search_for_user(): params = request.form user_list = [] user_list += get_users_like_unique_column("full_name", params['user_name']) - user_list += get_users_like_unique_column("email_address", params['user_email']) + user_list += get_users_like_unique_column( + "email_address", params['user_email']) return json.dumps(user_list) @@ -61,7 +62,8 @@ def search_for_groups(): user_list = [] user_list += get_users_like_unique_column("full_name", params['user_name']) - user_list += get_users_like_unique_column("email_address", params['user_email']) + user_list += get_users_like_unique_column( + "email_address", params['user_email']) for user in user_list: group_list += get_groups_like_unique_column("admins", user['user_id']) group_list += get_groups_like_unique_column("members", user['user_id']) @@ -124,7 +126,8 @@ def add_group_to_resource(): 'admin': request.form['admin_privilege'] } add_access_mask(resource_id, group_id, access_mask) - flash("Privileges have been added for group {}.".format(group_name), "alert-info") + flash("Privileges have been added for group {}.".format( + group_name), "alert-info") return redirect(url_for("manage_resource", resource_id=resource_id)) else: return render_template("admin/search_for_groups.html", resource_id=resource_id) diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index ed5f9bad..273a97a4 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -52,7 +52,8 @@ class SearchResultPage: search = self.search_terms self.original_search_string = self.search_terms # check for dodgy search terms - rx = re.compile(r'.*\W(href|http|sql|select|update)\W.*', re.IGNORECASE) + rx = re.compile( + r'.*\W(href|http|sql|select|update)\W.*', re.IGNORECASE) if rx.match(search): logger.info("Regex failed search") self.search_term_exists = False @@ -96,7 +97,8 @@ class SearchResultPage: trait_list = [] json_trait_list = [] - species = webqtlDatabaseFunction.retrieve_species(self.dataset.group.name) + species = webqtlDatabaseFunction.retrieve_species( + self.dataset.group.name) # result_set represents the results for each search term; a search of # "shh grin2b" would have two sets of results, one for each term logger.debug("self.results is:", pf(self.results)) @@ -109,7 +111,8 @@ class SearchResultPage: trait_dict = {} trait_id = result[0] - this_trait = create_trait(dataset=self.dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) + this_trait = create_trait( + dataset=self.dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) if this_trait: trait_dict['index'] = index + 1 trait_dict['name'] = this_trait.name @@ -118,7 +121,8 @@ class SearchResultPage: else: trait_dict['display_name'] = this_trait.name trait_dict['dataset'] = this_trait.dataset.name - trait_dict['hmac'] = hmac.data_hmac('{}:{}'.format(this_trait.name, this_trait.dataset.name)) + trait_dict['hmac'] = hmac.data_hmac( + '{}:{}'.format(this_trait.name, this_trait.dataset.name)) if this_trait.dataset.type == "ProbeSet": trait_dict['symbol'] = this_trait.symbol if this_trait.symbol else "N/A" trait_dict['description'] = "N/A" @@ -168,9 +172,11 @@ class SearchResultPage: self.trait_list = trait_list if self.dataset.type == "ProbeSet": - self.header_data_names = ['index', 'display_name', 'symbol', 'description', 'location', 'mean', 'lrs_score', 'lrs_location', 'additive'] + self.header_data_names = ['index', 'display_name', 'symbol', 'description', + 'location', 'mean', 'lrs_score', 'lrs_location', 'additive'] elif self.dataset.type == "Publish": - self.header_data_names = ['index', 'display_name', 'description', 'mean', 'authors', 'pubmed_text', 'lrs_score', 'lrs_location', 'additive'] + self.header_data_names = ['index', 'display_name', 'description', 'mean', + 'authors', 'pubmed_text', 'lrs_score', 'lrs_location', 'additive'] elif self.dataset.type == "Geno": self.header_data_names = ['index', 'display_name', 'location'] @@ -184,7 +190,8 @@ class SearchResultPage: combined_from_clause = "" combined_where_clause = "" - previous_from_clauses = [] # The same table can't be referenced twice in the from clause + # The same table can't be referenced twice in the from clause + previous_from_clauses = [] logger.debug("len(search_terms)>1") symbol_list = [] @@ -198,7 +205,8 @@ class SearchResultPage: for i, a_search in enumerate(alias_terms): the_search = self.get_search_ob(a_search) if the_search != None: - get_from_clause = getattr(the_search, "get_from_clause", None) + get_from_clause = getattr( + the_search, "get_from_clause", None) if callable(get_from_clause): from_clause = the_search.get_from_clause() if from_clause in previous_from_clauses: @@ -222,7 +230,8 @@ class SearchResultPage: else: the_search = self.get_search_ob(a_search) if the_search != None: - get_from_clause = getattr(the_search, "get_from_clause", None) + get_from_clause = getattr( + the_search, "get_from_clause", None) if callable(get_from_clause): from_clause = the_search.get_from_clause() if from_clause in previous_from_clauses: @@ -241,7 +250,8 @@ class SearchResultPage: self.search_term_exists = False if self.search_term_exists: combined_where_clause = "(" + combined_where_clause + ")" - final_query = the_search.compile_final_query(combined_from_clause, combined_where_clause) + final_query = the_search.compile_final_query( + combined_from_clause, combined_where_clause) results = the_search.execute(final_query) self.results.extend(results) @@ -312,7 +322,8 @@ def get_aliases(symbol_list, species): symbols_string = ",".join(updated_symbols) filtered_aliases = [] - response = requests.get(GN2_BASE_URL + "/gn3/gene/aliases2/" + symbols_string) + response = requests.get( + GN2_BASE_URL + "/gn3/gene/aliases2/" + symbols_string) if response: alias_lists = json.loads(response.content) seen = set() diff --git a/wqflask/wqflask/show_trait/SampleList.py b/wqflask/wqflask/show_trait/SampleList.py index 496dee57..6419335e 100644 --- a/wqflask/wqflask/show_trait/SampleList.py +++ b/wqflask/wqflask/show_trait/SampleList.py @@ -74,14 +74,20 @@ class SampleList: if 'rrid' in sample.extra_attributes: if self.dataset.group.species == "mouse": if len(sample.extra_attributes['rrid'].split(":")) > 1: - the_rrid = sample.extra_attributes['rrid'].split(":")[1] - sample.extra_attributes['rrid'] = [sample.extra_attributes['rrid']] - sample.extra_attributes['rrid'].append(webqtlConfig.RRID_MOUSE_URL % the_rrid) + the_rrid = sample.extra_attributes['rrid'].split(":")[ + 1] + sample.extra_attributes['rrid'] = [ + sample.extra_attributes['rrid']] + sample.extra_attributes['rrid'].append( + webqtlConfig.RRID_MOUSE_URL % the_rrid) elif self.dataset.group.species == "rat": if len(str(sample.extra_attributes['rrid'])): - the_rrid = sample.extra_attributes['rrid'].split("_")[1] - sample.extra_attributes['rrid'] = [sample.extra_attributes['rrid']] - sample.extra_attributes['rrid'].append(webqtlConfig.RRID_RAT_URL % the_rrid) + the_rrid = sample.extra_attributes['rrid'].split("_")[ + 1] + sample.extra_attributes['rrid'] = [ + sample.extra_attributes['rrid']] + sample.extra_attributes['rrid'].append( + webqtlConfig.RRID_RAT_URL % the_rrid) self.sample_list.append(sample) @@ -130,7 +136,8 @@ class SampleList: self.attributes[key].name = name self.attributes[key].distinct_values = [ item.Value for item in values] - self.attributes[key].distinct_values = natural_sort(self.attributes[key].distinct_values) + self.attributes[key].distinct_values = natural_sort( + self.attributes[key].distinct_values) all_numbers = True for value in self.attributes[key].distinct_values: try: @@ -170,7 +177,8 @@ class SampleList: except ValueError: pass - attribute_values[self.attributes[item.Id].name.lower()] = attribute_value + attribute_values[self.attributes[item.Id].name.lower( + )] = attribute_value self.sample_attribute_values[sample_name] = attribute_values def get_first_attr_col(self): diff --git a/wqflask/wqflask/show_trait/export_trait_data.py b/wqflask/wqflask/show_trait/export_trait_data.py index f0fcd27d..81e7903b 100644 --- a/wqflask/wqflask/show_trait/export_trait_data.py +++ b/wqflask/wqflask/show_trait/export_trait_data.py @@ -40,16 +40,23 @@ def get_export_metadata(trait_id, dataset_name): metadata = [] if dataset.type == "Publish": metadata.append(["Phenotype ID: " + trait_id]) - metadata.append(["Phenotype URL: " + "http://genenetwork.org/show_trait?trait_id=" + trait_id + "&dataset=" + dataset_name]) + metadata.append(["Phenotype URL: " + "http://genenetwork.org/show_trait?trait_id=" + \ + trait_id + "&dataset=" + dataset_name]) metadata.append(["Group: " + dataset.group.name]) - metadata.append(["Phenotype: " + this_trait.description_display.replace(",", "\",\"")]) - metadata.append(["Authors: " + (this_trait.authors if this_trait.authors else "N/A")]) - metadata.append(["Title: " + (this_trait.title if this_trait.title else "N/A")]) - metadata.append(["Journal: " + (this_trait.journal if this_trait.journal else "N/A")]) - metadata.append(["Dataset Link: http://gn1.genenetwork.org/webqtl/main.py?FormID=sharinginfo&InfoPageName=" + dataset.name]) + metadata.append( + ["Phenotype: " + this_trait.description_display.replace(",", "\",\"")]) + metadata.append( + ["Authors: " + (this_trait.authors if this_trait.authors else "N/A")]) + metadata.append( + ["Title: " + (this_trait.title if this_trait.title else "N/A")]) + metadata.append( + ["Journal: " + (this_trait.journal if this_trait.journal else "N/A")]) + metadata.append( + ["Dataset Link: http://gn1.genenetwork.org/webqtl/main.py?FormID=sharinginfo&InfoPageName=" + dataset.name]) else: metadata.append(["Record ID: " + trait_id]) - metadata.append(["Trait URL: " + "http://genenetwork.org/show_trait?trait_id=" + trait_id + "&dataset=" + dataset_name]) + metadata.append(["Trait URL: " + "http://genenetwork.org/show_trait?trait_id=" + \ + trait_id + "&dataset=" + dataset_name]) if this_trait.symbol: metadata.append(["Symbol: " + this_trait.symbol]) metadata.append(["Dataset: " + dataset.name]) diff --git a/wqflask/wqflask/snp_browser/snp_browser.py b/wqflask/wqflask/snp_browser/snp_browser.py index e5c67165..7fcbe984 100644 --- a/wqflask/wqflask/snp_browser/snp_browser.py +++ b/wqflask/wqflask/snp_browser/snp_browser.py @@ -27,9 +27,11 @@ class SnpBrowser: self.table_rows = [] if self.limit_strains == "true": - self.header_fields, self.empty_field_count, self.header_data_names = get_header_list(variant_type=self.variant_type, strains=self.chosen_strains, empty_columns = self.empty_columns) + self.header_fields, self.empty_field_count, self.header_data_names = get_header_list( + variant_type=self.variant_type, strains=self.chosen_strains, empty_columns = self.empty_columns) else: - self.header_fields, self.empty_field_count, self.header_data_names = get_header_list(variant_type=self.variant_type, strains=self.strain_lists, species = self.species_name, empty_columns = self.empty_columns) + self.header_fields, self.empty_field_count, self.header_data_names = get_header_list( + variant_type=self.variant_type, strains=self.strain_lists, species = self.species_name, empty_columns = self.empty_columns) def initialize_parameters(self, start_vars): if 'first_run' in start_vars: @@ -53,10 +55,12 @@ class SnpBrowser: self.rat_chr_list = [] mouse_species_ob = species.TheSpecies(species_name="Mouse") for key in mouse_species_ob.chromosomes.chromosomes: - self.mouse_chr_list.append(mouse_species_ob.chromosomes.chromosomes[key].name) + self.mouse_chr_list.append( + mouse_species_ob.chromosomes.chromosomes[key].name) rat_species_ob = species.TheSpecies(species_name="Rat") for key in rat_species_ob.chromosomes.chromosomes: - self.rat_chr_list.append(rat_species_ob.chromosomes.chromosomes[key].name) + self.rat_chr_list.append( + rat_species_ob.chromosomes.chromosomes[key].name) if self.species_id == 1: self.this_chr_list = self.mouse_chr_list @@ -109,9 +113,11 @@ class SnpBrowser: "CAST/EiJ"] self.chosen_strains_rat = ["BN", "F344", "WLI", "WMI"] if 'chosen_strains_mouse' in start_vars: - self.chosen_strains_mouse = start_vars['chosen_strains_mouse'].split(",") + self.chosen_strains_mouse = start_vars['chosen_strains_mouse'].split( + ",") if 'chosen_strains_rat' in start_vars: - self.chosen_strains_rat = start_vars['chosen_strains_rat'].split(",") + self.chosen_strains_rat = start_vars['chosen_strains_rat'].split( + ",") if self.species_id == 1: self.chosen_strains = self.chosen_strains_mouse @@ -150,9 +156,11 @@ class SnpBrowser: if self.gene_name != "": if self.species_id != 0: - query = "SELECT geneSymbol, chromosome, txStart, txEnd FROM GeneList WHERE SpeciesId = %s AND geneSymbol = '%s'" % (self.species_id, self.gene_name) + query = "SELECT geneSymbol, chromosome, txStart, txEnd FROM GeneList WHERE SpeciesId = %s AND geneSymbol = '%s'" % ( + self.species_id, self.gene_name) else: - query = "SELECT geneSymbol, chromosome, txStart, txEnd FROM GeneList WHERE geneSymbol = '%s'" % (self.gene_name) + query = "SELECT geneSymbol, chromosome, txStart, txEnd FROM GeneList WHERE geneSymbol = '%s'" % ( + self.gene_name) result = g.db.execute(query).fetchone() if result: self.gene_name, self.chr, self.start_mb, self.end_mb = result @@ -163,9 +171,11 @@ class SnpBrowser: query = "SELECT Id, Chromosome, Position, Position+0.000001 FROM SnpAll WHERE Rs = '%s'" % self.gene_name else: if self.species_id != 0: - query = "SELECT Id, Chromosome, Position, Position+0.000001 FROM SnpAll where SpeciesId = %s AND SnpName = '%s'" % (self.species_id, self.gene_name) + query = "SELECT Id, Chromosome, Position, Position+0.000001 FROM SnpAll where SpeciesId = %s AND SnpName = '%s'" % ( + self.species_id, self.gene_name) else: - query = "SELECT Id, Chromosome, Position, Position+0.000001 FROM SnpAll where SnpName = '%s'" % (self.gene_name) + query = "SELECT Id, Chromosome, Position, Position+0.000001 FROM SnpAll where SnpName = '%s'" % ( + self.gene_name) result_snp = g.db.execute(query).fetchall() if result_snp: self.snp_list = [item[0] for item in result_snp] @@ -177,9 +187,11 @@ class SnpBrowser: elif self.variant_type == "InDel": if self.gene_name[0] == "I": if self.species_id != 0: - query = "SELECT Id, Chromosome, Mb_start, Mb_end FROM IndelAll WHERE SpeciesId = %s AND Name = '%s'" % (self.species_id, self.gene_name) + query = "SELECT Id, Chromosome, Mb_start, Mb_end FROM IndelAll WHERE SpeciesId = %s AND Name = '%s'" % ( + self.species_id, self.gene_name) else: - query = "SELECT Id, Chromosome, Mb_start, Mb_end FROM IndelAll WHERE Name = '%s'" % (self.gene_name) + query = "SELECT Id, Chromosome, Mb_start, Mb_end FROM IndelAll WHERE Name = '%s'" % ( + self.gene_name) result_snp = g.db.execute(query).fetchall() if result_snp: self.snp_list = [item[0] for item in result_snp] @@ -255,7 +267,8 @@ class SnpBrowser: if self.limit_strains == "true" and len(self.chosen_strains) > 0: for item in self.chosen_strains: - index = self.strain_lists[self.species_name.lower()].index(item) + index = self.strain_lists[self.species_name.lower()].index( + item) strain_index_list.append(index) for seq, result in enumerate(results): @@ -263,7 +276,8 @@ class SnpBrowser: if self.variant_type == "SNP": display_strains = [] - snp_id, species_id, snp_name, rs, chr, mb, mb_2016, alleles, snp_source, conservation_score = result[:10] + snp_id, species_id, snp_name, rs, chr, mb, mb_2016, alleles, snp_source, conservation_score = result[ + :10] effect_list = result[10:28] if self.species_id == 1: self.allele_list = result[30:] @@ -279,7 +293,8 @@ class SnpBrowser: self.allele_list = display_strains effect_info_dict = get_effect_info(effect_list) - coding_domain_list = ['Start Gained', 'Start Lost', 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] + coding_domain_list = ['Start Gained', 'Start Lost', + 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] intron_domain_list = ['Splice Site', 'Nonsplice Site'] for key in effect_info_dict: @@ -296,19 +311,22 @@ class SnpBrowser: if 'Intergenic' in domain: if self.gene_name != "": - gene_id = get_gene_id(self.species_id, self.gene_name) + gene_id = get_gene_id( + self.species_id, self.gene_name) gene = [gene_id, self.gene_name] else: gene = check_if_in_gene(species_id, chr, mb) transcript = exon = function = function_details = '' if self.redundant == "false" or last_mb != mb: # filter redundant if self.include_record(domain, function, snp_source, conservation_score): - info_list = [snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, function, function_details, snp_source, conservation_score, snp_id] + info_list = [snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, + function, function_details, snp_source, conservation_score, snp_id] info_list.extend(self.allele_list) filtered_results.append(info_list) last_mb = mb else: - gene_list, transcript_list, exon_list, function_list, function_details_list = effect_info_dict[key] + gene_list, transcript_list, exon_list, function_list, function_details_list = effect_info_dict[ + key] for index, item in enumerate(gene_list): gene = item transcript = transcript_list[index] @@ -325,13 +343,15 @@ class SnpBrowser: function = "" if function_details_list: - function_details = "Biotype: " + function_details_list[index] + function_details = "Biotype: " + \ + function_details_list[index] else: function_details = "" if self.redundant == "false" or last_mb != mb: if self.include_record(domain, function, snp_source, conservation_score): - info_list = [snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, function, function_details, snp_source, conservation_score, snp_id] + info_list = [snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, + function, function_details, snp_source, conservation_score, snp_id] info_list.extend(self.allele_list) filtered_results.append(info_list) last_mb = mb @@ -345,7 +365,8 @@ class SnpBrowser: gene = "No Gene" domain = conservation_score = snp_id = snp_name = rs = flank_3 = flank_5 = ncbi = function = "" if self.include_record(domain, function, source_name, conservation_score): - filtered_results.append([indel_name, indel_chr, indel_mb_start, indel_mb_end, indel_strand, indel_type, indel_size, indel_sequence, source_name]) + filtered_results.append([indel_name, indel_chr, indel_mb_start, indel_mb_end, + indel_strand, indel_type, indel_size, indel_sequence, source_name]) last_mb = indel_mb_start else: @@ -365,7 +386,8 @@ class SnpBrowser: if gene_name and (gene_name not in gene_name_list): gene_name_list.append(gene_name) if len(gene_name_list) > 0: - gene_id_name_dict = get_gene_id_name_dict(self.species_id, gene_name_list) + gene_id_name_dict = get_gene_id_name_dict( + self.species_id, gene_name_list) # ZS: list of booleans representing which columns are entirely empty, so they aren't displayed on the page; only including ones that are sometimes empty (since there's always a location, etc) self.empty_columns = { @@ -383,7 +405,8 @@ class SnpBrowser: for i, result in enumerate(self.filtered_results): this_row = {} if self.variant_type == "SNP": - snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, function, function_details, snp_source, conservation_score, snp_id = result[:14] + snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, function, function_details, snp_source, conservation_score, snp_id = result[ + :14] allele_value_list = result[14:] if rs: snp_url = webqtlConfig.DBSNP % (rs) @@ -394,9 +417,11 @@ class SnpBrowser: end_bp = int(mb * 1000000 + 100) position_info = "chr%s:%d-%d" % (chr, start_bp, end_bp) if self.species_id == 2: - snp_url = webqtlConfig.GENOMEBROWSER_URL % ("rn6", position_info) + snp_url = webqtlConfig.GENOMEBROWSER_URL % ( + "rn6", position_info) else: - snp_url = webqtlConfig.GENOMEBROWSER_URL % ("mm10", position_info) + snp_url = webqtlConfig.GENOMEBROWSER_URL % ( + "mm10", position_info) mb = float(mb) mb_formatted = "%2.6f" % mb @@ -429,7 +454,8 @@ class SnpBrowser: gene_link = "" if transcript: - transcript_link = webqtlConfig.ENSEMBLETRANSCRIPT_URL % (transcript) + transcript_link = webqtlConfig.ENSEMBLETRANSCRIPT_URL % ( + transcript) self.empty_columns['transcript'] = "true" else: transcript_link = "" @@ -460,7 +486,8 @@ class SnpBrowser: function_list = function_details.strip().split(",") function_list = [item.strip() for item in function_list] function_list[0] = function_list[0].title() - function_details = ", ".join(item for item in function_list) + function_details = ", ".join( + item for item in function_list) function_details = function_details.replace("_", " ") function_details = function_details.replace("/", " -> ") if function_details == "Biotype: Protein Coding": @@ -675,8 +702,10 @@ def get_header_list(variant_type, strains, species=None, empty_columns=None): header_fields = [] header_data_names = [] if variant_type == "SNP": - header_fields.append(['Index', 'SNP ID', 'Chr', 'Mb', 'Alleles', 'Source', 'ConScore', 'Gene', 'Transcript', 'Exon', 'Domain 1', 'Domain 2', 'Function', 'Details']) - header_data_names = ['index', 'snp_name', 'chr', 'mb_formatted', 'alleles', 'snp_source', 'conservation_score', 'gene_name', 'transcript', 'exon', 'domain_1', 'domain_2', 'function', 'function_details'] + header_fields.append(['Index', 'SNP ID', 'Chr', 'Mb', 'Alleles', 'Source', 'ConScore', + 'Gene', 'Transcript', 'Exon', 'Domain 1', 'Domain 2', 'Function', 'Details']) + header_data_names = ['index', 'snp_name', 'chr', 'mb_formatted', 'alleles', 'snp_source', 'conservation_score', + 'gene_name', 'transcript', 'exon', 'domain_1', 'domain_2', 'function', 'function_details'] header_fields.append(strain_list) header_data_names += strain_list @@ -712,8 +741,10 @@ def get_header_list(variant_type, strains, species=None, empty_columns=None): header_data_names.remove(col) elif variant_type == "InDel": - header_fields = ['Index', 'ID', 'Type', 'InDel Chr', 'Mb Start', 'Mb End', 'Strand', 'Size', 'Sequence', 'Source'] - header_data_names = ['index', 'indel_name', 'indel_type', 'indel_chr', 'indel_mb_s', 'indel_mb_e', 'indel_strand', 'indel_size', 'indel_sequence', 'source_name'] + header_fields = ['Index', 'ID', 'Type', 'InDel Chr', + 'Mb Start', 'Mb End', 'Strand', 'Size', 'Sequence', 'Source'] + header_data_names = ['index', 'indel_name', 'indel_type', 'indel_chr', 'indel_mb_s', + 'indel_mb_e', 'indel_strand', 'indel_size', 'indel_sequence', 'source_name'] return header_fields, empty_field_count, header_data_names @@ -726,10 +757,13 @@ def get_effect_details_by_category(effect_name=None, effect_value=None): function_detail_list = [] tmp_list = [] - gene_group_list = ['Upstream', 'Downstream', 'Splice Site', 'Nonsplice Site', '3\' UTR'] - biotype_group_list = ['Unknown Effect In Exon', 'Start Gained', 'Start Lost', 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] + gene_group_list = ['Upstream', 'Downstream', + 'Splice Site', 'Nonsplice Site', '3\' UTR'] + biotype_group_list = ['Unknown Effect In Exon', 'Start Gained', + 'Start Lost', 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] new_codon_group_list = ['Start Gained'] - codon_effect_group_list = ['Start Lost', 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] + codon_effect_group_list = [ + 'Start Lost', 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] effect_detail_list = effect_value.strip().split('|') effect_detail_list = [item.strip() for item in effect_detail_list] @@ -773,8 +807,10 @@ def get_effect_info(effect_list): effect_detail_list = [] effect_info_dict = {} - prime3_utr, prime5_utr, upstream, downstream, intron, nonsplice_site, splice_site, intergenic = effect_list[:8] - exon, non_synonymous_coding, synonymous_coding, start_gained, start_lost, stop_gained, stop_lost, unknown_effect_in_exon = effect_list[8:16] + prime3_utr, prime5_utr, upstream, downstream, intron, nonsplice_site, splice_site, intergenic = effect_list[ + :8] + exon, non_synonymous_coding, synonymous_coding, start_gained, start_lost, stop_gained, stop_lost, unknown_effect_in_exon = effect_list[ + 8:16] if intergenic: domain = "Intergenic" @@ -783,59 +819,72 @@ def get_effect_info(effect_list): # if not exon, get gene list/transcript list info if upstream: domain = "Upstream" - effect_detail_list = get_effect_details_by_category(effect_name='Upstream', effect_value=upstream) + effect_detail_list = get_effect_details_by_category( + effect_name='Upstream', effect_value=upstream) effect_info_dict[domain] = effect_detail_list if downstream: domain = "Downstream" - effect_detail_list = get_effect_details_by_category(effect_name='Downstream', effect_value=downstream) + effect_detail_list = get_effect_details_by_category( + effect_name='Downstream', effect_value=downstream) effect_info_dict[domain] = effect_detail_list if intron: if splice_site: domain = "Splice Site" - effect_detail_list = get_effect_details_by_category(effect_name='Splice Site', effect_value=splice_site) + effect_detail_list = get_effect_details_by_category( + effect_name='Splice Site', effect_value=splice_site) effect_info_dict[domain] = effect_detail_list if nonsplice_site: domain = "Nonsplice Site" - effect_detail_list = get_effect_details_by_category(effect_name='Nonsplice Site', effect_value=nonsplice_site) + effect_detail_list = get_effect_details_by_category( + effect_name='Nonsplice Site', effect_value=nonsplice_site) effect_info_dict[domain] = effect_detail_list # get gene, transcript_list, and exon info if prime3_utr: domain = "3\' UTR" - effect_detail_list = get_effect_details_by_category(effect_name='3\' UTR', effect_value=prime3_utr) + effect_detail_list = get_effect_details_by_category( + effect_name='3\' UTR', effect_value=prime3_utr) effect_info_dict[domain] = effect_detail_list if prime5_utr: domain = "5\' UTR" - effect_detail_list = get_effect_details_by_category(effect_name='5\' UTR', effect_value=prime5_utr) + effect_detail_list = get_effect_details_by_category( + effect_name='5\' UTR', effect_value=prime5_utr) effect_info_dict[domain] = effect_detail_list if start_gained: domain = "Start Gained" - effect_detail_list = get_effect_details_by_category(effect_name='Start Gained', effect_value=start_gained) + effect_detail_list = get_effect_details_by_category( + effect_name='Start Gained', effect_value=start_gained) effect_info_dict[domain] = effect_detail_list if unknown_effect_in_exon: domain = "Unknown Effect In Exon" - effect_detail_list = get_effect_details_by_category(effect_name='Unknown Effect In Exon', effect_value=unknown_effect_in_exon) + effect_detail_list = get_effect_details_by_category( + effect_name='Unknown Effect In Exon', effect_value=unknown_effect_in_exon) effect_info_dict[domain] = effect_detail_list if start_lost: domain = "Start Lost" - effect_detail_list = get_effect_details_by_category(effect_name='Start Lost', effect_value=start_lost) + effect_detail_list = get_effect_details_by_category( + effect_name='Start Lost', effect_value=start_lost) effect_info_dict[domain] = effect_detail_list if stop_gained: domain = "Stop Gained" - effect_detail_list = get_effect_details_by_category(effect_name='Stop Gained', effect_value=stop_gained) + effect_detail_list = get_effect_details_by_category( + effect_name='Stop Gained', effect_value=stop_gained) effect_info_dict[domain] = effect_detail_list if stop_lost: domain = "Stop Lost" - effect_detail_list = get_effect_details_by_category(effect_name='Stop Lost', effect_value=stop_lost) + effect_detail_list = get_effect_details_by_category( + effect_name='Stop Lost', effect_value=stop_lost) effect_info_dict[domain] = effect_detail_list if non_synonymous_coding: domain = "Nonsynonymous" - effect_detail_list = get_effect_details_by_category(effect_name='Nonsynonymous', effect_value=non_synonymous_coding) + effect_detail_list = get_effect_details_by_category( + effect_name='Nonsynonymous', effect_value=non_synonymous_coding) effect_info_dict[domain] = effect_detail_list if synonymous_coding: domain = "Synonymous" - effect_detail_list = get_effect_details_by_category(effect_name='Synonymous', effect_value=synonymous_coding) + effect_detail_list = get_effect_details_by_category( + effect_name='Synonymous', effect_value=synonymous_coding) effect_info_dict[domain] = effect_detail_list return effect_info_dict @@ -863,7 +912,8 @@ def get_gene_id_name_dict(species_id, gene_name_list): gene_id_name_dict = {} if len(gene_name_list) == 0: return "" - gene_name_str_list = ["'" + gene_name + "'" for gene_name in gene_name_list] + gene_name_str_list = ["'" + gene_name + \ + "'" for gene_name in gene_name_list] gene_name_str = ",".join(gene_name_str_list) query = """ diff --git a/wqflask/wqflask/update_search_results.py b/wqflask/wqflask/update_search_results.py index 07073d6a..2e467dc8 100644 --- a/wqflask/wqflask/update_search_results.py +++ b/wqflask/wqflask/update_search_results.py @@ -52,10 +52,12 @@ class GSearch: self.trait_list = [] with Bench("Creating trait objects"): for line in re: - dataset = create_dataset(line[3], "ProbeSet", get_samplelist=False) + dataset = create_dataset( + line[3], "ProbeSet", get_samplelist=False) trait_id = line[4] # with Bench("Building trait object"): - this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) + this_trait = GeneralTrait( + dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) self.trait_list.append(this_trait) elif self.type == "phenotype": @@ -97,7 +99,8 @@ class GSearch: for line in re: dataset = create_dataset(line[2], "Publish") trait_id = line[3] - this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) + this_trait = GeneralTrait( + dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) self.trait_list.append(this_trait) self.results = self.convert_to_json() diff --git a/wqflask/wqflask/user_login.py b/wqflask/wqflask/user_login.py index 2a2f8484..708d43d2 100644 --- a/wqflask/wqflask/user_login.py +++ b/wqflask/wqflask/user_login.py @@ -70,7 +70,8 @@ def set_password(password): assert len(password) >= 6, "Password shouldn't be shorter than 6 characters" - encoded_password = encode_password(pass_gen_fields, pass_gen_fields['unencrypted_password']) + encoded_password = encode_password( + pass_gen_fields, pass_gen_fields['unencrypted_password']) return encoded_password @@ -161,12 +162,16 @@ def verify_email(): # As long as they have access to the email account # We might as well log them in session_id_signed = get_signed_session_id(user_details) - flash("Thank you for logging in {}.".format(user_details['full_name']), "alert-success") - response = make_response(redirect(url_for('index_page', import_collections=import_col, anon_id=anon_id))) - response.set_cookie(UserSession.user_cookie_name, session_id_signed, max_age=None) + flash("Thank you for logging in {}.".format( + user_details['full_name']), "alert-success") + response = make_response(redirect( + url_for('index_page', import_collections=import_col, anon_id=anon_id))) + response.set_cookie(UserSession.user_cookie_name, + session_id_signed, max_age=None) return response else: - flash("Invalid code: Password reset code does not exist or might have expired!", "error") + flash( + "Invalid code: Password reset code does not exist or might have expired!", "error") @app.route("/n/login", methods=('GET', 'POST')) @@ -195,23 +200,28 @@ def login(): display_id = user_details['orcid'] else: display_id = "" - flash("Thank you for logging in {}.".format(display_id), "alert-success") + flash("Thank you for logging in {}.".format( + display_id), "alert-success") response = make_response(redirect(url_for('index_page'))) - response.set_cookie(UserSession.user_cookie_name, session_id_signed, max_age=None) + response.set_cookie( + UserSession.user_cookie_name, session_id_signed, max_age=None) else: flash("Something went unexpectedly wrong.", "alert-danger") response = make_response(redirect(url_for('index_page'))) return response else: - user_details = get_user_by_unique_column("email_address", params['email_address']) + user_details = get_user_by_unique_column( + "email_address", params['email_address']) password_match = False if user_details: submitted_password = params['password'] pwfields = user_details['password'] if isinstance(pwfields, str): pwfields = json.loads(pwfields) - encrypted_pass_fields = encode_password(pwfields, submitted_password) - password_match = pbkdf2.safe_str_cmp(encrypted_pass_fields['password'], pwfields['password']) + encrypted_pass_fields = encode_password( + pwfields, submitted_password) + password_match = pbkdf2.safe_str_cmp( + encrypted_pass_fields['password'], pwfields['password']) else: # Invalid e-mail flash("Invalid e-mail address. Please try again.", "alert-danger") @@ -227,12 +237,16 @@ def login(): anon_id = params['anon_id'] session_id_signed = get_signed_session_id(user_details) - flash("Thank you for logging in {}.".format(user_details['full_name']), "alert-success") - response = make_response(redirect(url_for('index_page', import_collections=import_col, anon_id=anon_id))) - response.set_cookie(UserSession.user_cookie_name, session_id_signed, max_age=None) + flash("Thank you for logging in {}.".format( + user_details['full_name']), "alert-success") + response = make_response(redirect( + url_for('index_page', import_collections=import_col, anon_id=anon_id))) + response.set_cookie( + UserSession.user_cookie_name, session_id_signed, max_age=None) return response else: - email_ob = send_verification_email(user_details, template_name="email/user_verification.txt") + email_ob = send_verification_email( + user_details, template_name="email/user_verification.txt") return render_template("newsecurity/verification_still_needed.html", subject=email_ob['subject']) else: # Incorrect password # ZS: It previously seemed to store that there was an incorrect log-in attempt here, but it did so in the MySQL DB so this might need to be reproduced with Redis @@ -252,8 +266,10 @@ def github_oauth2(): "code": code } - result = requests.post("https://github.com/login/oauth/access_token", json=data) - result_dict = {arr[0]: arr[1] for arr in [tok.split("=") for tok in result.text.split("&")]} + result = requests.post( + "https://github.com/login/oauth/access_token", json=data) + result_dict = {arr[0]: arr[1] + for arr in [tok.split("=") for tok in result.text.split("&")]} github_user = get_github_user_details(result_dict["access_token"]) @@ -277,7 +293,8 @@ def github_oauth2(): def get_github_user_details(access_token): from utility.tools import GITHUB_API_URL - result = requests.get(GITHUB_API_URL, headers={'Authorization': 'token ' + access_token}).content + result = requests.get(GITHUB_API_URL, headers={ + 'Authorization': 'token ' + access_token}).content return json.loads(result) @@ -323,7 +340,8 @@ def orcid_oauth2(): def get_github_user_details(access_token): from utility.tools import GITHUB_API_URL - result = requests.get(GITHUB_API_URL, headers={'Authorization': 'token ' + access_token}).content + result = requests.get(GITHUB_API_URL, headers={ + 'Authorization': 'token ' + access_token}).content return json.loads(result) @@ -389,13 +407,16 @@ def forgot_password_submit(): next_page = None if email_address != "": logger.debug("Wants to send password E-mail to ", email_address) - user_details = get_user_by_unique_column("email_address", email_address) + user_details = get_user_by_unique_column( + "email_address", email_address) if user_details: - email_subject = send_forgot_password_email(user_details["email_address"]) + email_subject = send_forgot_password_email( + user_details["email_address"]) return render_template("new_security/forgot_password_step2.html", subject=email_subject) else: - flash("The e-mail entered is not associated with an account.", "alert-danger") + flash("The e-mail entered is not associated with an account.", + "alert-danger") return redirect(url_for("forgot_password")) else: @@ -417,7 +438,8 @@ def password_reset(): return render_template( "new_security/password_reset.html", user_encode=user_details["email_address"]) else: - flash("Invalid code: Password reset code does not exist or might have expired!", "error") + flash( + "Invalid code: Password reset code does not exist or might have expired!", "error") return redirect(url_for("login")) else: return redirect(url_for("login")) @@ -446,21 +468,27 @@ def register_user(params): errors = [] user_details = {} - user_details['email_address'] = params.get('email_address', '').encode("utf-8").strip() + user_details['email_address'] = params.get( + 'email_address', '').encode("utf-8").strip() if not (5 <= len(user_details['email_address']) <= 50): - errors.append('Email Address needs to be between 5 and 50 characters.') + errors.append( + 'Email Address needs to be between 5 and 50 characters.') else: - email_exists = get_user_by_unique_column("email_address", user_details['email_address']) + email_exists = get_user_by_unique_column( + "email_address", user_details['email_address']) if email_exists: errors.append('User already exists with that email') - user_details['full_name'] = params.get('full_name', '').encode("utf-8").strip() + user_details['full_name'] = params.get( + 'full_name', '').encode("utf-8").strip() if not (5 <= len(user_details['full_name']) <= 50): errors.append('Full Name needs to be between 5 and 50 characters.') - user_details['organization'] = params.get('organization', '').encode("utf-8").strip() + user_details['organization'] = params.get( + 'organization', '').encode("utf-8").strip() if user_details['organization'] and not (5 <= len(user_details['organization']) <= 50): - errors.append('Organization needs to be empty or between 5 and 50 characters.') + errors.append( + 'Organization needs to be empty or between 5 and 50 characters.') password = str(params.get('password', '')) if not (6 <= len(password)): @@ -493,7 +521,8 @@ def register(): errors = register_user(params) if len(errors) == 0: - flash("Registration successful. You may login with your new account", "alert-info") + flash( + "Registration successful. You may login with your new account", "alert-info") return redirect(url_for("login")) return render_template("new_security/register_user.html", values=params, errors=errors) diff --git a/wqflask/wqflask/user_manager.py b/wqflask/wqflask/user_manager.py index a9bd65e6..5610833b 100644 --- a/wqflask/wqflask/user_manager.py +++ b/wqflask/wqflask/user_manager.py @@ -93,8 +93,10 @@ class AnonUser: this_collection = {} this_collection['id'] = collection['id'] this_collection['name'] = collection['name'] - this_collection['created_timestamp'] = collection['created_timestamp'].strftime('%b %d %Y %I:%M%p') - this_collection['changed_timestamp'] = collection['changed_timestamp'].strftime('%b %d %Y %I:%M%p') + this_collection['created_timestamp'] = collection['created_timestamp'].strftime( + '%b %d %Y %I:%M%p') + this_collection['changed_timestamp'] = collection['changed_timestamp'].strftime( + '%b %d %Y %I:%M%p') this_collection['num_members'] = collection['num_members'] this_collection['members'] = collection['members'] updated_collections.append(this_collection) @@ -108,21 +110,26 @@ class AnonUser: else: collections = json.loads(json_collections) for collection in collections: - collection['created_timestamp'] = datetime.datetime.strptime(collection['created_timestamp'], '%b %d %Y %I:%M%p') - collection['changed_timestamp'] = datetime.datetime.strptime(collection['changed_timestamp'], '%b %d %Y %I:%M%p') + collection['created_timestamp'] = datetime.datetime.strptime( + collection['created_timestamp'], '%b %d %Y %I:%M%p') + collection['changed_timestamp'] = datetime.datetime.strptime( + collection['changed_timestamp'], '%b %d %Y %I:%M%p') - collections = sorted(collections, key=lambda i: i['changed_timestamp'], reverse=True) + collections = sorted( + collections, key=lambda i: i['changed_timestamp'], reverse=True) return collections def import_traits_to_user(self): result = Redis.get(self.key) collections_list = json.loads(result if result else "[]") for collection in collections_list: - collection_exists = g.user_session.get_collection_by_name(collection['name']) + collection_exists = g.user_session.get_collection_by_name( + collection['name']) if collection_exists: continue else: - g.user_session.add_collection(collection['name'], collection['members']) + g.user_session.add_collection( + collection['name'], collection['members']) def display_num_collections(self): """ @@ -148,7 +155,8 @@ def verify_cookie(cookie): the_uuid, separator, the_signature = cookie.partition(':') assert len(the_uuid) == 36, "Is session_id a uuid?" assert separator == ":", "Expected a : here" - assert the_signature == actual_hmac_creation(the_uuid), "Uh-oh, someone tampering with the cookie?" + assert the_signature == actual_hmac_creation( + the_uuid), "Uh-oh, someone tampering with the cookie?" return the_uuid @@ -282,7 +290,8 @@ class UserSession: updated_collection['members'] = updated_traits updated_collection['num_members'] = len(updated_traits) - updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') + updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime( + '%b %d %Y %I:%M%p') updated_collections = [] for collection in self.user_collections: @@ -308,7 +317,8 @@ class UserSession: updated_collection['members'] = updated_traits updated_collection['num_members'] = len(updated_traits) - updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') + updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime( + '%b %d %Y %I:%M%p') updated_collections = [] for collection in self.user_collections: @@ -355,7 +365,8 @@ def get_cookie(): def set_cookie(response): if not request.cookies.get(g.cookie_session.cookie_name): - response.set_cookie(g.cookie_session.cookie_name, g.cookie_session.cookie) + response.set_cookie(g.cookie_session.cookie_name, + g.cookie_session.cookie) return response @@ -390,22 +401,28 @@ class RegisterUser: self.errors = [] self.user = Bunch() - self.user.email_address = kw.get('email_address', '').encode("utf-8").strip() + self.user.email_address = kw.get( + 'email_address', '').encode("utf-8").strip() if not (5 <= len(self.user.email_address) <= 50): - self.errors.append('Email Address needs to be between 5 and 50 characters.') + self.errors.append( + 'Email Address needs to be between 5 and 50 characters.') else: - email_exists = get_user_by_unique_column("email_address", self.user.email_address) + email_exists = get_user_by_unique_column( + "email_address", self.user.email_address) #email_exists = get_user_by_unique_column(es, "email_address", self.user.email_address) if email_exists: self.errors.append('User already exists with that email') self.user.full_name = kw.get('full_name', '').encode("utf-8").strip() if not (5 <= len(self.user.full_name) <= 50): - self.errors.append('Full Name needs to be between 5 and 50 characters.') + self.errors.append( + 'Full Name needs to be between 5 and 50 characters.') - self.user.organization = kw.get('organization', '').encode("utf-8").strip() + self.user.organization = kw.get( + 'organization', '').encode("utf-8").strip() if self.user.organization and not (5 <= len(self.user.organization) <= 50): - self.errors.append('Organization needs to be empty or between 5 and 50 characters.') + self.errors.append( + 'Organization needs to be empty or between 5 and 50 characters.') password = str(kw.get('password', '')) if not (6 <= len(password)): @@ -568,14 +585,16 @@ def password_reset(): if verification_code: user_email = check_verification_code(verification_code) if user_email: - user_details = get_user_by_unique_column('email_address', user_email) + user_details = get_user_by_unique_column( + 'email_address', user_email) if user_details: return render_template( "new_security/password_reset.html", user_encode=user_details["user_id"]) else: flash("Invalid code: User no longer exists!", "error") else: - flash("Invalid code: Password reset code does not exist or might have expired!", "error") + flash( + "Invalid code: Password reset code does not exist or might have expired!", "error") else: return redirect(url_for("login")) @@ -648,8 +667,10 @@ def github_oauth2(): "client_secret": GITHUB_CLIENT_SECRET, "code": code } - result = requests.post("https://github.com/login/oauth/access_token", json=data) - result_dict = {arr[0]: arr[1] for arr in [tok.split("=") for tok in [token.encode("utf-8") for token in result.text.split("&")]]} + result = requests.post( + "https://github.com/login/oauth/access_token", json=data) + result_dict = {arr[0]: arr[1] for arr in [tok.split( + "=") for tok in [token.encode("utf-8") for token in result.text.split("&")]]} github_user = get_github_user_details(result_dict["access_token"]) @@ -696,7 +717,8 @@ def orcid_oauth2(): def get_github_user_details(access_token): from utility.tools import GITHUB_API_URL - result = requests.get(GITHUB_API_URL, params={"access_token": access_token}) + result = requests.get(GITHUB_API_URL, params={ + "access_token": access_token}) return result.json() @@ -737,7 +759,8 @@ class LoginUser: return render_template( "new_security/login_user.html", external_login=external_login, redis_is_available=is_redis_available()) else: - user_details = get_user_by_unique_column("email_address", params["email_address"]) + user_details = get_user_by_unique_column( + "email_address", params["email_address"]) #user_details = get_user_by_unique_column(es, "email_address", params["email_address"]) user = None valid = None @@ -755,8 +778,10 @@ class LoginUser: pwfields.iterations, pwfields.keylength, pwfields.hashfunc) - logger.debug("\n\nComparing:\n{}\n{}\n".format(encrypted.password, pwfields.password)) - valid = pbkdf2.safe_str_cmp(encrypted.password, pwfields.password) + logger.debug("\n\nComparing:\n{}\n{}\n".format( + encrypted.password, pwfields.password)) + valid = pbkdf2.safe_str_cmp( + encrypted.password, pwfields.password) logger.debug("valid is:", valid) if valid and not user.confirmed: @@ -782,7 +807,8 @@ class LoginUser: else: if user: self.unsuccessful_login(user) - flash("Invalid email-address or password. Please try again.", "alert-danger") + flash("Invalid email-address or password. Please try again.", + "alert-danger") response = make_response(redirect(url_for('login'))) return response @@ -790,14 +816,17 @@ class LoginUser: def actual_login(self, user, assumed_by=None, import_collections=None): """The meat of the logging in process""" session_id_signed = self.successful_login(user, assumed_by) - flash("Thank you for logging in {}.".format(user.full_name), "alert-success") - response = make_response(redirect(url_for('index_page', import_collections=import_collections))) + flash("Thank you for logging in {}.".format( + user.full_name), "alert-success") + response = make_response( + redirect(url_for('index_page', import_collections=import_collections))) if self.remember_me: max_age = self.remember_time else: max_age = None - response.set_cookie(UserSession.cookie_name, session_id_signed, max_age=max_age) + response.set_cookie(UserSession.cookie_name, + session_id_signed, max_age=max_age) return response def successful_login(self, user, assumed_by=None): @@ -866,13 +895,15 @@ def forgot_password_submit(): next_page = None if email_address != "": logger.debug("Wants to send password E-mail to ", email_address) - user_details = get_user_by_unique_column("email_address", email_address) + user_details = get_user_by_unique_column( + "email_address", email_address) if user_details: ForgotPasswordEmail(user_details["email_address"]) return render_template("new_security/forgot_password_step2.html", subject=ForgotPasswordEmail.subject) else: - flash("The e-mail entered is not associated with an account.", "alert-danger") + flash("The e-mail entered is not associated with an account.", + "alert-danger") return redirect(url_for("forgot_password")) else: @@ -959,7 +990,8 @@ def register(): errors = result.errors if len(errors) == 0: - flash("Registration successful. You may login with your new account", "alert-info") + flash( + "Registration successful. You may login with your new account", "alert-info") return redirect(url_for("login")) return render_template("new_security/register_user.html", values=params, errors=errors) diff --git a/wqflask/wqflask/user_session.py b/wqflask/wqflask/user_session.py index f0f0d60c..6ccb2e80 100644 --- a/wqflask/wqflask/user_session.py +++ b/wqflask/wqflask/user_session.py @@ -36,7 +36,8 @@ def get_user_session(): def set_user_session(response): if hasattr(g, 'user_session'): if not request.cookies.get(g.user_session.cookie_name): - response.set_cookie(g.user_session.cookie_name, g.user_session.cookie) + response.set_cookie(g.user_session.cookie_name, + g.user_session.cookie) return response @@ -44,7 +45,8 @@ def verify_cookie(cookie): the_uuid, separator, the_signature = cookie.partition(':') assert len(the_uuid) == 36, "Is session_id a uuid?" assert separator == ":", "Expected a : here" - assert the_signature == hmac.hmac_creation(the_uuid), "Uh-oh, someone tampering with the cookie?" + assert the_signature == hmac.hmac_creation( + the_uuid), "Uh-oh, someone tampering with the cookie?" return the_uuid @@ -60,9 +62,11 @@ def create_signed_cookie(): def manage_user(): params = request.form if request.form else request.args if 'new_full_name' in params: - set_user_attribute(g.user_session.user_id, 'full_name', params['new_full_name']) + set_user_attribute(g.user_session.user_id, + 'full_name', params['new_full_name']) if 'new_organization' in params: - set_user_attribute(g.user_session.user_id, 'organization', params['new_organization']) + set_user_attribute(g.user_session.user_id, + 'organization', params['new_organization']) user_details = get_user_by_unique_column("user_id", g.user_session.user_id) @@ -108,7 +112,8 @@ class UserSession: # Grrr...this won't work because of the way flask handles cookies # Delete the cookie - flash("Due to inactivity your session has expired. If you'd like please login again.") + flash( + "Due to inactivity your session has expired. If you'd like please login again.") return None else: self.record = dict(login_time=time.time(), @@ -178,7 +183,9 @@ class UserSession: # ZS: Get user's collections if they exist collections = get_user_collections(self.user_id) - collections = [item for item in collections if item['name'] != "Your Default Collection"] + [item for item in collections if item['name'] == "Your Default Collection"] # ZS: Ensure Default Collection is last in list + collections = [item for item in collections if item['name'] != "Your Default Collection"] + \ + [item for item in collections if item['name'] == + "Your Default Collection"] # ZS: Ensure Default Collection is last in list return collections @property @@ -234,12 +241,14 @@ class UserSession: this_collection = self.get_collection_by_id(collection_id) updated_collection = this_collection - current_members_minus_new = [member for member in this_collection['members'] if member not in traits_to_add] + current_members_minus_new = [ + member for member in this_collection['members'] if member not in traits_to_add] updated_traits = traits_to_add + current_members_minus_new updated_collection['members'] = updated_traits updated_collection['num_members'] = len(updated_traits) - updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') + updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime( + '%b %d %Y %I:%M%p') updated_collections = [] for collection in self.user_collections: @@ -265,7 +274,8 @@ class UserSession: updated_collection['members'] = updated_traits updated_collection['num_members'] = len(updated_traits) - updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') + updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime( + '%b %d %Y %I:%M%p') updated_collections = [] for collection in self.user_collections: |