diff options
author | zsloan | 2021-04-30 18:40:13 +0000 |
---|---|---|
committer | zsloan | 2021-04-30 18:40:13 +0000 |
commit | 21b2e2a552b8b6bedb789263543a4d6d039ac8a9 (patch) | |
tree | fe55511b99a522198c1b840909d7b91c9b45ab22 /wqflask/wqflask | |
parent | 699b952c7bda5426f3f3e947454f656a4ec7538b (diff) | |
parent | 799b25481fffc97e2adb07004adf502096bf371c (diff) | |
download | genenetwork2-21b2e2a552b8b6bedb789263543a4d6d039ac8a9.tar.gz |
Merge branch 'testing' of github.com:genenetwork/genenetwork2 into testing
Diffstat (limited to 'wqflask/wqflask')
51 files changed, 3293 insertions, 2466 deletions
diff --git a/wqflask/wqflask/__init__.py b/wqflask/wqflask/__init__.py index 0564cfa7..a2bf3085 100644 --- a/wqflask/wqflask/__init__.py +++ b/wqflask/wqflask/__init__.py @@ -7,11 +7,11 @@ from flask import g from flask import Flask from utility import formatting from wqflask.markdown_routes import glossary_blueprint -from wqflask.markdown_routes import references_blueprint -from wqflask.markdown_routes import links_blueprint +from wqflask.markdown_routes import references_blueprint +from wqflask.markdown_routes import links_blueprint from wqflask.markdown_routes import policies_blueprint -from wqflask.markdown_routes import environments_blueprint -from wqflask.markdown_routes import facilities_blueprint +from wqflask.markdown_routes import environments_blueprint +from wqflask.markdown_routes import facilities_blueprint app = Flask(__name__) @@ -30,6 +30,7 @@ app.register_blueprint(policies_blueprint, url_prefix="/policies") app.register_blueprint(environments_blueprint, url_prefix="/environments") app.register_blueprint(facilities_blueprint, url_prefix="/facilities") + @app.before_request def before_request(): g.request_start_time = time.time() @@ -49,4 +50,4 @@ from wqflask import db_info from wqflask import user_login from wqflask import user_session -import wqflask.views +import wqflask.views diff --git a/wqflask/wqflask/api/correlation.py b/wqflask/wqflask/api/correlation.py index f5b50dcd..870f3275 100644 --- a/wqflask/wqflask/api/correlation.py +++ b/wqflask/wqflask/api/correlation.py @@ -16,21 +16,25 @@ from utility import webqtlUtil, helper_functions, corr_result_helpers from utility.benchmark import Bench import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + def do_correlation(start_vars): assert('db' in start_vars) assert('target_db' in start_vars) assert('trait_id' in start_vars) - this_dataset = data_set.create_dataset(dataset_name = start_vars['db']) - target_dataset = data_set.create_dataset(dataset_name = start_vars['target_db']) - this_trait = create_trait(dataset = this_dataset, name = start_vars['trait_id']) + this_dataset = data_set.create_dataset(dataset_name=start_vars['db']) + target_dataset = data_set.create_dataset( + dataset_name=start_vars['target_db']) + this_trait = create_trait(dataset=this_dataset, + name=start_vars['trait_id']) this_trait = retrieve_sample_data(this_trait, this_dataset) corr_params = init_corr_params(start_vars) - corr_results = calculate_results(this_trait, this_dataset, target_dataset, corr_params) + corr_results = calculate_results( + this_trait, this_dataset, target_dataset, corr_params) #corr_results = collections.OrderedDict(sorted(corr_results.items(), key=lambda t: -abs(t[1][0]))) final_results = [] @@ -38,26 +42,26 @@ def do_correlation(start_vars): if corr_params['type'] == "tissue": [sample_r, num_overlap, sample_p, symbol] = corr_results[trait] result_dict = { - "trait" : trait, - "sample_r" : sample_r, - "#_strains" : num_overlap, - "p_value" : sample_p, - "symbol" : symbol + "trait": trait, + "sample_r": sample_r, + "#_strains": num_overlap, + "p_value": sample_p, + "symbol": symbol } elif corr_params['type'] == "literature" or corr_params['type'] == "lit": [gene_id, sample_r] = corr_results[trait] result_dict = { - "trait" : trait, - "sample_r" : sample_r, - "gene_id" : gene_id + "trait": trait, + "sample_r": sample_r, + "gene_id": gene_id } else: [sample_r, sample_p, num_overlap] = corr_results[trait] result_dict = { - "trait" : trait, - "sample_r" : sample_r, - "#_strains" : num_overlap, - "p_value" : sample_p + "trait": trait, + "sample_r": sample_r, + "#_strains": num_overlap, + "p_value": sample_p } final_results.append(result_dict) @@ -66,6 +70,7 @@ def do_correlation(start_vars): return final_results + def calculate_results(this_trait, this_dataset, target_dataset, corr_params): corr_results = {} @@ -73,52 +78,66 @@ def calculate_results(this_trait, this_dataset, target_dataset, corr_params): if corr_params['type'] == "tissue": trait_symbol_dict = this_dataset.retrieve_genes("Symbol") - corr_results = do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params) + corr_results = do_tissue_correlation_for_all_traits( + this_trait, trait_symbol_dict, corr_params) sorted_results = collections.OrderedDict(sorted(list(corr_results.items()), key=lambda t: -abs(t[1][1]))) - elif corr_params['type'] == "literature" or corr_params['type'] == "lit": #ZS: Just so a user can use either "lit" or "literature" + # ZS: Just so a user can use either "lit" or "literature" + elif corr_params['type'] == "literature" or corr_params['type'] == "lit": trait_geneid_dict = this_dataset.retrieve_genes("GeneId") - corr_results = do_literature_correlation_for_all_traits(this_trait, this_dataset, trait_geneid_dict, corr_params) + corr_results = do_literature_correlation_for_all_traits( + this_trait, this_dataset, trait_geneid_dict, corr_params) sorted_results = collections.OrderedDict(sorted(list(corr_results.items()), - key=lambda t: -abs(t[1][1]))) + key=lambda t: -abs(t[1][1]))) else: for target_trait, target_vals in list(target_dataset.trait_data.items()): - result = get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, corr_params['type']) + result = get_sample_r_and_p_values( + this_trait, this_dataset, target_vals, target_dataset, corr_params['type']) if result is not None: corr_results[target_trait] = result - sorted_results = collections.OrderedDict(sorted(list(corr_results.items()), key=lambda t: -abs(t[1][0]))) + sorted_results = collections.OrderedDict( + sorted(list(corr_results.items()), key=lambda t: -abs(t[1][0]))) return sorted_results + def do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params, tissue_dataset_id=1): - #Gets tissue expression values for the primary trait - primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list = [this_trait.symbol]) + # Gets tissue expression values for the primary trait + primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=[this_trait.symbol]) if this_trait.symbol.lower() in primary_trait_tissue_vals_dict: - primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower()] + primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower( + )] - corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list=list(trait_symbol_dict.values())) + corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=list(trait_symbol_dict.values())) tissue_corr_data = {} for trait, symbol in list(trait_symbol_dict.items()): if symbol and symbol.lower() in corr_result_tissue_vals_dict: - this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()] + this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower( + )] result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, this_trait_tissue_values, corr_params['method']) - tissue_corr_data[trait] = [result[0], result[1], result[2], symbol] + tissue_corr_data[trait] = [ + result[0], result[1], result[2], symbol] return tissue_corr_data + def do_literature_correlation_for_all_traits(this_trait, target_dataset, trait_geneid_dict, corr_params): - input_trait_mouse_gene_id = convert_to_mouse_gene_id(target_dataset.group.species.lower(), this_trait.geneid) + input_trait_mouse_gene_id = convert_to_mouse_gene_id( + target_dataset.group.species.lower(), this_trait.geneid) lit_corr_data = {} for trait, gene_id in list(trait_geneid_dict.items()): - mouse_gene_id = convert_to_mouse_gene_id(target_dataset.group.species.lower(), gene_id) + mouse_gene_id = convert_to_mouse_gene_id( + target_dataset.group.species.lower(), gene_id) if mouse_gene_id and str(mouse_gene_id).find(";") == -1: result = g.db.execute( @@ -145,6 +164,7 @@ def do_literature_correlation_for_all_traits(this_trait, target_dataset, trait_g return lit_corr_data + def get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, type): """ Calculates the sample r (or rho) and p-value @@ -163,12 +183,15 @@ def get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_data this_trait_vals.append(this_sample_value) shared_target_vals.append(target_sample_value) - this_trait_vals, shared_target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, shared_target_vals) + this_trait_vals, shared_target_vals, num_overlap = corr_result_helpers.normalize_values( + this_trait_vals, shared_target_vals) if type == 'pearson': - sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, shared_target_vals) + sample_r, sample_p = scipy.stats.pearsonr( + this_trait_vals, shared_target_vals) else: - sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, shared_target_vals) + sample_r, sample_p = scipy.stats.spearmanr( + this_trait_vals, shared_target_vals) if num_overlap > 5: if scipy.isnan(sample_r): @@ -176,6 +199,7 @@ def get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_data else: return [sample_r, sample_p, num_overlap] + def convert_to_mouse_gene_id(species=None, gene_id=None): """If the species is rat or human, translate the gene_id to the mouse geneid @@ -212,6 +236,7 @@ def convert_to_mouse_gene_id(species=None, gene_id=None): return mouse_gene_id + def init_corr_params(start_vars): method = "pearson" if 'method' in start_vars: @@ -227,9 +252,9 @@ def init_corr_params(start_vars): return_count = int(start_vars['return_count']) corr_params = { - 'method' : method, - 'type' : type, - 'return_count' : return_count + 'method': method, + 'type': type, + 'return_count': return_count } return corr_params diff --git a/wqflask/wqflask/api/gen_menu.py b/wqflask/wqflask/api/gen_menu.py index eaddecd7..e65b36e4 100644 --- a/wqflask/wqflask/api/gen_menu.py +++ b/wqflask/wqflask/api/gen_menu.py @@ -87,14 +87,14 @@ def phenotypes_exist(group_name): results = g.db.execute( ("SELECT Name FROM PublishFreeze " "WHERE PublishFreeze.Name = " - "'{}'").format(group_name+"Publish")).fetchone() + "'{}'").format(group_name + "Publish")).fetchone() return bool(results) def genotypes_exist(group_name): results = g.db.execute( ("SELECT Name FROM GenoFreeze " + - "WHERE GenoFreeze.Name = '{}'").format(group_name+"Geno")).fetchone() + "WHERE GenoFreeze.Name = '{}'").format(group_name + "Geno")).fetchone() return bool(results) @@ -179,11 +179,11 @@ def build_datasets(species, group, type_name): elif type_name == "Genotypes": results = g.db.execute( ("SELECT InfoFiles.GN_AccesionId " + - "FROM InfoFiles, GenoFreeze, InbredSet " + - "WHERE InbredSet.Name = '{}' AND " + - "GenoFreeze.InbredSetId = InbredSet.Id AND " + - "InfoFiles.InfoPageName = GenoFreeze.ShortName " + - "ORDER BY GenoFreeze.CreateTime DESC").format(group)).fetchone() + "FROM InfoFiles, GenoFreeze, InbredSet " + + "WHERE InbredSet.Name = '{}' AND " + + "GenoFreeze.InbredSetId = InbredSet.Id AND " + + "InfoFiles.InfoPageName = GenoFreeze.ShortName " + + "ORDER BY GenoFreeze.CreateTime DESC").format(group)).fetchone() dataset_id = "None" if bool(results): diff --git a/wqflask/wqflask/api/mapping.py b/wqflask/wqflask/api/mapping.py index d59a69df..f8b0d8bd 100644 --- a/wqflask/wqflask/api/mapping.py +++ b/wqflask/wqflask/api/mapping.py @@ -8,15 +8,16 @@ from utility import helper_functions from wqflask.marker_regression import gemma_mapping, rqtl_mapping, qtlreaper_mapping, plink_mapping import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + def do_mapping_for_api(start_vars): assert('db' in start_vars) assert('trait_id' in start_vars) - dataset = data_set.create_dataset(dataset_name = start_vars['db']) + dataset = data_set.create_dataset(dataset_name=start_vars['db']) dataset.group.get_markers() - this_trait = create_trait(dataset = dataset, name = start_vars['trait_id']) + this_trait = create_trait(dataset=dataset, name=start_vars['trait_id']) this_trait = retrieve_sample_data(this_trait, dataset) samples = [] @@ -36,26 +37,32 @@ def do_mapping_for_api(start_vars): mapping_params = initialize_parameters(start_vars, dataset, this_trait) - covariates = "" #ZS: It seems to take an empty string as default. This should probably be changed. + # ZS: It seems to take an empty string as default. This should probably be changed. + covariates = "" if mapping_params['mapping_method'] == "gemma": header_row = ["name", "chr", "Mb", "lod_score", "p_value"] - if mapping_params['use_loco'] == "True": #ZS: gemma_mapping returns both results and the filename for LOCO, so need to only grab the former for api - result_markers = gemma_mapping.run_gemma(this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])[0] + # ZS: gemma_mapping returns both results and the filename for LOCO, so need to only grab the former for api + if mapping_params['use_loco'] == "True": + result_markers = gemma_mapping.run_gemma( + this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])[0] else: - result_markers = gemma_mapping.run_gemma(this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf']) + result_markers = gemma_mapping.run_gemma( + this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf']) elif mapping_params['mapping_method'] == "rqtl": header_row = ["name", "chr", "cM", "lod_score"] if mapping_params['num_perm'] > 0: _sperm_output, _suggestive, _significant, result_markers = rqtl_mapping.run_rqtl_geno(vals, dataset, mapping_params['rqtl_method'], mapping_params['rqtl_model'], - mapping_params['perm_check'], mapping_params['num_perm'], - mapping_params['do_control'], mapping_params['control_marker'], - mapping_params['manhattan_plot'], mapping_params['pair_scan']) + mapping_params['perm_check'], mapping_params[ + 'num_perm'], + mapping_params['do_control'], mapping_params[ + 'control_marker'], + mapping_params['manhattan_plot'], mapping_params['pair_scan']) else: result_markers = rqtl_mapping.run_rqtl_geno(vals, dataset, mapping_params['rqtl_method'], mapping_params['rqtl_model'], - mapping_params['perm_check'], mapping_params['num_perm'], - mapping_params['do_control'], mapping_params['control_marker'], - mapping_params['manhattan_plot'], mapping_params['pair_scan']) + mapping_params['perm_check'], mapping_params['num_perm'], + mapping_params['do_control'], mapping_params['control_marker'], + mapping_params['manhattan_plot'], mapping_params['pair_scan']) if mapping_params['limit_to']: result_markers = result_markers[:mapping_params['limit_to']] @@ -74,7 +81,6 @@ def do_mapping_for_api(start_vars): return result_markers, None - def initialize_parameters(start_vars, dataset, this_trait): mapping_params = {} @@ -118,7 +124,7 @@ def initialize_parameters(start_vars, dataset, this_trait): mapping_params['maf'] = 0.01 if 'maf' in start_vars: - mapping_params['maf'] = start_vars['maf'] # Minor allele frequency + mapping_params['maf'] = start_vars['maf'] # Minor allele frequency mapping_params['use_loco'] = True if 'use_loco' in start_vars: @@ -135,5 +141,3 @@ def initialize_parameters(start_vars, dataset, this_trait): mapping_params['perm_check'] = False return mapping_params - - diff --git a/wqflask/wqflask/api/router.py b/wqflask/wqflask/api/router.py index 60e163f2..aec74c9e 100644 --- a/wqflask/wqflask/api/router.py +++ b/wqflask/wqflask/api/router.py @@ -23,47 +23,52 @@ from wqflask.api import correlation, mapping, gen_menu from utility.tools import flat_files import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) version = "pre1" + @app.route("/api/v_{}/".format(version)) def hello_world(): - return flask.jsonify({"hello":"world"}) + return flask.jsonify({"hello": "world"}) + @app.route("/api/v_{}/species".format(version)) def get_species_list(): - results = g.db.execute("SELECT SpeciesId, Name, FullName, TaxonomyId FROM Species;") + results = g.db.execute( + "SELECT SpeciesId, Name, FullName, TaxonomyId FROM Species;") the_species = results.fetchall() species_list = [] for species in the_species: species_dict = { - "Id" : species[0], - "Name" : species[1], - "FullName" : species[2], - "TaxonomyId" : species[3] + "Id": species[0], + "Name": species[1], + "FullName": species[2], + "TaxonomyId": species[3] } species_list.append(species_dict) return flask.jsonify(species_list) + @app.route("/api/v_{}/species/<path:species_name>".format(version)) @app.route("/api/v_{}/species/<path:species_name>.<path:file_format>".format(version)) -def get_species_info(species_name, file_format = "json"): +def get_species_info(species_name, file_format="json"): results = g.db.execute("""SELECT SpeciesId, Name, FullName, TaxonomyId FROM Species WHERE (Name="{0}" OR FullName="{0}" OR SpeciesName="{0}");""".format(species_name)) the_species = results.fetchone() - species_dict = { - "Id" : the_species[0], - "Name" : the_species[1], - "FullName" : the_species[2], - "TaxonomyId" : the_species[3] + species_dict = { + "Id": the_species[0], + "Name": the_species[1], + "FullName": the_species[2], + "TaxonomyId": the_species[3] } - + return flask.jsonify(species_dict) + @app.route("/api/v_{}/groups".format(version)) @app.route("/api/v_{}/groups/<path:species_name>".format(version)) def get_groups_list(species_name=None): @@ -87,14 +92,14 @@ def get_groups_list(species_name=None): groups_list = [] for group in the_groups: group_dict = { - "Id" : group[0], - "SpeciesId" : group[1], - "DisplayName" : group[2], - "Name" : group[3], - "FullName" : group[4], - "public" : group[5], - "MappingMethodId" : group[6], - "GeneticType" : group[7] + "Id": group[0], + "SpeciesId": group[1], + "DisplayName": group[2], + "Name": group[3], + "FullName": group[4], + "public": group[5], + "MappingMethodId": group[6], + "GeneticType": group[7] } groups_list.append(group_dict) @@ -102,11 +107,12 @@ def get_groups_list(species_name=None): else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/group/<path:group_name>".format(version)) @app.route("/api/v_{}/group/<path:group_name>.<path:file_format>".format(version)) @app.route("/api/v_{}/group/<path:species_name>/<path:group_name>".format(version)) @app.route("/api/v_{}/group/<path:species_name>/<path:group_name>.<path:file_format>".format(version)) -def get_group_info(group_name, species_name = None, file_format = "json"): +def get_group_info(group_name, species_name=None, file_format="json"): if species_name: results = g.db.execute("""SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, InbredSet.InbredSetName, InbredSet.Name, InbredSet.FullName, InbredSet.public, @@ -131,20 +137,21 @@ def get_group_info(group_name, species_name = None, file_format = "json"): group = results.fetchone() if group: group_dict = { - "Id" : group[0], - "SpeciesId" : group[1], - "DisplayName" : group[2], - "Name" : group[3], - "FullName" : group[4], - "public" : group[5], - "MappingMethodId" : group[6], - "GeneticType" : group[7] + "Id": group[0], + "SpeciesId": group[1], + "DisplayName": group[2], + "Name": group[3], + "FullName": group[4], + "public": group[5], + "MappingMethodId": group[6], + "GeneticType": group[7] } return flask.jsonify(group_dict) else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/datasets/<path:group_name>".format(version)) @app.route("/api/v_{}/datasets/<path:species_name>/<path:group_name>".format(version)) def get_datasets_for_group(group_name, species_name=None): @@ -179,17 +186,17 @@ def get_datasets_for_group(group_name, species_name=None): datasets_list = [] for dataset in the_datasets: dataset_dict = { - "Id" : dataset[0], - "ProbeFreezeId" : dataset[1], - "AvgID" : dataset[2], - "Short_Abbreviation" : dataset[3], - "Long_Abbreviation" : dataset[4], - "FullName" : dataset[5], - "ShortName" : dataset[6], - "CreateTime" : dataset[7], - "public" : dataset[8], - "confidentiality" : dataset[9], - "DataScale" : dataset[10] + "Id": dataset[0], + "ProbeFreezeId": dataset[1], + "AvgID": dataset[2], + "Short_Abbreviation": dataset[3], + "Long_Abbreviation": dataset[4], + "FullName": dataset[5], + "ShortName": dataset[6], + "CreateTime": dataset[7], + "public": dataset[8], + "confidentiality": dataset[9], + "DataScale": dataset[10] } datasets_list.append(dataset_dict) @@ -197,14 +204,15 @@ def get_datasets_for_group(group_name, species_name=None): else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/dataset/<path:dataset_name>".format(version)) @app.route("/api/v_{}/dataset/<path:dataset_name>.<path:file_format>".format(version)) @app.route("/api/v_{}/dataset/<path:group_name>/<path:dataset_name>".format(version)) @app.route("/api/v_{}/dataset/<path:group_name>/<path:dataset_name>.<path:file_format>".format(version)) -def get_dataset_info(dataset_name, group_name = None, file_format="json"): - #ZS: First get ProbeSet (mRNA expression) datasets and then get Phenotype datasets +def get_dataset_info(dataset_name, group_name=None, file_format="json"): + # ZS: First get ProbeSet (mRNA expression) datasets and then get Phenotype datasets - datasets_list = [] #ZS: I figure I might as well return a list if there are multiple matches, though I don"t know if this will actually happen in practice + datasets_list = [] # ZS: I figure I might as well return a list if there are multiple matches, though I don"t know if this will actually happen in practice probeset_query = """ SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName, @@ -235,16 +243,16 @@ def get_dataset_info(dataset_name, group_name = None, file_format="json"): if dataset: dataset_dict = { - "dataset_type" : "mRNA expression", - "id" : dataset[0], - "name" : dataset[1], - "full_name" : dataset[2], - "short_name" : dataset[3], - "data_scale" : dataset[4], - "tissue_id" : dataset[5], - "tissue" : dataset[6], - "public" : dataset[7], - "confidential" : dataset[8] + "dataset_type": "mRNA expression", + "id": dataset[0], + "name": dataset[1], + "full_name": dataset[2], + "short_name": dataset[3], + "data_scale": dataset[4], + "tissue_id": dataset[5], + "tissue": dataset[6], + "public": dataset[7], + "confidential": dataset[8] } datasets_list.append(dataset_dict) @@ -272,25 +280,25 @@ def get_dataset_info(dataset_name, group_name = None, file_format="json"): if dataset: if dataset[5]: dataset_dict = { - "dataset_type" : "phenotype", - "id" : dataset[0], - "name" : dataset[1], - "description" : dataset[2], - "pubmed_id" : dataset[5], - "title" : dataset[6], - "year" : dataset[7] + "dataset_type": "phenotype", + "id": dataset[0], + "name": dataset[1], + "description": dataset[2], + "pubmed_id": dataset[5], + "title": dataset[6], + "year": dataset[7] } elif dataset[4]: dataset_dict = { - "dataset_type" : "phenotype", - "id" : dataset[0], - "name" : dataset[3], - "description" : dataset[4] + "dataset_type": "phenotype", + "id": dataset[0], + "name": dataset[3], + "description": dataset[4] } else: dataset_dict = { - "dataset_type" : "phenotype", - "id" : dataset[0] + "dataset_type": "phenotype", + "id": dataset[0] } datasets_list.append(dataset_dict) @@ -302,10 +310,12 @@ def get_dataset_info(dataset_name, group_name = None, file_format="json"): else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/traits/<path:dataset_name>".format(version), methods=("GET",)) @app.route("/api/v_{}/traits/<path:dataset_name>.<path:file_format>".format(version), methods=("GET",)) -def fetch_traits(dataset_name, file_format = "json"): - trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids(dataset_name, request.args) +def fetch_traits(dataset_name, file_format="json"): + trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids( + dataset_name, request.args) if ("ids_only" in request.args) and (len(trait_ids) > 0): if file_format == "json": filename = dataset_name + "_trait_ids.json" @@ -353,7 +363,8 @@ def fetch_traits(dataset_name, file_format = "json"): ProbeSet.Id """ - field_list = ["Id", "Name", "Symbol", "Description", "Chr", "Mb", "Aliases", "Mean", "SE", "Locus", "LRS", "P-Value", "Additive", "h2"] + field_list = ["Id", "Name", "Symbol", "Description", "Chr", "Mb", + "Aliases", "Mean", "SE", "Locus", "LRS", "P-Value", "Additive", "h2"] elif data_type == "Geno": query = """ SELECT @@ -370,7 +381,8 @@ def fetch_traits(dataset_name, file_format = "json"): Geno.Id """ - field_list = ["Id", "Name", "Marker_Name", "Chr", "Mb", "Sequence", "Source"] + field_list = ["Id", "Name", "Marker_Name", + "Chr", "Mb", "Sequence", "Source"] else: query = """ SELECT @@ -386,7 +398,8 @@ def fetch_traits(dataset_name, file_format = "json"): PublishXRef.Id """ - field_list = ["Id", "PhenotypeId", "PublicationId", "Locus", "LRS", "Additive", "Sequence"] + field_list = ["Id", "PhenotypeId", "PublicationId", + "Locus", "LRS", "Additive", "Sequence"] if 'limit_to' in request.args: limit_number = request.args['limit_to'] @@ -430,10 +443,12 @@ def fetch_traits(dataset_name, file_format = "json"): else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/sample_data/<path:dataset_name>".format(version)) @app.route("/api/v_{}/sample_data/<path:dataset_name>.<path:file_format>".format(version)) -def all_sample_data(dataset_name, file_format = "csv"): - trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids(dataset_name, request.args) +def all_sample_data(dataset_name, file_format="csv"): + trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids( + dataset_name, request.args) if len(trait_ids) > 0: sample_list = get_samplelist(dataset_name) @@ -536,9 +551,10 @@ def all_sample_data(dataset_name, file_format = "csv"): else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/sample_data/<path:dataset_name>/<path:trait_name>".format(version)) @app.route("/api/v_{}/sample_data/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version)) -def trait_sample_data(dataset_name, trait_name, file_format = "json"): +def trait_sample_data(dataset_name, trait_name, file_format="json"): probeset_query = """ SELECT Strain.Name, Strain.Name2, ProbeSetData.value, ProbeSetData.Id, ProbeSetSE.error @@ -563,10 +579,10 @@ def trait_sample_data(dataset_name, trait_name, file_format = "json"): sample_list = [] for sample in sample_data: sample_dict = { - "sample_name": sample[0], - "sample_name_2": sample[1], - "value": sample[2], - "data_id": sample[3], + "sample_name": sample[0], + "sample_name_2": sample[1], + "value": sample[2], + "data_id": sample[3], } if sample[4]: sample_dict["se"] = sample[4] @@ -610,10 +626,10 @@ def trait_sample_data(dataset_name, trait_name, file_format = "json"): sample_list = [] for sample in sample_data: sample_dict = { - "sample_name" : sample[0], - "sample_name_2" : sample[1], - "value" : sample[2], - "data_id" : sample[3] + "sample_name": sample[0], + "sample_name_2": sample[1], + "value": sample[2], + "data_id": sample[3] } if sample[4]: sample_dict["se"] = sample[4] @@ -623,13 +639,14 @@ def trait_sample_data(dataset_name, trait_name, file_format = "json"): return flask.jsonify(sample_list) else: - return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/trait/<path:dataset_name>/<path:trait_name>".format(version)) @app.route("/api/v_{}/trait/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version)) @app.route("/api/v_{}/trait_info/<path:dataset_name>/<path:trait_name>".format(version)) @app.route("/api/v_{}/trait_info/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version)) -def get_trait_info(dataset_name, trait_name, file_format = "json"): +def get_trait_info(dataset_name, trait_name, file_format="json"): probeset_query = """ SELECT ProbeSet.Id, ProbeSet.Name, ProbeSet.Symbol, ProbeSet.description, ProbeSet.Chr, ProbeSet.Mb, ProbeSet.alias, @@ -648,26 +665,27 @@ def get_trait_info(dataset_name, trait_name, file_format = "json"): trait_info = probeset_results.fetchone() if trait_info: trait_dict = { - "id" : trait_info[0], - "name" : trait_info[1], - "symbol" : trait_info[2], - "description" : trait_info[3], - "chr" : trait_info[4], - "mb" : trait_info[5], - "alias" :trait_info[6], - "mean" : trait_info[7], - "se" : trait_info[8], - "locus" : trait_info[9], - "lrs" : trait_info[10], - "p_value" : trait_info[11], - "additive" : trait_info[12] + "id": trait_info[0], + "name": trait_info[1], + "symbol": trait_info[2], + "description": trait_info[3], + "chr": trait_info[4], + "mb": trait_info[5], + "alias": trait_info[6], + "mean": trait_info[7], + "se": trait_info[8], + "locus": trait_info[9], + "lrs": trait_info[10], + "p_value": trait_info[11], + "additive": trait_info[12] } return flask.jsonify(trait_dict) else: - if "Publish" in dataset_name: #ZS: Check if the user input the dataset_name as BXDPublish, etc (which is always going to be the group name + "Publish" + # ZS: Check if the user input the dataset_name as BXDPublish, etc (which is always going to be the group name + "Publish" + if "Publish" in dataset_name: dataset_name = dataset_name.replace("Publish", "") - + group_id = get_group_id(dataset_name) pheno_query = """ SELECT @@ -684,25 +702,28 @@ def get_trait_info(dataset_name, trait_name, file_format = "json"): trait_info = pheno_results.fetchone() if trait_info: trait_dict = { - "id" : trait_info[0], - "locus" : trait_info[1], - "lrs" : trait_info[2], - "additive" : trait_info[3] + "id": trait_info[0], + "locus": trait_info[1], + "lrs": trait_info[2], + "additive": trait_info[3] } return flask.jsonify(trait_dict) else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/correlation".format(version), methods=("GET",)) def get_corr_results(): results = correlation.do_correlation(request.args) if len(results) > 0: - return flask.jsonify(results) #ZS: I think flask.jsonify expects a dict/list instead of JSON + # ZS: I think flask.jsonify expects a dict/list instead of JSON + return flask.jsonify(results) else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/mapping".format(version), methods=("GET",)) def get_mapping_results(): results, format = mapping.do_mapping_for_api(request.args) @@ -726,6 +747,7 @@ def get_mapping_results(): else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/genotypes/<string:file_format>/<string:group_name>/<string:dataset_name>.zip".format(version)) @app.route("/api/v_{}/genotypes/<string:file_format>/<string:group_name>/<string:dataset_name>".format(version)) @app.route("/api/v_{}/genotypes/<string:file_format>/<string:group_name>.zip".format(version)) @@ -754,7 +776,8 @@ def get_genotypes(group_name, file_format="csv", dataset_name=None): output_lines.append(line.split()) i += 1 - csv_writer = csv.writer(si, delimiter = "\t", escapechar = "\\", quoting = csv.QUOTE_NONE) + csv_writer = csv.writer( + si, delimiter="\t", escapechar="\\", quoting=csv.QUOTE_NONE) else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") elif file_format == "rqtl2": @@ -765,18 +788,23 @@ def get_genotypes(group_name, file_format="csv", dataset_name=None): filename = group_name if os.path.isfile("{0}/{1}_geno.csv".format(flat_files("genotype/rqtl2"), group_name)): - yaml_file = json.load(open("{0}/{1}.json".format(flat_files("genotype/rqtl2"), group_name))) + yaml_file = json.load( + open("{0}/{1}.json".format(flat_files("genotype/rqtl2"), group_name))) yaml_file["geno"] = filename + "_geno.csv" yaml_file["gmap"] = filename + "_gmap.csv" yaml_file["pheno"] = filename + "_pheno.csv" config_file = [filename + ".json", json.dumps(yaml_file)] #config_file = [filename + ".yaml", open("{0}/{1}.yaml".format(flat_files("genotype/rqtl2"), group_name))] - geno_file = [filename + "_geno.csv", open("{0}/{1}_geno.csv".format(flat_files("genotype/rqtl2"), group_name))] - gmap_file = [filename + "_gmap.csv", open("{0}/{1}_gmap.csv".format(flat_files("genotype/rqtl2"), group_name))] + geno_file = [filename + "_geno.csv", + open("{0}/{1}_geno.csv".format(flat_files("genotype/rqtl2"), group_name))] + gmap_file = [filename + "_gmap.csv", + open("{0}/{1}_gmap.csv".format(flat_files("genotype/rqtl2"), group_name))] if dataset_name: - phenotypes = requests.get("http://gn2.genenetwork.org/api/v_pre1/sample_data/" + dataset_name) + phenotypes = requests.get( + "http://gn2.genenetwork.org/api/v_pre1/sample_data/" + dataset_name) else: - phenotypes = requests.get("http://gn2.genenetwork.org/api/v_pre1/sample_data/" + group_name + "Publish") + phenotypes = requests.get( + "http://gn2.genenetwork.org/api/v_pre1/sample_data/" + group_name + "Publish") with ZipFile(memory_file, 'w', compression=ZIP_DEFLATED) as zf: zf.writestr(config_file[0], config_file[1]) @@ -799,10 +827,11 @@ def get_genotypes(group_name, file_format="csv", dataset_name=None): for line in genofile: if limit_num and i >= limit_num: break - output_lines.append([line.strip() for line in line.split(",")]) + output_lines.append([line.strip() + for line in line.split(",")]) i += 1 - csv_writer = csv.writer(si, delimiter = ",") + csv_writer = csv.writer(si, delimiter=",") else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") @@ -813,6 +842,7 @@ def get_genotypes(group_name, file_format="csv", dataset_name=None): return output + @app.route("/api/v_{}/gen_dropdown".format(version), methods=("GET",)) def gen_dropdown_menu(): results = gen_menu.gen_dropdown_json() @@ -822,18 +852,20 @@ def gen_dropdown_menu(): else: return return_error(code=500, source=request.url_rule.rule, title="Some error occurred", details="") + def return_error(code, source, title, details): json_ob = {"errors": [ { "status": code, - "source": { "pointer": source }, - "title" : title, + "source": {"pointer": source}, + "title": title, "detail": details } ]} return flask.jsonify(json_ob) + def get_dataset_trait_ids(dataset_name, start_vars): if 'limit_to' in start_vars: @@ -842,8 +874,8 @@ def get_dataset_trait_ids(dataset_name, start_vars): limit_string = "" if "Geno" in dataset_name: - data_type = "Geno" #ZS: Need to pass back the dataset type - query = """ + data_type = "Geno" # ZS: Need to pass back the dataset type + query = """ SELECT GenoXRef.GenoId, Geno.Name, GenoXRef.GenoFreezeId FROM @@ -866,7 +898,7 @@ def get_dataset_trait_ids(dataset_name, start_vars): data_type = "Publish" dataset_name = dataset_name.replace("Publish", "") dataset_id = get_group_id(dataset_name) - + query = """ SELECT PublishXRef.PhenotypeId, PublishXRef.Id, InbredSet.InbredSetCode @@ -881,7 +913,8 @@ def get_dataset_trait_ids(dataset_name, start_vars): results = g.db.execute(query).fetchall() trait_ids = [result[0] for result in results] - trait_names = [str(result[2]) + "_" + str(result[1]) for result in results] + trait_names = [str(result[2]) + "_" + str(result[1]) + for result in results] return trait_ids, trait_names, data_type, dataset_id @@ -906,6 +939,7 @@ def get_dataset_trait_ids(dataset_name, start_vars): dataset_id = results[0][2] return trait_ids, trait_names, data_type, dataset_id + def get_samplelist(dataset_name): group_id = get_group_id_from_dataset(dataset_name) @@ -915,13 +949,14 @@ def get_samplelist(dataset_name): WHERE StrainXRef.StrainId = Strain.Id AND StrainXRef.InbredSetId = {} """.format(group_id) - + results = g.db.execute(query).fetchall() - + samplelist = [result[0] for result in results] return samplelist + def get_group_id_from_dataset(dataset_name): if "Publish" in dataset_name: query = """ @@ -962,6 +997,7 @@ def get_group_id_from_dataset(dataset_name): else: return None + def get_group_id(group_name): query = """ SELECT InbredSet.Id diff --git a/wqflask/wqflask/collect.py b/wqflask/wqflask/collect.py index 0291f2b8..01274ba9 100644 --- a/wqflask/wqflask/collect.py +++ b/wqflask/wqflask/collect.py @@ -35,11 +35,12 @@ def process_traits(unprocessed_traits): data, _separator, the_hmac = trait.rpartition(':') data = data.strip() if g.user_session.logged_in: - assert the_hmac == hmac.hmac_creation(data), "Data tampering?" + assert the_hmac == hmac.hmac_creation(data), "Data tampering?" traits.add(str(data)) return traits + def report_change(len_before, len_now): new_length = len_now - len_before if new_length: @@ -48,16 +49,18 @@ def report_change(len_before, len_now): else: logger.debug("No new traits were added.") + @app.route("/collections/store_trait_list", methods=('POST',)) def store_traits_list(): - params = request.form + params = request.form - traits = params['traits'] - hash = params['hash'] + traits = params['traits'] + hash = params['hash'] - Redis.set(hash, traits) + Redis.set(hash, traits) + + return hash - return hash @app.route("/collections/add") def collections_add(): @@ -68,19 +71,20 @@ def collections_add(): uc_id = g.user_session.add_collection(collection_name, set()) collections = g.user_session.user_collections - #ZS: One of these might be unnecessary + # ZS: One of these might be unnecessary if 'traits' in request.args: - traits=request.args['traits'] + traits = request.args['traits'] return render_template("collections/add.html", - traits = traits, - collections = collections, - ) + traits=traits, + collections=collections, + ) else: hash = request.args['hash'] return render_template("collections/add.html", - hash = hash, - collections = collections, - ) + hash=hash, + collections=collections, + ) + @app.route("/collections/new") def collections_new(): @@ -118,6 +122,7 @@ def collections_new(): # CauseAnError pass + def create_new(collection_name): params = request.args @@ -133,15 +138,17 @@ def create_new(collection_name): return redirect(url_for('view_collection', uc_id=uc_id)) + @app.route("/collections/list") def list_collections(): params = request.args user_collections = list(g.user_session.user_collections) return render_template("collections/list.html", - params = params, - collections = user_collections, - ) + params=params, + collections=user_collections, + ) + @app.route("/collections/remove", methods=('POST',)) def remove_traits(): @@ -151,7 +158,8 @@ def remove_traits(): traits_to_remove = params['trait_list'] traits_to_remove = process_traits(traits_to_remove) - members_now = g.user_session.remove_traits_from_collection(uc_id, traits_to_remove) + members_now = g.user_session.remove_traits_from_collection( + uc_id, traits_to_remove) return redirect(url_for("view_collection", uc_id=uc_id)) @@ -174,7 +182,8 @@ def delete_collection(): else: flash("We've deleted the selected collection.", "alert-info") else: - flash("We've deleted the collection: {}.".format(collection_name), "alert-info") + flash("We've deleted the collection: {}.".format( + collection_name), "alert-info") return redirect(url_for('list_collections')) @@ -184,7 +193,8 @@ def view_collection(): params = request.args uc_id = params['uc_id'] - uc = next((collection for collection in g.user_session.user_collections if collection["id"] == uc_id)) + uc = next( + (collection for collection in g.user_session.user_collections if collection["id"] == uc_id)) traits = uc["members"] trait_obs = [] @@ -196,25 +206,28 @@ def view_collection(): name, dataset_name = atrait.split(':') if dataset_name == "Temp": group = name.split("_")[2] - dataset = create_dataset(dataset_name, dataset_type = "Temp", group_name = group) + dataset = create_dataset( + dataset_name, dataset_type="Temp", group_name=group) trait_ob = create_trait(name=name, dataset=dataset) else: dataset = create_dataset(dataset_name) trait_ob = create_trait(name=name, dataset=dataset) - trait_ob = retrieve_trait_info(trait_ob, dataset, get_qtl_info=True) + trait_ob = retrieve_trait_info( + trait_ob, dataset, get_qtl_info=True) trait_obs.append(trait_ob) json_version.append(jsonable(trait_ob)) collection_info = dict(trait_obs=trait_obs, - uc = uc) + uc=uc) if "json" in params: return json.dumps(json_version) else: return render_template("collections/view.html", - **collection_info - ) + **collection_info + ) + @app.route("/collections/change_name", methods=('POST',)) def change_collection_name(): @@ -226,4 +239,3 @@ def change_collection_name(): g.user_session.change_collection_name(collection_id, new_name) return new_name - diff --git a/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py b/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py index 5855ccf0..cb88eb53 100644 --- a/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py +++ b/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py @@ -1,4 +1,4 @@ -## Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License @@ -23,7 +23,7 @@ from pprint import pformat as pf from base.trait import create_trait from base import data_set from utility import webqtlUtil, helper_functions, corr_result_helpers -import utility.webqtlUtil #this is for parallel computing only. +import utility.webqtlUtil # this is for parallel computing only. from wqflask.correlation import correlation_functions from MySQLdb import escape_string as escape @@ -34,16 +34,18 @@ from flask import Flask, g class ComparisonBarChart: def __init__(self, start_vars): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) self.all_sample_list = [] self.traits = [] self.insufficient_shared_samples = False - this_group = self.trait_list[0][1].group.name #ZS: Getting initial group name before verifying all traits are in the same group in the following loop + # ZS: Getting initial group name before verifying all traits are in the same group in the following loop + this_group = self.trait_list[0][1].group.name for trait_db in self.trait_list: - + if trait_db[1].group.name != this_group: self.insufficient_shared_samples = True break @@ -51,7 +53,7 @@ class ComparisonBarChart: this_group = trait_db[1].group.name this_trait = trait_db[0] self.traits.append(this_trait) - + this_sample_data = this_trait.data for sample in this_sample_data: @@ -74,10 +76,10 @@ class ComparisonBarChart: this_trait_vals.append('') self.sample_data.append(this_trait_vals) - self.js_data = dict(traits = [trait.name for trait in self.traits], - samples = self.all_sample_list, - sample_data = self.sample_data,) - + self.js_data = dict(traits=[trait.name for trait in self.traits], + samples=self.all_sample_list, + sample_data=self.sample_data,) + def get_trait_db_obs(self, trait_db_list): self.trait_list = [] @@ -88,9 +90,8 @@ class ComparisonBarChart: #print("dataset_name:", dataset_name) dataset_ob = data_set.create_dataset(dataset_name) trait_ob = create_trait(dataset=dataset_ob, - name=trait_name, - cellid=None) + name=trait_name, + cellid=None) self.trait_list.append((trait_ob, dataset_ob)) #print("trait_list:", self.trait_list) - diff --git a/wqflask/wqflask/correlation/corr_scatter_plot.py b/wqflask/wqflask/correlation/corr_scatter_plot.py index d5dc26f5..cafb9265 100644 --- a/wqflask/wqflask/correlation/corr_scatter_plot.py +++ b/wqflask/wqflask/correlation/corr_scatter_plot.py @@ -9,24 +9,29 @@ from scipy import stats import numpy as np import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + class CorrScatterPlot: """Page that displays a correlation scatterplot with a line fitted to it""" def __init__(self, params): if "Temp" in params['dataset_1']: - self.dataset_1 = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = params['dataset_1'].split("_")[1]) + self.dataset_1 = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=params['dataset_1'].split("_")[1]) else: self.dataset_1 = data_set.create_dataset(params['dataset_1']) if "Temp" in params['dataset_2']: - self.dataset_2 = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = params['dataset_2'].split("_")[1]) + self.dataset_2 = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=params['dataset_2'].split("_")[1]) else: self.dataset_2 = data_set.create_dataset(params['dataset_2']) #self.dataset_3 = data_set.create_dataset(params['dataset_3']) - self.trait_1 = create_trait(name=params['trait_1'], dataset=self.dataset_1) - self.trait_2 = create_trait(name=params['trait_2'], dataset=self.dataset_2) + self.trait_1 = create_trait( + name=params['trait_1'], dataset=self.dataset_1) + self.trait_2 = create_trait( + name=params['trait_2'], dataset=self.dataset_2) #self.trait_3 = create_trait(name=params['trait_3'], dataset=self.dataset_3) self.method = params['method'] @@ -37,10 +42,13 @@ class CorrScatterPlot: if self.dataset_1.group.f1list != None: primary_samples += self.dataset_1.group.f1list - self.trait_1 = retrieve_sample_data(self.trait_1, self.dataset_1, primary_samples) - self.trait_2 = retrieve_sample_data(self.trait_2, self.dataset_2, primary_samples) + self.trait_1 = retrieve_sample_data( + self.trait_1, self.dataset_1, primary_samples) + self.trait_2 = retrieve_sample_data( + self.trait_2, self.dataset_2, primary_samples) - samples_1, samples_2, num_overlap = corr_result_helpers.normalize_values_with_samples(self.trait_1.data, self.trait_2.data) + samples_1, samples_2, num_overlap = corr_result_helpers.normalize_values_with_samples( + self.trait_1.data, self.trait_2.data) self.data = [] self.indIDs = list(samples_1.keys()) @@ -53,72 +61,76 @@ class CorrScatterPlot: vals_2.append(samples_2[sample].value) self.data.append(vals_2) - slope, intercept, r_value, p_value, std_err = stats.linregress(vals_1, vals_2) + slope, intercept, r_value, p_value, std_err = stats.linregress( + vals_1, vals_2) if slope < 0.001: slope_string = '%.3E' % slope else: slope_string = '%.3f' % slope - - x_buffer = (max(vals_1) - min(vals_1))*0.1 - y_buffer = (max(vals_2) - min(vals_2))*0.1 + + x_buffer = (max(vals_1) - min(vals_1)) * 0.1 + y_buffer = (max(vals_2) - min(vals_2)) * 0.1 x_range = [min(vals_1) - x_buffer, max(vals_1) + x_buffer] y_range = [min(vals_2) - y_buffer, max(vals_2) + y_buffer] - intercept_coords = get_intercept_coords(slope, intercept, x_range, y_range) + intercept_coords = get_intercept_coords( + slope, intercept, x_range, y_range) rx = stats.rankdata(vals_1) ry = stats.rankdata(vals_2) self.rdata = [] self.rdata.append(rx.tolist()) - self.rdata.append(ry.tolist()) - srslope, srintercept, srr_value, srp_value, srstd_err = stats.linregress(rx, ry) + self.rdata.append(ry.tolist()) + srslope, srintercept, srr_value, srp_value, srstd_err = stats.linregress( + rx, ry) if srslope < 0.001: srslope_string = '%.3E' % srslope else: srslope_string = '%.3f' % srslope - x_buffer = (max(rx) - min(rx))*0.1 - y_buffer = (max(ry) - min(ry))*0.1 + x_buffer = (max(rx) - min(rx)) * 0.1 + y_buffer = (max(ry) - min(ry)) * 0.1 sr_range = [min(rx) - x_buffer, max(rx) + x_buffer] - sr_intercept_coords = get_intercept_coords(srslope, srintercept, sr_range, sr_range) + sr_intercept_coords = get_intercept_coords( + srslope, srintercept, sr_range, sr_range) self.collections_exist = "False" if g.user_session.num_collections > 0: self.collections_exist = "True" self.js_data = dict( - data = self.data, - rdata = self.rdata, - indIDs = self.indIDs, - trait_1 = self.trait_1.dataset.name + ": " + str(self.trait_1.name), - trait_2 = self.trait_2.dataset.name + ": " + str(self.trait_2.name), - samples_1 = samples_1, - samples_2 = samples_2, - num_overlap = num_overlap, - vals_1 = vals_1, - vals_2 = vals_2, - x_range = x_range, - y_range = y_range, - sr_range = sr_range, - intercept_coords = intercept_coords, - sr_intercept_coords = sr_intercept_coords, - - slope = slope, - slope_string = slope_string, - intercept = intercept, - r_value = r_value, - p_value = p_value, - - srslope = srslope, - srslope_string = srslope_string, - srintercept = srintercept, - srr_value = srr_value, - srp_value = srp_value + data=self.data, + rdata=self.rdata, + indIDs=self.indIDs, + trait_1=self.trait_1.dataset.name + ": " + str(self.trait_1.name), + trait_2=self.trait_2.dataset.name + ": " + str(self.trait_2.name), + samples_1=samples_1, + samples_2=samples_2, + num_overlap=num_overlap, + vals_1=vals_1, + vals_2=vals_2, + x_range=x_range, + y_range=y_range, + sr_range=sr_range, + intercept_coords=intercept_coords, + sr_intercept_coords=sr_intercept_coords, + + slope=slope, + slope_string=slope_string, + intercept=intercept, + r_value=r_value, + p_value=p_value, + + srslope=srslope, + srslope_string=srslope_string, + srintercept=srintercept, + srr_value=srr_value, + srp_value=srp_value #trait3 = self.trait_3.data, #vals_3 = vals_3 @@ -129,10 +141,10 @@ class CorrScatterPlot: def get_intercept_coords(slope, intercept, x_range, y_range): intercept_coords = [] - y1 = slope*x_range[0] + intercept - y2 = slope*x_range[1] + intercept - x1 = (y1-intercept)/slope - x2 = (y2-intercept)/slope + y1 = slope * x_range[0] + intercept + y2 = slope * x_range[1] + intercept + x1 = (y1 - intercept) / slope + x2 = (y2 - intercept) / slope intercept_coords.append([x1, y1]) intercept_coords.append([x2, y2]) diff --git a/wqflask/wqflask/correlation/correlation_functions.py b/wqflask/wqflask/correlation/correlation_functions.py index fd7691d4..0f24241a 100644 --- a/wqflask/wqflask/correlation/correlation_functions.py +++ b/wqflask/wqflask/correlation/correlation_functions.py @@ -34,19 +34,19 @@ from flask import Flask, g ##################################################################################### -#Input: primaryValue(list): one list of expression values of one probeSet, +# Input: primaryValue(list): one list of expression values of one probeSet, # targetValue(list): one list of expression values of one probeSet, # method(string): indicate correlation method ('pearson' or 'spearman') -#Output: corr_result(list): first item is Correlation Value, second item is tissue number, +# Output: corr_result(list): first item is Correlation Value, second item is tissue number, # third item is PValue -#Function: get correlation value,Tissue quantity ,p value result by using R; -#Note : This function is special case since both primaryValue and targetValue are from -#the same dataset. So the length of these two parameters is the same. They are pairs. -#Also, in the datatable TissueProbeSetData, all Tissue values are loaded based on -#the same tissue order +# Function: get correlation value,Tissue quantity ,p value result by using R; +# Note : This function is special case since both primaryValue and targetValue are from +# the same dataset. So the length of these two parameters is the same. They are pairs. +# Also, in the datatable TissueProbeSetData, all Tissue values are loaded based on +# the same tissue order ##################################################################################### -def cal_zero_order_corr_for_tiss (primaryValue=[], targetValue=[], method='pearson'): +def cal_zero_order_corr_for_tiss(primaryValue=[], targetValue=[], method='pearson'): R_primary = rpy2.robjects.FloatVector(list(range(len(primaryValue)))) N = len(primaryValue) @@ -55,27 +55,27 @@ def cal_zero_order_corr_for_tiss (primaryValue=[], targetValue=[], method='pears R_target = rpy2.robjects.FloatVector(list(range(len(targetValue)))) for i in range(len(targetValue)): - R_target[i]=targetValue[i] + R_target[i] = targetValue[i] R_corr_test = rpy2.robjects.r['cor.test'] - if method =='spearman': + if method == 'spearman': R_result = R_corr_test(R_primary, R_target, method='spearman') else: R_result = R_corr_test(R_primary, R_target) - corr_result =[] - corr_result.append( R_result[3][0]) - corr_result.append( N ) - corr_result.append( R_result[2][0]) + corr_result = [] + corr_result.append(R_result[3][0]) + corr_result.append(N) + corr_result.append(R_result[2][0]) return corr_result ######################################################################################################## -#input: cursor, symbolList (list), dataIdDict(Dict): key is symbol -#output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair. +# input: cursor, symbolList (list), dataIdDict(Dict): key is symbol +# output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair. # key is symbol, value is one list of expression values of one probeSet. -#function: wrapper function for getSymbolValuePairDict function +# function: wrapper function for getSymbolValuePairDict function # build gene symbol list if necessary, cut it into small lists if necessary, # then call getSymbolValuePairDict function and merge the results. ######################################################################################################## diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index cb341e79..9b0b6118 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -1,4 +1,4 @@ -## Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License @@ -78,11 +78,12 @@ class CorrelationResults: with Bench("Doing correlations"): if start_vars['dataset'] == "Temp": - self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = start_vars['group']) + self.dataset = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=start_vars['group']) self.trait_id = start_vars['trait_id'] self.this_trait = create_trait(dataset=self.dataset, - name=self.trait_id, - cellid=None) + name=self.trait_id, + cellid=None) else: helper_functions.get_species_dataset_trait(self, start_vars) @@ -95,9 +96,9 @@ class CorrelationResults: self.p_range_lower = get_float(start_vars, 'p_range_lower', -1.0) self.p_range_upper = get_float(start_vars, 'p_range_upper', 1.0) - if ('loc_chr' in start_vars and - 'min_loc_mb' in start_vars and - 'max_loc_mb' in start_vars): + if ('loc_chr' in start_vars + and 'min_loc_mb' in start_vars + and 'max_loc_mb' in start_vars): self.location_type = get_string(start_vars, 'location_type') self.location_chr = get_string(start_vars, 'loc_chr') @@ -109,8 +110,8 @@ class CorrelationResults: self.get_formatted_corr_type() self.return_number = int(start_vars['corr_return_results']) - #The two if statements below append samples to the sample list based upon whether the user - #rselected Primary Samples Only, Other Samples Only, or All Samples + # The two if statements below append samples to the sample list based upon whether the user + # rselected Primary Samples Only, Other Samples Only, or All Samples primary_samples = self.dataset.group.samplelist if self.dataset.group.parlist != None: @@ -118,23 +119,26 @@ class CorrelationResults: if self.dataset.group.f1list != None: primary_samples += self.dataset.group.f1list - #If either BXD/whatever Only or All Samples, append all of that group's samplelist + # If either BXD/whatever Only or All Samples, append all of that group's samplelist if corr_samples_group != 'samples_other': self.process_samples(start_vars, primary_samples) - #If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and - #exclude the primary samples (because they would have been added in the previous - #if statement if the user selected All Samples) + # If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and + # exclude the primary samples (because they would have been added in the previous + # if statement if the user selected All Samples) if corr_samples_group != 'samples_primary': if corr_samples_group == 'samples_other': primary_samples = [x for x in primary_samples if x not in ( - self.dataset.group.parlist + self.dataset.group.f1list)] - self.process_samples(start_vars, list(self.this_trait.data.keys()), primary_samples) + self.dataset.group.parlist + self.dataset.group.f1list)] + self.process_samples(start_vars, list( + self.this_trait.data.keys()), primary_samples) - self.target_dataset = data_set.create_dataset(start_vars['corr_dataset']) + self.target_dataset = data_set.create_dataset( + start_vars['corr_dataset']) self.target_dataset.get_trait_data(list(self.sample_data.keys())) - self.header_fields = get_header_fields(self.target_dataset.type, self.corr_method) + self.header_fields = get_header_fields( + self.target_dataset.type, self.corr_method) if self.target_dataset.type == "ProbeSet": self.filter_cols = [7, 6] @@ -153,7 +157,8 @@ class CorrelationResults: tissue_corr_data = self.do_tissue_correlation_for_all_traits() if tissue_corr_data != None: for trait in list(tissue_corr_data.keys())[:self.return_number]: - self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait]) + self.get_sample_r_and_p_values( + trait, self.target_dataset.trait_data[trait]) else: for trait, values in list(self.target_dataset.trait_data.items()): self.get_sample_r_and_p_values(trait, values) @@ -163,7 +168,8 @@ class CorrelationResults: lit_corr_data = self.do_lit_correlation_for_all_traits() for trait in list(lit_corr_data.keys())[:self.return_number]: - self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait]) + self.get_sample_r_and_p_values( + trait, self.target_dataset.trait_data[trait]) elif self.corr_type == "sample": for trait, values in list(self.target_dataset.trait_data.items()): @@ -172,8 +178,7 @@ class CorrelationResults: self.correlation_data = collections.OrderedDict(sorted(list(self.correlation_data.items()), key=lambda t: -abs(t[1][0]))) - - #ZS: Convert min/max chromosome to an int for the location range option + # ZS: Convert min/max chromosome to an int for the location range option range_chr_as_int = None for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()): if 'loc_chr' in start_vars: @@ -181,7 +186,8 @@ class CorrelationResults: range_chr_as_int = order_id for _trait_counter, trait in enumerate(list(self.correlation_data.keys())[:self.return_number]): - trait_object = create_trait(dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False) + trait_object = create_trait( + dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False) if not trait_object: continue @@ -194,8 +200,8 @@ class CorrelationResults: if chr_info.name == trait_object.chr: chr_as_int = order_id - if (float(self.correlation_data[trait][0]) >= self.p_range_lower and - float(self.correlation_data[trait][0]) <= self.p_range_upper): + if (float(self.correlation_data[trait][0]) >= self.p_range_lower + and float(self.correlation_data[trait][0]) <= self.p_range_upper): if (self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Publish") and bool(trait_object.mean): if (self.min_expr != None) and (float(trait_object.mean) < self.min_expr): @@ -215,8 +221,8 @@ class CorrelationResults: continue (trait_object.sample_r, - trait_object.sample_p, - trait_object.num_overlap) = self.correlation_data[trait] + trait_object.sample_p, + trait_object.num_overlap) = self.correlation_data[trait] # Set some sane defaults trait_object.tissue_corr = 0 @@ -236,7 +242,8 @@ class CorrelationResults: if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet": self.do_tissue_correlation_for_trait_list() - self.json_results = generate_corr_json(self.correlation_results, self.this_trait, self.dataset, self.target_dataset) + self.json_results = generate_corr_json( + self.correlation_results, self.this_trait, self.dataset, self.target_dataset) ############################################################################################################################################ @@ -259,39 +266,43 @@ class CorrelationResults: def do_tissue_correlation_for_trait_list(self, tissue_dataset_id=1): """Given a list of correlation results (self.correlation_results), gets the tissue correlation value for each""" - #Gets tissue expression values for the primary trait + # Gets tissue expression values for the primary trait primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list = [self.this_trait.symbol]) + symbol_list=[self.this_trait.symbol]) if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict: - primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()] - gene_symbol_list = [trait.symbol for trait in self.correlation_results if trait.symbol] + primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower( + )] + gene_symbol_list = [ + trait.symbol for trait in self.correlation_results if trait.symbol] - corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list=gene_symbol_list) + corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=gene_symbol_list) for trait in self.correlation_results: if trait.symbol and trait.symbol.lower() in corr_result_tissue_vals_dict: - this_trait_tissue_values = corr_result_tissue_vals_dict[trait.symbol.lower()] + this_trait_tissue_values = corr_result_tissue_vals_dict[trait.symbol.lower( + )] result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, - this_trait_tissue_values, - self.corr_method) + this_trait_tissue_values, + self.corr_method) trait.tissue_corr = result[0] trait.tissue_pvalue = result[2] def do_tissue_correlation_for_all_traits(self, tissue_dataset_id=1): - #Gets tissue expression values for the primary trait + # Gets tissue expression values for the primary trait primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list = [self.this_trait.symbol]) + symbol_list=[self.this_trait.symbol]) if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict: - primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()] + primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower( + )] #print("trait_gene_symbols: ", pf(trait_gene_symbols.values())) - corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list=list(self.trait_symbol_dict.values())) + corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=list(self.trait_symbol_dict.values())) #print("corr_result_tissue_vals: ", pf(corr_result_tissue_vals_dict)) @@ -300,27 +311,30 @@ class CorrelationResults: tissue_corr_data = {} for trait, symbol in list(self.trait_symbol_dict.items()): if symbol and symbol.lower() in corr_result_tissue_vals_dict: - this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()] + this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower( + )] result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, - this_trait_tissue_values, - self.corr_method) + this_trait_tissue_values, + self.corr_method) tissue_corr_data[trait] = [symbol, result[0], result[2]] tissue_corr_data = collections.OrderedDict(sorted(list(tissue_corr_data.items()), - key=lambda t: -abs(t[1][1]))) + key=lambda t: -abs(t[1][1]))) return tissue_corr_data def do_lit_correlation_for_trait_list(self): - input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid) + input_trait_mouse_gene_id = self.convert_to_mouse_gene_id( + self.dataset.group.species.lower(), self.this_trait.geneid) for trait in self.correlation_results: if trait.geneid: - trait.mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), trait.geneid) + trait.mouse_gene_id = self.convert_to_mouse_gene_id( + self.dataset.group.species.lower(), trait.geneid) else: trait.mouse_gene_id = None @@ -348,13 +362,14 @@ class CorrelationResults: else: trait.lit_corr = 0 - def do_lit_correlation_for_all_traits(self): - input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid) + input_trait_mouse_gene_id = self.convert_to_mouse_gene_id( + self.dataset.group.species.lower(), self.this_trait.geneid) lit_corr_data = {} for trait, gene_id in list(self.trait_geneid_dict.items()): - mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), gene_id) + mouse_gene_id = self.convert_to_mouse_gene_id( + self.dataset.group.species.lower(), gene_id) if mouse_gene_id and str(mouse_gene_id).find(";") == -1: #print("gene_symbols:", input_trait_mouse_gene_id + " / " + mouse_gene_id) @@ -382,7 +397,7 @@ class CorrelationResults: lit_corr_data[trait] = [gene_id, 0] lit_corr_data = collections.OrderedDict(sorted(list(lit_corr_data.items()), - key=lambda t: -abs(t[1][1]))) + key=lambda t: -abs(t[1][1]))) return lit_corr_data @@ -440,21 +455,26 @@ class CorrelationResults: self.this_trait_vals.append(sample_value) target_vals.append(target_sample_value) - self.this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(self.this_trait_vals, target_vals) + self.this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values( + self.this_trait_vals, target_vals) if num_overlap > 5: - #ZS: 2015 could add biweight correlation, see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/ + # ZS: 2015 could add biweight correlation, see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/ if self.corr_method == 'bicor': - sample_r, sample_p = do_bicor(self.this_trait_vals, target_vals) + sample_r, sample_p = do_bicor( + self.this_trait_vals, target_vals) elif self.corr_method == 'pearson': - sample_r, sample_p = scipy.stats.pearsonr(self.this_trait_vals, target_vals) + sample_r, sample_p = scipy.stats.pearsonr( + self.this_trait_vals, target_vals) else: - sample_r, sample_p = scipy.stats.spearmanr(self.this_trait_vals, target_vals) + sample_r, sample_p = scipy.stats.spearmanr( + self.this_trait_vals, target_vals) if numpy.isnan(sample_r): pass else: - self.correlation_data[trait] = [sample_r, sample_p, num_overlap] + self.correlation_data[trait] = [ + sample_r, sample_p, num_overlap] def process_samples(self, start_vars, sample_names, excluded_samples=None): if not excluded_samples: @@ -475,16 +495,18 @@ def do_bicor(this_trait_vals, target_trait_vals): r_library("WGCNA") r_bicor = ro.r["bicorAndPvalue"] # Map the bicorAndPvalue function - r_options(stringsAsFactors = False) + r_options(stringsAsFactors=False) this_vals = ro.Vector(this_trait_vals) target_vals = ro.Vector(target_trait_vals) - the_r, the_p, _fisher_transform, _the_t, _n_obs = [numpy.asarray(x) for x in r_bicor(x = this_vals, y = target_vals)] + the_r, the_p, _fisher_transform, _the_t, _n_obs = [ + numpy.asarray(x) for x in r_bicor(x=this_vals, y=target_vals)] return the_r, the_p -def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_api = False): + +def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_api=False): results_list = [] for i, trait in enumerate(corr_results): if trait.view == False: @@ -493,7 +515,8 @@ def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_ap results_dict['index'] = i + 1 results_dict['trait_id'] = trait.name results_dict['dataset'] = trait.dataset.name - results_dict['hmac'] = hmac.data_hmac('{}:{}'.format(trait.name, trait.dataset.name)) + results_dict['hmac'] = hmac.data_hmac( + '{}:{}'.format(trait.name, trait.dataset.name)) if target_dataset.type == "ProbeSet": results_dict['symbol'] = trait.symbol results_dict['description'] = "N/A" @@ -544,7 +567,8 @@ def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_ap if bool(trait.authors): authors_list = trait.authors.split(',') if len(authors_list) > 6: - results_dict['authors_display'] = ", ".join(authors_list[:6]) + ", et al." + results_dict['authors_display'] = ", ".join( + authors_list[:6]) + ", et al." else: results_dict['authors_display'] = trait.authors if bool(trait.pubmed_id): @@ -574,85 +598,85 @@ def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_ap return json.dumps(results_list) + def get_header_fields(data_type, corr_method): if data_type == "ProbeSet": if corr_method == "spearman": header_fields = ['Index', - 'Record', - 'Symbol', - 'Description', - 'Location', - 'Mean', - 'Sample rho', - 'N', - 'Sample p(rho)', - 'Lit rho', - 'Tissue rho', - 'Tissue p(rho)', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] + 'Record', + 'Symbol', + 'Description', + 'Location', + 'Mean', + 'Sample rho', + 'N', + 'Sample p(rho)', + 'Lit rho', + 'Tissue rho', + 'Tissue p(rho)', + 'Max LRS', + 'Max LRS Location', + 'Additive Effect'] else: header_fields = ['Index', - 'Record', - 'Symbol', - 'Description', - 'Location', - 'Mean', - 'Sample r', - 'N', - 'Sample p(r)', - 'Lit r', - 'Tissue r', - 'Tissue p(r)', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] + 'Record', + 'Symbol', + 'Description', + 'Location', + 'Mean', + 'Sample r', + 'N', + 'Sample p(r)', + 'Lit r', + 'Tissue r', + 'Tissue p(r)', + 'Max LRS', + 'Max LRS Location', + 'Additive Effect'] elif data_type == "Publish": if corr_method == "spearman": header_fields = ['Index', - 'Record', - 'Abbreviation', - 'Description', - 'Mean', - 'Authors', - 'Year', - 'Sample rho', - 'N', - 'Sample p(rho)', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] + 'Record', + 'Abbreviation', + 'Description', + 'Mean', + 'Authors', + 'Year', + 'Sample rho', + 'N', + 'Sample p(rho)', + 'Max LRS', + 'Max LRS Location', + 'Additive Effect'] else: header_fields = ['Index', - 'Record', - 'Abbreviation', - 'Description', - 'Mean', - 'Authors', - 'Year', - 'Sample r', - 'N', - 'Sample p(r)', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] + 'Record', + 'Abbreviation', + 'Description', + 'Mean', + 'Authors', + 'Year', + 'Sample r', + 'N', + 'Sample p(r)', + 'Max LRS', + 'Max LRS Location', + 'Additive Effect'] else: if corr_method == "spearman": header_fields = ['Index', - 'ID', - 'Location', - 'Sample rho', - 'N', - 'Sample p(rho)'] + 'ID', + 'Location', + 'Sample rho', + 'N', + 'Sample p(rho)'] else: header_fields = ['Index', - 'ID', - 'Location', - 'Sample r', - 'N', - 'Sample p(r)'] + 'ID', + 'Location', + 'Sample r', + 'N', + 'Sample p(r)'] return header_fields - diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index d0b4a156..3a54a218 100644 --- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -1,4 +1,4 @@ -## Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License @@ -44,7 +44,8 @@ THIRTY_DAYS = 60 * 60 * 24 * 30 class CorrelationMatrix: def __init__(self, start_vars): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) @@ -52,7 +53,8 @@ class CorrelationMatrix: self.traits = [] self.insufficient_shared_samples = False self.do_PCA = True - this_group = self.trait_list[0][1].group.name #ZS: Getting initial group name before verifying all traits are in the same group in the following loop + # ZS: Getting initial group name before verifying all traits are in the same group in the following loop + this_group = self.trait_list[0][1].group.name for trait_db in self.trait_list: this_group = trait_db[1].group.name this_trait = trait_db[0] @@ -76,10 +78,12 @@ class CorrelationMatrix: this_trait_vals.append('') self.sample_data.append(this_trait_vals) - if len(this_trait_vals) < len(self.trait_list): #Shouldn't do PCA if there are more traits than observations/samples + # Shouldn't do PCA if there are more traits than observations/samples + if len(this_trait_vals) < len(self.trait_list): self.do_PCA = False - self.lowest_overlap = 8 #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning) + # ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning) + self.lowest_overlap = 8 self.corr_results = [] self.pca_corr_results = [] @@ -93,7 +97,7 @@ class CorrelationMatrix: corr_result_row = [] pca_corr_result_row = [] - is_spearman = False #ZS: To determine if it's above or below the diagonal + is_spearman = False # ZS: To determine if it's above or below the diagonal for target in self.trait_list: target_trait = target[0] target_db = target[1] @@ -112,7 +116,8 @@ class CorrelationMatrix: if sample in self.shared_samples_list: self.shared_samples_list.remove(sample) - this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals) + this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values( + this_trait_vals, target_vals) if num_overlap < self.lowest_overlap: self.lowest_overlap = num_overlap @@ -120,21 +125,25 @@ class CorrelationMatrix: corr_result_row.append([target_trait, 0, num_overlap]) pca_corr_result_row.append(0) else: - pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals) + pearson_r, pearson_p = scipy.stats.pearsonr( + this_trait_vals, target_vals) if is_spearman == False: sample_r, sample_p = pearson_r, pearson_p if sample_r == 1: is_spearman = True else: - sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) + sample_r, sample_p = scipy.stats.spearmanr( + this_trait_vals, target_vals) - corr_result_row.append([target_trait, sample_r, num_overlap]) + corr_result_row.append( + [target_trait, sample_r, num_overlap]) pca_corr_result_row.append(pearson_r) self.corr_results.append(corr_result_row) self.pca_corr_results.append(pca_corr_result_row) - self.export_filename, self.export_filepath = export_corr_matrix(self.corr_results) + self.export_filename, self.export_filepath = export_corr_matrix( + self.corr_results) self.trait_data_array = [] for trait_db in self.trait_list: @@ -156,34 +165,37 @@ class CorrelationMatrix: try: corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results)) - corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen) + corr_eigen_value, corr_eigen_vectors = sortEigenVectors( + corr_result_eigen) if self.do_PCA == True: self.pca_works = "True" self.pca_trait_ids = [] - pca = self.calculate_pca(list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) + pca = self.calculate_pca( + list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) self.loadings_array = self.process_loadings() else: self.pca_works = "False" except: self.pca_works = "False" - self.js_data = dict(traits = [trait.name for trait in self.traits], - groups = groups, - cols = list(range(len(self.traits))), - rows = list(range(len(self.traits))), - samples = self.all_sample_list, - sample_data = self.sample_data,) + self.js_data = dict(traits=[trait.name for trait in self.traits], + groups=groups, + cols=list(range(len(self.traits))), + rows=list(range(len(self.traits))), + samples=self.all_sample_list, + sample_data=self.sample_data,) def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): base = importr('base') stats = importr('stats') - corr_results_to_list = robjects.FloatVector([item for sublist in self.pca_corr_results for item in sublist]) + corr_results_to_list = robjects.FloatVector( + [item for sublist in self.pca_corr_results for item in sublist]) m = robjects.r.matrix(corr_results_to_list, nrow=len(cols)) eigen = base.eigen(m) - pca = stats.princomp(m, cor = "TRUE") + pca = stats.princomp(m, cor="TRUE") self.loadings = pca.rx('loadings') self.scores = pca.rx('scores') self.scale = pca.rx('scale') @@ -193,15 +205,17 @@ class CorrelationMatrix: pca_traits = [] for i, vector in enumerate(trait_array_vectors): - #ZS: Check if below check is necessary - #if corr_eigen_value[i-1] > 100.0/len(self.trait_list): - pca_traits.append((vector*-1.0).tolist()) + # ZS: Check if below check is necessary + # if corr_eigen_value[i-1] > 100.0/len(self.trait_list): + pca_traits.append((vector * -1.0).tolist()) this_group_name = self.trait_list[0][1].group.name - temp_dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = this_group_name) + temp_dataset = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=this_group_name) temp_dataset.group.get_samplelist() for i, pca_trait in enumerate(pca_traits): - trait_id = "PCA" + str(i+1) + "_" + temp_dataset.group.species + "_" + this_group_name + "_" + datetime.datetime.now().strftime("%m%d%H%M%S") + trait_id = "PCA" + str(i + 1) + "_" + temp_dataset.group.species + "_" + \ + this_group_name + "_" + datetime.datetime.now().strftime("%m%d%H%M%S") this_vals_string = "" position = 0 for sample in temp_dataset.group.all_samples_ordered(): @@ -224,27 +238,34 @@ class CorrelationMatrix: for i in range(len(self.trait_list)): loadings_row = [] if len(self.trait_list) > 2: - the_range = 3 + the_range = 3 else: - the_range = 2 + the_range = 2 for j in range(the_range): - position = i + len(self.trait_list)*j + position = i + len(self.trait_list) * j loadings_row.append(self.loadings[0][position]) loadings_array.append(loadings_row) return loadings_array + def export_corr_matrix(corr_results): - corr_matrix_filename = "corr_matrix_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - matrix_export_path = "{}{}.csv".format(GENERATED_TEXT_DIR, corr_matrix_filename) + corr_matrix_filename = "corr_matrix_" + \ + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + matrix_export_path = "{}{}.csv".format( + GENERATED_TEXT_DIR, corr_matrix_filename) with open(matrix_export_path, "w+") as output_file: - output_file.write("Time/Date: " + datetime.datetime.now().strftime("%x / %X") + "\n") + output_file.write( + "Time/Date: " + datetime.datetime.now().strftime("%x / %X") + "\n") output_file.write("\n") output_file.write("Correlation ") for i, item in enumerate(corr_results[0]): - output_file.write("Trait" + str(i + 1) + ": " + str(item[0].dataset.name) + "::" + str(item[0].name) + "\t") + output_file.write("Trait" + str(i + 1) + ": " + \ + str(item[0].dataset.name) + "::" + str(item[0].name) + "\t") output_file.write("\n") for i, row in enumerate(corr_results): - output_file.write("Trait" + str(i + 1) + ": " + str(row[0][0].dataset.name) + "::" + str(row[0][0].name) + "\t") + output_file.write("Trait" + str(i + 1) + ": " + \ + str(row[0][0].dataset.name) + "::" + str(row[0][0].name) + "\t") for item in row: output_file.write(str(item[1]) + "\t") output_file.write("\n") @@ -253,16 +274,19 @@ def export_corr_matrix(corr_results): output_file.write("\n") output_file.write("N ") for i, item in enumerate(corr_results[0]): - output_file.write("Trait" + str(i) + ": " + str(item[0].dataset.name) + "::" + str(item[0].name) + "\t") + output_file.write("Trait" + str(i) + ": " + \ + str(item[0].dataset.name) + "::" + str(item[0].name) + "\t") output_file.write("\n") for i, row in enumerate(corr_results): - output_file.write("Trait" + str(i) + ": " + str(row[0][0].dataset.name) + "::" + str(row[0][0].name) + "\t") + output_file.write("Trait" + str(i) + ": " + \ + str(row[0][0].dataset.name) + "::" + str(row[0][0].name) + "\t") for item in row: output_file.write(str(item[2]) + "\t") output_file.write("\n") return corr_matrix_filename, matrix_export_path + def zScore(trait_data_array): NN = len(trait_data_array[0]) if NN < 10: @@ -271,18 +295,19 @@ def zScore(trait_data_array): i = 0 for data in trait_data_array: N = len(data) - S = reduce(lambda x, y: x+y, data, 0.) - SS = reduce(lambda x, y: x+y*y, data, 0.) - mean = S/N - var = SS - S*S/N - stdev = math.sqrt(var/(N-1)) + S = reduce(lambda x, y: x + y, data, 0.) + SS = reduce(lambda x, y: x + y * y, data, 0.) + mean = S / N + var = SS - S * S / N + stdev = math.sqrt(var / (N - 1)) if stdev == 0: stdev = 1e-100 - data2 = [(x-mean)/stdev for x in data] + data2 = [(x - mean) / stdev for x in data] trait_data_array[i] = data2 i += 1 return trait_data_array + def sortEigenVectors(vector): try: eigenValues = vector[0].tolist() @@ -298,8 +323,8 @@ def sortEigenVectors(vector): for item in combines: A.append(item[0]) B.append(item[1]) - sum = reduce(lambda x, y: x+y, A, 0.0) - A = [x*100.0/sum for x in A] + sum = reduce(lambda x, y: x + y, A, 0.0) + A = [x * 100.0 / sum for x in A] return [A, B] except: return [] diff --git a/wqflask/wqflask/ctl/ctl_analysis.py b/wqflask/wqflask/ctl/ctl_analysis.py index 1556e370..bb928ec5 100644 --- a/wqflask/wqflask/ctl/ctl_analysis.py +++ b/wqflask/wqflask/ctl/ctl_analysis.py @@ -25,33 +25,39 @@ from utility.tools import locate, GN2_BRANCH_URL from rpy2.robjects.packages import importr import utility.logger -logger = utility.logger.getLogger(__name__ ) - -## Get pointers to some common R functions -r_library = ro.r["library"] # Map the library function -r_options = ro.r["options"] # Map the options function -r_t = ro.r["t"] # Map the t function -r_unlist = ro.r["unlist"] # Map the unlist function -r_list = ro.r.list # Map the list function -r_png = ro.r["png"] # Map the png function for plotting -r_dev_off = ro.r["dev.off"] # Map the dev.off function -r_write_table = ro.r["write.table"] # Map the write.table function -r_data_frame = ro.r["data.frame"] # Map the write.table function -r_as_numeric = ro.r["as.numeric"] # Map the write.table function +logger = utility.logger.getLogger(__name__) + +# Get pointers to some common R functions +r_library = ro.r["library"] # Map the library function +r_options = ro.r["options"] # Map the options function +r_t = ro.r["t"] # Map the t function +r_unlist = ro.r["unlist"] # Map the unlist function +r_list = ro.r.list # Map the list function +r_png = ro.r["png"] # Map the png function for plotting +r_dev_off = ro.r["dev.off"] # Map the dev.off function +r_write_table = ro.r["write.table"] # Map the write.table function +r_data_frame = ro.r["data.frame"] # Map the write.table function +r_as_numeric = ro.r["as.numeric"] # Map the write.table function + class CTL: def __init__(self): logger.info("Initialization of CTL") #log = r_file("/tmp/genenetwork_ctl.log", open = "wt") - #r_sink(log) # Uncomment the r_sink() commands to log output from stdout/stderr to a file + # r_sink(log) # Uncomment the r_sink() commands to log output from stdout/stderr to a file #r_sink(log, type = "message") - r_library("ctl") # Load CTL - Should only be done once, since it is quite expensive - r_options(stringsAsFactors = False) + # Load CTL - Should only be done once, since it is quite expensive + r_library("ctl") + r_options(stringsAsFactors=False) logger.info("Initialization of CTL done, package loaded in R session") - self.r_CTLscan = ro.r["CTLscan"] # Map the CTLscan function - self.r_CTLsignificant = ro.r["CTLsignificant"] # Map the CTLsignificant function - self.r_lineplot = ro.r["ctl.lineplot"] # Map the ctl.lineplot function - self.r_plotCTLobject = ro.r["plot.CTLobject"] # Map the CTLsignificant function + # Map the CTLscan function + self.r_CTLscan = ro.r["CTLscan"] + # Map the CTLsignificant function + self.r_CTLsignificant = ro.r["CTLsignificant"] + # Map the ctl.lineplot function + self.r_lineplot = ro.r["ctl.lineplot"] + # Map the CTLsignificant function + self.r_plotCTLobject = ro.r["plot.CTLobject"] self.nodes_list = [] self.edges_list = [] logger.info("Obtained pointers to CTL functions") @@ -59,28 +65,29 @@ class CTL: self.gn2_url = GN2_BRANCH_URL def addNode(self, gt): - node_dict = { 'data' : {'id' : str(gt.name) + ":" + str(gt.dataset.name), - 'sid' : str(gt.name), - 'dataset' : str(gt.dataset.name), - 'label' : gt.name, - 'symbol' : gt.symbol, - 'geneid' : gt.geneid, - 'omim' : gt.omim } } + node_dict = {'data': {'id': str(gt.name) + ":" + str(gt.dataset.name), + 'sid': str(gt.name), + 'dataset': str(gt.dataset.name), + 'label': gt.name, + 'symbol': gt.symbol, + 'geneid': gt.geneid, + 'omim': gt.omim}} self.nodes_list.append(node_dict) def addEdge(self, gtS, gtT, significant, x): - edge_data = {'id' : str(gtS.symbol) + '_' + significant[1][x] + '_' + str(gtT.symbol), - 'source' : str(gtS.name) + ":" + str(gtS.dataset.name), - 'target' : str(gtT.name) + ":" + str(gtT.dataset.name), - 'lod' : significant[3][x], - 'color' : "#ff0000", - 'width' : significant[3][x] } - edge_dict = { 'data' : edge_data } + edge_data = {'id': str(gtS.symbol) + '_' + significant[1][x] + '_' + str(gtT.symbol), + 'source': str(gtS.name) + ":" + str(gtS.dataset.name), + 'target': str(gtT.name) + ":" + str(gtT.dataset.name), + 'lod': significant[3][x], + 'color': "#ff0000", + 'width': significant[3][x]} + edge_dict = {'data': edge_data} self.edges_list.append(edge_dict) def run_analysis(self, requestform): logger.info("Starting CTL analysis on dataset") - self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')] + self.trait_db_list = [trait.strip() + for trait in requestform['trait_list'].split(',')] self.trait_db_list = [x for x in self.trait_db_list if x] logger.debug("strategy:", requestform.get("strategy")) @@ -108,45 +115,49 @@ class CTL: markers = [] markernames = [] for marker in parser.markers: - markernames.append(marker["name"]) - markers.append(marker["genotypes"]) + markernames.append(marker["name"]) + markers.append(marker["genotypes"]) genotypes = list(itertools.chain(*markers)) - logger.debug(len(genotypes) / len(individuals), "==", len(parser.markers)) + logger.debug(len(genotypes) / len(individuals), + "==", len(parser.markers)) - rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len(individuals), dimnames = r_list(markernames, individuals), byrow=True)) + rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len( + individuals), dimnames=r_list(markernames, individuals), byrow=True)) # Create a phenotype matrix traits = [] for trait in self.trait_db_list: - logger.debug("retrieving data for", trait) - if trait != "": - ts = trait.split(':') - gt = create_trait(name = ts[0], dataset_name = ts[1]) - gt = retrieve_sample_data(gt, dataset, individuals) - for ind in individuals: - if ind in list(gt.data.keys()): - traits.append(gt.data[ind].value) - else: - traits.append("-999") - - rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len(individuals), dimnames = r_list(self.trait_db_list, individuals), byrow=True)) + logger.debug("retrieving data for", trait) + if trait != "": + ts = trait.split(':') + gt = create_trait(name=ts[0], dataset_name=ts[1]) + gt = retrieve_sample_data(gt, dataset, individuals) + for ind in individuals: + if ind in list(gt.data.keys()): + traits.append(gt.data[ind].value) + else: + traits.append("-999") + + rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len( + individuals), dimnames=r_list(self.trait_db_list, individuals), byrow=True)) logger.debug(rPheno) # Use a data frame to store the objects - rPheno = r_data_frame(rPheno, check_names = False) - rGeno = r_data_frame(rGeno, check_names = False) + rPheno = r_data_frame(rPheno, check_names=False) + rGeno = r_data_frame(rGeno, check_names=False) # Debug: Print the genotype and phenotype files to disk #r_write_table(rGeno, "~/outputGN/geno.csv") #r_write_table(rPheno, "~/outputGN/pheno.csv") # Perform the CTL scan - res = self.r_CTLscan(rGeno, rPheno, strategy = strategy, nperm = nperm, parametric = parametric, nthreads=6) + res = self.r_CTLscan(rGeno, rPheno, strategy=strategy, + nperm=nperm, parametric=parametric, nthreads=6) # Get significant interactions - significant = self.r_CTLsignificant(res, significance = significance) + significant = self.r_CTLsignificant(res, significance=significance) # Create an image for output self.results = {} @@ -154,40 +165,54 @@ class CTL: self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1'] self.results['ctlresult'] = significant - self.results['requestform'] = requestform # Store the user specified parameters for the output page + # Store the user specified parameters for the output page + self.results['requestform'] = requestform # Create the lineplot - r_png(self.results['imgloc1'], width=1000, height=600, type='cairo-png') - self.r_lineplot(res, significance = significance) + r_png(self.results['imgloc1'], width=1000, + height=600, type='cairo-png') + self.r_lineplot(res, significance=significance) r_dev_off() - n = 2 # We start from 2, since R starts from 1 :) + # We start from 2, since R starts from 1 :) + n = 2 for trait in self.trait_db_list: - # Create the QTL like CTL plots - self.results['imgurl' + str(n)] = webqtlUtil.genRandStr("CTL_") + ".png" - self.results['imgloc' + str(n)] = GENERATED_IMAGE_DIR + self.results['imgurl' + str(n)] - r_png(self.results['imgloc' + str(n)], width=1000, height=600, type='cairo-png') - self.r_plotCTLobject(res, (n-1), significance = significance, main='Phenotype ' + trait) - r_dev_off() - n = n + 1 + # Create the QTL like CTL plots + self.results['imgurl' + \ + str(n)] = webqtlUtil.genRandStr("CTL_") + ".png" + self.results['imgloc' + str(n)] = GENERATED_IMAGE_DIR + \ + self.results['imgurl' + str(n)] + r_png(self.results['imgloc' + str(n)], + width=1000, height=600, type='cairo-png') + self.r_plotCTLobject( + res, (n - 1), significance=significance, main='Phenotype ' + trait) + r_dev_off() + n = n + 1 # Flush any output from R sys.stdout.flush() # Create the interactive graph for cytoscape visualization (Nodes and Edges) if not isinstance(significant, ri.RNULLType): - for x in range(len(significant[0])): - logger.debug(significant[0][x], significant[1][x], significant[2][x]) # Debug to console - tsS = significant[0][x].split(':') # Source - tsT = significant[2][x].split(':') # Target - gtS = create_trait(name = tsS[0], dataset_name = tsS[1]) # Retrieve Source info from the DB - gtT = create_trait(name = tsT[0], dataset_name = tsT[1]) # Retrieve Target info from the DB - self.addNode(gtS) - self.addNode(gtT) - self.addEdge(gtS, gtT, significant, x) - - significant[0][x] = "{} ({})".format(gtS.symbol, gtS.name) # Update the trait name for the displayed table - significant[2][x] = "{} ({})".format(gtT.symbol, gtT.name) # Update the trait name for the displayed table + for x in range(len(significant[0])): + logger.debug(significant[0][x], significant[1] + [x], significant[2][x]) # Debug to console + # Source + tsS = significant[0][x].split(':') + # Target + tsT = significant[2][x].split(':') + # Retrieve Source info from the DB + gtS = create_trait(name=tsS[0], dataset_name=tsS[1]) + # Retrieve Target info from the DB + gtT = create_trait(name=tsT[0], dataset_name=tsT[1]) + self.addNode(gtS) + self.addNode(gtT) + self.addEdge(gtS, gtT, significant, x) + + # Update the trait name for the displayed table + significant[0][x] = "{} ({})".format(gtS.symbol, gtS.name) + # Update the trait name for the displayed table + significant[2][x] = "{} ({})".format(gtT.symbol, gtT.name) self.elements = json.dumps(self.nodes_list + self.edges_list) @@ -202,8 +227,8 @@ class CTL: self.loadImage("imgloc1", "imgdata1") n = 2 for trait in self.trait_db_list: - self.loadImage("imgloc" + str(n), "imgdata" + str(n)) - n = n + 1 + self.loadImage("imgloc" + str(n), "imgdata" + str(n)) + n = n + 1 def process_results(self, results): logger.info("Processing CTL output") @@ -213,4 +238,3 @@ class CTL: self.render_image(self.results) sys.stdout.flush() return(dict(template_vars)) - diff --git a/wqflask/wqflask/database.py b/wqflask/wqflask/database.py index adeed6ad..e743c4b3 100644 --- a/wqflask/wqflask/database.py +++ b/wqflask/wqflask/database.py @@ -6,7 +6,7 @@ from sqlalchemy.ext.declarative import declarative_base from utility.tools import SQL_URI import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) engine = create_engine(SQL_URI, encoding="latin1") @@ -17,6 +17,7 @@ db_session = scoped_session(sessionmaker(autocommit=False, Base = declarative_base() Base.query = db_session.query_property() + def init_db(): # import all modules here that might define models so that # they will be registered properly on the metadata. Otherwise @@ -27,4 +28,5 @@ def init_db(): Base.metadata.create_all(bind=engine) logger.info("Done creating all model metadata") + init_db() diff --git a/wqflask/wqflask/db_info.py b/wqflask/wqflask/db_info.py index 25e624ef..938c453e 100644 --- a/wqflask/wqflask/db_info.py +++ b/wqflask/wqflask/db_info.py @@ -23,23 +23,23 @@ class InfoPage: def get_info(self, create=False): query_base = ("SELECT InfoPageName, GN_AccesionId, Species.MenuName, Species.TaxonomyId, Tissue.Name, InbredSet.Name, " + - "GeneChip.GeneChipName, GeneChip.GeoPlatform, AvgMethod.Name, Datasets.DatasetName, Datasets.GeoSeries, " + - "Datasets.PublicationTitle, DatasetStatus.DatasetStatusName, Datasets.Summary, Datasets.AboutCases, " + - "Datasets.AboutTissue, Datasets.AboutDataProcessing, Datasets.Acknowledgment, Datasets.ExperimentDesign, " + - "Datasets.Contributors, Datasets.Citation, Datasets.Notes, Investigators.FirstName, Investigators.LastName, " + - "Investigators.Address, Investigators.City, Investigators.State, Investigators.ZipCode, Investigators.Country, " + - "Investigators.Phone, Investigators.Email, Investigators.Url, Organizations.OrganizationName, " + - "InvestigatorId, DatasetId, DatasetStatusId, Datasets.AboutPlatform, InfoFileTitle, Specifics " + - "FROM InfoFiles " + - "LEFT JOIN Species USING (SpeciesId) " + - "LEFT JOIN Tissue USING (TissueId) " + - "LEFT JOIN InbredSet USING (InbredSetId) " + - "LEFT JOIN GeneChip USING (GeneChipId) " + - "LEFT JOIN AvgMethod USING (AvgMethodId) " + - "LEFT JOIN Datasets USING (DatasetId) " + - "LEFT JOIN Investigators USING (InvestigatorId) " + - "LEFT JOIN Organizations USING (OrganizationId) " + - "LEFT JOIN DatasetStatus USING (DatasetStatusId) WHERE ") + "GeneChip.GeneChipName, GeneChip.GeoPlatform, AvgMethod.Name, Datasets.DatasetName, Datasets.GeoSeries, " + + "Datasets.PublicationTitle, DatasetStatus.DatasetStatusName, Datasets.Summary, Datasets.AboutCases, " + + "Datasets.AboutTissue, Datasets.AboutDataProcessing, Datasets.Acknowledgment, Datasets.ExperimentDesign, " + + "Datasets.Contributors, Datasets.Citation, Datasets.Notes, Investigators.FirstName, Investigators.LastName, " + + "Investigators.Address, Investigators.City, Investigators.State, Investigators.ZipCode, Investigators.Country, " + + "Investigators.Phone, Investigators.Email, Investigators.Url, Organizations.OrganizationName, " + + "InvestigatorId, DatasetId, DatasetStatusId, Datasets.AboutPlatform, InfoFileTitle, Specifics " + + "FROM InfoFiles " + + "LEFT JOIN Species USING (SpeciesId) " + + "LEFT JOIN Tissue USING (TissueId) " + + "LEFT JOIN InbredSet USING (InbredSetId) " + + "LEFT JOIN GeneChip USING (GeneChipId) " + + "LEFT JOIN AvgMethod USING (AvgMethodId) " + + "LEFT JOIN Datasets USING (DatasetId) " + + "LEFT JOIN Investigators USING (InvestigatorId) " + + "LEFT JOIN Organizations USING (OrganizationId) " + + "LEFT JOIN DatasetStatus USING (DatasetStatusId) WHERE ") if self.gn_accession_id: final_query = query_base + \ @@ -90,6 +90,7 @@ class InfoPage: except Exception as e: pass + def process_query_results(results): info_ob = { 'info_page_name': results[0], @@ -134,5 +135,3 @@ def process_query_results(results): } return info_ob - - diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index 364a3eed..761ae326 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -26,14 +26,16 @@ class DoSearch: def __init__(self, search_term, search_operator=None, dataset=None, search_type=None): self.search_term = search_term # Make sure search_operator is something we expect - assert search_operator in (None, "=", "<", ">", "<=", ">="), "Bad search operator" + assert search_operator in ( + None, "=", "<", ">", "<=", ">="), "Bad search operator" self.search_operator = search_operator self.dataset = dataset self.search_type = search_type if self.dataset: - #Get group information for dataset and the species id - self.species_id = webqtlDatabaseFunction.retrieve_species_id(self.dataset.group.name) + # Get group information for dataset and the species id + self.species_id = webqtlDatabaseFunction.retrieve_species_id( + self.dataset.group.name) def execute(self, query): """Executes query and returns results""" @@ -73,6 +75,7 @@ class DoSearch: else: return None + class MrnaAssaySearch(DoSearch): """A search within an expression dataset, including mRNA, protein, SNP, but not phenotype or metabolites""" @@ -103,12 +106,13 @@ class MrnaAssaySearch(DoSearch): search_string = escape(self.search_term[0]) if self.search_term[0] != "*": - match_clause = """((MATCH (ProbeSet.symbol) AGAINST ('%s' IN BOOLEAN MODE))) and """ % (search_string) + match_clause = """((MATCH (ProbeSet.symbol) AGAINST ('%s' IN BOOLEAN MODE))) and """ % ( + search_string) else: match_clause = "" - where_clause = (match_clause + - """ProbeSet.Id = ProbeSetXRef.ProbeSetId + where_clause = (match_clause + + """ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s """ % (escape(str(self.dataset.id)))) @@ -130,30 +134,30 @@ class MrnaAssaySearch(DoSearch): else: match_clause = "" - where_clause = (match_clause + - """ProbeSet.Id = ProbeSetXRef.ProbeSetId + where_clause = (match_clause + + """ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s """ % (escape(str(self.dataset.id)))) return where_clause - def compile_final_query(self, from_clause = '', where_clause = ''): + def compile_final_query(self, from_clause='', where_clause=''): """Generates the final query string""" from_clause = self.normalize_spaces(from_clause) query = (self.base_query + - """%s + """%s WHERE %s and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s ORDER BY ProbeSet.symbol ASC """ % (escape(from_clause), - where_clause, - escape(str(self.dataset.id)))) + where_clause, + escape(str(self.dataset.id)))) return query - def run_combined(self, from_clause = '', where_clause = ''): + def run_combined(self, from_clause='', where_clause=''): """Generates and runs a combined search of an mRNA expression dataset""" logger.debug("Running ProbeSetSearch") @@ -162,14 +166,14 @@ class MrnaAssaySearch(DoSearch): from_clause = self.normalize_spaces(from_clause) query = (self.base_query + - """%s + """%s WHERE %s and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s ORDER BY ProbeSet.symbol ASC """ % (escape(from_clause), - where_clause, - escape(str(self.dataset.id)))) + where_clause, + escape(str(self.dataset.id)))) return self.execute(query) @@ -195,15 +199,15 @@ class PhenotypeSearch(DoSearch): FROM Phenotype, PublishFreeze, Publication, PublishXRef """ search_fields = ('Phenotype.Post_publication_description', - 'Phenotype.Pre_publication_description', - 'Phenotype.Pre_publication_abbreviation', - 'Phenotype.Post_publication_abbreviation', - 'Phenotype.Lab_code', - 'Publication.PubMed_ID', - 'Publication.Abstract', - 'Publication.Title', - 'Publication.Authors', - 'PublishXRef.Id') + 'Phenotype.Pre_publication_description', + 'Phenotype.Pre_publication_abbreviation', + 'Phenotype.Post_publication_abbreviation', + 'Phenotype.Lab_code', + 'Publication.PubMed_ID', + 'Publication.Abstract', + 'Publication.Title', + 'Publication.Authors', + 'PublishXRef.Id') header_fields = ['Index', 'Record', @@ -218,53 +222,56 @@ class PhenotypeSearch(DoSearch): def get_where_clause(self): """Generate clause for WHERE portion of query""" - #Todo: Zach will figure out exactly what both these lines mean - #and comment here + # Todo: Zach will figure out exactly what both these lines mean + # and comment here - #if "'" not in self.search_term[0]: - search_term = "[[:<:]]" + self.handle_wildcard(self.search_term[0]) + "[[:>:]]" + # if "'" not in self.search_term[0]: + search_term = "[[:<:]]" + \ + self.handle_wildcard(self.search_term[0]) + "[[:>:]]" if "_" in self.search_term[0]: if len(self.search_term[0].split("_")[0]) == 3: - search_term = "[[:<:]]" + self.handle_wildcard(self.search_term[0].split("_")[1]) + "[[:>:]]" + search_term = "[[:<:]]" + self.handle_wildcard( + self.search_term[0].split("_")[1]) + "[[:>:]]" # This adds a clause to the query that matches the search term # against each field in the search_fields tuple where_clause_list = [] for field in self.search_fields: - where_clause_list.append('''%s REGEXP "%s"''' % (field, search_term)) + where_clause_list.append('''%s REGEXP "%s"''' % + (field, search_term)) where_clause = "(%s) " % ' OR '.join(where_clause_list) return where_clause - def compile_final_query(self, from_clause = '', where_clause = ''): + def compile_final_query(self, from_clause='', where_clause=''): """Generates the final query string""" from_clause = self.normalize_spaces(from_clause) if self.search_term[0] == "*": query = (self.base_query + - """%s + """%s WHERE PublishXRef.InbredSetId = %s and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id and PublishFreeze.Id = %s ORDER BY PublishXRef.Id""" % ( - from_clause, - escape(str(self.dataset.group.id)), - escape(str(self.dataset.id)))) + from_clause, + escape(str(self.dataset.group.id)), + escape(str(self.dataset.id)))) else: query = (self.base_query + - """%s + """%s WHERE %s and PublishXRef.InbredSetId = %s and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id and PublishFreeze.Id = %s ORDER BY PublishXRef.Id""" % ( - from_clause, - where_clause, - escape(str(self.dataset.group.id)), - escape(str(self.dataset.id)))) + from_clause, + where_clause, + escape(str(self.dataset.group.id)), + escape(str(self.dataset.id)))) return query @@ -276,26 +283,27 @@ class PhenotypeSearch(DoSearch): from_clause = self.normalize_spaces(from_clause) query = (self.base_query + - """%s + """%s WHERE %s PublishXRef.InbredSetId = %s and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id and PublishFreeze.Id = %s""" % ( - from_clause, - where_clause, - escape(str(self.dataset.group.id)), - escape(str(self.dataset.id)))) + from_clause, + where_clause, + escape(str(self.dataset.group.id)), + escape(str(self.dataset.id)))) return self.execute(query) def run(self): """Generates and runs a simple search of a phenotype dataset""" - query = self.compile_final_query(where_clause = self.get_where_clause()) + query = self.compile_final_query(where_clause=self.get_where_clause()) return self.execute(query) + class GenotypeSearch(DoSearch): """A search within a genotype dataset""" @@ -328,45 +336,46 @@ class GenotypeSearch(DoSearch): for field in self.search_fields: where_clause.append('''%s REGEXP "%s"''' % ("%s.%s" % self.mescape(self.dataset.type, field), - self.search_term)) + self.search_term)) logger.debug("hello ;where_clause is:", pf(where_clause)) where_clause = "(%s) " % ' OR '.join(where_clause) return where_clause - def compile_final_query(self, from_clause = '', where_clause = ''): + def compile_final_query(self, from_clause='', where_clause=''): """Generates the final query string""" from_clause = self.normalize_spaces(from_clause) - if self.search_term[0] == "*": - query = (self.base_query + - """WHERE Geno.Id = GenoXRef.GenoId + query = (self.base_query + + """WHERE Geno.Id = GenoXRef.GenoId and GenoXRef.GenoFreezeId = GenoFreeze.Id - and GenoFreeze.Id = %s"""% (escape(str(self.dataset.id)))) + and GenoFreeze.Id = %s""" % (escape(str(self.dataset.id)))) else: query = (self.base_query + - """WHERE %s + """WHERE %s and Geno.Id = GenoXRef.GenoId and GenoXRef.GenoFreezeId = GenoFreeze.Id - and GenoFreeze.Id = %s"""% (where_clause, - escape(str(self.dataset.id)))) + and GenoFreeze.Id = %s""" % (where_clause, + escape(str(self.dataset.id)))) return query def run(self): """Generates and runs a simple search of a genotype dataset""" - #Todo: Zach will figure out exactly what both these lines mean - #and comment here + # Todo: Zach will figure out exactly what both these lines mean + # and comment here if self.search_term[0] == "*": self.query = self.compile_final_query() else: - self.query = self.compile_final_query(where_clause = self.get_where_clause()) + self.query = self.compile_final_query( + where_clause=self.get_where_clause()) return self.execute(self.query) + class RifSearch(MrnaAssaySearch): """Searches for traits with a Gene RIF entry including the search term.""" @@ -390,10 +399,11 @@ class RifSearch(MrnaAssaySearch): return self.execute(query) + class WikiSearch(MrnaAssaySearch): """Searches GeneWiki for traits other people have annotated""" - DoSearch.search_types['ProbeSet_WIKI'] = "WikiSearch" + DoSearch.search_types['ProbeSet_WIKI'] = "WikiSearch" def get_from_clause(self): return ", GeneRIF " @@ -403,7 +413,7 @@ class WikiSearch(MrnaAssaySearch): and GeneRIF.versionId=0 and GeneRIF.display>0 and (GeneRIF.comment REGEXP '%s' or GeneRIF.initial = '%s') """ % (self.dataset.type, - "[[:<:]]"+str(self.search_term[0])+"[[:>:]]", + "[[:<:]]" + str(self.search_term[0]) + "[[:>:]]", str(self.search_term[0])) return where_clause @@ -415,10 +425,11 @@ class WikiSearch(MrnaAssaySearch): return self.execute(query) + class GoSearch(MrnaAssaySearch): """Searches for synapse-associated genes listed in the Gene Ontology.""" - DoSearch.search_types['ProbeSet_GO'] = "GoSearch" + DoSearch.search_types['ProbeSet_GO'] = "GoSearch" def get_from_clause(self): from_clause = """, db_GeneOntology.term as GOterm, @@ -429,7 +440,7 @@ class GoSearch(MrnaAssaySearch): def get_where_clause(self): field = 'GOterm.acc' - go_id = 'GO:' + ('0000000'+self.search_term[0])[-7:] + go_id = 'GO:' + ('0000000' + self.search_term[0])[-7:] statements = ("""%s.symbol=GOgene_product.symbol and GOassociation.gene_product_id=GOgene_product.id and @@ -448,7 +459,9 @@ class GoSearch(MrnaAssaySearch): return self.execute(query) -#ZS: Not sure what the best way to deal with LRS searches is +# ZS: Not sure what the best way to deal with LRS searches is + + class LrsSearch(DoSearch): """Searches for genes with a QTL within the given LRS values @@ -486,17 +499,18 @@ class LrsSearch(DoSearch): assert isinstance(self.search_term, (list, tuple)) lrs_min, lrs_max = self.search_term[:2] if self.search_type == "LOD": - lrs_min = lrs_min*4.61 - lrs_max = lrs_max*4.61 + lrs_min = lrs_min * 4.61 + lrs_max = lrs_max * 4.61 where_clause = """ %sXRef.LRS > %s and %sXRef.LRS < %s """ % self.mescape(self.dataset.type, - min(lrs_min, lrs_max), + min(lrs_min, + lrs_max), self.dataset.type, max(lrs_min, lrs_max)) if len(self.search_term) > 2: - #If the user typed, for example "Chr4", the "Chr" substring needs to be removed so that all search elements can be converted to floats + # If the user typed, for example "Chr4", the "Chr" substring needs to be removed so that all search elements can be converted to floats chr_num = self.search_term[2] if "chr" in self.search_term[2].lower(): chr_num = self.search_term[2].lower().replace("chr", "") @@ -512,27 +526,27 @@ class LrsSearch(DoSearch): where_clause += """ and %sXRef.Locus = Geno.name and Geno.SpeciesId = %s """ % self.mescape(self.dataset.type, - self.species_id) + self.species_id) else: # Deal with >, <, >=, and <= logger.debug("self.search_term is:", self.search_term) lrs_val = self.search_term[0] if self.search_type == "LOD": - lrs_val = lrs_val*4.61 + lrs_val = lrs_val * 4.61 where_clause = """ %sXRef.LRS %s %s """ % self.mescape(self.dataset.type, - self.search_operator, - self.search_term[0]) + self.search_operator, + self.search_term[0]) return where_clause - def run(self): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(self.from_clause, self.where_clause) + self.query = self.compile_final_query( + self.from_clause, self.where_clause) return self.execute(self.query) @@ -546,10 +560,12 @@ class MrnaLrsSearch(LrsSearch, MrnaAssaySearch): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(from_clause = self.from_clause, where_clause = self.where_clause) + self.query = self.compile_final_query( + from_clause=self.from_clause, where_clause=self.where_clause) return self.execute(self.query) + class PhenotypeLrsSearch(LrsSearch, PhenotypeSearch): for search_key in ('LRS', 'LOD'): @@ -560,7 +576,8 @@ class PhenotypeLrsSearch(LrsSearch, PhenotypeSearch): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(from_clause = self.from_clause, where_clause = self.where_clause) + self.query = self.compile_final_query( + from_clause=self.from_clause, where_clause=self.where_clause) return self.execute(self.query) @@ -587,7 +604,8 @@ class CisTransLrsSearch(DoSearch): elif len(self.search_term) == 3: lrs_min, lrs_max, self.mb_buffer = self.search_term elif len(self.search_term) == 4: - lrs_min, lrs_max, self.mb_buffer = [float(value) for value in self.search_term[:3]] + lrs_min, lrs_max, self.mb_buffer = [ + float(value) for value in self.search_term[:3]] chromosome = self.search_term[3] if "Chr" in chromosome or "chr" in chromosome: chromosome = int(chromosome[3:]) @@ -599,19 +617,19 @@ class CisTransLrsSearch(DoSearch): lrs_max = lrs_max * 4.61 sub_clause = """ %sXRef.LRS > %s and - %sXRef.LRS < %s and """ % ( - escape(self.dataset.type), - escape(str(min(lrs_min, lrs_max))), - escape(self.dataset.type), - escape(str(max(lrs_min, lrs_max))) - ) + %sXRef.LRS < %s and """ % ( + escape(self.dataset.type), + escape(str(min(lrs_min, lrs_max))), + escape(self.dataset.type), + escape(str(max(lrs_min, lrs_max))) + ) else: # Deal with >, <, >=, and <= - sub_clause = """ %sXRef.LRS %s %s and """ % ( - escape(self.dataset.type), - escape(self.search_operator), - escape(self.search_term[0]) - ) + sub_clause = """ %sXRef.LRS %s %s and """ % ( + escape(self.dataset.type), + escape(self.search_operator), + escape(self.search_term[0]) + ) if cis_trans == "cis": where_clause = sub_clause + """ @@ -619,36 +637,42 @@ class CisTransLrsSearch(DoSearch): %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and %s.Chr = Geno.Chr""" % ( - escape(self.dataset.type), - the_operator, - escape(str(self.mb_buffer)), - escape(self.dataset.type), - escape(str(self.species_id)), - escape(self.dataset.type) - ) + escape(self.dataset.type), + the_operator, + escape(str(self.mb_buffer)), + escape(self.dataset.type), + escape(str(self.species_id)), + escape(self.dataset.type) + ) else: if chromosome: location_clause = "(%s.Chr = '%s' and %s.Chr = Geno.Chr and ABS(%s.Mb-Geno.Mb) %s %s) or (%s.Chr != Geno.Chr and Geno.Chr = '%s')" % (escape(self.dataset.type), - chromosome, - escape(self.dataset.type), - escape(self.dataset.type), - the_operator, - escape(str(self.mb_buffer)), - escape(self.dataset.type), - chromosome) + chromosome, + escape( + self.dataset.type), + escape( + self.dataset.type), + the_operator, + escape( + str(self.mb_buffer)), + escape( + self.dataset.type), + chromosome) else: - location_clause = "(ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or (%s.Chr != Geno.Chr)" % (escape(self.dataset.type), the_operator, escape(str(self.mb_buffer)), escape(self.dataset.type), escape(self.dataset.type)) + location_clause = "(ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or (%s.Chr != Geno.Chr)" % (escape( + self.dataset.type), the_operator, escape(str(self.mb_buffer)), escape(self.dataset.type), escape(self.dataset.type)) where_clause = sub_clause + """ %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and (%s)""" % ( - escape(self.dataset.type), - escape(str(self.species_id)), - location_clause - ) + escape(self.dataset.type), + escape(str(self.species_id)), + location_clause + ) return where_clause + class CisLrsSearch(CisTransLrsSearch, MrnaAssaySearch): """ Searches for genes on a particular chromosome with a cis-eQTL within the given LRS values @@ -667,7 +691,7 @@ class CisLrsSearch(CisTransLrsSearch, MrnaAssaySearch): """ for search_key in ('LRS', 'LOD'): - DoSearch.search_types['ProbeSet_CIS'+search_key] = "CisLrsSearch" + DoSearch.search_types['ProbeSet_CIS' + search_key] = "CisLrsSearch" def get_where_clause(self): return CisTransLrsSearch.get_where_clause(self, "cis") @@ -676,10 +700,12 @@ class CisLrsSearch(CisTransLrsSearch, MrnaAssaySearch): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(self.from_clause, self.where_clause) + self.query = self.compile_final_query( + self.from_clause, self.where_clause) return self.execute(self.query) + class TransLrsSearch(CisTransLrsSearch, MrnaAssaySearch): """Searches for genes on a particular chromosome with a cis-eQTL within the given LRS values @@ -697,7 +723,7 @@ class TransLrsSearch(CisTransLrsSearch, MrnaAssaySearch): """ for search_key in ('LRS', 'LOD'): - DoSearch.search_types['ProbeSet_TRANS'+search_key] = "TransLrsSearch" + DoSearch.search_types['ProbeSet_TRANS' + search_key] = "TransLrsSearch" def get_where_clause(self): return CisTransLrsSearch.get_where_clause(self, "trans") @@ -706,7 +732,8 @@ class TransLrsSearch(CisTransLrsSearch, MrnaAssaySearch): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(self.from_clause, self.where_clause) + self.query = self.compile_final_query( + self.from_clause, self.where_clause) return self.execute(self.query) @@ -725,14 +752,15 @@ class MeanSearch(MrnaAssaySearch): where_clause = """ %sXRef.mean > %s and %sXRef.mean < %s """ % self.mescape(self.dataset.type, - min(self.mean_min, self.mean_max), - self.dataset.type, - max(self.mean_min, self.mean_max)) + min(self.mean_min, + self.mean_max), + self.dataset.type, + max(self.mean_min, self.mean_max)) else: # Deal with >, <, >=, and <= where_clause = """ %sXRef.mean %s %s """ % self.mescape(self.dataset.type, - self.search_operator, - self.search_term[0]) + self.search_operator, + self.search_term[0]) return where_clause @@ -740,10 +768,11 @@ class MeanSearch(MrnaAssaySearch): self.where_clause = self.get_where_clause() logger.debug("where_clause is:", pf(self.where_clause)) - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) + class RangeSearch(MrnaAssaySearch): """Searches for genes with a range of expression varying between two values""" @@ -775,10 +804,11 @@ class RangeSearch(MrnaAssaySearch): def run(self): self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) + class PositionSearch(DoSearch): """Searches for genes/markers located within a specified range on a specified chromosome""" @@ -786,7 +816,8 @@ class PositionSearch(DoSearch): DoSearch.search_types[search_key] = "PositionSearch" def get_where_clause(self): - self.search_term = [float(value) if is_number(value) else value for value in self.search_term] + self.search_term = [float(value) if is_number( + value) else value for value in self.search_term] chr, self.mb_min, self.mb_max = self.search_term[:3] self.chr = str(chr).lower() self.get_chr() @@ -796,11 +827,11 @@ class PositionSearch(DoSearch): %s.Mb < %s """ % self.mescape(self.dataset.type, self.chr, self.dataset.type, - min(self.mb_min, self.mb_max), + min(self.mb_min, + self.mb_max), self.dataset.type, max(self.mb_min, self.mb_max)) - return where_clause def get_chr(self): @@ -815,36 +846,39 @@ class PositionSearch(DoSearch): def run(self): self.get_where_clause() - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) + class MrnaPositionSearch(PositionSearch, MrnaAssaySearch): """Searches for genes located within a specified range on a specified chromosome""" for search_key in ('POSITION', 'POS', 'MB'): - DoSearch.search_types['ProbeSet_'+search_key] = "MrnaPositionSearch" + DoSearch.search_types['ProbeSet_' + search_key] = "MrnaPositionSearch" def run(self): self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) + class GenotypePositionSearch(PositionSearch, GenotypeSearch): """Searches for genes located within a specified range on a specified chromosome""" for search_key in ('POSITION', 'POS', 'MB'): - DoSearch.search_types['Geno_'+search_key] = "GenotypePositionSearch" + DoSearch.search_types['Geno_' + search_key] = "GenotypePositionSearch" def run(self): self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) + class PvalueSearch(MrnaAssaySearch): """Searches for traits with a permutationed p-value between low and high""" @@ -859,25 +893,26 @@ class PvalueSearch(MrnaAssaySearch): self.pvalue_min, self.pvalue_max = self.search_term[:2] self.where_clause = """ %sXRef.pValue > %s and %sXRef.pValue < %s """ % self.mescape( - self.dataset.type, - min(self.pvalue_min, self.pvalue_max), - self.dataset.type, - max(self.pvalue_min, self.pvalue_max)) + self.dataset.type, + min(self.pvalue_min, self.pvalue_max), + self.dataset.type, + max(self.pvalue_min, self.pvalue_max)) else: # Deal with >, <, >=, and <= self.where_clause = """ %sXRef.pValue %s %s """ % self.mescape( - self.dataset.type, - self.search_operator, - self.search_term[0]) + self.dataset.type, + self.search_operator, + self.search_term[0]) logger.debug("where_clause is:", pf(self.where_clause)) - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) logger.sql(self.query) return self.execute(self.query) + class AuthorSearch(PhenotypeSearch): """Searches for phenotype traits with specified author(s)""" @@ -888,7 +923,7 @@ class AuthorSearch(PhenotypeSearch): self.where_clause = """ Publication.Authors REGEXP "[[:<:]]%s[[:>:]]" and """ % (self.search_term[0]) - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) @@ -900,6 +935,7 @@ def is_number(s): except ValueError: return False + def get_aliases(symbol, species): if species == "mouse": symbol_string = symbol.capitalize() @@ -909,7 +945,8 @@ def get_aliases(symbol, species): return [] filtered_aliases = [] - response = requests.get(GN2_BASE_URL + "/gn3/gene/aliases/" + symbol_string) + response = requests.get( + GN2_BASE_URL + "/gn3/gene/aliases/" + symbol_string) if response: alias_list = json.loads(response.content) @@ -923,9 +960,10 @@ def get_aliases(symbol, species): return filtered_aliases + if __name__ == "__main__": - ### Usually this will be used as a library, but call it from the command line for testing - ### And it runs the code below + # Usually this will be used as a library, but call it from the command line for testing + # And it runs the code below import MySQLdb import sys diff --git a/wqflask/wqflask/docs.py b/wqflask/wqflask/docs.py index 207767c4..0a1a597d 100644 --- a/wqflask/wqflask/docs.py +++ b/wqflask/wqflask/docs.py @@ -5,6 +5,7 @@ from flask import g from utility.logger import getLogger logger = getLogger(__name__) + class Docs: def __init__(self, entry, start_vars={}): @@ -19,11 +20,10 @@ class Docs: self.title = self.entry.capitalize() self.content = "" else: - + self.title = result[0] self.content = result[1].decode("utf-8") - self.editable = "false" # ZS: Removing option to edit to see if text still gets vandalized try: @@ -35,11 +35,13 @@ class Docs: def update_text(start_vars): content = start_vars['ckcontent'] - content = content.replace('%', '%%').replace('"', '\\"').replace("'", "\\'") + content = content.replace('%', '%%').replace( + '"', '\\"').replace("'", "\\'") try: if g.user_session.record['user_email_address'] == "zachary.a.sloan@gmail.com" or g.user_session.record['user_email_address'] == "labwilliams@gmail.com": - sql = "UPDATE Docs SET content='{0}' WHERE entry='{1}';".format(content, start_vars['entry_type']) + sql = "UPDATE Docs SET content='{0}' WHERE entry='{1}';".format( + content, start_vars['entry_type']) g.db.execute(sql) except: pass diff --git a/wqflask/wqflask/export_traits.py b/wqflask/wqflask/export_traits.py index 6fb760e0..a22d6acc 100644 --- a/wqflask/wqflask/export_traits.py +++ b/wqflask/wqflask/export_traits.py @@ -1,6 +1,6 @@ import csv import xlsxwriter -import io +import io import datetime import itertools @@ -13,13 +13,14 @@ from base.trait import create_trait, retrieve_trait_info from pprint import pformat as pf from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) + def export_search_results_csv(targs): table_data = json.loads(targs['export_data']) table_rows = table_data['rows'] - + now = datetime.datetime.now() time_str = now.strftime('%H:%M_%d%B%Y') if 'file_name' in targs: @@ -34,9 +35,12 @@ def export_search_results_csv(targs): metadata.append(["Data Set: " + targs['database_name']]) if 'accession_id' in targs: if targs['accession_id'] != "None": - metadata.append(["Metadata Link: http://genenetwork.org/webqtl/main.py?FormID=sharinginfo&GN_AccessionId=" + targs['accession_id']]) - metadata.append(["Export Date: " + datetime.datetime.now().strftime("%B %d, %Y")]) - metadata.append(["Export Time: " + datetime.datetime.now().strftime("%H:%M GMT")]) + metadata.append( + ["Metadata Link: http://genenetwork.org/webqtl/main.py?FormID=sharinginfo&GN_AccessionId=" + targs['accession_id']]) + metadata.append( + ["Export Date: " + datetime.datetime.now().strftime("%B %d, %Y")]) + metadata.append( + ["Export Time: " + datetime.datetime.now().strftime("%H:%M GMT")]) if 'search_string' in targs: if targs['search_string'] != "None": metadata.append(["Search Query: " + targs['search_string']]) @@ -51,10 +55,12 @@ def export_search_results_csv(targs): for trait in table_rows: trait_name, dataset_name, _hash = trait.split(":") trait_ob = create_trait(name=trait_name, dataset_name=dataset_name) - trait_ob = retrieve_trait_info(trait_ob, trait_ob.dataset, get_qtl_info=True) + trait_ob = retrieve_trait_info( + trait_ob, trait_ob.dataset, get_qtl_info=True) trait_list.append(trait_ob) - table_headers = ['Index', 'URL', 'Species', 'Group', 'Dataset', 'Record ID', 'Symbol', 'Description', 'ProbeTarget', 'PubMed_ID', 'Chr', 'Mb', 'Alias', 'Gene_ID', 'Homologene_ID', 'UniGene_ID', 'Strand_Probe', 'Probe_set_specificity', 'Probe_set_BLAT_score', 'Probe_set_BLAT_Mb_start', 'Probe_set_BLAT_Mb_end', 'QTL_Chr', 'QTL_Mb', 'Locus_at_Peak', 'Max_LRS', 'P_value_of_MAX', 'Mean_Expression'] + table_headers = ['Index', 'URL', 'Species', 'Group', 'Dataset', 'Record ID', 'Symbol', 'Description', 'ProbeTarget', 'PubMed_ID', 'Chr', 'Mb', 'Alias', 'Gene_ID', 'Homologene_ID', 'UniGene_ID', + 'Strand_Probe', 'Probe_set_specificity', 'Probe_set_BLAT_score', 'Probe_set_BLAT_Mb_start', 'Probe_set_BLAT_Mb_end', 'QTL_Chr', 'QTL_Mb', 'Locus_at_Peak', 'Max_LRS', 'P_value_of_MAX', 'Mean_Expression'] traits_by_group = sort_traits_by_group(trait_list) @@ -86,7 +92,8 @@ def export_search_results_csv(targs): trait_symbol = "N/A" row_contents = [ i + 1, - "https://genenetwork.org/show_trait?trait_id=" + str(trait.name) + "&dataset=" + str(trait.dataset.name), + "https://genenetwork.org/show_trait?trait_id=" + \ + str(trait.name) + "&dataset=" + str(trait.dataset.name), trait.dataset.group.species, trait.dataset.group.name, trait.dataset.name, @@ -116,13 +123,15 @@ def export_search_results_csv(targs): for sample in trait.dataset.group.samplelist: if sample in trait.data: - row_contents += [trait.data[sample].value, trait.data[sample].variance] + row_contents += [trait.data[sample].value, + trait.data[sample].variance] else: row_contents += ["x", "x"] csv_rows.append(row_contents) - csv_rows = list(map(list, itertools.zip_longest(*[row for row in csv_rows]))) + csv_rows = list( + map(list, itertools.zip_longest(*[row for row in csv_rows]))) writer.writerows(csv_rows) csv_data = buff.getvalue() buff.close() @@ -132,6 +141,7 @@ def export_search_results_csv(targs): return file_list + def sort_traits_by_group(trait_list=[]): traits_by_group = {} for trait in trait_list: diff --git a/wqflask/wqflask/external_tools/send_to_bnw.py b/wqflask/wqflask/external_tools/send_to_bnw.py index c5c79e98..c1b14ede 100644 --- a/wqflask/wqflask/external_tools/send_to_bnw.py +++ b/wqflask/wqflask/external_tools/send_to_bnw.py @@ -1,4 +1,4 @@ -## Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License @@ -22,11 +22,13 @@ from base.trait import GeneralTrait from utility import helper_functions, corr_result_helpers import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + class SendToBNW: def __init__(self, start_vars): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) trait_samples_list = [] @@ -38,9 +40,10 @@ class SendToBNW: trait1_samples = list(this_sample_data.keys()) trait_samples_list.append(trait1_samples) - shared_samples = list(set(trait_samples_list[0]).intersection(*trait_samples_list)) + shared_samples = list( + set(trait_samples_list[0]).intersection(*trait_samples_list)) - self.form_value = "" #ZS: string that is passed to BNW through form + self.form_value = "" # ZS: string that is passed to BNW through form values_list = [] for trait_db in self.trait_list: this_trait = trait_db[0] diff --git a/wqflask/wqflask/external_tools/send_to_geneweaver.py b/wqflask/wqflask/external_tools/send_to_geneweaver.py index 47e4c53a..9a4f7150 100644 --- a/wqflask/wqflask/external_tools/send_to_geneweaver.py +++ b/wqflask/wqflask/external_tools/send_to_geneweaver.py @@ -1,4 +1,4 @@ -## Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License @@ -27,11 +27,13 @@ from base.species import TheSpecies from utility import helper_functions, corr_result_helpers import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + class SendToGeneWeaver: def __init__(self, start_vars): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) self.chip_name = test_chip(self.trait_list) @@ -52,11 +54,12 @@ class SendToGeneWeaver: trait_name_list = get_trait_name_list(self.trait_list) self.hidden_vars = { - 'client': "genenetwork", - 'species': species_name, - 'idtype': self.chip_name, - 'list': ",".join(trait_name_list), - } + 'client': "genenetwork", + 'species': species_name, + 'idtype': self.chip_name, + 'list': ",".join(trait_name_list), + } + def get_trait_name_list(trait_list): name_list = [] @@ -65,6 +68,7 @@ def get_trait_name_list(trait_list): return name_list + def test_chip(trait_list): final_chip_name = "" @@ -74,7 +78,7 @@ def test_chip(trait_list): FROM GeneChip, ProbeFreeze, ProbeSetFreeze WHERE GeneChip.Id = ProbeFreeze.ChipId and ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and - ProbeSetFreeze.Name = '%s'""" % dataset.name).fetchone() + ProbeSetFreeze.Name = '%s'""" % dataset.name).fetchone() if result: chip_name = result[0] diff --git a/wqflask/wqflask/external_tools/send_to_webgestalt.py b/wqflask/wqflask/external_tools/send_to_webgestalt.py index e1e5e655..6e74f4fe 100644 --- a/wqflask/wqflask/external_tools/send_to_webgestalt.py +++ b/wqflask/wqflask/external_tools/send_to_webgestalt.py @@ -1,4 +1,4 @@ -## Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License @@ -27,11 +27,13 @@ from base.species import TheSpecies from utility import helper_functions, corr_result_helpers import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + class SendToWebGestalt: def __init__(self, start_vars): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) self.chip_name = test_chip(self.trait_list) @@ -46,18 +48,18 @@ class SendToWebGestalt: id_type = "entrezgene" - self.hidden_vars = { - 'gene_list' : "\n".join(gene_id_list), - 'id_type' : "entrezgene", - 'ref_set' : "genome", - 'enriched_database_category' : "geneontology", - 'enriched_database_name' : "Biological_Process", - 'sig_method' : "fdr", - 'sig_value' : "0.05", - 'enrich_method' : "ORA", - 'fdr_method' : "BH", - 'min_num' : "2" - } + self.hidden_vars = { + 'gene_list': "\n".join(gene_id_list), + 'id_type': "entrezgene", + 'ref_set': "genome", + 'enriched_database_category': "geneontology", + 'enriched_database_name': "Biological_Process", + 'sig_method': "fdr", + 'sig_value': "0.05", + 'enrich_method': "ORA", + 'fdr_method': "BH", + 'min_num': "2" + } species = self.trait_list[0][1].group.species if species == "rat": @@ -69,6 +71,7 @@ class SendToWebGestalt: else: self.hidden_vars['organism'] = "others" + def test_chip(trait_list): final_chip_name = "" @@ -78,7 +81,7 @@ def test_chip(trait_list): FROM GeneChip, ProbeFreeze, ProbeSetFreeze WHERE GeneChip.Id = ProbeFreeze.ChipId and ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and - ProbeSetFreeze.Name = '%s'""" % dataset.name).fetchone() + ProbeSetFreeze.Name = '%s'""" % dataset.name).fetchone() if result: chip_name = result[0] @@ -113,6 +116,7 @@ def test_chip(trait_list): return chip_name + def gen_gene_id_list(trait_list): trait_name_list = [] gene_id_list = [] diff --git a/wqflask/wqflask/group_manager.py b/wqflask/wqflask/group_manager.py index 69ee9623..04a100ba 100644 --- a/wqflask/wqflask/group_manager.py +++ b/wqflask/wqflask/group_manager.py @@ -1,4 +1,5 @@ -import random, string +import random +import string from flask import (Flask, g, render_template, url_for, request, make_response, redirect, flash) @@ -7,149 +8,168 @@ from wqflask import app from wqflask.user_login import send_verification_email, send_invitation_email, basic_info, set_password from utility.redis_tools import get_user_groups, get_group_info, save_user, create_group, delete_group, add_users_to_group, remove_users_from_group, \ - change_group_name, save_verification_code, check_verification_code, get_user_by_unique_column, get_resources, get_resource_info + change_group_name, save_verification_code, check_verification_code, get_user_by_unique_column, get_resources, get_resource_info from utility.logger import getLogger logger = getLogger(__name__) + @app.route("/groups/manage", methods=('GET', 'POST')) def manage_groups(): - params = request.form if request.form else request.args - if "add_new_group" in params: - return redirect(url_for('add_group')) - else: - admin_groups, member_groups = get_user_groups(g.user_session.user_id) - return render_template("admin/group_manager.html", admin_groups=admin_groups, member_groups=member_groups) + params = request.form if request.form else request.args + if "add_new_group" in params: + return redirect(url_for('add_group')) + else: + admin_groups, member_groups = get_user_groups(g.user_session.user_id) + return render_template("admin/group_manager.html", admin_groups=admin_groups, member_groups=member_groups) + @app.route("/groups/view", methods=('GET', 'POST')) def view_group(): - params = request.form if request.form else request.args - group_id = params['id'] - group_info = get_group_info(group_id) - admins_info = [] - user_is_admin = False - if g.user_session.user_id in group_info['admins']: - user_is_admin = True - for user_id in group_info['admins']: - if user_id: - user_info = get_user_by_unique_column("user_id", user_id) - admins_info.append(user_info) - members_info = [] - for user_id in group_info['members']: - if user_id: - user_info = get_user_by_unique_column("user_id", user_id) - members_info.append(user_info) - - #ZS: This whole part might not scale well with many resources - resources_info = [] - all_resources = get_resources() - for resource_id in all_resources: - resource_info = get_resource_info(resource_id) - group_masks = resource_info['group_masks'] - if group_id in group_masks: - this_resource = {} - privileges = group_masks[group_id] - this_resource['id'] = resource_id - this_resource['name'] = resource_info['name'] - this_resource['data'] = privileges['data'] - this_resource['metadata'] = privileges['metadata'] - this_resource['admin'] = privileges['admin'] - resources_info.append(this_resource) - - return render_template("admin/view_group.html", group_info=group_info, admins=admins_info, members=members_info, user_is_admin=user_is_admin, resources=resources_info) + params = request.form if request.form else request.args + group_id = params['id'] + group_info = get_group_info(group_id) + admins_info = [] + user_is_admin = False + if g.user_session.user_id in group_info['admins']: + user_is_admin = True + for user_id in group_info['admins']: + if user_id: + user_info = get_user_by_unique_column("user_id", user_id) + admins_info.append(user_info) + members_info = [] + for user_id in group_info['members']: + if user_id: + user_info = get_user_by_unique_column("user_id", user_id) + members_info.append(user_info) + + # ZS: This whole part might not scale well with many resources + resources_info = [] + all_resources = get_resources() + for resource_id in all_resources: + resource_info = get_resource_info(resource_id) + group_masks = resource_info['group_masks'] + if group_id in group_masks: + this_resource = {} + privileges = group_masks[group_id] + this_resource['id'] = resource_id + this_resource['name'] = resource_info['name'] + this_resource['data'] = privileges['data'] + this_resource['metadata'] = privileges['metadata'] + this_resource['admin'] = privileges['admin'] + resources_info.append(this_resource) + + return render_template("admin/view_group.html", group_info=group_info, admins=admins_info, members=members_info, user_is_admin=user_is_admin, resources=resources_info) + @app.route("/groups/remove", methods=('POST',)) def remove_groups(): - group_ids_to_remove = request.form['selected_group_ids'] - for group_id in group_ids_to_remove.split(":"): - delete_group(g.user_session.user_id, group_id) + group_ids_to_remove = request.form['selected_group_ids'] + for group_id in group_ids_to_remove.split(":"): + delete_group(g.user_session.user_id, group_id) + + return redirect(url_for('manage_groups')) - return redirect(url_for('manage_groups')) @app.route("/groups/remove_users", methods=('POST',)) def remove_users(): - group_id = request.form['group_id'] - admin_ids_to_remove = request.form['selected_admin_ids'] - member_ids_to_remove = request.form['selected_member_ids'] + group_id = request.form['group_id'] + admin_ids_to_remove = request.form['selected_admin_ids'] + member_ids_to_remove = request.form['selected_member_ids'] - remove_users_from_group(g.user_session.user_id, admin_ids_to_remove.split(":"), group_id, user_type="admins") - remove_users_from_group(g.user_session.user_id, member_ids_to_remove.split(":"), group_id, user_type="members") + remove_users_from_group(g.user_session.user_id, admin_ids_to_remove.split( + ":"), group_id, user_type="admins") + remove_users_from_group(g.user_session.user_id, member_ids_to_remove.split( + ":"), group_id, user_type="members") + + return redirect(url_for('view_group', id=group_id)) - return redirect(url_for('view_group', id=group_id)) @app.route("/groups/add_<path:user_type>", methods=('POST',)) def add_users(user_type='members'): - group_id = request.form['group_id'] - if user_type == "admins": - user_emails = request.form['admin_emails_to_add'].split(",") - add_users_to_group(g.user_session.user_id, group_id, user_emails, admins = True) - elif user_type == "members": - user_emails = request.form['member_emails_to_add'].split(",") - add_users_to_group(g.user_session.user_id, group_id, user_emails, admins = False) + group_id = request.form['group_id'] + if user_type == "admins": + user_emails = request.form['admin_emails_to_add'].split(",") + add_users_to_group(g.user_session.user_id, group_id, + user_emails, admins=True) + elif user_type == "members": + user_emails = request.form['member_emails_to_add'].split(",") + add_users_to_group(g.user_session.user_id, group_id, + user_emails, admins=False) + + return redirect(url_for('view_group', id=group_id)) - return redirect(url_for('view_group', id=group_id)) @app.route("/groups/change_name", methods=('POST',)) def change_name(): - group_id = request.form['group_id'] - new_name = request.form['new_name'] - group_info = change_group_name(g.user_session.user_id, group_id, new_name) + group_id = request.form['group_id'] + new_name = request.form['new_name'] + group_info = change_group_name(g.user_session.user_id, group_id, new_name) + + return new_name - return new_name @app.route("/groups/create", methods=('GET', 'POST')) def add_or_edit_group(): - params = request.form if request.form else request.args - if "group_name" in params: - member_user_ids = set() - admin_user_ids = set() - admin_user_ids.add(g.user_session.user_id) #ZS: Always add the user creating the group as an admin - if "admin_emails_to_add" in params: - admin_emails = params['admin_emails_to_add'].split(",") - for email in admin_emails: - user_details = get_user_by_unique_column("email_address", email) - if user_details: - admin_user_ids.add(user_details['user_id']) - #send_group_invites(params['group_id'], user_email_list = admin_emails, user_type="admins") - if "member_emails_to_add" in params: - member_emails = params['member_emails_to_add'].split(",") - for email in member_emails: - user_details = get_user_by_unique_column("email_address", email) - if user_details: - member_user_ids.add(user_details['user_id']) - #send_group_invites(params['group_id'], user_email_list = user_emails, user_type="members") - - create_group(list(admin_user_ids), list(member_user_ids), params['group_name']) - return redirect(url_for('manage_groups')) - else: - return render_template("admin/create_group.html") - -#ZS: Will integrate this later, for now just letting users be added directly -def send_group_invites(group_id, user_email_list = [], user_type="members"): - for user_email in user_email_list: - user_details = get_user_by_unique_column("email_address", user_email) - if user_details: - group_info = get_group_info(group_id) - #ZS: Probably not necessary since the group should normally always exist if group_id is being passed here, - # but it's technically possible to hit it if Redis is cleared out before submitting the new users or something - if group_info: - #ZS: Don't add user if they're already an admin or if they're being added a regular user and are already a regular user, - # but do add them if they're a regular user and are added as an admin - if (user_details['user_id'] in group_info['admins']) or \ - ((user_type == "members") and (user_details['user_id'] in group_info['members'])): - continue - else: - send_verification_email(user_details, template_name = "email/group_verification.txt", key_prefix = "verification_code", subject = "You've been invited to join a GeneNetwork user group") - else: - temp_password = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - user_details = { - 'user_id': str(uuid.uuid4()), - 'email_address': user_email, - 'registration_info': basic_info(), - 'password': set_password(temp_password), - 'confirmed': 0 - } - save_user(user_details, user_details['user_id']) - send_invitation_email(user_email, temp_password) - -#@app.route() + params = request.form if request.form else request.args + if "group_name" in params: + member_user_ids = set() + admin_user_ids = set() + # ZS: Always add the user creating the group as an admin + admin_user_ids.add(g.user_session.user_id) + if "admin_emails_to_add" in params: + admin_emails = params['admin_emails_to_add'].split(",") + for email in admin_emails: + user_details = get_user_by_unique_column( + "email_address", email) + if user_details: + admin_user_ids.add(user_details['user_id']) + #send_group_invites(params['group_id'], user_email_list = admin_emails, user_type="admins") + if "member_emails_to_add" in params: + member_emails = params['member_emails_to_add'].split(",") + for email in member_emails: + user_details = get_user_by_unique_column( + "email_address", email) + if user_details: + member_user_ids.add(user_details['user_id']) + #send_group_invites(params['group_id'], user_email_list = user_emails, user_type="members") + + create_group(list(admin_user_ids), list( + member_user_ids), params['group_name']) + return redirect(url_for('manage_groups')) + else: + return render_template("admin/create_group.html") + +# ZS: Will integrate this later, for now just letting users be added directly + + +def send_group_invites(group_id, user_email_list=[], user_type="members"): + for user_email in user_email_list: + user_details = get_user_by_unique_column("email_address", user_email) + if user_details: + group_info = get_group_info(group_id) + # ZS: Probably not necessary since the group should normally always exist if group_id is being passed here, + # but it's technically possible to hit it if Redis is cleared out before submitting the new users or something + if group_info: + # ZS: Don't add user if they're already an admin or if they're being added a regular user and are already a regular user, + # but do add them if they're a regular user and are added as an admin + if (user_details['user_id'] in group_info['admins']) or \ + ((user_type == "members") and (user_details['user_id'] in group_info['members'])): + continue + else: + send_verification_email(user_details, template_name="email/group_verification.txt", + key_prefix="verification_code", subject="You've been invited to join a GeneNetwork user group") + else: + temp_password = ''.join(random.choice( + string.ascii_uppercase + string.digits) for _ in range(6)) + user_details = { + 'user_id': str(uuid.uuid4()), + 'email_address': user_email, + 'registration_info': basic_info(), + 'password': set_password(temp_password), + 'confirmed': 0 + } + save_user(user_details, user_details['user_id']) + send_invitation_email(user_email, temp_password) + +# @app.route() diff --git a/wqflask/wqflask/gsearch.py b/wqflask/wqflask/gsearch.py index 9bf23d57..fb8bdc55 100644 --- a/wqflask/wqflask/gsearch.py +++ b/wqflask/wqflask/gsearch.py @@ -18,6 +18,7 @@ from utility.type_checking import is_float, is_int, is_str, get_float, get_int, from utility.logger import getLogger logger = getLogger(__name__) + class GSearch: def __init__(self, kw): @@ -76,18 +77,21 @@ class GSearch: this_trait['name'] = line[5] this_trait['dataset'] = line[3] this_trait['dataset_fullname'] = line[4] - this_trait['hmac'] = hmac.data_hmac('{}:{}'.format(line[5], line[3])) + this_trait['hmac'] = hmac.data_hmac( + '{}:{}'.format(line[5], line[3])) this_trait['species'] = line[0] this_trait['group'] = line[1] this_trait['tissue'] = line[2] this_trait['symbol'] = line[6] if line[7]: - this_trait['description'] = line[7].decode('utf-8', 'replace') + this_trait['description'] = line[7].decode( + 'utf-8', 'replace') else: this_trait['description'] = "N/A" this_trait['location_repr'] = 'N/A' if (line[8] != "NULL" and line[8] != "") and (line[9] != 0): - this_trait['location_repr'] = 'Chr%s: %.6f' % (line[8], float(line[9])) + this_trait['location_repr'] = 'Chr%s: %.6f' % ( + line[8], float(line[9])) try: this_trait['mean'] = '%.3f' % line[10] except: @@ -102,7 +106,8 @@ class GSearch: this_trait['locus_chr'] = line[16] this_trait['locus_mb'] = line[17] - dataset_ob = SimpleNamespace(id=this_trait["dataset_id"], type="ProbeSet",species=this_trait["species"]) + dataset_ob = SimpleNamespace( + id=this_trait["dataset_id"], type="ProbeSet", species=this_trait["species"]) if dataset_ob.id not in dataset_to_permissions: permissions = check_resource_availability(dataset_ob) dataset_to_permissions[dataset_ob.id] = permissions @@ -117,7 +122,9 @@ class GSearch: max_lrs_text = "N/A" if this_trait['locus_chr'] != None and this_trait['locus_mb'] != None: - max_lrs_text = "Chr" + str(this_trait['locus_chr']) + ": " + str(this_trait['locus_mb']) + max_lrs_text = "Chr" + \ + str(this_trait['locus_chr']) + \ + ": " + str(this_trait['locus_mb']) this_trait['max_lrs_text'] = max_lrs_text trait_list.append(this_trait) @@ -126,18 +133,18 @@ class GSearch: self.trait_list = json.dumps(trait_list) self.header_fields = ['Index', - 'Record', - 'Species', - 'Group', - 'Tissue', - 'Dataset', - 'Symbol', - 'Description', - 'Location', - 'Mean', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] + 'Record', + 'Species', + 'Group', + 'Tissue', + 'Dataset', + 'Symbol', + 'Description', + 'Location', + 'Mean', + 'Max LRS', + 'Max LRS Location', + 'Additive Effect'] elif self.type == "phenotype": search_term = self.terms @@ -145,7 +152,8 @@ class GSearch: if "_" in self.terms: if len(self.terms.split("_")[0]) == 3: search_term = self.terms.split("_")[1] - group_clause = "AND InbredSet.`InbredSetCode` = '{}'".format(self.terms.split("_")[0]) + group_clause = "AND InbredSet.`InbredSetCode` = '{}'".format( + self.terms.split("_")[0]) sql = """ SELECT Species.`Name`, @@ -191,18 +199,22 @@ class GSearch: this_trait['index'] = i + 1 this_trait['name'] = str(line[4]) if len(str(line[12])) == 3: - this_trait['display_name'] = str(line[12]) + "_" + this_trait['name'] + this_trait['display_name'] = str( + line[12]) + "_" + this_trait['name'] else: this_trait['display_name'] = this_trait['name'] this_trait['dataset'] = line[2] this_trait['dataset_fullname'] = line[3] - this_trait['hmac'] = hmac.data_hmac('{}:{}'.format(line[4], line[2])) + this_trait['hmac'] = hmac.data_hmac( + '{}:{}'.format(line[4], line[2])) this_trait['species'] = line[0] this_trait['group'] = line[1] if line[9] != None and line[6] != None: - this_trait['description'] = line[6].decode('utf-8', 'replace') + this_trait['description'] = line[6].decode( + 'utf-8', 'replace') elif line[5] != None: - this_trait['description'] = line[5].decode('utf-8', 'replace') + this_trait['description'] = line[5].decode( + 'utf-8', 'replace') else: this_trait['description'] = "N/A" if line[13] != None and line[13] != "": @@ -220,7 +232,8 @@ class GSearch: else: this_trait['pubmed_link'] = "N/A" if line[12]: - this_trait['display_name'] = line[12] + "_" + str(this_trait['name']) + this_trait['display_name'] = line[12] + \ + "_" + str(this_trait['name']) this_trait['LRS_score_repr'] = "N/A" if line[10] != "" and line[10] != None: this_trait['LRS_score_repr'] = '%3.1f' % line[10] @@ -229,15 +242,18 @@ class GSearch: this_trait['additive'] = '%.3f' % line[11] this_trait['max_lrs_text'] = "N/A" - trait_ob = create_trait(dataset_name=this_trait['dataset'], name=this_trait['name'], get_qtl_info=True, get_sample_info=False) + trait_ob = create_trait( + dataset_name=this_trait['dataset'], name=this_trait['name'], get_qtl_info=True, get_sample_info=False) if not trait_ob: continue if this_trait['dataset'] == this_trait['group'] + "Publish": - try: - if trait_ob.locus_chr != "" and trait_ob.locus_mb != "": - this_trait['max_lrs_text'] = "Chr" + str(trait_ob.locus_chr) + ": " + str(trait_ob.locus_mb) - except: - this_trait['max_lrs_text'] = "N/A" + try: + if trait_ob.locus_chr != "" and trait_ob.locus_mb != "": + this_trait['max_lrs_text'] = "Chr" + \ + str(trait_ob.locus_chr) + \ + ": " + str(trait_ob.locus_mb) + except: + this_trait['max_lrs_text'] = "N/A" trait_list.append(this_trait) @@ -245,12 +261,12 @@ class GSearch: self.trait_list = json.dumps(trait_list) self.header_fields = ['Index', - 'Species', - 'Group', - 'Record', - 'Description', - 'Authors', - 'Year', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] + 'Species', + 'Group', + 'Record', + 'Description', + 'Authors', + 'Year', + 'Max LRS', + 'Max LRS Location', + 'Additive Effect'] diff --git a/wqflask/wqflask/heatmap/heatmap.py b/wqflask/wqflask/heatmap/heatmap.py index 20e3559a..001bab3b 100644 --- a/wqflask/wqflask/heatmap/heatmap.py +++ b/wqflask/wqflask/heatmap/heatmap.py @@ -12,19 +12,21 @@ from utility.logger import getLogger Redis = Redis() -logger = getLogger(__name__ ) +logger = getLogger(__name__) + class Heatmap: def __init__(self, start_vars, temp_uuid): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) self.temp_uuid = temp_uuid self.num_permutations = 5000 self.dataset = self.trait_list[0][1] - self.json_data = {} #The dictionary that will be used to create the json object that contains all the data needed to create the figure + self.json_data = {} # The dictionary that will be used to create the json object that contains all the data needed to create the figure self.all_sample_list = [] self.traits = [] @@ -32,7 +34,8 @@ class Heatmap: chrnames = [] self.species = species.TheSpecies(dataset=self.trait_list[0][1]) for key in list(self.species.chromosomes.chromosomes.keys()): - chrnames.append([self.species.chromosomes.chromosomes[key].name, self.species.chromosomes.chromosomes[key].mb_length]) + chrnames.append([self.species.chromosomes.chromosomes[key].name, + self.species.chromosomes.chromosomes[key].mb_length]) for trait_db in self.trait_list: @@ -83,7 +86,7 @@ class Heatmap: self.json_data[trait] = self.trait_results[trait] self.js_data = dict( - json_data = self.json_data + json_data=self.json_data ) def gen_reaper_results(self): @@ -107,19 +110,22 @@ class Heatmap: trimmed_samples.append(str(samples[i])) trimmed_values.append(values[i]) - trait_filename = str(this_trait.name) + "_" + str(self.dataset.name) + "_pheno" + trait_filename = str(this_trait.name) + "_" + \ + str(self.dataset.name) + "_pheno" gen_pheno_txt_file(trimmed_samples, trimmed_values, trait_filename) - output_filename = self.dataset.group.name + "_GWA_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) + output_filename = self.dataset.group.name + "_GWA_" + \ + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) reaper_command = REAPER_COMMAND + ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt -n 1000 -o {4}{5}.txt'.format(flat_files('genotype'), - genofile_name, - TEMPDIR, - trait_filename, - webqtlConfig.GENERATED_IMAGE_DIR, - output_filename) + genofile_name, + TEMPDIR, + trait_filename, + webqtlConfig.GENERATED_IMAGE_DIR, + output_filename) - os.system(reaper_command) + os.system(reaper_command) reaper_results = parse_reaper_output(output_filename) @@ -128,9 +134,12 @@ class Heatmap: self.trait_results[this_trait.name] = [] for qtl in reaper_results: if qtl['additive'] > 0: - self.trait_results[this_trait.name].append(-float(qtl['lrs_value'])) + self.trait_results[this_trait.name].append( + -float(qtl['lrs_value'])) else: - self.trait_results[this_trait.name].append(float(qtl['lrs_value'])) + self.trait_results[this_trait.name].append( + float(qtl['lrs_value'])) + def gen_pheno_txt_file(samples, vals, filename): """Generates phenotype file for GEMMA""" @@ -151,6 +160,7 @@ def gen_pheno_txt_file(samples, vals, filename): values_string = "\t".join(filtered_vals_list) outfile.write(values_string) + def parse_reaper_output(gwa_filename): included_markers = [] p_values = [] diff --git a/wqflask/wqflask/interval_analyst/GeneUtil.py b/wqflask/wqflask/interval_analyst/GeneUtil.py index d0dd7aea..5e86ae31 100644 --- a/wqflask/wqflask/interval_analyst/GeneUtil.py +++ b/wqflask/wqflask/interval_analyst/GeneUtil.py @@ -2,30 +2,32 @@ import string from flask import Flask, g -#Just return a list of dictionaries -#each dictionary contains sub-dictionary +# Just return a list of dictionaries +# each dictionary contains sub-dictionary + + def loadGenes(chrName, diffCol, startMb, endMb, species='mouse'): - fetchFields = ['SpeciesId', 'Id', 'GeneSymbol', 'GeneDescription', 'Chromosome', 'TxStart', 'TxEnd', - 'Strand', 'GeneID', 'NM_ID', 'kgID', 'GenBankID', 'UnigenID', 'ProteinID', 'AlignID', - 'exonCount', 'exonStarts', 'exonEnds', 'cdsStart', 'cdsEnd'] - - ##List All Species in the Gene Table - speciesDict = {} - results = g.db.execute(""" + fetchFields = ['SpeciesId', 'Id', 'GeneSymbol', 'GeneDescription', 'Chromosome', 'TxStart', 'TxEnd', + 'Strand', 'GeneID', 'NM_ID', 'kgID', 'GenBankID', 'UnigenID', 'ProteinID', 'AlignID', + 'exonCount', 'exonStarts', 'exonEnds', 'cdsStart', 'cdsEnd'] + + # List All Species in the Gene Table + speciesDict = {} + results = g.db.execute(""" SELECT Species.Name, GeneList.SpeciesId FROM Species, GeneList WHERE GeneList.SpeciesId = Species.Id GROUP BY GeneList.SpeciesId""").fetchall() - for item in results: - speciesDict[item[0]] = item[1] - - ##List current Species and other Species - speciesId = speciesDict[species] - otherSpecies = [[X, speciesDict[X]] for X in list(speciesDict.keys())] - otherSpecies.remove([species, speciesId]) + for item in results: + speciesDict[item[0]] = item[1] - results = g.db.execute(""" + # List current Species and other Species + speciesId = speciesDict[species] + otherSpecies = [[X, speciesDict[X]] for X in list(speciesDict.keys())] + otherSpecies.remove([species, speciesId]) + + results = g.db.execute(""" SELECT %s FROM GeneList WHERE SpeciesId = %d AND Chromosome = '%s' AND @@ -36,47 +38,49 @@ def loadGenes(chrName, diffCol, startMb, endMb, species='mouse'): startMb, endMb, startMb, endMb)).fetchall() - GeneList = [] + GeneList = [] - if results: - for result in results: - newdict = {} - for j, item in enumerate(fetchFields): - newdict[item] = result[j] - #count SNPs if possible - if diffCol and species=='mouse': - newdict["snpCount"] = g.db.execute(""" + if results: + for result in results: + newdict = {} + for j, item in enumerate(fetchFields): + newdict[item] = result[j] + # count SNPs if possible + if diffCol and species == 'mouse': + newdict["snpCount"] = g.db.execute(""" SELECT count(*) FROM BXDSnpPosition WHERE Chr = '%s' AND Mb >= %2.6f AND Mb < %2.6f AND StrainId1 = %d AND StrainId2 = %d """ % (chrName, newdict["TxStart"], newdict["TxEnd"], diffCol[0], diffCol[1])).fetchone()[0] - newdict["snpDensity"] = newdict["snpCount"]/(newdict["TxEnd"]-newdict["TxStart"])/1000.0 - else: - newdict["snpDensity"] = newdict["snpCount"] = 0 - - try: - newdict['GeneLength'] = 1000.0*(newdict['TxEnd'] - newdict['TxStart']) - except: - pass - - #load gene from other Species by the same name - for item in otherSpecies: - othSpec, othSpecId = item - newdict2 = {} - - resultsOther = g.db.execute("SELECT %s FROM GeneList WHERE SpeciesId = %d AND geneSymbol= '%s' LIMIT 1" % (", ".join(fetchFields), + newdict["snpDensity"] = newdict["snpCount"] / \ + (newdict["TxEnd"] - newdict["TxStart"]) / 1000.0 + else: + newdict["snpDensity"] = newdict["snpCount"] = 0 + + try: + newdict['GeneLength'] = 1000.0 * \ + (newdict['TxEnd'] - newdict['TxStart']) + except: + pass + + # load gene from other Species by the same name + for item in otherSpecies: + othSpec, othSpecId = item + newdict2 = {} + + resultsOther = g.db.execute("SELECT %s FROM GeneList WHERE SpeciesId = %d AND geneSymbol= '%s' LIMIT 1" % (", ".join(fetchFields), othSpecId, newdict["GeneSymbol"])).fetchone() - if resultsOther: - for j, item in enumerate(fetchFields): - newdict2[item] = resultsOther[j] - - #count SNPs if possible, could be a separate function - if diffCol and othSpec == 'mouse': - newdict2["snpCount"] = g.db.execute(""" + if resultsOther: + for j, item in enumerate(fetchFields): + newdict2[item] = resultsOther[j] + + # count SNPs if possible, could be a separate function + if diffCol and othSpec == 'mouse': + newdict2["snpCount"] = g.db.execute(""" SELECT count(*) FROM BXDSnpPosition WHERE Chr = '%s' AND @@ -84,19 +88,19 @@ def loadGenes(chrName, diffCol, startMb, endMb, species='mouse'): StrainId1 = %d AND StrainId2 = %d """ % (chrName, newdict["TxStart"], newdict["TxEnd"], diffCol[0], diffCol[1])).fetchone()[0] - newdict2["snpDensity"] = newdict2["snpCount"]/(newdict2["TxEnd"]-newdict2["TxStart"])/1000.0 - else: - newdict2["snpDensity"] = newdict2["snpCount"] = 0 - - try: - newdict2['GeneLength'] = 1000.0*(newdict2['TxEnd'] - newdict2['TxStart']) - except: - pass - - newdict['%sGene' % othSpec] = newdict2 - - GeneList.append(newdict) + newdict2["snpDensity"] = newdict2["snpCount"] / \ + (newdict2["TxEnd"] - newdict2["TxStart"]) / 1000.0 + else: + newdict2["snpDensity"] = newdict2["snpCount"] = 0 + + try: + newdict2['GeneLength'] = 1000.0 * \ + (newdict2['TxEnd'] - newdict2['TxStart']) + except: + pass - return GeneList + newdict['%sGene' % othSpec] = newdict2 + GeneList.append(newdict) + return GeneList diff --git a/wqflask/wqflask/markdown_routes.py b/wqflask/wqflask/markdown_routes.py index ebf75807..c27ff143 100644 --- a/wqflask/wqflask/markdown_routes.py +++ b/wqflask/wqflask/markdown_routes.py @@ -103,7 +103,7 @@ def environments(): @environments_blueprint.route('/svg-dependency-graph') def svg_graph(): directory, file_name, _ = get_file_from_python_search_path( - "wqflask/dependency-graph.svg").partition("dependency-graph.svg") + "wqflask/dependency-graph.svg").partition("dependency-graph.svg") return send_from_directory(directory, file_name) diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py index 4074f098..5bf8822a 100644 --- a/wqflask/wqflask/marker_regression/display_mapping_results.py +++ b/wqflask/wqflask/marker_regression/display_mapping_results.py @@ -51,7 +51,7 @@ try: # Only import this for Python3 from functools import reduce except: pass -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) RED = ImageColor.getrgb("red") BLUE = ImageColor.getrgb("blue") @@ -307,7 +307,8 @@ class DisplayMappingResults: if 'color_scheme' in start_vars: self.color_scheme = start_vars['color_scheme'] if self.color_scheme == "single": - self.manhattan_single_color = ImageColor.getrgb("#" + start_vars['manhattan_single_color']) + self.manhattan_single_color = ImageColor.getrgb( + "#" + start_vars['manhattan_single_color']) if 'permCheck' in list(start_vars.keys()): self.permChecked = start_vars['permCheck'] @@ -357,7 +358,8 @@ class DisplayMappingResults: if 'reaper_version' in list(start_vars.keys()) and self.mapping_method == "reaper": self.reaper_version = start_vars['reaper_version'] if 'output_files' in start_vars: - self.output_files = ",".join([(the_file if the_file is not None else "") for the_file in start_vars['output_files']]) + self.output_files = ",".join( + [(the_file if the_file is not None else "") for the_file in start_vars['output_files']]) self.categorical_vars = "" self.perm_strata = "" @@ -386,28 +388,31 @@ class DisplayMappingResults: self.dataset.group.genofile = self.genofile_string.split(":")[0] if self.mapping_method == "reaper" and self.manhattan_plot != True: - self.genotype = self.dataset.group.read_genotype_file(use_reaper=True) + self.genotype = self.dataset.group.read_genotype_file( + use_reaper=True) else: self.genotype = self.dataset.group.read_genotype_file() - #Darwing Options + # Darwing Options try: - if self.selectedChr > -1: - self.graphWidth = min(self.GRAPH_MAX_WIDTH, max(self.GRAPH_MIN_WIDTH, int(start_vars['graphWidth']))) - else: - self.graphWidth = min(self.GRAPH_MAX_WIDTH, max(self.MULT_GRAPH_MIN_WIDTH, int(start_vars['graphWidth']))) + if self.selectedChr > -1: + self.graphWidth = min(self.GRAPH_MAX_WIDTH, max( + self.GRAPH_MIN_WIDTH, int(start_vars['graphWidth']))) + else: + self.graphWidth = min(self.GRAPH_MAX_WIDTH, max( + self.MULT_GRAPH_MIN_WIDTH, int(start_vars['graphWidth']))) except: - if self.selectedChr > -1: - self.graphWidth = self.GRAPH_DEFAULT_WIDTH - else: - self.graphWidth = self.MULT_GRAPH_DEFAULT_WIDTH + if self.selectedChr > -1: + self.graphWidth = self.GRAPH_DEFAULT_WIDTH + else: + self.graphWidth = self.MULT_GRAPH_DEFAULT_WIDTH -## BEGIN HaplotypeAnalyst +# BEGIN HaplotypeAnalyst if 'haplotypeAnalystCheck' in list(start_vars.keys()): self.haplotypeAnalystChecked = start_vars['haplotypeAnalystCheck'] else: self.haplotypeAnalystChecked = False -## END HaplotypeAnalyst +# END HaplotypeAnalyst self.graphHeight = self.GRAPH_DEFAULT_HEIGHT self.dominanceChecked = False @@ -446,7 +451,7 @@ class DisplayMappingResults: except: self.lrsMax = 0 - #Trait Infos + # Trait Infos self.identification = "" ################################################################ @@ -471,10 +476,12 @@ class DisplayMappingResults: Chr_Length.OrderId """ % (self.dataset.group.name, ", ".join(["'%s'" % X[0] for X in self.ChrList[1:]]))) - self.ChrLengthMbList = [x[0]/1000000.0 for x in self.ChrLengthMbList] - self.ChrLengthMbSum = reduce(lambda x, y:x+y, self.ChrLengthMbList, 0.0) + self.ChrLengthMbList = [x[0] / 1000000.0 for x in self.ChrLengthMbList] + self.ChrLengthMbSum = reduce( + lambda x, y: x + y, self.ChrLengthMbList, 0.0) if self.ChrLengthMbList: - self.MbGraphInterval = self.ChrLengthMbSum/(len(self.ChrLengthMbList)*12) #Empirical Mb interval + self.MbGraphInterval = self.ChrLengthMbSum / \ + (len(self.ChrLengthMbList) * 12) # Empirical Mb interval else: self.MbGraphInterval = 1 @@ -482,38 +489,38 @@ class DisplayMappingResults: for i, _chr in enumerate(self.genotype): self.ChrLengthCMList.append(_chr[-1].cM - _chr[0].cM) - self.ChrLengthCMSum = reduce(lambda x, y:x+y, self.ChrLengthCMList, 0.0) + self.ChrLengthCMSum = reduce( + lambda x, y: x + y, self.ChrLengthCMList, 0.0) if self.plotScale == 'physic': - self.GraphInterval = self.MbGraphInterval #Mb + self.GraphInterval = self.MbGraphInterval # Mb else: - self.GraphInterval = self.cMGraphInterval #cM + self.GraphInterval = self.cMGraphInterval # cM -## BEGIN HaplotypeAnalyst -## count the amount of individuals to be plotted, and increase self.graphHeight +# BEGIN HaplotypeAnalyst +# count the amount of individuals to be plotted, and increase self.graphHeight if self.haplotypeAnalystChecked and self.selectedChr > -1: thisTrait = self.this_trait - smd=[] + smd = [] for sample in self.sample_vals_dict.keys(): if self.sample_vals_dict[sample] != "x": - temp = GeneralObject(name=sample, value=float(self.sample_vals_dict[sample])) + temp = GeneralObject(name=sample, value=float( + self.sample_vals_dict[sample])) smd.append(temp) else: continue samplelist = list(self.genotype.prgy) - for j, _geno in enumerate (self.genotype[0][1].genotype): + for j, _geno in enumerate(self.genotype[0][1].genotype): for item in smd: if item.name == samplelist[j]: self.NR_INDIVIDUALS = self.NR_INDIVIDUALS + 1 # default: - self.graphHeight = self.graphHeight + 2 * (self.NR_INDIVIDUALS+10) * self.EACH_GENE_HEIGHT -## END HaplotypeAnalyst - - - + self.graphHeight = self.graphHeight + 2 * \ + (self.NR_INDIVIDUALS + 10) * self.EACH_GENE_HEIGHT +# END HaplotypeAnalyst ######################### - ## Get the sorting column + # Get the sorting column ######################### RISet = self.dataset.group.name if RISet in ('AXB', 'BXA', 'AXBXA'): @@ -529,10 +536,11 @@ class DisplayMappingResults: elif RISet in ('LXS'): self.diffCol = ['ILS', 'ISS'] else: - self.diffCol= [] + self.diffCol = [] for i, strain in enumerate(self.diffCol): - self.diffCol[i] = g.db.execute("select Id from Strain where Symbol = %s", strain).fetchone()[0] + self.diffCol[i] = g.db.execute( + "select Id from Strain where Symbol = %s", strain).fetchone()[0] ################################################################ # GeneCollection goes here @@ -546,7 +554,7 @@ class DisplayMappingResults: geneTable = "" self.geneCol = None - if self.plotScale == 'physic' and self.selectedChr > -1 and (self.intervalAnalystChecked or self.geneChecked): + if self.plotScale == 'physic' and self.selectedChr > -1 and (self.intervalAnalystChecked or self.geneChecked): # Draw the genes for this chromosome / region of this chromosome webqtldatabase = self.dataset.name @@ -555,24 +563,26 @@ class DisplayMappingResults: chrName = "X" else: chrName = self.selectedChr - self.geneCol = GeneUtil.loadGenes(chrName, self.diffCol, self.startMb, self.endMb, "mouse") + self.geneCol = GeneUtil.loadGenes( + chrName, self.diffCol, self.startMb, self.endMb, "mouse") elif self.dataset.group.species == "rat": if self.selectedChr == 21: chrName = "X" else: chrName = self.selectedChr - self.geneCol = GeneUtil.loadGenes(chrName, self.diffCol, self.startMb, self.endMb, "rat") + self.geneCol = GeneUtil.loadGenes( + chrName, self.diffCol, self.startMb, self.endMb, "rat") if self.geneCol and self.intervalAnalystChecked: - ####################################################################### - #Nick use GENEID as RefGene to get Literature Correlation Informations# - #For Interval Mapping, Literature Correlation isn't useful, so skip it# - #through set GENEID is None # - ####################################################################### + ####################################################################### + #Nick use GENEID as RefGene to get Literature Correlation Informations# + #For Interval Mapping, Literature Correlation isn't useful, so skip it# + #through set GENEID is None # + ####################################################################### - GENEID = None + GENEID = None - self.geneTable(self.geneCol, GENEID) + self.geneTable(self.geneCol, GENEID) ################################################################ # Plots goes here @@ -580,11 +590,12 @@ class DisplayMappingResults: showLocusForm = "" intCanvas = Image.new("RGBA", size=(self.graphWidth, self.graphHeight)) with Bench("Drawing Plot"): - gifmap = self.plotIntMapping(intCanvas, startMb = self.startMb, endMb = self.endMb, showLocusForm= showLocusForm) + gifmap = self.plotIntMapping( + intCanvas, startMb=self.startMb, endMb=self.endMb, showLocusForm=showLocusForm) self.gifmap = gifmap.__str__() - self.filename= webqtlUtil.genRandStr("Itvl_") + self.filename = webqtlUtil.genRandStr("Itvl_") intCanvas.save( "{}.png".format( os.path.join(webqtlConfig.GENERATED_IMAGE_DIR, self.filename)), @@ -594,20 +605,22 @@ class DisplayMappingResults: border="0", usemap='#WebQTLImageMap' ) - #Scales plot differently for high resolution + # Scales plot differently for high resolution if self.draw2X: - intCanvasX2 = Image.new("RGBA", size=(self.graphWidth*2, self.graphHeight*2)) - gifmapX2 = self.plotIntMapping(intCanvasX2, startMb = self.startMb, endMb = self.endMb, showLocusForm= showLocusForm, zoom=2) + intCanvasX2 = Image.new("RGBA", size=( + self.graphWidth * 2, self.graphHeight * 2)) + gifmapX2 = self.plotIntMapping( + intCanvasX2, startMb=self.startMb, endMb=self.endMb, showLocusForm=showLocusForm, zoom=2) intCanvasX2.save( "{}.png".format( os.path.join(webqtlConfig.GENERATED_IMAGE_DIR, - self.filename+"X2")), + self.filename + "X2")), format='png') ################################################################ # Outputs goes here ################################################################ - #this form is used for opening Locus page or trait page, only available for genetic mapping + # this form is used for opening Locus page or trait page, only available for genetic mapping if showLocusForm: showLocusForm = HtmlGenWrapper.create_form_tag( cgi=os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), @@ -615,7 +628,8 @@ class DisplayMappingResults: name=showLocusForm, submit=HtmlGenWrapper.create_input_tag(type_='hidden')) - hddn = {'FormID':'showDatabase', 'ProbeSetID':'_','database':fd.RISet+"Geno",'CellID':'_', 'RISet':fd.RISet, 'incparentsf1':'ON'} + hddn = {'FormID': 'showDatabase', 'ProbeSetID': '_', 'database': fd.RISet + \ + "Geno", 'CellID': '_', 'RISet': fd.RISet, 'incparentsf1': 'ON'} for key in hddn.keys(): showLocusForm.append(HtmlGenWrapper.create_input_tag( name=key, value=hddn[key], type_='hidden')) @@ -634,11 +648,12 @@ class DisplayMappingResults: if self.traitList and self.traitList[0].dataset and self.traitList[0].dataset.type == 'Geno': btminfo.append(HtmlGenWrapper.create_br_tag()) - btminfo.append('Mapping using genotype data as a trait will result in infinity LRS at one locus. In order to display the result properly, all LRSs higher than 100 are capped at 100.') + btminfo.append( + 'Mapping using genotype data as a trait will result in infinity LRS at one locus. In order to display the result properly, all LRSs higher than 100 are capped at 100.') - def plotIntMapping(self, canvas, offset= (80, 120, 90, 100), zoom = 1, startMb = None, endMb = None, showLocusForm = ""): + def plotIntMapping(self, canvas, offset=(80, 120, 90, 100), zoom=1, startMb=None, endMb=None, showLocusForm=""): im_drawer = ImageDraw.Draw(canvas) - #calculating margins + # calculating margins xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset if self.multipleInterval: yTopOffset = max(90, yTopOffset) @@ -659,34 +674,36 @@ class DisplayMappingResults: xLeftOffset += 20 fontZoom = 1.5 - xLeftOffset = int(xLeftOffset*fontZoom) - xRightOffset = int(xRightOffset*fontZoom) - yBottomOffset = int(yBottomOffset*fontZoom) + xLeftOffset = int(xLeftOffset * fontZoom) + xRightOffset = int(xRightOffset * fontZoom) + yBottomOffset = int(yBottomOffset * fontZoom) cWidth = canvas.size[0] cHeight = canvas.size[1] plotWidth = cWidth - xLeftOffset - xRightOffset plotHeight = cHeight - yTopOffset - yBottomOffset - #Drawing Area Height + # Drawing Area Height drawAreaHeight = plotHeight if self.plotScale == 'physic' and self.selectedChr > -1: if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": - drawAreaHeight -= 4*self.BAND_HEIGHT + 4*self.BAND_SPACING+ 10*zoom + drawAreaHeight -= 4 * self.BAND_HEIGHT + 4 * self.BAND_SPACING + 10 * zoom else: - drawAreaHeight -= 3*self.BAND_HEIGHT + 3*self.BAND_SPACING+ 10*zoom + drawAreaHeight -= 3 * self.BAND_HEIGHT + 3 * self.BAND_SPACING + 10 * zoom if self.geneChecked: - drawAreaHeight -= self.NUM_GENE_ROWS*self.EACH_GENE_HEIGHT + 3*self.BAND_SPACING + 10*zoom + drawAreaHeight -= self.NUM_GENE_ROWS * \ + self.EACH_GENE_HEIGHT + 3 * self.BAND_SPACING + 10 * zoom else: if self.selectedChr > -1: drawAreaHeight -= 20 else: drawAreaHeight -= 30 -## BEGIN HaplotypeAnalyst +# BEGIN HaplotypeAnalyst if self.haplotypeAnalystChecked and self.selectedChr > -1: - drawAreaHeight -= self.EACH_GENE_HEIGHT * (self.NR_INDIVIDUALS+10) * 2 * zoom -## END HaplotypeAnalyst + drawAreaHeight -= self.EACH_GENE_HEIGHT * \ + (self.NR_INDIVIDUALS + 10) * 2 * zoom +# END HaplotypeAnalyst if zoom == 2: drawAreaHeight -= 60 @@ -696,42 +713,52 @@ class DisplayMappingResults: newoffset = (xLeftOffset, xRightOffset, yTopOffset, yBottomOffset) # Draw the alternating-color background first and get plotXScale - plotXScale = self.drawGraphBackground(canvas, gifmap, offset=newoffset, zoom= zoom, startMb=startMb, endMb = endMb) + plotXScale = self.drawGraphBackground( + canvas, gifmap, offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) - #draw bootstap + # draw bootstap if self.bootChecked and not self.multipleInterval: - self.drawBootStrapResult(canvas, self.nboot, drawAreaHeight, plotXScale, offset=newoffset, zoom= zoom, startMb=startMb, endMb = endMb) + self.drawBootStrapResult(canvas, self.nboot, drawAreaHeight, plotXScale, + offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) # Draw clickable region and gene band if selected if self.plotScale == 'physic' and self.selectedChr > -1: - self.drawClickBand(canvas, gifmap, plotXScale, offset=newoffset, zoom = zoom, startMb=startMb, endMb = endMb) + self.drawClickBand(canvas, gifmap, plotXScale, offset=newoffset, + zoom=zoom, startMb=startMb, endMb=endMb) if self.geneChecked and self.geneCol: - self.drawGeneBand(canvas, gifmap, plotXScale, offset=newoffset, zoom = zoom, startMb=startMb, endMb = endMb) + self.drawGeneBand(canvas, gifmap, plotXScale, offset=newoffset, + zoom=zoom, startMb=startMb, endMb=endMb) if self.SNPChecked: - self.drawSNPTrackNew(canvas, offset=newoffset, zoom = 2*zoom, startMb=startMb, endMb = endMb) -## BEGIN HaplotypeAnalyst + self.drawSNPTrackNew( + canvas, offset=newoffset, zoom=2 * zoom, startMb=startMb, endMb=endMb) +# BEGIN HaplotypeAnalyst if self.haplotypeAnalystChecked: - self.drawHaplotypeBand(canvas, gifmap, plotXScale, offset=newoffset, zoom = zoom, startMb=startMb, endMb = endMb) -## END HaplotypeAnalyst + self.drawHaplotypeBand( + canvas, gifmap, plotXScale, offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) +# END HaplotypeAnalyst # Draw X axis - self.drawXAxis(canvas, drawAreaHeight, gifmap, plotXScale, showLocusForm, offset=newoffset, zoom = zoom, startMb=startMb, endMb = endMb) + self.drawXAxis(canvas, drawAreaHeight, gifmap, plotXScale, showLocusForm, + offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) # Draw QTL curve - self.drawQTL(canvas, drawAreaHeight, gifmap, plotXScale, offset=newoffset, zoom= zoom, startMb=startMb, endMb = endMb) + self.drawQTL(canvas, drawAreaHeight, gifmap, plotXScale, + offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) - #draw legend + # draw legend if self.multipleInterval: - self.drawMultiTraitName(fd, canvas, gifmap, showLocusForm, offset=newoffset) + self.drawMultiTraitName( + fd, canvas, gifmap, showLocusForm, offset=newoffset) elif self.legendChecked: - self.drawLegendPanel(canvas, offset=newoffset, zoom = zoom) + self.drawLegendPanel(canvas, offset=newoffset, zoom=zoom) else: pass - #draw position, no need to use a separate function - self.drawProbeSetPosition(canvas, plotXScale, offset=newoffset, zoom = zoom) + # draw position, no need to use a separate function + self.drawProbeSetPosition( + canvas, plotXScale, offset=newoffset, zoom=zoom) return gifmap - def drawBootStrapResult(self, canvas, nboot, drawAreaHeight, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): + def drawBootStrapResult(self, canvas, nboot, drawAreaHeight, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): im_drawer = ImageDraw.Draw(canvas) xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset plotWidth = canvas.size[0] - xLeftOffset - xRightOffset @@ -741,9 +768,9 @@ class DisplayMappingResults: if zoom == 2: fontZoom = 1.5 - bootHeightThresh = drawAreaHeight*3/4 + bootHeightThresh = drawAreaHeight * 3 / 4 - #break bootstrap result into groups + # break bootstrap result into groups BootCoord = [] i = 0 previous_chr = None @@ -751,7 +778,7 @@ class DisplayMappingResults: startX = xLeftOffset BootChrCoord = [] - if self.selectedChr == -1: #ZS: If viewing full genome/all chromosomes + if self.selectedChr == -1: # ZS: If viewing full genome/all chromosomes for i, result in enumerate(self.qtlresults): if result['chr'] != previous_chr: previous_chr = result['chr'] @@ -759,28 +786,33 @@ class DisplayMappingResults: if previous_chr_as_int != 1: BootCoord.append(BootChrCoord) BootChrCoord = [] - startX += (self.ChrLengthDistList[previous_chr_as_int - 2] + self.GraphInterval)*plotXScale + startX += ( + self.ChrLengthDistList[previous_chr_as_int - 2] + self.GraphInterval) * plotXScale if self.plotScale == 'physic': - Xc = startX + (result['Mb']-self.startMb)*plotXScale + Xc = startX + (result['Mb'] - self.startMb) * plotXScale else: - Xc = startX + (result['cM']-self.qtlresults[0]['cM'])*plotXScale + Xc = startX + \ + (result['cM'] - self.qtlresults[0]['cM']) * plotXScale BootChrCoord.append([Xc, self.bootResult[i]]) else: for i, result in enumerate(self.qtlresults): if str(result['chr']) == str(self.ChrList[self.selectedChr][0]): if self.plotScale == 'physic': - Xc = startX + (result['Mb']-self.startMb)*plotXScale + Xc = startX + (result['Mb'] - \ + self.startMb) * plotXScale else: - Xc = startX + (result['cM']-self.qtlresults[0]['cM'])*plotXScale + Xc = startX + \ + (result['cM'] - self.qtlresults[0] + ['cM']) * plotXScale BootChrCoord.append([Xc, self.bootResult[i]]) BootCoord = [BootChrCoord] - #reduce bootResult + # reduce bootResult if self.selectedChr > -1: maxBootBar = 80.0 else: maxBootBar = 200.0 - stepBootStrap = plotWidth/maxBootBar + stepBootStrap = plotWidth / maxBootBar reducedBootCoord = [] maxBootCount = 0 @@ -796,14 +828,16 @@ class DisplayMappingResults: if maxBootCount < bootCount: maxBootCount = bootCount # end if - reducedBootCoord.append([bootStartPixX, BootChrCoord[i][0], bootCount]) + reducedBootCoord.append( + [bootStartPixX, BootChrCoord[i][0], bootCount]) bootStartPixX = BootChrCoord[i][0] bootCount = BootChrCoord[i][1] # end else # end for - #add last piece - if BootChrCoord[-1][0] - bootStartPixX > stepBootStrap/2.0: - reducedBootCoord.append([bootStartPixX, BootChrCoord[-1][0], bootCount]) + # add last piece + if BootChrCoord[-1][0] - bootStartPixX > stepBootStrap / 2.0: + reducedBootCoord.append( + [bootStartPixX, BootChrCoord[-1][0], bootCount]) else: reducedBootCoord[-1][2] += bootCount reducedBootCoord[-1][1] = BootChrCoord[-1][0] @@ -815,64 +849,68 @@ class DisplayMappingResults: if item[2] > 0: if item[0] < xLeftOffset: item[0] = xLeftOffset - if item[0] > xLeftOffset+plotWidth: - item[0] = xLeftOffset+plotWidth + if item[0] > xLeftOffset + plotWidth: + item[0] = xLeftOffset + plotWidth if item[1] < xLeftOffset: item[1] = xLeftOffset - if item[1] > xLeftOffset+plotWidth: - item[1] = xLeftOffset+plotWidth + if item[1] > xLeftOffset + plotWidth: + item[1] = xLeftOffset + plotWidth if item[0] != item[1]: im_drawer.rectangle( xy=((item[0], yZero), - (item[1], yZero - item[2]*bootHeightThresh/maxBootCount)), + (item[1], yZero - item[2] * bootHeightThresh / maxBootCount)), fill=self.BOOTSTRAP_BOX_COLOR, outline=BLACK) - ###draw boot scale - highestPercent = (maxBootCount*100.0)/nboot + # draw boot scale + highestPercent = (maxBootCount * 100.0) / nboot bootScale = Plot.detScale(0, highestPercent) - bootScale = Plot.frange(bootScale[0], bootScale[1], bootScale[1]/bootScale[2]) + bootScale = Plot.frange( + bootScale[0], bootScale[1], bootScale[1] / bootScale[2]) bootScale = bootScale[:-1] + [highestPercent] - bootOffset = 50*fontZoom - bootScaleFont=ImageFont.truetype(font=VERDANA_FILE, size=13*fontZoom) + bootOffset = 50 * fontZoom + bootScaleFont = ImageFont.truetype( + font=VERDANA_FILE, size=13 * fontZoom) im_drawer.rectangle( - xy=((canvas.size[0]-bootOffset, yZero-bootHeightThresh), - (canvas.size[0]-bootOffset-15*zoom, yZero)), - fill = YELLOW, outline=BLACK) + xy=((canvas.size[0] - bootOffset, yZero - bootHeightThresh), + (canvas.size[0] - bootOffset - 15 * zoom, yZero)), + fill=YELLOW, outline=BLACK) im_drawer.line( - xy=((canvas.size[0]-bootOffset+4, yZero), - (canvas.size[0]-bootOffset, yZero)), + xy=((canvas.size[0] - bootOffset + 4, yZero), + (canvas.size[0] - bootOffset, yZero)), fill=BLACK) TEXT_Y_DISPLACEMENT = -8 - im_drawer.text(xy=(canvas.size[0]-bootOffset+10, yZero+TEXT_Y_DISPLACEMENT), text='0%', + im_drawer.text(xy=(canvas.size[0] - bootOffset + 10, yZero + TEXT_Y_DISPLACEMENT), text='0%', font=bootScaleFont, fill=BLACK) for item in bootScale: if item == 0: continue - bootY = yZero-bootHeightThresh*item/highestPercent + bootY = yZero - bootHeightThresh * item / highestPercent im_drawer.line( - xy=((canvas.size[0]-bootOffset+4, bootY), - (canvas.size[0]-bootOffset, bootY)), + xy=((canvas.size[0] - bootOffset + 4, bootY), + (canvas.size[0] - bootOffset, bootY)), fill=BLACK) - im_drawer.text(xy=(canvas.size[0]-bootOffset+10, bootY+TEXT_Y_DISPLACEMENT), - text='%2.1f'%item, font=bootScaleFont, fill=BLACK) + im_drawer.text(xy=(canvas.size[0] - bootOffset + 10, bootY + TEXT_Y_DISPLACEMENT), + text='%2.1f' % item, font=bootScaleFont, fill=BLACK) if self.legendChecked: if hasattr(self.traitList[0], 'chr') and hasattr(self.traitList[0], 'mb'): startPosY = 30 else: startPosY = 15 - smallLabelFont = ImageFont.truetype(font=TREBUC_FILE, size=12*fontZoom) + smallLabelFont = ImageFont.truetype( + font=TREBUC_FILE, size=12 * fontZoom) leftOffset = canvas.size[0] - xRightOffset - 190 im_drawer.rectangle( - xy=((leftOffset, startPosY-6), (leftOffset+12, startPosY+6)), + xy=((leftOffset, startPosY - 6), + (leftOffset + 12, startPosY + 6)), fill=YELLOW, outline=BLACK) - im_drawer.text(xy=(canvas.size[0] - xRightOffset - 170, startPosY+TEXT_Y_DISPLACEMENT), + im_drawer.text(xy=(canvas.size[0] - xRightOffset - 170, startPosY + TEXT_Y_DISPLACEMENT), text='Frequency of the Peak LRS', font=smallLabelFont, fill=BLACK) - def drawProbeSetPosition(self, canvas, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): + def drawProbeSetPosition(self, canvas, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): im_drawer = ImageDraw.Draw(canvas) if len(self.traitList) != 1: return @@ -896,21 +934,22 @@ class DisplayMappingResults: if self.plotScale == "physic": this_chr = str(self.ChrList[self.selectedChr][0]) else: - this_chr = str(self.ChrList[self.selectedChr][1]+1) + this_chr = str(self.ChrList[self.selectedChr][1] + 1) if self.plotScale == 'physic': if self.selectedChr > -1: if this_chr != Chr or Mb < self.startMb or Mb > self.endMb: return else: - locPixel = xLeftOffset + (Mb-self.startMb)*plotXScale + locPixel = xLeftOffset + (Mb - self.startMb) * plotXScale else: locPixel = xLeftOffset for i, _chr in enumerate(self.ChrList[1:]): if _chr[0] != Chr: - locPixel += (self.ChrLengthDistList[i] + self.GraphInterval)*plotXScale + locPixel += (self.ChrLengthDistList[i] + \ + self.GraphInterval) * plotXScale else: - locPixel += Mb*plotXScale + locPixel += Mb * plotXScale break else: if self.selectedChr > -1: @@ -918,33 +957,37 @@ class DisplayMappingResults: if qtlresult['chr'] != self.selectedChr: continue - if i==0 and qtlresult['Mb'] >= Mb: - locPixel=-1 + if i == 0 and qtlresult['Mb'] >= Mb: + locPixel = -1 break - #the trait's position is between two traits - if i > 0 and self.qtlresults[i-1]['Mb'] < Mb and qtlresult['Mb'] >= Mb: - locPixel = xLeftOffset + plotXScale*(self.qtlresults[i-1]['Mb']+(qtlresult['Mb']-self.qtlresults[i-1]['Mb'])*(Mb - self.qtlresults[i-1]['Mb'])/(qtlresult['Mb']-self.qtlresults[i-1]['Mb'])) + # the trait's position is between two traits + if i > 0 and self.qtlresults[i - 1]['Mb'] < Mb and qtlresult['Mb'] >= Mb: + locPixel = xLeftOffset + plotXScale * (self.qtlresults[i - 1]['Mb'] + (qtlresult['Mb'] - self.qtlresults[i - 1]['Mb']) * ( + Mb - self.qtlresults[i - 1]['Mb']) / (qtlresult['Mb'] - self.qtlresults[i - 1]['Mb'])) break - #the trait's position is on the right of the last genotype - if i==len(self.qtlresults) and Mb>=qtlresult['Mb']: + # the trait's position is on the right of the last genotype + if i == len(self.qtlresults) and Mb >= qtlresult['Mb']: locPixel = -1 else: locPixel = xLeftOffset for i, _chr in enumerate(self.ChrList): - if i < (len(self.ChrList)-1): + if i < (len(self.ChrList) - 1): if _chr != Chr: - locPixel += (self.ChrLengthDistList[i] + self.GraphInterval)*plotXScale + locPixel += (self.ChrLengthDistList[i] + \ + self.GraphInterval) * plotXScale else: - locPixel += (Mb*(_chr[-1].cM-_chr[0].cM)/self.ChrLengthCMList[i])*plotXScale + locPixel += (Mb * (_chr[-1].cM - _chr[0].cM) / \ + self.ChrLengthCMList[i]) * plotXScale break if locPixel >= 0 and self.plotScale == 'physic': - traitPixel = ((locPixel, yZero), (locPixel-7, yZero+14), (locPixel+7, yZero+14)) + traitPixel = ((locPixel, yZero), (locPixel - 7, + yZero + 14), (locPixel + 7, yZero + 14)) draw_open_polygon(canvas, xy=traitPixel, outline=BLACK, fill=self.TRANSCRIPT_LOCATION_COLOR) - def drawSNPTrackNew(self, canvas, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): + def drawSNPTrackNew(self, canvas, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): im_drawer = ImageDraw.Draw(canvas) if self.plotScale != 'physic' or self.selectedChr == -1 or not self.diffCol: return @@ -963,36 +1006,37 @@ class DisplayMappingResults: #chrName = self.genotype[0].name chrName = self.ChrList[self.selectedChr][0] - stepMb = (endMb-startMb)/plotWidth + stepMb = (endMb - startMb) / plotWidth strainId1, strainId2 = self.diffCol SNPCounts = [] - while startMb<endMb: + while startMb < endMb: snp_count = g.db.execute(""" select count(*) from BXDSnpPosition where Chr = '%s' AND Mb >= %2.6f AND Mb < %2.6f AND StrainId1 = %d AND StrainId2 = %d - """ % (chrName, startMb, startMb+stepMb, strainId1, strainId2)).fetchone()[0] + """ % (chrName, startMb, startMb + stepMb, strainId1, strainId2)).fetchone()[0] SNPCounts.append(snp_count) startMb += stepMb if (len(SNPCounts) > 0): maxCount = max(SNPCounts) - if maxCount>0: + if maxCount > 0: for i in range(xLeftOffset, xLeftOffset + plotWidth): - snpDensity = float(SNPCounts[i-xLeftOffset]*SNP_HEIGHT_MODIFIER/maxCount) + snpDensity = float( + SNPCounts[i - xLeftOffset] * SNP_HEIGHT_MODIFIER / maxCount) im_drawer.line( - xy=((i, drawSNPLocationY+(snpDensity)*zoom), - (i, drawSNPLocationY-(snpDensity)*zoom)), + xy=((i, drawSNPLocationY + (snpDensity) * zoom), + (i, drawSNPLocationY - (snpDensity) * zoom)), fill=self.SNP_COLOR, width=1) - def drawMultiTraitName(self, fd, canvas, gifmap, showLocusForm, offset= (40, 120, 80, 10), zoom = 1): + def drawMultiTraitName(self, fd, canvas, gifmap, showLocusForm, offset=(40, 120, 80, 10), zoom=1): nameWidths = [] yPaddingTop = 10 - colorFont=ImageFont.truetype(font=TREBUC_FILE, size=12) - if len(self.qtlresults) >20 and self.selectedChr > -1: + colorFont = ImageFont.truetype(font=TREBUC_FILE, size=12) + if len(self.qtlresults) > 20 and self.selectedChr > -1: rightShift = 20 rightShiftStep = 60 rectWidth = 10 @@ -1004,7 +1048,7 @@ class DisplayMappingResults: for k, thisTrait in enumerate(self.traitList): thisLRSColor = self.colorCollection[k] kstep = k % 4 - if k!=0 and kstep==0: + if k != 0 and kstep == 0: if nameWidths: rightShiftStep = max(nameWidths[-4:]) + rectWidth + 20 rightShift += rightShiftStep @@ -1014,19 +1058,23 @@ class DisplayMappingResults: nameWidths.append(nameWidth) im_drawer.rectangle( - xy=((rightShift, yPaddingTop+kstep*15), - (rectWidth+rightShift, yPaddingTop+10+kstep*15)), + xy=((rightShift, yPaddingTop + kstep * 15), + (rectWidth + rightShift, yPaddingTop + 10 + kstep * 15)), fill=thisLRSColor, outline=BLACK) im_drawer.text( - text=name, xy=(rectWidth+2+rightShift, yPaddingTop+10+kstep*15), + text=name, xy=(rectWidth + 2 + rightShift, + yPaddingTop + 10 + kstep * 15), font=colorFont, fill=BLACK) if thisTrait.db: - COORDS = "%d,%d,%d,%d" %(rectWidth+2+rightShift, yPaddingTop+kstep*15, rectWidth+2+rightShift+nameWidth, yPaddingTop+10+kstep*15,) - HREF= "javascript:showDatabase3('%s','%s','%s','');" % (showLocusForm, thisTrait.db.name, thisTrait.name) - Areas = HtmlGenWrapper.create_area_tag(shape='rect', coords=COORDS, href=HREF) - gifmap.append(Areas) ### TODO + COORDS = "%d,%d,%d,%d" % (rectWidth + 2 + rightShift, yPaddingTop + kstep * \ + 15, rectWidth + 2 + rightShift + nameWidth, yPaddingTop + 10 + kstep * 15,) + HREF = "javascript:showDatabase3('%s','%s','%s','');" % ( + showLocusForm, thisTrait.db.name, thisTrait.name) + Areas = HtmlGenWrapper.create_area_tag( + shape='rect', coords=COORDS, href=HREF) + gifmap.append(Areas) # TODO - def drawLegendPanel(self, canvas, offset= (40, 120, 80, 10), zoom = 1): + def drawLegendPanel(self, canvas, offset=(40, 120, 80, 10), zoom=1): im_drawer = ImageDraw.Draw(canvas) xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset plotWidth = canvas.size[0] - xLeftOffset - xRightOffset @@ -1037,80 +1085,82 @@ class DisplayMappingResults: if zoom == 2: fontZoom = 1.5 - labelFont=ImageFont.truetype(font=TREBUC_FILE, size=12*fontZoom) + labelFont = ImageFont.truetype(font=TREBUC_FILE, size=12 * fontZoom) startPosY = 15 - stepPosY = 12*fontZoom + stepPosY = 12 * fontZoom startPosX = canvas.size[0] - xRightOffset - 415 if hasattr(self.traitList[0], 'chr') and hasattr(self.traitList[0], 'mb'): startPosY = 15 nCol = 2 - smallLabelFont = ImageFont.truetype(font=TREBUC_FILE, size=12*fontZoom) + smallLabelFont = ImageFont.truetype( + font=TREBUC_FILE, size=12 * fontZoom) leftOffset = canvas.size[0] - xRightOffset - 190 draw_open_polygon( canvas, xy=( - (leftOffset + 6, startPosY-7), - (leftOffset - 1, startPosY+7), - (leftOffset + 13, startPosY+7)), + (leftOffset + 6, startPosY - 7), + (leftOffset - 1, startPosY + 7), + (leftOffset + 13, startPosY + 7)), outline=BLACK, fill=self.TRANSCRIPT_LOCATION_COLOR ) TEXT_Y_DISPLACEMENT = -8 im_drawer.text( text="Sequence Site", - xy=(leftOffset + 20, startPosY+TEXT_Y_DISPLACEMENT), font=smallLabelFont, + xy=(leftOffset + 20, startPosY + TEXT_Y_DISPLACEMENT), font=smallLabelFont, fill=self.TOP_RIGHT_INFO_COLOR) if self.manhattan_plot != True: im_drawer.line( - xy=((startPosX, startPosY), (startPosX+32, startPosY)), + xy=((startPosX, startPosY), (startPosX + 32, startPosY)), fill=self.LRS_COLOR, width=2) im_drawer.text( - text=self.LRS_LOD, xy=(startPosX+40, startPosY+TEXT_Y_DISPLACEMENT), + text=self.LRS_LOD, xy=( + startPosX + 40, startPosY + TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) startPosY += stepPosY if self.additiveChecked: im_drawer.line( - xy=((startPosX, startPosY), (startPosX+17, startPosY)), + xy=((startPosX, startPosY), (startPosX + 17, startPosY)), fill=self.ADDITIVE_COLOR_POSITIVE, width=2) im_drawer.line( - xy=((startPosX+18, startPosY), (startPosX+32, startPosY)), + xy=((startPosX + 18, startPosY), (startPosX + 32, startPosY)), fill=self.ADDITIVE_COLOR_NEGATIVE, width=2) im_drawer.text( - text='Additive Effect', xy=(startPosX+40, startPosY+TEXT_Y_DISPLACEMENT), + text='Additive Effect', xy=(startPosX + 40, startPosY + TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) startPosY += stepPosY if self.genotype.type == 'intercross' and self.dominanceChecked: im_drawer.line( - xy=((startPosX, startPosY), (startPosX+17, startPosY)), + xy=((startPosX, startPosY), (startPosX + 17, startPosY)), fill=self.DOMINANCE_COLOR_POSITIVE, width=4) im_drawer.line( - xy=((startPosX+18, startPosY), (startPosX+35, startPosY)), + xy=((startPosX + 18, startPosY), (startPosX + 35, startPosY)), fill=self.DOMINANCE_COLOR_NEGATIVE, width=4) im_drawer.text( - text='Dominance Effect', xy=(startPosX+42, startPosY+5), + text='Dominance Effect', xy=(startPosX + 42, startPosY + 5), font=labelFont, fill=BLACK) startPosY += stepPosY if self.haplotypeAnalystChecked: im_drawer.line( - xy=((startPosX-34, startPosY), (startPosX-17, startPosY)), + xy=((startPosX - 34, startPosY), (startPosX - 17, startPosY)), fill=self.HAPLOTYPE_POSITIVE, width=4) im_drawer.line( - xy=((startPosX-17, startPosY), (startPosX, startPosY)), + xy=((startPosX - 17, startPosY), (startPosX, startPosY)), fill=self.HAPLOTYPE_NEGATIVE, width=4) im_drawer.line( - xy=((startPosX, startPosY), (startPosX+17, startPosY)), + xy=((startPosX, startPosY), (startPosX + 17, startPosY)), fill=self.HAPLOTYPE_HETEROZYGOUS, width=4) im_drawer.line( - xy=((startPosX+17, startPosY), (startPosX+34, startPosY)), + xy=((startPosX + 17, startPosY), (startPosX + 34, startPosY)), fill=self.HAPLOTYPE_RECOMBINATION, width=4) im_drawer.text( text='Haplotypes (Pat, Mat, Het, Unk)', - xy=(startPosX+41, startPosY+TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) + xy=(startPosX + 41, startPosY + TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) startPosY += stepPosY if self.permChecked and self.nperm > 0: @@ -1118,26 +1168,29 @@ class DisplayMappingResults: if self.multipleInterval and not self.bootChecked: thisStartX = canvas.size[0] - xRightOffset - 205 im_drawer.line( - xy=((thisStartX, startPosY), ( startPosX + 32, startPosY)), + xy=((thisStartX, startPosY), (startPosX + 32, startPosY)), fill=self.SIGNIFICANT_COLOR, width=self.SIGNIFICANT_WIDTH) im_drawer.line( - xy=((thisStartX, startPosY + stepPosY), ( startPosX + 32, startPosY + stepPosY)), + xy=((thisStartX, startPosY + stepPosY), + (startPosX + 32, startPosY + stepPosY)), fill=self.SUGGESTIVE_COLOR, width=self.SUGGESTIVE_WIDTH) im_drawer.text( - text='Significant %s = %2.2f' % (self.LRS_LOD, self.significant), - xy=(thisStartX+40, startPosY+TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) + text='Significant %s = %2.2f' % ( + self.LRS_LOD, self.significant), + xy=(thisStartX + 40, startPosY + TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) im_drawer.text( text='Suggestive %s = %2.2f' % (self.LRS_LOD, self.suggestive), - xy=(thisStartX+40, startPosY + TEXT_Y_DISPLACEMENT +stepPosY), font=labelFont, + xy=(thisStartX + 40, startPosY + TEXT_Y_DISPLACEMENT + stepPosY), font=labelFont, fill=BLACK) - labelFont = ImageFont.truetype(font=VERDANA_FILE, size=12*fontZoom) + labelFont = ImageFont.truetype(font=VERDANA_FILE, size=12 * fontZoom) labelColor = BLACK if self.dataset.type == "Publish" or self.dataset.type == "Geno": dataset_label = self.dataset.fullname else: - dataset_label = "%s - %s" % (self.dataset.group.name, self.dataset.fullname) + dataset_label = "%s - %s" % (self.dataset.group.name, + self.dataset.fullname) string1 = 'Dataset: %s' % (dataset_label) @@ -1154,7 +1207,8 @@ class DisplayMappingResults: string3 = 'Using GEMMA mapping method with ' if self.covariates != "": string3 += 'the cofactors below:' - cofactor_names = ", ".join([covar.split(":")[0] for covar in self.covariates.split(",")]) + cofactor_names = ", ".join( + [covar.split(":")[0] for covar in self.covariates.split(",")]) string4 = cofactor_names else: string3 += 'no cofactors' @@ -1162,7 +1216,8 @@ class DisplayMappingResults: string3 = 'Using R/qtl mapping method with ' if self.covariates != "": string3 += 'the cofactors below:' - cofactor_names = ", ".join([covar.split(":")[0] for covar in self.covariates.split(",")]) + cofactor_names = ", ".join( + [covar.split(":")[0] for covar in self.covariates.split(",")]) string4 = cofactor_names elif self.controlLocus and self.doControl != "false": string3 += '%s as control' % self.controlLocus @@ -1180,32 +1235,36 @@ class DisplayMappingResults: if self.selectedChr == -1: identification = "Mapping on All Chromosomes for " else: - identification = "Mapping on Chromosome %s for " % (self.ChrList[self.selectedChr][0]) + identification = "Mapping on Chromosome %s for " % ( + self.ChrList[self.selectedChr][0]) if self.this_trait.symbol: - identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.symbol) + identification += "Trait: %s - %s" % ( + self.this_trait.name, self.this_trait.symbol) elif self.dataset.type == "Publish": if self.this_trait.post_publication_abbreviation: - identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.post_publication_abbreviation) + identification += "Trait: %s - %s" % ( + self.this_trait.name, self.this_trait.post_publication_abbreviation) elif self.this_trait.pre_publication_abbreviation: - identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.pre_publication_abbreviation) + identification += "Trait: %s - %s" % ( + self.this_trait.name, self.this_trait.pre_publication_abbreviation) else: identification += "Trait: %s" % (self.this_trait.name) else: identification += "Trait: %s" % (self.this_trait.name) identification += " with %s samples" % (self.n_samples) - d = 4+ max( + d = 4 + max( im_drawer.textsize(identification, font=labelFont)[0], im_drawer.textsize(string1, font=labelFont)[0], im_drawer.textsize(string2, font=labelFont)[0]) im_drawer.text( text=identification, - xy=(xLeftOffset, y_constant*fontZoom), font=labelFont, + xy=(xLeftOffset, y_constant * fontZoom), font=labelFont, fill=labelColor) y_constant += 15 else: - d = 4+ max( + d = 4 + max( im_drawer.textsize(string1, font=labelFont)[0], im_drawer.textsize(string2, font=labelFont)[0]) @@ -1223,28 +1282,28 @@ class DisplayMappingResults: transform_text += "Invert +/-" im_drawer.text( - text=transform_text, xy=(xLeftOffset, y_constant*fontZoom), + text=transform_text, xy=(xLeftOffset, y_constant * fontZoom), font=labelFont, fill=labelColor) y_constant += 15 im_drawer.text( - text=string1, xy=(xLeftOffset, y_constant*fontZoom), + text=string1, xy=(xLeftOffset, y_constant * fontZoom), font=labelFont, fill=labelColor) y_constant += 15 im_drawer.text( - text=string2, xy=(xLeftOffset, y_constant*fontZoom), + text=string2, xy=(xLeftOffset, y_constant * fontZoom), font=labelFont, fill=labelColor) y_constant += 15 if string3 != '': im_drawer.text( - text=string3, xy=(xLeftOffset, y_constant*fontZoom), + text=string3, xy=(xLeftOffset, y_constant * fontZoom), font=labelFont, fill=labelColor) y_constant += 15 if string4 != '': im_drawer.text( - text=string4, xy=(xLeftOffset, y_constant*fontZoom), + text=string4, xy=(xLeftOffset, y_constant * fontZoom), font=labelFont, fill=labelColor) - def drawGeneBand(self, canvas, gifmap, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): + def drawGeneBand(self, canvas, gifmap, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): im_drawer = ImageDraw.Draw(canvas) if self.plotScale != 'physic' or self.selectedChr == -1 or not self.geneCol: return @@ -1264,11 +1323,12 @@ class DisplayMappingResults: if self.dataset.group.species == "mouse": txStart = theGO["TxStart"] txEnd = theGO["TxEnd"] - geneLength = (txEnd - txStart)*1000.0 - tenPercentLength = geneLength*0.0001 - SNPdensity = theGO["snpCount"]/geneLength + geneLength = (txEnd - txStart) * 1000.0 + tenPercentLength = geneLength * 0.0001 + SNPdensity = theGO["snpCount"] / geneLength - exonStarts = list(map(float, theGO['exonStarts'].split(",")[:-1])) + exonStarts = list( + map(float, theGO['exonStarts'].split(",")[:-1])) exonEnds = list(map(float, theGO['exonEnds'].split(",")[:-1])) cdsStart = theGO['cdsStart'] cdsEnd = theGO['cdsEnd'] @@ -1277,23 +1337,26 @@ class DisplayMappingResults: strand = theGO["Strand"] exonCount = theGO["exonCount"] - geneStartPix = xLeftOffset + plotXScale*(float(txStart) - startMb) - geneEndPix = xLeftOffset + plotXScale*(float(txEnd) - startMb) #at least one pixel + geneStartPix = xLeftOffset + \ + plotXScale * (float(txStart) - startMb) + geneEndPix = xLeftOffset + plotXScale * \ + (float(txEnd) - startMb) # at least one pixel if (geneEndPix < xLeftOffset): - return; # this gene is not on the screen + return # this gene is not on the screen elif (geneEndPix > xLeftOffset + plotWidth): - geneEndPix = xLeftOffset + plotWidth; # clip the last in-range gene + geneEndPix = xLeftOffset + plotWidth # clip the last in-range gene if (geneStartPix > xLeftOffset + plotWidth): - return; # we are outside the valid on-screen range, so stop drawing genes + return # we are outside the valid on-screen range, so stop drawing genes elif (geneStartPix < xLeftOffset): - geneStartPix = xLeftOffset; # clip the first in-range gene + geneStartPix = xLeftOffset # clip the first in-range gene - #color the gene based on SNP density - #found earlier, needs to be recomputed as snps are added - #always apply colors now, even if SNP Track not checked - Zach 11/24/2010 + # color the gene based on SNP density + # found earlier, needs to be recomputed as snps are added + # always apply colors now, even if SNP Track not checked - Zach 11/24/2010 - densities=[1.0000000000000001e-05, 0.094094033555233408, 0.3306166377816987, 0.88246026851027781, 2.6690084029581951, 4.1, 61.0] + densities = [1.0000000000000001e-05, 0.094094033555233408, + 0.3306166377816987, 0.88246026851027781, 2.6690084029581951, 4.1, 61.0] if SNPdensity < densities[0]: myColor = BLACK elif SNPdensity < densities[1]: @@ -1310,11 +1373,12 @@ class DisplayMappingResults: myColor = DARKRED outlineColor = myColor - fillColor = myColor + fillColor = myColor - TITLE = "Gene: %s (%s)\nFrom %2.3f to %2.3f Mb (%s)\nNum. exons: %d." % (geneSymbol, accession, float(txStart), float(txEnd), strand, exonCount) + TITLE = "Gene: %s (%s)\nFrom %2.3f to %2.3f Mb (%s)\nNum. exons: %d." % ( + geneSymbol, accession, float(txStart), float(txEnd), strand, exonCount) # NL: 06-02-2011 Rob required to change this link for gene related - HREF=geneNCBILink %geneSymbol + HREF = geneNCBILink % geneSymbol elif self.dataset.group.species == "rat": exonStarts = [] @@ -1327,85 +1391,92 @@ class DisplayMappingResults: strand = theGO["Strand"] exonCount = 0 - geneStartPix = xLeftOffset + plotXScale*(float(txStart) - startMb) - geneEndPix = xLeftOffset + plotXScale*(float(txEnd) - startMb) #at least one pixel + geneStartPix = xLeftOffset + \ + plotXScale * (float(txStart) - startMb) + geneEndPix = xLeftOffset + plotXScale * \ + (float(txEnd) - startMb) # at least one pixel if (geneEndPix < xLeftOffset): - return; # this gene is not on the screen + return # this gene is not on the screen elif (geneEndPix > xLeftOffset + plotWidth): - geneEndPix = xLeftOffset + plotWidth; # clip the last in-range gene + geneEndPix = xLeftOffset + plotWidth # clip the last in-range gene if (geneStartPix > xLeftOffset + plotWidth): - return; # we are outside the valid on-screen range, so stop drawing genes + return # we are outside the valid on-screen range, so stop drawing genes elif (geneStartPix < xLeftOffset): - geneStartPix = xLeftOffset; # clip the first in-range gene + geneStartPix = xLeftOffset # clip the first in-range gene outlineColor = DARKBLUE fillColor = DARKBLUE - TITLE = "Gene: %s\nFrom %2.3f to %2.3f Mb (%s)" % (geneSymbol, float(txStart), float(txEnd), strand) + TITLE = "Gene: %s\nFrom %2.3f to %2.3f Mb (%s)" % ( + geneSymbol, float(txStart), float(txEnd), strand) # NL: 06-02-2011 Rob required to change this link for gene related - HREF=geneNCBILink %geneSymbol + HREF = geneNCBILink % geneSymbol else: outlineColor = ORANGE fillColor = ORANGE TITLE = "Gene: %s" % geneSymbol - #Draw Genes - geneYLocation = yPaddingTop + (gIndex % self.NUM_GENE_ROWS) * self.EACH_GENE_HEIGHT*zoom + # Draw Genes + geneYLocation = yPaddingTop + \ + (gIndex % self.NUM_GENE_ROWS) * self.EACH_GENE_HEIGHT * zoom if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": - geneYLocation += 4*self.BAND_HEIGHT + 4*self.BAND_SPACING + geneYLocation += 4 * self.BAND_HEIGHT + 4 * self.BAND_SPACING else: - geneYLocation += 3*self.BAND_HEIGHT + 3*self.BAND_SPACING + geneYLocation += 3 * self.BAND_HEIGHT + 3 * self.BAND_SPACING - #draw the detail view + # draw the detail view if self.endMb - self.startMb <= self.DRAW_DETAIL_MB and geneEndPix - geneStartPix > self.EACH_GENE_ARROW_SPACING * 3: utrColor = ImageColor.getrgb("rgb(66%, 66%, 66%)") arrowColor = ImageColor.getrgb("rgb(70%, 70%, 70%)") - #draw the line that runs the entire length of the gene + # draw the line that runs the entire length of the gene im_drawer.line( xy=( - (geneStartPix, geneYLocation + self.EACH_GENE_HEIGHT/2*zoom), - ( geneEndPix, geneYLocation + self.EACH_GENE_HEIGHT/2*zoom)), + (geneStartPix, geneYLocation + \ + self.EACH_GENE_HEIGHT / 2 * zoom), + (geneEndPix, geneYLocation + self.EACH_GENE_HEIGHT / 2 * zoom)), fill=outlineColor, width=1) - #draw the arrows + # draw the arrows if geneEndPix - geneStartPix < 1: genePixRange = 1 else: genePixRange = int(geneEndPix - geneStartPix) for xCoord in range(0, genePixRange): - if (xCoord % self.EACH_GENE_ARROW_SPACING == 0 and xCoord + self.EACH_GENE_ARROW_SPACING < geneEndPix-geneStartPix) or xCoord == 0: + if (xCoord % self.EACH_GENE_ARROW_SPACING == 0 and xCoord + self.EACH_GENE_ARROW_SPACING < geneEndPix - geneStartPix) or xCoord == 0: if strand == "+": im_drawer.line( xy=((geneStartPix + xCoord, geneYLocation), (geneStartPix + xCoord + self.EACH_GENE_ARROW_WIDTH, - geneYLocation +(self.EACH_GENE_HEIGHT / 2)*zoom)), + geneYLocation + (self.EACH_GENE_HEIGHT / 2) * zoom)), fill=arrowColor, width=1) im_drawer.line( xy=((geneStartPix + xCoord, - geneYLocation + self.EACH_GENE_HEIGHT*zoom), - (geneStartPix + xCoord+self.EACH_GENE_ARROW_WIDTH, + geneYLocation + self.EACH_GENE_HEIGHT * zoom), + (geneStartPix + xCoord + self.EACH_GENE_ARROW_WIDTH, geneYLocation + (self.EACH_GENE_HEIGHT / 2) * zoom)), fill=arrowColor, width=1) else: im_drawer.line( xy=((geneStartPix + xCoord + self.EACH_GENE_ARROW_WIDTH, geneYLocation), - ( geneStartPix + xCoord, - geneYLocation +(self.EACH_GENE_HEIGHT / 2)*zoom)), + (geneStartPix + xCoord, + geneYLocation + (self.EACH_GENE_HEIGHT / 2) * zoom)), fill=arrowColor, width=1) im_drawer.line( xy=((geneStartPix + xCoord + self.EACH_GENE_ARROW_WIDTH, - geneYLocation + self.EACH_GENE_HEIGHT*zoom), - ( geneStartPix + xCoord, - geneYLocation + (self.EACH_GENE_HEIGHT / 2)*zoom)), + geneYLocation + self.EACH_GENE_HEIGHT * zoom), + (geneStartPix + xCoord, + geneYLocation + (self.EACH_GENE_HEIGHT / 2) * zoom)), fill=arrowColor, width=1) - #draw the blocks for the exon regions + # draw the blocks for the exon regions for i in range(0, len(exonStarts)): - exonStartPix = (exonStarts[i]-startMb)*plotXScale + xLeftOffset - exonEndPix = (exonEnds[i]-startMb)*plotXScale + xLeftOffset + exonStartPix = ( + exonStarts[i] - startMb) * plotXScale + xLeftOffset + exonEndPix = (exonEnds[i] - startMb) * \ + plotXScale + xLeftOffset if (exonStartPix < xLeftOffset): exonStartPix = xLeftOffset if (exonEndPix < xLeftOffset): @@ -1416,13 +1487,14 @@ class DisplayMappingResults: exonStartPix = xLeftOffset + plotWidth im_drawer.rectangle( xy=((exonStartPix, geneYLocation), - (exonEndPix, (geneYLocation + self.EACH_GENE_HEIGHT*zoom))), - outline = outlineColor, fill = fillColor) + (exonEndPix, (geneYLocation + self.EACH_GENE_HEIGHT * zoom))), + outline=outlineColor, fill=fillColor) - #draw gray blocks for 3' and 5' UTR blocks + # draw gray blocks for 3' and 5' UTR blocks if cdsStart and cdsEnd: - utrStartPix = (txStart-startMb)*plotXScale + xLeftOffset - utrEndPix = (cdsStart-startMb)*plotXScale + xLeftOffset + utrStartPix = (txStart - startMb) * \ + plotXScale + xLeftOffset + utrEndPix = (cdsStart - startMb) * plotXScale + xLeftOffset if (utrStartPix < xLeftOffset): utrStartPix = xLeftOffset if (utrEndPix < xLeftOffset): @@ -1439,13 +1511,14 @@ class DisplayMappingResults: labelText = "5'" im_drawer.text( text=labelText, - xy=(utrStartPix-9, geneYLocation+self.EACH_GENE_HEIGHT), + xy=(utrStartPix - 9, geneYLocation + \ + self.EACH_GENE_HEIGHT), font=ImageFont.truetype(font=ARIAL_FILE, size=2)) - #the second UTR region + # the second UTR region - utrStartPix = (cdsEnd-startMb)*plotXScale + xLeftOffset - utrEndPix = (txEnd-startMb)*plotXScale + xLeftOffset + utrStartPix = (cdsEnd - startMb) * plotXScale + xLeftOffset + utrEndPix = (txEnd - startMb) * plotXScale + xLeftOffset if (utrStartPix < xLeftOffset): utrStartPix = xLeftOffset if (utrEndPix < xLeftOffset): @@ -1462,17 +1535,19 @@ class DisplayMappingResults: labelText = "3'" im_drawer.text( text=labelText, - xy=(utrEndPix+2, geneYLocation+self.EACH_GENE_HEIGHT), + xy=(utrEndPix + 2, geneYLocation + \ + self.EACH_GENE_HEIGHT), font=ImageFont.truetype(font=ARIAL_FILE, size=2)) - #draw the genes as rectangles + # draw the genes as rectangles else: im_drawer.rectangle( xy=((geneStartPix, geneYLocation), - (geneEndPix, (geneYLocation + self.EACH_GENE_HEIGHT*zoom))), - outline= outlineColor, fill = fillColor) + (geneEndPix, (geneYLocation + self.EACH_GENE_HEIGHT * zoom))), + outline=outlineColor, fill=fillColor) - COORDS = "%d, %d, %d, %d" %(geneStartPix, geneYLocation, geneEndPix, (geneYLocation + self.EACH_GENE_HEIGHT)) + COORDS = "%d, %d, %d, %d" % ( + geneStartPix, geneYLocation, geneEndPix, (geneYLocation + self.EACH_GENE_HEIGHT)) # NL: 06-02-2011 Rob required to display NCBI info in a new window gifmap.append( HtmlGenWrapper.create_area_tag( @@ -1482,8 +1557,8 @@ class DisplayMappingResults: title=TITLE, target="_blank")) -## BEGIN HaplotypeAnalyst - def drawHaplotypeBand(self, canvas, gifmap, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): +# BEGIN HaplotypeAnalyst + def drawHaplotypeBand(self, canvas, gifmap, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): if self.plotScale != 'physic' or self.selectedChr == -1 or not self.geneCol: return @@ -1496,61 +1571,66 @@ class DisplayMappingResults: samplelist = list(self.genotype.prgy) - smd=[] + smd = [] for sample in self.sample_vals_dict.keys(): if self.sample_vals_dict[sample] != "x" and sample in samplelist: - temp = GeneralObject(name=sample, value=float(self.sample_vals_dict[sample])) + temp = GeneralObject(name=sample, value=float( + self.sample_vals_dict[sample])) smd.append(temp) else: continue - smd.sort(key = lambda A: A.value) + smd.sort(key=lambda A: A.value) smd.reverse() oldgeneEndPix = -1 - #Initializing plotRight, error before + # Initializing plotRight, error before plotRight = xRightOffset im_drawer = ImageDraw.Draw(canvas) -#### find out PlotRight +# find out PlotRight for _chr in self.genotype: if _chr.name == self.ChrList[self.selectedChr][0]: for i, _locus in enumerate(_chr): txStart = _chr[i].Mb - txEnd = _chr[i].Mb + txEnd = _chr[i].Mb - geneStartPix = xLeftOffset + plotXScale*(float(txStart) - startMb) - 0 - geneEndPix = xLeftOffset + plotXScale*(float(txEnd) - startMb) - 0 + geneStartPix = xLeftOffset + plotXScale * \ + (float(txStart) - startMb) - 0 + geneEndPix = xLeftOffset + plotXScale * \ + (float(txEnd) - startMb) - 0 drawit = 1 if (geneStartPix < xLeftOffset): - drawit = 0; + drawit = 0 if (geneStartPix > xLeftOffset + plotWidth): - drawit = 0; + drawit = 0 if drawit == 1: - if _chr[i].name != " - " : + if _chr[i].name != " - ": plotRight = geneEndPix + 4 -#### end find out PlotRight +# end find out PlotRight firstGene = 1 lastGene = 0 - #Sets the length to the length of the strain list. Beforehand, "oldgeno = self.genotype[0][i].genotype" - #was the only place it was initialized, which worked as long as the very start (startMb = None/0) wasn't being mapped. - #Now there should always be some value set for "oldgeno" - Zach 12/14/2010 - oldgeno = [None]*len(self.strainlist) + # Sets the length to the length of the strain list. Beforehand, "oldgeno = self.genotype[0][i].genotype" + # was the only place it was initialized, which worked as long as the very start (startMb = None/0) wasn't being mapped. + # Now there should always be some value set for "oldgeno" - Zach 12/14/2010 + oldgeno = [None] * len(self.strainlist) for i, _chr in enumerate(self.genotype): if _chr.name == self.ChrList[self.selectedChr][0]: for j, _locus in enumerate(_chr): txStart = _chr[j].Mb - txEnd = _chr[j].Mb + txEnd = _chr[j].Mb - geneStartPix = xLeftOffset + plotXScale*(float(txStart) - startMb) - 0 - geneEndPix = xLeftOffset + plotXScale*(float(txEnd) - startMb) + 0 + geneStartPix = xLeftOffset + plotXScale * \ + (float(txStart) - startMb) - 0 + geneEndPix = xLeftOffset + plotXScale * \ + (float(txEnd) - startMb) + 0 if oldgeneEndPix >= xLeftOffset: drawStart = oldgeneEndPix + 4 @@ -1582,36 +1662,38 @@ class DisplayMappingResults: if drawit == 1: myColor = DARKBLUE outlineColor = myColor - fillColor = myColor + fillColor = myColor - maxind=0 + maxind = 0 - #Draw Genes + # Draw Genes - geneYLocation = yPaddingTop + self.NUM_GENE_ROWS * (self.EACH_GENE_HEIGHT)*zoom + geneYLocation = yPaddingTop + self.NUM_GENE_ROWS * \ + (self.EACH_GENE_HEIGHT) * zoom if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": - geneYLocation += 4*self.BAND_HEIGHT + 4*self.BAND_SPACING + geneYLocation += 4 * self.BAND_HEIGHT + 4 * self.BAND_SPACING else: - geneYLocation += 3*self.BAND_HEIGHT + 3*self.BAND_SPACING + geneYLocation += 3 * self.BAND_HEIGHT + 3 * self.BAND_SPACING - if _chr[j].name != " - " : + if _chr[j].name != " - ": if (firstGene == 1) and (lastGene != 1): oldgeneEndPix = drawStart = xLeftOffset oldgeno = _chr[j].genotype continue - for k, _geno in enumerate (_chr[j].genotype): - plotbxd=0 + for k, _geno in enumerate(_chr[j].genotype): + plotbxd = 0 if samplelist[k] in [item.name for item in smd]: - plotbxd=1 + plotbxd = 1 if (plotbxd == 1): ind = 0 if samplelist[k] in [item.name for item in smd]: - ind = [item.name for item in smd].index(samplelist[k]) + ind = [item.name for item in smd].index( + samplelist[k]) - maxind=max(ind, maxind) + maxind = max(ind, maxind) # lines if (oldgeno[k] == -1 and _geno == -1): @@ -1621,28 +1703,29 @@ class DisplayMappingResults: elif (oldgeno[k] == 0 and _geno == 0): mylineColor = self.HAPLOTYPE_HETEROZYGOUS else: - mylineColor = self.HAPLOTYPE_RECOMBINATION # XZ: Unknown + mylineColor = self.HAPLOTYPE_RECOMBINATION # XZ: Unknown im_drawer.line( xy=((drawStart, - geneYLocation+7+2*ind*self.EACH_GENE_HEIGHT*zoom), + geneYLocation + 7 + 2 * ind * self.EACH_GENE_HEIGHT * zoom), (drawEnd, - geneYLocation+7+2*ind*self.EACH_GENE_HEIGHT*zoom)), - fill= mylineColor, width=zoom*(self.EACH_GENE_HEIGHT+2)) + geneYLocation + 7 + 2 * ind * self.EACH_GENE_HEIGHT * zoom)), + fill=mylineColor, width=zoom * (self.EACH_GENE_HEIGHT + 2)) - fillColor=BLACK - outlineColor=BLACK + fillColor = BLACK + outlineColor = BLACK if lastGene == 0: im_drawer.rectangle( xy=((geneStartPix, - geneYLocation+2*ind*self.EACH_GENE_HEIGHT*zoom), + geneYLocation + 2 * ind * self.EACH_GENE_HEIGHT * zoom), (geneEndPix, - geneYLocation+2*ind*self.EACH_GENE_HEIGHT+ 2*self.EACH_GENE_HEIGHT*zoom)), + geneYLocation + 2 * ind * self.EACH_GENE_HEIGHT + 2 * self.EACH_GENE_HEIGHT * zoom)), outline=outlineColor, fill=fillColor) - - COORDS = "%d, %d, %d, %d" %(geneStartPix, geneYLocation+ind*self.EACH_GENE_HEIGHT, geneEndPix+1, (geneYLocation + ind*self.EACH_GENE_HEIGHT)) - TITLE = "Strain: %s, marker (%s) \n Position %2.3f Mb." % (samplelist[k], _chr[j].name, float(txStart)) + COORDS = "%d, %d, %d, %d" % ( + geneStartPix, geneYLocation + ind * self.EACH_GENE_HEIGHT, geneEndPix + 1, (geneYLocation + ind * self.EACH_GENE_HEIGHT)) + TITLE = "Strain: %s, marker (%s) \n Position %2.3f Mb." % ( + samplelist[k], _chr[j].name, float(txStart)) HREF = '' gifmap.append( HtmlGenWrapper.create_area_tag( @@ -1658,11 +1741,10 @@ class DisplayMappingResults: mylineColor = self.HAPLOTYPE_RECOMBINATION im_drawer.line( xy=((plotRight, - geneYLocation+7+2*ind*self.EACH_GENE_HEIGHT*zoom), + geneYLocation + 7 + 2 * ind * self.EACH_GENE_HEIGHT * zoom), (drawEnd, - geneYLocation+7+2*ind*self.EACH_GENE_HEIGHT*zoom)), - fill= mylineColor, width=zoom*(self.EACH_GENE_HEIGHT+2)) - + geneYLocation + 7 + 2 * ind * self.EACH_GENE_HEIGHT * zoom)), + fill=mylineColor, width=zoom * (self.EACH_GENE_HEIGHT + 2)) if lastGene == 0: draw_rotated_text( @@ -1670,10 +1752,10 @@ class DisplayMappingResults: font=ImageFont.truetype(font=VERDANA_FILE, size=12), xy=(geneStartPix, - geneYLocation+17+2*maxind*self.EACH_GENE_HEIGHT*zoom), + geneYLocation + 17 + 2 * maxind * self.EACH_GENE_HEIGHT * zoom), fill=BLACK, angle=-90) - oldgeneEndPix = geneEndPix; + oldgeneEndPix = geneEndPix oldgeno = _chr[j].genotype firstGene = 0 else: @@ -1683,31 +1765,34 @@ class DisplayMappingResults: if _chr.name == self.ChrList[self.selectedChr][0]: for j, _geno in enumerate(_chr[1].genotype): - plotbxd=0 + plotbxd = 0 if samplelist[j] in [item.name for item in smd]: - plotbxd=1 + plotbxd = 1 if (plotbxd == 1): - ind = [item.name for item in smd].index(samplelist[j]) - 1 + ind = [item.name for item in smd].index( + samplelist[j]) - 1 expr = smd[ind].value # Place where font is hardcoded im_drawer.text( text="%s" % (samplelist[j]), xy=((xLeftOffset + plotWidth + 10), - geneYLocation+11+2*ind*self.EACH_GENE_HEIGHT*zoom), - font=ImageFont.truetype(font=VERDANA_FILE, size=12), + geneYLocation + 11 + 2 * ind * self.EACH_GENE_HEIGHT * zoom), + font=ImageFont.truetype( + font=VERDANA_FILE, size=12), fill=BLACK) im_drawer.text( text="%2.2f" % (expr), xy=((xLeftOffset + plotWidth + 60), - geneYLocation+11+2*ind*self.EACH_GENE_HEIGHT*zoom), - font=ImageFont.truetype(font=VERDANA_FILE, size=12), + geneYLocation + 11 + 2 * ind * self.EACH_GENE_HEIGHT * zoom), + font=ImageFont.truetype( + font=VERDANA_FILE, size=12), fill=BLACK) -## END HaplotypeAnalyst +# END HaplotypeAnalyst - def drawClickBand(self, canvas, gifmap, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): + def drawClickBand(self, canvas, gifmap, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): im_drawer = ImageDraw.Draw(canvas) if self.plotScale != 'physic' or self.selectedChr == -1: return @@ -1724,12 +1809,16 @@ class DisplayMappingResults: # but it makes the HTML huge, and takes forever to render the page in the first place) # Draw the bands that you can click on to go to UCSC / Ensembl MAX_CLICKABLE_REGION_DIVISIONS = 100 - clickableRegionLabelFont=ImageFont.truetype(font=VERDANA_FILE, size=9) - pixelStep = max(5, int(float(plotWidth)/MAX_CLICKABLE_REGION_DIVISIONS)) + clickableRegionLabelFont = ImageFont.truetype( + font=VERDANA_FILE, size=9) + pixelStep = max( + 5, int(float(plotWidth) / MAX_CLICKABLE_REGION_DIVISIONS)) # pixelStep: every N pixels, we make a new clickable area for the user to go to that area of the genome. - numBasesCurrentlyOnScreen = self.kONE_MILLION*abs(startMb - endMb) # Number of bases on screen now - flankingWidthInBases = int ( min( (float(numBasesCurrentlyOnScreen) / 2.0), (5*self.kONE_MILLION) ) ) + numBasesCurrentlyOnScreen = self.kONE_MILLION * \ + abs(startMb - endMb) # Number of bases on screen now + flankingWidthInBases = int( + min((float(numBasesCurrentlyOnScreen) / 2.0), (5 * self.kONE_MILLION))) webqtlZoomWidth = numBasesCurrentlyOnScreen / 16.0 # Flanking width should be such that we either zoom in to a 10 million base region, or we show the clicked region at the same scale as we are currently seeing. @@ -1738,23 +1827,33 @@ class DisplayMappingResults: paddingTop = yTopOffset if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": - phenogenPaddingTop = paddingTop + (self.BAND_HEIGHT + self.BAND_SPACING) - ucscPaddingTop = paddingTop + 2*(self.BAND_HEIGHT + self.BAND_SPACING) - ensemblPaddingTop = paddingTop + 3*(self.BAND_HEIGHT + self.BAND_SPACING) + phenogenPaddingTop = paddingTop + \ + (self.BAND_HEIGHT + self.BAND_SPACING) + ucscPaddingTop = paddingTop + 2 * \ + (self.BAND_HEIGHT + self.BAND_SPACING) + ensemblPaddingTop = paddingTop + 3 * \ + (self.BAND_HEIGHT + self.BAND_SPACING) else: - ucscPaddingTop = paddingTop + (self.BAND_HEIGHT + self.BAND_SPACING) - ensemblPaddingTop = paddingTop + 2*(self.BAND_HEIGHT + self.BAND_SPACING) + ucscPaddingTop = paddingTop + \ + (self.BAND_HEIGHT + self.BAND_SPACING) + ensemblPaddingTop = paddingTop + 2 * \ + (self.BAND_HEIGHT + self.BAND_SPACING) if zoom == 1: for pixel in range(xLeftOffset, xLeftOffset + plotWidth, pixelStep): - calBase = self.kONE_MILLION*(startMb + (endMb-startMb)*(pixel-xLeftOffset-0.0)/plotWidth) + calBase = self.kONE_MILLION * \ + (startMb + (endMb - startMb) * \ + (pixel - xLeftOffset - 0.0) / plotWidth) xBrowse1 = pixel - xBrowse2 = min(xLeftOffset + plotWidth, (pixel + pixelStep - 1)) + xBrowse2 = min(xLeftOffset + plotWidth, + (pixel + pixelStep - 1)) - WEBQTL_COORDS = "%d, %d, %d, %d" % (xBrowse1, paddingTop, xBrowse2, (paddingTop+self.BAND_HEIGHT)) - WEBQTL_HREF = "javascript:rangeView('%s', %f, %f)" % (self.selectedChr - 1, max(0, (calBase-webqtlZoomWidth))/1000000.0, (calBase+webqtlZoomWidth)/1000000.0) + WEBQTL_COORDS = "%d, %d, %d, %d" % ( + xBrowse1, paddingTop, xBrowse2, (paddingTop + self.BAND_HEIGHT)) + WEBQTL_HREF = "javascript:rangeView('%s', %f, %f)" % (self.selectedChr - 1, max( + 0, (calBase - webqtlZoomWidth)) / 1000000.0, (calBase + webqtlZoomWidth) / 1000000.0) WEBQTL_TITLE = "Click to view this section of the genome in WebQTL" gifmap.append( @@ -1769,15 +1868,19 @@ class DisplayMappingResults: outline=self.CLICKABLE_WEBQTL_REGION_COLOR, fill=self.CLICKABLE_WEBQTL_REGION_COLOR) im_drawer.line( - xy=((xBrowse1, paddingTop), ( xBrowse1, (paddingTop + self.BAND_HEIGHT))), + xy=((xBrowse1, paddingTop), (xBrowse1, + (paddingTop + self.BAND_HEIGHT))), fill=self.CLICKABLE_WEBQTL_REGION_OUTLINE_COLOR) if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": - PHENOGEN_COORDS = "%d, %d, %d, %d" % (xBrowse1, phenogenPaddingTop, xBrowse2, (phenogenPaddingTop+self.BAND_HEIGHT)) + PHENOGEN_COORDS = "%d, %d, %d, %d" % ( + xBrowse1, phenogenPaddingTop, xBrowse2, (phenogenPaddingTop + self.BAND_HEIGHT)) if self.dataset.group.species == "mouse": - PHENOGEN_HREF = "https://phenogen.org/gene.jsp?speciesCB=Mm&auto=Y&geneTxt=chr%s:%d-%d&genomeVer=mm10" % (self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases) + PHENOGEN_HREF = "https://phenogen.org/gene.jsp?speciesCB=Mm&auto=Y&geneTxt=chr%s:%d-%d&genomeVer=mm10" % ( + self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) else: - PHENOGEN_HREF = "https://phenogen.org/gene.jsp?speciesCB=Mm&auto=Y&geneTxt=chr%s:%d-%d&genomeVer=mm10" % (self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases) + PHENOGEN_HREF = "https://phenogen.org/gene.jsp?speciesCB=Mm&auto=Y&geneTxt=chr%s:%d-%d&genomeVer=mm10" % ( + self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) PHENOGEN_TITLE = "Click to view this section of the genome in PhenoGen" gifmap.append( HtmlGenWrapper.create_area_tag( @@ -1787,18 +1890,22 @@ class DisplayMappingResults: title=PHENOGEN_TITLE)) im_drawer.rectangle( xy=((xBrowse1, phenogenPaddingTop), - (xBrowse2, (phenogenPaddingTop+self.BAND_HEIGHT))), + (xBrowse2, (phenogenPaddingTop + self.BAND_HEIGHT))), outline=self.CLICKABLE_PHENOGEN_REGION_COLOR, fill=self.CLICKABLE_PHENOGEN_REGION_COLOR) im_drawer.line( - xy=((xBrowse1, phenogenPaddingTop), ( xBrowse1, (phenogenPaddingTop+self.BAND_HEIGHT))), + xy=((xBrowse1, phenogenPaddingTop), (xBrowse1, + (phenogenPaddingTop + self.BAND_HEIGHT))), fill=self.CLICKABLE_PHENOGEN_REGION_OUTLINE_COLOR) - UCSC_COORDS = "%d, %d, %d, %d" %(xBrowse1, ucscPaddingTop, xBrowse2, (ucscPaddingTop+self.BAND_HEIGHT)) + UCSC_COORDS = "%d, %d, %d, %d" % ( + xBrowse1, ucscPaddingTop, xBrowse2, (ucscPaddingTop + self.BAND_HEIGHT)) if self.dataset.group.species == "mouse": - UCSC_HREF = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d&hgt.customText=%s/snp/chr%s" % (self._ucscDb, self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases, webqtlConfig.PORTADDR, self.selectedChr) + UCSC_HREF = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d&hgt.customText=%s/snp/chr%s" % ( + self._ucscDb, self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases, webqtlConfig.PORTADDR, self.selectedChr) else: - UCSC_HREF = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d" % (self._ucscDb, self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases) + UCSC_HREF = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d" % ( + self._ucscDb, self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) UCSC_TITLE = "Click to view this section of the genome in the UCSC Genome Browser" gifmap.append( HtmlGenWrapper.create_area_tag( @@ -1808,19 +1915,22 @@ class DisplayMappingResults: title=UCSC_TITLE)) im_drawer.rectangle( xy=((xBrowse1, ucscPaddingTop), - (xBrowse2, (ucscPaddingTop+self.BAND_HEIGHT))), + (xBrowse2, (ucscPaddingTop + self.BAND_HEIGHT))), outline=self.CLICKABLE_UCSC_REGION_COLOR, fill=self.CLICKABLE_UCSC_REGION_COLOR) im_drawer.line( xy=((xBrowse1, ucscPaddingTop), - (xBrowse1, (ucscPaddingTop+self.BAND_HEIGHT))), + (xBrowse1, (ucscPaddingTop + self.BAND_HEIGHT))), fill=self.CLICKABLE_UCSC_REGION_OUTLINE_COLOR) - ENSEMBL_COORDS = "%d, %d, %d, %d" %(xBrowse1, ensemblPaddingTop, xBrowse2, (ensemblPaddingTop+self.BAND_HEIGHT)) + ENSEMBL_COORDS = "%d, %d, %d, %d" % ( + xBrowse1, ensemblPaddingTop, xBrowse2, (ensemblPaddingTop + self.BAND_HEIGHT)) if self.dataset.group.species == "mouse": - ENSEMBL_HREF = "http://www.ensembl.org/Mus_musculus/contigview?highlight=&chr=%s&vc_start=%d&vc_end=%d&x=35&y=12" % (self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases) + ENSEMBL_HREF = "http://www.ensembl.org/Mus_musculus/contigview?highlight=&chr=%s&vc_start=%d&vc_end=%d&x=35&y=12" % ( + self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) else: - ENSEMBL_HREF = "http://www.ensembl.org/Rattus_norvegicus/contigview?chr=%s&start=%d&end=%d" % (self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases) + ENSEMBL_HREF = "http://www.ensembl.org/Rattus_norvegicus/contigview?chr=%s&start=%d&end=%d" % ( + self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) ENSEMBL_TITLE = "Click to view this section of the genome in the Ensembl Genome Browser" gifmap.append(HtmlGenWrapper.create_area_tag( shape='rect', @@ -1829,36 +1939,40 @@ class DisplayMappingResults: title=ENSEMBL_TITLE)) im_drawer.rectangle( xy=((xBrowse1, ensemblPaddingTop), - (xBrowse2, (ensemblPaddingTop+self.BAND_HEIGHT))), + (xBrowse2, (ensemblPaddingTop + self.BAND_HEIGHT))), outline=self.CLICKABLE_ENSEMBL_REGION_COLOR, fill=self.CLICKABLE_ENSEMBL_REGION_COLOR) im_drawer.line( xy=((xBrowse1, ensemblPaddingTop), - (xBrowse1, (ensemblPaddingTop+self.BAND_HEIGHT))), + (xBrowse1, (ensemblPaddingTop + self.BAND_HEIGHT))), fill=self.CLICKABLE_ENSEMBL_REGION_OUTLINE_COLOR) # end for im_drawer.text( text="Click to view the corresponding section of the genome in an 8x expanded WebQTL map", - xy=((xLeftOffset + 10), paddingTop),# + self.BAND_HEIGHT/2), + xy=((xLeftOffset + 10), paddingTop), # + self.BAND_HEIGHT/2), font=clickableRegionLabelFont, fill=self.CLICKABLE_WEBQTL_TEXT_COLOR) if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": im_drawer.text( text="Click to view the corresponding section of the genome in PhenoGen", - xy=((xLeftOffset + 10), phenogenPaddingTop),# + self.BAND_HEIGHT/2), + # + self.BAND_HEIGHT/2), + xy=((xLeftOffset + 10), phenogenPaddingTop), font=clickableRegionLabelFont, fill=self.CLICKABLE_PHENOGEN_TEXT_COLOR) im_drawer.text( text="Click to view the corresponding section of the genome in the UCSC Genome Browser", - xy=((xLeftOffset + 10), ucscPaddingTop),# + self.BAND_HEIGHT/2), + # + self.BAND_HEIGHT/2), + xy=((xLeftOffset + 10), ucscPaddingTop), font=clickableRegionLabelFont, fill=self.CLICKABLE_UCSC_TEXT_COLOR) im_drawer.text( text="Click to view the corresponding section of the genome in the Ensembl Genome Browser", - xy=((xLeftOffset+10), ensemblPaddingTop),# + self.BAND_HEIGHT/2), + # + self.BAND_HEIGHT/2), + xy=((xLeftOffset + 10), ensemblPaddingTop), font=clickableRegionLabelFont, fill=self.CLICKABLE_ENSEMBL_TEXT_COLOR) - #draw the gray text - chrFont = ImageFont.truetype(font=VERDANA_BOLD_FILE, size=26*zoom) + # draw the gray text + chrFont = ImageFont.truetype( + font=VERDANA_BOLD_FILE, size=26 * zoom) chrX = xLeftOffset + plotWidth - 2 - im_drawer.textsize( "Chr %s" % self.ChrList[self.selectedChr][0], font=chrFont)[0] im_drawer.text( @@ -1866,17 +1980,17 @@ class DisplayMappingResults: xy=(chrX, phenogenPaddingTop), font=chrFont, fill=GRAY) # end of drawBrowserClickableRegions else: - #draw the gray text - chrFont = ImageFont.truetype(font=VERDANA_FILE, size=26*zoom) + # draw the gray text + chrFont = ImageFont.truetype(font=VERDANA_FILE, size=26 * zoom) chrX = xLeftOffset + (plotWidth - im_drawer.textsize( - "Chr %s" % currentChromosome, font=chrFont)[0])/2 + "Chr %s" % currentChromosome, font=chrFont)[0]) / 2 im_drawer.text( text="Chr %s" % currentChromosome, xy=(chrX, 32), font=chrFont, fill=GRAY) # end of drawBrowserClickableRegions pass - def drawXAxis(self, canvas, drawAreaHeight, gifmap, plotXScale, showLocusForm, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): + def drawXAxis(self, canvas, drawAreaHeight, gifmap, plotXScale, showLocusForm, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): im_drawer = ImageDraw.Draw(canvas) xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset plotWidth = canvas.size[0] - xLeftOffset - xRightOffset @@ -1886,33 +2000,33 @@ class DisplayMappingResults: if zoom == 2: fontZoom = 1.5 - #Parameters - NUM_MINOR_TICKS = 5 # Number of minor ticks between major ticks + # Parameters + NUM_MINOR_TICKS = 5 # Number of minor ticks between major ticks X_MAJOR_TICK_THICKNESS = 3 X_MINOR_TICK_THICKNESS = 1 - X_AXIS_THICKNESS = 1*zoom + X_AXIS_THICKNESS = 1 * zoom # ======= Alex: Draw the X-axis labels (megabase location) - MBLabelFont = ImageFont.truetype(font=VERDANA_FILE, size=15*zoom) - xMajorTickHeight = 10 * zoom # How high the tick extends below the axis - xMinorTickHeight = 5*zoom + MBLabelFont = ImageFont.truetype(font=VERDANA_FILE, size=15 * zoom) + xMajorTickHeight = 10 * zoom # How high the tick extends below the axis + xMinorTickHeight = 5 * zoom xAxisTickMarkColor = BLACK xAxisLabelColor = BLACK - fontHeight = 12*fontZoom # How tall the font that we're using is + fontHeight = 12 * fontZoom # How tall the font that we're using is spacingFromLabelToAxis = 10 if self.plotScale == 'physic': - strYLoc = yZero + MBLabelFont.font.height/2 - ###Physical single chromosome view + strYLoc = yZero + MBLabelFont.font.height / 2 + # Physical single chromosome view if self.selectedChr > -1: XScale = Plot.detScale(startMb, endMb) XStart, XEnd, XStep = XScale if XStep < 8: XStep *= 2 - spacingAmtX = spacingAmt = (XEnd-XStart)/XStep + spacingAmtX = spacingAmt = (XEnd - XStart) / XStep j = 0 - while abs(spacingAmtX -int(spacingAmtX)) >= spacingAmtX/100.0 and j < 6: + while abs(spacingAmtX - int(spacingAmtX)) >= spacingAmtX / 100.0 and j < 6: j += 1 spacingAmtX *= 10 @@ -1921,30 +2035,32 @@ class DisplayMappingResults: for counter, _Mb in enumerate(Plot.frange(XStart, XEnd, spacingAmt / NUM_MINOR_TICKS)): if _Mb < startMb or _Mb > endMb: continue - Xc = xLeftOffset + plotXScale*(_Mb - startMb) - if counter % NUM_MINOR_TICKS == 0: # Draw a MAJOR mark, not just a minor tick mark + Xc = xLeftOffset + plotXScale * (_Mb - startMb) + if counter % NUM_MINOR_TICKS == 0: # Draw a MAJOR mark, not just a minor tick mark im_drawer.line(xy=((Xc, yZero), - (Xc, yZero+xMajorTickHeight)), + (Xc, yZero + xMajorTickHeight)), fill=xAxisTickMarkColor, - width=X_MAJOR_TICK_THICKNESS) # Draw the MAJOR tick mark - labelStr = str(formatStr % _Mb) # What Mbase location to put on the label - strWidth, strHeight = im_drawer.textsize(labelStr, font=MBLabelFont) + width=X_MAJOR_TICK_THICKNESS) # Draw the MAJOR tick mark + # What Mbase location to put on the label + labelStr = str(formatStr % _Mb) + strWidth, strHeight = im_drawer.textsize( + labelStr, font=MBLabelFont) drawStringXc = (Xc - (strWidth / 2.0)) im_drawer.text(xy=(drawStringXc, strYLoc), text=labelStr, font=MBLabelFont, fill=xAxisLabelColor) else: im_drawer.line(xy=((Xc, yZero), - (Xc, yZero+xMinorTickHeight)), + (Xc, yZero + xMinorTickHeight)), fill=xAxisTickMarkColor, - width=X_MINOR_TICK_THICKNESS) # Draw the MINOR tick mark + width=X_MINOR_TICK_THICKNESS) # Draw the MINOR tick mark - ###Physical genome wide view + # Physical genome wide view else: distScale = 0 startPosX = xLeftOffset for i, distLen in enumerate(self.ChrLengthDistList): - if distScale == 0: #universal scale in whole genome mapping + if distScale == 0: # universal scale in whole genome mapping if distLen > 75: distScale = 25 elif distLen > 30: @@ -1953,51 +2069,55 @@ class DisplayMappingResults: distScale = 5 for j, tickdists in enumerate(range(distScale, int(ceil(distLen)), distScale)): im_drawer.line( - xy=((startPosX+tickdists*plotXScale, yZero), - (startPosX+tickdists*plotXScale, yZero + 7)), - fill=BLACK, width=1*zoom) + xy=((startPosX + tickdists * plotXScale, yZero), + (startPosX + tickdists * plotXScale, yZero + 7)), + fill=BLACK, width=1 * zoom) if j % 2 == 0: draw_rotated_text( canvas, text=str(tickdists), font=MBLabelFont, - xy=(startPosX+tickdists*plotXScale, - yZero+10*zoom), fill=BLACK, angle=270) - startPosX += (self.ChrLengthDistList[i]+self.GraphInterval)*plotXScale + xy=(startPosX + tickdists * plotXScale, + yZero + 10 * zoom), fill=BLACK, angle=270) + startPosX += (self.ChrLengthDistList[i] + \ + self.GraphInterval) * plotXScale - megabaseLabelFont = ImageFont.truetype(font=VERDANA_FILE, size=int(18*zoom*1.5)) + megabaseLabelFont = ImageFont.truetype( + font=VERDANA_FILE, size=int(18 * zoom * 1.5)) im_drawer.text( text="Megabases", xy=( - xLeftOffset+(plotWidth-im_drawer.textsize( - "Megabases", font=megabaseLabelFont)[0])/2, - strYLoc+MBLabelFont.font.height+10*(zoom%2)), + xLeftOffset + (plotWidth - im_drawer.textsize( + "Megabases", font=megabaseLabelFont)[0]) / 2, + strYLoc + MBLabelFont.font.height + 10 * (zoom % 2)), font=megabaseLabelFont, fill=BLACK) pass else: - strYLoc = yZero + spacingFromLabelToAxis + MBLabelFont.font.height/2 + strYLoc = yZero + spacingFromLabelToAxis + MBLabelFont.font.height / 2 ChrAInfo = [] preLpos = -1 distinctCount = 0.0 - if self.selectedChr == -1: #ZS: If viewing full genome/all chromosomes + if self.selectedChr == -1: # ZS: If viewing full genome/all chromosomes for i, _chr in enumerate(self.genotype): thisChr = [] Locus0CM = _chr[0].cM nLoci = len(_chr) - if nLoci <= 8: + if nLoci <= 8: for _locus in _chr: if _locus.name != ' - ': if _locus.cM != preLpos: distinctCount += 1 preLpos = _locus.cM - thisChr.append([_locus.name, _locus.cM-Locus0CM]) + thisChr.append( + [_locus.name, _locus.cM - Locus0CM]) else: - for j in (0, nLoci/4, nLoci/2, nLoci*3/4, -1): + for j in (0, nLoci / 4, nLoci / 2, nLoci * 3 / 4, -1): while _chr[j].name == ' - ': j += 1 if _chr[j].cM != preLpos: distinctCount += 1 preLpos = _chr[j].cM - thisChr.append([_chr[j].name, _chr[j].cM-Locus0CM]) + thisChr.append( + [_chr[j].name, _chr[j].cM - Locus0CM]) ChrAInfo.append(thisChr) else: for i, _chr in enumerate(self.genotype): @@ -2009,10 +2129,11 @@ class DisplayMappingResults: if _locus.cM != preLpos: distinctCount += 1 preLpos = _locus.cM - thisChr.append([_locus.name, _locus.cM-Locus0CM]) + thisChr.append( + [_locus.name, _locus.cM - Locus0CM]) ChrAInfo.append(thisChr) - stepA = (plotWidth+0.0)/distinctCount + stepA = (plotWidth + 0.0) / distinctCount LRectWidth = 10 LRectHeight = 3 @@ -2037,28 +2158,29 @@ class DisplayMappingResults: Zorder = 0 if differ: im_drawer.line( - xy=((startPosX+Lpos, yZero), (xLeftOffset+offsetA,\ - yZero+25)), + xy=((startPosX + Lpos, yZero), (xLeftOffset + offsetA,\ + yZero + 25)), fill=lineColor) im_drawer.line( - xy=((xLeftOffset+offsetA, yZero+25), (xLeftOffset+offsetA,\ - yZero+40+Zorder*(LRectWidth+3))), + xy=((xLeftOffset + offsetA, yZero + 25), (xLeftOffset + offsetA,\ + yZero + 40 + Zorder * (LRectWidth + 3))), fill=lineColor) rectColor = ORANGE else: im_drawer.line( - xy=((xLeftOffset+offsetA, yZero+40+Zorder*(LRectWidth+3)-3), (\ - xLeftOffset+offsetA, yZero+40+Zorder*(LRectWidth+3))), + xy=((xLeftOffset + offsetA, yZero + 40 + Zorder * (LRectWidth + 3) - 3), (\ + xLeftOffset + offsetA, yZero + 40 + Zorder * (LRectWidth + 3))), fill=lineColor) rectColor = DEEPPINK im_drawer.rectangle( - xy=((xLeftOffset+offsetA, yZero+40+Zorder*(LRectWidth+3)), - (xLeftOffset+offsetA-LRectHeight, - yZero+40+Zorder*(LRectWidth+3)+LRectWidth)), - outline=rectColor, fill=rectColor, width = 0) - COORDS="%d,%d,%d,%d"%(xLeftOffset+offsetA-LRectHeight, yZero+40+Zorder*(LRectWidth+3),\ - xLeftOffset+offsetA,yZero+40+Zorder*(LRectWidth+3)+LRectWidth) - HREF = "/show_trait?trait_id=%s&dataset=%s" % (Lname, self.dataset.group.name+"Geno") + xy=((xLeftOffset + offsetA, yZero + 40 + Zorder * (LRectWidth + 3)), + (xLeftOffset + offsetA - LRectHeight, + yZero + 40 + Zorder * (LRectWidth + 3) + LRectWidth)), + outline=rectColor, fill=rectColor, width=0) + COORDS = "%d,%d,%d,%d" % (xLeftOffset + offsetA - LRectHeight, yZero + 40 + Zorder * (LRectWidth + 3),\ + xLeftOffset + offsetA, yZero + 40 + Zorder * (LRectWidth + 3) + LRectWidth) + HREF = "/show_trait?trait_id=%s&dataset=%s" % ( + Lname, self.dataset.group.name + "Geno") #HREF="javascript:showDatabase3('%s','%s','%s','');" % (showLocusForm,fd.RISet+"Geno", Lname) Areas = HtmlGenWrapper.create_area_tag( shape='rect', @@ -2067,26 +2189,27 @@ class DisplayMappingResults: target="_blank", title="Locus : {}".format(Lname)) gifmap.append(Areas) - ##piddle bug + # piddle bug if j == 0: im_drawer.line( - xy=((startPosX, yZero), (startPosX, yZero+40)), + xy=((startPosX, yZero), (startPosX, yZero + 40)), fill=lineColor) - startPosX += (self.ChrLengthDistList[j]+self.GraphInterval)*plotXScale + startPosX += (self.ChrLengthDistList[j] + \ + self.GraphInterval) * plotXScale - centimorganLabelFont = ImageFont.truetype(font=VERDANA_FILE, size=int(18*zoom*1.5)) + centimorganLabelFont = ImageFont.truetype( + font=VERDANA_FILE, size=int(18 * zoom * 1.5)) im_drawer.text( text="Centimorgans", - xy=(xLeftOffset+(plotWidth-im_drawer.textsize( - "Centimorgans", font=centimorganLabelFont)[0])/2, - strYLoc + MBLabelFont.font.height+ 10*(zoom%2)), + xy=(xLeftOffset + (plotWidth - im_drawer.textsize( + "Centimorgans", font=centimorganLabelFont)[0]) / 2, + strYLoc + MBLabelFont.font.height + 10 * (zoom % 2)), font=centimorganLabelFont, fill=BLACK) - im_drawer.line(xy=((xLeftOffset, yZero), (xLeftOffset+plotWidth, yZero)), - fill=BLACK, width=X_AXIS_THICKNESS) # Draw the X axis itself - + im_drawer.line(xy=((xLeftOffset, yZero), (xLeftOffset + plotWidth, yZero)), + fill=BLACK, width=X_AXIS_THICKNESS) # Draw the X axis itself - def drawQTL(self, canvas, drawAreaHeight, gifmap, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): + def drawQTL(self, canvas, drawAreaHeight, gifmap, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): im_drawer = ImageDraw.Draw(canvas) xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset plotWidth = canvas.size[0] - xLeftOffset - xRightOffset @@ -2095,74 +2218,85 @@ class DisplayMappingResults: if zoom == 2: fontZoom = 1.5 - INTERCROSS = (self.genotype.type=="intercross") + INTERCROSS = (self.genotype.type == "intercross") - #draw the LRS scale - #We first determine whether or not we are using a sliding scale. - #If so, we need to compute the maximum LRS value to determine where the max y-value should be, and call this LRS_LOD_Max. - #LRSTop is then defined to be above the LRS_LOD_Max by enough to add one additional LRSScale increment. - #if we are using a set-scale, then we set LRSTop to be the user's value, and LRS_LOD_Max doesn't matter. + # draw the LRS scale + # We first determine whether or not we are using a sliding scale. + # If so, we need to compute the maximum LRS value to determine where the max y-value should be, and call this LRS_LOD_Max. + # LRSTop is then defined to be above the LRS_LOD_Max by enough to add one additional LRSScale increment. + # if we are using a set-scale, then we set LRSTop to be the user's value, and LRS_LOD_Max doesn't matter. - #ZS: This is a mess, but I don't know a better way to account for different mapping methods returning results in different formats + the option to change between LRS and LOD - if self.lrsMax <= 0: #sliding scale + # ZS: This is a mess, but I don't know a better way to account for different mapping methods returning results in different formats + the option to change between LRS and LOD + if self.lrsMax <= 0: # sliding scale if "lrs_value" in self.qtlresults[0]: - LRS_LOD_Max = max([result['lrs_value'] for result in self.qtlresults]) + LRS_LOD_Max = max([result['lrs_value'] + for result in self.qtlresults]) if self.LRS_LOD == "LOD" or self.LRS_LOD == "-logP": LRS_LOD_Max = LRS_LOD_Max / self.LODFACTOR if self.permChecked and self.nperm > 0 and not self.multipleInterval: - self.significant = min(self.significant / self.LODFACTOR, webqtlConfig.MAXLRS) - self.suggestive = min(self.suggestive / self.LODFACTOR, webqtlConfig.MAXLRS) + self.significant = min( + self.significant / self.LODFACTOR, webqtlConfig.MAXLRS) + self.suggestive = min( + self.suggestive / self.LODFACTOR, webqtlConfig.MAXLRS) else: if self.permChecked and self.nperm > 0 and not self.multipleInterval: - self.significant = min(self.significant, webqtlConfig.MAXLRS) - self.suggestive = min(self.suggestive, webqtlConfig.MAXLRS) + self.significant = min( + self.significant, webqtlConfig.MAXLRS) + self.suggestive = min( + self.suggestive, webqtlConfig.MAXLRS) else: pass else: - LRS_LOD_Max = max([result['lod_score'] for result in self.qtlresults]) + LRS_LOD_Max = max([result['lod_score'] + for result in self.qtlresults]) if self.LRS_LOD == "LRS": LRS_LOD_Max = LRS_LOD_Max * self.LODFACTOR if self.permChecked and self.nperm > 0 and not self.multipleInterval: - self.significant = min(self.significant * self.LODFACTOR, webqtlConfig.MAXLRS) - self.suggestive = min(self.suggestive * self.LODFACTOR, webqtlConfig.MAXLRS) + self.significant = min( + self.significant * self.LODFACTOR, webqtlConfig.MAXLRS) + self.suggestive = min( + self.suggestive * self.LODFACTOR, webqtlConfig.MAXLRS) else: if self.permChecked and self.nperm > 0 and not self.multipleInterval: - self.significant = min(self.significant, webqtlConfig.MAXLRS) - self.suggestive = min(self.suggestive, webqtlConfig.MAXLRS) + self.significant = min( + self.significant, webqtlConfig.MAXLRS) + self.suggestive = min( + self.suggestive, webqtlConfig.MAXLRS) else: pass if self.permChecked and self.nperm > 0 and not self.multipleInterval: LRS_LOD_Max = max(self.significant, LRS_LOD_Max) - #genotype trait will give infinite LRS + # genotype trait will give infinite LRS LRS_LOD_Max = min(LRS_LOD_Max, webqtlConfig.MAXLRS) else: LRS_LOD_Max = self.lrsMax - #ZS: Needed to pass to genome browser + # ZS: Needed to pass to genome browser js_data = json.loads(self.js_data) if self.LRS_LOD == "LRS": - js_data['max_score'] = LRS_LOD_Max/4.61 + js_data['max_score'] = LRS_LOD_Max / 4.61 else: js_data['max_score'] = LRS_LOD_Max self.js_data = json.dumps(js_data) - LRSScaleFont=ImageFont.truetype(font=VERDANA_FILE, size=16*zoom) - LRSLODFont=ImageFont.truetype(font=VERDANA_FILE, size=int(18*zoom*1.5)) + LRSScaleFont = ImageFont.truetype(font=VERDANA_FILE, size=16 * zoom) + LRSLODFont = ImageFont.truetype( + font=VERDANA_FILE, size=int(18 * zoom * 1.5)) yZero = yTopOffset + plotHeight # LRSHeightThresh = drawAreaHeight # AdditiveHeightThresh = drawAreaHeight/2 # DominanceHeightThresh = drawAreaHeight/2 if self.selectedChr == 1: - LRSHeightThresh = drawAreaHeight - yTopOffset + 30*(zoom - 1) - AdditiveHeightThresh = LRSHeightThresh/2 - DominanceHeightThresh = LRSHeightThresh/2 + LRSHeightThresh = drawAreaHeight - yTopOffset + 30 * (zoom - 1) + AdditiveHeightThresh = LRSHeightThresh / 2 + DominanceHeightThresh = LRSHeightThresh / 2 else: LRSHeightThresh = drawAreaHeight - AdditiveHeightThresh = drawAreaHeight/2 - DominanceHeightThresh = drawAreaHeight/2 + AdditiveHeightThresh = drawAreaHeight / 2 + DominanceHeightThresh = drawAreaHeight / 2 # LRSHeightThresh = (yZero - yTopOffset + 30*(zoom - 1)) # AdditiveHeightThresh = LRSHeightThresh/2 # DominanceHeightThresh = LRSHeightThresh/2 @@ -2178,7 +2312,7 @@ class DisplayMappingResults: LRSAxisList = Plot.frange(LRSScale, LRS_LOD_Max, LRSScale) - #ZS: Convert to int if all axis values are whole numbers + # ZS: Convert to int if all axis values are whole numbers all_int = True for item in LRSAxisList: if isinstance(item, int): @@ -2192,9 +2326,10 @@ class DisplayMappingResults: # else: # max_lrs_width = canvas.stringWidth("%2.1f" % LRS_LOD_Max, font=LRSScaleFont) + 30 - #draw the "LRS" or "LOD" string to the left of the axis - LRSScaleFont=ImageFont.truetype(font=VERDANA_FILE, size=16*zoom) - LRSLODFont=ImageFont.truetype(font=VERDANA_FILE, size=int(18*zoom*1.5)) + # draw the "LRS" or "LOD" string to the left of the axis + LRSScaleFont = ImageFont.truetype(font=VERDANA_FILE, size=16 * zoom) + LRSLODFont = ImageFont.truetype( + font=VERDANA_FILE, size=int(18 * zoom * 1.5)) yZero = yTopOffset + plotHeight # TEXT_X_DISPLACEMENT = -20 @@ -2210,64 +2345,69 @@ class DisplayMappingResults: draw_rotated_text( canvas, text=self.LRS_LOD, font=LRSLODFont, xy=(xLeftOffset - im_drawer.textsize( - "999.99", font=LRSScaleFont)[0] - 15*(zoom-1) + TEXT_X_DISPLACEMENT, - yZero + TEXT_Y_DISPLACEMENT - 300*(zoom - 1)), + "999.99", font=LRSScaleFont)[0] - 15 * (zoom - 1) + TEXT_X_DISPLACEMENT, + yZero + TEXT_Y_DISPLACEMENT - 300 * (zoom - 1)), fill=BLACK, angle=90) for item in LRSAxisList: if LRS_LOD_Max == 0.0: LRS_LOD_Max = 0.000001 - yTopOffset + 30*(zoom - 1) - yLRS = yZero - (item/LRS_LOD_Max) * LRSHeightThresh - im_drawer.line(xy=((xLeftOffset, yLRS), (xLeftOffset-4, yLRS)), - fill=self.LRS_COLOR, width=1*zoom) + yTopOffset + 30 * (zoom - 1) + yLRS = yZero - (item / LRS_LOD_Max) * LRSHeightThresh + im_drawer.line(xy=((xLeftOffset, yLRS), (xLeftOffset - 4, yLRS)), + fill=self.LRS_COLOR, width=1 * zoom) if all_int: scaleStr = "%d" % item else: scaleStr = "%2.1f" % item - #Draw the LRS/LOD Y axis label + # Draw the LRS/LOD Y axis label TEXT_Y_DISPLACEMENT = -10 im_drawer.text( text=scaleStr, - xy=(xLeftOffset-4-im_drawer.textsize(scaleStr, font=LRSScaleFont)[0]-5, - yLRS+TEXT_Y_DISPLACEMENT), + xy=(xLeftOffset - 4 - im_drawer.textsize(scaleStr, font=LRSScaleFont)[0] - 5, + yLRS + TEXT_Y_DISPLACEMENT), font=LRSScaleFont, fill=self.LRS_COLOR) if self.permChecked and self.nperm > 0 and not self.multipleInterval: - significantY = yZero - self.significant*LRSHeightThresh/LRS_LOD_Max - suggestiveY = yZero - self.suggestive*LRSHeightThresh/LRS_LOD_Max + significantY = yZero - self.significant * LRSHeightThresh / LRS_LOD_Max + suggestiveY = yZero - self.suggestive * LRSHeightThresh / LRS_LOD_Max # significantY = yZero - self.significant*LRSHeightThresh/LRSAxisList[-1] # suggestiveY = yZero - self.suggestive*LRSHeightThresh/LRSAxisList[-1] startPosX = xLeftOffset - #"Significant" and "Suggestive" Drawing Routine + # "Significant" and "Suggestive" Drawing Routine # ======= Draw the thick lines for "Significant" and "Suggestive" ===== (crowell: I tried to make the SNPs draw over these lines, but piddle wouldn't have it...) - #ZS: I don't know if what I did here with this inner function is clever or overly complicated, but it's the only way I could think of to avoid duplicating the code inside this function + # ZS: I don't know if what I did here with this inner function is clever or overly complicated, but it's the only way I could think of to avoid duplicating the code inside this function def add_suggestive_significant_lines_and_legend(start_pos_x, chr_length_dist): - rightEdge = int(start_pos_x + chr_length_dist*plotXScale - self.SUGGESTIVE_WIDTH/1.5) + rightEdge = int(start_pos_x + chr_length_dist * \ + plotXScale - self.SUGGESTIVE_WIDTH / 1.5) im_drawer.line( - xy=((start_pos_x+self.SUGGESTIVE_WIDTH/1.5, suggestiveY), + xy=((start_pos_x + self.SUGGESTIVE_WIDTH / 1.5, suggestiveY), (rightEdge, suggestiveY)), - fill=self.SUGGESTIVE_COLOR, width=self.SUGGESTIVE_WIDTH*zoom - #,clipX=(xLeftOffset, xLeftOffset + plotWidth-2) + fill=self.SUGGESTIVE_COLOR, width=self.SUGGESTIVE_WIDTH * zoom + # ,clipX=(xLeftOffset, xLeftOffset + plotWidth-2) ) im_drawer.line( - xy=((start_pos_x+self.SUGGESTIVE_WIDTH/1.5, significantY), - (rightEdge, significantY)), + xy=((start_pos_x + self.SUGGESTIVE_WIDTH / 1.5, significantY), + (rightEdge, significantY)), fill=self.SIGNIFICANT_COLOR, - width=self.SIGNIFICANT_WIDTH*zoom - #, clipX=(xLeftOffset, xLeftOffset + plotWidth-2) + width=self.SIGNIFICANT_WIDTH * zoom + # , clipX=(xLeftOffset, xLeftOffset + plotWidth-2) ) - sugg_coords = "%d, %d, %d, %d" % (start_pos_x, suggestiveY-2, rightEdge + 2*zoom, suggestiveY+2) - sig_coords = "%d, %d, %d, %d" % (start_pos_x, significantY-2, rightEdge + 2*zoom, significantY+2) + sugg_coords = "%d, %d, %d, %d" % ( + start_pos_x, suggestiveY - 2, rightEdge + 2 * zoom, suggestiveY + 2) + sig_coords = "%d, %d, %d, %d" % ( + start_pos_x, significantY - 2, rightEdge + 2 * zoom, significantY + 2) if self.LRS_LOD == 'LRS': sugg_title = "Suggestive LRS = %0.2f" % self.suggestive sig_title = "Significant LRS = %0.2f" % self.significant else: - sugg_title = "Suggestive LOD = %0.2f" % (self.suggestive/4.61) - sig_title = "Significant LOD = %0.2f" % (self.significant/4.61) + sugg_title = "Suggestive LOD = %0.2f" % ( + self.suggestive / 4.61) + sig_title = "Significant LOD = %0.2f" % ( + self.significant / 4.61) Areas1 = HtmlGenWrapper.create_area_tag( shape='rect', coords=sugg_coords, @@ -2279,24 +2419,28 @@ class DisplayMappingResults: gifmap.append(Areas1) gifmap.append(Areas2) - start_pos_x += (chr_length_dist+self.GraphInterval)*plotXScale + start_pos_x += (chr_length_dist + \ + self.GraphInterval) * plotXScale return start_pos_x for i, _chr in enumerate(self.genotype): if self.selectedChr != -1: if _chr.name == self.ChrList[self.selectedChr][0]: - startPosX = add_suggestive_significant_lines_and_legend(startPosX, self.ChrLengthDistList[0]) + startPosX = add_suggestive_significant_lines_and_legend( + startPosX, self.ChrLengthDistList[0]) break else: continue else: - startPosX = add_suggestive_significant_lines_and_legend(startPosX, self.ChrLengthDistList[i]) + startPosX = add_suggestive_significant_lines_and_legend( + startPosX, self.ChrLengthDistList[i]) if self.multipleInterval: lrsEdgeWidth = 1 else: if self.additiveChecked: - additiveMax = max([abs(X['additive']) for X in self.qtlresults]) + additiveMax = max([abs(X['additive']) + for X in self.qtlresults]) lrsEdgeWidth = 3 if zoom == 2: @@ -2306,7 +2450,8 @@ class DisplayMappingResults: AdditiveCoordXY = [] DominanceCoordXY = [] - symbolFont = ImageFont.truetype(font=FNT_BS_FILE, size=5) #ZS: For Manhattan Plot + symbolFont = ImageFont.truetype( + font=FNT_BS_FILE, size=5) # ZS: For Manhattan Plot previous_chr = 1 previous_chr_as_int = 0 @@ -2332,128 +2477,142 @@ class DisplayMappingResults: minusColor = self.ADDITIVE_COLOR_NEGATIVE for k, aPoint in enumerate(AdditiveCoordXY): if k > 0: - Xc0, Yc0 = AdditiveCoordXY[k-1] + Xc0, Yc0 = AdditiveCoordXY[k - 1] Xc, Yc = aPoint - if (Yc0-yZero)*(Yc-yZero) < 0: - if Xc == Xc0: #genotype , locus distance is 0 + if (Yc0 - yZero) * (Yc - yZero) < 0: + if Xc == Xc0: # genotype , locus distance is 0 Xcm = Xc else: - Xcm = (yZero-Yc0)/((Yc-Yc0)/(Xc-Xc0)) +Xc0 + Xcm = (yZero - Yc0) / \ + ((Yc - Yc0) / (Xc - Xc0)) + Xc0 if Yc0 < yZero: im_drawer.line( xy=((Xc0, Yc0), (Xcm, yZero)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) im_drawer.line( - xy=((Xcm, yZero), (Xc, yZero-(Yc-yZero))), + xy=((Xcm, yZero), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero-(Yc0-yZero)), + xy=((Xc0, yZero - (Yc0 - yZero)), (Xcm, yZero)), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) im_drawer.line( xy=((Xcm, yZero), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) - elif (Yc0-yZero)*(Yc-yZero) > 0: + elif (Yc0 - yZero) * (Yc - yZero) > 0: if Yc < yZero: im_drawer.line( xy=((Xc0, Yc0), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero - (Yc0-yZero)), - (Xc, yZero - (Yc-yZero))), + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: - minYc = min(Yc-yZero, Yc0-yZero) + minYc = min(Yc - yZero, Yc0 - yZero) if minYc < 0: im_drawer.line( xy=((Xc0, Yc0), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero - (Yc0-yZero)), - (Xc, yZero - (Yc-yZero))), + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) LRSCoordXY = [] AdditiveCoordXY = [] previous_chr = qtlresult['chr'] previous_chr_as_int += 1 - newStartPosX = (self.ChrLengthDistList[previous_chr_as_int - 1]+self.GraphInterval)*plotXScale + newStartPosX = ( + self.ChrLengthDistList[previous_chr_as_int - 1] + self.GraphInterval) * plotXScale if newStartPosX != oldStartPosX: startPosX += newStartPosX oldStartPosX = newStartPosX - #ZS: This is because the chromosome value stored in qtlresult['chr'] can be (for example) either X or 20 depending upon the mapping method/scale used + # ZS: This is because the chromosome value stored in qtlresult['chr'] can be (for example) either X or 20 depending upon the mapping method/scale used this_chr = str(self.ChrList[self.selectedChr][0]) if self.plotScale != "physic": - this_chr = str(self.ChrList[self.selectedChr][1]+1) + this_chr = str(self.ChrList[self.selectedChr][1] + 1) if self.selectedChr == -1 or str(qtlresult['chr']) == this_chr: if self.plotScale != "physic" and self.mapping_method == "reaper" and not self.manhattan_plot: - Xc = startPosX + (qtlresult['cM']-startMb)*plotXScale + Xc = startPosX + (qtlresult['cM'] - startMb) * plotXScale if hasattr(self.genotype, "filler"): if self.genotype.filler: if self.selectedChr != -1: start_cm = self.genotype[self.selectedChr - 1][0].cM - Xc = startPosX + (qtlresult['Mb'] - start_cm)*plotXScale + Xc = startPosX + \ + (qtlresult['Mb'] - start_cm) * plotXScale else: start_cm = self.genotype[previous_chr_as_int][0].cM - Xc = startPosX + ((qtlresult['Mb']-start_cm-startMb)*plotXScale)*(((qtlresult['Mb']-start_cm-startMb)*plotXScale)/((qtlresult['Mb']-start_cm-startMb+self.GraphInterval)*plotXScale)) + Xc = startPosX + ((qtlresult['Mb'] - start_cm - startMb) * plotXScale) * ( + ((qtlresult['Mb'] - start_cm - startMb) * plotXScale) / ((qtlresult['Mb'] - start_cm - startMb + self.GraphInterval) * plotXScale)) else: - Xc = startPosX + (qtlresult['Mb']-startMb)*plotXScale + Xc = startPosX + (qtlresult['Mb'] - startMb) * plotXScale # updated by NL 06-18-2011: # fix the over limit LRS graph issue since genotype trait may give infinite LRS; # for any lrs is over than 460(LRS max in this system), it will be reset to 460 - yLRS = yZero - (item/LRS_LOD_Max) * LRSHeightThresh - + yLRS = yZero - (item / LRS_LOD_Max) * LRSHeightThresh if 'lrs_value' in qtlresult: if self.LRS_LOD == "LOD" or self.LRS_LOD == "-logP": - if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value']=='inf': + if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value'] == 'inf': #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/(LRSAxisList[-1]*self.LODFACTOR) - Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/(LRS_LOD_Max*self.LODFACTOR) + Yc = yZero - webqtlConfig.MAXLRS * \ + LRSHeightThresh / \ + (LRS_LOD_Max * self.LODFACTOR) else: #Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/(LRSAxisList[-1]*self.LODFACTOR) - Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/(LRS_LOD_Max*self.LODFACTOR) + Yc = yZero - \ + qtlresult['lrs_value'] * LRSHeightThresh / \ + (LRS_LOD_Max * self.LODFACTOR) else: - if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value']=='inf': + if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value'] == 'inf': #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRSAxisList[-1] - Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRS_LOD_Max + Yc = yZero - webqtlConfig.MAXLRS * LRSHeightThresh / LRS_LOD_Max else: #Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/LRSAxisList[-1] - Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/LRS_LOD_Max + Yc = yZero - \ + qtlresult['lrs_value'] * \ + LRSHeightThresh / LRS_LOD_Max else: - if qtlresult['lod_score'] > 100 or qtlresult['lod_score']=='inf': + if qtlresult['lod_score'] > 100 or qtlresult['lod_score'] == 'inf': #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRSAxisList[-1] - Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRS_LOD_Max + Yc = yZero - webqtlConfig.MAXLRS * LRSHeightThresh / LRS_LOD_Max else: if self.LRS_LOD == "LRS": #Yc = yZero - qtlresult['lod_score']*self.LODFACTOR*LRSHeightThresh/LRSAxisList[-1] - Yc = yZero - qtlresult['lod_score']*self.LODFACTOR*LRSHeightThresh/LRS_LOD_Max + Yc = yZero - \ + qtlresult['lod_score'] * self.LODFACTOR * \ + LRSHeightThresh / LRS_LOD_Max else: #Yc = yZero - qtlresult['lod_score']*LRSHeightThresh/LRSAxisList[-1] - Yc = yZero - qtlresult['lod_score']*LRSHeightThresh/LRS_LOD_Max + Yc = yZero - \ + qtlresult['lod_score'] * \ + LRSHeightThresh / LRS_LOD_Max if self.manhattan_plot == True: if self.color_scheme == "single": @@ -2469,17 +2628,19 @@ class DisplayMappingResults: im_drawer.text( text="5", xy=( - Xc-im_drawer.textsize("5", font=symbolFont)[0]/2+1, - Yc-4), + Xc - im_drawer.textsize("5", + font=symbolFont)[0] / 2 + 1, + Yc - 4), fill=point_color, font=symbolFont) else: LRSCoordXY.append((Xc, Yc)) if not self.multipleInterval and self.additiveChecked: - if additiveMax == 0.0: - additiveMax = 0.000001 - Yc = yZero - qtlresult['additive']*AdditiveHeightThresh/additiveMax - AdditiveCoordXY.append((Xc, Yc)) + if additiveMax == 0.0: + additiveMax = 0.000001 + Yc = yZero - qtlresult['additive'] * \ + AdditiveHeightThresh / additiveMax + AdditiveCoordXY.append((Xc, Yc)) m += 1 @@ -2497,64 +2658,65 @@ class DisplayMappingResults: minusColor = self.ADDITIVE_COLOR_NEGATIVE for k, aPoint in enumerate(AdditiveCoordXY): if k > 0: - Xc0, Yc0 = AdditiveCoordXY[k-1] + Xc0, Yc0 = AdditiveCoordXY[k - 1] Xc, Yc = aPoint - if (Yc0-yZero)*(Yc-yZero) < 0: - if Xc == Xc0: #genotype , locus distance is 0 + if (Yc0 - yZero) * (Yc - yZero) < 0: + if Xc == Xc0: # genotype , locus distance is 0 Xcm = Xc else: - Xcm = (yZero-Yc0)/((Yc-Yc0)/(Xc-Xc0)) +Xc0 + Xcm = (yZero - Yc0) / \ + ((Yc - Yc0) / (Xc - Xc0)) + Xc0 if Yc0 < yZero: im_drawer.line( xy=((Xc0, Yc0), (Xcm, yZero)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) im_drawer.line( - xy=((Xcm, yZero), (Xc, yZero-(Yc-yZero))), + xy=((Xcm, yZero), (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero - (Yc0-yZero)), + xy=((Xc0, yZero - (Yc0 - yZero)), (Xcm, yZero)), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) im_drawer.line( xy=((Xcm, yZero), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) - elif (Yc0-yZero)*(Yc-yZero) > 0: + elif (Yc0 - yZero) * (Yc - yZero) > 0: if Yc < yZero: im_drawer.line( xy=((Xc0, Yc0), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero-(Yc0-yZero)), - (Xc, yZero-(Yc-yZero))), + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: - minYc = min(Yc-yZero, Yc0-yZero) + minYc = min(Yc - yZero, Yc0 - yZero) if minYc < 0: im_drawer.line( xy=((Xc0, Yc0), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero - (Yc0-yZero)), - (Xc, yZero - (Yc-yZero))), + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) if not self.multipleInterval and INTERCROSS and self.dominanceChecked: @@ -2562,114 +2724,116 @@ class DisplayMappingResults: minusColor = self.DOMINANCE_COLOR_NEGATIVE for k, aPoint in enumerate(DominanceCoordXY): if k > 0: - Xc0, Yc0 = DominanceCoordXY[k-1] + Xc0, Yc0 = DominanceCoordXY[k - 1] Xc, Yc = aPoint - if (Yc0-yZero)*(Yc-yZero) < 0: - if Xc == Xc0: #genotype , locus distance is 0 + if (Yc0 - yZero) * (Yc - yZero) < 0: + if Xc == Xc0: # genotype , locus distance is 0 Xcm = Xc else: - Xcm = (yZero-Yc0)/((Yc-Yc0)/(Xc-Xc0)) +Xc0 + Xcm = (yZero - Yc0) / \ + ((Yc - Yc0) / (Xc - Xc0)) + Xc0 if Yc0 < yZero: im_drawer.line( xy=((Xc0, Yc0), (Xcm, yZero)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) im_drawer.line( - xy=((Xcm, yZero), (Xc, yZero-(Yc-yZero))), + xy=((Xcm, yZero), (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero - (Yc0-yZero)), (Xcm, yZero)), + xy=((Xc0, yZero - (Yc0 - yZero)), (Xcm, yZero)), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) im_drawer.line( xy=((Xcm, yZero), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) - elif (Yc0-yZero)*(Yc-yZero) > 0: + elif (Yc0 - yZero) * (Yc - yZero) > 0: if Yc < yZero: im_drawer.line( xy=((Xc0, Yc0), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero - (Yc0-yZero)), - (Xc, yZero - (Yc-yZero))), + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: - minYc = min(Yc-yZero, Yc0-yZero) + minYc = min(Yc - yZero, Yc0 - yZero) if minYc < 0: im_drawer.line( xy=((Xc0, Yc0), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero - (Yc0-yZero)), - (Xc, yZero - (Yc-yZero))), fill=minusColor, + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) - - ###draw additive scale + # draw additive scale if not self.multipleInterval and self.additiveChecked: - additiveScaleFont=ImageFont.truetype(font=VERDANA_FILE, size=16*zoom) + additiveScaleFont = ImageFont.truetype( + font=VERDANA_FILE, size=16 * zoom) additiveScale = Plot.detScaleOld(0, additiveMax) - additiveStep = (additiveScale[1]-additiveScale[0])/additiveScale[2] + additiveStep = (additiveScale[1] - \ + additiveScale[0]) / additiveScale[2] additiveAxisList = Plot.frange(0, additiveScale[1], additiveStep) - addPlotScale = AdditiveHeightThresh/additiveMax + addPlotScale = AdditiveHeightThresh / additiveMax TEXT_Y_DISPLACEMENT = -8 additiveAxisList.append(additiveScale[1]) for item in additiveAxisList: - additiveY = yZero - item*addPlotScale + additiveY = yZero - item * addPlotScale im_drawer.line( xy=((xLeftOffset + plotWidth, additiveY), - (xLeftOffset+4+ plotWidth, additiveY)), - fill=self.ADDITIVE_COLOR_POSITIVE, width=1*zoom) + (xLeftOffset + 4 + plotWidth, additiveY)), + fill=self.ADDITIVE_COLOR_POSITIVE, width=1 * zoom) scaleStr = "%2.3f" % item im_drawer.text( text=scaleStr, - xy=(xLeftOffset + plotWidth +6, additiveY+TEXT_Y_DISPLACEMENT), + xy=(xLeftOffset + plotWidth + 6, + additiveY + TEXT_Y_DISPLACEMENT), font=additiveScaleFont, fill=self.ADDITIVE_COLOR_POSITIVE) im_drawer.line( - xy=((xLeftOffset+plotWidth, additiveY), - (xLeftOffset+plotWidth, yZero)), - fill=self.ADDITIVE_COLOR_POSITIVE, width=1*zoom) + xy=((xLeftOffset + plotWidth, additiveY), + (xLeftOffset + plotWidth, yZero)), + fill=self.ADDITIVE_COLOR_POSITIVE, width=1 * zoom) im_drawer.line( - xy=((xLeftOffset, yZero), (xLeftOffset, yTopOffset + 30*(zoom - 1))), - fill=self.LRS_COLOR, width=1*zoom) #the blue line running up the y axis - - - def drawGraphBackground(self, canvas, gifmap, offset= (80, 120, 80, 50), zoom = 1, startMb = None, endMb = None): - ##conditions - ##multiple Chromosome view - ##single Chromosome Physical - ##single Chromosome Genetic + xy=((xLeftOffset, yZero), (xLeftOffset, yTopOffset + 30 * (zoom - 1))), + fill=self.LRS_COLOR, width=1 * zoom) # the blue line running up the y axis + + def drawGraphBackground(self, canvas, gifmap, offset=(80, 120, 80, 50), zoom=1, startMb=None, endMb=None): + # conditions + # multiple Chromosome view + # single Chromosome Physical + # single Chromosome Genetic im_drawer = ImageDraw.Draw(canvas) xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset plotWidth = canvas.size[0] - xLeftOffset - xRightOffset plotHeight = canvas.size[1] - yTopOffset - yBottomOffset - yBottom = yTopOffset+plotHeight + yBottom = yTopOffset + plotHeight fontZoom = zoom if zoom == 2: fontZoom = 1.5 yTopOffset += 30 - #calculate plot scale + # calculate plot scale if self.plotScale != 'physic': self.ChrLengthDistList = self.ChrLengthCMList drawRegionDistance = self.ChrLengthCMSum @@ -2677,10 +2841,10 @@ class DisplayMappingResults: self.ChrLengthDistList = self.ChrLengthMbList drawRegionDistance = self.ChrLengthMbSum - if self.selectedChr > -1: #single chromosome view - spacingAmt = plotWidth/13.5 + if self.selectedChr > -1: # single chromosome view + spacingAmt = plotWidth / 13.5 i = 0 - for startPix in Plot.frange(xLeftOffset, xLeftOffset+plotWidth, spacingAmt): + for startPix in Plot.frange(xLeftOffset, xLeftOffset + plotWidth, spacingAmt): if (i % 2 == 0): theBackColor = self.GRAPH_BACK_DARK_COLOR else: @@ -2688,25 +2852,27 @@ class DisplayMappingResults: i += 1 im_drawer.rectangle( [(startPix, yTopOffset), - (min(startPix+spacingAmt, xLeftOffset+plotWidth), yBottom)], + (min(startPix + spacingAmt, xLeftOffset + plotWidth), yBottom)], outline=theBackColor, fill=theBackColor) drawRegionDistance = self.ChrLengthDistList[self.ChrList[self.selectedChr][1]] self.ChrLengthDistList = [drawRegionDistance] if self.plotScale == 'physic': - plotXScale = plotWidth / (endMb-startMb) + plotXScale = plotWidth / (endMb - startMb) else: plotXScale = plotWidth / drawRegionDistance - else: #multiple chromosome view - plotXScale = plotWidth / ((len(self.genotype)-1)*self.GraphInterval + drawRegionDistance) + else: # multiple chromosome view + plotXScale = plotWidth / \ + ((len(self.genotype) - 1) * self.GraphInterval + drawRegionDistance) startPosX = xLeftOffset if fontZoom == 1.5: chrFontZoom = 2 else: chrFontZoom = 1 - chrLabelFont=ImageFont.truetype(font=VERDANA_FILE, size=24*chrFontZoom) + chrLabelFont = ImageFont.truetype( + font=VERDANA_FILE, size=24 * chrFontZoom) for i, _chr in enumerate(self.genotype): if (i % 2 == 0): @@ -2714,23 +2880,27 @@ class DisplayMappingResults: else: theBackColor = self.GRAPH_BACK_LIGHT_COLOR - #draw the shaded boxes and the sig/sug thick lines + # draw the shaded boxes and the sig/sug thick lines im_drawer.rectangle( ((startPosX, yTopOffset), - (startPosX + self.ChrLengthDistList[i]*plotXScale, yBottom)), + (startPosX + self.ChrLengthDistList[i] * plotXScale, yBottom)), outline=GAINSBORO, fill=theBackColor) - chrNameWidth, chrNameHeight = im_drawer.textsize(_chr.name, font=chrLabelFont) - chrStartPix = startPosX + (self.ChrLengthDistList[i]*plotXScale -chrNameWidth)/2 - chrEndPix = startPosX + (self.ChrLengthDistList[i]*plotXScale +chrNameWidth)/2 + chrNameWidth, chrNameHeight = im_drawer.textsize( + _chr.name, font=chrLabelFont) + chrStartPix = startPosX + \ + (self.ChrLengthDistList[i] * plotXScale - chrNameWidth) / 2 + chrEndPix = startPosX + \ + (self.ChrLengthDistList[i] * plotXScale + chrNameWidth) / 2 TEXT_Y_DISPLACEMENT = 0 im_drawer.text(xy=(chrStartPix, yTopOffset + TEXT_Y_DISPLACEMENT), text=_chr.name, font=chrLabelFont, fill=BLACK) - COORDS = "%d,%d,%d,%d" %(chrStartPix, yTopOffset, chrEndPix, yTopOffset +20) + COORDS = "%d,%d,%d,%d" % ( + chrStartPix, yTopOffset, chrEndPix, yTopOffset + 20) - #add by NL 09-03-2010 + # add by NL 09-03-2010 HREF = "javascript:chrView(%d,%s);" % (i, self.ChrLengthMbList) #HREF = "javascript:changeView(%d,%s);" % (i,self.ChrLengthMbList) Areas = HtmlGenWrapper.create_area_tag( @@ -2738,7 +2908,8 @@ class DisplayMappingResults: coords=COORDS, href=HREF) gifmap.append(Areas) - startPosX += (self.ChrLengthDistList[i]+self.GraphInterval)*plotXScale + startPosX += (self.ChrLengthDistList[i] + \ + self.GraphInterval) * plotXScale return plotXScale @@ -2748,15 +2919,16 @@ class DisplayMappingResults: ######################################### myCanvas = Image.new("RGBA", size=(500, 300)) if 'lod_score' in self.qtlresults[0] and self.LRS_LOD == "LRS": - perm_output = [value*4.61 for value in self.perm_output] + perm_output = [value * 4.61 for value in self.perm_output] elif 'lod_score' not in self.qtlresults[0] and self.LRS_LOD == "LOD": - perm_output = [value/4.61 for value in self.perm_output] + perm_output = [value / 4.61 for value in self.perm_output] else: perm_output = self.perm_output - filename= webqtlUtil.genRandStr("Reg_") - Plot.plotBar(myCanvas, perm_output, XLabel=self.LRS_LOD, YLabel='Frequency', title=' Histogram of Permutation Test') - myCanvas.save("{}.gif".format(GENERATED_IMAGE_DIR+filename), + filename = webqtlUtil.genRandStr("Reg_") + Plot.plotBar(myCanvas, perm_output, XLabel=self.LRS_LOD, + YLabel='Frequency', title=' Histogram of Permutation Test') + myCanvas.save("{}.gif".format(GENERATED_IMAGE_DIR + filename), format='gif') return filename @@ -2775,16 +2947,16 @@ class DisplayMappingResults: if self.dataset.group.species == "mouse": if refGene: gene_table_header_list = ["Index", - "Symbol", - "Mb Start", - "Length (Kb)", - "SNP Count", - "SNP Density", - "Avg Expr", - "Human Chr", - "Mb Start (hg19)", - "Literature Correlation", - "Gene Description"] + "Symbol", + "Mb Start", + "Length (Kb)", + "SNP Count", + "SNP Density", + "Avg Expr", + "Human Chr", + "Mb Start (hg19)", + "Literature Correlation", + "Gene Description"] else: gene_table_header_list = ["", "Index", @@ -2821,20 +2993,21 @@ class DisplayMappingResults: tableIterationsCnt = tableIterationsCnt + 1 - this_row = [] #container for the cells of each row + this_row = [] # container for the cells of each row selectCheck = HtmlGenWrapper.create_input_tag( type_="checkbox", name="selectCheck", value=theGO["GeneSymbol"], Class="checkbox trait_checkbox") # checkbox for each row - geneLength = (theGO["TxEnd"] - theGO["TxStart"])*1000.0 - tenPercentLength = geneLength*0.0001 + geneLength = (theGO["TxEnd"] - theGO["TxStart"]) * 1000.0 + tenPercentLength = geneLength * 0.0001 txStart = theGO["TxStart"] txEnd = theGO["TxEnd"] - theGO["snpDensity"] = theGO["snpCount"]/geneLength + theGO["snpDensity"] = theGO["snpCount"] / geneLength if self.ALEX_DEBUG_BOOL_PRINT_GENE_LIST: - geneIdString = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s' % theGO["GeneID"] + geneIdString = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s' % theGO[ + "GeneID"] if theGO["snpCount"]: snpString = HT.Link( @@ -2844,16 +3017,18 @@ class DisplayMappingResults: f"end={theGO['TxEnd']}&" f"geneName={theGO['GeneSymbol']}&" f"s1={self.diffCol[0]}&s2=%d"), - str(theGO["snpCount"]) # The text to display + str(theGO["snpCount"]) # The text to display ) snpString.set_blank_target() snpString.set_attribute("class", "normalsize") else: snpString = 0 - mouseStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Mouse&db=mm9&position=chr" + theGO["Chromosome"] + "%3A" + str(int(theGO["TxStart"] * 1000000.0)) + "-" + str(int(theGO["TxEnd"]*1000000.0)) +"&pix=620&Submit=submit" + mouseStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Mouse&db=mm9&position=chr" + \ + theGO["Chromosome"] + "%3A" + str(int(theGO["TxStart"] * 1000000.0)) + "-" + str( + int(theGO["TxEnd"] * 1000000.0)) + "&pix=620&Submit=submit" - #the chromosomes for human 1 are 1qXX.XX + # the chromosomes for human 1 are 1qXX.XX if theGO['humanGene']: if theGO['humanGene']["TxStart"] == '': humanStartDisplay = "" @@ -2863,20 +3038,21 @@ class DisplayMappingResults: humanChr = theGO['humanGene']["Chromosome"] humanTxStart = theGO['humanGene']["TxStart"] - humanStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Human&db=hg17&position=chr%s:%d-%d" % (humanChr, int(1000000*theGO['humanGene']["TxStart"]), int(1000000*theGO['humanGene']["TxEnd"])) + humanStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Human&db=hg17&position=chr%s:%d-%d" % ( + humanChr, int(1000000 * theGO['humanGene']["TxStart"]), int(1000000 * theGO['humanGene']["TxEnd"])) else: humanStartString = humanChr = humanStartDisplay = "--" geneDescription = theGO["GeneDescription"] if len(geneDescription) > 70: - geneDescription = geneDescription[:70]+"..." + geneDescription = geneDescription[:70] + "..." if theGO["snpDensity"] < 0.000001: snpDensityStr = "0" else: snpDensityStr = "%0.6f" % theGO["snpDensity"] - avgExpr = [] #theGO["avgExprVal"] + avgExpr = [] # theGO["avgExprVal"] if avgExpr in ([], None): avgExpr = "--" else: @@ -2888,7 +3064,8 @@ class DisplayMappingResults: else: chr_as_int = int(theGO["Chromosome"]) - 1 if refGene: - literatureCorrelationString = str(self.getLiteratureCorrelation(self.cursor, refGene, theGO['GeneID']) or "N/A") + literatureCorrelationString = str(self.getLiteratureCorrelation( + self.cursor, refGene, theGO['GeneID']) or "N/A") this_row = [selectCheck.__str__(), str(tableIterationsCnt), @@ -2896,17 +3073,17 @@ class DisplayMappingResults: geneIdString, theGO["GeneSymbol"], target="_blank") - ), + ), str(HtmlGenWrapper.create_link_tag( mouseStartString, "{:.6f}".format(txStart), target="_blank") - ), + ), str(HtmlGenWrapper.create_link_tag( "javascript:rangeView('{}', {:f}, {:f})".format( str(chr_as_int), - txStart-tenPercentLength, - txEnd+tenPercentLength), + txStart - tenPercentLength, + txEnd + tenPercentLength), "{:.3f}".format(geneLength))), snpString, snpDensityStr, @@ -2931,8 +3108,8 @@ class DisplayMappingResults: str(HtmlGenWrapper.create_link_tag( "javascript:rangeView('{}', {:f}, {:f})".format( str(chr_as_int), - txStart-tenPercentLength, - txEnd+tenPercentLength), + txStart - tenPercentLength, + txEnd + tenPercentLength), "{:.3f}".format(geneLength))), snpString, snpDensityStr, @@ -2956,7 +3133,8 @@ class DisplayMappingResults: if theGO["GeneID"] != "": geneSymbolNCBI = str(HtmlGenWrapper.create_link_tag( - "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids={}".format(theGO["GeneID"]), + "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids={}".format( + theGO["GeneID"]), theGO["GeneSymbol"], Class="normalsize", target="_blank")) @@ -2969,7 +3147,8 @@ class DisplayMappingResults: chr_as_int = int(theGO["Chromosome"]) - 1 geneLength = (float(theGO["TxEnd"]) - float(theGO["TxStart"])) - geneLengthURL = "javascript:rangeView('%s', %f, %f)" % (theGO["Chromosome"], float(theGO["TxStart"])-(geneLength*0.1), float(theGO["TxEnd"])+(geneLength*0.1)) + geneLengthURL = "javascript:rangeView('%s', %f, %f)" % (theGO["Chromosome"], float( + theGO["TxStart"]) - (geneLength * 0.1), float(theGO["TxEnd"]) + (geneLength * 0.1)) avgExprVal = [] if avgExprVal != "" and avgExprVal: @@ -2977,14 +3156,14 @@ class DisplayMappingResults: else: avgExprVal = "" - #Mouse Gene + # Mouse Gene if theGO['mouseGene']: mouseChr = theGO['mouseGene']["Chromosome"] mouseTxStart = "%0.6f" % theGO['mouseGene']["TxStart"] else: mouseChr = mouseTxStart = "" - #the chromosomes for human 1 are 1qXX.XX + # the chromosomes for human 1 are 1qXX.XX if theGO['humanGene']: humanChr = theGO['humanGene']["Chromosome"] humanTxStart = "%0.6f" % theGO['humanGene']["TxStart"] @@ -2996,12 +3175,12 @@ class DisplayMappingResults: geneDesc = "" this_row = [selectCheck.__str__(), - str(gIndex+1), + str(gIndex + 1), geneSymbolNCBI, "%0.6f" % theGO["TxStart"], str(HtmlGenWrapper.create_link_tag( geneLengthURL, - "{:.3f}".format(geneLength*1000.0))), + "{:.3f}".format(geneLength * 1000.0))), avgExprVal, mouseChr, mouseTxStart, @@ -3013,7 +3192,7 @@ class DisplayMappingResults: return gene_table_body - def getLiteratureCorrelation(cursor,geneId1=None,geneId2=None): + def getLiteratureCorrelation(cursor, geneId1=None, geneId2=None): if not geneId1 or not geneId2: return None if geneId1 == geneId2: @@ -3025,9 +3204,10 @@ class DisplayMappingResults: query = 'SELECT Value FROM LCorrRamin3 WHERE GeneId1 = %s and GeneId2 = %s' for x, y in [(geneId1, geneId2), (geneId2, geneId1)]: cursor.execute(query, (x, y)) - lCorr = cursor.fetchone() + lCorr = cursor.fetchone() if lCorr: lCorr = lCorr[0] break - except: raise #lCorr = None + except: + raise # lCorr = None return lCorr diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py index 06c9300a..f88c5ac8 100644 --- a/wqflask/wqflask/marker_regression/gemma_mapping.py +++ b/wqflask/wqflask/marker_regression/gemma_mapping.py @@ -149,7 +149,8 @@ def gen_covariates_file(this_dataset, covariates, samples): dataset_name = covariate.split(":")[1] if dataset_name == "Temp": temp_group = trait_name.split("_")[2] - dataset_ob = create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = temp_group) + dataset_ob = create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=temp_group) else: dataset_ob = create_dataset(covariate.split(":")[1]) trait_ob = create_trait(dataset=dataset_ob, diff --git a/wqflask/wqflask/marker_regression/plink_mapping.py b/wqflask/wqflask/marker_regression/plink_mapping.py index 5d675c38..2fa80841 100644 --- a/wqflask/wqflask/marker_regression/plink_mapping.py +++ b/wqflask/wqflask/marker_regression/plink_mapping.py @@ -6,13 +6,14 @@ from utility import webqtlUtil from utility.tools import flat_files, PLINK_COMMAND import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + def run_plink(this_trait, dataset, species, vals, maf): - plink_output_filename = webqtlUtil.genRandStr(f"{dataset.group.name}_{this_trait.name}_") + plink_output_filename = webqtlUtil.genRandStr( + f"{dataset.group.name}_{this_trait.name}_") gen_pheno_txt_file(dataset, vals) - plink_command = f"{PLINK_COMMAND} --noweb --bfile {flat_files('mapping')}/{dataset.group.name} --no-pheno --no-fid --no-parents --no-sex --maf {maf} --out { TMPDIR}{plink_output_filename} --assoc " logger.debug("plink_command:", plink_command) @@ -25,6 +26,7 @@ def run_plink(this_trait, dataset, species, vals, maf): return dataset.group.markers.markers + def gen_pheno_txt_file(this_dataset, vals): """Generates phenotype file for GEMMA/PLINK""" @@ -34,15 +36,17 @@ def gen_pheno_txt_file(this_dataset, vals): split_line = line.split() current_file_data.append(split_line) - with open(f"{flat_files('mapping')}/{this_dataset.group.name}.fam","w") as outfile: + with open(f"{flat_files('mapping')}/{this_dataset.group.name}.fam", "w") as outfile: for i, line in enumerate(current_file_data): if vals[i] == "x": this_val = -9 else: this_val = vals[i] - outfile.write("0 " + line[1] + " " + line[2] + " " + line[3] + " " + line[4] + " " + str(this_val) + "\n") + outfile.write("0 " + line[1] + " " + line[2] + " " + \ + line[3] + " " + line[4] + " " + str(this_val) + "\n") + -def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = ''): +def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename=''): ped_sample_list = get_samples_from_ped_file(dataset) output_file = open(f"{TMPDIR}{pheno_filename}.txt", "wb") header = f"FID\tIID\t{this_trait.name}\n" @@ -50,7 +54,7 @@ def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = ''): new_value_list = [] - #if valueDict does not include some strain, value will be set to -9999 as missing value + # if valueDict does not include some strain, value will be set to -9999 as missing value for i, sample in enumerate(ped_sample_list): try: value = vals[i] @@ -63,11 +67,11 @@ def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = ''): new_line = '' for i, sample in enumerate(ped_sample_list): - j = i+1 + j = i + 1 value = new_value_list[i] new_line += f"{sample}\t{sample}\t{value}\n" - if j%1000 == 0: + if j % 1000 == 0: output_file.write(newLine) new_line = '' @@ -77,10 +81,12 @@ def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = ''): output_file.close() # get strain name from ped file in order + + def get_samples_from_ped_file(dataset): - ped_file= open(f"{flat_files('mapping')}{dataset.group.name}.ped","r") + ped_file = open(f"{flat_files('mapping')}{dataset.group.name}.ped", "r") line = ped_file.readline() - sample_list=[] + sample_list = [] while line: lineList = line.strip().split('\t') @@ -93,25 +99,26 @@ def get_samples_from_ped_file(dataset): return sample_list + def parse_plink_output(output_filename, species): - plink_results={} + plink_results = {} threshold_p_value = 1 - result_fp = open(f"{TMPDIR}{output_filename}.qassoc","rb") + result_fp = open(f"{TMPDIR}{output_filename}.qassoc", "rb") line = result_fp.readline() - value_list = [] # initialize value list, this list will include snp, bp and pvalue info + value_list = [] # initialize value list, this list will include snp, bp and pvalue info p_value_dict = {} count = 0 while line: - #convert line from str to list + # convert line from str to list line_list = build_line_list(line=line) # only keep the records whose chromosome name is in db - if int(line_list[0]) in species.chromosomes.chromosomes and line_list[-1] and line_list[-1].strip()!='NA': + if int(line_list[0]) in species.chromosomes.chromosomes and line_list[-1] and line_list[-1].strip() != 'NA': chr_name = species.chromosomes.chromosomes[int(line_list[0])] snp = line_list[1] @@ -125,7 +132,7 @@ def parse_plink_output(output_filename, species): value_list = plink_results[chr_name] # pvalue range is [0,1] - if threshold_p_value >=0 and threshold_p_value <= 1: + if threshold_p_value >= 0 and threshold_p_value <= 1: if p_value < threshold_p_value: value_list.append((snp, BP, p_value)) count += 1 @@ -141,7 +148,7 @@ def parse_plink_output(output_filename, species): if value_list: plink_results[chr_name] = value_list - value_list=[] + value_list = [] line = result_fp.readline() else: @@ -154,9 +161,12 @@ def parse_plink_output(output_filename, species): # function: convert line from str to list; # output: lineList list ####################################################### + + def build_line_list(line=""): - line_list = line.strip().split(' ')# irregular number of whitespaces between columns - line_list = [item for item in line_list if item !=''] + # irregular number of whitespaces between columns + line_list = line.strip().split(' ') + line_list = [item for item in line_list if item != ''] line_list = [item.strip() for item in line_list] return line_list diff --git a/wqflask/wqflask/marker_regression/qtlreaper_mapping.py b/wqflask/wqflask/marker_regression/qtlreaper_mapping.py index 8341ee55..4d6715ba 100644 --- a/wqflask/wqflask/marker_regression/qtlreaper_mapping.py +++ b/wqflask/wqflask/marker_regression/qtlreaper_mapping.py @@ -1,4 +1,9 @@ -import os, math, string, random, json, re +import os +import math +import string +import random +import json +import re from base import webqtlConfig from base.trait import GeneralTrait @@ -6,7 +11,8 @@ from base.data_set import create_dataset from utility.tools import flat_files, REAPER_COMMAND, TEMPDIR import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + def run_reaper(this_trait, this_dataset, samples, vals, json_data, num_perm, boot_check, num_bootstrap, do_control, control_marker, manhattan_plot, first_run=True, output_files=None): """Generates p-values for each marker using qtlreaper""" @@ -17,66 +23,73 @@ def run_reaper(this_trait, this_dataset, samples, vals, json_data, num_perm, boo else: genofile_name = this_dataset.group.name - trait_filename =f"{str(this_trait.name)}_{str(this_dataset.name)}_pheno" + trait_filename = f"{str(this_trait.name)}_{str(this_dataset.name)}_pheno" gen_pheno_txt_file(samples, vals, trait_filename) - output_filename = (f"{this_dataset.group.name}_GWA_"+ - ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - ) + output_filename = (f"{this_dataset.group.name}_GWA_" + + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + ) bootstrap_filename = None permu_filename = None opt_list = [] if boot_check and num_bootstrap > 0: - bootstrap_filename = (f"{this_dataset.group.name}_BOOTSTRAP_" + - ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - ) + bootstrap_filename = (f"{this_dataset.group.name}_BOOTSTRAP_" + + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + ) opt_list.append("-b") opt_list.append(f"--n_bootstrap {str(num_bootstrap)}") - opt_list.append(f"--bootstrap_output {webqtlConfig.GENERATED_IMAGE_DIR}{bootstrap_filename}.txt") + opt_list.append( + f"--bootstrap_output {webqtlConfig.GENERATED_IMAGE_DIR}{bootstrap_filename}.txt") if num_perm > 0: - permu_filename =("{this_dataset.group.name}_PERM_" + - ''.join(random.choice(string.ascii_uppercase + - string.digits) for _ in range(6)) - ) + permu_filename = ("{this_dataset.group.name}_PERM_" + + ''.join(random.choice(string.ascii_uppercase + + string.digits) for _ in range(6)) + ) opt_list.append("-n " + str(num_perm)) - opt_list.append("--permu_output " + webqtlConfig.GENERATED_IMAGE_DIR + permu_filename + ".txt") + opt_list.append( + "--permu_output " + webqtlConfig.GENERATED_IMAGE_DIR + permu_filename + ".txt") if control_marker != "" and do_control == "true": opt_list.append("-c " + control_marker) if manhattan_plot != True: opt_list.append("--interval 1") - reaper_command = (REAPER_COMMAND + - ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt {4} -o {5}{6}.txt'.format(flat_files('genotype'), + reaper_command = (REAPER_COMMAND + + ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt {4} -o {5}{6}.txt'.format(flat_files('genotype'), - genofile_name, - TEMPDIR, - trait_filename, - " ".join(opt_list), - webqtlConfig.GENERATED_IMAGE_DIR, - output_filename)) + genofile_name, + TEMPDIR, + trait_filename, + " ".join( + opt_list), + webqtlConfig.GENERATED_IMAGE_DIR, + output_filename)) logger.debug("reaper_command:" + reaper_command) os.system(reaper_command) else: output_filename, permu_filename, bootstrap_filename = output_files - marker_obs, permu_vals, bootstrap_vals = parse_reaper_output(output_filename, permu_filename, bootstrap_filename) + marker_obs, permu_vals, bootstrap_vals = parse_reaper_output( + output_filename, permu_filename, bootstrap_filename) suggestive = 0 significant = 0 if len(permu_vals) > 0: - suggestive = permu_vals[int(num_perm*0.37-1)] - significant = permu_vals[int(num_perm*0.95-1)] + suggestive = permu_vals[int(num_perm * 0.37 - 1)] + significant = permu_vals[int(num_perm * 0.95 - 1)] + + return (marker_obs, permu_vals, suggestive, significant, bootstrap_vals, + [output_filename, permu_filename, bootstrap_filename]) - return (marker_obs, permu_vals, suggestive, significant, bootstrap_vals, - [output_filename, permu_filename, bootstrap_filename]) def gen_pheno_txt_file(samples, vals, trait_filename): """Generates phenotype file for GEMMA""" - with open(f"{TEMPDIR}/gn2/{trait_filename}.txt","w") as outfile: + with open(f"{TEMPDIR}/gn2/{trait_filename}.txt", "w") as outfile: outfile.write("Trait\t") filtered_sample_list = [] @@ -92,6 +105,7 @@ def gen_pheno_txt_file(samples, vals, trait_filename): values_string = "\t".join(filtered_vals_list) outfile.write(values_string) + def parse_reaper_output(gwa_filename, permu_filename, bootstrap_filename): included_markers = [] p_values = [] @@ -121,7 +135,7 @@ def parse_reaper_output(gwa_filename, permu_filename, bootstrap_filename): marker['cM'] = float(line.split("\t")[3]) else: if float(line.split("\t")[3]) > 1000: - marker['Mb'] = float(line.split("\t")[3])/1000000 + marker['Mb'] = float(line.split("\t")[3]) / 1000000 else: marker['Mb'] = float(line.split("\t")[3]) if float(line.split("\t")[6]) != 1: @@ -132,7 +146,7 @@ def parse_reaper_output(gwa_filename, permu_filename, bootstrap_filename): else: marker['cM'] = float(line.split("\t")[3]) if float(line.split("\t")[4]) > 1000: - marker['Mb'] = float(line.split("\t")[4])/1000000 + marker['Mb'] = float(line.split("\t")[4]) / 1000000 else: marker['Mb'] = float(line.split("\t")[4]) if float(line.split("\t")[7]) != 1: @@ -142,7 +156,7 @@ def parse_reaper_output(gwa_filename, permu_filename, bootstrap_filename): marker['additive'] = float(line.split("\t")[6]) marker_obs.append(marker) - #ZS: Results have to be reordered because the new reaper returns results sorted alphabetically by chr for some reason, resulting in chr 1 being followed by 10, etc + # ZS: Results have to be reordered because the new reaper returns results sorted alphabetically by chr for some reason, resulting in chr 1 being followed by 10, etc sorted_indices = natural_sort(marker_obs) permu_vals = [] @@ -163,6 +177,7 @@ def parse_reaper_output(gwa_filename, permu_filename, bootstrap_filename): return marker_obs, permu_vals, bootstrap_vals + def run_original_reaper(this_trait, dataset, samples_before, trait_vals, json_data, num_perm, bootCheck, num_bootstrap, do_control, control_marker, manhattan_plot): genotype = dataset.group.read_genotype_file(use_reaper=True) @@ -185,18 +200,19 @@ def run_original_reaper(this_trait, dataset, samples_before, trait_vals, json_da suggestive = 0 significant = 0 else: - perm_output = genotype.permutation(strains = trimmed_samples, trait = trimmed_values, nperm=num_perm) - suggestive = perm_output[int(num_perm*0.37-1)] - significant = perm_output[int(num_perm*0.95-1)] - #highly_significant = perm_output[int(num_perm*0.99-1)] #ZS: Currently not used, but leaving it here just in case + perm_output = genotype.permutation( + strains=trimmed_samples, trait=trimmed_values, nperm=num_perm) + suggestive = perm_output[int(num_perm * 0.37 - 1)] + significant = perm_output[int(num_perm * 0.95 - 1)] + # highly_significant = perm_output[int(num_perm*0.99-1)] #ZS: Currently not used, but leaving it here just in case json_data['suggestive'] = suggestive json_data['significant'] = significant if control_marker != "" and do_control == "true": - reaper_results = genotype.regression(strains = trimmed_samples, - trait = trimmed_values, - control = str(control_marker)) + reaper_results = genotype.regression(strains=trimmed_samples, + trait=trimmed_values, + control=str(control_marker)) if bootCheck: control_geno = [] control_geno2 = [] @@ -215,31 +231,31 @@ def run_original_reaper(this_trait, dataset, samples_before, trait_vals, json_da _idx = _prgy.index(_strain) control_geno.append(control_geno2[_idx]) - bootstrap_results = genotype.bootstrap(strains = trimmed_samples, - trait = trimmed_values, - control = control_geno, - nboot = num_bootstrap) + bootstrap_results = genotype.bootstrap(strains=trimmed_samples, + trait=trimmed_values, + control=control_geno, + nboot=num_bootstrap) else: - reaper_results = genotype.regression(strains = trimmed_samples, - trait = trimmed_values) + reaper_results = genotype.regression(strains=trimmed_samples, + trait=trimmed_values) if bootCheck: - bootstrap_results = genotype.bootstrap(strains = trimmed_samples, - trait = trimmed_values, - nboot = num_bootstrap) + bootstrap_results = genotype.bootstrap(strains=trimmed_samples, + trait=trimmed_values, + nboot=num_bootstrap) json_data['chr'] = [] json_data['pos'] = [] json_data['lod.hk'] = [] json_data['markernames'] = [] - #if self.additive: + # if self.additive: # self.json_data['additive'] = [] - #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary + # Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary qtl_results = [] for qtl in reaper_results: reaper_locus = qtl.locus - #ZS: Convert chr to int + # ZS: Convert chr to int converted_chr = reaper_locus.chr if reaper_locus.chr != "X" and reaper_locus.chr != "X/Y": converted_chr = int(reaper_locus.chr) @@ -247,19 +263,22 @@ def run_original_reaper(this_trait, dataset, samples_before, trait_vals, json_da json_data['pos'].append(reaper_locus.Mb) json_data['lod.hk'].append(qtl.lrs) json_data['markernames'].append(reaper_locus.name) - #if self.additive: + # if self.additive: # self.json_data['additive'].append(qtl.additive) - locus = {"name":reaper_locus.name, "chr":reaper_locus.chr, "cM":reaper_locus.cM, "Mb":reaper_locus.Mb} - qtl = {"lrs_value": qtl.lrs, "chr":converted_chr, "Mb":reaper_locus.Mb, - "cM":reaper_locus.cM, "name":reaper_locus.name, "additive":qtl.additive, "dominance":qtl.dominance} + locus = {"name": reaper_locus.name, "chr": reaper_locus.chr, + "cM": reaper_locus.cM, "Mb": reaper_locus.Mb} + qtl = {"lrs_value": qtl.lrs, "chr": converted_chr, "Mb": reaper_locus.Mb, + "cM": reaper_locus.cM, "name": reaper_locus.name, "additive": qtl.additive, "dominance": qtl.dominance} qtl_results.append(qtl) return qtl_results, json_data, perm_output, suggestive, significant, bootstrap_results + def natural_sort(marker_list): """ Function to naturally sort numbers + strings, adopted from user Mark Byers here: https://stackoverflow.com/questions/4836710/does-python-have-a-built-in-function-for-string-natural-sort Changed to return indices instead of values, though, since the same reordering needs to be applied to bootstrap results """ convert = lambda text: int(text) if text.isdigit() else text.lower() - alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', str(marker_list[key]['chr'])) ] - return sorted(list(range(len(marker_list))), key = alphanum_key) \ No newline at end of file + alphanum_key = lambda key: [convert(c) for c in re.split( + '([0-9]+)', str(marker_list[key]['chr']))] + return sorted(list(range(len(marker_list))), key=alphanum_key) diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py index 4117a0e5..1fa3dffe 100644 --- a/wqflask/wqflask/marker_regression/rqtl_mapping.py +++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py @@ -13,51 +13,54 @@ from utility.tools import locate, TEMPDIR from flask import g import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) # Get a trait's type (numeric, categorical, etc) from the DB + + def get_trait_data_type(trait_db_string): - logger.info("get_trait_data_type"); + logger.info("get_trait_data_type") the_query = "SELECT value FROM TraitMetadata WHERE type='trait_data_type'" - logger.info("the_query done"); + logger.info("the_query done") results_json = g.db.execute(the_query).fetchone() - logger.info("the_query executed"); + logger.info("the_query executed") results_ob = json.loads(results_json[0]) - logger.info("json results loaded"); + logger.info("json results loaded") if trait_db_string in results_ob: - logger.info("found"); + logger.info("found") return results_ob[trait_db_string] else: - logger.info("not found"); + logger.info("not found") return "numeric" # Run qtl mapping using R/qtl def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permCheck, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, pair_scan, cofactors): - logger.info("Start run_rqtl_geno"); - ## Get pointers to some common R functions - r_library = ro.r["library"] # Map the library function - r_c = ro.r["c"] # Map the c function - plot = ro.r["plot"] # Map the plot function - png = ro.r["png"] # Map the png function - dev_off = ro.r["dev.off"] # Map the device off function + logger.info("Start run_rqtl_geno") + # Get pointers to some common R functions + r_library = ro.r["library"] # Map the library function + r_c = ro.r["c"] # Map the c function + plot = ro.r["plot"] # Map the plot function + png = ro.r["png"] # Map the png function + dev_off = ro.r["dev.off"] # Map the device off function print((r_library("qtl"))) # Load R/qtl - logger.info("QTL library loaded"); + logger.info("QTL library loaded") - ## Get pointers to some R/qtl functions - scanone = ro.r["scanone"] # Map the scanone function - scantwo = ro.r["scantwo"] # Map the scantwo function - calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function + # Get pointers to some R/qtl functions + scanone = ro.r["scanone"] # Map the scanone function + scantwo = ro.r["scantwo"] # Map the scantwo function + # Map the calc.genoprob function + calc_genoprob = ro.r["calc.genoprob"] crossname = dataset.group.name - #try: + # try: # generate_cross_from_rdata(dataset) # read_cross_from_rdata = ro.r["generate_cross_from_rdata"] # Map the local read_cross_from_rdata function # genofilelocation = locate(crossname + ".RData", "genotype/rdata") # cross_object = read_cross_from_rdata(genofilelocation) # Map the local GENOtoCSVR function - #except: + # except: if mapping_scale == "morgan": scale_units = "cM" @@ -65,76 +68,101 @@ def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permChec scale_units = "Mb" generate_cross_from_geno(dataset, scale_units) - GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function + # Map the local GENOtoCSVR function + GENOtoCSVR = ro.r["GENOtoCSVR"] crossfilelocation = TMPDIR + crossname + ".cross" if dataset.group.genofile: - genofilelocation = locate(dataset.group.genofile, "genotype") + genofilelocation = locate(dataset.group.genofile, "genotype") else: genofilelocation = locate(dataset.group.name + ".geno", "genotype") - logger.info("Going to create a cross from geno"); - cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) # TODO: Add the SEX if that is available - logger.info("before calc_genoprob"); + logger.info("Going to create a cross from geno") + # TODO: Add the SEX if that is available + cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) + logger.info("before calc_genoprob") if manhattan_plot: cross_object = calc_genoprob(cross_object) else: cross_object = calc_genoprob(cross_object, step=5, stepwidth="max") - logger.info("after calc_genoprob"); + logger.info("after calc_genoprob") pheno_string = sanitize_rqtl_phenotype(vals) - logger.info("phenostring done"); + logger.info("phenostring done") names_string = sanitize_rqtl_names(samples) - logger.info("sanitized pheno and names"); - cross_object = add_phenotype(cross_object, pheno_string, "the_pheno") # Add the phenotype - cross_object = add_names(cross_object, names_string, "the_names") # Add the phenotype - logger.info("Added pheno and names"); - marker_covars = create_marker_covariates(control_marker, cross_object) # Create the additive covariate markers - logger.info("Marker covars done"); + logger.info("sanitized pheno and names") + # Add the phenotype + cross_object = add_phenotype(cross_object, pheno_string, "the_pheno") + # Add the phenotype + cross_object = add_names(cross_object, names_string, "the_names") + logger.info("Added pheno and names") + # Create the additive covariate markers + marker_covars = create_marker_covariates(control_marker, cross_object) + logger.info("Marker covars done") if cofactors != "": - logger.info("Cofactors: " + cofactors); - cross_object, trait_covars = add_cofactors(cross_object, dataset, cofactors, samples) # Create the covariates from selected traits + logger.info("Cofactors: " + cofactors) + # Create the covariates from selected traits + cross_object, trait_covars = add_cofactors( + cross_object, dataset, cofactors, samples) ro.r('all_covars <- cbind(marker_covars, trait_covars)') else: ro.r('all_covars <- marker_covars') covars = ro.r['all_covars'] - #DEBUG to save the session object to file + # DEBUG to save the session object to file if pair_scan: if do_control == "true": - logger.info("Using covariate"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", addcovar = covars, model=model, method=method, n_cluster = 16) + logger.info("Using covariate") + result_data_frame = scantwo( + cross_object, pheno="the_pheno", addcovar=covars, model=model, method=method, n_cluster=16) else: - logger.info("No covariates"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", model=model, method=method, n_cluster = 16) + logger.info("No covariates") + result_data_frame = scantwo( + cross_object, pheno="the_pheno", model=model, method=method, n_cluster=16) pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png" - png(file=TEMPDIR+pair_scan_filename) + png(file=TEMPDIR + pair_scan_filename) plot(result_data_frame) dev_off() return process_pair_scan_results(result_data_frame) else: if do_control == "true" or cofactors != "": - logger.info("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covars, model=model, method=method) + logger.info("Using covariate") + result_data_frame = scanone( + cross_object, pheno="the_pheno", addcovar=covars, model=model, method=method) ro.r('save.image(file = "/home/zas1024/gn2-zach/itp_cofactor_test.RData")') else: - logger.info("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno", model=model, method=method) - - if num_perm > 0 and permCheck == "ON": # Do permutation (if requested by user) - if len(perm_strata_list) > 0: #ZS: The strata list would only be populated if "Stratified" was checked on before mapping - cross_object, strata_ob = add_perm_strata(cross_object, perm_strata_list) + logger.info("No covariates") + result_data_frame = scanone( + cross_object, pheno="the_pheno", model=model, method=method) + + # Do permutation (if requested by user) + if num_perm > 0 and permCheck == "ON": + # ZS: The strata list would only be populated if "Stratified" was checked on before mapping + if len(perm_strata_list) > 0: + cross_object, strata_ob = add_perm_strata( + cross_object, perm_strata_list) if do_control == "true" or cofactors != "": - perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covars, n_perm = int(num_perm), perm_strata = strata_ob, model=model, method=method) + perm_data_frame = scanone(cross_object, pheno_col="the_pheno", addcovar=covars, n_perm=int( + num_perm), perm_strata=strata_ob, model=model, method=method) else: - perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, perm_strata = strata_ob, model=model, method=method) + perm_data_frame = scanone( + cross_object, pheno_col="the_pheno", n_perm=num_perm, perm_strata=strata_ob, model=model, method=method) else: if do_control == "true" or cofactors != "": - perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covars, n_perm = int(num_perm), model=model, method=method) + perm_data_frame = scanone(cross_object, pheno_col="the_pheno", addcovar=covars, n_perm=int( + num_perm), model=model, method=method) else: - perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, model=model, method=method) + perm_data_frame = scanone( + cross_object, pheno_col="the_pheno", n_perm=num_perm, model=model, method=method) - perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame) # Functions that sets the thresholds for the webinterface + # Functions that sets the thresholds for the webinterface + perm_output, suggestive, significant = process_rqtl_perm_results( + num_perm, perm_data_frame) return perm_output, suggestive, significant, process_rqtl_results(result_data_frame, dataset.group.species) else: return process_rqtl_results(result_data_frame, dataset.group.species) + def generate_cross_from_rdata(dataset): - rdata_location = locate(dataset.group.name + ".RData", "genotype/rdata") + rdata_location = locate(dataset.group.name + ".RData", "genotype/rdata") ro.r(""" generate_cross_from_rdata <- function(filename = '%s') { load(file=filename) @@ -143,7 +171,9 @@ def generate_cross_from_rdata(dataset): } """ % (rdata_location)) -def generate_cross_from_geno(dataset, scale_units): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly + +# TODO: Need to figure out why some genofiles have the wrong format and don't convert properly +def generate_cross_from_geno(dataset, scale_units): ro.r(""" trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) } @@ -187,6 +217,7 @@ def generate_cross_from_geno(dataset, scale_units): # TODO: Need to figur } """ % (dataset.group.genofile, scale_units)) + def add_perm_strata(cross, perm_strata): col_string = 'c("the_strata")' perm_strata_string = "c(" @@ -201,12 +232,13 @@ def add_perm_strata(cross, perm_strata): return cross, strata_ob + def sanitize_rqtl_phenotype(vals): pheno_as_string = "c(" for i, val in enumerate(vals): if val == "x": if i < (len(vals) - 1): - pheno_as_string += "NA," + pheno_as_string += "NA," else: pheno_as_string += "NA" else: @@ -218,12 +250,13 @@ def sanitize_rqtl_phenotype(vals): return pheno_as_string + def sanitize_rqtl_names(vals): pheno_as_string = "c(" for i, val in enumerate(vals): if val == "x": if i < (len(vals) - 1): - pheno_as_string += "NA," + pheno_as_string += "NA," else: pheno_as_string += "NA" else: @@ -235,59 +268,66 @@ def sanitize_rqtl_names(vals): return pheno_as_string + def add_phenotype(cross, pheno_as_string, col_name): ro.globalenv["the_cross"] = cross ro.r('pheno <- data.frame(pull.pheno(the_cross))') - ro.r('the_cross$pheno <- cbind(pheno, ' + col_name + ' = as.numeric('+ pheno_as_string +'))') + ro.r('the_cross$pheno <- cbind(pheno, ' + col_name + \ + ' = as.numeric(' + pheno_as_string + '))') return ro.r["the_cross"] + def add_categorical_covar(cross, covar_as_string, i): ro.globalenv["the_cross"] = cross - logger.info("cross set"); + logger.info("cross set") ro.r('covar <- as.factor(' + covar_as_string + ')') - logger.info("covar set"); + logger.info("covar set") ro.r('newcovar <- model.matrix(~covar-1)') - logger.info("model.matrix finished"); + logger.info("model.matrix finished") ro.r('cat("new covar columns", ncol(newcovar), "\n")') nCol = ro.r('ncol(newcovar)') - logger.info("ncol covar done: " + str(nCol[0])); + logger.info("ncol covar done: " + str(nCol[0])) ro.r('pheno <- data.frame(pull.pheno(the_cross))') - logger.info("pheno pulled from cross"); + logger.info("pheno pulled from cross") nCol = int(nCol[0]) - logger.info("nCol python int:" + str(nCol)); + logger.info("nCol python int:" + str(nCol)) col_names = [] - #logger.info("loop") - for x in range(1, (nCol+1)): - #logger.info("loop" + str(x)); - col_name = "covar_" + str(i) + "_" + str(x) - #logger.info("col_name" + col_name); - ro.r('the_cross$pheno <- cbind(pheno, ' + col_name + ' = newcovar[,' + str(x) + '])') - col_names.append(col_name) - #logger.info("loop" + str(x) + "done"); - - logger.info("returning from add_categorical_covar"); + # logger.info("loop") + for x in range(1, (nCol + 1)): + #logger.info("loop" + str(x)); + col_name = "covar_" + str(i) + "_" + str(x) + #logger.info("col_name" + col_name); + ro.r('the_cross$pheno <- cbind(pheno, ' + \ + col_name + ' = newcovar[,' + str(x) + '])') + col_names.append(col_name) + #logger.info("loop" + str(x) + "done"); + + logger.info("returning from add_categorical_covar") return ro.r["the_cross"], col_names def add_names(cross, names_as_string, col_name): ro.globalenv["the_cross"] = cross ro.r('pheno <- data.frame(pull.pheno(the_cross))') - ro.r('the_cross$pheno <- cbind(pheno, ' + col_name + ' = '+ names_as_string +')') + ro.r('the_cross$pheno <- cbind(pheno, ' + \ + col_name + ' = ' + names_as_string + ')') return ro.r["the_cross"] + def pull_var(var_name, cross, var_string): ro.globalenv["the_cross"] = cross - ro.r(var_name +' <- pull.pheno(the_cross, ' + var_string + ')') + ro.r(var_name + ' <- pull.pheno(the_cross, ' + var_string + ')') return ro.r[var_name] + def add_cofactors(cross, this_dataset, covariates, samples): ro.numpy2ri.activate() covariate_list = covariates.split(",") covar_name_string = "c(" for i, covariate in enumerate(covariate_list): - logger.info("Covariate: " + covariate); + logger.info("Covariate: " + covariate) this_covar_data = [] covar_as_string = "c(" trait_name = covariate.split(":")[0] @@ -316,16 +356,18 @@ def add_cofactors(cross, this_dataset, covariates, samples): covar_as_string += ")" datatype = get_trait_data_type(covariate) - logger.info("Covariate: " + covariate + " is of type: " + datatype); - if(datatype == "categorical"): # Cat variable - logger.info("call of add_categorical_covar"); - cross, col_names = add_categorical_covar(cross, covar_as_string, i) # Expand and add it to the cross - logger.info("add_categorical_covar returned"); - for z, col_name in enumerate(col_names): # Go through the additional covar names + logger.info("Covariate: " + covariate + " is of type: " + datatype) + if(datatype == "categorical"): # Cat variable + logger.info("call of add_categorical_covar") + cross, col_names = add_categorical_covar( + cross, covar_as_string, i) # Expand and add it to the cross + logger.info("add_categorical_covar returned") + # Go through the additional covar names + for z, col_name in enumerate(col_names): if i < (len(covariate_list) - 1): covar_name_string += '"' + col_name + '", ' else: - if(z < (len(col_names) -1)): + if(z < (len(col_names) - 1)): covar_name_string += '"' + col_name + '", ' else: covar_name_string += '"' + col_name + '"' @@ -341,28 +383,35 @@ def add_cofactors(cross, this_dataset, covariates, samples): covars_ob = pull_var("trait_covars", cross, covar_name_string) return cross, covars_ob + def create_marker_covariates(control_marker, cross): ro.globalenv["the_cross"] = cross - ro.r('genotypes <- pull.geno(the_cross)') # Get the genotype matrix - userinput_sanitized = control_marker.replace(" ", "").split(",") # TODO: sanitize user input, Never Ever trust a user + # Get the genotype matrix + ro.r('genotypes <- pull.geno(the_cross)') + # TODO: sanitize user input, Never Ever trust a user + userinput_sanitized = control_marker.replace(" ", "").split(",") logger.debug(userinput_sanitized) if len(userinput_sanitized) > 0: - covariate_names = ', '.join('"{0}"'.format(w) for w in userinput_sanitized) + covariate_names = ', '.join('"{0}"'.format(w) + for w in userinput_sanitized) ro.r('covnames <- c(' + covariate_names + ')') else: ro.r('covnames <- c()') ro.r('covInGeno <- which(covnames %in% colnames(genotypes))') ro.r('covnames <- covnames[covInGeno]') ro.r("cat('covnames (purged): ', covnames,'\n')") - ro.r('marker_covars <- genotypes[,covnames]') # Get the covariate matrix by using the marker name as index to the genotype file + # Get the covariate matrix by using the marker name as index to the genotype file + ro.r('marker_covars <- genotypes[,covnames]') # TODO: Create a design matrix from the marker covars for the markers in case of an F2, 4way, etc return ro.r["marker_covars"] + def process_pair_scan_results(result): pair_scan_results = [] result = result[1] - output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)] + output = [tuple([result[j][i] for j in range(result.ncol)]) + for i in range(result.nrow)] for i, line in enumerate(result.iter_row()): marker = {} @@ -374,9 +423,10 @@ def process_pair_scan_results(result): return pair_scan_results + def process_rqtl_perm_results(num_perm, results): perm_vals = [] - for line in str(results).split("\n")[1:(num_perm+1)]: + for line in str(results).split("\n")[1:(num_perm + 1)]: #print("R/qtl permutation line:", line.split()) perm_vals.append(float(line.split()[1])) @@ -386,14 +436,18 @@ def process_rqtl_perm_results(num_perm, results): return perm_output, suggestive, significant -def process_rqtl_results(result, species_name): # TODO: how to make this a one liner and not copy the stuff in a loop + +# TODO: how to make this a one liner and not copy the stuff in a loop +def process_rqtl_results(result, species_name): qtl_results = [] - output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)] + output = [tuple([result[j][i] for j in range(result.ncol)]) + for i in range(result.nrow)] for i, line in enumerate(result.iter_row()): marker = {} marker['name'] = result.rownames[i] - if species_name == "mouse" and output[i][0] == 20: #ZS: This is awkward, but I'm not sure how to change the 20s to Xs in the RData file + # ZS: This is awkward, but I'm not sure how to change the 20s to Xs in the RData file + if species_name == "mouse" and output[i][0] == 20: marker['chr'] = "X" else: marker['chr'] = output[i][0] @@ -402,4 +456,4 @@ def process_rqtl_results(result, species_name): # TODO: how to make this marker['lod_score'] = output[i][2] qtl_results.append(marker) - return qtl_results \ No newline at end of file + return qtl_results diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py index 7dd0bcb6..32ccec48 100644 --- a/wqflask/wqflask/marker_regression/run_mapping.py +++ b/wqflask/wqflask/marker_regression/run_mapping.py @@ -1,5 +1,5 @@ from base.trait import GeneralTrait -from base import data_set #import create_dataset +from base import data_set # import create_dataset from pprint import pformat as pf @@ -43,16 +43,18 @@ from utility.external import shell from base.webqtlConfig import TMPDIR, GENERATED_TEXT_DIR import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + class RunMapping: def __init__(self, start_vars, temp_uuid): helper_functions.get_species_dataset_trait(self, start_vars) - self.temp_uuid = temp_uuid #needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py) + # needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py) + self.temp_uuid = temp_uuid - #ZS: Needed to zoom in or remap temp traits like PCA traits + # ZS: Needed to zoom in or remap temp traits like PCA traits if "temp_trait" in start_vars and start_vars['temp_trait'] != "False": self.temp_trait = "True" self.group = self.dataset.group.name @@ -60,13 +62,14 @@ class RunMapping: self.json_data = {} self.json_data['lodnames'] = ['lod.hk'] - #ZS: Sometimes a group may have a genofile that only includes a subset of samples + # ZS: Sometimes a group may have a genofile that only includes a subset of samples genofile_samplelist = [] if 'genofile' in start_vars: - if start_vars['genofile'] != "": - self.genofile_string = start_vars['genofile'] - self.dataset.group.genofile = self.genofile_string.split(":")[0] - genofile_samplelist = get_genofile_samplelist(self.dataset) + if start_vars['genofile'] != "": + self.genofile_string = start_vars['genofile'] + self.dataset.group.genofile = self.genofile_string.split(":")[ + 0] + genofile_samplelist = get_genofile_samplelist(self.dataset) all_samples_ordered = self.dataset.group.all_samples_ordered() @@ -93,7 +96,7 @@ class RunMapping: else: self.n_samples = len([val for val in self.vals if val != "x"]) - #ZS: Check if genotypes exist in the DB in order to create links for markers + # ZS: Check if genotypes exist in the DB in order to create links for markers self.geno_db_exists = geno_db_exists(self.dataset) @@ -101,8 +104,11 @@ class RunMapping: if "results_path" in start_vars: self.mapping_results_path = start_vars['results_path'] else: - mapping_results_filename = self.dataset.group.name + "_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - self.mapping_results_path = "{}{}.csv".format(webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename) + mapping_results_filename = self.dataset.group.name + "_" + \ + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + self.mapping_results_path = "{}{}.csv".format( + webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename) self.manhattan_plot = False if 'manhattan_plot' in start_vars: @@ -114,19 +120,20 @@ class RunMapping: self.manhattan_single_color = start_vars['manhattan_single_color'] self.manhattan_plot = True - self.maf = start_vars['maf'] # Minor allele frequency + self.maf = start_vars['maf'] # Minor allele frequency if "use_loco" in start_vars: self.use_loco = start_vars['use_loco'] else: self.use_loco = None self.suggestive = "" self.significant = "" - self.pair_scan = False # Initializing this since it is checked in views to determine which template to use + # Initializing this since it is checked in views to determine which template to use + self.pair_scan = False if 'transform' in start_vars: self.transform = start_vars['transform'] else: self.transform = "" - self.score_type = "LRS" #ZS: LRS or LOD + self.score_type = "LRS" # ZS: LRS or LOD self.mapping_scale = "physic" if "mapping_scale" in start_vars: self.mapping_scale = start_vars['mapping_scale'] @@ -136,10 +143,11 @@ class RunMapping: self.covariates = start_vars['covariates'] if "covariates" in start_vars else "" self.categorical_vars = [] - #ZS: This is passed to GN1 code for single chr mapping + # ZS: This is passed to GN1 code for single chr mapping self.selected_chr = -1 if "selected_chr" in start_vars: - if int(start_vars['selected_chr']) != -1: #ZS: Needs to be -1 if showing full map; there's probably a better way to fix this + # ZS: Needs to be -1 if showing full map; there's probably a better way to fix this + if int(start_vars['selected_chr']) != -1: self.selected_chr = int(start_vars['selected_chr']) + 1 else: self.selected_chr = int(start_vars['selected_chr']) @@ -153,7 +161,7 @@ class RunMapping: self.lrsMax = start_vars['lrsMax'] if "haplotypeAnalystCheck" in start_vars: self.haplotypeAnalystCheck = start_vars['haplotypeAnalystCheck'] - if "startMb" in start_vars: #ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load + if "startMb" in start_vars: # ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load if "permCheck" in start_vars: self.permCheck = "ON" else: @@ -191,34 +199,39 @@ class RunMapping: self.showGenes = "ON" self.viewLegend = "ON" - #self.dataset.group.get_markers() + # self.dataset.group.get_markers() if self.mapping_method == "gemma": self.first_run = True self.output_files = None if 'output_files' in start_vars: self.output_files = start_vars['output_files'] - if 'first_run' in start_vars: #ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc) + # ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc) + if 'first_run' in start_vars: self.first_run = False self.score_type = "-logP" self.manhattan_plot = True with Bench("Running GEMMA"): if self.use_loco == "True": - marker_obs, self.output_files = gemma_mapping.run_gemma(self.this_trait, self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf, self.first_run, self.output_files) + marker_obs, self.output_files = gemma_mapping.run_gemma( + self.this_trait, self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf, self.first_run, self.output_files) else: - marker_obs, self.output_files = gemma_mapping.run_gemma(self.this_trait, self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf, self.first_run, self.output_files) + marker_obs, self.output_files = gemma_mapping.run_gemma( + self.this_trait, self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf, self.first_run, self.output_files) results = marker_obs elif self.mapping_method == "rqtl_plink": results = self.run_rqtl_plink() elif self.mapping_method == "rqtl_geno": perm_strata = [] if "perm_strata" in start_vars and "categorical_vars" in start_vars: - self.categorical_vars = start_vars["categorical_vars"].split(",") + self.categorical_vars = start_vars["categorical_vars"].split( + ",") if len(self.categorical_vars) and start_vars["perm_strata"] == "True": - primary_samples = SampleList(dataset = self.dataset, - sample_names = self.samples, - this_trait = self.this_trait) + primary_samples = SampleList(dataset=self.dataset, + sample_names=self.samples, + this_trait=self.this_trait) - perm_strata = get_perm_strata(self.this_trait, primary_samples, self.categorical_vars, self.samples) + perm_strata = get_perm_strata( + self.this_trait, primary_samples, self.categorical_vars, self.samples) self.score_type = "LOD" self.control_marker = start_vars['control_marker'] self.do_control = start_vars['do_control'] @@ -227,14 +240,16 @@ class RunMapping: else: self.method = "em" self.model = start_vars['mapmodel_rqtl_geno'] - #if start_vars['pair_scan'] == "true": + # if start_vars['pair_scan'] == "true": # self.pair_scan = True if self.permCheck and self.num_perm > 0: - self.perm_output, self.suggestive, self.significant, results= rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) + self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno( + self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) else: - results = rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) + results = rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, + self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) elif self.mapping_method == "reaper": - if "startMb" in start_vars: #ZS: Check if first time page loaded, so it can default to ON + if "startMb" in start_vars: # ZS: Check if first time page loaded, so it can default to ON if "additiveCheck" in start_vars: self.additiveCheck = start_vars['additiveCheck'] else: @@ -267,24 +282,26 @@ class RunMapping: if self.reaper_version == "new": self.first_run = True self.output_files = None - if 'first_run' in start_vars: #ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc) + # ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc) + if 'first_run' in start_vars: self.first_run = False if 'output_files' in start_vars: - self.output_files = start_vars['output_files'].split(",") + self.output_files = start_vars['output_files'].split( + ",") results, self.perm_output, self.suggestive, self.significant, self.bootstrap_results, self.output_files = qtlreaper_mapping.run_reaper(self.this_trait, - self.dataset, - self.samples, - self.vals, - self.json_data, - self.num_perm, - self.bootCheck, - self.num_bootstrap, - self.do_control, - self.control_marker, - self.manhattan_plot, - self.first_run, - self.output_files) + self.dataset, + self.samples, + self.vals, + self.json_data, + self.num_perm, + self.bootCheck, + self.num_bootstrap, + self.do_control, + self.control_marker, + self.manhattan_plot, + self.first_run, + self.output_files) else: results, self.json_data, self.perm_output, self.suggestive, self.significant, self.bootstrap_results = qtlreaper_mapping.run_original_reaper(self.this_trait, self.dataset, @@ -300,182 +317,201 @@ class RunMapping: elif self.mapping_method == "plink": self.score_type = "-logP" self.manhattan_plot = True - results = plink_mapping.run_plink(self.this_trait, self.dataset, self.species, self.vals, self.maf) + results = plink_mapping.run_plink( + self.this_trait, self.dataset, self.species, self.vals, self.maf) #results = self.run_plink() else: logger.debug("RUNNING NOTHING") self.no_results = False if len(results) == 0: - self.no_results = True + self.no_results = True else: - if self.pair_scan == True: - self.qtl_results = [] - highest_chr = 1 #This is needed in order to convert the highest chr to X/Y - for marker in results: - if marker['chr1'] > 0 or marker['chr1'] == "X" or marker['chr1'] == "X/Y": - if marker['chr1'] > highest_chr or marker['chr1'] == "X" or marker['chr1'] == "X/Y": - highest_chr = marker['chr1'] - if 'lod_score' in list(marker.keys()): - self.qtl_results.append(marker) - - self.trimmed_markers = results - - for qtl in enumerate(self.qtl_results): - self.json_data['chr1'].append(str(qtl['chr1'])) - self.json_data['chr2'].append(str(qtl['chr2'])) - self.json_data['Mb'].append(qtl['Mb']) - self.json_data['markernames'].append(qtl['name']) - - self.js_data = dict( - json_data = self.json_data, - this_trait = self.this_trait.name, - data_set = self.dataset.name, - maf = self.maf, - manhattan_plot = self.manhattan_plot, - mapping_scale = self.mapping_scale, - qtl_results = self.qtl_results - ) - - else: - self.qtl_results = [] - self.results_for_browser = [] - self.annotations_for_browser = [] - highest_chr = 1 #This is needed in order to convert the highest chr to X/Y - for marker in results: - if 'Mb' in marker: - this_ps = marker['Mb']*1000000 - else: - this_ps = marker['cM']*1000000 - - browser_marker = dict( - chr = str(marker['chr']), - rs = marker['name'], - ps = this_ps, - url = "/show_trait?trait_id=" + marker['name'] + "&dataset=" + self.dataset.group.name + "Geno" - ) - - if self.geno_db_exists == "True": - annot_marker = dict( - name = str(marker['name']), - chr = str(marker['chr']), - rs = marker['name'], - pos = this_ps, - url = "/show_trait?trait_id=" + marker['name'] + "&dataset=" + self.dataset.group.name + "Geno" - ) - else: - annot_marker = dict( - name = str(marker['name']), - chr = str(marker['chr']), - rs = marker['name'], - pos = this_ps - ) - - if 'lrs_value' in marker and marker['lrs_value'] > 0: - browser_marker['p_wald'] = 10**-(marker['lrs_value']/4.61) - elif 'lod_score' in marker and marker['lod_score'] > 0: - browser_marker['p_wald'] = 10**-(marker['lod_score']) - else: - browser_marker['p_wald'] = 0 - - self.results_for_browser.append(browser_marker) - self.annotations_for_browser.append(annot_marker) - if str(marker['chr']) > '0' or str(marker['chr']) == "X" or str(marker['chr']) == "X/Y": - if str(marker['chr']) > str(highest_chr) or str(marker['chr']) == "X" or str(marker['chr']) == "X/Y": - highest_chr = marker['chr'] - if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()): - if 'Mb' in marker.keys(): - marker['display_pos'] = "Chr" + str(marker['chr']) + ": " + "{:.6f}".format(marker['Mb']) - elif 'cM' in marker.keys(): - marker['display_pos'] = "Chr" + str(marker['chr']) + ": " + "{:.3f}".format(marker['cM']) - else: - marker['display_pos'] = "N/A" - self.qtl_results.append(marker) - - total_markers = len(self.qtl_results) - - with Bench("Exporting Results"): - export_mapping_results(self.dataset, self.this_trait, self.qtl_results, self.mapping_results_path, self.mapping_scale, self.score_type, self.transform, self.covariates, self.n_samples) - - with Bench("Trimming Markers for Figure"): - if len(self.qtl_results) > 30000: - self.qtl_results = trim_markers_for_figure(self.qtl_results) - self.results_for_browser = trim_markers_for_figure(self.results_for_browser) - filtered_annotations = [] - for marker in self.results_for_browser: - for annot_marker in self.annotations_for_browser: - if annot_marker['rs'] == marker['rs']: - filtered_annotations.append(annot_marker) - break - self.annotations_for_browser = filtered_annotations - browser_files = write_input_for_browser(self.dataset, self.results_for_browser, self.annotations_for_browser) - else: - browser_files = write_input_for_browser(self.dataset, self.results_for_browser, self.annotations_for_browser) - - with Bench("Trimming Markers for Table"): - self.trimmed_markers = trim_markers_for_table(results) - - chr_lengths = get_chr_lengths(self.mapping_scale, self.mapping_method, self.dataset, self.qtl_results) - - #ZS: For zooming into genome browser, need to pass chromosome name instead of number - if self.dataset.group.species == "mouse": - if self.selected_chr == 20: - this_chr = "X" - else: - this_chr = str(self.selected_chr) - elif self.dataset.group.species == "rat": - if self.selected_chr == 21: - this_chr = "X" - else: - this_chr = str(self.selected_chr) - else: - if self.selected_chr == 22: - this_chr = "X" - elif self.selected_chr == 23: - this_chr = "Y" - else: - this_chr = str(self.selected_chr) - - if self.mapping_method != "gemma": - if self.score_type == "LRS": - significant_for_browser = self.significant / 4.61 - else: - significant_for_browser = self.significant - - self.js_data = dict( - #result_score_type = self.score_type, - #this_trait = self.this_trait.name, - #data_set = self.dataset.name, - #maf = self.maf, - #manhattan_plot = self.manhattan_plot, - #mapping_scale = self.mapping_scale, - #chromosomes = chromosome_mb_lengths, - #qtl_results = self.qtl_results, - categorical_vars = self.categorical_vars, - chr_lengths = chr_lengths, - num_perm = self.num_perm, - perm_results = self.perm_output, - significant = significant_for_browser, - browser_files = browser_files, - selected_chr = this_chr, - total_markers = total_markers - ) - else: + if self.pair_scan == True: + self.qtl_results = [] + highest_chr = 1 # This is needed in order to convert the highest chr to X/Y + for marker in results: + if marker['chr1'] > 0 or marker['chr1'] == "X" or marker['chr1'] == "X/Y": + if marker['chr1'] > highest_chr or marker['chr1'] == "X" or marker['chr1'] == "X/Y": + highest_chr = marker['chr1'] + if 'lod_score' in list(marker.keys()): + self.qtl_results.append(marker) + + self.trimmed_markers = results + + for qtl in enumerate(self.qtl_results): + self.json_data['chr1'].append(str(qtl['chr1'])) + self.json_data['chr2'].append(str(qtl['chr2'])) + self.json_data['Mb'].append(qtl['Mb']) + self.json_data['markernames'].append(qtl['name']) + self.js_data = dict( - chr_lengths = chr_lengths, - browser_files = browser_files, - selected_chr = this_chr, - total_markers = total_markers + json_data=self.json_data, + this_trait=self.this_trait.name, + data_set=self.dataset.name, + maf=self.maf, + manhattan_plot=self.manhattan_plot, + mapping_scale=self.mapping_scale, + qtl_results=self.qtl_results ) + else: + self.qtl_results = [] + self.results_for_browser = [] + self.annotations_for_browser = [] + highest_chr = 1 # This is needed in order to convert the highest chr to X/Y + for marker in results: + if 'Mb' in marker: + this_ps = marker['Mb'] * 1000000 + else: + this_ps = marker['cM'] * 1000000 + + browser_marker = dict( + chr=str(marker['chr']), + rs=marker['name'], + ps=this_ps, + url="/show_trait?trait_id=" + \ + marker['name'] + "&dataset=" + \ + self.dataset.group.name + "Geno" + ) + + if self.geno_db_exists == "True": + annot_marker = dict( + name=str(marker['name']), + chr=str(marker['chr']), + rs=marker['name'], + pos=this_ps, + url="/show_trait?trait_id=" + \ + marker['name'] + "&dataset=" + \ + self.dataset.group.name + "Geno" + ) + else: + annot_marker = dict( + name=str(marker['name']), + chr=str(marker['chr']), + rs=marker['name'], + pos=this_ps + ) + + if 'lrs_value' in marker and marker['lrs_value'] > 0: + browser_marker['p_wald'] = 10**- \ + (marker['lrs_value'] / 4.61) + elif 'lod_score' in marker and marker['lod_score'] > 0: + browser_marker['p_wald'] = 10**-(marker['lod_score']) + else: + browser_marker['p_wald'] = 0 + + self.results_for_browser.append(browser_marker) + self.annotations_for_browser.append(annot_marker) + if str(marker['chr']) > '0' or str(marker['chr']) == "X" or str(marker['chr']) == "X/Y": + if str(marker['chr']) > str(highest_chr) or str(marker['chr']) == "X" or str(marker['chr']) == "X/Y": + highest_chr = marker['chr'] + if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()): + if 'Mb' in marker.keys(): + marker['display_pos'] = "Chr" + \ + str(marker['chr']) + ": " + \ + "{:.6f}".format(marker['Mb']) + elif 'cM' in marker.keys(): + marker['display_pos'] = "Chr" + \ + str(marker['chr']) + ": " + \ + "{:.3f}".format(marker['cM']) + else: + marker['display_pos'] = "N/A" + self.qtl_results.append(marker) + + total_markers = len(self.qtl_results) + + with Bench("Exporting Results"): + export_mapping_results(self.dataset, self.this_trait, self.qtl_results, self.mapping_results_path, + self.mapping_scale, self.score_type, self.transform, self.covariates, self.n_samples) + + with Bench("Trimming Markers for Figure"): + if len(self.qtl_results) > 30000: + self.qtl_results = trim_markers_for_figure( + self.qtl_results) + self.results_for_browser = trim_markers_for_figure( + self.results_for_browser) + filtered_annotations = [] + for marker in self.results_for_browser: + for annot_marker in self.annotations_for_browser: + if annot_marker['rs'] == marker['rs']: + filtered_annotations.append(annot_marker) + break + self.annotations_for_browser = filtered_annotations + browser_files = write_input_for_browser( + self.dataset, self.results_for_browser, self.annotations_for_browser) + else: + browser_files = write_input_for_browser( + self.dataset, self.results_for_browser, self.annotations_for_browser) + + with Bench("Trimming Markers for Table"): + self.trimmed_markers = trim_markers_for_table(results) + + chr_lengths = get_chr_lengths( + self.mapping_scale, self.mapping_method, self.dataset, self.qtl_results) + + # ZS: For zooming into genome browser, need to pass chromosome name instead of number + if self.dataset.group.species == "mouse": + if self.selected_chr == 20: + this_chr = "X" + else: + this_chr = str(self.selected_chr) + elif self.dataset.group.species == "rat": + if self.selected_chr == 21: + this_chr = "X" + else: + this_chr = str(self.selected_chr) + else: + if self.selected_chr == 22: + this_chr = "X" + elif self.selected_chr == 23: + this_chr = "Y" + else: + this_chr = str(self.selected_chr) + + if self.mapping_method != "gemma": + if self.score_type == "LRS": + significant_for_browser = self.significant / 4.61 + else: + significant_for_browser = self.significant + + self.js_data = dict( + #result_score_type = self.score_type, + #this_trait = self.this_trait.name, + #data_set = self.dataset.name, + #maf = self.maf, + #manhattan_plot = self.manhattan_plot, + #mapping_scale = self.mapping_scale, + #chromosomes = chromosome_mb_lengths, + #qtl_results = self.qtl_results, + categorical_vars=self.categorical_vars, + chr_lengths=chr_lengths, + num_perm=self.num_perm, + perm_results=self.perm_output, + significant=significant_for_browser, + browser_files=browser_files, + selected_chr=this_chr, + total_markers=total_markers + ) + else: + self.js_data = dict( + chr_lengths=chr_lengths, + browser_files=browser_files, + selected_chr=this_chr, + total_markers=total_markers + ) + def run_rqtl_plink(self): # os.chdir("") never do this inside a webserver!! - output_filename = webqtlUtil.genRandStr("%s_%s_"%(self.dataset.group.name, self.this_trait.name)) + output_filename = webqtlUtil.genRandStr("%s_%s_" % ( + self.dataset.group.name, self.this_trait.name)) - plink_mapping.gen_pheno_txt_file_plink(self.this_trait, self.dataset, self.vals, pheno_filename = output_filename) + plink_mapping.gen_pheno_txt_file_plink( + self.this_trait, self.dataset, self.vals, pheno_filename=output_filename) - rqtl_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (self.dataset.group.name, self.dataset.group.name, TMPDIR, plink_output_filename, self.this_trait.name, self.maf, TMPDIR, plink_output_filename) + rqtl_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % ( + self.dataset.group.name, self.dataset.group.name, TMPDIR, plink_output_filename, self.this_trait.name, self.maf, TMPDIR, plink_output_filename) os.system(rqtl_command) @@ -504,10 +540,13 @@ class RunMapping: trimmed_genotype_data.append(new_genotypes) return trimmed_genotype_data + def export_mapping_results(dataset, trait, markers, results_path, mapping_scale, score_type, transform, covariates, n_samples): with open(results_path, "w+") as output_file: - output_file.write("Time/Date: " + datetime.datetime.now().strftime("%x / %X") + "\n") - output_file.write("Population: " + dataset.group.species.title() + " " + dataset.group.name + "\n") + output_file.write( + "Time/Date: " + datetime.datetime.now().strftime("%x / %X") + "\n") + output_file.write( + "Population: " + dataset.group.species.title() + " " + dataset.group.name + "\n") output_file.write("Data Set: " + dataset.fullname + "\n") output_file.write("N Samples: " + str(n_samples) + "\n") if len(transform) > 0: @@ -527,7 +566,8 @@ def export_mapping_results(dataset, trait, markers, results_path, mapping_scale, output_file.write(transform_text + "\n") if dataset.type == "ProbeSet": output_file.write("Gene Symbol: " + trait.symbol + "\n") - output_file.write("Location: " + str(trait.chr) + " @ " + str(trait.mb) + " Mb\n") + output_file.write("Location: " + str(trait.chr) + \ + " @ " + str(trait.mb) + " Mb\n") if len(covariates) > 0: output_file.write("Cofactors (dataset - trait):\n") for covariate in covariates.split(","): @@ -564,6 +604,7 @@ def export_mapping_results(dataset, trait, markers, results_path, mapping_scale, if i < (len(markers) - 1): output_file.write("\n") + def trim_markers_for_figure(markers): if 'p_wald' in list(markers[0].keys()): score_type = 'p_wald' @@ -612,11 +653,11 @@ def trim_markers_for_figure(markers): if low_counter % 20 == 0: filtered_markers.append(marker) low_counter += 1 - elif 4.61 <= marker[score_type] < (2*4.61): + elif 4.61 <= marker[score_type] < (2 * 4.61): if med_counter % 10 == 0: filtered_markers.append(marker) med_counter += 1 - elif (2*4.61) <= marker[score_type] <= (3*4.61): + elif (2 * 4.61) <= marker[score_type] <= (3 * 4.61): if high_counter % 2 == 0: filtered_markers.append(marker) high_counter += 1 @@ -624,21 +665,27 @@ def trim_markers_for_figure(markers): filtered_markers.append(marker) return filtered_markers + def trim_markers_for_table(markers): if 'lod_score' in list(markers[0].keys()): - sorted_markers = sorted(markers, key=lambda k: k['lod_score'], reverse=True) + sorted_markers = sorted( + markers, key=lambda k: k['lod_score'], reverse=True) else: - sorted_markers = sorted(markers, key=lambda k: k['lrs_value'], reverse=True) + sorted_markers = sorted( + markers, key=lambda k: k['lrs_value'], reverse=True) - #ZS: So we end up with a list of just 2000 markers + # ZS: So we end up with a list of just 2000 markers if len(sorted_markers) >= 2000: trimmed_sorted_markers = sorted_markers[:2000] return trimmed_sorted_markers else: return sorted_markers + def write_input_for_browser(this_dataset, gwas_results, annotations): - file_base = this_dataset.group.name + "_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) + file_base = this_dataset.group.name + "_" + \ + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) gwas_filename = file_base + "_GWAS" annot_filename = file_base + "_ANNOT" gwas_path = "{}/gn2/".format(TEMPDIR) + gwas_filename @@ -650,14 +697,17 @@ def write_input_for_browser(this_dataset, gwas_results, annotations): return [gwas_filename, annot_filename] + def geno_db_exists(this_dataset): geno_db_name = this_dataset.group.name + "Geno" try: - geno_db = data_set.create_dataset(dataset_name=geno_db_name, get_samplelist=False) + geno_db = data_set.create_dataset( + dataset_name=geno_db_name, get_samplelist=False) return "True" except: return "False" + def get_chr_lengths(mapping_scale, mapping_method, dataset, qtl_results): chr_lengths = [] if mapping_scale == "physic": @@ -682,9 +732,11 @@ def get_chr_lengths(mapping_scale, mapping_method, dataset, qtl_results): highest_pos = float(result['cM']) * 1000000 else: highest_pos = float(result['Mb']) * 1000000 - chr_lengths.append({ "chr": str(this_chr), "size": str(highest_pos)}) + chr_lengths.append( + {"chr": str(this_chr), "size": str(highest_pos)}) else: - chr_lengths.append({ "chr": str(this_chr), "size": str(highest_pos)}) + chr_lengths.append( + {"chr": str(this_chr), "size": str(highest_pos)}) this_chr = chr_as_num else: if mapping_method == "reaper": @@ -696,6 +748,7 @@ def get_chr_lengths(mapping_scale, mapping_method, dataset, qtl_results): return chr_lengths + def get_genofile_samplelist(dataset): genofile_samplelist = [] @@ -706,6 +759,7 @@ def get_genofile_samplelist(dataset): return genofile_samplelist + def get_perm_strata(this_trait, sample_list, categorical_vars, used_samples): perm_strata_strings = [] for sample in used_samples: @@ -713,7 +767,8 @@ def get_perm_strata(this_trait, sample_list, categorical_vars, used_samples): combined_string = "" for var in categorical_vars: if var in list(sample_list.sample_attribute_values[sample].keys()): - combined_string += str(sample_list.sample_attribute_values[sample][var]) + combined_string += str( + sample_list.sample_attribute_values[sample][var]) else: combined_string += "NA" else: @@ -721,7 +776,8 @@ def get_perm_strata(this_trait, sample_list, categorical_vars, used_samples): perm_strata_strings.append(combined_string) - d = dict([(y, x+1) for x, y in enumerate(sorted(set(perm_strata_strings)))]) + d = dict([(y, x + 1) + for x, y in enumerate(sorted(set(perm_strata_strings)))]) list_to_numbers = [d[x] for x in perm_strata_strings] perm_strata = list_to_numbers diff --git a/wqflask/wqflask/model.py b/wqflask/wqflask/model.py index 772f74e4..a222b87c 100644 --- a/wqflask/wqflask/model.py +++ b/wqflask/wqflask/model.py @@ -14,9 +14,11 @@ from sqlalchemy.orm import relationship from wqflask.database import Base, init_db + class User(Base): __tablename__ = "user" - id = Column(Unicode(36), primary_key=True, default=lambda: str(uuid.uuid4())) + id = Column(Unicode(36), primary_key=True, + default=lambda: str(uuid.uuid4())) email_address = Column(Unicode(50), unique=True, nullable=False) # Todo: Turn on strict mode for Mysql @@ -27,23 +29,25 @@ class User(Base): active = Column(Boolean(), nullable=False, default=True) - registration_info = Column(Text) # json detailing when they were registered, etc. + # json detailing when they were registered, etc. + registration_info = Column(Text) - confirmed = Column(Text) # json detailing when they confirmed, etc. + confirmed = Column(Text) # json detailing when they confirmed, etc. - superuser = Column(Text) # json detailing when they became a superuser, otherwise empty - # if not superuser + # json detailing when they became a superuser, otherwise empty + superuser = Column(Text) + # if not superuser logins = relationship("Login", order_by="desc(Login.timestamp)", - lazy='dynamic', # Necessary for filter in login_count + lazy='dynamic', # Necessary for filter in login_count foreign_keys="Login.user", ) user_collections = relationship("UserCollection", - order_by="asc(UserCollection.name)", - lazy='dynamic', - ) + order_by="asc(UserCollection.name)", + lazy='dynamic', + ) def display_num_collections(self): """ @@ -63,11 +67,11 @@ class User(Base): print("Couldn't display_num_collections:", why) return "" - def get_collection_by_name(self, collection_name): try: - collect = self.user_collections.filter_by(name=collection_name).first() - except sqlalchemy.orm.exc.NoResultFound: + collect = self.user_collections.filter_by( + name=collection_name).first() + except sqlalchemy.orm.exc.NoResultFound: collect = None return collect @@ -83,7 +87,6 @@ class User(Base): def login_count(self): return self.logins.filter_by(successful=True).count() - @property def confirmed_at(self): if self.confirmed: @@ -116,14 +119,18 @@ class User(Base): except IndexError: return None + class Login(Base): __tablename__ = "login" - id = Column(Unicode(36), primary_key=True, default=lambda: str(uuid.uuid4())) + id = Column(Unicode(36), primary_key=True, + default=lambda: str(uuid.uuid4())) user = Column(Unicode(36), ForeignKey('user.id')) timestamp = Column(DateTime(), default=lambda: datetime.datetime.utcnow()) ip_address = Column(Unicode(39)) - successful = Column(Boolean(), nullable=False) # False if wrong password was entered - session_id = Column(Text) # Set only if successfully logged in, otherwise should be blank + # False if wrong password was entered + successful = Column(Boolean(), nullable=False) + # Set only if successfully logged in, otherwise should be blank + session_id = Column(Text) # Set to user who assumes identity if this was a login for debugging purposes by a superuser assumed_by = Column(Unicode(36), ForeignKey('user.id')) @@ -134,15 +141,19 @@ class Login(Base): ################################################################################################## + class UserCollection(Base): __tablename__ = "user_collection" - id = Column(Unicode(36), primary_key=True, default=lambda: str(uuid.uuid4())) + id = Column(Unicode(36), primary_key=True, + default=lambda: str(uuid.uuid4())) user = Column(Unicode(36), ForeignKey('user.id')) # I'd prefer this to not have a length, but for the index below it needs one name = Column(Unicode(50)) - created_timestamp = Column(DateTime(), default=lambda: datetime.datetime.utcnow()) - changed_timestamp = Column(DateTime(), default=lambda: datetime.datetime.utcnow()) + created_timestamp = Column( + DateTime(), default=lambda: datetime.datetime.utcnow()) + changed_timestamp = Column( + DateTime(), default=lambda: datetime.datetime.utcnow()) members = Column(Text) # We're going to store them as a json list # This index ensures a user doesn't have more than one collection with the same name @@ -158,12 +169,14 @@ class UserCollection(Base): def members_as_set(self): return set(json.loads(self.members)) + def display_collapsible(number): if number: return number else: return "" + def user_uuid(): """Unique cookie for a user""" user_uuid = request.cookies.get('user_uuid') diff --git a/wqflask/wqflask/network_graph/network_graph.py b/wqflask/wqflask/network_graph/network_graph.py index a41df1ed..9b70f03d 100644 --- a/wqflask/wqflask/network_graph/network_graph.py +++ b/wqflask/wqflask/network_graph/network_graph.py @@ -27,6 +27,7 @@ from utility import helper_functions from utility import corr_result_helpers from utility.tools import GN2_BRANCH_URL + class NetworkGraph: def __init__(self, start_vars): diff --git a/wqflask/wqflask/news.py b/wqflask/wqflask/news.py index 861a93f2..e262dd51 100644 --- a/wqflask/wqflask/news.py +++ b/wqflask/wqflask/news.py @@ -1,5 +1,6 @@ from flask import g + class News: def __init__(self): diff --git a/wqflask/wqflask/parser.py b/wqflask/wqflask/parser.py index dcd328c9..bd1c4407 100644 --- a/wqflask/wqflask/parser.py +++ b/wqflask/wqflask/parser.py @@ -22,7 +22,8 @@ import re from pprint import pformat as pf from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) + def parse(pstring): """ @@ -33,7 +34,7 @@ def parse(pstring): (\w+\s*[=:\>\<][\w\*]+) | # wiki=bar, GO:foobar, etc (".*?") | ('.*?') | # terms in quotes, i.e. "brain weight" ([\w\*\?]+)) # shh, brain, etc """, pstring, - flags=re.VERBOSE) + flags=re.VERBOSE) pstring = [item.strip() for item in pstring if item and item.strip()] @@ -52,7 +53,7 @@ def parse(pstring): if '(' in value or '[' in value: assert value.startswith(("(", "[")), "Invalid token" assert value.endswith((")", "]")), "Invalid token" - value = value[1:-1] # Get rid of the parenthesis + value = value[1:-1] # Get rid of the parenthesis values = re.split(r"""\s+|,""", value) value = [value.strip() for value in values if value.strip()] else: diff --git a/wqflask/wqflask/pbkdf2.py b/wqflask/wqflask/pbkdf2.py index 6346df03..1a965fc5 100644 --- a/wqflask/wqflask/pbkdf2.py +++ b/wqflask/wqflask/pbkdf2.py @@ -4,6 +4,8 @@ from werkzeug.security import safe_str_cmp as ssc # Replace this because it just wraps around Python3's internal # functions. Added this during migration. + + def pbkdf2_hex(data, salt, iterations=1000, keylen=24, hashfunc="sha1"): """Wrapper function of python's hashlib.pbkdf2_hmac. """ diff --git a/wqflask/wqflask/resource_manager.py b/wqflask/wqflask/resource_manager.py index 7d51a83d..b28c1b04 100644 --- a/wqflask/wqflask/resource_manager.py +++ b/wqflask/wqflask/resource_manager.py @@ -11,6 +11,7 @@ from utility.redis_tools import get_resource_info, get_group_info, get_groups_li from utility.logger import getLogger logger = getLogger(__name__) + @app.route("/resources/manage", methods=('GET', 'POST')) def manage_resource(): params = request.form if request.form else request.args @@ -26,7 +27,7 @@ def manage_resource(): owner_display_name = None if owner_id != "none": - try: #ZS: User IDs are sometimes stored in Redis as bytes and sometimes as strings, so this is just to avoid any errors for the time being + try: # ZS: User IDs are sometimes stored in Redis as bytes and sometimes as strings, so this is just to avoid any errors for the time being owner_id = str.encode(owner_id) except: pass @@ -38,17 +39,20 @@ def manage_resource(): elif 'email_address' in owner_info: owner_display_name = owner_info['email_address'] - return render_template("admin/manage_resource.html", owner_name = owner_display_name, resource_id = resource_id, resource_info=resource_info, default_mask=default_mask, group_masks=group_masks_with_names, admin_status=admin_status) + return render_template("admin/manage_resource.html", owner_name=owner_display_name, resource_id=resource_id, resource_info=resource_info, default_mask=default_mask, group_masks=group_masks_with_names, admin_status=admin_status) + @app.route("/search_for_users", methods=('POST',)) def search_for_user(): params = request.form user_list = [] user_list += get_users_like_unique_column("full_name", params['user_name']) - user_list += get_users_like_unique_column("email_address", params['user_email']) + user_list += get_users_like_unique_column( + "email_address", params['user_email']) return json.dumps(user_list) + @app.route("/search_for_groups", methods=('POST',)) def search_for_groups(): params = request.form @@ -58,13 +62,15 @@ def search_for_groups(): user_list = [] user_list += get_users_like_unique_column("full_name", params['user_name']) - user_list += get_users_like_unique_column("email_address", params['user_email']) + user_list += get_users_like_unique_column( + "email_address", params['user_email']) for user in user_list: group_list += get_groups_like_unique_column("admins", user['user_id']) group_list += get_groups_like_unique_column("members", user['user_id']) return json.dumps(group_list) + @app.route("/resources/change_owner", methods=('POST',)) def change_owner(): resource_id = request.form['resource_id'] @@ -79,7 +85,8 @@ def change_owner(): flash("You lack the permissions to make this change.", "error") return redirect(url_for("manage_resource", resource_id=resource_id)) else: - return render_template("admin/change_resource_owner.html", resource_id = resource_id) + return render_template("admin/change_resource_owner.html", resource_id=resource_id) + @app.route("/resources/change_default_privileges", methods=('POST',)) def change_default_privileges(): @@ -99,6 +106,7 @@ def change_default_privileges(): else: return redirect(url_for("no_access_page")) + @app.route("/resources/add_group", methods=('POST',)) def add_group_to_resource(): resource_id = request.form['resource_id'] @@ -108,7 +116,7 @@ def add_group_to_resource(): group_id = request.form['selected_group'] resource_info = get_resource_info(resource_id) default_privileges = resource_info['default_mask'] - return render_template("admin/set_group_privileges.html", resource_id = resource_id, group_id = group_id, default_privileges = default_privileges) + return render_template("admin/set_group_privileges.html", resource_id=resource_id, group_id=group_id, default_privileges=default_privileges) elif all(key in request.form for key in ('data_privilege', 'metadata_privilege', 'admin_privilege')): group_id = request.form['group_id'] group_name = get_group_info(group_id)['name'] @@ -118,13 +126,15 @@ def add_group_to_resource(): 'admin': request.form['admin_privilege'] } add_access_mask(resource_id, group_id, access_mask) - flash("Privileges have been added for group {}.".format(group_name), "alert-info") + flash("Privileges have been added for group {}.".format( + group_name), "alert-info") return redirect(url_for("manage_resource", resource_id=resource_id)) else: - return render_template("admin/search_for_groups.html", resource_id = resource_id) + return render_template("admin/search_for_groups.html", resource_id=resource_id) else: return redirect(url_for("no_access_page")) + def get_group_names(group_masks): group_masks_with_names = {} for group_id, group_mask in list(group_masks.items()): @@ -132,5 +142,5 @@ def get_group_names(group_masks): group_name = get_group_info(group_id)['name'] this_mask['name'] = group_name group_masks_with_names[group_id] = this_mask - + return group_masks_with_names diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index fd7c132b..3cbda3dd 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -22,7 +22,8 @@ from utility.tools import GN2_BASE_URL from utility.type_checking import is_str from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) + class SearchResultPage: #maxReturn = 3000 @@ -39,7 +40,7 @@ class SearchResultPage: self.uc_id = uuid.uuid4() self.go_term = None - logger.debug("uc_id:", self.uc_id) # contains a unique id + logger.debug("uc_id:", self.uc_id) # contains a unique id logger.debug("kw is:", kw) # dict containing search terms if kw['search_terms_or']: @@ -51,7 +52,8 @@ class SearchResultPage: search = self.search_terms self.original_search_string = self.search_terms # check for dodgy search terms - rx = re.compile(r'.*\W(href|http|sql|select|update)\W.*', re.IGNORECASE) + rx = re.compile( + r'.*\W(href|http|sql|select|update)\W.*', re.IGNORECASE) if rx.match(search): logger.info("Regex failed search") self.search_term_exists = False @@ -72,11 +74,11 @@ class SearchResultPage: self.dataset = create_dataset(kw['dataset'], dataset_type) logger.debug("search_terms:", self.search_terms) - #ZS: I don't like using try/except, but it seems like the easiest way to account for all possible bad searches here + # ZS: I don't like using try/except, but it seems like the easiest way to account for all possible bad searches here try: self.search() except: - self.search_term_exists = False + self.search_term_exists = False self.too_many_results = False if self.search_term_exists: @@ -95,7 +97,8 @@ class SearchResultPage: trait_list = [] json_trait_list = [] - species = webqtlDatabaseFunction.retrieve_species(self.dataset.group.name) + species = webqtlDatabaseFunction.retrieve_species( + self.dataset.group.name) # result_set represents the results for each search term; a search of # "shh grin2b" would have two sets of results, one for each term logger.debug("self.results is:", pf(self.results)) @@ -108,7 +111,8 @@ class SearchResultPage: trait_dict = {} trait_id = result[0] - this_trait = create_trait(dataset=self.dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) + this_trait = create_trait( + dataset=self.dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) if this_trait: trait_dict['index'] = index + 1 trait_dict['name'] = this_trait.name @@ -117,7 +121,8 @@ class SearchResultPage: else: trait_dict['display_name'] = this_trait.name trait_dict['dataset'] = this_trait.dataset.name - trait_dict['hmac'] = hmac.data_hmac('{}:{}'.format(this_trait.name, this_trait.dataset.name)) + trait_dict['hmac'] = hmac.data_hmac( + '{}:{}'.format(this_trait.name, this_trait.dataset.name)) if this_trait.dataset.type == "ProbeSet": trait_dict['symbol'] = this_trait.symbol if this_trait.symbol else "N/A" trait_dict['description'] = "N/A" @@ -167,9 +172,11 @@ class SearchResultPage: self.trait_list = trait_list if self.dataset.type == "ProbeSet": - self.header_data_names = ['index', 'display_name', 'symbol', 'description', 'location', 'mean', 'lrs_score', 'lrs_location', 'additive'] + self.header_data_names = ['index', 'display_name', 'symbol', 'description', + 'location', 'mean', 'lrs_score', 'lrs_location', 'additive'] elif self.dataset.type == "Publish": - self.header_data_names = ['index', 'display_name', 'description', 'mean', 'authors', 'pubmed_text', 'lrs_score', 'lrs_location', 'additive'] + self.header_data_names = ['index', 'display_name', 'description', 'mean', + 'authors', 'pubmed_text', 'lrs_score', 'lrs_location', 'additive'] elif self.dataset.type == "Geno": self.header_data_names = ['index', 'display_name', 'location'] @@ -183,7 +190,8 @@ class SearchResultPage: combined_from_clause = "" combined_where_clause = "" - previous_from_clauses = [] #The same table can't be referenced twice in the from clause + # The same table can't be referenced twice in the from clause + previous_from_clauses = [] logger.debug("len(search_terms)>1") symbol_list = [] @@ -197,7 +205,8 @@ class SearchResultPage: for i, a_search in enumerate(alias_terms): the_search = self.get_search_ob(a_search) if the_search != None: - get_from_clause = getattr(the_search, "get_from_clause", None) + get_from_clause = getattr( + the_search, "get_from_clause", None) if callable(get_from_clause): from_clause = the_search.get_from_clause() if from_clause in previous_from_clauses: @@ -221,7 +230,8 @@ class SearchResultPage: else: the_search = self.get_search_ob(a_search) if the_search != None: - get_from_clause = getattr(the_search, "get_from_clause", None) + get_from_clause = getattr( + the_search, "get_from_clause", None) if callable(get_from_clause): from_clause = the_search.get_from_clause() if from_clause in previous_from_clauses: @@ -231,7 +241,7 @@ class SearchResultPage: combined_from_clause += from_clause where_clause = the_search.get_where_clause() combined_where_clause += "(" + where_clause + ")" - if (i+1) < len(self.search_terms): + if (i + 1) < len(self.search_terms): if self.and_or == "and": combined_where_clause += "AND" else: @@ -240,7 +250,8 @@ class SearchResultPage: self.search_term_exists = False if self.search_term_exists: combined_where_clause = "(" + combined_where_clause + ")" - final_query = the_search.compile_final_query(combined_from_clause, combined_where_clause) + final_query = the_search.compile_final_query( + combined_from_clause, combined_where_clause) results = the_search.execute(final_query) self.results.extend(results) @@ -262,14 +273,15 @@ class SearchResultPage: if search_ob: search_class = getattr(do_search, search_ob) the_search = search_class(search_term, - search_operator, - self.dataset, - search_type['key'] - ) + search_operator, + self.dataset, + search_type['key'] + ) return the_search else: return None + def get_GO_symbols(a_search): query = """SELECT genes FROM GORef @@ -287,13 +299,15 @@ def get_GO_symbols(a_search): return new_terms + def insert_newlines(string, every=64): """ This is because it is seemingly impossible to change the width of the description column, so I'm just manually adding line breaks """ lines = [] for i in range(0, len(string), every): - lines.append(string[i:i+every]) + lines.append(string[i:i + every]) return '\n'.join(lines) + def get_aliases(symbol_list, species): updated_symbols = [] @@ -308,7 +322,8 @@ def get_aliases(symbol_list, species): symbols_string = ",".join(updated_symbols) filtered_aliases = [] - response = requests.get(GN2_BASE_URL + "/gn3/gene/aliases2/" + symbols_string) + response = requests.get( + GN2_BASE_URL + "/gn3/gene/aliases2/" + symbols_string) if response: alias_lists = json.loads(response.content) seen = set() @@ -322,10 +337,9 @@ def get_aliases(symbol_list, species): search_terms = [] for alias in filtered_aliases: - the_search_term = {'key': None, + the_search_term = {'key': None, 'search_term': [alias], - 'separator' : None} + 'separator': None} search_terms.append(the_search_term) return search_terms - diff --git a/wqflask/wqflask/send_mail.py b/wqflask/wqflask/send_mail.py index 86e8a558..299c866a 100644 --- a/wqflask/wqflask/send_mail.py +++ b/wqflask/wqflask/send_mail.py @@ -8,10 +8,12 @@ Redis = StrictRedis() import mailer + def timestamp(): ts = datetime.datetime.utcnow() return ts.isoformat() + def main(): while True: print("I'm alive!") @@ -31,7 +33,6 @@ def main(): process_message(msg) - def process_message(msg): msg = json.loads(msg) diff --git a/wqflask/wqflask/server_side.py b/wqflask/wqflask/server_side.py index 48761fa0..7f68efad 100644 --- a/wqflask/wqflask/server_side.py +++ b/wqflask/wqflask/server_side.py @@ -1,7 +1,6 @@ # handles server side table processing - class ServerSideTable: """ This class is used to do server-side processing @@ -31,7 +30,7 @@ class ServerSideTable: self.rows_count = rows_count self.table_rows = table_rows self.header_data_names = header_data_names - + self.sort_rows() self.paginate_rows() @@ -50,8 +49,8 @@ class ServerSideTable: column_name = self.header_data_names[column_number - 1] sort_direction = self.request_values['sSortDir_' + str(i)] self.table_rows = sorted(self.table_rows, - key=lambda x: x[column_name], - reverse=is_reverse(sort_direction)) + key=lambda x: x[column_name], + reverse=is_reverse(sort_direction)) def paginate_rows(self): """ diff --git a/wqflask/wqflask/show_trait/SampleList.py b/wqflask/wqflask/show_trait/SampleList.py index f955f632..f9d30dba 100644 --- a/wqflask/wqflask/show_trait/SampleList.py +++ b/wqflask/wqflask/show_trait/SampleList.py @@ -8,6 +8,7 @@ from pprint import pformat as pf from utility import Plot from utility import Bunch + class SampleList: def __init__(self, dataset, @@ -36,16 +37,16 @@ class SampleList: if isinstance(self.this_trait, list): sample = webqtlCaseData.webqtlCaseData(name=sample_name) if counter <= len(self.this_trait): - if isinstance(self.this_trait[counter-1], (bytes, bytearray)): - if (self.this_trait[counter-1].decode("utf-8").lower() != 'x'): + if isinstance(self.this_trait[counter - 1], (bytes, bytearray)): + if (self.this_trait[counter - 1].decode("utf-8").lower() != 'x'): sample = webqtlCaseData.webqtlCaseData( name=sample_name, - value=float(self.this_trait[counter-1])) + value=float(self.this_trait[counter - 1])) else: - if (self.this_trait[counter-1].lower() != 'x'): + if (self.this_trait[counter - 1].lower() != 'x'): sample = webqtlCaseData.webqtlCaseData( name=sample_name, - value=float(self.this_trait[counter-1])) + value=float(self.this_trait[counter - 1])) else: # ZS - If there's no value for the sample/strain, # create the sample object (so samples with no value @@ -56,8 +57,8 @@ class SampleList: sample = webqtlCaseData.webqtlCaseData(name=sample_name) sample.extra_info = {} - if (self.dataset.group.name == 'AXBXA' and - sample_name in ('AXB18/19/20', 'AXB13/14', 'BXA8/17')): + if (self.dataset.group.name == 'AXBXA' + and sample_name in ('AXB18/19/20', 'AXB13/14', 'BXA8/17')): sample.extra_info['url'] = "/mouseCross.html#AXB/BXA" sample.extra_info['css_class'] = "fs12" @@ -69,18 +70,24 @@ class SampleList: sample.extra_attributes = self.sample_attribute_values.get( sample_name, {}) - #ZS: Add a url so RRID case attributes can be displayed as links + # ZS: Add a url so RRID case attributes can be displayed as links if 'rrid' in sample.extra_attributes: if self.dataset.group.species == "mouse": if len(sample.extra_attributes['rrid'].split(":")) > 1: - the_rrid = sample.extra_attributes['rrid'].split(":")[1] - sample.extra_attributes['rrid'] = [sample.extra_attributes['rrid']] - sample.extra_attributes['rrid'].append(webqtlConfig.RRID_MOUSE_URL % the_rrid) + the_rrid = sample.extra_attributes['rrid'].split(":")[ + 1] + sample.extra_attributes['rrid'] = [ + sample.extra_attributes['rrid']] + sample.extra_attributes['rrid'].append( + webqtlConfig.RRID_MOUSE_URL % the_rrid) elif self.dataset.group.species == "rat": if len(str(sample.extra_attributes['rrid'])): - the_rrid = sample.extra_attributes['rrid'].split("_")[1] - sample.extra_attributes['rrid'] = [sample.extra_attributes['rrid']] - sample.extra_attributes['rrid'].append(webqtlConfig.RRID_RAT_URL % the_rrid) + the_rrid = sample.extra_attributes['rrid'].split("_")[ + 1] + sample.extra_attributes['rrid'] = [ + sample.extra_attributes['rrid']] + sample.extra_attributes['rrid'].append( + webqtlConfig.RRID_RAT_URL % the_rrid) self.sample_list.append(sample) @@ -129,7 +136,8 @@ class SampleList: self.attributes[key].name = name self.attributes[key].distinct_values = [ item.Value for item in values] - self.attributes[key].distinct_values=natural_sort(self.attributes[key].distinct_values) + self.attributes[key].distinct_values = natural_sort( + self.attributes[key].distinct_values) all_numbers = True for value in self.attributes[key].distinct_values: try: @@ -169,7 +177,8 @@ class SampleList: except ValueError: pass - attribute_values[self.attributes[item.Id].name.lower()] = attribute_value + attribute_values[self.attributes[item.Id].name.lower( + )] = attribute_value self.sample_attribute_values[sample_name] = attribute_values def get_first_attr_col(self): diff --git a/wqflask/wqflask/show_trait/export_trait_data.py b/wqflask/wqflask/show_trait/export_trait_data.py index 379b746c..7fabc3f6 100644 --- a/wqflask/wqflask/show_trait/export_trait_data.py +++ b/wqflask/wqflask/show_trait/export_trait_data.py @@ -5,6 +5,7 @@ from functools import cmp_to_key from base.trait import create_trait from base import data_set + def export_sample_table(targs): sample_data = json.loads(targs['export_data']) @@ -28,6 +29,7 @@ def export_sample_table(targs): return trait_name, final_sample_data + def get_export_metadata(trait_id, dataset_name): dataset = data_set.create_dataset(dataset_name) this_trait = create_trait(dataset=dataset, @@ -38,16 +40,23 @@ def get_export_metadata(trait_id, dataset_name): metadata = [] if dataset.type == "Publish": metadata.append(["Phenotype ID: " + trait_id]) - metadata.append(["Phenotype URL: " + "http://genenetwork.org/show_trait?trait_id=" + trait_id + "&dataset=" + dataset_name]) + metadata.append(["Phenotype URL: " + "http://genenetwork.org/show_trait?trait_id=" + \ + trait_id + "&dataset=" + dataset_name]) metadata.append(["Group: " + dataset.group.name]) - metadata.append(["Phenotype: " + this_trait.description_display.replace(",", "\",\"")]) - metadata.append(["Authors: " + (this_trait.authors if this_trait.authors else "N/A")]) - metadata.append(["Title: " + (this_trait.title if this_trait.title else "N/A")]) - metadata.append(["Journal: " + (this_trait.journal if this_trait.journal else "N/A")]) - metadata.append(["Dataset Link: http://gn1.genenetwork.org/webqtl/main.py?FormID=sharinginfo&InfoPageName=" + dataset.name]) + metadata.append( + ["Phenotype: " + this_trait.description_display.replace(",", "\",\"")]) + metadata.append( + ["Authors: " + (this_trait.authors if this_trait.authors else "N/A")]) + metadata.append( + ["Title: " + (this_trait.title if this_trait.title else "N/A")]) + metadata.append( + ["Journal: " + (this_trait.journal if this_trait.journal else "N/A")]) + metadata.append( + ["Dataset Link: http://gn1.genenetwork.org/webqtl/main.py?FormID=sharinginfo&InfoPageName=" + dataset.name]) else: metadata.append(["Record ID: " + trait_id]) - metadata.append(["Trait URL: " + "http://genenetwork.org/show_trait?trait_id=" + trait_id + "&dataset=" + dataset_name]) + metadata.append(["Trait URL: " + "http://genenetwork.org/show_trait?trait_id=" + \ + trait_id + "&dataset=" + dataset_name]) if this_trait.symbol: metadata.append(["Symbol: " + this_trait.symbol]) metadata.append(["Dataset: " + dataset.name]) @@ -64,6 +73,7 @@ def dict_to_sorted_list(dictionary): sorted_values = [item[1] for item in sorted_list] return sorted_values + def cmp_samples(a, b): if b[0] == 'name': return 1 diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index f9c5fbe6..fcebbc4d 100644 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -46,7 +46,7 @@ class ShowTrait: resource_id=self.resource_id) elif 'group' in kw: self.temp_trait = True - self.trait_id = "Temp_"+kw['species'] + "_" + kw['group'] + \ + self.trait_id = "Temp_" + kw['species'] + "_" + kw['group'] + \ "_" + datetime.datetime.now().strftime("%m%d%H%M%S") self.temp_species = kw['species'] self.temp_group = kw['group'] @@ -368,8 +368,8 @@ class ShowTrait: chr = transcript_start = transcript_end = None if chr and transcript_start and transcript_end and self.this_trait.refseq_transcriptid: - transcript_start = int(transcript_start*1000000) - transcript_end = int(transcript_end*1000000) + transcript_start = int(transcript_start * 1000000) + transcript_end = int(transcript_end * 1000000) self.ucsc_blat_link = webqtlConfig.UCSC_REFSEQ % ( 'mm10', self.this_trait.refseq_transcriptid, chr, transcript_start, transcript_end) @@ -393,8 +393,8 @@ class ShowTrait: if chr and transcript_start and transcript_end and kgId: # Convert to bases from megabases - transcript_start = int(transcript_start*1000000) - transcript_end = int(transcript_end*1000000) + transcript_start = int(transcript_start * 1000000) + transcript_end = int(transcript_end * 1000000) self.ucsc_blat_link = webqtlConfig.UCSC_REFSEQ % ( 'rn6', kgId, chr, transcript_start, transcript_end) @@ -452,8 +452,8 @@ class ShowTrait: for sample in list(self.this_trait.data.keys()): if (self.this_trait.data[sample].name2 != self.this_trait.data[sample].name): - if ((self.this_trait.data[sample].name2 in primary_sample_names) and - (self.this_trait.data[sample].name not in primary_sample_names)): + if ((self.this_trait.data[sample].name2 in primary_sample_names) + and (self.this_trait.data[sample].name not in primary_sample_names)): primary_sample_names.append( self.this_trait.data[sample].name) primary_sample_names.remove( @@ -515,13 +515,13 @@ def quantile_normalize_vals(sample_groups): ranked_vals = ss.rankdata(trait_vals) p_list = [] for i, val in enumerate(trait_vals): - p_list.append(((i+1) - 0.5)/len(trait_vals)) + p_list.append(((i + 1) - 0.5) / len(trait_vals)) z = ss.norm.ppf(p_list) normed_vals = [] for rank in ranked_vals: - normed_vals.append("%0.3f" % z[int(rank)-1]) + normed_vals.append("%0.3f" % z[int(rank) - 1]) return normed_vals @@ -585,7 +585,7 @@ def get_nearest_marker(this_trait, this_db): GenoXRef.GenoId = Geno.Id AND GenoFreeze.Id = GenoXRef.GenoFreezeId AND GenoFreeze.Name = '{}' - ORDER BY ABS( Geno.Mb - {}) LIMIT 1""".format(this_chr, this_db.group.name+"Geno", this_mb) + ORDER BY ABS( Geno.Mb - {}) LIMIT 1""".format(this_chr, this_db.group.name + "Geno", this_mb) logger.sql(query) result = g.db.execute(query).fetchall() @@ -605,7 +605,7 @@ def get_table_widths(sample_groups, sample_column_width, has_num_cases=False): trait_table_width += 80 if has_num_cases: trait_table_width += 80 - trait_table_width += len(sample_groups[0].attributes)*88 + trait_table_width += len(sample_groups[0].attributes) * 88 trait_table_width = str(trait_table_width) + "px" diff --git a/wqflask/wqflask/snp_browser/snp_browser.py b/wqflask/wqflask/snp_browser/snp_browser.py index 8658abf8..c4d0e135 100644 --- a/wqflask/wqflask/snp_browser/snp_browser.py +++ b/wqflask/wqflask/snp_browser/snp_browser.py @@ -9,6 +9,7 @@ logger = getLogger(__name__) from base import species from base import webqtlConfig + class SnpBrowser: def __init__(self, start_vars): @@ -26,9 +27,11 @@ class SnpBrowser: self.table_rows = [] if self.limit_strains == "true": - self.header_fields, self.empty_field_count, self.header_data_names = get_header_list(variant_type = self.variant_type, strains = self.chosen_strains, empty_columns = self.empty_columns) + self.header_fields, self.empty_field_count, self.header_data_names = get_header_list( + variant_type=self.variant_type, strains=self.chosen_strains, empty_columns=self.empty_columns) else: - self.header_fields, self.empty_field_count, self.header_data_names = get_header_list(variant_type = self.variant_type, strains = self.strain_lists, species = self.species_name, empty_columns = self.empty_columns) + self.header_fields, self.empty_field_count, self.header_data_names = get_header_list( + variant_type=self.variant_type, strains=self.strain_lists, species=self.species_name, empty_columns=self.empty_columns) def initialize_parameters(self, start_vars): if 'first_run' in start_vars: @@ -52,10 +55,12 @@ class SnpBrowser: self.rat_chr_list = [] mouse_species_ob = species.TheSpecies(species_name="Mouse") for key in mouse_species_ob.chromosomes.chromosomes: - self.mouse_chr_list.append(mouse_species_ob.chromosomes.chromosomes[key].name) + self.mouse_chr_list.append( + mouse_species_ob.chromosomes.chromosomes[key].name) rat_species_ob = species.TheSpecies(species_name="Rat") for key in rat_species_ob.chromosomes.chromosomes: - self.rat_chr_list.append(rat_species_ob.chromosomes.chromosomes[key].name) + self.rat_chr_list.append( + rat_species_ob.chromosomes.chromosomes[key].name) if self.species_id == 1: self.this_chr_list = self.mouse_chr_list @@ -108,9 +113,11 @@ class SnpBrowser: "CAST/EiJ"] self.chosen_strains_rat = ["BN", "F344", "WLI", "WMI"] if 'chosen_strains_mouse' in start_vars: - self.chosen_strains_mouse = start_vars['chosen_strains_mouse'].split(",") + self.chosen_strains_mouse = start_vars['chosen_strains_mouse'].split( + ",") if 'chosen_strains_rat' in start_vars: - self.chosen_strains_rat = start_vars['chosen_strains_rat'].split(",") + self.chosen_strains_rat = start_vars['chosen_strains_rat'].split( + ",") if self.species_id == 1: self.chosen_strains = self.chosen_strains_mouse @@ -149,9 +156,11 @@ class SnpBrowser: if self.gene_name != "": if self.species_id != 0: - query = "SELECT geneSymbol, chromosome, txStart, txEnd FROM GeneList WHERE SpeciesId = %s AND geneSymbol = '%s'" % (self.species_id, self.gene_name) + query = "SELECT geneSymbol, chromosome, txStart, txEnd FROM GeneList WHERE SpeciesId = %s AND geneSymbol = '%s'" % ( + self.species_id, self.gene_name) else: - query = "SELECT geneSymbol, chromosome, txStart, txEnd FROM GeneList WHERE geneSymbol = '%s'" % (self.gene_name) + query = "SELECT geneSymbol, chromosome, txStart, txEnd FROM GeneList WHERE geneSymbol = '%s'" % ( + self.gene_name) result = g.db.execute(query).fetchone() if result: self.gene_name, self.chr, self.start_mb, self.end_mb = result @@ -162,9 +171,11 @@ class SnpBrowser: query = "SELECT Id, Chromosome, Position, Position+0.000001 FROM SnpAll WHERE Rs = '%s'" % self.gene_name else: if self.species_id != 0: - query = "SELECT Id, Chromosome, Position, Position+0.000001 FROM SnpAll where SpeciesId = %s AND SnpName = '%s'" % (self.species_id, self.gene_name) + query = "SELECT Id, Chromosome, Position, Position+0.000001 FROM SnpAll where SpeciesId = %s AND SnpName = '%s'" % ( + self.species_id, self.gene_name) else: - query = "SELECT Id, Chromosome, Position, Position+0.000001 FROM SnpAll where SnpName = '%s'" % (self.gene_name) + query = "SELECT Id, Chromosome, Position, Position+0.000001 FROM SnpAll where SnpName = '%s'" % ( + self.gene_name) result_snp = g.db.execute(query).fetchall() if result_snp: self.snp_list = [item[0] for item in result_snp] @@ -176,9 +187,11 @@ class SnpBrowser: elif self.variant_type == "InDel": if self.gene_name[0] == "I": if self.species_id != 0: - query = "SELECT Id, Chromosome, Mb_start, Mb_end FROM IndelAll WHERE SpeciesId = %s AND Name = '%s'" % (self.species_id, self.gene_name) + query = "SELECT Id, Chromosome, Mb_start, Mb_end FROM IndelAll WHERE SpeciesId = %s AND Name = '%s'" % ( + self.species_id, self.gene_name) else: - query = "SELECT Id, Chromosome, Mb_start, Mb_end FROM IndelAll WHERE Name = '%s'" % (self.gene_name) + query = "SELECT Id, Chromosome, Mb_start, Mb_end FROM IndelAll WHERE Name = '%s'" % ( + self.gene_name) result_snp = g.db.execute(query).fetchall() if result_snp: self.snp_list = [item[0] for item in result_snp] @@ -249,12 +262,13 @@ class SnpBrowser: def filter_results(self, results): filtered_results = [] - strain_index_list = [] #ZS: List of positions of selected strains in strain list + strain_index_list = [] # ZS: List of positions of selected strains in strain list last_mb = -1 if self.limit_strains == "true" and len(self.chosen_strains) > 0: for item in self.chosen_strains: - index = self.strain_lists[self.species_name.lower()].index(item) + index = self.strain_lists[self.species_name.lower()].index( + item) strain_index_list.append(index) for seq, result in enumerate(results): @@ -262,7 +276,8 @@ class SnpBrowser: if self.variant_type == "SNP": display_strains = [] - snp_id, species_id, snp_name, rs, chr, mb, mb_2016, alleles, snp_source, conservation_score = result[:10] + snp_id, species_id, snp_name, rs, chr, mb, mb_2016, alleles, snp_source, conservation_score = result[ + :10] effect_list = result[10:28] if self.species_id == 1: self.allele_list = result[30:] @@ -272,13 +287,14 @@ class SnpBrowser: if self.limit_strains == "true" and len(self.chosen_strains) > 0: for index in strain_index_list: if self.species_id == 1: - display_strains.append(result[29+index]) + display_strains.append(result[29 + index]) elif self.species_id == 2: - display_strains.append(result[31+index]) + display_strains.append(result[31 + index]) self.allele_list = display_strains effect_info_dict = get_effect_info(effect_list) - coding_domain_list = ['Start Gained', 'Start Lost', 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] + coding_domain_list = ['Start Gained', 'Start Lost', + 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] intron_domain_list = ['Splice Site', 'Nonsplice Site'] for key in effect_info_dict: @@ -295,19 +311,22 @@ class SnpBrowser: if 'Intergenic' in domain: if self.gene_name != "": - gene_id = get_gene_id(self.species_id, self.gene_name) + gene_id = get_gene_id( + self.species_id, self.gene_name) gene = [gene_id, self.gene_name] else: gene = check_if_in_gene(species_id, chr, mb) transcript = exon = function = function_details = '' - if self.redundant == "false" or last_mb != mb: # filter redundant + if self.redundant == "false" or last_mb != mb: # filter redundant if self.include_record(domain, function, snp_source, conservation_score): - info_list = [snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, function, function_details, snp_source, conservation_score, snp_id] + info_list = [snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, + function, function_details, snp_source, conservation_score, snp_id] info_list.extend(self.allele_list) filtered_results.append(info_list) last_mb = mb else: - gene_list, transcript_list, exon_list, function_list, function_details_list = effect_info_dict[key] + gene_list, transcript_list, exon_list, function_list, function_details_list = effect_info_dict[ + key] for index, item in enumerate(gene_list): gene = item transcript = transcript_list[index] @@ -324,13 +343,15 @@ class SnpBrowser: function = "" if function_details_list: - function_details = "Biotype: " + function_details_list[index] + function_details = "Biotype: " + \ + function_details_list[index] else: function_details = "" if self.redundant == "false" or last_mb != mb: if self.include_record(domain, function, snp_source, conservation_score): - info_list = [snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, function, function_details, snp_source, conservation_score, snp_id] + info_list = [snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, + function, function_details, snp_source, conservation_score, snp_id] info_list.extend(self.allele_list) filtered_results.append(info_list) last_mb = mb @@ -344,7 +365,8 @@ class SnpBrowser: gene = "No Gene" domain = conservation_score = snp_id = snp_name = rs = flank_3 = flank_5 = ncbi = function = "" if self.include_record(domain, function, source_name, conservation_score): - filtered_results.append([indel_name, indel_chr, indel_mb_start, indel_mb_end, indel_strand, indel_type, indel_size, indel_sequence, source_name]) + filtered_results.append([indel_name, indel_chr, indel_mb_start, indel_mb_end, + indel_strand, indel_type, indel_size, indel_sequence, source_name]) last_mb = indel_mb_start else: @@ -364,9 +386,10 @@ class SnpBrowser: if gene_name and (gene_name not in gene_name_list): gene_name_list.append(gene_name) if len(gene_name_list) > 0: - gene_id_name_dict = get_gene_id_name_dict(self.species_id, gene_name_list) + gene_id_name_dict = get_gene_id_name_dict( + self.species_id, gene_name_list) - #ZS: list of booleans representing which columns are entirely empty, so they aren't displayed on the page; only including ones that are sometimes empty (since there's always a location, etc) + # ZS: list of booleans representing which columns are entirely empty, so they aren't displayed on the page; only including ones that are sometimes empty (since there's always a location, etc) self.empty_columns = { "snp_source": "false", "conservation_score": "false", @@ -382,20 +405,23 @@ class SnpBrowser: for i, result in enumerate(self.filtered_results): this_row = {} if self.variant_type == "SNP": - snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, function, function_details, snp_source, conservation_score, snp_id = result[:14] + snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, function, function_details, snp_source, conservation_score, snp_id = result[ + :14] allele_value_list = result[14:] if rs: snp_url = webqtlConfig.DBSNP % (rs) snp_name = rs else: rs = "" - start_bp = int(mb*1000000 - 100) - end_bp = int(mb*1000000 + 100) + start_bp = int(mb * 1000000 - 100) + end_bp = int(mb * 1000000 + 100) position_info = "chr%s:%d-%d" % (chr, start_bp, end_bp) if self.species_id == 2: - snp_url = webqtlConfig.GENOMEBROWSER_URL % ("rn6", position_info) + snp_url = webqtlConfig.GENOMEBROWSER_URL % ( + "rn6", position_info) else: - snp_url = webqtlConfig.GENOMEBROWSER_URL % ("mm10", position_info) + snp_url = webqtlConfig.GENOMEBROWSER_URL % ( + "mm10", position_info) mb = float(mb) mb_formatted = "%2.6f" % mb @@ -428,13 +454,14 @@ class SnpBrowser: gene_link = "" if transcript: - transcript_link = webqtlConfig.ENSEMBLETRANSCRIPT_URL % (transcript) + transcript_link = webqtlConfig.ENSEMBLETRANSCRIPT_URL % ( + transcript) self.empty_columns['transcript'] = "true" else: transcript_link = "" if exon: - exon = exon[1] # exon[0] is exon_id, exon[1] is exon_rank + exon = exon[1] # exon[0] is exon_id, exon[1] is exon_rank self.empty_columns['exon'] = "true" else: exon = "" @@ -459,20 +486,20 @@ class SnpBrowser: function_list = function_details.strip().split(",") function_list = [item.strip() for item in function_list] function_list[0] = function_list[0].title() - function_details = ", ".join(item for item in function_list) + function_details = ", ".join( + item for item in function_list) function_details = function_details.replace("_", " ") function_details = function_details.replace("/", " -> ") if function_details == "Biotype: Protein Coding": function_details = function_details + ", Coding Region Unknown" self.empty_columns['function_details'] = "true" - + #[snp_href, chr, mb_formatted, alleles, snp_source_cell, conservation_score, gene_name_cell, transcript_href, exon, domain_1, domain_2, function, function_details] - base_color_dict = {"A": "#C33232", "C": "#1569C7", "T": "#CFCF32", "G": "#32C332", + base_color_dict = {"A": "#C33232", "C": "#1569C7", "T": "#CFCF32", "G": "#32C332", "t": "#FF6", "c": "#5CB3FF", "a": "#F66", "g": "#CF9", ":": "#FFFFFF", "-": "#FFFFFF", "?": "#FFFFFF"} - the_bases = [] for j, item in enumerate(allele_value_list): if item and isinstance(item, str): @@ -575,7 +602,7 @@ class SnpBrowser: if conservation_score: score_as_float = float(conservation_score) try: - input_score_float = float(self.score) # the user-input score + input_score_float = float(self.score) # the user-input score except: input_score_float = 0.0 @@ -628,30 +655,31 @@ class SnpBrowser: left_offset, right_offset, top_offset, bottom_offset = (30, 30, 40, 50) plot_width = canvas_width - left_offset - right_offset plot_height = canvas_height - top_offset - bottom_offset - y_zero = top_offset + plot_height/2 + y_zero = top_offset + plot_height / 2 - x_scale = plot_width/(self.end_mb - self.start_mb) + x_scale = plot_width / (self.end_mb - self.start_mb) - #draw clickable image map at some point + # draw clickable image map at some point n_click = 80.0 - click_step = plot_width/n_click - click_mb_step = (self.end_mb - self.start_mb)/n_click + click_step = plot_width / n_click + click_mb_step = (self.end_mb - self.start_mb) / n_click - #for i in range(n_click): + # for i in range(n_click): # href = url_for('snp_browser', first_run="false", chosen_strains_mouse=self.chosen_strains_mouse, chosen_strains_rat=self.chosen_strains_rat, variant=self.variant_type, species=self.species_name, gene_name=self.gene_name, chr=self.chr, start_mb=self.start_mb, end_mb=self.end_mb, limit_strains=self.limit_strains, domain=self.domain, function=self.function, criteria=self.criteria, score=self.score, diff_alleles=self.diff_alleles) + def get_browser_sample_lists(species_id=1): strain_lists = {} mouse_strain_list = [] query = "SHOW COLUMNS FROM SnpPattern;" - results = g.db.execute(query).fetchall(); + results = g.db.execute(query).fetchall() for result in results[1:]: mouse_strain_list.append(result[0]) rat_strain_list = [] query = "SHOW COLUMNS FROM RatSnpPattern;" - results = g.db.execute(query).fetchall(); + results = g.db.execute(query).fetchall() for result in results[2:]: rat_strain_list.append(result[0]) @@ -660,7 +688,8 @@ def get_browser_sample_lists(species_id=1): return strain_lists -def get_header_list(variant_type, strains, species = None, empty_columns = None): + +def get_header_list(variant_type, strains, species=None, empty_columns=None): if species == "Mouse": strain_list = strains['mouse'] elif species == "Rat": @@ -668,13 +697,15 @@ def get_header_list(variant_type, strains, species = None, empty_columns = None) else: strain_list = strains - empty_field_count = 0 #ZS: This is an awkward way of letting the javascript know the index where the allele value columns start; there's probably a better way of doing this + empty_field_count = 0 # ZS: This is an awkward way of letting the javascript know the index where the allele value columns start; there's probably a better way of doing this header_fields = [] header_data_names = [] if variant_type == "SNP": - header_fields.append(['Index', 'SNP ID', 'Chr', 'Mb', 'Alleles', 'Source', 'ConScore', 'Gene', 'Transcript', 'Exon', 'Domain 1', 'Domain 2', 'Function', 'Details']) - header_data_names = ['index', 'snp_name', 'chr', 'mb_formatted', 'alleles', 'snp_source', 'conservation_score', 'gene_name', 'transcript', 'exon', 'domain_1', 'domain_2', 'function', 'function_details'] + header_fields.append(['Index', 'SNP ID', 'Chr', 'Mb', 'Alleles', 'Source', 'ConScore', + 'Gene', 'Transcript', 'Exon', 'Domain 1', 'Domain 2', 'Function', 'Details']) + header_data_names = ['index', 'snp_name', 'chr', 'mb_formatted', 'alleles', 'snp_source', 'conservation_score', + 'gene_name', 'transcript', 'exon', 'domain_1', 'domain_2', 'function', 'function_details'] header_fields.append(strain_list) header_data_names += strain_list @@ -704,18 +735,21 @@ def get_header_list(variant_type, strains, species = None, empty_columns = None) if empty_columns['function_details'] == "false": empty_field_count += 1 header_fields[0].remove('Details') - + for col in empty_columns.keys(): if empty_columns[col] == "false": header_data_names.remove(col) elif variant_type == "InDel": - header_fields = ['Index', 'ID', 'Type', 'InDel Chr', 'Mb Start', 'Mb End', 'Strand', 'Size', 'Sequence', 'Source'] - header_data_names = ['index', 'indel_name', 'indel_type', 'indel_chr', 'indel_mb_s', 'indel_mb_e', 'indel_strand', 'indel_size', 'indel_sequence', 'source_name'] + header_fields = ['Index', 'ID', 'Type', 'InDel Chr', + 'Mb Start', 'Mb End', 'Strand', 'Size', 'Sequence', 'Source'] + header_data_names = ['index', 'indel_name', 'indel_type', 'indel_chr', 'indel_mb_s', + 'indel_mb_e', 'indel_strand', 'indel_size', 'indel_sequence', 'source_name'] return header_fields, empty_field_count, header_data_names -def get_effect_details_by_category(effect_name = None, effect_value = None): + +def get_effect_details_by_category(effect_name=None, effect_value=None): gene_list = [] transcript_list = [] exon_list = [] @@ -723,10 +757,13 @@ def get_effect_details_by_category(effect_name = None, effect_value = None): function_detail_list = [] tmp_list = [] - gene_group_list = ['Upstream', 'Downstream', 'Splice Site', 'Nonsplice Site', '3\' UTR'] - biotype_group_list = ['Unknown Effect In Exon', 'Start Gained', 'Start Lost', 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] + gene_group_list = ['Upstream', 'Downstream', + 'Splice Site', 'Nonsplice Site', '3\' UTR'] + biotype_group_list = ['Unknown Effect In Exon', 'Start Gained', + 'Start Lost', 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] new_codon_group_list = ['Start Gained'] - codon_effect_group_list = ['Start Lost', 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] + codon_effect_group_list = [ + 'Start Lost', 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] effect_detail_list = effect_value.strip().split('|') effect_detail_list = [item.strip() for item in effect_detail_list] @@ -764,13 +801,16 @@ def get_effect_details_by_category(effect_name = None, effect_value = None): return [gene_list, transcript_list, exon_list, function_list, function_detail_list] + def get_effect_info(effect_list): domain = "" effect_detail_list = [] effect_info_dict = {} - prime3_utr, prime5_utr, upstream, downstream, intron, nonsplice_site, splice_site, intergenic = effect_list[:8] - exon, non_synonymous_coding, synonymous_coding, start_gained, start_lost, stop_gained, stop_lost, unknown_effect_in_exon = effect_list[8:16] + prime3_utr, prime5_utr, upstream, downstream, intron, nonsplice_site, splice_site, intergenic = effect_list[ + :8] + exon, non_synonymous_coding, synonymous_coding, start_gained, start_lost, stop_gained, stop_lost, unknown_effect_in_exon = effect_list[ + 8:16] if intergenic: domain = "Intergenic" @@ -779,63 +819,77 @@ def get_effect_info(effect_list): # if not exon, get gene list/transcript list info if upstream: domain = "Upstream" - effect_detail_list = get_effect_details_by_category(effect_name='Upstream', effect_value=upstream) + effect_detail_list = get_effect_details_by_category( + effect_name='Upstream', effect_value=upstream) effect_info_dict[domain] = effect_detail_list if downstream: domain = "Downstream" - effect_detail_list = get_effect_details_by_category(effect_name='Downstream', effect_value=downstream) + effect_detail_list = get_effect_details_by_category( + effect_name='Downstream', effect_value=downstream) effect_info_dict[domain] = effect_detail_list if intron: if splice_site: domain = "Splice Site" - effect_detail_list = get_effect_details_by_category(effect_name='Splice Site', effect_value=splice_site) + effect_detail_list = get_effect_details_by_category( + effect_name='Splice Site', effect_value=splice_site) effect_info_dict[domain] = effect_detail_list if nonsplice_site: domain = "Nonsplice Site" - effect_detail_list = get_effect_details_by_category(effect_name='Nonsplice Site', effect_value=nonsplice_site) + effect_detail_list = get_effect_details_by_category( + effect_name='Nonsplice Site', effect_value=nonsplice_site) effect_info_dict[domain] = effect_detail_list # get gene, transcript_list, and exon info if prime3_utr: domain = "3\' UTR" - effect_detail_list = get_effect_details_by_category(effect_name='3\' UTR', effect_value=prime3_utr) + effect_detail_list = get_effect_details_by_category( + effect_name='3\' UTR', effect_value=prime3_utr) effect_info_dict[domain] = effect_detail_list if prime5_utr: domain = "5\' UTR" - effect_detail_list = get_effect_details_by_category(effect_name='5\' UTR', effect_value=prime5_utr) + effect_detail_list = get_effect_details_by_category( + effect_name='5\' UTR', effect_value=prime5_utr) effect_info_dict[domain] = effect_detail_list if start_gained: domain = "Start Gained" - effect_detail_list = get_effect_details_by_category(effect_name='Start Gained', effect_value=start_gained) + effect_detail_list = get_effect_details_by_category( + effect_name='Start Gained', effect_value=start_gained) effect_info_dict[domain] = effect_detail_list if unknown_effect_in_exon: domain = "Unknown Effect In Exon" - effect_detail_list = get_effect_details_by_category(effect_name='Unknown Effect In Exon', effect_value=unknown_effect_in_exon) + effect_detail_list = get_effect_details_by_category( + effect_name='Unknown Effect In Exon', effect_value=unknown_effect_in_exon) effect_info_dict[domain] = effect_detail_list if start_lost: domain = "Start Lost" - effect_detail_list = get_effect_details_by_category(effect_name='Start Lost', effect_value=start_lost) + effect_detail_list = get_effect_details_by_category( + effect_name='Start Lost', effect_value=start_lost) effect_info_dict[domain] = effect_detail_list if stop_gained: domain = "Stop Gained" - effect_detail_list = get_effect_details_by_category(effect_name='Stop Gained', effect_value=stop_gained) + effect_detail_list = get_effect_details_by_category( + effect_name='Stop Gained', effect_value=stop_gained) effect_info_dict[domain] = effect_detail_list if stop_lost: domain = "Stop Lost" - effect_detail_list = get_effect_details_by_category(effect_name='Stop Lost', effect_value=stop_lost) + effect_detail_list = get_effect_details_by_category( + effect_name='Stop Lost', effect_value=stop_lost) effect_info_dict[domain] = effect_detail_list if non_synonymous_coding: domain = "Nonsynonymous" - effect_detail_list = get_effect_details_by_category(effect_name='Nonsynonymous', effect_value=non_synonymous_coding) + effect_detail_list = get_effect_details_by_category( + effect_name='Nonsynonymous', effect_value=non_synonymous_coding) effect_info_dict[domain] = effect_detail_list if synonymous_coding: domain = "Synonymous" - effect_detail_list = get_effect_details_by_category(effect_name='Synonymous', effect_value=synonymous_coding) + effect_detail_list = get_effect_details_by_category( + effect_name='Synonymous', effect_value=synonymous_coding) effect_info_dict[domain] = effect_detail_list return effect_info_dict + def get_gene_id(species_id, gene_name): query = """ SELECT @@ -853,11 +907,13 @@ def get_gene_id(species_id, gene_name): else: return "" + def get_gene_id_name_dict(species_id, gene_name_list): gene_id_name_dict = {} if len(gene_name_list) == 0: return "" - gene_name_str_list = ["'" + gene_name + "'" for gene_name in gene_name_list] + gene_name_str_list = ["'" + gene_name + \ + "'" for gene_name in gene_name_list] gene_name_str = ",".join(gene_name_str_list) query = """ @@ -877,8 +933,9 @@ def get_gene_id_name_dict(species_id, gene_name_list): return gene_id_name_dict + def check_if_in_gene(species_id, chr, mb): - if species_id != 0: #ZS: Check if this is necessary + if species_id != 0: # ZS: Check if this is necessary query = """SELECT geneId, geneSymbol FROM GeneList WHERE SpeciesId = {0} AND chromosome = '{1}' AND @@ -895,4 +952,3 @@ def check_if_in_gene(species_id, chr, mb): return [result[0], result[1]] else: return "" - diff --git a/wqflask/wqflask/submit_bnw.py b/wqflask/wqflask/submit_bnw.py index a0e84c8c..b21a88cc 100644 --- a/wqflask/wqflask/submit_bnw.py +++ b/wqflask/wqflask/submit_bnw.py @@ -3,7 +3,8 @@ from base import data_set from utility import helper_functions import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + def get_bnw_input(start_vars): logger.debug("BNW VARS:", start_vars) diff --git a/wqflask/wqflask/update_search_results.py b/wqflask/wqflask/update_search_results.py index 22a46ef2..2e467dc8 100644 --- a/wqflask/wqflask/update_search_results.py +++ b/wqflask/wqflask/update_search_results.py @@ -10,6 +10,7 @@ from utility.benchmark import Bench from utility.logger import getLogger logger = getLogger(__name__) + class GSearch: def __init__(self, kw): @@ -51,10 +52,12 @@ class GSearch: self.trait_list = [] with Bench("Creating trait objects"): for line in re: - dataset = create_dataset(line[3], "ProbeSet", get_samplelist=False) + dataset = create_dataset( + line[3], "ProbeSet", get_samplelist=False) trait_id = line[4] - #with Bench("Building trait object"): - this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) + # with Bench("Building trait object"): + this_trait = GeneralTrait( + dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) self.trait_list.append(this_trait) elif self.type == "phenotype": @@ -96,7 +99,8 @@ class GSearch: for line in re: dataset = create_dataset(line[2], "Publish") trait_id = line[3] - this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) + this_trait = GeneralTrait( + dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) self.trait_list.append(this_trait) self.results = self.convert_to_json() @@ -108,8 +112,8 @@ class GSearch: json_dict['data'] = [] for i, trait in enumerate(self.trait_list): - trait_row = { "checkbox": "<INPUT TYPE=\"checkbox\" NAME=\"searchResult\" class=\"checkbox trait_checkbox\" style=\"transform: scale(1.5);\" VALUE=\"{}:{}\">".format(trait.name, trait.dataset.name), - "index": i+1, + trait_row = {"checkbox": "<INPUT TYPE=\"checkbox\" NAME=\"searchResult\" class=\"checkbox trait_checkbox\" style=\"transform: scale(1.5);\" VALUE=\"{}:{}\">".format(trait.name, trait.dataset.name), + "index": i + 1, "species": trait.dataset.group.species, "group": trait.dataset.group.name, "tissue": trait.dataset.tissue, diff --git a/wqflask/wqflask/user_login.py b/wqflask/wqflask/user_login.py index b6e7973f..ff77982f 100644 --- a/wqflask/wqflask/user_login.py +++ b/wqflask/wqflask/user_login.py @@ -29,13 +29,15 @@ from utility.tools import SMTP_CONNECT, SMTP_USERNAME, SMTP_PASSWORD, LOG_SQL_AL THREE_DAYS = 60 * 60 * 24 * 3 + def timestamp(): return datetime.datetime.utcnow().isoformat() + def basic_info(): - return dict(timestamp = timestamp(), - ip_address = request.remote_addr, - user_agent = request.headers.get('User-Agent')) + return dict(timestamp=timestamp(), + ip_address=request.remote_addr, + user_agent=request.headers.get('User-Agent')) def encode_password(pass_gen_fields, unencrypted_password): @@ -43,10 +45,10 @@ def encode_password(pass_gen_fields, unencrypted_password): salt = pass_gen_fields['salt'] else: salt = bytes(pass_gen_fields['salt'], "utf-8") - encrypted_password = pbkdf2.pbkdf2_hex(str(unencrypted_password), + encrypted_password = pbkdf2.pbkdf2_hex(str(unencrypted_password), salt, - pass_gen_fields['iterations'], - pass_gen_fields['keylength'], + pass_gen_fields['iterations'], + pass_gen_fields['keylength'], pass_gen_fields['hashfunc']) pass_gen_fields.pop("unencrypted_password", None) @@ -54,61 +56,65 @@ def encode_password(pass_gen_fields, unencrypted_password): return pass_gen_fields + def set_password(password): pass_gen_fields = { - "unencrypted_password": password, - "algorithm": "pbkdf2", - "hashfunc": "sha256", - "salt": base64.b64encode(os.urandom(32)), - "iterations": 100000, - "keylength": 32, - "created_timestamp": timestamp() + "unencrypted_password": password, + "algorithm": "pbkdf2", + "hashfunc": "sha256", + "salt": base64.b64encode(os.urandom(32)), + "iterations": 100000, + "keylength": 32, + "created_timestamp": timestamp() } assert len(password) >= 6, "Password shouldn't be shorter than 6 characters" - encoded_password = encode_password(pass_gen_fields, pass_gen_fields['unencrypted_password']) + encoded_password = encode_password( + pass_gen_fields, pass_gen_fields['unencrypted_password']) return encoded_password + def get_signed_session_id(user): session_id = str(uuid.uuid4()) session_id_signature = hmac.hmac_creation(session_id) session_id_signed = session_id + ":" + session_id_signature - #ZS: Need to check if this is ever actually used or exists + # ZS: Need to check if this is ever actually used or exists if 'user_id' not in user: user['user_id'] = str(uuid.uuid4()) save_user(user, user['user_id']) if 'github_id' in user: - session = dict(login_time = time.time(), - user_type = "github", - user_id = user['user_id'], - github_id = user['github_id'], - user_name = user['name'], - user_url = user['user_url']) + session = dict(login_time=time.time(), + user_type="github", + user_id=user['user_id'], + github_id=user['github_id'], + user_name=user['name'], + user_url=user['user_url']) elif 'orcid' in user: - session = dict(login_time = time.time(), - user_type = "orcid", - user_id = user['user_id'], - github_id = user['orcid'], - user_name = user['name'], - user_url = user['user_url']) + session = dict(login_time=time.time(), + user_type="orcid", + user_id=user['user_id'], + github_id=user['orcid'], + user_name=user['name'], + user_url=user['user_url']) else: - session = dict(login_time = time.time(), - user_type = "gn2", - user_id = user['user_id'], - user_name = user['full_name'], - user_email_address = user['email_address']) + session = dict(login_time=time.time(), + user_type="gn2", + user_id=user['user_id'], + user_name=user['full_name'], + user_email_address=user['email_address']) key = UserSession.user_cookie_name + ":" + session_id Redis.hmset(key, session) Redis.expire(key, THREE_DAYS) - + return session_id_signed + def send_email(toaddr, msg, fromaddr="no-reply@genenetwork.org"): """Send an E-mail through SMTP_CONNECT host. If SMTP_USERNAME is not 'UNKNOWN' TLS is used @@ -123,28 +129,31 @@ def send_email(toaddr, msg, fromaddr="no-reply@genenetwork.org"): server.login(SMTP_USERNAME, SMTP_PASSWORD) server.sendmail(fromaddr, toaddr, msg) server.quit() - logger.info("Successfully sent email to "+toaddr) + logger.info("Successfully sent email to " + toaddr) + -def send_verification_email(user_details, template_name = "email/user_verification.txt", key_prefix = "verification_code", subject = "GeneNetwork e-mail verification"): +def send_verification_email(user_details, template_name="email/user_verification.txt", key_prefix="verification_code", subject="GeneNetwork e-mail verification"): verification_code = str(uuid.uuid4()) key = key_prefix + ":" + verification_code - data = json.dumps(dict(id=user_details['user_id'], timestamp = timestamp())) + data = json.dumps(dict(id=user_details['user_id'], timestamp=timestamp())) Redis.set(key, data) Redis.expire(key, THREE_DAYS) recipient = user_details['email_address'] - body = render_template(template_name, verification_code = verification_code) + body = render_template(template_name, verification_code=verification_code) send_email(recipient, subject, body) return {"recipient": recipient, "subject": subject, "body": body} -def send_invitation_email(user_email, temp_password, template_name = "email/user_invitation.txt", subject = "You've been added to a GeneNetwork user group"): + +def send_invitation_email(user_email, temp_password, template_name="email/user_invitation.txt", subject="You've been added to a GeneNetwork user group"): recipient = user_email body = render_template(template_name, temp_password) send_email(recipient, subject, body) return {"recipient": recipient, "subject": subject, "body": body} + @app.route("/manage/verify_email") def verify_email(): if 'code' in request.args: @@ -153,27 +162,32 @@ def verify_email(): # As long as they have access to the email account # We might as well log them in session_id_signed = get_signed_session_id(user_details) - flash("Thank you for logging in {}.".format(user_details['full_name']), "alert-success") - response = make_response(redirect(url_for('index_page', import_collections = import_col, anon_id = anon_id))) - response.set_cookie(UserSession.user_cookie_name, session_id_signed, max_age=None) + flash("Thank you for logging in {}.".format( + user_details['full_name']), "alert-success") + response = make_response(redirect( + url_for('index_page', import_collections=import_col, anon_id=anon_id))) + response.set_cookie(UserSession.user_cookie_name, + session_id_signed, max_age=None) return response else: - flash("Invalid code: Password reset code does not exist or might have expired!", "error") + flash( + "Invalid code: Password reset code does not exist or might have expired!", "error") + @app.route("/n/login", methods=('GET', 'POST')) def login(): params = request.form if request.form else request.args logger.debug("in login params are:", params) - if not params: #ZS: If coming to page for first time + if not params: # ZS: If coming to page for first time from utility.tools import GITHUB_AUTH_URL, GITHUB_CLIENT_ID, ORCID_AUTH_URL, ORCID_CLIENT_ID external_login = {} if GITHUB_AUTH_URL and GITHUB_CLIENT_ID != 'UNKNOWN': external_login["github"] = GITHUB_AUTH_URL if ORCID_AUTH_URL and ORCID_CLIENT_ID != 'UNKNOWN': external_login["orcid"] = ORCID_AUTH_URL - return render_template("new_security/login_user.html", external_login = external_login, redis_is_available=is_redis_available()) - else: #ZS: After clicking sign-in + return render_template("new_security/login_user.html", external_login=external_login, redis_is_available=is_redis_available()) + else: # ZS: After clicking sign-in if 'type' in params and 'uid' in params: user_details = get_user_by_unique_column("user_id", params['uid']) if user_details: @@ -186,31 +200,36 @@ def login(): display_id = user_details['orcid'] else: display_id = "" - flash("Thank you for logging in {}.".format(display_id), "alert-success") + flash("Thank you for logging in {}.".format( + display_id), "alert-success") response = make_response(redirect(url_for('index_page'))) - response.set_cookie(UserSession.user_cookie_name, session_id_signed, max_age=None) + response.set_cookie( + UserSession.user_cookie_name, session_id_signed, max_age=None) else: flash("Something went unexpectedly wrong.", "alert-danger") - response = make_response(redirect(url_for('index_page'))) + response = make_response(redirect(url_for('index_page'))) return response else: - user_details = get_user_by_unique_column("email_address", params['email_address']) + user_details = get_user_by_unique_column( + "email_address", params['email_address']) password_match = False if user_details: submitted_password = params['password'] pwfields = user_details['password'] if isinstance(pwfields, str): pwfields = json.loads(pwfields) - encrypted_pass_fields = encode_password(pwfields, submitted_password) - password_match = pbkdf2.safe_str_cmp(encrypted_pass_fields['password'], pwfields['password']) + encrypted_pass_fields = encode_password( + pwfields, submitted_password) + password_match = pbkdf2.safe_str_cmp( + encrypted_pass_fields['password'], pwfields['password']) - else: # Invalid e-mail + else: # Invalid e-mail flash("Invalid e-mail address. Please try again.", "alert-danger") response = make_response(redirect(url_for('login'))) return response - if password_match: # If password correct - if user_details['confirmed']: # If account confirmed + if password_match: # If password correct + if user_details['confirmed']: # If account confirmed import_col = "false" anon_id = "" if 'import_collections' in params: @@ -218,20 +237,25 @@ def login(): anon_id = params['anon_id'] session_id_signed = get_signed_session_id(user_details) - flash("Thank you for logging in {}.".format(user_details['full_name']), "alert-success") - response = make_response(redirect(url_for('index_page', import_collections = import_col, anon_id = anon_id))) - response.set_cookie(UserSession.user_cookie_name, session_id_signed, max_age=None) + flash("Thank you for logging in {}.".format( + user_details['full_name']), "alert-success") + response = make_response(redirect( + url_for('index_page', import_collections=import_col, anon_id=anon_id))) + response.set_cookie( + UserSession.user_cookie_name, session_id_signed, max_age=None) return response else: - email_ob = send_verification_email(user_details, template_name = "email/user_verification.txt") + email_ob = send_verification_email( + user_details, template_name="email/user_verification.txt") return render_template("newsecurity/verification_still_needed.html", subject=email_ob['subject']) - else: # Incorrect password - #ZS: It previously seemed to store that there was an incorrect log-in attempt here, but it did so in the MySQL DB so this might need to be reproduced with Redis + else: # Incorrect password + # ZS: It previously seemed to store that there was an incorrect log-in attempt here, but it did so in the MySQL DB so this might need to be reproduced with Redis flash("Invalid password. Please try again.", "alert-danger") response = make_response(redirect(url_for('login'))) return response + @app.route("/n/login/github_oauth2", methods=('GET', 'POST')) def github_oauth2(): from utility.tools import GITHUB_CLIENT_ID, GITHUB_CLIENT_SECRET, GITHUB_AUTH_URL @@ -242,34 +266,39 @@ def github_oauth2(): "code": code } - result = requests.post("https://github.com/login/oauth/access_token", json=data) - result_dict = {arr[0]:arr[1] for arr in [tok.split("=") for tok in result.text.split("&")]} + result = requests.post( + "https://github.com/login/oauth/access_token", json=data) + result_dict = {arr[0]: arr[1] + for arr in [tok.split("=") for tok in result.text.split("&")]} github_user = get_github_user_details(result_dict["access_token"]) user_details = get_user_by_unique_column("github_id", github_user["id"]) if user_details == None: user_details = { - "user_id": str(uuid.uuid4()), - "name": github_user["name"].encode("utf-8") if github_user["name"] else "None", + "user_id": str(uuid.uuid4()), + "name": github_user["name"].encode("utf-8") if github_user["name"] else "None", "github_id": github_user["id"], - "user_url": github_user["html_url"].encode("utf-8"), - "login_type": "github", - "organization": "", - "active": 1, + "user_url": github_user["html_url"].encode("utf-8"), + "login_type": "github", + "organization": "", + "active": 1, "confirmed": 1 } save_user(user_details, user_details["user_id"]) - url = "/n/login?type=github&uid="+user_details["user_id"] + url = "/n/login?type=github&uid=" + user_details["user_id"] return redirect(url) + def get_github_user_details(access_token): from utility.tools import GITHUB_API_URL - result = requests.get(GITHUB_API_URL, headers = {'Authorization':'token ' + access_token }).content + result = requests.get(GITHUB_API_URL, headers={ + 'Authorization': 'token ' + access_token}).content return json.loads(result) + @app.route("/n/login/orcid_oauth2", methods=('GET', 'POST')) def orcid_oauth2(): from uuid import uuid4 @@ -279,8 +308,8 @@ def orcid_oauth2(): url = "/n/login" if code: data = { - "client_id": ORCID_CLIENT_ID, - "client_secret": ORCID_CLIENT_SECRET, + "client_id": ORCID_CLIENT_ID, + "client_secret": ORCID_CLIENT_SECRET, "grant_type": "authorization_code", "redirect_uri": GN2_BRANCH_URL + "n/login/orcid_oauth2", "code": code @@ -292,25 +321,27 @@ def orcid_oauth2(): user_details = get_user_by_unique_column("orcid", result_dict["orcid"]) if user_details == None: user_details = { - "user_id": str(uuid4()), - "name": result_dict["name"], - "orcid": result_dict["orcid"], - "user_url": "%s/%s" % ("/".join(ORCID_AUTH_URL.split("/")[:-2]), result_dict["orcid"]), - "login_type": "orcid", - "organization": "", - "active": 1, + "user_id": str(uuid4()), + "name": result_dict["name"], + "orcid": result_dict["orcid"], + "user_url": "%s/%s" % ("/".join(ORCID_AUTH_URL.split("/")[:-2]), result_dict["orcid"]), + "login_type": "orcid", + "organization": "", + "active": 1, "confirmed": 1 } save_user(user_details, user_details["user_id"]) - url = "/n/login?type=orcid&uid="+user_details["user_id"] + url = "/n/login?type=orcid&uid=" + user_details["user_id"] else: flash("There was an error getting code from ORCID") return redirect(url) + def get_github_user_details(access_token): from utility.tools import GITHUB_API_URL - result = requests.get(GITHUB_API_URL, headers = {'Authorization':'token ' + access_token }).content + result = requests.get(GITHUB_API_URL, headers={ + 'Authorization': 'token ' + access_token}).content return json.loads(result) @@ -325,6 +356,7 @@ def logout(): response.set_cookie(UserSession.user_cookie_name, '', expires=0) return response + @app.route("/n/forgot_password", methods=['GET']) def forgot_password(): """Entry point for forgotten password""" @@ -333,15 +365,16 @@ def forgot_password(): print("ERRORS: ", errors) return render_template("new_security/forgot_password.html", errors=errors) + def send_forgot_password_email(verification_email): from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText - template_name = "email/forgot_password.txt" + template_name = "email/forgot_password.txt" key_prefix = "forgot_password_code" subject = "GeneNetwork password reset" fromaddr = "no-reply@genenetwork.org" - + verification_code = str(uuid.uuid4()) key = key_prefix + ":" + verification_code @@ -353,7 +386,7 @@ def send_forgot_password_email(verification_email): save_verification_code(verification_email, verification_code) - body = render_template(template_name, verification_code = verification_code) + body = render_template(template_name, verification_code=verification_code) msg = MIMEMultipart() msg["To"] = verification_email @@ -365,6 +398,7 @@ def send_forgot_password_email(verification_email): return subject + @app.route("/n/forgot_password_submit", methods=('POST',)) def forgot_password_submit(): """When a forgotten password form is submitted we get here""" @@ -373,19 +407,23 @@ def forgot_password_submit(): next_page = None if email_address != "": logger.debug("Wants to send password E-mail to ", email_address) - user_details = get_user_by_unique_column("email_address", email_address) + user_details = get_user_by_unique_column( + "email_address", email_address) if user_details: - email_subject = send_forgot_password_email(user_details["email_address"]) + email_subject = send_forgot_password_email( + user_details["email_address"]) return render_template("new_security/forgot_password_step2.html", subject=email_subject) else: - flash("The e-mail entered is not associated with an account.", "alert-danger") + flash("The e-mail entered is not associated with an account.", + "alert-danger") return redirect(url_for("forgot_password")) else: flash("You MUST provide an email", "alert-danger") return redirect(url_for("forgot_password")) + @app.route("/n/password_reset", methods=['GET']) def password_reset(): """Entry point after user clicks link in E-mail""" @@ -400,11 +438,13 @@ def password_reset(): return render_template( "new_security/password_reset.html", user_encode=user_details["email_address"]) else: - flash("Invalid code: Password reset code does not exist or might have expired!", "error") + flash( + "Invalid code: Password reset code does not exist or might have expired!", "error") return redirect(url_for("login")) else: return redirect(url_for("login")) + @app.route("/n/password_reset_step2", methods=('POST',)) def password_reset_step2(): """Handle confirmation E-mail for password reset""" @@ -422,44 +462,52 @@ def password_reset_step2(): flash("Password changed successfully. You can now sign in.", "alert-info") return redirect(url_for('login')) + def register_user(params): - thank_you_mode = False - errors = [] - user_details = {} + thank_you_mode = False + errors = [] + user_details = {} - user_details['email_address'] = params.get('email_address', '').encode("utf-8").strip() - if not (5 <= len(user_details['email_address']) <= 50): - errors.append('Email Address needs to be between 5 and 50 characters.') - else: - email_exists = get_user_by_unique_column("email_address", user_details['email_address']) - if email_exists: - errors.append('User already exists with that email') + user_details['email_address'] = params.get( + 'email_address', '').encode("utf-8").strip() + if not (5 <= len(user_details['email_address']) <= 50): + errors.append( + 'Email Address needs to be between 5 and 50 characters.') + else: + email_exists = get_user_by_unique_column( + "email_address", user_details['email_address']) + if email_exists: + errors.append('User already exists with that email') - user_details['full_name'] = params.get('full_name', '').encode("utf-8").strip() - if not (5 <= len(user_details['full_name']) <= 50): - errors.append('Full Name needs to be between 5 and 50 characters.') + user_details['full_name'] = params.get( + 'full_name', '').encode("utf-8").strip() + if not (5 <= len(user_details['full_name']) <= 50): + errors.append('Full Name needs to be between 5 and 50 characters.') - user_details['organization'] = params.get('organization', '').encode("utf-8").strip() - if user_details['organization'] and not (5 <= len(user_details['organization']) <= 50): - errors.append('Organization needs to be empty or between 5 and 50 characters.') + user_details['organization'] = params.get( + 'organization', '').encode("utf-8").strip() + if user_details['organization'] and not (5 <= len(user_details['organization']) <= 50): + errors.append( + 'Organization needs to be empty or between 5 and 50 characters.') - password = str(params.get('password', '')) - if not (6 <= len(password)): - errors.append('Password needs to be at least 6 characters.') + password = str(params.get('password', '')) + if not (6 <= len(password)): + errors.append('Password needs to be at least 6 characters.') - if params.get('password_confirm') != password: - errors.append("Passwords don't match.") + if params.get('password_confirm') != password: + errors.append("Passwords don't match.") - user_details['password'] = set_password(password) - user_details['user_id'] = str(uuid.uuid4()) - user_details['confirmed'] = 1 + user_details['password'] = set_password(password) + user_details['user_id'] = str(uuid.uuid4()) + user_details['confirmed'] = 1 - user_details['registration_info'] = basic_info() + user_details['registration_info'] = basic_info() - if len(errors) == 0: - save_user(user_details, user_details['user_id']) + if len(errors) == 0: + save_user(user_details, user_details['user_id']) + + return errors - return errors @app.route("/n/register", methods=('GET', 'POST')) def register(): @@ -473,11 +521,13 @@ def register(): errors = register_user(params) if len(errors) == 0: - flash("Registration successful. You may login with your new account", "alert-info") + flash( + "Registration successful. You may login with your new account", "alert-info") return redirect(url_for("login")) return render_template("new_security/register_user.html", values=params, errors=errors) + @app.errorhandler(401) def unauthorized(error): return redirect(url_for('login')) diff --git a/wqflask/wqflask/user_manager.py b/wqflask/wqflask/user_manager.py index fcec3b67..cf84ea73 100644 --- a/wqflask/wqflask/user_manager.py +++ b/wqflask/wqflask/user_manager.py @@ -74,11 +74,11 @@ class AnonUser: self.key = "anon_collection:v1:{}".format(self.anon_id) def add_collection(self, new_collection): - collection_dict = dict(name = new_collection.name, - created_timestamp = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), - changed_timestamp = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), - num_members = new_collection.num_members, - members = new_collection.get_members()) + collection_dict = dict(name=new_collection.name, + created_timestamp=datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), + changed_timestamp=datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), + num_members=new_collection.num_members, + members=new_collection.get_members()) Redis.set(self.key, json.dumps(collection_dict)) Redis.expire(self.key, 60 * 60 * 24 * 365) @@ -93,8 +93,10 @@ class AnonUser: this_collection = {} this_collection['id'] = collection['id'] this_collection['name'] = collection['name'] - this_collection['created_timestamp'] = collection['created_timestamp'].strftime('%b %d %Y %I:%M%p') - this_collection['changed_timestamp'] = collection['changed_timestamp'].strftime('%b %d %Y %I:%M%p') + this_collection['created_timestamp'] = collection['created_timestamp'].strftime( + '%b %d %Y %I:%M%p') + this_collection['changed_timestamp'] = collection['changed_timestamp'].strftime( + '%b %d %Y %I:%M%p') this_collection['num_members'] = collection['num_members'] this_collection['members'] = collection['members'] updated_collections.append(this_collection) @@ -108,21 +110,26 @@ class AnonUser: else: collections = json.loads(json_collections) for collection in collections: - collection['created_timestamp'] = datetime.datetime.strptime(collection['created_timestamp'], '%b %d %Y %I:%M%p') - collection['changed_timestamp'] = datetime.datetime.strptime(collection['changed_timestamp'], '%b %d %Y %I:%M%p') + collection['created_timestamp'] = datetime.datetime.strptime( + collection['created_timestamp'], '%b %d %Y %I:%M%p') + collection['changed_timestamp'] = datetime.datetime.strptime( + collection['changed_timestamp'], '%b %d %Y %I:%M%p') - collections = sorted(collections, key = lambda i: i['changed_timestamp'], reverse = True) + collections = sorted( + collections, key=lambda i: i['changed_timestamp'], reverse=True) return collections def import_traits_to_user(self): result = Redis.get(self.key) collections_list = json.loads(result if result else "[]") for collection in collections_list: - collection_exists = g.user_session.get_collection_by_name(collection['name']) + collection_exists = g.user_session.get_collection_by_name( + collection['name']) if collection_exists: continue else: - g.user_session.add_collection(collection['name'], collection['members']) + g.user_session.add_collection( + collection['name'], collection['members']) def display_num_collections(self): """ @@ -148,9 +155,11 @@ def verify_cookie(cookie): the_uuid, separator, the_signature = cookie.partition(':') assert len(the_uuid) == 36, "Is session_id a uuid?" assert separator == ":", "Expected a : here" - assert the_signature == actual_hmac_creation(the_uuid), "Uh-oh, someone tampering with the cookie?" + assert the_signature == actual_hmac_creation( + the_uuid), "Uh-oh, someone tampering with the cookie?" return the_uuid + def create_signed_cookie(): the_uuid = str(uuid.uuid4()) signature = actual_hmac_creation(the_uuid) @@ -158,6 +167,7 @@ def create_signed_cookie(): logger.debug("uuid_signed:", uuid_signed) return the_uuid, uuid_signed + class UserSession: """Logged in user handling""" @@ -182,13 +192,13 @@ class UserSession: # weekend and the site hasn't been visited by the user self.logged_in = False - ########### Grrr...this won't work because of the way flask handles cookies + # Grrr...this won't work because of the way flask handles cookies # Delete the cookie #response = make_response(redirect(url_for('login'))) #response.set_cookie(self.cookie_name, '', expires=0) - #flash( + # flash( # "Due to inactivity your session has expired. If you'd like please login again.") - #return response + # return response return if Redis.ttl(self.redis_key) < THREE_DAYS: @@ -213,7 +223,7 @@ class UserSession: user_email = self.record['user_email_address'] - #ZS: Get user's collections if they exist + # ZS: Get user's collections if they exist user_id = None user_id = get_user_id("email_address", user_email) return user_id @@ -230,7 +240,7 @@ class UserSession: def user_collections(self): """List of user's collections""" - #ZS: Get user's collections if they exist + # ZS: Get user's collections if they exist collections = get_user_collections(self.redis_user_id) return collections @@ -248,7 +258,7 @@ class UserSession: 'created_timestamp': datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), 'changed_timestamp': datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), 'num_members': len(traits), - 'members': list(traits) } + 'members': list(traits)} current_collections = self.user_collections current_collections.append(collection_dict) @@ -280,7 +290,8 @@ class UserSession: updated_collection['members'] = updated_traits updated_collection['num_members'] = len(updated_traits) - updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') + updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime( + '%b %d %Y %I:%M%p') updated_collections = [] for collection in self.user_collections: @@ -306,7 +317,8 @@ class UserSession: updated_collection['members'] = updated_traits updated_collection['num_members'] = len(updated_traits) - updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') + updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime( + '%b %d %Y %I:%M%p') updated_collections = [] for collection in self.user_collections: @@ -341,23 +353,29 @@ class UserSession: Redis.delete(self.cookie_name) logger.debug("At end of delete_session") + @app.before_request def get_cookie(): logger.info("@app.before_request get cookie") g.user_session = UserSession() g.cookie_session = AnonUser() -#@app.after_request +# @app.after_request + + def set_cookie(response): if not request.cookies.get(g.cookie_session.cookie_name): - response.set_cookie(g.cookie_session.cookie_name, g.cookie_session.cookie) + response.set_cookie(g.cookie_session.cookie_name, + g.cookie_session.cookie) return response + class UsersManager: def __init__(self): self.users = model.User.query.all() logger.debug("Users are:", self.users) + class UserManager: def __init__(self, kw): self.user_id = kw['user_id'] @@ -383,22 +401,28 @@ class RegisterUser: self.errors = [] self.user = Bunch() - self.user.email_address = kw.get('email_address', '').encode("utf-8").strip() + self.user.email_address = kw.get( + 'email_address', '').encode("utf-8").strip() if not (5 <= len(self.user.email_address) <= 50): - self.errors.append('Email Address needs to be between 5 and 50 characters.') + self.errors.append( + 'Email Address needs to be between 5 and 50 characters.') else: - email_exists = get_user_by_unique_column("email_address", self.user.email_address) + email_exists = get_user_by_unique_column( + "email_address", self.user.email_address) #email_exists = get_user_by_unique_column(es, "email_address", self.user.email_address) if email_exists: self.errors.append('User already exists with that email') self.user.full_name = kw.get('full_name', '').encode("utf-8").strip() if not (5 <= len(self.user.full_name) <= 50): - self.errors.append('Full Name needs to be between 5 and 50 characters.') + self.errors.append( + 'Full Name needs to be between 5 and 50 characters.') - self.user.organization = kw.get('organization', '').encode("utf-8").strip() + self.user.organization = kw.get( + 'organization', '').encode("utf-8").strip() if self.user.organization and not (5 <= len(self.user.organization) <= 50): - self.errors.append('Organization needs to be empty or between 5 and 50 characters.') + self.errors.append( + 'Organization needs to be empty or between 5 and 50 characters.') password = str(kw.get('password', '')) if not (6 <= len(password)): @@ -419,6 +443,7 @@ class RegisterUser: self.user.registration_info = json.dumps(basic_info(), sort_keys=True) save_user(self.user.__dict__, self.user.user_id) + def set_password(password, user): pwfields = Bunch() @@ -450,12 +475,12 @@ def set_password(password, user): pwfields.encrypt_time = enc_password.encrypt_time user.password = json.dumps(pwfields.__dict__, - sort_keys=True, - ) + sort_keys=True, + ) class VerificationEmail: - template_name = "email/verification.txt" + template_name = "email/verification.txt" key_prefix = "verification_code" subject = "GeneNetwork email verification" @@ -473,9 +498,10 @@ class VerificationEmail: to = user.email_address subject = self.subject body = render_template(self.template_name, - verification_code = verification_code) + verification_code=verification_code) send_email(to, subject, body) + class ForgotPasswordEmail(VerificationEmail): template_name = "email/forgot_password.txt" key_prefix = "forgot_password_code" @@ -496,11 +522,10 @@ class ForgotPasswordEmail(VerificationEmail): save_verification_code(toaddr, verification_code) - subject = self.subject body = render_template( self.template_name, - verification_code = verification_code) + verification_code=verification_code) msg = MIMEMultipart() msg["To"] = toaddr @@ -525,11 +550,13 @@ class Password: def basic_info(): - return dict(timestamp = timestamp(), - ip_address = request.remote_addr, - user_agent = request.headers.get('User-Agent')) + return dict(timestamp=timestamp(), + ip_address=request.remote_addr, + user_agent=request.headers.get('User-Agent')) + +# @app.route("/manage/verify_email") + -#@app.route("/manage/verify_email") def verify_email(): user = DecodeUser(VerificationEmail.key_prefix).user user.confirmed = json.dumps(basic_info(), sort_keys=True) @@ -543,7 +570,9 @@ def verify_email(): response.set_cookie(UserSession.cookie_name, session_id_signed) return response -#@app.route("/n/password_reset", methods=['GET']) +# @app.route("/n/password_reset", methods=['GET']) + + def password_reset(): """Entry point after user clicks link in E-mail""" logger.debug("in password_reset request.url is:", request.url) @@ -556,18 +585,22 @@ def password_reset(): if verification_code: user_email = check_verification_code(verification_code) if user_email: - user_details = get_user_by_unique_column('email_address', user_email) + user_details = get_user_by_unique_column( + 'email_address', user_email) if user_details: return render_template( "new_security/password_reset.html", user_encode=user_details["user_id"]) else: flash("Invalid code: User no longer exists!", "error") else: - flash("Invalid code: Password reset code does not exist or might have expired!", "error") + flash( + "Invalid code: Password reset code does not exist or might have expired!", "error") else: return redirect(url_for("login")) -#@app.route("/n/password_reset_step2", methods=('POST',)) +# @app.route("/n/password_reset_step2", methods=('POST',)) + + def password_reset_step2(): """Handle confirmation E-mail for password reset""" logger.debug("in password_reset request.url is:", request.url) @@ -577,7 +610,6 @@ def password_reset_step2(): logger.debug("locals are:", locals()) - user = Bunch() password = request.form['password'] set_password(password, user) @@ -589,6 +621,7 @@ def password_reset_step2(): return response + class DecodeUser: def __init__(self, code_prefix): @@ -611,7 +644,9 @@ class DecodeUser: logger.debug("data is:", data) return model.User.query.get(data['id']) -#@app.route("/n/login", methods=('GET', 'POST')) +# @app.route("/n/login", methods=('GET', 'POST')) + + def login(): lu = LoginUser() login_type = request.args.get("type") @@ -621,7 +656,9 @@ def login(): else: return lu.standard_login() -#@app.route("/n/login/github_oauth2", methods=('GET', 'POST')) +# @app.route("/n/login/github_oauth2", methods=('GET', 'POST')) + + def github_oauth2(): from utility.tools import GITHUB_CLIENT_ID, GITHUB_CLIENT_SECRET code = request.args.get("code") @@ -630,29 +667,26 @@ def github_oauth2(): "client_secret": GITHUB_CLIENT_SECRET, "code": code } - result = requests.post("https://github.com/login/oauth/access_token", json=data) - result_dict = {arr[0]:arr[1] for arr in [tok.split("=") for tok in [token.encode("utf-8") for token in result.text.split("&")]]} + result = requests.post( + "https://github.com/login/oauth/access_token", json=data) + result_dict = {arr[0]: arr[1] for arr in [tok.split( + "=") for tok in [token.encode("utf-8") for token in result.text.split("&")]]} github_user = get_github_user_details(result_dict["access_token"]) user_details = get_user_by_unique_column("github_id", github_user["id"]) if user_details == None: user_details = { - "user_id": str(uuid.uuid4()) - , "name": github_user["name"].encode("utf-8") - , "github_id": github_user["id"] - , "user_url": github_user["html_url"].encode("utf-8") - , "login_type": "github" - , "organization": "" - , "active": 1 - , "confirmed": 1 + "user_id": str(uuid.uuid4()), "name": github_user["name"].encode("utf-8"), "github_id": github_user["id"], "user_url": github_user["html_url"].encode("utf-8"), "login_type": "github", "organization": "", "active": 1, "confirmed": 1 } save_user(user_details, user_details["user_id"]) - url = "/n/login?type=github&uid="+user_details["user_id"] + url = "/n/login?type=github&uid=" + user_details["user_id"] return redirect(url) -#@app.route("/n/login/orcid_oauth2", methods=('GET', 'POST')) +# @app.route("/n/login/orcid_oauth2", methods=('GET', 'POST')) + + def orcid_oauth2(): from uuid import uuid4 from utility.tools import ORCID_CLIENT_ID, ORCID_CLIENT_SECRET, ORCID_TOKEN_URL, ORCID_AUTH_URL @@ -661,10 +695,7 @@ def orcid_oauth2(): url = "/n/login" if code: data = { - "client_id": ORCID_CLIENT_ID - , "client_secret": ORCID_CLIENT_SECRET - , "grant_type": "authorization_code" - , "code": code + "client_id": ORCID_CLIENT_ID, "client_secret": ORCID_CLIENT_SECRET, "grant_type": "authorization_code", "code": code } result = requests.post(ORCID_TOKEN_URL, data=data) result_dict = json.loads(result.text.encode("utf-8")) @@ -672,31 +703,27 @@ def orcid_oauth2(): user_details = get_user_by_unique_column("orcid", result_dict["orcid"]) if user_details == None: user_details = { - "user_id": str(uuid4()) - , "name": result_dict["name"] - , "orcid": result_dict["orcid"] - , "user_url": "%s/%s" % ( + "user_id": str(uuid4()), "name": result_dict["name"], "orcid": result_dict["orcid"], "user_url": "%s/%s" % ( "/".join(ORCID_AUTH_URL.split("/")[:-2]), - result_dict["orcid"]) - , "login_type": "orcid" - , "organization": "" - , "active": 1 - , "confirmed": 1 + result_dict["orcid"]), "login_type": "orcid", "organization": "", "active": 1, "confirmed": 1 } save_user(user_details, user_details["user_id"]) - url = "/n/login?type=orcid&uid="+user_details["user_id"] + url = "/n/login?type=orcid&uid=" + user_details["user_id"] else: flash("There was an error getting code from ORCID") return redirect(url) + def get_github_user_details(access_token): from utility.tools import GITHUB_API_URL - result = requests.get(GITHUB_API_URL, params={"access_token":access_token}) + result = requests.get(GITHUB_API_URL, params={ + "access_token": access_token}) return result.json() + class LoginUser: - remember_time = 60 * 60 * 24 * 30 # One month in seconds + remember_time = 60 * 60 * 24 * 30 # One month in seconds def __init__(self): self.remember_me = False @@ -730,19 +757,18 @@ class LoginUser: external_login["orcid"] = ORCID_AUTH_URL return render_template( - "new_security/login_user.html" - , external_login=external_login - , redis_is_available = is_redis_available()) + "new_security/login_user.html", external_login=external_login, redis_is_available=is_redis_available()) else: - user_details = get_user_by_unique_column("email_address", params["email_address"]) + user_details = get_user_by_unique_column( + "email_address", params["email_address"]) #user_details = get_user_by_unique_column(es, "email_address", params["email_address"]) user = None valid = None if user_details: - user = model.User(); + user = model.User() for key in user_details: user.__dict__[key] = user_details[key] - valid = False; + valid = False submitted_password = params['password'] pwfields = Struct(json.loads(user.password)) @@ -752,8 +778,10 @@ class LoginUser: pwfields.iterations, pwfields.keylength, pwfields.hashfunc) - logger.debug("\n\nComparing:\n{}\n{}\n".format(encrypted.password, pwfields.password)) - valid = pbkdf2.safe_str_cmp(encrypted.password, pwfields.password) + logger.debug("\n\nComparing:\n{}\n{}\n".format( + encrypted.password, pwfields.password)) + valid = pbkdf2.safe_str_cmp( + encrypted.password, pwfields.password) logger.debug("valid is:", valid) if valid and not user.confirmed: @@ -770,7 +798,7 @@ class LoginUser: else: import_col = "false" - #g.cookie_session.import_traits_to_user() + # g.cookie_session.import_traits_to_user() self.logged_in = True @@ -779,7 +807,8 @@ class LoginUser: else: if user: self.unsuccessful_login(user) - flash("Invalid email-address or password. Please try again.", "alert-danger") + flash("Invalid email-address or password. Please try again.", + "alert-danger") response = make_response(redirect(url_for('login'))) return response @@ -787,14 +816,17 @@ class LoginUser: def actual_login(self, user, assumed_by=None, import_collections=None): """The meat of the logging in process""" session_id_signed = self.successful_login(user, assumed_by) - flash("Thank you for logging in {}.".format(user.full_name), "alert-success") - response = make_response(redirect(url_for('index_page', import_collections=import_collections))) + flash("Thank you for logging in {}.".format( + user.full_name), "alert-success") + response = make_response( + redirect(url_for('index_page', import_collections=import_collections))) if self.remember_me: max_age = self.remember_time else: max_age = None - response.set_cookie(UserSession.cookie_name, session_id_signed, max_age=max_age) + response.set_cookie(UserSession.cookie_name, + session_id_signed, max_age=max_age) return response def successful_login(self, user, assumed_by=None): @@ -810,10 +842,10 @@ class LoginUser: if not user.id: user.id = '' - session = dict(login_time = time.time(), - user_id = user.id, - user_name = user.full_name, - user_email_address = user.email_address) + session = dict(login_time=time.time(), + user_id=user.id, + user_name=user.full_name, + user_email_address=user.email_address) key = UserSession.cookie_name + ":" + login_rec.session_id logger.debug("Key when signing:", key) @@ -832,7 +864,9 @@ class LoginUser: db_session.add(login_rec) db_session.commit() -#@app.route("/n/logout") +# @app.route("/n/logout") + + def logout(): logger.debug("Logging out...") UserSession().delete_session() @@ -843,7 +877,7 @@ def logout(): return response -#@app.route("/n/forgot_password", methods=['GET']) +# @app.route("/n/forgot_password", methods=['GET']) def forgot_password(): """Entry point for forgotten password""" print("ARGS: ", request.args) @@ -851,7 +885,9 @@ def forgot_password(): print("ERRORS: ", errors) return render_template("new_security/forgot_password.html", errors=errors) -#@app.route("/n/forgot_password_submit", methods=('POST',)) +# @app.route("/n/forgot_password_submit", methods=('POST',)) + + def forgot_password_submit(): """When a forgotten password form is submitted we get here""" params = request.form @@ -859,23 +895,27 @@ def forgot_password_submit(): next_page = None if email_address != "": logger.debug("Wants to send password E-mail to ", email_address) - user_details = get_user_by_unique_column("email_address", email_address) + user_details = get_user_by_unique_column( + "email_address", email_address) if user_details: ForgotPasswordEmail(user_details["email_address"]) return render_template("new_security/forgot_password_step2.html", subject=ForgotPasswordEmail.subject) else: - flash("The e-mail entered is not associated with an account.", "alert-danger") + flash("The e-mail entered is not associated with an account.", + "alert-danger") return redirect(url_for("forgot_password")) else: flash("You MUST provide an email", "alert-danger") return redirect(url_for("forgot_password")) + @app.errorhandler(401) def unauthorized(error): return redirect(url_for('login')) + def is_redis_available(): try: Redis.ping() @@ -886,7 +926,7 @@ def is_redis_available(): ### # ZS: The following 6 functions require the old MySQL User accounts; I'm leaving them commented out just in case we decide to reimplement them using ElasticSearch ### -#def super_only(): +# def super_only(): # try: # superuser = g.user_session.user_ob.superuser # except AttributeError: @@ -895,26 +935,26 @@ def is_redis_available(): # flash("You must be a superuser to access that page.", "alert-error") # abort(401) -#@app.route("/manage/users") -#def manage_users(): +# @app.route("/manage/users") +# def manage_users(): # super_only() # template_vars = UsersManager() # return render_template("admin/user_manager.html", **template_vars.__dict__) -#@app.route("/manage/user") -#def manage_user(): +# @app.route("/manage/user") +# def manage_user(): # super_only() # template_vars = UserManager(request.args) # return render_template("admin/ind_user_manager.html", **template_vars.__dict__) -#@app.route("/manage/groups") -#def manage_groups(): +# @app.route("/manage/groups") +# def manage_groups(): # super_only() # template_vars = GroupsManager(request.args) # return render_template("admin/group_manager.html", **template_vars.__dict__) -#@app.route("/manage/make_superuser") -#def make_superuser(): +# @app.route("/manage/make_superuser") +# def make_superuser(): # super_only() # params = request.args # user_id = params['user_id'] @@ -926,8 +966,8 @@ def is_redis_available(): # flash("We've made {} a superuser!".format(user.name_and_org)) # return redirect(url_for("manage_users")) -#@app.route("/manage/assume_identity") -#def assume_identity(): +# @app.route("/manage/assume_identity") +# def assume_identity(): # super_only() # params = request.args # user_id = params['user_id'] @@ -936,12 +976,11 @@ def is_redis_available(): # return LoginUser().actual_login(user, assumed_by=assumed_by) -#@app.route("/n/register", methods=('GET', 'POST')) +# @app.route("/n/register", methods=('GET', 'POST')) def register(): params = None errors = None - params = request.form if request.form else request.args params = params.to_dict(flat=True) @@ -951,7 +990,8 @@ def register(): errors = result.errors if len(errors) == 0: - flash("Registration successful. You may login with your new account", "alert-info") + flash( + "Registration successful. You may login with your new account", "alert-info") return redirect(url_for("login")) return render_template("new_security/register_user.html", values=params, errors=errors) @@ -971,6 +1011,7 @@ def url_for_hmac(endpoint, **values): combiner = "?" return url + combiner + "hm=" + hm + def data_hmac(stringy): """Takes arbitray data string and appends :hmac so we know data hasn't been tampered with""" return stringy + ":" + actual_hmac_creation(stringy) @@ -993,6 +1034,7 @@ def verify_url_hmac(url): assert hm == hmac, "Unexpected url (stage 3)" + def actual_hmac_creation(stringy): """Helper function to create the actual hmac""" @@ -1005,6 +1047,7 @@ def actual_hmac_creation(stringy): hm = hm[:20] return hm + app.jinja_env.globals.update(url_for_hmac=url_for_hmac, data_hmac=data_hmac) @@ -1017,27 +1060,29 @@ app.jinja_env.globals.update(url_for_hmac=url_for_hmac, # Body=body)) # Redis.rpush("mail_queue", msg) + def send_email(toaddr, msg, fromaddr="no-reply@genenetwork.org"): """Send an E-mail through SMTP_CONNECT host. If SMTP_USERNAME is not 'UNKNOWN' TLS is used """ if SMTP_USERNAME == 'UNKNOWN': - logger.debug("SMTP: connecting with host "+SMTP_CONNECT) + logger.debug("SMTP: connecting with host " + SMTP_CONNECT) server = SMTP(SMTP_CONNECT) server.sendmail(fromaddr, toaddr, msg) else: - logger.debug("SMTP: connecting TLS with host "+SMTP_CONNECT) + logger.debug("SMTP: connecting TLS with host " + SMTP_CONNECT) server = SMTP(SMTP_CONNECT) server.starttls() - logger.debug("SMTP: login with user "+SMTP_USERNAME) + logger.debug("SMTP: login with user " + SMTP_USERNAME) server.login(SMTP_USERNAME, SMTP_PASSWORD) - logger.debug("SMTP: "+fromaddr) - logger.debug("SMTP: "+toaddr) - logger.debug("SMTP: "+msg) + logger.debug("SMTP: " + fromaddr) + logger.debug("SMTP: " + toaddr) + logger.debug("SMTP: " + msg) server.sendmail(fromaddr, toaddr, msg) server.quit() - logger.info("Successfully sent email to "+toaddr) + logger.info("Successfully sent email to " + toaddr) + class GroupsManager: def __init__(self, kw): diff --git a/wqflask/wqflask/user_session.py b/wqflask/wqflask/user_session.py index cc0ac744..67e2e158 100644 --- a/wqflask/wqflask/user_session.py +++ b/wqflask/wqflask/user_session.py @@ -20,30 +20,36 @@ logger = getLogger(__name__) THREE_DAYS = 60 * 60 * 24 * 3 THIRTY_DAYS = 60 * 60 * 24 * 30 + @app.before_request def get_user_session(): logger.info("@app.before_request get_session") g.user_session = UserSession() - #ZS: I think this should solve the issue of deleting the cookie and redirecting to the home page when a user's session has expired + # ZS: I think this should solve the issue of deleting the cookie and redirecting to the home page when a user's session has expired if not g.user_session: response = make_response(redirect(url_for('login'))) response.set_cookie('session_id_v2', '', expires=0) return response + @app.after_request def set_user_session(response): if hasattr(g, 'user_session'): if not request.cookies.get(g.user_session.cookie_name): - response.set_cookie(g.user_session.cookie_name, g.user_session.cookie) + response.set_cookie(g.user_session.cookie_name, + g.user_session.cookie) return response + def verify_cookie(cookie): the_uuid, separator, the_signature = cookie.partition(':') assert len(the_uuid) == 36, "Is session_id a uuid?" assert separator == ":", "Expected a : here" - assert the_signature == hmac.hmac_creation(the_uuid), "Uh-oh, someone tampering with the cookie?" + assert the_signature == hmac.hmac_creation( + the_uuid), "Uh-oh, someone tampering with the cookie?" return the_uuid + def create_signed_cookie(): the_uuid = str(uuid.uuid4()) signature = hmac.hmac_creation(the_uuid) @@ -51,17 +57,21 @@ def create_signed_cookie(): logger.debug("uuid_signed:", uuid_signed) return the_uuid, uuid_signed -@app.route("/user/manage", methods=('GET','POST')) + +@app.route("/user/manage", methods=('GET', 'POST')) def manage_user(): params = request.form if request.form else request.args if 'new_full_name' in params: - set_user_attribute(g.user_session.user_id, 'full_name', params['new_full_name']) + set_user_attribute(g.user_session.user_id, + 'full_name', params['new_full_name']) if 'new_organization' in params: - set_user_attribute(g.user_session.user_id, 'organization', params['new_organization']) + set_user_attribute(g.user_session.user_id, + 'organization', params['new_organization']) user_details = get_user_by_unique_column("user_id", g.user_session.user_id) - return render_template("admin/manage_user.html", user_details = user_details) + return render_template("admin/manage_user.html", user_details=user_details) + class UserSession: """Logged in user handling""" @@ -89,25 +99,26 @@ class UserSession: self.session_id = session_id self.record = Redis.hgetall(self.redis_key) - #ZS: If user correctled logged in but their session expired - #ZS: Need to test this by setting the time-out to be really short or something + # ZS: If user correctled logged in but their session expired + # ZS: Need to test this by setting the time-out to be really short or something if not self.record or self.record == []: if user_cookie: self.logged_in = False - self.record = dict(login_time = time.time(), - user_type = "anon", - user_id = str(uuid.uuid4())) + self.record = dict(login_time=time.time(), + user_type="anon", + user_id=str(uuid.uuid4())) Redis.hmset(self.redis_key, self.record) Redis.expire(self.redis_key, THIRTY_DAYS) - ########### Grrr...this won't work because of the way flask handles cookies + # Grrr...this won't work because of the way flask handles cookies # Delete the cookie - flash("Due to inactivity your session has expired. If you'd like please login again.") + flash( + "Due to inactivity your session has expired. If you'd like please login again.") return None else: - self.record = dict(login_time = time.time(), - user_type = "anon", - user_id = str(uuid.uuid4())) + self.record = dict(login_time=time.time(), + user_type="anon", + user_id=str(uuid.uuid4())) Redis.hmset(self.redis_key, self.record) Redis.expire(self.redis_key, THIRTY_DAYS) else: @@ -138,13 +149,13 @@ class UserSession: def redis_user_id(self): """User id from Redis (need to check if this is the same as the id stored in self.records)""" - #ZS: This part is a bit weird. Some accounts used to not have saved user ids, and in the process of testing I think I created some duplicate accounts for myself. - #ZS: Accounts should automatically generate user_ids if they don't already have one now, so this might not be necessary for anything other than my account's collections + # ZS: This part is a bit weird. Some accounts used to not have saved user ids, and in the process of testing I think I created some duplicate accounts for myself. + # ZS: Accounts should automatically generate user_ids if they don't already have one now, so this might not be necessary for anything other than my account's collections if 'user_email_address' in self.record: user_email = self.record['user_email_address'] - #ZS: Get user's collections if they exist + # ZS: Get user's collections if they exist user_id = None user_id = get_user_id("email_address", user_email) elif 'user_id' in self.record: @@ -153,7 +164,7 @@ class UserSession: user_github_id = self.record['github_id'] user_id = None user_id = get_user_id("github_id", user_github_id) - else: #ZS: Anonymous user + else: # ZS: Anonymous user return None return user_id @@ -170,9 +181,11 @@ class UserSession: def user_collections(self): """List of user's collections""" - #ZS: Get user's collections if they exist + # ZS: Get user's collections if they exist collections = get_user_collections(self.user_id) - collections = [item for item in collections if item['name'] != "Your Default Collection"] + [item for item in collections if item['name'] == "Your Default Collection"] #ZS: Ensure Default Collection is last in list + collections = [item for item in collections if item['name'] != "Your Default Collection"] + \ + [item for item in collections if item['name'] + == "Your Default Collection"] # ZS: Ensure Default Collection is last in list return collections @property @@ -189,7 +202,7 @@ class UserSession: 'created_timestamp': datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), 'changed_timestamp': datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), 'num_members': len(traits), - 'members': list(traits) } + 'members': list(traits)} current_collections = self.user_collections current_collections.append(collection_dict) @@ -228,12 +241,14 @@ class UserSession: this_collection = self.get_collection_by_id(collection_id) updated_collection = this_collection - current_members_minus_new = [member for member in this_collection['members'] if member not in traits_to_add] + current_members_minus_new = [ + member for member in this_collection['members'] if member not in traits_to_add] updated_traits = traits_to_add + current_members_minus_new updated_collection['members'] = updated_traits updated_collection['num_members'] = len(updated_traits) - updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') + updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime( + '%b %d %Y %I:%M%p') updated_collections = [] for collection in self.user_collections: @@ -259,7 +274,8 @@ class UserSession: updated_collection['members'] = updated_traits updated_collection['num_members'] = len(updated_traits) - updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') + updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime( + '%b %d %Y %I:%M%p') updated_collections = [] for collection in self.user_collections: @@ -302,5 +318,3 @@ class UserSession: # And more importantly delete the redis record Redis.delete(self.redis_key) self.logged_in = False - - diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 37f2094f..f9b8f310 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -140,8 +140,8 @@ def handle_bad_request(e): logger.error(traceback.format_exc()) now = datetime.datetime.utcnow() time_str = now.strftime('%l:%M%p UTC %b %d, %Y') - formatted_lines = [request.url + - " ("+time_str+")"]+traceback.format_exc().splitlines() + formatted_lines = [request.url + + " (" + time_str + ")"] + traceback.format_exc().splitlines() # Handle random animations # Use a cookie to have one animation on refresh @@ -240,7 +240,7 @@ def search_page(): if USE_REDIS and valid_search: Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL)) - Redis.expire(key, 60*60) + Redis.expire(key, 60 * 60) if valid_search: return render_template("search_result_page.html", **result) @@ -518,10 +518,10 @@ def export_perm_data(): ["#N_genotypes: " + str(perm_info['n_genotypes'])], ["#Genotype_file: " + perm_info['genofile']], ["#Units_linkage: " + perm_info['units_linkage']], - ["#Permutation_stratified_by: " + - ", ".join([str(cofactor) for cofactor in perm_info['strat_cofactors']])], - ["#RESULTS_1: Suggestive LRS(p=0.63) = " + - str(np.percentile(np.array(perm_info['perm_data']), 67))], + ["#Permutation_stratified_by: " + + ", ".join([str(cofactor) for cofactor in perm_info['strat_cofactors']])], + ["#RESULTS_1: Suggestive LRS(p=0.63) = " + + str(np.percentile(np.array(perm_info['perm_data']), 67))], ["#RESULTS_2: Significant LRS(p=0.05) = " + str( np.percentile(np.array(perm_info['perm_data']), 95))], ["#RESULTS_3: Highly Significant LRS(p=0.01) = " + str( @@ -601,7 +601,7 @@ def heatmap_page(): pickled_result = pickle.dumps(result, pickle.HIGHEST_PROTOCOL) logger.info("pickled result length:", len(pickled_result)) Redis.set(key, pickled_result) - Redis.expire(key, 60*60) + Redis.expire(key, 60 * 60) with Bench("Rendering template"): rendered_template = render_template("heatmap.html", **result) diff --git a/wqflask/wqflask/wgcna/wgcna_analysis.py b/wqflask/wqflask/wgcna/wgcna_analysis.py index 21516b30..f96892a0 100644 --- a/wqflask/wqflask/wgcna/wgcna_analysis.py +++ b/wqflask/wqflask/wgcna/wgcna_analysis.py @@ -70,7 +70,7 @@ class WGCNA: self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')] print(("Retrieved phenotype data from database", - requestform['trait_list'])) + requestform['trait_list'])) helper_functions.get_trait_db_obs(self, self.trait_db_list) # self.input contains the phenotype values we need to send to R |