From f376eaca55643972943fc6d313a3ca00b32d66ae Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 18 Sep 2020 14:40:41 -0500 Subject: Made a bunch of changes to move trait page DataTables initialization to a separate file (initialize_show_trait_tables.js). The biggest complication was getting the order of attributes in the rows to sync with the order of attribute names in the column headers. Previously this logic was all in the template. * wqflask/base/webqtlCaseData.py - added attribute first_attr_col as a very awkward solution to passing the column position into the column render function in situations where there are case attribute columns (which can be variable in number) * wqflask/wqflask/show_trait/show_trait.py - Replace "attribute_names" in js_data with "attributes" (which allows the JS access to more information) and also pass new se_exists and has_num_cases variables with js_data, so the javascript has access to whether or not those columns exist in the table * wqflask/wqflask/static/new/javascript/show_trait.js - Change case attribute-related logic to use js_data.attributes instead of js_data.attribute_names * wqflask/wqflask/templates/show_trait.html - Removed table initialization from template * wqflask/wqflask/static/new/javascript/initialize_show_trait_tables.js - new file that initializes tables and reproduces what the template logic used to do with JS logic --- wqflask/base/webqtlCaseData.py | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'wqflask/base') diff --git a/wqflask/base/webqtlCaseData.py b/wqflask/base/webqtlCaseData.py index 2844cedd..3cf2d80d 100644 --- a/wqflask/base/webqtlCaseData.py +++ b/wqflask/base/webqtlCaseData.py @@ -41,6 +41,8 @@ class webqtlCaseData: self.this_id = None # Set a sane default (can't be just "id" cause that's a reserved word) self.outlier = None # Not set to True/False until later + self.first_attr_col = self.get_first_attr_col() + def __repr__(self): case_data_string = " " if self.value is not None: 
@@ -79,3 +81,12 @@ class webqtlCaseData: if self.num_cases is not None: return "%s" % self.num_cases return "x" + + def get_first_attr_col(self): + col_num = 4 + if self.variance is not None: + col_num += 2 + if self.num_cases is not None: + col_num += 1 + + return col_num \ No newline at end of file -- cgit v1.2.3 From 217f527e873d5197c7efcaec627e1df5afadefa4 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 8 Oct 2020 15:32:18 -0500 Subject: Fixed issue where new phenotype groups wouldn't be saved to the self.datasets property because the group name was used as the key (instead of the group name + "Publish", which is the full dataset name for phenotypes) * wqflask/base/data_set.py - Set "group_name" as a separate variable from "name" to avoid it being used as the key in self.datasets --- wqflask/base/data_set.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index e0ef559c..aeafc027 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -150,10 +150,11 @@ Publish or ProbeSet. E.g. 
"geno": "Geno", } + group_name = name if t in ['pheno', 'other_pheno']: - name = name.replace("Publish", "") + group_name = name.replace("Publish", "") - if bool(len(g.db.execute(sql_query_mapping[t].format(name)).fetchone())): + if bool(len(g.db.execute(sql_query_mapping[t].format(group_name)).fetchone())): self.datasets[name] = dataset_name_mapping[t] self.redis_instance.set("dataset_structure", json.dumps(self.datasets)) return True -- cgit v1.2.3 From 3d444c29ab975e313534dd7c57747b3e79f06c26 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 8 Oct 2020 15:40:18 -0500 Subject: Fixed remaining issue that applies to all dataset types * wqflask/base/data_set.py - Fixed issue where there was an error when trying to take the len of the query results when there were no results --- wqflask/base/data_set.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index aeafc027..2f1549ae 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -154,7 +154,8 @@ Publish or ProbeSet. E.g. 
if t in ['pheno', 'other_pheno']: group_name = name.replace("Publish", "") - if bool(len(g.db.execute(sql_query_mapping[t].format(group_name)).fetchone())): + results = g.db.execute(sql_query_mapping[t].format(group_name)).fetchone() + if results: self.datasets[name] = dataset_name_mapping[t] self.redis_instance.set("dataset_structure", json.dumps(self.datasets)) return True -- cgit v1.2.3 From eae4f1e6fad57a77063a637f0b03c171b6437565 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 29 Sep 2020 14:57:25 +0300 Subject: Apply PEP 8 to buffer * wqflask/wqflask/wgcna/wgcna_analysis.py: Apply pep8 * wqflask/wqflask/collect.py: Ditto * wqflask/base/trait.py: Ditto --- wqflask/base/trait.py | 194 +++++++++++++++++++------------- wqflask/wqflask/collect.py | 37 +++--- wqflask/wqflask/wgcna/wgcna_analysis.py | 181 +++++++++++++++++------------ 3 files changed, 238 insertions(+), 174 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 8f8b5b70..7ebbc4bb 100644 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -1,4 +1,10 @@ from __future__ import absolute_import, division, print_function +from utility.logger import getLogger +from flask import Flask, g, request, url_for, redirect, make_response, render_template +from pprint import pformat as pf +from MySQLdb import escape_string as escape +import simplejson as json +from wqflask import app import os import string @@ -16,22 +22,19 @@ from utility import webqtlUtil from utility import hmac from utility.authentication_tools import check_resource_availability from utility.tools import GN2_BASE_URL, GN_VERSION -from utility.redis_tools import get_redis_conn, get_resource_id, get_resource_info -Redis = get_redis_conn() +from utility.redis_tools import get_redis_conn +from utility.redis_tools import get_resource_id +from utility.redis_tools import get_resource_info -from wqflask import app +Redis = get_redis_conn() -import simplejson as json -from MySQLdb import 
escape_string as escape -from pprint import pformat as pf -from flask import Flask, g, request, url_for, redirect, make_response, render_template +logger = getLogger(__name__) -from utility.logger import getLogger -logger = getLogger(__name__ ) def create_trait(**kw): - assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name"; + assert bool(kw.get('dataset')) != bool( + kw.get('dataset_name')), "Needs dataset ob. or name" permitted = True if kw.get('name'): @@ -43,18 +46,21 @@ def create_trait(**kw): if kw.get('dataset_name') != "Temp": if dataset.type == 'Publish': - permissions = check_resource_availability(dataset, kw.get('name')) + permissions = check_resource_availability( + dataset, kw.get('name')) else: permissions = check_resource_availability(dataset) if "view" in permissions['data']: the_trait = GeneralTrait(**kw) if the_trait.dataset.type != "Temp": - the_trait = retrieve_trait_info(the_trait, the_trait.dataset, get_qtl_info=kw.get('get_qtl_info')) + the_trait = retrieve_trait_info( + the_trait, the_trait.dataset, get_qtl_info=kw.get('get_qtl_info')) return the_trait else: return None + class GeneralTrait(object): """ Trait class defines a trait in webqtl, can be either Microarray, @@ -64,12 +70,15 @@ class GeneralTrait(object): def __init__(self, get_qtl_info=False, get_sample_info=True, **kw): # xor assertion - assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name"; - self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc. + assert bool(kw.get('dataset')) != bool( + kw.get('dataset_name')), "Needs dataset ob. or name" + # Trait ID, ProbeSet ID, Published ID, etc. 
+ self.name = kw.get('name') if kw.get('dataset_name'): if kw.get('dataset_name') == "Temp": temp_group = self.name.split("_")[2] - self.dataset = create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = temp_group) + self.dataset = create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=temp_group) else: self.dataset = create_dataset(kw.get('dataset_name')) else: @@ -77,7 +86,8 @@ class GeneralTrait(object): self.cellid = kw.get('cellid') self.identification = kw.get('identification', 'un-named trait') self.haveinfo = kw.get('haveinfo', False) - self.sequence = kw.get('sequence') # Blat sequence, available for ProbeSet + # Blat sequence, available for ProbeSet + self.sequence = kw.get('sequence') self.data = kw.get('data', {}) self.view = True @@ -125,11 +135,11 @@ class GeneralTrait(object): vals.append(sample_data.value) the_vars.append(sample_data.variance) sample_aliases.append(sample_data.name2) - return samples, vals, the_vars, sample_aliases + return samples, vals, the_vars, sample_aliases @property def description_fmt(self): - '''Return a text formated description''' + """Return a text formated description""" if self.dataset.type == 'ProbeSet': if self.description: formatted = self.description @@ -149,7 +159,7 @@ class GeneralTrait(object): @property def alias_fmt(self): - '''Return a text formatted alias''' + """Return a text formatted alias""" alias = 'Not available' if getattr(self, "alias", None): @@ -160,16 +170,20 @@ class GeneralTrait(object): @property def wikidata_alias_fmt(self): - '''Return a text formatted alias''' + """Return a text formatted alias""" alias = 'Not available' if self.symbol: - human_response = requests.get(GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.upper()) - mouse_response = requests.get(GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.capitalize()) - other_response = requests.get(GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.lower()) + human_response = requests.get( + GN2_BASE_URL + 
"gn3/gene/aliases/" + self.symbol.upper()) + mouse_response = requests.get( + GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.capitalize()) + other_response = requests.get( + GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.lower()) if human_response and mouse_response and other_response: - alias_list = json.loads(human_response.content) + json.loads(mouse_response.content) + json.loads(other_response.content) + alias_list = json.loads(human_response.content) + json.loads( + mouse_response.content) + json.loads(other_response.content) filtered_aliases = [] seen = set() @@ -183,31 +197,31 @@ class GeneralTrait(object): return alias - @property def location_fmt(self): - '''Return a text formatted location + """Return a text formatted location While we're at it we set self.location in case we need it later (do we?) - ''' + """ if self.chr and self.mb: - self.location = 'Chr %s @ %s Mb' % (self.chr,self.mb) + self.location = 'Chr %s @ %s Mb' % (self.chr, self.mb) elif self.chr: self.location = 'Chr %s @ Unknown position' % (self.chr) else: self.location = 'Not available' fmt = self.location - ##XZ: deal with direction + # XZ: deal with direction if self.strand_probe == '+': fmt += (' on the plus strand ') elif self.strand_probe == '-': fmt += (' on the minus strand ') return fmt - + + def retrieve_sample_data(trait, dataset, samplelist=None): if samplelist == None: samplelist = [] @@ -225,16 +239,19 @@ def retrieve_sample_data(trait, dataset, samplelist=None): all_samples_ordered = dataset.group.all_samples_ordered() for i, item in enumerate(results): try: - trait.data[all_samples_ordered[i]] = webqtlCaseData(all_samples_ordered[i], float(item)) + trait.data[all_samples_ordered[i]] = webqtlCaseData( + all_samples_ordered[i], float(item)) except: pass else: for item in results: name, value, variance, num_cases, name2 = item if not samplelist or (samplelist and name in samplelist): - trait.data[name] = webqtlCaseData(*item) #name, value, variance, num_cases) + # name, 
value, variance, num_cases) + trait.data[name] = webqtlCaseData(*item) return trait + @app.route("/trait/get_sample_data") def get_sample_data(): params = request.args @@ -250,7 +267,8 @@ def get_sample_data(): trait_dict['group'] = trait_ob.dataset.group.name trait_dict['tissue'] = trait_ob.dataset.tissue trait_dict['species'] = trait_ob.dataset.group.species - trait_dict['url'] = url_for('show_trait_page', trait_id = trait, dataset = dataset) + trait_dict['url'] = url_for( + 'show_trait_page', trait_id=trait, dataset=dataset) trait_dict['description'] = trait_ob.description_display if trait_ob.dataset.type == "ProbeSet": trait_dict['symbol'] = trait_ob.symbol @@ -260,22 +278,24 @@ def get_sample_data(): trait_dict['pubmed_link'] = trait_ob.pubmed_link trait_dict['pubmed_text'] = trait_ob.pubmed_text - return json.dumps([trait_dict, {key: value.value for key, value in trait_ob.data.iteritems() }]) + return json.dumps([trait_dict, {key: value.value for key, value in trait_ob.data.iteritems()}]) else: return None - + + def jsonable(trait): """Return a dict suitable for using as json Actual turning into json doesn't happen here though""" - dataset = create_dataset(dataset_name = trait.dataset.name, dataset_type = trait.dataset.type, group_name = trait.dataset.group.name) - + dataset = create_dataset(dataset_name=trait.dataset.name, + dataset_type=trait.dataset.type, group_name=trait.dataset.group.name) + if dataset.type == "ProbeSet": return dict(name=trait.name, symbol=trait.symbol, dataset=dataset.name, - dataset_name = dataset.shortname, + dataset_name=dataset.shortname, description=trait.description_display, mean=trait.mean, location=trait.location_repr, @@ -287,7 +307,7 @@ def jsonable(trait): if trait.pubmed_id: return dict(name=trait.name, dataset=dataset.name, - dataset_name = dataset.shortname, + dataset_name=dataset.shortname, description=trait.description_display, abbreviation=trait.abbreviation, authors=trait.authors, @@ -300,7 +320,7 @@ def 
jsonable(trait): else: return dict(name=trait.name, dataset=dataset.name, - dataset_name = dataset.shortname, + dataset_name=dataset.shortname, description=trait.description_display, abbreviation=trait.abbreviation, authors=trait.authors, @@ -312,19 +332,20 @@ def jsonable(trait): elif dataset.type == "Geno": return dict(name=trait.name, dataset=dataset.name, - dataset_name = dataset.shortname, + dataset_name=dataset.shortname, location=trait.location_repr ) else: return dict() + def jsonable_table_row(trait, dataset_name, index): """Return a list suitable for json and intended to be displayed in a table Actual turning into json doesn't happen here though""" dataset = create_dataset(dataset_name) - + if dataset.type == "ProbeSet": if trait.mean == "": mean = "N/A" @@ -336,11 +357,13 @@ def jsonable_table_row(trait, dataset_name, index): additive = "%.3f" % round(float(trait.additive), 2) return ['', index, - ''+str(trait.name)+'', + ''+str(trait.name)+'', trait.symbol, trait.description_display, trait.location_repr, - mean, + mean, trait.LRS_score_repr, trait.LRS_location_repr, additive] @@ -352,7 +375,9 @@ def jsonable_table_row(trait, dataset_name, index): if trait.pubmed_id: return ['', index, - ''+str(trait.name)+'', + ''+str(trait.name)+'', trait.description_display, trait.authors, '' + trait.pubmed_text + '', @@ -362,7 +387,9 @@ def jsonable_table_row(trait, dataset_name, index): else: return ['', index, - ''+str(trait.name)+'', + ''+str(trait.name)+'', trait.description_display, trait.authors, trait.pubmed_text, @@ -372,7 +399,9 @@ def jsonable_table_row(trait, dataset_name, index): elif dataset.type == "Geno": return ['', index, - ''+str(trait.name)+'', + ''+str(trait.name)+'', trait.location_repr] else: return dict() @@ -383,14 +412,16 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): resource_id = get_resource_id(dataset, trait.name) if dataset.type == 'Publish': - the_url = 
"http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view".format(resource_id, g.user_session.user_id) + the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view".format( + resource_id, g.user_session.user_id) else: - the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view&trait={}".format(resource_id, g.user_session.user_id, trait.name) + the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view&trait={}".format( + resource_id, g.user_session.user_id, trait.name) try: response = requests.get(the_url).content trait_info = json.loads(response) - except: #ZS: I'm assuming the trait is viewable if the try fails for some reason; it should never reach this point unless the user has privileges, since that's dealt with in create_trait + except: # ZS: I'm assuming the trait is viewable if the try fails for some reason; it should never reach this point unless the user has privileges, since that's dealt with in create_trait if dataset.type == 'Publish': query = """ SELECT @@ -419,8 +450,8 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): logger.sql(query) trait_info = g.db.execute(query).fetchone() - #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name - #XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms. + # XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name + # XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms. elif dataset.type == 'ProbeSet': display_fields_string = ', ProbeSet.'.join(dataset.display_fields) display_fields_string = 'ProbeSet.' 
+ display_fields_string @@ -433,14 +464,15 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): ProbeSetFreeze.Name = '%s' AND ProbeSet.Name = '%s' """ % (escape(display_fields_string), - escape(dataset.name), - escape(str(trait.name))) + escape(dataset.name), + escape(str(trait.name))) logger.sql(query) trait_info = g.db.execute(query).fetchone() - #XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name + # XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name # to avoid the problem of same marker name from different species. elif dataset.type == 'Geno': - display_fields_string = string.join(dataset.display_fields,',Geno.') + display_fields_string = string.join( + dataset.display_fields, ',Geno.') display_fields_string = 'Geno.' + display_fields_string query = """ SELECT %s @@ -451,21 +483,21 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): GenoFreeze.Name = '%s' AND Geno.Name = '%s' """ % (escape(display_fields_string), - escape(dataset.name), - escape(trait.name)) + escape(dataset.name), + escape(trait.name)) logger.sql(query) trait_info = g.db.execute(query).fetchone() - else: #Temp type + else: # Temp type query = """SELECT %s FROM %s WHERE Name = %s""" logger.sql(query) trait_info = g.db.execute(query, - (string.join(dataset.display_fields,','), - dataset.type, trait.name)).fetchone() + (string.join(dataset.display_fields, ','), + dataset.type, trait.name)).fetchone() if trait_info: trait.haveinfo = True for i, field in enumerate(dataset.display_fields): - holder = trait_info[i] + holder = trait_info[i] setattr(trait, field, holder) if dataset.type == 'Publish': @@ -478,9 +510,9 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): description = trait.post_publication_description - #If the dataset is confidential and the user has access to confidential - #phenotype traits, then display the pre-publication description instead - #of the post-publication 
description + # If the dataset is confidential and the user has access to confidential + # phenotype traits, then display the pre-publication description instead + # of the post-publication description if trait.confidential: trait.abbreviation = trait.pre_publication_abbreviation trait.description_display = trait.pre_publication_description @@ -491,9 +523,12 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): else: trait.description_display = "" - trait.abbreviation = unicode(str(trait.abbreviation).strip(codecs.BOM_UTF8), 'utf-8', errors="replace") - trait.description_display = unicode(str(trait.description_display).strip(codecs.BOM_UTF8), 'utf-8', errors="replace") - trait.authors = unicode(str(trait.authors).strip(codecs.BOM_UTF8), 'utf-8', errors="replace") + trait.abbreviation = unicode(str(trait.abbreviation).strip( + codecs.BOM_UTF8), 'utf-8', errors="replace") + trait.description_display = unicode(str(trait.description_display).strip( + codecs.BOM_UTF8), 'utf-8', errors="replace") + trait.authors = unicode(str(trait.authors).strip( + codecs.BOM_UTF8), 'utf-8', errors="replace") if not trait.year.isdigit(): trait.pubmed_text = "N/A" @@ -504,8 +539,10 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % trait.pubmed_id if dataset.type == 'ProbeSet' and dataset.group: - description_string = unicode(str(trait.description).strip(codecs.BOM_UTF8), 'utf-8') - target_string = unicode(str(trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8') + description_string = unicode( + str(trait.description).strip(codecs.BOM_UTF8), 'utf-8') + target_string = unicode( + str(trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8') if str(description_string or "") != "" and description_string != 'None': description_display = description_string @@ -514,7 +551,7 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): if (str(description_display or "") != "" and description_display != 
'N/A' and - str(target_string or "") != "" and target_string != 'None'): + str(target_string or "") != "" and target_string != 'None'): description_display = description_display + '; ' + target_string.strip() # Save it for the jinja2 template @@ -522,15 +559,17 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): trait.location_repr = 'N/A' if trait.chr and trait.mb: - trait.location_repr = 'Chr%s: %.6f' % (trait.chr, float(trait.mb)) + trait.location_repr = 'Chr%s: %.6f' % ( + trait.chr, float(trait.mb)) elif dataset.type == "Geno": trait.location_repr = 'N/A' if trait.chr and trait.mb: - trait.location_repr = 'Chr%s: %.6f' % (trait.chr, float(trait.mb)) + trait.location_repr = 'Chr%s: %.6f' % ( + trait.chr, float(trait.mb)) if get_qtl_info: - #LRS and its location + # LRS and its location trait.LRS_score_repr = "N/A" trait.LRS_location_repr = "N/A" trait.locus = trait.locus_chr = trait.locus_mb = trait.lrs = trait.pvalue = trait.additive = "" @@ -601,10 +640,11 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): else: trait.locus = trait.lrs = trait.additive = "" if (dataset.type == 'Publish' or dataset.type == "ProbeSet") and str(trait.locus_chr or "") != "" and str(trait.locus_mb or "") != "": - trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (trait.locus_chr, float(trait.locus_mb)) + trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % ( + trait.locus_chr, float(trait.locus_mb)) if str(trait.lrs or "") != "": trait.LRS_score_repr = LRS_score_repr = '%3.1f' % trait.lrs else: - raise KeyError, `trait.name`+' information is not found in the database.' - + raise KeyError, `trait.name`+ ' information is not found in the database.' 
+ return trait diff --git a/wqflask/wqflask/collect.py b/wqflask/wqflask/collect.py index 42a09fed..4c558bfe 100644 --- a/wqflask/wqflask/collect.py +++ b/wqflask/wqflask/collect.py @@ -1,41 +1,30 @@ from __future__ import print_function, division, absolute_import - -import os -import hashlib import datetime -import time - -import uuid -import hashlib -import base64 - -import urlparse - import simplejson as json -from flask import (Flask, g, render_template, url_for, request, make_response, - redirect, flash, jsonify) +from flask import g +from flask import render_template +from flask import url_for +from flask import request +from flask import redirect +from flask import flash from wqflask import app - -from pprint import pformat as pf - -from wqflask.database import db_session - -from wqflask import model - -from utility import Bunch, Struct, hmac +from utility import hmac from utility.formatting import numify from utility.redis_tools import get_redis_conn -Redis = get_redis_conn() -from base.trait import create_trait, retrieve_trait_info, jsonable +from base.trait import create_trait +from base.trait import retrieve_trait_info +from base.trait import jsonable from base.data_set import create_dataset -import logging from utility.logger import getLogger + logger = getLogger(__name__) +Redis = get_redis_conn() + def process_traits(unprocessed_traits): if isinstance(unprocessed_traits, basestring): diff --git a/wqflask/wqflask/wgcna/wgcna_analysis.py b/wqflask/wqflask/wgcna/wgcna_analysis.py index 880a1cb2..70077703 100644 --- a/wqflask/wqflask/wgcna/wgcna_analysis.py +++ b/wqflask/wqflask/wgcna/wgcna_analysis.py @@ -1,5 +1,8 @@ -# WGCNA analysis for GN2 -# Author / Maintainer: Danny Arends +""" +WGCNA analysis for GN2 + +Author / Maintainer: Danny Arends +""" import sys from numpy import * import scipy as sp # SciPy @@ -17,106 +20,138 @@ from utility import helper_functions from rpy2.robjects.packages import importr utils = importr("utils") -## Get pointers to some 
common R functions -r_library = ro.r["library"] # Map the library function -r_options = ro.r["options"] # Map the options function -r_read_csv = ro.r["read.csv"] # Map the read.csv function -r_dim = ro.r["dim"] # Map the dim function -r_c = ro.r["c"] # Map the c function -r_cat = ro.r["cat"] # Map the cat function -r_paste = ro.r["paste"] # Map the paste function -r_unlist = ro.r["unlist"] # Map the unlist function -r_unique = ro.r["unique"] # Map the unique function -r_length = ro.r["length"] # Map the length function -r_unlist = ro.r["unlist"] # Map the unlist function -r_list = ro.r.list # Map the list function -r_matrix = ro.r.matrix # Map the matrix function -r_seq = ro.r["seq"] # Map the seq function -r_table = ro.r["table"] # Map the table function -r_names = ro.r["names"] # Map the names function -r_sink = ro.r["sink"] # Map the sink function -r_is_NA = ro.r["is.na"] # Map the is.na function -r_file = ro.r["file"] # Map the file function -r_png = ro.r["png"] # Map the png function for plotting -r_dev_off = ro.r["dev.off"] # Map the dev.off function +# Get pointers to some common R functions +r_library = ro.r["library"] # Map the library function +r_options = ro.r["options"] # Map the options function +r_read_csv = ro.r["read.csv"] # Map the read.csv function +r_dim = ro.r["dim"] # Map the dim function +r_c = ro.r["c"] # Map the c function +r_cat = ro.r["cat"] # Map the cat function +r_paste = ro.r["paste"] # Map the paste function +r_unlist = ro.r["unlist"] # Map the unlist function +r_unique = ro.r["unique"] # Map the unique function +r_length = ro.r["length"] # Map the length function +r_unlist = ro.r["unlist"] # Map the unlist function +r_list = ro.r.list # Map the list function +r_matrix = ro.r.matrix # Map the matrix function +r_seq = ro.r["seq"] # Map the seq function +r_table = ro.r["table"] # Map the table function +r_names = ro.r["names"] # Map the names function +r_sink = ro.r["sink"] # Map the sink function +r_is_NA = ro.r["is.na"] # Map the 
is.na function +r_file = ro.r["file"] # Map the file function +r_png = ro.r["png"] # Map the png function for plotting +r_dev_off = ro.r["dev.off"] # Map the dev.off function + class WGCNA(object): def __init__(self): + # To log output from stdout/stderr to a file add `r_sink(log)` print("Initialization of WGCNA") - #log = r_file("/tmp/genenetwork_wcgna.log", open = "wt") - #r_sink(log) # Uncomment the r_sink() commands to log output from stdout/stderr to a file - #r_sink(log, type = "message") - r_library("WGCNA") # Load WGCNA - Should only be done once, since it is quite expensive - r_options(stringsAsFactors = False) + + # Load WGCNA - Should only be done once, since it is quite expensive + r_library("WGCNA") + r_options(stringsAsFactors=False) print("Initialization of WGCNA done, package loaded in R session") - self.r_enableWGCNAThreads = ro.r["enableWGCNAThreads"] # Map the enableWGCNAThreads function - self.r_pickSoftThreshold = ro.r["pickSoftThreshold"] # Map the pickSoftThreshold function - self.r_blockwiseModules = ro.r["blockwiseModules"] # Map the blockwiseModules function - self.r_labels2colors = ro.r["labels2colors"] # Map the labels2colors function - self.r_plotDendroAndColors = ro.r["plotDendroAndColors"] # Map the plotDendroAndColors function + # Map the enableWGCNAThreads function + self.r_enableWGCNAThreads = ro.r["enableWGCNAThreads"] + # Map the pickSoftThreshold function + self.r_pickSoftThreshold = ro.r["pickSoftThreshold"] + # Map the blockwiseModules function + self.r_blockwiseModules = ro.r["blockwiseModules"] + # Map the labels2colors function + self.r_labels2colors = ro.r["labels2colors"] + # Map the plotDendroAndColors function + self.r_plotDendroAndColors = ro.r["plotDendroAndColors"] print("Obtained pointers to WGCNA functions") def run_analysis(self, requestform): print("Starting WGCNA analysis on dataset") - self.r_enableWGCNAThreads() # Enable multi threading - self.trait_db_list = [trait.strip() for trait in 
requestform['trait_list'].split(',')] - print("Retrieved phenotype data from database", requestform['trait_list']) + # Enable multi threading + self.r_enableWGCNAThreads() + self.trait_db_list = [trait.strip() + for trait in requestform['trait_list'].split(',')] + print("Retrieved phenotype data from database", + requestform['trait_list']) helper_functions.get_trait_db_obs(self, self.trait_db_list) - self.input = {} # self.input contains the phenotype values we need to send to R - strains = [] # All the strains we have data for (contains duplicates) - traits = [] # All the traits we have data for (should not contain duplicates) + # self.input contains the phenotype values we need to send to R + self.input = {} + # All the strains we have data for (contains duplicates) + strains = [] + # All the traits we have data for (should not contain duplicates) + traits = [] for trait in self.trait_list: traits.append(trait[0].name) self.input[trait[0].name] = {} for strain in trait[0].data: strains.append(strain) - self.input[trait[0].name][strain] = trait[0].data[strain].value + self.input[trait[0].name][strain] = trait[0].data[strain].value # Transfer the load data from python to R - uStrainsR = r_unique(ro.Vector(strains)) # Unique strains in R vector + # Unique strains in R vector + uStrainsR = r_unique(ro.Vector(strains)) uTraitsR = r_unique(ro.Vector(traits)) # Unique traits in R vector r_cat("The number of unique strains:", r_length(uStrainsR), "\n") r_cat("The number of unique traits:", r_length(uTraitsR), "\n") - # rM is the datamatrix holding all the data in R /rows = strains columns = traits - rM = ro.r.matrix(ri.NA_Real, nrow=r_length(uStrainsR), ncol=r_length(uTraitsR), dimnames = r_list(uStrainsR, uTraitsR)) + # rM is the datamatrix holding all the data in + # R /rows = strains columns = traits + rM = ro.r.matrix(ri.NA_Real, nrow=r_length(uStrainsR), ncol=r_length( + uTraitsR), dimnames=r_list(uStrainsR, uTraitsR)) for t in uTraitsR: - trait = t[0] # R uses 
vectors every single element is a vector + # R uses vectors every single element is a vector + trait = t[0] for s in uStrainsR: - strain = s[0] # R uses vectors every single element is a vector - #DEBUG: print(trait, strain, " in python: ", self.input[trait].get(strain), "in R:", rM.rx(strain,trait)[0]) - rM.rx[strain, trait] = self.input[trait].get(strain) # Update the matrix location + # R uses vectors every single element is a vector + strain = s[0] + rM.rx[strain, trait] = self.input[trait].get( + strain) # Update the matrix location sys.stdout.flush() self.results = {} - self.results['nphe'] = r_length(uTraitsR)[0] # Number of phenotypes/traits - self.results['nstr'] = r_length(uStrainsR)[0] # Number of strains + # Number of phenotypes/traits + self.results['nphe'] = r_length(uTraitsR)[0] + self.results['nstr'] = r_length( + uStrainsR)[0] # Number of strains self.results['phenotypes'] = uTraitsR # Traits used - self.results['strains'] = uStrainsR # Strains used in the analysis - self.results['requestform'] = requestform # Store the user specified parameters for the output page + # Strains used in the analysis + self.results['strains'] = uStrainsR + # Store the user specified parameters for the output page + self.results['requestform'] = requestform - # Calculate soft threshold if the user specified the SoftThreshold variable + # Calculate soft threshold if the user specified the + # SoftThreshold variable if requestform.get('SoftThresholds') is not None: - powers = [int(threshold.strip()) for threshold in requestform['SoftThresholds'].rstrip().split(",")] - rpow = r_unlist(r_c(powers)) - print "SoftThresholds: {} == {}".format(powers, rpow) - self.sft = self.r_pickSoftThreshold(rM, powerVector = rpow, verbose = 5) - - print "PowerEstimate: {}".format(self.sft[0]) - self.results['PowerEstimate'] = self.sft[0] - if self.sft[0][0] is ri.NA_Integer: - print "No power is suitable for the analysis, just use 1" - self.results['Power'] = 1 # No power could be 
estimated - else: - self.results['Power'] = self.sft[0][0] # Use the estimated power + powers = [int(threshold.strip()) + for threshold in requestform['SoftThresholds'].rstrip().split(",")] + rpow = r_unlist(r_c(powers)) + print("SoftThresholds: {} == {}".format(powers, rpow)) + self.sft = self.r_pickSoftThreshold( + rM, powerVector=rpow, verbose=5) + + print("PowerEstimate: {}".format(self.sft[0])) + self.results['PowerEstimate'] = self.sft[0] + if self.sft[0][0] is ri.NA_Integer: + print "No power is suitable for the analysis, just use 1" + # No power could be estimated + self.results['Power'] = 1 + else: + # Use the estimated power + self.results['Power'] = self.sft[0][0] else: - # The user clicked a button, so no soft threshold selection - self.results['Power'] = requestform.get('Power') # Use the power value the user gives + # The user clicked a button, so no soft threshold selection + # Use the power value the user gives + self.results['Power'] = requestform.get('Power') # Create the block wise modules using WGCNA - network = self.r_blockwiseModules(rM, power = self.results['Power'], TOMType = requestform['TOMtype'], minModuleSize = requestform['MinModuleSize'], verbose = 3) + network = self.r_blockwiseModules( + rM, + power=self.results['Power'], + TOMType=requestform['TOMtype'], + minModuleSize=requestform['MinModuleSize'], + verbose=3) # Save the network for the GUI self.results['network'] = network @@ -130,7 +165,9 @@ class WGCNA(object): self.results['imgloc'] = GENERATED_IMAGE_DIR + self.results['imgurl'] r_png(self.results['imgloc'], width=1000, height=600, type='cairo-png') mergedColors = self.r_labels2colors(network[1]) - self.r_plotDendroAndColors(network[5][0], mergedColors, "Module colors", dendroLabels = False, hang = 0.03, addGuide = True, guideHang = 0.05) + self.r_plotDendroAndColors(network[5][0], mergedColors, + "Module colors", dendroLabels=False, + hang=0.03, addGuide=True, guideHang=0.05) r_dev_off() sys.stdout.flush() @@ -146,11 +183,9 
@@ class WGCNA(object): print("Processing WGCNA output") template_vars = {} template_vars["input"] = self.input - template_vars["powers"] = self.sft[1:] # Results from the soft threshold analysis + # Results from the soft threshold analysis + template_vars["powers"] = self.sft[1:] template_vars["results"] = self.results self.render_image(results) sys.stdout.flush() - #r_sink(type = "message") # This restores R output to the stdout/stderr - #r_sink() # We should end the Rpy session more or less return(dict(template_vars)) - -- cgit v1.2.3