diff options
Diffstat (limited to 'wqflask')
-rw-r--r-- | wqflask/base/data_set.py | 6 | ||||
-rw-r--r-- | wqflask/base/trait.py | 194 | ||||
-rw-r--r-- | wqflask/tests/wqflask/test_collect.py | 57 | ||||
-rw-r--r-- | wqflask/wqflask/collect.py | 37 | ||||
-rw-r--r-- | wqflask/wqflask/wgcna/wgcna_analysis.py | 181 |
5 files changed, 299 insertions, 176 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index e0ef559c..2f1549ae 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -150,10 +150,12 @@ Publish or ProbeSet. E.g. "geno": "Geno", } + group_name = name if t in ['pheno', 'other_pheno']: - name = name.replace("Publish", "") + group_name = name.replace("Publish", "") - if bool(len(g.db.execute(sql_query_mapping[t].format(name)).fetchone())): + results = g.db.execute(sql_query_mapping[t].format(group_name)).fetchone() + if results: self.datasets[name] = dataset_name_mapping[t] self.redis_instance.set("dataset_structure", json.dumps(self.datasets)) return True diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 8f8b5b70..7ebbc4bb 100644 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -1,4 +1,10 @@ from __future__ import absolute_import, division, print_function +from utility.logger import getLogger +from flask import Flask, g, request, url_for, redirect, make_response, render_template +from pprint import pformat as pf +from MySQLdb import escape_string as escape +import simplejson as json +from wqflask import app import os import string @@ -16,22 +22,19 @@ from utility import webqtlUtil from utility import hmac from utility.authentication_tools import check_resource_availability from utility.tools import GN2_BASE_URL, GN_VERSION -from utility.redis_tools import get_redis_conn, get_resource_id, get_resource_info -Redis = get_redis_conn() +from utility.redis_tools import get_redis_conn +from utility.redis_tools import get_resource_id +from utility.redis_tools import get_resource_info -from wqflask import app +Redis = get_redis_conn() -import simplejson as json -from MySQLdb import escape_string as escape -from pprint import pformat as pf -from flask import Flask, g, request, url_for, redirect, make_response, render_template +logger = getLogger(__name__) -from utility.logger import getLogger -logger = getLogger(__name__ ) def create_trait(**kw): - assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name"; + assert bool(kw.get('dataset')) != bool( + kw.get('dataset_name')), "Needs dataset ob. or name" permitted = True if kw.get('name'): @@ -43,18 +46,21 @@ def create_trait(**kw): if kw.get('dataset_name') != "Temp": if dataset.type == 'Publish': - permissions = check_resource_availability(dataset, kw.get('name')) + permissions = check_resource_availability( + dataset, kw.get('name')) else: permissions = check_resource_availability(dataset) if "view" in permissions['data']: the_trait = GeneralTrait(**kw) if the_trait.dataset.type != "Temp": - the_trait = retrieve_trait_info(the_trait, the_trait.dataset, get_qtl_info=kw.get('get_qtl_info')) + the_trait = retrieve_trait_info( + the_trait, the_trait.dataset, get_qtl_info=kw.get('get_qtl_info')) return the_trait else: return None + class GeneralTrait(object): """ Trait class defines a trait in webqtl, can be either Microarray, @@ -64,12 +70,15 @@ class GeneralTrait(object): def __init__(self, get_qtl_info=False, get_sample_info=True, **kw): # xor assertion - assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name"; - self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc. + assert bool(kw.get('dataset')) != bool( + kw.get('dataset_name')), "Needs dataset ob. or name" + # Trait ID, ProbeSet ID, Published ID, etc. + self.name = kw.get('name') if kw.get('dataset_name'): if kw.get('dataset_name') == "Temp": temp_group = self.name.split("_")[2] - self.dataset = create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = temp_group) + self.dataset = create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=temp_group) else: self.dataset = create_dataset(kw.get('dataset_name')) else: @@ -77,7 +86,8 @@ class GeneralTrait(object): self.cellid = kw.get('cellid') self.identification = kw.get('identification', 'un-named trait') self.haveinfo = kw.get('haveinfo', False) - self.sequence = kw.get('sequence') # Blat sequence, available for ProbeSet + # Blat sequence, available for ProbeSet + self.sequence = kw.get('sequence') self.data = kw.get('data', {}) self.view = True @@ -125,11 +135,11 @@ class GeneralTrait(object): vals.append(sample_data.value) the_vars.append(sample_data.variance) sample_aliases.append(sample_data.name2) - return samples, vals, the_vars, sample_aliases + return samples, vals, the_vars, sample_aliases @property def description_fmt(self): - '''Return a text formated description''' + """Return a text formated description""" if self.dataset.type == 'ProbeSet': if self.description: formatted = self.description @@ -149,7 +159,7 @@ class GeneralTrait(object): @property def alias_fmt(self): - '''Return a text formatted alias''' + """Return a text formatted alias""" alias = 'Not available' if getattr(self, "alias", None): @@ -160,16 +170,20 @@ class GeneralTrait(object): @property def wikidata_alias_fmt(self): - '''Return a text formatted alias''' + """Return a text formatted alias""" alias = 'Not available' if self.symbol: - human_response = requests.get(GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.upper()) - mouse_response = requests.get(GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.capitalize()) - other_response = requests.get(GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.lower()) + human_response = requests.get( + GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.upper()) + mouse_response = requests.get( + GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.capitalize()) + other_response = requests.get( + GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.lower()) if human_response and mouse_response and other_response: - alias_list = json.loads(human_response.content) + json.loads(mouse_response.content) + json.loads(other_response.content) + alias_list = json.loads(human_response.content) + json.loads( + mouse_response.content) + json.loads(other_response.content) filtered_aliases = [] seen = set() @@ -183,31 +197,31 @@ class GeneralTrait(object): return alias - @property def location_fmt(self): - '''Return a text formatted location + """Return a text formatted location While we're at it we set self.location in case we need it later (do we?) - ''' + """ if self.chr and self.mb: - self.location = 'Chr %s @ %s Mb' % (self.chr,self.mb) + self.location = 'Chr %s @ %s Mb' % (self.chr, self.mb) elif self.chr: self.location = 'Chr %s @ Unknown position' % (self.chr) else: self.location = 'Not available' fmt = self.location - ##XZ: deal with direction + # XZ: deal with direction if self.strand_probe == '+': fmt += (' on the plus strand ') elif self.strand_probe == '-': fmt += (' on the minus strand ') return fmt - + + def retrieve_sample_data(trait, dataset, samplelist=None): if samplelist == None: samplelist = [] @@ -225,16 +239,19 @@ def retrieve_sample_data(trait, dataset, samplelist=None): all_samples_ordered = dataset.group.all_samples_ordered() for i, item in enumerate(results): try: - trait.data[all_samples_ordered[i]] = webqtlCaseData(all_samples_ordered[i], float(item)) + trait.data[all_samples_ordered[i]] = webqtlCaseData( + all_samples_ordered[i], float(item)) except: pass else: for item in results: name, value, variance, num_cases, name2 = item if not samplelist or (samplelist and name in samplelist): - trait.data[name] = webqtlCaseData(*item) #name, value, variance, num_cases) + # name, value, variance, num_cases) + trait.data[name] = webqtlCaseData(*item) return trait + @app.route("/trait/get_sample_data") def get_sample_data(): params = request.args @@ -250,7 +267,8 @@ def get_sample_data(): trait_dict['group'] = trait_ob.dataset.group.name trait_dict['tissue'] = trait_ob.dataset.tissue trait_dict['species'] = trait_ob.dataset.group.species - trait_dict['url'] = url_for('show_trait_page', trait_id = trait, dataset = dataset) + trait_dict['url'] = url_for( + 'show_trait_page', trait_id=trait, dataset=dataset) trait_dict['description'] = trait_ob.description_display if trait_ob.dataset.type == "ProbeSet": trait_dict['symbol'] = trait_ob.symbol @@ -260,22 +278,24 @@ def get_sample_data(): trait_dict['pubmed_link'] = trait_ob.pubmed_link trait_dict['pubmed_text'] = trait_ob.pubmed_text - return json.dumps([trait_dict, {key: value.value for key, value in trait_ob.data.iteritems() }]) + return json.dumps([trait_dict, {key: value.value for key, value in trait_ob.data.iteritems()}]) else: return None - + + def jsonable(trait): """Return a dict suitable for using as json Actual turning into json doesn't happen here though""" - dataset = create_dataset(dataset_name = trait.dataset.name, dataset_type = trait.dataset.type, group_name = trait.dataset.group.name) - + dataset = create_dataset(dataset_name=trait.dataset.name, + dataset_type=trait.dataset.type, group_name=trait.dataset.group.name) + if dataset.type == "ProbeSet": return dict(name=trait.name, symbol=trait.symbol, dataset=dataset.name, - dataset_name = dataset.shortname, + dataset_name=dataset.shortname, description=trait.description_display, mean=trait.mean, location=trait.location_repr, @@ -287,7 +307,7 @@ def jsonable(trait): if trait.pubmed_id: return dict(name=trait.name, dataset=dataset.name, - dataset_name = dataset.shortname, + dataset_name=dataset.shortname, description=trait.description_display, abbreviation=trait.abbreviation, authors=trait.authors, @@ -300,7 +320,7 @@ def jsonable(trait): else: return dict(name=trait.name, dataset=dataset.name, - dataset_name = dataset.shortname, + dataset_name=dataset.shortname, description=trait.description_display, abbreviation=trait.abbreviation, authors=trait.authors, @@ -312,19 +332,20 @@ def jsonable(trait): elif dataset.type == "Geno": return dict(name=trait.name, dataset=dataset.name, - dataset_name = dataset.shortname, + dataset_name=dataset.shortname, location=trait.location_repr ) else: return dict() + def jsonable_table_row(trait, dataset_name, index): """Return a list suitable for json and intended to be displayed in a table Actual turning into json doesn't happen here though""" dataset = create_dataset(dataset_name) - + if dataset.type == "ProbeSet": if trait.mean == "": mean = "N/A" @@ -336,11 +357,13 @@ def jsonable_table_row(trait, dataset_name, index): additive = "%.3f" % round(float(trait.additive), 2) return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', index, - '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>', + '<a href="/show_trait?trait_id=' + + str(trait.name)+'&dataset='+dataset.name + + '">'+str(trait.name)+'</a>', trait.symbol, trait.description_display, trait.location_repr, - mean, + mean, trait.LRS_score_repr, trait.LRS_location_repr, additive] @@ -352,7 +375,9 @@ def jsonable_table_row(trait, dataset_name, index): if trait.pubmed_id: return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', index, - '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>', + '<a href="/show_trait?trait_id=' + + str(trait.name)+'&dataset='+dataset.name + + '">'+str(trait.name)+'</a>', trait.description_display, trait.authors, '<a href="' + trait.pubmed_link + '">' + trait.pubmed_text + '</href>', @@ -362,7 +387,9 @@ def jsonable_table_row(trait, dataset_name, index): else: return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', index, - '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>', + '<a href="/show_trait?trait_id=' + + str(trait.name)+'&dataset='+dataset.name + + '">'+str(trait.name)+'</a>', trait.description_display, trait.authors, trait.pubmed_text, @@ -372,7 +399,9 @@ def jsonable_table_row(trait, dataset_name, index): elif dataset.type == "Geno": return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', index, - '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>', + '<a href="/show_trait?trait_id=' + + str(trait.name)+'&dataset='+dataset.name + + '">'+str(trait.name)+'</a>', trait.location_repr] else: return dict() @@ -383,14 +412,16 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): resource_id = get_resource_id(dataset, trait.name) if dataset.type == 'Publish': - the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view".format(resource_id, g.user_session.user_id) + the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view".format( + resource_id, g.user_session.user_id) else: - the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view&trait={}".format(resource_id, g.user_session.user_id, trait.name) + the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view&trait={}".format( + resource_id, g.user_session.user_id, trait.name) try: response = requests.get(the_url).content trait_info = json.loads(response) - except: #ZS: I'm assuming the trait is viewable if the try fails for some reason; it should never reach this point unless the user has privileges, since that's dealt with in create_trait + except: # ZS: I'm assuming the trait is viewable if the try fails for some reason; it should never reach this point unless the user has privileges, since that's dealt with in create_trait if dataset.type == 'Publish': query = """ SELECT @@ -419,8 +450,8 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): logger.sql(query) trait_info = g.db.execute(query).fetchone() - #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name - #XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms. + # XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name + # XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms. elif dataset.type == 'ProbeSet': display_fields_string = ', ProbeSet.'.join(dataset.display_fields) display_fields_string = 'ProbeSet.' + display_fields_string @@ -433,14 +464,15 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): ProbeSetFreeze.Name = '%s' AND ProbeSet.Name = '%s' """ % (escape(display_fields_string), - escape(dataset.name), - escape(str(trait.name))) + escape(dataset.name), + escape(str(trait.name))) logger.sql(query) trait_info = g.db.execute(query).fetchone() - #XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name + # XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name # to avoid the problem of same marker name from different species. elif dataset.type == 'Geno': - display_fields_string = string.join(dataset.display_fields,',Geno.') + display_fields_string = string.join( + dataset.display_fields, ',Geno.') display_fields_string = 'Geno.' + display_fields_string query = """ SELECT %s @@ -451,21 +483,21 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): GenoFreeze.Name = '%s' AND Geno.Name = '%s' """ % (escape(display_fields_string), - escape(dataset.name), - escape(trait.name)) + escape(dataset.name), + escape(trait.name)) logger.sql(query) trait_info = g.db.execute(query).fetchone() - else: #Temp type + else: # Temp type query = """SELECT %s FROM %s WHERE Name = %s""" logger.sql(query) trait_info = g.db.execute(query, - (string.join(dataset.display_fields,','), - dataset.type, trait.name)).fetchone() + (string.join(dataset.display_fields, ','), + dataset.type, trait.name)).fetchone() if trait_info: trait.haveinfo = True for i, field in enumerate(dataset.display_fields): - holder = trait_info[i] + holder = trait_info[i] setattr(trait, field, holder) if dataset.type == 'Publish': @@ -478,9 +510,9 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): description = trait.post_publication_description - #If the dataset is confidential and the user has access to confidential - #phenotype traits, then display the pre-publication description instead - #of the post-publication description + # If the dataset is confidential and the user has access to confidential + # phenotype traits, then display the pre-publication description instead + # of the post-publication description if trait.confidential: trait.abbreviation = trait.pre_publication_abbreviation trait.description_display = trait.pre_publication_description @@ -491,9 +523,12 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): else: trait.description_display = "" - trait.abbreviation = unicode(str(trait.abbreviation).strip(codecs.BOM_UTF8), 'utf-8', errors="replace") - trait.description_display = unicode(str(trait.description_display).strip(codecs.BOM_UTF8), 'utf-8', errors="replace") - trait.authors = unicode(str(trait.authors).strip(codecs.BOM_UTF8), 'utf-8', errors="replace") + trait.abbreviation = unicode(str(trait.abbreviation).strip( + codecs.BOM_UTF8), 'utf-8', errors="replace") + trait.description_display = unicode(str(trait.description_display).strip( + codecs.BOM_UTF8), 'utf-8', errors="replace") + trait.authors = unicode(str(trait.authors).strip( + codecs.BOM_UTF8), 'utf-8', errors="replace") if not trait.year.isdigit(): trait.pubmed_text = "N/A" @@ -504,8 +539,10 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % trait.pubmed_id if dataset.type == 'ProbeSet' and dataset.group: - description_string = unicode(str(trait.description).strip(codecs.BOM_UTF8), 'utf-8') - target_string = unicode(str(trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8') + description_string = unicode( + str(trait.description).strip(codecs.BOM_UTF8), 'utf-8') + target_string = unicode( + str(trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8') if str(description_string or "") != "" and description_string != 'None': description_display = description_string @@ -514,7 +551,7 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): if (str(description_display or "") != "" and description_display != 'N/A' and - str(target_string or "") != "" and target_string != 'None'): + str(target_string or "") != "" and target_string != 'None'): description_display = description_display + '; ' + target_string.strip() # Save it for the jinja2 template @@ -522,15 +559,17 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): trait.location_repr = 'N/A' if trait.chr and trait.mb: - trait.location_repr = 'Chr%s: %.6f' % (trait.chr, float(trait.mb)) + trait.location_repr = 'Chr%s: %.6f' % ( + trait.chr, float(trait.mb)) elif dataset.type == "Geno": trait.location_repr = 'N/A' if trait.chr and trait.mb: - trait.location_repr = 'Chr%s: %.6f' % (trait.chr, float(trait.mb)) + trait.location_repr = 'Chr%s: %.6f' % ( + trait.chr, float(trait.mb)) if get_qtl_info: - #LRS and its location + # LRS and its location trait.LRS_score_repr = "N/A" trait.LRS_location_repr = "N/A" trait.locus = trait.locus_chr = trait.locus_mb = trait.lrs = trait.pvalue = trait.additive = "" @@ -601,10 +640,11 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): else: trait.locus = trait.lrs = trait.additive = "" if (dataset.type == 'Publish' or dataset.type == "ProbeSet") and str(trait.locus_chr or "") != "" and str(trait.locus_mb or "") != "": - trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (trait.locus_chr, float(trait.locus_mb)) + trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % ( + trait.locus_chr, float(trait.locus_mb)) if str(trait.lrs or "") != "": trait.LRS_score_repr = LRS_score_repr = '%3.1f' % trait.lrs else: - raise KeyError, `trait.name`+' information is not found in the database.' - + raise KeyError, `trait.name`+ ' information is not found in the database.' + return trait diff --git a/wqflask/tests/wqflask/test_collect.py b/wqflask/tests/wqflask/test_collect.py new file mode 100644 index 00000000..6b8d7931 --- /dev/null +++ b/wqflask/tests/wqflask/test_collect.py @@ -0,0 +1,57 @@ +"""Test cases for some methods in collect.py""" + +import unittest +import mock + +from flask import Flask +from wqflask.collect import process_traits + +app = Flask(__name__) + + +class MockSession: + """Helper class for mocking wqflask.collect.g.user_session.logged_in""" + def __init__(self, is_logged_in=False): + self.is_logged_in = is_logged_in + + @property + def logged_in(self): + return self.is_logged_in + + +class MockFlaskG: + """Helper class for mocking wqflask.collect.g.user_session""" + def __init__(self, is_logged_in=False): + self.is_logged_in = is_logged_in + + @property + def user_session(self): + if self.is_logged_in: + return MockSession(is_logged_in=True) + return MockSession() + + +class TestCollect(unittest.TestCase): + + def setUp(self): + self.app_context = app.app_context() + self.app_context.push() + + def tearDown(self): + self.app_context.pop() + + @mock.patch("wqflask.collect.g", MockFlaskG()) + def test_process_traits_when_user_is_logged_out(self): + """ + Test that the correct traits are returned when the user is logged + out + """ + self.assertEqual(process_traits( + b'1452452_at:HC_M2_0606_P:163d04f7db7c9e110de6,' + b'1452447_at:HC_M2_0606_P:eeece8fceb67072debea,' + b'1451401_a_at:HC_M2_0606_P:a043d23b3b3906d8318e,' + b'1429252_at:HC_M2_0606_P:6fa378b349bc9180e8f5'), + set(['1429252_at:HC_M2_0606_P', + '1451401_a_at:HC_M2_0606_P', + '1452447_at:HC_M2_0606_P', + '1452452_at:HC_M2_0606_P'])) diff --git a/wqflask/wqflask/collect.py b/wqflask/wqflask/collect.py index 42a09fed..4c558bfe 100644 --- a/wqflask/wqflask/collect.py +++ b/wqflask/wqflask/collect.py @@ -1,41 +1,30 @@ from __future__ import print_function, division, absolute_import - -import os -import hashlib import datetime -import time - -import uuid -import hashlib -import base64 - -import urlparse - import simplejson as json -from flask import (Flask, g, render_template, url_for, request, make_response, - redirect, flash, jsonify) +from flask import g +from flask import render_template +from flask import url_for +from flask import request +from flask import redirect +from flask import flash from wqflask import app - -from pprint import pformat as pf - -from wqflask.database import db_session - -from wqflask import model - -from utility import Bunch, Struct, hmac +from utility import hmac from utility.formatting import numify from utility.redis_tools import get_redis_conn -Redis = get_redis_conn() -from base.trait import create_trait, retrieve_trait_info, jsonable +from base.trait import create_trait +from base.trait import retrieve_trait_info +from base.trait import jsonable from base.data_set import create_dataset -import logging from utility.logger import getLogger + logger = getLogger(__name__) +Redis = get_redis_conn() + def process_traits(unprocessed_traits): if isinstance(unprocessed_traits, basestring): diff --git a/wqflask/wqflask/wgcna/wgcna_analysis.py b/wqflask/wqflask/wgcna/wgcna_analysis.py index 880a1cb2..70077703 100644 --- a/wqflask/wqflask/wgcna/wgcna_analysis.py +++ b/wqflask/wqflask/wgcna/wgcna_analysis.py @@ -1,5 +1,8 @@ -# WGCNA analysis for GN2 -# Author / Maintainer: Danny Arends <Danny.Arends@gmail.com> +""" +WGCNA analysis for GN2 + +Author / Maintainer: Danny Arends <Danny.Arends@gmail.com> +""" import sys from numpy import * import scipy as sp # SciPy @@ -17,106 +20,138 @@ from utility import helper_functions from rpy2.robjects.packages import importr utils = importr("utils") -## Get pointers to some common R functions -r_library = ro.r["library"] # Map the library function -r_options = ro.r["options"] # Map the options function -r_read_csv = ro.r["read.csv"] # Map the read.csv function -r_dim = ro.r["dim"] # Map the dim function -r_c = ro.r["c"] # Map the c function -r_cat = ro.r["cat"] # Map the cat function -r_paste = ro.r["paste"] # Map the paste function -r_unlist = ro.r["unlist"] # Map the unlist function -r_unique = ro.r["unique"] # Map the unique function -r_length = ro.r["length"] # Map the length function -r_unlist = ro.r["unlist"] # Map the unlist function -r_list = ro.r.list # Map the list function -r_matrix = ro.r.matrix # Map the matrix function -r_seq = ro.r["seq"] # Map the seq function -r_table = ro.r["table"] # Map the table function -r_names = ro.r["names"] # Map the names function -r_sink = ro.r["sink"] # Map the sink function -r_is_NA = ro.r["is.na"] # Map the is.na function -r_file = ro.r["file"] # Map the file function -r_png = ro.r["png"] # Map the png function for plotting -r_dev_off = ro.r["dev.off"] # Map the dev.off function +# Get pointers to some common R functions +r_library = ro.r["library"] # Map the library function +r_options = ro.r["options"] # Map the options function +r_read_csv = ro.r["read.csv"] # Map the read.csv function +r_dim = ro.r["dim"] # Map the dim function +r_c = ro.r["c"] # Map the c function +r_cat = ro.r["cat"] # Map the cat function +r_paste = ro.r["paste"] # Map the paste function +r_unlist = ro.r["unlist"] # Map the unlist function +r_unique = ro.r["unique"] # Map the unique function +r_length = ro.r["length"] # Map the length function +r_unlist = ro.r["unlist"] # Map the unlist function +r_list = ro.r.list # Map the list function +r_matrix = ro.r.matrix # Map the matrix function +r_seq = ro.r["seq"] # Map the seq function +r_table = ro.r["table"] # Map the table function +r_names = ro.r["names"] # Map the names function +r_sink = ro.r["sink"] # Map the sink function +r_is_NA = ro.r["is.na"] # Map the is.na function +r_file = ro.r["file"] # Map the file function +r_png = ro.r["png"] # Map the png function for plotting +r_dev_off = ro.r["dev.off"] # Map the dev.off function + class WGCNA(object): def __init__(self): + # To log output from stdout/stderr to a file add `r_sink(log)` print("Initialization of WGCNA") - #log = r_file("/tmp/genenetwork_wcgna.log", open = "wt") - #r_sink(log) # Uncomment the r_sink() commands to log output from stdout/stderr to a file - #r_sink(log, type = "message") - r_library("WGCNA") # Load WGCNA - Should only be done once, since it is quite expensive - r_options(stringsAsFactors = False) + + # Load WGCNA - Should only be done once, since it is quite expensive + r_library("WGCNA") + r_options(stringsAsFactors=False) print("Initialization of WGCNA done, package loaded in R session") - self.r_enableWGCNAThreads = ro.r["enableWGCNAThreads"] # Map the enableWGCNAThreads function - self.r_pickSoftThreshold = ro.r["pickSoftThreshold"] # Map the pickSoftThreshold function - self.r_blockwiseModules = ro.r["blockwiseModules"] # Map the blockwiseModules function - self.r_labels2colors = ro.r["labels2colors"] # Map the labels2colors function - self.r_plotDendroAndColors = ro.r["plotDendroAndColors"] # Map the plotDendroAndColors function + # Map the enableWGCNAThreads function + self.r_enableWGCNAThreads = ro.r["enableWGCNAThreads"] + # Map the pickSoftThreshold function + self.r_pickSoftThreshold = ro.r["pickSoftThreshold"] + # Map the blockwiseModules function + self.r_blockwiseModules = ro.r["blockwiseModules"] + # Map the labels2colors function + self.r_labels2colors = ro.r["labels2colors"] + # Map the plotDendroAndColors function + self.r_plotDendroAndColors = ro.r["plotDendroAndColors"] print("Obtained pointers to WGCNA functions") def run_analysis(self, requestform): print("Starting WGCNA analysis on dataset") - self.r_enableWGCNAThreads() # Enable multi threading - self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')] - print("Retrieved phenotype data from database", requestform['trait_list']) + # Enable multi threading + self.r_enableWGCNAThreads() + self.trait_db_list = [trait.strip() + for trait in requestform['trait_list'].split(',')] + print("Retrieved phenotype data from database", + requestform['trait_list']) helper_functions.get_trait_db_obs(self, self.trait_db_list) - self.input = {} # self.input contains the phenotype values we need to send to R - strains = [] # All the strains we have data for (contains duplicates) - traits = [] # All the traits we have data for (should not contain duplicates) + # self.input contains the phenotype values we need to send to R + self.input = {} + # All the strains we have data for (contains duplicates) + strains = [] + # All the traits we have data for (should not contain duplicates) + traits = [] for trait in self.trait_list: traits.append(trait[0].name) self.input[trait[0].name] = {} for strain in trait[0].data: strains.append(strain) - self.input[trait[0].name][strain] = trait[0].data[strain].value + self.input[trait[0].name][strain] = trait[0].data[strain].value # Transfer the load data from python to R - uStrainsR = r_unique(ro.Vector(strains)) # Unique strains in R vector + # Unique strains in R vector + uStrainsR = r_unique(ro.Vector(strains)) uTraitsR = r_unique(ro.Vector(traits)) # Unique traits in R vector r_cat("The number of unique strains:", r_length(uStrainsR), "\n") r_cat("The number of unique traits:", r_length(uTraitsR), "\n") - # rM is the datamatrix holding all the data in R /rows = strains columns = traits - rM = ro.r.matrix(ri.NA_Real, nrow=r_length(uStrainsR), ncol=r_length(uTraitsR), dimnames = r_list(uStrainsR, uTraitsR)) + # rM is the datamatrix holding all the data in + # R /rows = strains columns = traits + rM = ro.r.matrix(ri.NA_Real, nrow=r_length(uStrainsR), ncol=r_length( + uTraitsR), dimnames=r_list(uStrainsR, uTraitsR)) for t in uTraitsR: - trait = t[0] # R uses vectors every single element is a vector + # R uses vectors every single element is a vector + trait = t[0] for s in uStrainsR: - strain = s[0] # R uses vectors every single element is a vector - #DEBUG: print(trait, strain, " in python: ", self.input[trait].get(strain), "in R:", rM.rx(strain,trait)[0]) - rM.rx[strain, trait] = self.input[trait].get(strain) # Update the matrix location + # R uses vectors every single element is a vector + strain = s[0] + rM.rx[strain, trait] = self.input[trait].get( + strain) # Update the matrix location sys.stdout.flush() self.results = {} - self.results['nphe'] = r_length(uTraitsR)[0] # Number of phenotypes/traits - self.results['nstr'] = r_length(uStrainsR)[0] # Number of strains + # Number of phenotypes/traits + self.results['nphe'] = r_length(uTraitsR)[0] + self.results['nstr'] = r_length( + uStrainsR)[0] # Number of strains self.results['phenotypes'] = uTraitsR # Traits used - self.results['strains'] = uStrainsR # Strains used in the analysis - self.results['requestform'] = requestform # Store the user specified parameters for the output page + # Strains used in the analysis + self.results['strains'] = uStrainsR + # Store the user specified parameters for the output page + self.results['requestform'] = requestform - # Calculate soft threshold if the user specified the SoftThreshold variable + # Calculate soft threshold if the user specified the + # SoftThreshold variable if requestform.get('SoftThresholds') is not None: - powers = [int(threshold.strip()) for threshold in requestform['SoftThresholds'].rstrip().split(",")] - rpow = r_unlist(r_c(powers)) - print "SoftThresholds: {} == {}".format(powers, rpow) - self.sft = self.r_pickSoftThreshold(rM, powerVector = rpow, verbose = 5) - - print "PowerEstimate: {}".format(self.sft[0]) - self.results['PowerEstimate'] = self.sft[0] - if self.sft[0][0] is ri.NA_Integer: - print "No power is suitable for the analysis, just use 1" - self.results['Power'] = 1 # No power could be estimated - else: - self.results['Power'] = self.sft[0][0] # Use the estimated power + powers = [int(threshold.strip()) + for threshold in requestform['SoftThresholds'].rstrip().split(",")] + rpow = r_unlist(r_c(powers)) + print("SoftThresholds: {} == {}".format(powers, rpow)) + self.sft = self.r_pickSoftThreshold( + rM, powerVector=rpow, verbose=5) + + print("PowerEstimate: {}".format(self.sft[0])) + self.results['PowerEstimate'] = self.sft[0] + if self.sft[0][0] is ri.NA_Integer: + print "No power is suitable for the analysis, just use 1" + # No power could be estimated + self.results['Power'] = 1 + else: + # Use the estimated power + self.results['Power'] = self.sft[0][0] else: - # The user clicked a button, so no soft threshold selection - self.results['Power'] = requestform.get('Power') # Use the power value the user gives + # The user clicked a button, so no soft threshold selection + # Use the power value the user gives + self.results['Power'] = requestform.get('Power') # Create the block wise modules using WGCNA - network = self.r_blockwiseModules(rM, power = self.results['Power'], TOMType = requestform['TOMtype'], minModuleSize = requestform['MinModuleSize'], verbose = 3) + network = self.r_blockwiseModules( + rM, + power=self.results['Power'], + TOMType=requestform['TOMtype'], + minModuleSize=requestform['MinModuleSize'], + verbose=3) # Save the network for the GUI self.results['network'] = network @@ -130,7 +165,9 @@ class WGCNA(object): self.results['imgloc'] = GENERATED_IMAGE_DIR + self.results['imgurl'] r_png(self.results['imgloc'], width=1000, height=600, type='cairo-png') mergedColors = self.r_labels2colors(network[1]) - self.r_plotDendroAndColors(network[5][0], mergedColors, "Module colors", dendroLabels = False, hang = 0.03, addGuide = True, guideHang = 0.05) + self.r_plotDendroAndColors(network[5][0], mergedColors, + "Module colors", dendroLabels=False, + hang=0.03, addGuide=True, guideHang=0.05) r_dev_off() sys.stdout.flush() @@ -146,11 +183,9 @@ class WGCNA(object): print("Processing WGCNA output") template_vars = {} template_vars["input"] = self.input - template_vars["powers"] = self.sft[1:] # Results from the soft threshold analysis + # Results from the soft threshold analysis + template_vars["powers"] = self.sft[1:] template_vars["results"] = self.results self.render_image(results) sys.stdout.flush() - #r_sink(type = "message") # This restores R output to the stdout/stderr - #r_sink() # We should end the Rpy session more or less return(dict(template_vars)) - |