diff options
123 files changed, 4581 insertions, 3786 deletions
diff --git a/etc/default_settings.py b/etc/default_settings.py index 27522187..a194b10e 100644 --- a/etc/default_settings.py +++ b/etc/default_settings.py @@ -73,23 +73,24 @@ SMTP_PASSWORD = "UNKNOWN" # ---- Behavioural settings (defaults) note that logger and log levels can # be overridden at the module level and with enviroment settings -WEBSERVER_MODE = 'DEV' # Python webserver mode (DEBUG|DEV|PROD) +WEBSERVER_MODE = 'DEV' # Python webserver mode (DEBUG|DEV|PROD) WEBSERVER_BRANDING = None # Set the branding (nyi) WEBSERVER_DEPLOY = None # Deployment specifics (nyi) -WEBSERVER_URL = "http://localhost:"+str(SERVER_PORT)+"/" # external URL +WEBSERVER_URL = "http://localhost:" + str(SERVER_PORT) + "/" # external URL -LOG_LEVEL = 'WARNING' # Logger mode (DEBUG|INFO|WARNING|ERROR|CRITICAL) +LOG_LEVEL = 'WARNING' # Logger mode (DEBUG|INFO|WARNING|ERROR|CRITICAL) LOG_LEVEL_DEBUG = '0' # logger.debugf log level (0-5, 5 = show all) -LOG_SQL = 'False' # Log SQL/backend and GN_SERVER calls +LOG_SQL = 'False' # Log SQL/backend and GN_SERVER calls LOG_SQL_ALCHEMY = 'False' -LOG_BENCH = True # Log bench marks +LOG_BENCH = True # Log bench marks -USE_REDIS = True # REDIS caching (note that redis will be phased out) -USE_GN_SERVER = 'False' # Use GN_SERVER SQL calls -HOME = os.environ['HOME'] +USE_REDIS = True # REDIS caching (note that redis will be phased out) +USE_GN_SERVER = 'False' # Use GN_SERVER SQL calls +HOME = os.environ['HOME'] # ---- Default locations -GENENETWORK_FILES = HOME+"/genotype_files" # base dir for all static data files +# base dir for all static data files +GENENETWORK_FILES = HOME + "/genotype_files" # ---- Path overrides for Genenetwork - the defaults are normally # picked up from Guix or in the HOME directory @@ -98,7 +99,7 @@ GENENETWORK_FILES = HOME+"/genotype_files" # base dir for all static data fil # PRIVATE_FILES = HOME+"/gn2_private_data" # private static data files (unused) # ---- Local path to JS libraries - for development modules (only) -JS_GN_PATH = os.environ['HOME']+"/genenetwork/javascript" +JS_GN_PATH = os.environ['HOME'] + "/genenetwork/javascript" # ---- GN2 Executables (overwrite for testing only) # PLINK_COMMAND = str.strip(os.popen("which plink2").read()) diff --git a/webtests/browser_run.py b/webtests/browser_run.py deleted file mode 100644 index 6cf46de5..00000000 --- a/webtests/browser_run.py +++ /dev/null @@ -1,72 +0,0 @@ -__all__ = ('sleep', 'testmod', 'test') - -from doctest import testmod - -from time import sleep - -import selenium -from selenium import webdriver -from selenium.common.exceptions import NoSuchElementException, ElementNotVisibleException -from selenium.webdriver.common.keys import Keys - -class Test: - def __init__(self): - #self.browser = webdriver.Chrome('/home/gn2/gn2/webtests/chromedriver') - self.browser = webdriver.Firefox() - - def get(self, url): - self.browser.get(url) - sleep(5) - self.title() - - def click(self, xpath_selector): - el = self.browser.find_element_by_xpath(xpath_selector) - if el.text: - text = el.text.strip() - elif el.get_attribute("value"): - text = el.get_attribute("value").strip() - else: - text = "Notext" - el.click() - print("clicked:", text) - sleep(2) - - def click_option(self, xpath_selector, option_text): - el = self.browser.find_element_by_xpath(xpath_selector) - for option in el.find_elements_by_tag_name('option'): - if option.text == option_text: - option.click() # select() in earlier versions of webdriver - break - sleep(2) - - def enter_text(self, xpath_selector, text): - el = self.browser.find_element_by_xpath(xpath_selector) - sleep(10) - el.send_keys(text) - sleep(5) - # Just in case things get mangled by JavaScript, etc. we print the text for testing - self.get_text(xpath_selector) - - def get_text(self, xpath_selector): - el = self.browser.find_element_by_xpath(xpath_selector) - text = el.text.strip() or el.get_attribute("value").strip() - print("text:", text) - - def get_element_style(self, xpath_selector): - el = self.browser.find_element_by_xpath(xpath_selector) - style = el.get_attribute("style").strip() - print("style:", style) - - def switch_window(self): - self.browser.switch_to_window(self.browser.window_handles[-1]) - sleep(2) - self.title() - sleep(2) - - - def title(self): - print("title:", self.browser.title) - - - -test = Test() diff --git a/webtests/chromedriver b/webtests/chromedriver Binary files differdeleted file mode 100644 index 754eb668..00000000 --- a/webtests/chromedriver +++ /dev/null diff --git a/webtests/correlation_matrix_test.py b/webtests/correlation_matrix_test.py deleted file mode 100644 index 97114890..00000000 --- a/webtests/correlation_matrix_test.py +++ /dev/null @@ -1,70 +0,0 @@ -""" - -Test Correlation matrix - ->>> test.get("http://genenetwork.org/") -title: GeneNetwork - -Choose the type ->>> test.click_option('''//*[@id="tissue"]''', 'Hippocampus mRNA') - -Enter the Get Any ->>> test.enter_text('''//*[@id="tfor"]''', 'grin2b') -text: grin2b - -Search ->>> test.click('//*[@id="btsearch"]') -clicked: Search - -Add 1430675_at ->>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p/table/tbody/tr[3]/td/div/table/tbody/tr[2]/td/input''') -clicked: HC_M2_0606_P::1430675_at - -Add 1442370_at ->>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p/table/tbody/tr[3]/td/div/table/tbody/tr[3]/td/input''') -clicked: HC_M2_0606_P::1442370_at - -Add 1457003_at ->>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p/table/tbody/tr[3]/td/div/table/tbody/tr[4]/td/input''') -clicked: HC_M2_0606_P::1457003_at - -Add 1422223_at ->>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p/table/tbody/tr[3]/td/div/table/tbody/tr[5]/td/input''') -clicked: HC_M2_0606_P::1422223_at - ->>> sleep(5) - -Add to collection page ->>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p/table/tbody/tr[1]/td/table/tbody/tr[1]/td[4]/a''') -clicked: Notext - ->>> sleep(5) - -A new window is created, so we switch to it ->>> test.switch_window() -title: BXD Trait Collection - -Select all records ->>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/table/tbody/tr/td/table/tbody/tr/td/table/tbody/tr/td/a/img''') -clicked: Notext - -Click Matrix ->>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/table/tbody/tr/td/table/tbody/tr[2]/td/table/tbody/tr/td[2]/a/img''') -clicked: Notext - -Another new window ->>> test.switch_window() -title: Correlation Matrix - -Sleep a bunch because this can take a while ->>> sleep(10) - -Ensure that the correlation between Trait3 (HC_M2_0606_P::1457003_at) and Trait4 (HC_M2_0606_P::1422223_at) is 0.608 ->>> test.get_text('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/blockquote/table/tbody/tr[5]/td[5]/a/font''') -text: 0.608\n71 - -""" - -from browser_run import * - -testmod() diff --git a/webtests/correlation_test.py b/webtests/correlation_test.py deleted file mode 100644 index 311bb847..00000000 --- a/webtests/correlation_test.py +++ /dev/null @@ -1,49 +0,0 @@ -""" -Test calculate correlations - ->>> test.get("http://genenetwork.org") -title: GeneNetwork - -Choose the type ->>> test.click_option('''//*[@id="tissue"]''', 'Hippocampus mRNA') - -Enter the Get Any ->>> test.enter_text('''//*[@id="tfor"]''', 'ssh') -text: ssh - -Search ->>> test.click('//*[@id="btsearch"]') -clicked: Search - -Choose the first result ->>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p/table/tbody/tr[3]/td/div/table/tbody/tr[2]/td[2]/a''') -clicked: 1455854_a_at - -A new window is created, so we switch to it ->>> test.switch_window() -title: Hippocampus M430v2 BXD 06/06 PDNN : 1455854_a_at: Display Trait - -Click on Calculate Correlations ->>> test.click('''//*[@id="title3"]''') -clicked: Calculate Correlations - -Click on Compute ->>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p[6]/table/tbody/tr/td/div/div/span/table/tbody/tr/td/input[3]''') -clicked: Compute - -Another new window ->>> test.switch_window() -title: Correlation - -Sleep a bunch because this can take a while ->>> sleep(25) - -Ensure the Sample rho is the exepcted 1.000 because it should be the same record ->>> test.get_text('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/table/tbody/tr[2]/td/div/table/tbody/tr[2]/td[9]/a''') -text: 1.000 - -""" - -from browser_run import * - -testmod() diff --git a/webtests/libpeerconnection.log b/webtests/libpeerconnection.log deleted file mode 100644 index e69de29b..00000000 --- a/webtests/libpeerconnection.log +++ /dev/null diff --git a/webtests/marker_regression_test.py b/webtests/marker_regression_test.py deleted file mode 100644 index 9b4a4acb..00000000 --- a/webtests/marker_regression_test.py +++ /dev/null @@ -1,53 +0,0 @@ -""" -Test calculate correlations - ->>> test.get("http://genenetwork.org") -title: GeneNetwork - -Choose the type ->>> test.click_option('''//*[@id="tissue"]''', 'Liver mRNA') - -Enter the Get Any ->>> test.enter_text('''//*[@id="tfor"]''', 'grin2b') -text: grin2b - -Search ->>> test.click('//*[@id="btsearch"]') -clicked: Search - -Choose the second result ->>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p/table/tbody/tr[3]/td/div/table/tbody/tr[3]/td[2]/a''') -clicked: 1431700_at_A - -A new window is created, so we switch to it ->>> test.switch_window() -title: GSE16780 UCLA Hybrid MDP Liver Affy HT M430A (Sep11) RMA : 1431700_at_A: Display Trait - -Click on Mapping Tools ->>> test.click('''//*[@id="title4"]''') -clicked: Mapping Tools - -Click on Marker Regression tab ->>> test.click('''//*[@id="mapping_tabs"]/ul/li[2]/a''') -clicked: Marker Regression - -Click on Compute ->>> test.click('''//*[@id="mappingtabs-2"]/span/table/tbody/tr[1]/td/input''') -clicked: Compute - -Another new window ->>> test.switch_window() -title: Genome Association Result - -Sleep a bunch because this can take a while ->>> sleep(60) - -Ensure that the LRS of the top record is the exepcted value ->>> test.get_text('''//*[@id="1"]/td[2]''') -text: 11.511 - -""" - -from browser_run import * - -testmod() diff --git a/webtests/show_trait_js_test.py b/webtests/show_trait_js_test.py deleted file mode 100644 index 34ffd3b7..00000000 --- a/webtests/show_trait_js_test.py +++ /dev/null @@ -1,40 +0,0 @@ -""" -Test if JS is working on the show trait page - ->>> test.get("alexandria.uthsc.edu:89") -title: GeneNetwork - -Choose the species ->>> test.click_option('''//*[@id="species"]''', 'Human') - -Choose the group ->>> test.click_option('''//*[@id="cross"]''', 'Human Brain Transcriptome (Yale/Kavli)') - -Choose the type ->>> test.click_option('''//*[@id="tissue"]''', 'Orbital Prefrontal Cortex mRNA') - -Enter the Get Any ->>> test.enter_text('''//*[@id="tfor"]''', 'shh') -text: shh - -Search ->>> test.click('//*[@id="btsearch"]') -clicked: Search - -Choose the first result ->>> test.click('''//*[@id="KIN_YSM_OFC_0711::3081205"]/td[2]/a''') -clicked: 3081205 - -A new window is created, so we switch to it ->>> test.switch_window() -title: KIN/YSM Human OFC Affy Hu-Exon 1.0 ST (Jul11) Quantile : 3081205: Display Trait - -Check that the Calculate Correlations tab is closed (if javascript isn't working, it will be open) ->>> test.get_element_style('''//*[@id="sectionbody3"]''') -style: display: none; - -""" - -from browser_run import * - -testmod() diff --git a/webtests/test_runner.py b/webtests/test_runner.py deleted file mode 100644 index b5b590a6..00000000 --- a/webtests/test_runner.py +++ /dev/null @@ -1,26 +0,0 @@ -import unittest -import doctest -import glob - -tests = ("correlation_test", - "correlation_matrix_test", - "marker_regression_test", - "show_trait_js_test") - - -def main(): - tests = glob.glob("*_test.py") - - suite = unittest.TestSuite() - - for testname in tests: - test = testname.rsplit(".", 1)[0] - print("Test is:", test) - mod = __import__(test) - suite.addTest(doctest.DocTestSuite(mod)) - - runner = unittest.TextTestRunner() - runner.run(suite) - -if __name__ == '__main__': - main() diff --git a/wqflask/base/GeneralObject.py b/wqflask/base/GeneralObject.py index 249195e2..ce8e60b8 100644 --- a/wqflask/base/GeneralObject.py +++ b/wqflask/base/GeneralObject.py @@ -62,5 +62,5 @@ class GeneralObject: return s def __eq__(self, other): - return (len(list(self.__dict__.keys())) == - len(list(other.__dict__.keys()))) + return (len(list(self.__dict__.keys())) + == len(list(other.__dict__.keys()))) diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 3183363b..e20f2f98 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -150,8 +150,8 @@ class DatasetType: "FROM PublishFreeze, InbredSet " "WHERE InbredSet.Name = '%s' AND " "PublishFreeze.InbredSetId = InbredSet.Id"), - 'geno': ("SELECT GenoFreeze.Id FROM GenoFreeze WHERE " - "GenoFreeze.Name = \"%s\" ") + 'geno': ("SELECT GenoFreeze.Id FROM GenoFreeze WHERE " + "GenoFreeze.Name = \"%s\" ") } dataset_name_mapping = { @@ -168,7 +168,8 @@ class DatasetType: results = g.db.execute(sql_query_mapping[t] % group_name).fetchone() if results: self.datasets[name] = dataset_name_mapping[t] - self.redis_instance.set("dataset_structure", json.dumps(self.datasets)) + self.redis_instance.set( + "dataset_structure", json.dumps(self.datasets)) return True return None @@ -215,7 +216,7 @@ def create_datasets_list(): if USE_REDIS: r.set(key, pickle.dumps(datasets, pickle.HIGHEST_PROTOCOL)) - r.expire(key, 60*60) + r.expire(key, 60 * 60) return datasets @@ -239,7 +240,8 @@ class Markers: for line in bimbam_fh: marker = {} marker['name'] = line.split(delimiter)[0].rstrip() - marker['Mb'] = float(line.split(delimiter)[1].rstrip())/1000000 + marker['Mb'] = float(line.split(delimiter)[ + 1].rstrip()) / 1000000 marker['chr'] = line.split(delimiter)[2].rstrip() markers.append(marker) @@ -311,7 +313,6 @@ class HumanMarkers(Markers): marker['Mb'] = float(splat[3]) / 1000000 self.markers.append(marker) - def add_pvalues(self, p_values): super(HumanMarkers, self).add_pvalues(p_values) @@ -369,8 +370,8 @@ class DatasetGroup: def get_markers(self): def check_plink_gemma(): if flat_file_exists("mapping"): - MAPPING_PATH = flat_files("mapping")+"/" - if os.path.isfile(MAPPING_PATH+self.name+".bed"): + MAPPING_PATH = flat_files("mapping") + "/" + if os.path.isfile(MAPPING_PATH + self.name + ".bed"): return True return False @@ -416,7 +417,7 @@ class DatasetGroup: else: logger.debug("Cache not hit") - genotype_fn = locate_ignore_error(self.name+".geno", 'genotype') + genotype_fn = locate_ignore_error(self.name + ".geno", 'genotype') if genotype_fn: self.samplelist = get_group_samplelists.get_samplelist( "geno", genotype_fn) @@ -425,7 +426,7 @@ class DatasetGroup: if USE_REDIS: r.set(key, json.dumps(self.samplelist)) - r.expire(key, 60*5) + r.expire(key, 60 * 5) def all_samples_ordered(self): result = [] @@ -531,7 +532,7 @@ def datasets(group_name, this_group=None): if USE_REDIS: r.set(key, pickle.dumps(dataset_menu, pickle.HIGHEST_PROTOCOL)) - r.expire(key, 60*5) + r.expire(key, 60 * 5) if this_group != None: this_group._datasets = dataset_menu @@ -622,7 +623,7 @@ class DataSet: WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND ProbeFreeze.TissueId = Tissue.Id AND (ProbeSetFreeze.Name = '%s' OR ProbeSetFreeze.FullName = '%s' OR ProbeSetFreeze.ShortName = '%s') - """ % (query_args), "/dataset/"+self.name+".json", + """ % (query_args), "/dataset/" + self.name + ".json", lambda r: (r["id"], r["name"], r["full_name"], r["short_name"], r["data_scale"], r["tissue"]) ) @@ -1031,8 +1032,8 @@ class MrnaAssayDataSet(DataSet): else: description_display = this_trait.symbol - if (len(description_display) > 1 and description_display != 'N/A' and - len(target_string) > 1 and target_string != 'None'): + if (len(description_display) > 1 and description_display != 'N/A' + and len(target_string) > 1 and target_string != 'None'): description_display = description_display + '; ' + target_string.strip() # Save it for the jinja2 template diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py index 1f8224af..882ae911 100644 --- a/wqflask/base/mrna_assay_tissue_data.py +++ b/wqflask/base/mrna_assay_tissue_data.py @@ -9,7 +9,8 @@ from utility.db_tools import escape from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) + class MrnaAssayTissueData: @@ -20,7 +21,7 @@ class MrnaAssayTissueData: self.data = collections.defaultdict(Bunch) - query = '''select t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, t.description, t.Probe_Target_Description + query = '''select t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, t.description, t.Probe_Target_Description from ( select Symbol, max(Mean) as maxmean from TissueProbeSetXRef @@ -31,14 +32,14 @@ class MrnaAssayTissueData: # Due to the limit size of TissueProbeSetFreezeId table in DB, # performance of inner join is acceptable.MrnaAssayTissueData(gene_symbols=symbol_list) if len(gene_symbols) == 0: - query += '''Symbol!='' and Symbol Is Not Null group by Symbol) + query += '''Symbol!='' and Symbol Is Not Null group by Symbol) as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol and t.Mean = x.maxmean; ''' else: in_clause = db_tools.create_in_clause(gene_symbols) - #ZS: This was in the query, not sure why: http://docs.python.org/2/library/string.html?highlight=lower#string.lower + # ZS: This was in the query, not sure why: http://docs.python.org/2/library/string.html?highlight=lower#string.lower query += ''' Symbol in {} group by Symbol) as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol and t.Mean = x.maxmean; @@ -64,16 +65,16 @@ class MrnaAssayTissueData: self.data[symbol].probe_target_description = result.Probe_Target_Description ########################################################################### - #Input: cursor, symbolList (list), dataIdDict(Dict) - #output: symbolValuepairDict (dictionary):one dictionary of Symbol and Value Pair, + # Input: cursor, symbolList (list), dataIdDict(Dict) + # output: symbolValuepairDict (dictionary):one dictionary of Symbol and Value Pair, # key is symbol, value is one list of expression values of one probeSet; - #function: get one dictionary whose key is gene symbol and value is tissue expression data (list type). - #Attention! All keys are lower case! + # function: get one dictionary whose key is gene symbol and value is tissue expression data (list type). + # Attention! All keys are lower case! ########################################################################### def get_symbol_values_pairs(self): id_list = [self.data[symbol].data_id for symbol in self.data] - + symbol_values_dict = {} if len(id_list) > 0: @@ -87,6 +88,7 @@ class MrnaAssayTissueData: if result.Symbol.lower() not in symbol_values_dict: symbol_values_dict[result.Symbol.lower()] = [result.value] else: - symbol_values_dict[result.Symbol.lower()].append(result.value) + symbol_values_dict[result.Symbol.lower()].append( + result.value) return symbol_values_dict diff --git a/wqflask/base/species.py b/wqflask/base/species.py index eae3325a..e3c29916 100644 --- a/wqflask/base/species.py +++ b/wqflask/base/species.py @@ -4,7 +4,8 @@ from flask import Flask, g from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) + class TheSpecies: def __init__(self, dataset=None, species_name=None): @@ -15,6 +16,7 @@ class TheSpecies: self.dataset = dataset self.chromosomes = Chromosomes(dataset=self.dataset) + class IndChromosome: def __init__(self, name, length): self.name = name @@ -25,6 +27,7 @@ class IndChromosome: """Chromosome length in megabases""" return self.length / 1000000 + class Chromosomes: def __init__(self, dataset=None, species=None): self.chromosomes = collections.OrderedDict() @@ -52,4 +55,5 @@ class Chromosomes: results = g.db.execute(query).fetchall() for item in results: - self.chromosomes[item.OrderId] = IndChromosome(item.Name, item.Length) + self.chromosomes[item.OrderId] = IndChromosome( + item.Name, item.Length) diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 5574128d..d09cfd40 100644 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -479,9 +479,9 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): else: description_display = trait.symbol - if (str(description_display or "") != "" and - description_display != 'N/A' and - str(target_string or "") != "" and target_string != 'None'): + if (str(description_display or "") != "" + and description_display != 'N/A' + and str(target_string or "") != "" and target_string != 'None'): description_display = description_display + '; ' + target_string.strip() # Save it for the jinja2 template @@ -575,6 +575,6 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): if str(trait.lrs or "") != "": trait.LRS_score_repr = LRS_score_repr = '%3.1f' % trait.lrs else: - raise KeyError(repr(trait.name) + - ' information is not found in the database.') + raise KeyError(repr(trait.name) + + ' information is not found in the database.') return trait diff --git a/wqflask/base/webqtlCaseData.py b/wqflask/base/webqtlCaseData.py index aa55470f..25b6cb8a 100644 --- a/wqflask/base/webqtlCaseData.py +++ b/wqflask/base/webqtlCaseData.py @@ -28,17 +28,20 @@ import utility.tools utility.tools.show_settings() + class webqtlCaseData: """one case data in one trait""" def __init__(self, name, value=None, variance=None, num_cases=None, name2=None): self.name = name - self.name2 = name2 # Other name (for traits like BXD65a) + # Other name (for traits like BXD65a) + self.name2 = name2 self.value = value # Trait Value self.variance = variance # Trait Variance self.num_cases = num_cases # Number of individuals/cases self.extra_attributes = None - self.this_id = None # Set a sane default (can't be just "id" cause that's a reserved word) + # Set a sane default (can't be just "id" cause that's a reserved word) + self.this_id = None self.outlier = None # Not set to True/False until later def __repr__(self): @@ -78,4 +81,4 @@ class webqtlCaseData: def display_num_cases(self): if self.num_cases is not None: return "%s" % self.num_cases - return "x"
\ No newline at end of file + return "x" diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py index bb8704a5..39947158 100644 --- a/wqflask/base/webqtlConfig.py +++ b/wqflask/base/webqtlConfig.py @@ -1,4 +1,4 @@ -#########################################' +# ' # Environment Variables - public # # Note: much of this needs to handled by the settings/environment @@ -10,35 +10,35 @@ from utility.tools import valid_path, mk_dir, assert_dir, assert_writable_dir, flat_files, TEMPDIR -#Debug Level -#1 for debug, mod python will reload import each time +# Debug Level +# 1 for debug, mod python will reload import each time DEBUG = 1 -#USER privilege -USERDICT = {'guest':1,'user':2, 'admin':3, 'root':4} +# USER privilege +USERDICT = {'guest': 1, 'user': 2, 'admin': 3, 'root': 4} -#Set privileges +# Set privileges SUPER_PRIVILEGES = {'data': 'edit', 'metadata': 'edit', 'admin': 'edit-admins'} DEFAULT_PRIVILEGES = {'data': 'view', 'metadata': 'view', 'admin': 'not-admin'} -#minimum number of informative strains +# minimum number of informative strains KMININFORMATIVE = 5 -#Daily download limit from one IP +# Daily download limit from one IP DAILYMAXIMUM = 1000 -#maximum LRS value +# maximum LRS value MAXLRS = 460.0 -#MINIMUM Database public value +# MINIMUM Database public value PUBLICTHRESH = 0 -#EXTERNAL LINK ADDRESSES +# EXTERNAL LINK ADDRESSES PUBMEDLINK_URL = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=%s&dopt=Abstract" UCSC_BLAT = 'http://genome.ucsc.edu/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s' UTHSC_BLAT = 'http://ucscbrowser.genenetwork.org/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s' UTHSC_BLAT2 = 'http://ucscbrowserbeta.genenetwork.org/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s' -GENOMEBROWSER_URL="https://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=%s" +GENOMEBROWSER_URL = "https://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=%s" NCBI_LOCUSID = "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=%s" GENBANK_ID = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=search&doptcmdl=DocSum&term=%s" OMIM_ID = "http://www.ncbi.nlm.nih.gov/omim/%s" @@ -56,7 +56,7 @@ GEMMA_URL = "http://www.chibi.ubc.ca/Gemma/gene/showGene.html?ncbiid=%s" ABA_URL = "http://mouse.brain-map.org/search/show?search_type=gene&search_term=%s" EBIGWAS_URL = "https://www.ebi.ac.uk/gwas/search?query=%s" WIKI_PI_URL = "http://severus.dbmi.pitt.edu/wiki-pi/index.php/search?q=%s" -ENSEMBLETRANSCRIPT_URL="http://useast.ensembl.org/Mus_musculus/Transcript/Idhistory?t=%s" +ENSEMBLETRANSCRIPT_URL = "http://useast.ensembl.org/Mus_musculus/Transcript/Idhistory?t=%s" DBSNP = 'http://ensembl.org/Mus_musculus/Variation/Population?v=%s' PROTEIN_ATLAS_URL = "http://www.proteinatlas.org/search/%s" OPEN_TARGETS_URL = "https://genetics.opentargets.org/gene/%s" @@ -71,13 +71,13 @@ RRID_RAT_URL = "https://rgd.mcw.edu/rgdweb/report/strain/main.html?id=%s" # want to reach this base dir assert_writable_dir(TEMPDIR) -TMPDIR = mk_dir(TEMPDIR+'/gn2/') +TMPDIR = mk_dir(TEMPDIR + '/gn2/') assert_writable_dir(TMPDIR) -CACHEDIR = mk_dir(TMPDIR+'/cache/') +CACHEDIR = mk_dir(TMPDIR + '/cache/') # We can no longer write into the git tree: -GENERATED_IMAGE_DIR = mk_dir(TMPDIR+'generated/') -GENERATED_TEXT_DIR = mk_dir(TMPDIR+'generated_text/') +GENERATED_IMAGE_DIR = mk_dir(TMPDIR + 'generated/') +GENERATED_TEXT_DIR = mk_dir(TMPDIR + 'generated_text/') # Make sure we have permissions to access these assert_writable_dir(CACHEDIR) @@ -85,12 +85,12 @@ assert_writable_dir(GENERATED_IMAGE_DIR) assert_writable_dir(GENERATED_TEXT_DIR) # Flat file directories -GENODIR = flat_files('genotype')+'/' +GENODIR = flat_files('genotype') + '/' assert_dir(GENODIR) # assert_dir(GENODIR+'bimbam') # for gemma # JSON genotypes are OBSOLETE -JSON_GENODIR = flat_files('genotype/json')+'/' +JSON_GENODIR = flat_files('genotype/json') + '/' if not valid_path(JSON_GENODIR): # fall back on old location (move the dir, FIXME) JSON_GENODIR = flat_files('json') @@ -98,4 +98,4 @@ if not valid_path(JSON_GENODIR): # Are we using the following...? PORTADDR = "http://50.16.251.170" INFOPAGEHREF = '/dbdoc/%s.html' -CGIDIR = '/webqtl/' #XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR' +CGIDIR = '/webqtl/' # XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR' diff --git a/wqflask/db/call.py b/wqflask/db/call.py index 0971d2a2..1fe0772b 100644 --- a/wqflask/db/call.py +++ b/wqflask/db/call.py @@ -4,7 +4,9 @@ from flask import g import string try: # Python2 support - import urllib.request, urllib.error, urllib.parse + import urllib.request + import urllib.error + import urllib.parse except: import urllib2 import json @@ -12,10 +14,11 @@ from utility.tools import USE_GN_SERVER, LOG_SQL, GN_SERVER_URL from utility.benchmark import Bench from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) # from inspect import stack + def fetch1(query, path=None, func=None): """Fetch one result as a Tuple using either a SQL query or the URI path to GN_SERVER (when USE_GN_SERVER is True). Apply func to @@ -35,6 +38,7 @@ GN_SERVER result when set (which should return a Tuple) else: return fetchone(query) + def fetchone(query): """Return tuple containing one row by calling SQL directly (the original fetchone, but with logging) @@ -46,6 +50,7 @@ original fetchone, but with logging) return res.fetchone() return logger.sql(query, helper) + def fetchall(query): """Return row iterator by calling SQL directly (the original fetchall, but with logging) @@ -57,6 +62,7 @@ original fetchall, but with logging) return res.fetchall() return logger.sql(query, helper) + def gn_server(path): """Return JSON record by calling GN_SERVER @@ -64,9 +70,9 @@ def gn_server(path): with Bench("GN_SERVER", LOG_SQL): res = () try: - res = urllib.request.urlopen(GN_SERVER_URL+path) + res = urllib.request.urlopen(GN_SERVER_URL + path) except: - res = urllib2.urlopen(GN_SERVER_URL+path) + res = urllib2.urlopen(GN_SERVER_URL + path) rest = res.read() res2 = json.loads(rest) logger.debug(res2) diff --git a/wqflask/db/gn_server.py b/wqflask/db/gn_server.py index da224112..f9b01658 100644 --- a/wqflask/db/gn_server.py +++ b/wqflask/db/gn_server.py @@ -3,7 +3,8 @@ from db.call import gn_server from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) + def menu_main(): return gn_server("/int/menu/main.json") diff --git a/wqflask/db/webqtlDatabaseFunction.py b/wqflask/db/webqtlDatabaseFunction.py index 2805febd..29112949 100644 --- a/wqflask/db/webqtlDatabaseFunction.py +++ b/wqflask/db/webqtlDatabaseFunction.py @@ -24,24 +24,27 @@ from db.call import fetch1 from utility.tools import USE_GN_SERVER from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) ########################################################################### -#output: cursor instance -#function: connect to database and return cursor instance +# output: cursor instance +# function: connect to database and return cursor instance ########################################################################### + def retrieve_species(group): """Get the species of a group (e.g. returns string "mouse" on "BXD" """ - result = fetch1("select Species.Name from Species, InbredSet where InbredSet.Name = '%s' and InbredSet.SpeciesId = Species.Id" % (group), "/cross/"+group+".json", lambda r: (r["species"],))[0] + result = fetch1("select Species.Name from Species, InbredSet where InbredSet.Name = '%s' and InbredSet.SpeciesId = Species.Id" % ( + group), "/cross/" + group + ".json", lambda r: (r["species"],))[0] logger.debug("retrieve_species result:", result) return result def retrieve_species_id(group): - result = fetch1("select SpeciesId from InbredSet where Name = '%s'" % (group), "/cross/"+group+".json", lambda r: (r["species_id"],))[0] + result = fetch1("select SpeciesId from InbredSet where Name = '%s'" % ( + group), "/cross/" + group + ".json", lambda r: (r["species_id"],))[0] logger.debug("retrieve_species_id result:", result) return result diff --git a/wqflask/maintenance/convert_dryad_to_bimbam.py b/wqflask/maintenance/convert_dryad_to_bimbam.py index 12ce35e9..18fbb8a1 100644 --- a/wqflask/maintenance/convert_dryad_to_bimbam.py +++ b/wqflask/maintenance/convert_dryad_to_bimbam.py @@ -41,7 +41,7 @@ def read_dryad_file(filename): return geno_rows - #for i, marker in enumerate(marker_list): + # for i, marker in enumerate(marker_list): # this_row = [] # this_row.append(marker) # this_row.append("X") @@ -52,18 +52,21 @@ def read_dryad_file(filename): # this_row.append(line.split(" ")[i+2]) # print("row: " + str(i)) # geno_rows.append(this_row) - # - #return geno_rows + # + # return geno_rows + def write_bimbam_files(geno_rows): with open('/home/zas1024/cfw_data/CFW_geno.txt', 'w') as geno_fh: for row in geno_rows: geno_fh.write(", ".join(row) + "\n") + def convert_dryad_to_bimbam(filename): geno_file_rows = read_dryad_file(filename) write_bimbam_files(geno_file_rows) -if __name__=="__main__": + +if __name__ == "__main__": input_filename = "/home/zas1024/cfw_data/" + sys.argv[1] + ".txt" convert_dryad_to_bimbam(input_filename) diff --git a/wqflask/maintenance/convert_geno_to_bimbam.py b/wqflask/maintenance/convert_geno_to_bimbam.py index 0853b3ac..078be529 100644 --- a/wqflask/maintenance/convert_geno_to_bimbam.py +++ b/wqflask/maintenance/convert_geno_to_bimbam.py @@ -20,7 +20,10 @@ import simplejson as json from pprint import pformat as pf -class EmptyConfigurations(Exception): pass + +class EmptyConfigurations(Exception): + pass + class Marker: def __init__(self): @@ -30,6 +33,7 @@ class Marker: self.Mb = None self.genotypes = [] + class ConvertGenoFile: def __init__(self, input_file, output_files): @@ -52,7 +56,7 @@ class ConvertGenoFile: '@pat': "0", '@het': "0.5", '@unk': "NA" - } + } self.configurations = {} self.input_fh = open(self.input_file) @@ -80,13 +84,14 @@ class ConvertGenoFile: genotypes = row_items[2:] for item_count, genotype in enumerate(genotypes): if genotype.upper().strip() in self.configurations: - this_marker.genotypes.append(self.configurations[genotype.upper().strip()]) + this_marker.genotypes.append( + self.configurations[genotype.upper().strip()]) else: this_marker.genotypes.append("NA") self.markers.append(this_marker.__dict__) - self.write_to_bimbam() + self.write_to_bimbam() def write_to_bimbam(self): with open(self.output_files[0], "w") as geno_fh: @@ -103,9 +108,11 @@ class ConvertGenoFile: with open(self.output_files[2], "w") as snp_fh: for marker in self.markers: if self.mb_exists: - snp_fh.write(marker['name'] +", " + str(int(float(marker['Mb'])*1000000)) + ", " + marker['chr'] + "\n") + snp_fh.write( + marker['name'] + ", " + str(int(float(marker['Mb']) * 1000000)) + ", " + marker['chr'] + "\n") else: - snp_fh.write(marker['name'] +", " + str(int(float(marker['cM'])*1000000)) + ", " + marker['chr'] + "\n") + snp_fh.write( + marker['name'] + ", " + str(int(float(marker['cM']) * 1000000)) + ", " + marker['chr'] + "\n") def get_sample_list(self, row_contents): self.sample_list = [] @@ -119,7 +126,7 @@ class ConvertGenoFile: self.sample_list = row_contents[3:] else: self.sample_list = row_contents[2:] - + def process_rows(self): for self.latest_row_pos, row in enumerate(self.input_fh): self.latest_row_value = row @@ -157,10 +164,14 @@ class ConvertGenoFile: group_name = ".".join(input_file.split('.')[:-1]) if group_name == "HSNIH-Palmer": continue - geno_output_file = os.path.join(new_directory, group_name + "_geno.txt") - pheno_output_file = os.path.join(new_directory, group_name + "_pheno.txt") - snp_output_file = os.path.join(new_directory, group_name + "_snps.txt") - output_files = [geno_output_file, pheno_output_file, snp_output_file] + geno_output_file = os.path.join( + new_directory, group_name + "_geno.txt") + pheno_output_file = os.path.join( + new_directory, group_name + "_pheno.txt") + snp_output_file = os.path.join( + new_directory, group_name + "_snps.txt") + output_files = [geno_output_file, + pheno_output_file, snp_output_file] print("%s -> %s" % ( os.path.join(old_directory, input_file), geno_output_file)) convertob = ConvertGenoFile(input_file, output_files) @@ -173,17 +184,18 @@ class ConvertGenoFile: print(" Exception:", why) print(traceback.print_exc()) print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos, - convertob.latest_col_pos)) + convertob.latest_col_pos)) print(" Column is:", convertob.latest_col_value) print(" Row is:", convertob.latest_row_value) break -if __name__=="__main__": + +if __name__ == "__main__": Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype""" New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/bimbam""" #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno""" #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps""" #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json") - #convertob.convert() + # convertob.convert() ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory) - #ConvertGenoFiles(Geno_Directory) + # ConvertGenoFiles(Geno_Directory) diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py index 544e2fd1..db65a11f 100644 --- a/wqflask/maintenance/gen_select_dataset.py +++ b/wqflask/maintenance/gen_select_dataset.py @@ -55,16 +55,17 @@ from pprint import pformat as pf #conn = Engine.connect() + def parse_db_uri(): """Converts a database URI to the db name, host name, user name, and password""" parsed_uri = urllib.parse.urlparse(SQL_URI) db_conn_info = dict( - db = parsed_uri.path[1:], - host = parsed_uri.hostname, - user = parsed_uri.username, - passwd = parsed_uri.password) + db=parsed_uri.path[1:], + host=parsed_uri.hostname, + user=parsed_uri.username, + passwd=parsed_uri.password) print(db_conn_info) return db_conn_info @@ -119,21 +120,23 @@ def get_types(groups): else: if not phenotypes_exist(group_name) and not genotypes_exist(group_name): types[species].pop(group_name, None) - groups[species] = tuple(group for group in groups[species] if group[0] != group_name) - else: #ZS: This whole else statement might be unnecessary, need to check + groups[species] = tuple( + group for group in groups[species] if group[0] != group_name) + else: # ZS: This whole else statement might be unnecessary, need to check types_list = build_types(species, group_name) if len(types_list) > 0: types[species][group_name] = types_list else: types[species].pop(group_name, None) - groups[species] = tuple(group for group in groups[species] if group[0] != group_name) + groups[species] = tuple( + group for group in groups[species] if group[0] != group_name) return types def phenotypes_exist(group_name): #print("group_name:", group_name) Cursor.execute("""select Name from PublishFreeze - where PublishFreeze.Name = '%s'""" % (group_name+"Publish")) + where PublishFreeze.Name = '%s'""" % (group_name + "Publish")) results = Cursor.fetchone() #print("RESULTS:", results) @@ -143,10 +146,11 @@ def phenotypes_exist(group_name): else: return False + def genotypes_exist(group_name): #print("group_name:", group_name) Cursor.execute("""select Name from GenoFreeze - where GenoFreeze.Name = '%s'""" % (group_name+"Geno")) + where GenoFreeze.Name = '%s'""" % (group_name + "Geno")) results = Cursor.fetchone() #print("RESULTS:", results) @@ -156,6 +160,7 @@ def genotypes_exist(group_name): else: return False + def build_types(species, group): """Fetches tissues @@ -184,6 +189,7 @@ def build_types(species, group): return results + def get_datasets(types): """Build datasets list""" datasets = {} @@ -246,7 +252,7 @@ def build_datasets(species, group, type_name): dataset_text = "%s Genotypes" % group datasets.append((dataset_id, dataset_value, dataset_text)) - else: # for mRNA expression/ProbeSet + else: # for mRNA expression/ProbeSet Cursor.execute("""select ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName from ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where Species.Name = '%s' and Species.Id = InbredSet.SpeciesId and @@ -308,6 +314,7 @@ def _test_it(): datasets = build_datasets("Mouse", "BXD", "Hippocampus") #print("build_datasets:", pf(datasets)) + if __name__ == '__main__': Conn = MySQLdb.Connect(**parse_db_uri()) Cursor = Conn.cursor() diff --git a/wqflask/maintenance/generate_kinship_from_bimbam.py b/wqflask/maintenance/generate_kinship_from_bimbam.py index 3e4d1741..9f01d094 100644 --- a/wqflask/maintenance/generate_kinship_from_bimbam.py +++ b/wqflask/maintenance/generate_kinship_from_bimbam.py @@ -13,14 +13,17 @@ sys.path.append("..") import os import glob + class GenerateKinshipMatrices: def __init__(self, group_name, geno_file, pheno_file): self.group_name = group_name self.geno_file = geno_file self.pheno_file = pheno_file - + def generate_kinship(self): - gemma_command = "/gnu/store/xhzgjr0jvakxv6h3blj8z496xjig69b0-profile/bin/gemma -g " + self.geno_file + " -p " + self.pheno_file + " -gk 1 -outdir /home/zas1024/genotype_files/genotype/bimbam/ -o " + self.group_name + gemma_command = "/gnu/store/xhzgjr0jvakxv6h3blj8z496xjig69b0-profile/bin/gemma -g " + self.geno_file + \ + " -p " + self.pheno_file + \ + " -gk 1 -outdir /home/zas1024/genotype_files/genotype/bimbam/ -o " + self.group_name print("command:", gemma_command) os.system(gemma_command) @@ -33,9 +36,12 @@ class GenerateKinshipMatrices: group_name = ".".join(input_file.split('.')[:-1]) if group_name == "HSNIH-Palmer": continue - geno_input_file = os.path.join(bimbam_dir, group_name + "_geno.txt") - pheno_input_file = os.path.join(bimbam_dir, group_name + "_pheno.txt") - convertob = GenerateKinshipMatrices(group_name, geno_input_file, pheno_input_file) + geno_input_file = os.path.join( + bimbam_dir, group_name + "_geno.txt") + pheno_input_file = os.path.join( + bimbam_dir, group_name + "_pheno.txt") + convertob = GenerateKinshipMatrices( + group_name, geno_input_file, pheno_input_file) try: convertob.generate_kinship() except EmptyConfigurations as why: @@ -46,15 +52,15 @@ class GenerateKinshipMatrices: print(" Exception:", why) print(traceback.print_exc()) print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos, - convertob.latest_col_pos)) + convertob.latest_col_pos)) print(" Column is:", convertob.latest_col_value) print(" Row is:", convertob.latest_row_value) break - - -if __name__=="__main__": + + +if __name__ == "__main__": Geno_Directory = """/export/local/home/zas1024/genotype_files/genotype/""" Bimbam_Directory = """/export/local/home/zas1024/genotype_files/genotype/bimbam/""" GenerateKinshipMatrices.process_all(Geno_Directory, Bimbam_Directory) - - #./gemma -g /home/zas1024/genotype_files/genotype/bimbam/BXD_geno.txt -p /home/zas1024/genotype_files/genotype/bimbam/BXD_pheno.txt -gk 1 -o BXD + + # ./gemma -g /home/zas1024/genotype_files/genotype/bimbam/BXD_geno.txt -p /home/zas1024/genotype_files/genotype/bimbam/BXD_pheno.txt -gk 1 -o BXD diff --git a/wqflask/maintenance/generate_probesetfreeze_file.py b/wqflask/maintenance/generate_probesetfreeze_file.py index b1e41e9a..e964c8ed 100644 --- a/wqflask/maintenance/generate_probesetfreeze_file.py +++ b/wqflask/maintenance/generate_probesetfreeze_file.py @@ -23,10 +23,12 @@ def get_cursor(): cursor = con.cursor() return cursor + def show_progress(process, counter): if counter % 1000 == 0: print("{}: {}".format(process, counter)) + def get_strains(cursor): cursor.execute("""select Strain.Name from Strain, StrainXRef, InbredSet @@ -42,6 +44,7 @@ def get_strains(cursor): return strains + def get_probeset_vals(cursor, dataset_name): cursor.execute(""" select ProbeSet.Id, ProbeSet.Name from ProbeSetXRef, @@ -77,6 +80,7 @@ def get_probeset_vals(cursor, dataset_name): return probeset_vals + def trim_strains(strains, probeset_vals): trimmed_strains = [] #print("probeset_vals is:", pf(probeset_vals)) @@ -89,6 +93,7 @@ def trim_strains(strains, probeset_vals): print("trimmed_strains:", pf(trimmed_strains)) return trimmed_strains + def write_data_matrix_file(strains, probeset_vals, filename): with open(filename, "wb") as fh: csv_writer = csv.writer(fh, delimiter=",", quoting=csv.QUOTE_ALL) @@ -103,10 +108,12 @@ def write_data_matrix_file(strains, probeset_vals, filename): csv_writer.writerow(row_data) show_progress("Writing", counter) + def main(): - filename = os.path.expanduser("~/gene/wqflask/maintenance/" + - "ProbeSetFreezeId_210_FullName_Eye_AXBXA_Illumina_V6.2" + - "(Oct08)_RankInv_Beta.txt") + filename = os.path.expanduser( + "~/gene/wqflask/maintenance/" + "ProbeSetFreezeId_210_FullName_Eye_AXBXA_Illumina_V6.2" + "(Oct08)_RankInv_Beta.txt") dataset_name = "Eye_AXBXA_1008_RankInv" cursor = get_cursor() @@ -117,5 +124,6 @@ def main(): trimmed_strains = trim_strains(strains, probeset_vals) write_data_matrix_file(trimmed_strains, probeset_vals, filename) + if __name__ == '__main__': main() diff --git a/wqflask/maintenance/geno_to_json.py b/wqflask/maintenance/geno_to_json.py index f5f7e0e7..32e0e34b 100644 --- a/wqflask/maintenance/geno_to_json.py +++ b/wqflask/maintenance/geno_to_json.py @@ -25,9 +25,10 @@ from pprint import pformat as pf #from utility.tools import flat_files -class EmptyConfigurations(Exception): pass - +class EmptyConfigurations(Exception): + pass + class Marker: def __init__(self): @@ -37,23 +38,24 @@ class Marker: self.Mb = None self.genotypes = [] + class ConvertGenoFile: def __init__(self, input_file, output_file): - + self.input_file = input_file self.output_file = output_file - + self.mb_exists = False self.cm_exists = False self.markers = [] - + self.latest_row_pos = None self.latest_col_pos = None - + self.latest_row_value = None self.latest_col_value = None - + def convert(self): self.haplotype_notation = { @@ -61,24 +63,23 @@ class ConvertGenoFile: '@pat': "0", '@het': "0.5", '@unk': "NA" - } - + } + self.configurations = {} #self.skipped_cols = 3 - - #if self.input_file.endswith(".geno.gz"): + + # if self.input_file.endswith(".geno.gz"): # print("self.input_file: ", self.input_file) # self.input_fh = gzip.open(self.input_file) - #else: + # else: self.input_fh = open(self.input_file) - + with open(self.output_file, "w") as self.output_fh: - #if self.file_type == "geno": + # if self.file_type == "geno": self.process_csv() - #elif self.file_type == "snps": + # elif self.file_type == "snps": # self.process_snps_file() - def process_csv(self): for row_count, row in enumerate(self.process_rows()): row_items = row.split("\t") @@ -100,31 +101,31 @@ class ConvertGenoFile: genotypes = row_items[2:] for item_count, genotype in enumerate(genotypes): if genotype.upper() in self.configurations: - this_marker.genotypes.append(self.configurations[genotype.upper()]) + this_marker.genotypes.append( + self.configurations[genotype.upper()]) else: this_marker.genotypes.append("NA") - - #print("this_marker is:", pf(this_marker.__dict__)) - #if this_marker.chr == "14": + + #print("this_marker is:", pf(this_marker.__dict__)) + # if this_marker.chr == "14": self.markers.append(this_marker.__dict__) with open(self.output_file, 'w') as fh: json.dump(self.markers, fh, indent=" ", sort_keys=True) - - # print('configurations:', str(configurations)) - #self.latest_col_pos = item_count + self.skipped_cols - #self.latest_col_value = item - - #if item_count != 0: - # self.output_fh.write(" ") - #self.output_fh.write(self.configurations[item.upper()]) - - #self.output_fh.write("\n") + # print('configurations:', str(configurations)) + #self.latest_col_pos = item_count + self.skipped_cols + #self.latest_col_value = item + + # if item_count != 0: + # self.output_fh.write(" ") + # self.output_fh.write(self.configurations[item.upper()]) + + # self.output_fh.write("\n") def process_rows(self): for self.latest_row_pos, row in enumerate(self.input_fh): - #if self.input_file.endswith(".geno.gz"): + # if self.input_file.endswith(".geno.gz"): # print("row: ", row) self.latest_row_value = row # Take care of headers @@ -171,26 +172,25 @@ class ConvertGenoFile: print(" Exception:", why) print(traceback.print_exc()) print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos, - convertob.latest_col_pos)) + convertob.latest_col_pos)) print(" Column is:", convertob.latest_col_value) print(" Row is:", convertob.latest_row_value) break - - #def process_snps_file(cls, snps_file, new_directory): + + # def process_snps_file(cls, snps_file, new_directory): # output_file = os.path.join(new_directory, "mouse_families.json") # print("%s -> %s" % (snps_file, output_file)) # convertob = ConvertGenoFile(input_file, output_file) - -if __name__=="__main__": +if __name__ == "__main__": Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype""" New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/json""" #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno""" #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps""" #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json") - #convertob.convert() + # convertob.convert() ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory) - #ConvertGenoFiles(Geno_Directory) - + # ConvertGenoFiles(Geno_Directory) + #process_csv(Input_File, Output_File) diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py index 3f9d0278..0a450d3f 100644 --- a/wqflask/maintenance/get_group_samplelists.py +++ b/wqflask/maintenance/get_group_samplelists.py @@ -4,12 +4,14 @@ import gzip from base import webqtlConfig + def get_samplelist(file_type, geno_file): if file_type == "geno": return get_samplelist_from_geno(geno_file) elif file_type == "plink": return get_samplelist_from_plink(geno_file) + def get_samplelist_from_geno(genofilename): if os.path.isfile(genofilename + '.gz'): genofilename += '.gz' @@ -33,6 +35,7 @@ def get_samplelist_from_geno(genofilename): samplelist = headers[3:] return samplelist + def get_samplelist_from_plink(genofilename): genofile = open(genofilename) diff --git a/wqflask/maintenance/print_benchmark.py b/wqflask/maintenance/print_benchmark.py index a1046c86..9d12da8a 100644 --- a/wqflask/maintenance/print_benchmark.py +++ b/wqflask/maintenance/print_benchmark.py @@ -15,15 +15,18 @@ class TheCounter: self.time_took = time.time() - start_time TheCounter.Counters[self.__class__.__name__] = self.time_took + class PrintAll(TheCounter): def print_it(self, counter): print(counter) + class PrintSome(TheCounter): def print_it(self, counter): if counter % 1000 == 0: print(counter) + class PrintNone(TheCounter): def print_it(self, counter): pass @@ -37,5 +40,6 @@ def new_main(): print(pf(TheCounter.Counters)) + if __name__ == '__main__': new_main() diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py index 701b2b50..0cc963e5 100644 --- a/wqflask/maintenance/quantile_normalize.py +++ b/wqflask/maintenance/quantile_normalize.py @@ -14,42 +14,48 @@ from wqflask import app from utility.elasticsearch_tools import get_elasticsearch_connection from utility.tools import ELASTICSEARCH_HOST, ELASTICSEARCH_PORT, SQL_URI + def parse_db_uri(): """Converts a database URI to the db name, host name, user name, and password""" parsed_uri = urllib.parse.urlparse(SQL_URI) db_conn_info = dict( - db = parsed_uri.path[1:], - host = parsed_uri.hostname, - user = parsed_uri.username, - passwd = parsed_uri.password) + db=parsed_uri.path[1:], + host=parsed_uri.hostname, + user=parsed_uri.username, + passwd=parsed_uri.password) print(db_conn_info) return db_conn_info + def create_dataframe(input_file): with open(input_file) as f: ncols = len(f.readline().split("\t")) - input_array = np.loadtxt(open(input_file, "rb"), delimiter="\t", skiprows=1, usecols=list(range(1, ncols))) + input_array = np.loadtxt(open( + input_file, "rb"), delimiter="\t", skiprows=1, usecols=list(range(1, ncols))) return pd.DataFrame(input_array) -#This function taken from https://github.com/ShawnLYU/Quantile_Normalize +# This function taken from https://github.com/ShawnLYU/Quantile_Normalize + + def quantileNormalize(df_input): df = df_input.copy() - #compute rank + # compute rank dic = {} for col in df: - dic.update({col : sorted(df[col])}) + dic.update({col: sorted(df[col])}) sorted_df = pd.DataFrame(dic) - rank = sorted_df.mean(axis = 1).tolist() - #sort + rank = sorted_df.mean(axis=1).tolist() + # sort for col in df: t = np.searchsorted(np.sort(df[col]), df[col]) df[col] = [rank[i] for i in t] return df + def set_data(dataset_name): orig_file = "/home/zas1024/cfw_data/" + dataset_name + ".txt" @@ -64,10 +70,10 @@ def set_data(dataset_name): trait_name = line1.split('\t')[0] for i, sample in enumerate(sample_names): this_sample = { - "name": sample, - "value": line1.split('\t')[i+1], - "qnorm": line2.split('\t')[i+1] - } + "name": sample, + "value": line1.split('\t')[i + 1], + "qnorm": line2.split('\t')[i + 1] + } sample_list.append(this_sample) query = """SELECT Species.SpeciesName, InbredSet.InbredSetName, ProbeSetFreeze.FullName FROM Species, InbredSet, ProbeSetFreeze, ProbeFreeze, ProbeSetXRef, ProbeSet @@ -95,13 +101,14 @@ def set_data(dataset_name): } } + if __name__ == '__main__': Conn = MySQLdb.Connect(**parse_db_uri()) Cursor = Conn.cursor() - #es = Elasticsearch([{ + # es = Elasticsearch([{ # "host": ELASTICSEARCH_HOST, "port": ELASTICSEARCH_PORT - #}], timeout=60) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None + # }], timeout=60) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None es = get_elasticsearch_connection(for_user=False) @@ -116,8 +123,8 @@ if __name__ == '__main__': success, _ = bulk(es, set_data(sys.argv[1])) response = es.search( - index = "traits", doc_type = "trait", body = { - "query": { "match": { "name": "ENSMUSG00000028982" } } + index="traits", doc_type="trait", body={ + "query": {"match": {"name": "ENSMUSG00000028982"}} } ) diff --git a/wqflask/maintenance/set_resource_defaults.py b/wqflask/maintenance/set_resource_defaults.py index 4177c124..0f472494 100644 --- a/wqflask/maintenance/set_resource_defaults.py +++ b/wqflask/maintenance/set_resource_defaults.py @@ -37,20 +37,22 @@ import urllib.parse from utility.logger import getLogger logger = getLogger(__name__) + def parse_db_uri(): """Converts a database URI to the db name, host name, user name, and password""" parsed_uri = urllib.parse.urlparse(SQL_URI) db_conn_info = dict( - db = parsed_uri.path[1:], - host = parsed_uri.hostname, - user = parsed_uri.username, - passwd = parsed_uri.password) + db=parsed_uri.path[1:], + host=parsed_uri.hostname, + user=parsed_uri.username, + passwd=parsed_uri.password) print(db_conn_info) return db_conn_info + def insert_probeset_resources(default_owner_id): current_resources = Redis.hgetall("resources") Cursor.execute(""" SELECT @@ -63,20 +65,21 @@ def insert_probeset_resources(default_owner_id): resource_ob = {} resource_ob['name'] = resource[1] resource_ob['owner_id'] = default_owner_id - resource_ob['data'] = { "dataset" : str(resource[0])} + resource_ob['data'] = {"dataset": str(resource[0])} resource_ob['type'] = "dataset-probeset" if resource[2] < 1 and resource[3] > 0: - resource_ob['default_mask'] = { "data": "view", - "metadata": "view", - "admin": "not-admin"} + resource_ob['default_mask'] = {"data": "view", + "metadata": "view", + "admin": "not-admin"} else: - resource_ob['default_mask'] = { "data": "no-access", - "metadata": "no-access", - "admin": "not-admin"} + resource_ob['default_mask'] = {"data": "no-access", + "metadata": "no-access", + "admin": "not-admin"} resource_ob['group_masks'] = {} add_resource(resource_ob, update=False) + def insert_publish_resources(default_owner_id): current_resources = Redis.hgetall("resources") Cursor.execute(""" SELECT @@ -97,12 +100,12 @@ def insert_publish_resources(default_owner_id): else: resource_ob['name'] = str(resource[0]) resource_ob['owner_id'] = default_owner_id - resource_ob['data'] = { "dataset" : str(resource[1]) , - "trait" : str(resource[0])} + resource_ob['data'] = {"dataset": str(resource[1]), + "trait": str(resource[0])} resource_ob['type'] = "dataset-publish" - resource_ob['default_mask'] = { "data": "view", - "metadata": "view", - "admin": "not-admin"} + resource_ob['default_mask'] = {"data": "view", + "metadata": "view", + "admin": "not-admin"} resource_ob['group_masks'] = {} @@ -110,6 +113,7 @@ def insert_publish_resources(default_owner_id): else: continue + def insert_geno_resources(default_owner_id): current_resources = Redis.hgetall("resources") Cursor.execute(""" SELECT @@ -125,20 +129,21 @@ def insert_geno_resources(default_owner_id): resource_ob['owner_id'] = "c5ce8c56-78a6-474f-bcaf-7129d97f56ae" else: resource_ob['owner_id'] = default_owner_id - resource_ob['data'] = { "dataset" : str(resource[0]) } + resource_ob['data'] = {"dataset": str(resource[0])} resource_ob['type'] = "dataset-geno" if resource[2] < 1: - resource_ob['default_mask'] = { "data": "view", - "metadata": "view", - "admin": "not-admin"} + resource_ob['default_mask'] = {"data": "view", + "metadata": "view", + "admin": "not-admin"} else: - resource_ob['default_mask'] = { "data": "no-access", - "metadata": "no-access", - "admin": "not-admin"} + resource_ob['default_mask'] = {"data": "no-access", + "metadata": "no-access", + "admin": "not-admin"} resource_ob['group_masks'] = {} add_resource(resource_ob, update=False) + def insert_resources(default_owner_id): current_resources = get_resources() print("START") @@ -149,6 +154,7 @@ def insert_resources(default_owner_id): insert_probeset_resources(default_owner_id) print("AFTER PROBESET") + def main(): """Generates and outputs (as json file) the data for the main dropdown menus on the home page""" @@ -158,6 +164,7 @@ def main(): insert_resources(owner_id) + if __name__ == '__main__': Conn = MySQLdb.Connect(**parse_db_uri()) Cursor = Conn.cursor() diff --git a/wqflask/run_gunicorn.py b/wqflask/run_gunicorn.py index 58108e03..03f310eb 100644 --- a/wqflask/run_gunicorn.py +++ b/wqflask/run_gunicorn.py @@ -14,9 +14,11 @@ from utility.startup_config import app_config app_config() + @app.route("/gunicorn") def hello(): return "<h1 style='color:blue'>Hello There!</h1>" + if __name__ == "__main__": app.run(host='0.0.0.0') diff --git a/wqflask/tests/unit/base/test_data_set.py b/wqflask/tests/unit/base/test_data_set.py index ee5d6f06..66ad361d 100644 --- a/wqflask/tests/unit/base/test_data_set.py +++ b/wqflask/tests/unit/base/test_data_set.py @@ -88,8 +88,8 @@ class TestDataSetTypes(unittest.TestCase): '"B139_K_1206_R": "ProbeSet", ' '"Test": "ProbeSet"}')) db_mock.db.execute.assert_called_once_with( - ("SELECT ProbeSetFreeze.Id FROM ProbeSetFreeze " + - "WHERE ProbeSetFreeze.Name = \"Test\" ") + ("SELECT ProbeSetFreeze.Id FROM ProbeSetFreeze " + + "WHERE ProbeSetFreeze.Name = \"Test\" ") ) @mock.patch('base.data_set.g') @@ -145,9 +145,9 @@ class TestDataSetTypes(unittest.TestCase): '"Test": "Publish"}')) db_mock.db.execute.assert_called_with( - ("SELECT PublishFreeze.Name " + - "FROM PublishFreeze, InbredSet " + - "WHERE InbredSet.Name = 'Test' AND " + ("SELECT PublishFreeze.Name " + + "FROM PublishFreeze, InbredSet " + + "WHERE InbredSet.Name = 'Test' AND " "PublishFreeze.InbredSetId = InbredSet.Id") ) diff --git a/wqflask/tests/unit/base/test_webqtl_case_data.py b/wqflask/tests/unit/base/test_webqtl_case_data.py index 8e8ba482..e1555cb4 100644 --- a/wqflask/tests/unit/base/test_webqtl_case_data.py +++ b/wqflask/tests/unit/base/test_webqtl_case_data.py @@ -4,15 +4,16 @@ import unittest from wqflask import app # Required because of utility.tools in webqtlCaseData.py from base.webqtlCaseData import webqtlCaseData + class TestWebqtlCaseData(unittest.TestCase): """Tests for WebqtlCaseData class""" def setUp(self): self.w = webqtlCaseData(name="Test", - value=0, - variance=0.0, - num_cases=10, - name2="Test2") + value=0, + variance=0.0, + num_cases=10, + name2="Test2") def test_webqtl_case_data_repr(self): self.assertEqual( diff --git a/wqflask/tests/unit/utility/test_authentication_tools.py b/wqflask/tests/unit/utility/test_authentication_tools.py index 42dcae88..024ab43f 100644 --- a/wqflask/tests/unit/utility/test_authentication_tools.py +++ b/wqflask/tests/unit/utility/test_authentication_tools.py @@ -5,6 +5,7 @@ from unittest import mock from utility.authentication_tools import check_resource_availability from utility.authentication_tools import add_new_resource + class TestResponse: """Mock Test Response after a request""" @property diff --git a/wqflask/tests/unit/utility/test_chunks.py b/wqflask/tests/unit/utility/test_chunks.py index 8d90a1ec..1d349193 100644 --- a/wqflask/tests/unit/utility/test_chunks.py +++ b/wqflask/tests/unit/utility/test_chunks.py @@ -7,6 +7,7 @@ from utility.chunks import divide_into_chunks class TestChunks(unittest.TestCase): "Test Utility method for chunking" + def test_divide_into_chunks(self): "Check that a list is chunked correctly" self.assertEqual(divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 3), diff --git a/wqflask/tests/unit/wqflask/api/test_correlation.py b/wqflask/tests/unit/wqflask/api/test_correlation.py index d0264b87..1089a36f 100644 --- a/wqflask/tests/unit/wqflask/api/test_correlation.py +++ b/wqflask/tests/unit/wqflask/api/test_correlation.py @@ -105,10 +105,10 @@ class TestCorrelations(unittest.TestCase): target_dataset = AttributeSetter({"group": group}) target_vals = [3.4, 6.2, 4.1, 3.4, 1.2, 5.6] - trait_data = {"S1": AttributeSetter({"value": 2.3}), "S2": AttributeSetter({"value": 1.1}), - "S3": AttributeSetter( - {"value": 6.3}), "S4": AttributeSetter({"value": 3.6}), "S5": AttributeSetter({"value": 4.1}), - "S6": AttributeSetter({"value": 5.0})} + trait_data = {"S1": AttributeSetter({"value": 2.3}), "S2": AttributeSetter({"value": 1.1}), + "S3": AttributeSetter( + {"value": 6.3}), "S4": AttributeSetter({"value": 3.6}), "S5": AttributeSetter({"value": 4.1}), + "S6": AttributeSetter({"value": 5.0})} this_trait = AttributeSetter({"data": trait_data}) mock_normalize.return_value = ([2.3, 1.1, 6.3, 3.6, 4.1, 5.0], [3.4, 6.2, 4.1, 3.4, 1.2, 5.6], 6) @@ -127,9 +127,9 @@ class TestCorrelations(unittest.TestCase): expected_pearsonr = [-0.21618688834430866, 0.680771605997119, 6] expected_spearmanr = [-0.11595420713048969, 0.826848213385815, 6] for i, val in enumerate(expected_pearsonr): - self.assertAlmostEqual(val, results_pearsonr[i],4) + self.assertAlmostEqual(val, results_pearsonr[i], 4) for i, val in enumerate(expected_spearmanr): - self.assertAlmostEqual(val, results_spearmanr[i],4) + self.assertAlmostEqual(val, results_spearmanr[i], 4) self.assertEqual(results_num_overlap, None) @mock.patch("wqflask.api.correlation.do_literature_correlation_for_all_traits") diff --git a/wqflask/tests/unit/wqflask/api/test_gen_menu.py b/wqflask/tests/unit/wqflask/api/test_gen_menu.py index 57eb1650..fd0fe52e 100644 --- a/wqflask/tests/unit/wqflask/api/test_gen_menu.py +++ b/wqflask/tests/unit/wqflask/api/test_gen_menu.py @@ -105,13 +105,13 @@ class TestGenMenu(unittest.TestCase): for name in ["mouse", "human"]: db_mock.db.execute.assert_any_call( ("SELECT InbredSet.Name, InbredSet.FullName, " + - "IFNULL(InbredSet.Family, 'None') " + - "FROM InbredSet, Species WHERE Species.Name " + - "= '{}' AND InbredSet.SpeciesId = Species.Id GROUP by " + - "InbredSet.Name ORDER BY IFNULL(InbredSet.FamilyOrder, " + - "InbredSet.FullName) ASC, IFNULL(InbredSet.Family, " + - "InbredSet.FullName) ASC, InbredSet.FullName ASC, " + - "InbredSet.MenuOrderId ASC").format(name) + "IFNULL(InbredSet.Family, 'None') " + + "FROM InbredSet, Species WHERE Species.Name " + + "= '{}' AND InbredSet.SpeciesId = Species.Id GROUP by " + + "InbredSet.Name ORDER BY IFNULL(InbredSet.FamilyOrder, " + + "InbredSet.FullName) ASC, IFNULL(InbredSet.Family, " + + "InbredSet.FullName) ASC, InbredSet.FullName ASC, " + + "InbredSet.MenuOrderId ASC").format(name) ) @mock.patch('wqflask.api.gen_menu.g') @@ -172,12 +172,12 @@ class TestGenMenu(unittest.TestCase): self.assertEqual(build_datasets("Mouse", "BXD", "Phenotypes"), [['602', "BXDPublish", "BXD Published Phenotypes"]]) db_mock.db.execute.assert_called_with( - "SELECT InfoFiles.GN_AccesionId, PublishFreeze.Name, " + - "PublishFreeze.FullName FROM InfoFiles, PublishFreeze, " + - "InbredSet WHERE InbredSet.Name = 'BXD' AND " + - "PublishFreeze.InbredSetId = InbredSet.Id AND " + - "InfoFiles.InfoPageName = PublishFreeze.Name " + - "ORDER BY PublishFreeze.CreateTime ASC" + "SELECT InfoFiles.GN_AccesionId, PublishFreeze.Name, " + + "PublishFreeze.FullName FROM InfoFiles, PublishFreeze, " + + "InbredSet WHERE InbredSet.Name = 'BXD' AND " + + "PublishFreeze.InbredSetId = InbredSet.Id AND " + + "InfoFiles.InfoPageName = PublishFreeze.Name " + + "ORDER BY PublishFreeze.CreateTime ASC" ) self.assertEqual(build_datasets("Mouse", "MDP", "Phenotypes"), [['602', "BXDPublish", "Mouse Phenome Database"]]) @@ -221,8 +221,8 @@ class TestGenMenu(unittest.TestCase): "SELECT InfoFiles.GN_AccesionId FROM InfoFiles, " "GenoFreeze, InbredSet WHERE InbredSet.Name = 'HLC' AND " "GenoFreeze.InbredSetId = InbredSet.Id AND " - "InfoFiles.InfoPageName = GenoFreeze.ShortName " + - "ORDER BY GenoFreeze.CreateTime DESC" + "InfoFiles.InfoPageName = GenoFreeze.ShortName " + + "ORDER BY GenoFreeze.CreateTime DESC" ) db_mock.db.execute.return_value.fetchone.return_value = () self.assertEqual(build_datasets("Mouse", "HLC", "Genotypes"), @@ -239,16 +239,16 @@ class TestGenMenu(unittest.TestCase): "112", 'HC_M2_0606_P', "Hippocampus Consortium M430v2 (Jun06) PDNN" ]]) db_mock.db.execute.assert_called_once_with( - "SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, " + - "ProbeSetFreeze.FullName FROM ProbeSetFreeze, " + - "ProbeFreeze, InbredSet, Tissue, Species WHERE " + - "Species.Name = 'Mouse' AND Species.Id = " + - "InbredSet.SpeciesId AND InbredSet.Name = 'HLC' AND " + - "ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND " + - "Tissue.Name = 'mRNA' AND ProbeFreeze.TissueId = " + - "Tissue.Id AND ProbeFreeze.InbredSetId = InbredSet.Id AND " + - "ProbeSetFreeze.public > 0 " + - "ORDER BY -ProbeSetFreeze.OrderList DESC, ProbeSetFreeze.CreateTime DESC") + "SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, " + + "ProbeSetFreeze.FullName FROM ProbeSetFreeze, " + + "ProbeFreeze, InbredSet, Tissue, Species WHERE " + + "Species.Name = 'Mouse' AND Species.Id = " + + "InbredSet.SpeciesId AND InbredSet.Name = 'HLC' AND " + + "ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND " + + "Tissue.Name = 'mRNA' AND ProbeFreeze.TissueId = " + + "Tissue.Id AND ProbeFreeze.InbredSetId = InbredSet.Id AND " + + "ProbeSetFreeze.public > 0 " + + "ORDER BY -ProbeSetFreeze.OrderList DESC, ProbeSetFreeze.CreateTime DESC") @mock.patch('wqflask.api.gen_menu.build_datasets') @mock.patch('wqflask.api.gen_menu.g') @@ -266,15 +266,15 @@ class TestGenMenu(unittest.TestCase): ['H', 'H', 'Molecular Traits'], ['R', 'R', 'Molecular Traits']]) db_mock.db.execute.assert_called_once_with( - "SELECT DISTINCT Tissue.Name " + - "FROM ProbeFreeze, ProbeSetFreeze, InbredSet, " + - "Tissue, Species WHERE Species.Name = 'mouse' " + - "AND Species.Id = InbredSet.SpeciesId AND " + - "InbredSet.Name = 'random group' AND " + - "ProbeFreeze.TissueId = Tissue.Id AND " + - "ProbeFreeze.InbredSetId = InbredSet.Id AND " + - "ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id " + - "ORDER BY Tissue.Name" + "SELECT DISTINCT Tissue.Name " + + "FROM ProbeFreeze, ProbeSetFreeze, InbredSet, " + + "Tissue, Species WHERE Species.Name = 'mouse' " + + "AND Species.Id = InbredSet.SpeciesId AND " + + "InbredSet.Name = 'random group' AND " + + "ProbeFreeze.TissueId = Tissue.Id AND " + + "ProbeFreeze.InbredSetId = InbredSet.Id AND " + + "ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id " + + "ORDER BY Tissue.Name" ) @mock.patch('wqflask.api.gen_menu.build_types') diff --git a/wqflask/tests/unit/wqflask/correlation/test_correlation_functions.py b/wqflask/tests/unit/wqflask/correlation/test_correlation_functions.py index 44d2e0fc..2bbeab1f 100644 --- a/wqflask/tests/unit/wqflask/correlation/test_correlation_functions.py +++ b/wqflask/tests/unit/wqflask/correlation/test_correlation_functions.py @@ -5,7 +5,7 @@ from wqflask.correlation.correlation_functions import cal_zero_order_corr_for_ti class TestCorrelationFunctions(unittest.TestCase): - + @mock.patch("wqflask.correlation.correlation_functions.MrnaAssayTissueData") def test_get_trait_symbol_and_tissue_values(self, mock_class): """test for getting trait symbol and tissue_values""" diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_display_mapping_results.py b/wqflask/tests/unit/wqflask/marker_regression/test_display_mapping_results.py index 8ae0f09f..f4869c45 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_display_mapping_results.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_display_mapping_results.py @@ -9,6 +9,7 @@ from wqflask.marker_regression.display_mapping_results import ( class TestDisplayMappingResults(unittest.TestCase): """Basic Methods to test Mapping Results""" + def test_pil_colors(self): """Test that colors use PILLOW color format""" self.assertEqual(DisplayMappingResults.CLICKABLE_WEBQTL_REGION_COLOR, @@ -17,6 +18,7 @@ class TestDisplayMappingResults(unittest.TestCase): class TestHtmlGenWrapper(unittest.TestCase): """Test Wrapper around HTMLGen""" + def test_create_image(self): """Test HT.Image method""" self.assertEqual( @@ -37,7 +39,8 @@ class TestHtmlGenWrapper(unittest.TestCase): cgi="/testing/", enctype='multipart/form-data', name="formName", - submit=HtmlGenWrapper.create_input_tag(type_='hidden', name='Default_Name') + submit=HtmlGenWrapper.create_input_tag( + type_='hidden', name='Default_Name') ) test_image = HtmlGenWrapper.create_image_tag( src="test.png", diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_gemma_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_gemma_mapping.py index fe2569b8..4003d68f 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_gemma_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_gemma_mapping.py @@ -47,11 +47,11 @@ class TestGemmaMapping(unittest.TestCase): @mock.patch("wqflask.marker_regression.run_mapping.random.choice") @mock.patch("wqflask.marker_regression.gemma_mapping.os") @mock.patch("wqflask.marker_regression.gemma_mapping.gen_pheno_txt_file") - def test_run_gemma_firstrun_set_true(self, mock_gen_pheno_txt, mock_os, mock_choice, mock_gen_covar, mock_flat_files,mock_parse_loco): + def test_run_gemma_firstrun_set_true(self, mock_gen_pheno_txt, mock_os, mock_choice, mock_gen_covar, mock_flat_files, mock_parse_loco): """add tests for run_gemma where first run is set to true""" - this_chromosomes={} + this_chromosomes = {} for i in range(1, 5): - this_chromosomes[f'CH{i}']=(AttributeSetter({"name": f"CH{i}"})) + this_chromosomes[f'CH{i}'] = (AttributeSetter({"name": f"CH{i}"})) chromosomes = AttributeSetter({"chromosomes": this_chromosomes}) dataset_group = MockGroup( @@ -68,9 +68,10 @@ class TestGemmaMapping(unittest.TestCase): mock_parse_loco.return_value = [] results = run_gemma(this_trait=trait, this_dataset=dataset, samples=[ ], vals=[], covariates="", use_loco=True) - self.assertEqual(mock_os.system.call_count,2) + self.assertEqual(mock_os.system.call_count, 2) mock_gen_pheno_txt.assert_called_once() - mock_parse_loco.assert_called_once_with(dataset, "GP1_GWA_RRRRRR",True) + mock_parse_loco.assert_called_once_with( + dataset, "GP1_GWA_RRRRRR", True) mock_os.path.isfile.assert_called_once_with( ('/home/user/imgfile_output.assoc.txt')) self.assertEqual(mock_flat_files.call_count, 4) @@ -102,7 +103,8 @@ class TestGemmaMapping(unittest.TestCase): create_trait_side_effect = [] for i in range(4): - create_dataset_side_effect.append(AttributeSetter({"name": f'name_{i}'})) + create_dataset_side_effect.append( + AttributeSetter({"name": f'name_{i}'})) create_trait_side_effect.append( AttributeSetter({"data": [f'data_{i}']})) @@ -144,7 +146,7 @@ class TestGemmaMapping(unittest.TestCase): "files": [["file_name", "user", "~/file1"], ["file_name", "user", "~/file2"]] } - return_file="""X/Y\tM1\t28.457155\tQ\tE\tA\tMMB\t23.3\tW\t0.9\t0.85\t + return_file = """X/Y\tM1\t28.457155\tQ\tE\tA\tMMB\t23.3\tW\t0.9\t0.85\t chr4\tM2\t12\tQ\tE\tMMB\tR\t24\tW\t0.87\t0.5 Y\tM4\t12\tQ\tE\tMMB\tR\t11.6\tW\t0.21\t0.7 X\tM5\t12\tQ\tE\tMMB\tR\t21.1\tW\t0.65\t0.6""" @@ -159,11 +161,14 @@ X\tM5\t12\tQ\tE\tMMB\tR\t21.1\tW\t0.65\t0.6""" mock_open.side_effect = handles results = parse_loco_output( this_dataset={}, gwa_output_filename=".xw/") - expected_results= [ - {'name': 'M1', 'chr': 'X/Y', 'Mb': 2.8457155e-05, 'p_value': 0.85, 'additive': 23.3, 'lod_score': 0.07058107428570727}, - {'name': 'M2', 'chr': 4, 'Mb': 1.2e-05, 'p_value': 0.5, 'additive': 24.0, 'lod_score': 0.3010299956639812}, - {'name': 'M4', 'chr': 'Y', 'Mb': 1.2e-05, 'p_value': 0.7, 'additive': 11.6, 'lod_score': 0.1549019599857432}, - {'name': 'M5', 'chr': 'X', 'Mb': 1.2e-05, 'p_value': 0.6, 'additive': 21.1, 'lod_score': 0.22184874961635637}] + expected_results = [ + {'name': 'M1', 'chr': 'X/Y', 'Mb': 2.8457155e-05, 'p_value': 0.85, + 'additive': 23.3, 'lod_score': 0.07058107428570727}, + {'name': 'M2', 'chr': 4, 'Mb': 1.2e-05, 'p_value': 0.5, + 'additive': 24.0, 'lod_score': 0.3010299956639812}, + {'name': 'M4', 'chr': 'Y', 'Mb': 1.2e-05, 'p_value': 0.7, + 'additive': 11.6, 'lod_score': 0.1549019599857432}, + {'name': 'M5', 'chr': 'X', 'Mb': 1.2e-05, 'p_value': 0.6, 'additive': 21.1, 'lod_score': 0.22184874961635637}] self.assertEqual(expected_results, results) diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_plink_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_plink_mapping.py index 5eec93f1..fd21a825 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_plink_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_plink_mapping.py @@ -12,9 +12,10 @@ class AttributeSetter: def __init__(self, obj): for key, val in obj.items(): setattr(self, key, val) -class TestPlinkMapping(unittest.TestCase): +class TestPlinkMapping(unittest.TestCase): + def test_build_line_list(self): """test for building line list""" line_1 = "this is line one test" diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_qtlreaper_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_qtlreaper_mapping.py index b47f877a..8b4337ec 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_qtlreaper_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_qtlreaper_mapping.py @@ -1,21 +1,24 @@ import unittest -from unittest import mock -from wqflask.marker_regression.qtlreaper_mapping import gen_pheno_txt_file +from unittest import mock +from wqflask.marker_regression.qtlreaper_mapping import gen_pheno_txt_file + +# issues some methods in genofile object are not defined +# modify samples should equal to vals -#issues some methods in genofile object are not defined -#modify samples should equal to vals -class TestQtlReaperMapping(unittest.TestCase): - @mock.patch("wqflask.marker_regression.qtlreaper_mapping.TEMPDIR", "/home/user/data") - def test_gen_pheno_txt_file(self): - vals=["V1","x","V4","V3","x"] - samples=["S1","S2","S3","S4","S5"] - trait_filename="trait_file" - with mock.patch("builtins.open", mock.mock_open())as mock_open: - gen_pheno_txt_file(samples=samples,vals=vals,trait_filename=trait_filename) - mock_open.assert_called_once_with("/home/user/data/gn2/trait_file.txt","w") - filehandler=mock_open() - write_calls= [mock.call('Trait\t'),mock.call('S1\tS3\tS4\n'),mock.call('T1\t'),mock.call('V1\tV4\tV3')] - filehandler.write.assert_has_calls(write_calls) +class TestQtlReaperMapping(unittest.TestCase): + @mock.patch("wqflask.marker_regression.qtlreaper_mapping.TEMPDIR", "/home/user/data") + def test_gen_pheno_txt_file(self): + vals = ["V1", "x", "V4", "V3", "x"] + samples = ["S1", "S2", "S3", "S4", "S5"] + trait_filename = "trait_file" + with mock.patch("builtins.open", mock.mock_open())as mock_open: + gen_pheno_txt_file(samples=samples, vals=vals, + trait_filename=trait_filename) + mock_open.assert_called_once_with( + "/home/user/data/gn2/trait_file.txt", "w") + filehandler = mock_open() + write_calls = [mock.call('Trait\t'), mock.call( + 'S1\tS3\tS4\n'), mock.call('T1\t'), mock.call('V1\tV4\tV3')] - + filehandler.write.assert_has_calls(write_calls) diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py index c585f1df..91d2c587 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py @@ -5,44 +5,38 @@ from wqflask.marker_regression.rqtl_mapping import get_trait_data_type from wqflask.marker_regression.rqtl_mapping import sanitize_rqtl_phenotype from wqflask.marker_regression.rqtl_mapping import sanitize_rqtl_names -class TestRqtlMapping(unittest.TestCase): - - def setUp(self): - self.app_context=app.app_context() - self.app_context.push() - - def tearDown(self): - self.app_context.pop() - - - @mock.patch("wqflask.marker_regression.rqtl_mapping.g") - @mock.patch("wqflask.marker_regression.rqtl_mapping.logger") - def test_get_trait_data(self,mock_logger,mock_db): - """test for getting trait data_type return True""" - query_value="""SELECT value FROM TraitMetadata WHERE type='trait_data_type'""" - mock_db.db.execute.return_value.fetchone.return_value=["""{"type":"trait_data_type","name":"T1","traid_id":"fer434f"}"""] - results=get_trait_data_type("traid_id") - mock_db.db.execute.assert_called_with(query_value) - self.assertEqual(results,"fer434f") - - def test_sanitize_rqtl_phenotype(self): - """test for sanitizing rqtl phenotype""" - vals=['f',"x","r","x","x"] - results=sanitize_rqtl_phenotype(vals) - expected_phenotype_string='c(f,NA,r,NA,NA)' - - self.assertEqual(results,expected_phenotype_string) - - def test_sanitize_rqtl_names(self): - """test for sanitzing rqtl names""" - vals=['f',"x","r","x","x"] - expected_sanitized_name="c('f',NA,'r',NA,NA)" - results=sanitize_rqtl_names(vals) - self.assertEqual(expected_sanitized_name,results) - - - - - +class TestRqtlMapping(unittest.TestCase): + def setUp(self): + self.app_context = app.app_context() + self.app_context.push() + + def tearDown(self): + self.app_context.pop() + + @mock.patch("wqflask.marker_regression.rqtl_mapping.g") + @mock.patch("wqflask.marker_regression.rqtl_mapping.logger") + def test_get_trait_data(self, mock_logger, mock_db): + """test for getting trait data_type return True""" + query_value = """SELECT value FROM TraitMetadata WHERE type='trait_data_type'""" + mock_db.db.execute.return_value.fetchone.return_value = [ + """{"type":"trait_data_type","name":"T1","traid_id":"fer434f"}"""] + results = get_trait_data_type("traid_id") + mock_db.db.execute.assert_called_with(query_value) + self.assertEqual(results, "fer434f") + + def test_sanitize_rqtl_phenotype(self): + """test for sanitizing rqtl phenotype""" + vals = ['f', "x", "r", "x", "x"] + results = sanitize_rqtl_phenotype(vals) + expected_phenotype_string = 'c(f,NA,r,NA,NA)' + + self.assertEqual(results, expected_phenotype_string) + + def test_sanitize_rqtl_names(self): + """test for sanitzing rqtl names""" + vals = ['f', "x", "r", "x", "x"] + expected_sanitized_name = "c('f',NA,'r',NA,NA)" + results = sanitize_rqtl_names(vals) + self.assertEqual(expected_sanitized_name, results) diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py index a29d8cfb..78cd3be9 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py @@ -187,7 +187,8 @@ class TestRunMapping(unittest.TestCase): mock.call('Time/Date: 09/01/19 / 10:12:12\n'), mock.call('Population: Human GP1_\n'), mock.call( 'Data Set: dataser_1\n'), - mock.call('N Samples: 100\n'), mock.call('Transform - Quantile Normalized\n'), + mock.call('N Samples: 100\n'), mock.call( + 'Transform - Quantile Normalized\n'), mock.call('Gene Symbol: IGFI\n'), mock.call( 'Location: X1 @ 123313 Mb\n'), mock.call('Cofactors (dataset - trait):\n'), diff --git a/wqflask/tests/unit/wqflask/snp_browser/test_snp_browser.py b/wqflask/tests/unit/wqflask/snp_browser/test_snp_browser.py index ce3e7b83..89442c47 100644 --- a/wqflask/tests/unit/wqflask/snp_browser/test_snp_browser.py +++ b/wqflask/tests/unit/wqflask/snp_browser/test_snp_browser.py @@ -21,11 +21,11 @@ class TestSnpBrowser(unittest.TestCase): "transcript": "false", "exon": "false", "domain_2": "true", "function": "false", "function_details": "true"} strains = {"mouse": ["S1", "S2", "S3", "S4", "S5"], "rat": []} expected_results = ([['Index', 'SNP ID', 'Chr', 'Mb', 'Alleles', 'ConScore', - 'Domain 1', 'Domain 2', 'Details'], - ['S1', 'S2', 'S3', 'S4', 'S5']], 5, - ['index', 'snp_name', 'chr', 'mb_formatted', 'alleles', - 'conservation_score', 'domain_1', 'domain_2', - 'function_details', 'S1', 'S2', 'S3', 'S4', 'S5']) + 'Domain 1', 'Domain 2', 'Details'], + ['S1', 'S2', 'S3', 'S4', 'S5']], 5, + ['index', 'snp_name', 'chr', 'mb_formatted', 'alleles', + 'conservation_score', 'domain_1', 'domain_2', + 'function_details', 'S1', 'S2', 'S3', 'S4', 'S5']) results_with_snp = get_header_list( variant_type="SNP", strains=strains, species="Mouse", empty_columns=empty_columns) @@ -33,9 +33,9 @@ class TestSnpBrowser(unittest.TestCase): variant_type="InDel", strains=strains, species="rat", empty_columns=[]) expected_results_with_indel = ( ['Index', 'ID', 'Type', 'InDel Chr', 'Mb Start', - 'Mb End', 'Strand', 'Size', 'Sequence', 'Source'], 0, - ['index', 'indel_name', 'indel_type', 'indel_chr', 'indel_mb_s', - 'indel_mb_e', 'indel_strand', 'indel_size', 'indel_sequence', 'source_name']) + 'Mb End', 'Strand', 'Size', 'Sequence', 'Source'], 0, + ['index', 'indel_name', 'indel_type', 'indel_chr', 'indel_mb_s', + 'indel_mb_e', 'indel_strand', 'indel_size', 'indel_sequence', 'source_name']) self.assertEqual(expected_results, results_with_snp) self.assertEqual(expected_results_with_indel, results_with_indel) diff --git a/wqflask/tests/unit/wqflask/test_collect.py b/wqflask/tests/unit/wqflask/test_collect.py index 9a36132d..2a914fb2 100644 --- a/wqflask/tests/unit/wqflask/test_collect.py +++ b/wqflask/tests/unit/wqflask/test_collect.py @@ -11,6 +11,7 @@ app = Flask(__name__) class MockSession: """Helper class for mocking wqflask.collect.g.user_session.logged_in""" + def __init__(self, is_logged_in=False): self.is_logged_in = is_logged_in @@ -21,6 +22,7 @@ class MockSession: class MockFlaskG: """Helper class for mocking wqflask.collect.g.user_session""" + def __init__(self, is_logged_in=False): self.is_logged_in = is_logged_in diff --git a/wqflask/tests/unit/wqflask/test_server_side.py b/wqflask/tests/unit/wqflask/test_server_side.py index 4f91d8ca..be7ca2df 100644 --- a/wqflask/tests/unit/wqflask/test_server_side.py +++ b/wqflask/tests/unit/wqflask/test_server_side.py @@ -17,15 +17,18 @@ class TestServerSideTableTests(unittest.TestCase): def test_get_page(self): rows_count = 3 table_rows = [ - {'first': 'd', 'second': 4, 'third': 'zz'}, - {'first': 'b', 'second': 2, 'third': 'aa'}, + {'first': 'd', 'second': 4, 'third': 'zz'}, + {'first': 'b', 'second': 2, 'third': 'aa'}, {'first': 'c', 'second': 1, 'third': 'ss'}, ] headers = ['first', 'second', 'third'] - request_args = {'sEcho': '1', 'iSortCol_0': '1', 'iSortingCols': '1', 'sSortDir_0': 'asc', 'iDisplayStart': '0', 'iDisplayLength': '3'} + request_args = {'sEcho': '1', 'iSortCol_0': '1', 'iSortingCols': '1', + 'sSortDir_0': 'asc', 'iDisplayStart': '0', 'iDisplayLength': '3'} - test_page = ServerSideTable(rows_count, table_rows, headers, request_args).get_page() + test_page = ServerSideTable( + rows_count, table_rows, headers, request_args).get_page() self.assertEqual(test_page['sEcho'], '1') self.assertEqual(test_page['iTotalRecords'], 'nan') self.assertEqual(test_page['iTotalDisplayRecords'], '3') - self.assertEqual(test_page['data'], [{'first': 'b', 'second': 2, 'third': 'aa'}, {'first': 'c', 'second': 1, 'third': 'ss'}, {'first': 'd', 'second': 4, 'third': 'zz'}]) + self.assertEqual(test_page['data'], [{'first': 'b', 'second': 2, 'third': 'aa'}, { + 'first': 'c', 'second': 1, 'third': 'ss'}, {'first': 'd', 'second': 4, 'third': 'zz'}]) diff --git a/wqflask/tests/wqflask/show_trait/testSampleList.py b/wqflask/tests/wqflask/show_trait/testSampleList.py index 34c51e3e..305586ce 100644 --- a/wqflask/tests/wqflask/show_trait/testSampleList.py +++ b/wqflask/tests/wqflask/show_trait/testSampleList.py @@ -10,7 +10,8 @@ class TestSampleList(unittest.TestCase): characters_list = ["z", "f", "q", "s", "t", "a", "g"] names_list = ["temp1", "publish", "Sample", "Dataset"] - sorted_list_a=natural_sort(characters_list) - sorted_list_b=natural_sort(names_list) + sorted_list_a = natural_sort(characters_list) + sorted_list_b = natural_sort(names_list) self.assertEqual(sorted_list_a, ["a", "f", "g", "q", "s", "t", "z"]) - self.assertEqual(sorted_list_b,["Dataset", "Sample", "publish", "temp1"]) + self.assertEqual( + sorted_list_b, ["Dataset", "Sample", "publish", "temp1"]) diff --git a/wqflask/tests/wqflask/show_trait/test_show_trait.py b/wqflask/tests/wqflask/show_trait/test_show_trait.py index 8c866874..63df2ba5 100644 --- a/wqflask/tests/wqflask/show_trait/test_show_trait.py +++ b/wqflask/tests/wqflask/show_trait/test_show_trait.py @@ -72,7 +72,8 @@ class TestTraits(unittest.TestCase): mock_get.return_value = get_return_obj results = get_ncbi_summary(trait) mock_exists.assert_called_once() - mock_get.assert_called_once_with(f"http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=gene&id={trait.geneid}&retmode=json") + mock_get.assert_called_once_with( + f"http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=gene&id={trait.geneid}&retmode=json") self.assertEqual(results, "this is a summary of the geneid") @@ -242,7 +243,6 @@ class TestTraits(unittest.TestCase): self.assertEqual(get_genotype_scales(file_location), expected_results) mock_get_scales.assert_called_once_with(file_location) - @mock.patch("wqflask.show_trait.show_trait.locate_ignore_error") def test_get_scales_from_genofile_found(self, mock_ignore_location): """"add test for get scales from genofile where file is found""" diff --git a/wqflask/utility/Plot.py b/wqflask/utility/Plot.py index 61f408d2..9b2c6735 100644 --- a/wqflask/utility/Plot.py +++ b/wqflask/utility/Plot.py @@ -34,7 +34,7 @@ import utility.corestats as corestats from base import webqtlConfig from utility.pillow_utils import draw_rotated_text import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) # ---- Define common colours ---- # BLUE = ImageColor.getrgb("blue") @@ -47,6 +47,7 @@ COUR_FILE = "./wqflask/static/fonts/courbd.ttf" TAHOMA_FILE = "./wqflask/static/fonts/tahoma.ttf" # ---- END: FONT FILES ---- # + def cformat(d, rank=0): 'custom string format' strD = "%2.6f" % d @@ -68,22 +69,24 @@ def cformat(d, rank=0): strD = '0.0' return strD + def frange(start, end=None, inc=1.0): "A faster range-like function that does accept float increments..." if end == None: end = start + 0.0 start = 0.0 else: - start += 0.0 # force it to be a float + start += 0.0 # force it to be a float count = int((end - start) / inc) if start + count * inc != end: - # Need to adjust the count. AFAICT, it always comes up one short. + # Need to adjust the count. AFAICT, it always comes up one short. count += 1 L = [start] * count for i in range(1, count): L[i] = start + i * inc return L + def find_outliers(vals): """Calculates the upper and lower bounds of a set of sample/case values @@ -119,154 +122,163 @@ def find_outliers(vals): # parameter: data is either object returned by reaper permutation function (called by MarkerRegressionPage.py) # or the first object returned by direct (pair-scan) permu function (called by DirectPlotPage.py) -def plotBar(canvas, data, barColor=BLUE, axesColor=BLACK, labelColor=BLACK, XLabel=None, YLabel=None, title=None, offset= (60, 20, 40, 40), zoom = 1): + + +def plotBar(canvas, data, barColor=BLUE, axesColor=BLACK, labelColor=BLACK, XLabel=None, YLabel=None, title=None, offset=(60, 20, 40, 40), zoom=1): im_drawer = ImageDraw.Draw(canvas) xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset plotWidth = canvas.size[0] - xLeftOffset - xRightOffset plotHeight = canvas.size[1] - yTopOffset - yBottomOffset - if plotHeight<=0 or plotWidth<=0: - return + if plotHeight <= 0 or plotWidth <= 0: + return if len(data) < 2: - return + return max_D = max(data) min_D = min(data) - #add by NL 06-20-2011: fix the error: when max_D is infinite, log function in detScale will go wrong - if max_D == float('inf') or max_D>webqtlConfig.MAXLRS: - max_D=webqtlConfig.MAXLRS #maximum LRS value + # add by NL 06-20-2011: fix the error: when max_D is infinite, log function in detScale will go wrong + if max_D == float('inf') or max_D > webqtlConfig.MAXLRS: + max_D = webqtlConfig.MAXLRS # maximum LRS value xLow, xTop, stepX = detScale(min_D, max_D) - #reduce data - #ZS: Used to determine number of bins for permutation output - step = ceil((xTop-xLow)/50.0) + # reduce data + # ZS: Used to determine number of bins for permutation output + step = ceil((xTop - xLow) / 50.0) j = xLow dataXY = [] Count = [] while j <= xTop: - dataXY.append(j) - Count.append(0) - j += step + dataXY.append(j) + Count.append(0) + j += step for i, item in enumerate(data): - if item == float('inf') or item>webqtlConfig.MAXLRS: - item = webqtlConfig.MAXLRS #maximum LRS value - j = int((item-xLow)/step) - Count[j] += 1 + if item == float('inf') or item > webqtlConfig.MAXLRS: + item = webqtlConfig.MAXLRS # maximum LRS value + j = int((item - xLow) / step) + Count[j] += 1 - yLow, yTop, stepY=detScale(0, max(Count)) + yLow, yTop, stepY = detScale(0, max(Count)) - #draw data - xScale = plotWidth/(xTop-xLow) - yScale = plotHeight/(yTop-yLow) - barWidth = xScale*step + # draw data + xScale = plotWidth / (xTop - xLow) + yScale = plotHeight / (yTop - yLow) + barWidth = xScale * step for i, count in enumerate(Count): - if count: - xc = (dataXY[i]-xLow)*xScale+xLeftOffset - yc =-(count-yLow)*yScale+yTopOffset+plotHeight - im_drawer.rectangle( - xy=((xc+2, yc), (xc+barWidth-2, yTopOffset+plotHeight)), - outline=barColor, fill=barColor) - - #draw drawing region + if count: + xc = (dataXY[i] - xLow) * xScale + xLeftOffset + yc = -(count - yLow) * yScale + yTopOffset + plotHeight + im_drawer.rectangle( + xy=((xc + 2, yc), (xc + barWidth - 2, yTopOffset + plotHeight)), + outline=barColor, fill=barColor) + + # draw drawing region im_drawer.rectangle( - xy=((xLeftOffset, yTopOffset), (xLeftOffset+plotWidth, yTopOffset+plotHeight)) + xy=((xLeftOffset, yTopOffset), + (xLeftOffset + plotWidth, yTopOffset + plotHeight)) ) - #draw scale - scaleFont=ImageFont.truetype(font=COUR_FILE, size=11) - x=xLow - for i in range(int(stepX)+1): - xc=xLeftOffset+(x-xLow)*xScale - im_drawer.line( - xy=((xc, yTopOffset+plotHeight), (xc, yTopOffset+plotHeight+5)), - fill=axesColor) - strX = cformat(d=x, rank=0) - im_drawer.text( - text=strX, - xy=(xc-im_drawer.textsize(strX, font=scaleFont)[0]/2, - yTopOffset+plotHeight+14), font=scaleFont) - x+= (xTop - xLow)/stepX - - y=yLow - for i in range(int(stepY)+1): - yc=yTopOffset+plotHeight-(y-yLow)*yScale - im_drawer.line(xy=((xLeftOffset, yc), (xLeftOffset-5, yc)), fill=axesColor) - strY = "%d" %y - im_drawer.text( - text=strY, - xy=(xLeftOffset-im_drawer.textsize(strY, font=scaleFont)[0]-6, yc+5), - font=scaleFont) - y+= (yTop - yLow)/stepY - - #draw label - labelFont=ImageFont.truetype(font=TAHOMA_FILE, size=17) + # draw scale + scaleFont = ImageFont.truetype(font=COUR_FILE, size=11) + x = xLow + for i in range(int(stepX) + 1): + xc = xLeftOffset + (x - xLow) * xScale + im_drawer.line( + xy=((xc, yTopOffset + plotHeight), + (xc, yTopOffset + plotHeight + 5)), + fill=axesColor) + strX = cformat(d=x, rank=0) + im_drawer.text( + text=strX, + xy=(xc - im_drawer.textsize(strX, font=scaleFont)[0] / 2, + yTopOffset + plotHeight + 14), font=scaleFont) + x += (xTop - xLow) / stepX + + y = yLow + for i in range(int(stepY) + 1): + yc = yTopOffset + plotHeight - (y - yLow) * yScale + im_drawer.line( + xy=((xLeftOffset, yc), (xLeftOffset - 5, yc)), fill=axesColor) + strY = "%d" % y + im_drawer.text( + text=strY, + xy=(xLeftOffset - im_drawer.textsize(strY, + font=scaleFont)[0] - 6, yc + 5), + font=scaleFont) + y += (yTop - yLow) / stepY + + # draw label + labelFont = ImageFont.truetype(font=TAHOMA_FILE, size=17) if XLabel: - im_drawer.text( - text=XLabel, - xy=(xLeftOffset+( - plotWidth-im_drawer.textsize(XLabel, font=labelFont)[0])/2.0, - yTopOffset+plotHeight+yBottomOffset-10), - font=labelFont, fill=labelColor) + im_drawer.text( + text=XLabel, + xy=(xLeftOffset + ( + plotWidth - im_drawer.textsize(XLabel, font=labelFont)[0]) / 2.0, + yTopOffset + plotHeight + yBottomOffset - 10), + font=labelFont, fill=labelColor) if YLabel: draw_rotated_text(canvas, text=YLabel, xy=(19, - yTopOffset+plotHeight-( - plotHeight-im_drawer.textsize( - YLabel, font=labelFont)[0])/2.0), + yTopOffset + plotHeight - ( + plotHeight - im_drawer.textsize( + YLabel, font=labelFont)[0]) / 2.0), font=labelFont, fill=labelColor, angle=90) - labelFont=ImageFont.truetype(font=VERDANA_FILE, size=16) + labelFont = ImageFont.truetype(font=VERDANA_FILE, size=16) if title: - im_drawer.text( - text=title, - xy=(xLeftOffset+(plotWidth-im_drawer.textsize( - title, font=labelFont)[0])/2.0, - 20), - font=labelFont, fill=labelColor) + im_drawer.text( + text=title, + xy=(xLeftOffset + (plotWidth - im_drawer.textsize( + title, font=labelFont)[0]) / 2.0, + 20), + font=labelFont, fill=labelColor) # This function determines the scale of the plot + + def detScaleOld(min, max): - if min>=max: + if min >= max: return None elif min == -1.0 and max == 1.0: return [-1.2, 1.2, 12] else: - a=max-min - b=floor(log10(a)) - c=pow(10.0, b) - if a < c*5.0: - c/=2.0 - #print a,b,c - low=c*floor(min/c) - high=c*ceil(max/c) - return [low, high, round((high-low)/c)] - -def detScale(min=0,max=0): - - if min>=max: + a = max - min + b = floor(log10(a)) + c = pow(10.0, b) + if a < c * 5.0: + c /= 2.0 + # print a,b,c + low = c * floor(min / c) + high = c * ceil(max / c) + return [low, high, round((high - low) / c)] + + +def detScale(min=0, max=0): + + if min >= max: return None elif min == -1.0 and max == 1.0: return [-1.2, 1.2, 12] else: - a=max-min + a = max - min if max != 0: - max += 0.1*a + max += 0.1 * a if min != 0: - if min > 0 and min < 0.1*a: + if min > 0 and min < 0.1 * a: min = 0.0 else: - min -= 0.1*a - a=max-min - b=floor(log10(a)) - c=pow(10.0, b) - low=c*floor(min/c) - high=c*ceil(max/c) - n = round((high-low)/c) + min -= 0.1 * a + a = max - min + b = floor(log10(a)) + c = pow(10.0, b) + low = c * floor(min / c) + high = c * ceil(max / c) + n = round((high - low) / c) div = 2.0 while n < 5 or n > 15: if n < 5: @@ -274,23 +286,27 @@ def detScale(min=0,max=0): else: c *= div if div == 2.0: - div =5.0 + div = 5.0 else: - div =2.0 - low=c*floor(min/c) - high=c*ceil(max/c) - n = round((high-low)/c) + div = 2.0 + low = c * floor(min / c) + high = c * ceil(max / c) + n = round((high - low) / c) return [low, high, n] + def bluefunc(x): - return 1.0 / (1.0 + exp(-10*(x-0.6))) + return 1.0 / (1.0 + exp(-10 * (x - 0.6))) + def redfunc(x): - return 1.0 / (1.0 + exp(10*(x-0.5))) + return 1.0 / (1.0 + exp(10 * (x - 0.5))) + def greenfunc(x): - return 1 - pow(redfunc(x+0.2), 2) - bluefunc(x-0.3) + return 1 - pow(redfunc(x + 0.2), 2) - bluefunc(x - 0.3) + def colorSpectrum(n=100): multiple = 10 @@ -303,26 +319,27 @@ def colorSpectrum(n=100): return [ImageColor.getrgb("rgb(100%,0%,0%)"), ImageColor.getrgb("rgb(0%,100%,0%)"), ImageColor.getrgb("rgb(0%,0%,100%)")] - N = n*multiple - out = [None]*N; + N = n * multiple + out = [None] * N for i in range(N): - x = float(i)/N + x = float(i) / N out[i] = ImageColor.getrgb("rgb({}%,{}%,{}%".format( - *[int(i*100) for i in ( + *[int(i * 100) for i in ( redfunc(x), greenfunc(x), bluefunc(x))])) out2 = [out[0]] - step = N/float(n-1) + step = N / float(n - 1) j = 0 - for i in range(n-2): + for i in range(n - 2): j += step out2.append(out[int(j)]) out2.append(out[-1]) return out2 + def _test(): import doctest doctest.testmod() -if __name__=="__main__": +if __name__ == "__main__": _test() diff --git a/wqflask/utility/TDCell.py b/wqflask/utility/TDCell.py index 8de8e050..4b0f4b1d 100644 --- a/wqflask/utility/TDCell.py +++ b/wqflask/utility/TDCell.py @@ -33,9 +33,9 @@ class TDCell: def __init__(self, html="", text="", val=0.0): - self.html = html #html, for web page - self.text = text #text value, for output to a text file - self.val = val #sort by value + self.html = html # html, for web page + self.text = text # text value, for output to a text file + self.val = val # sort by value def __str__(self): return self.text diff --git a/wqflask/utility/THCell.py b/wqflask/utility/THCell.py index dde221b5..f533dcb8 100644 --- a/wqflask/utility/THCell.py +++ b/wqflask/utility/THCell.py @@ -33,10 +33,10 @@ class THCell: def __init__(self, html="", text="", sort=1, idx=-1): - self.html = html #html, for web page - self.text = text #Column text value - self.sort = sort #0: not sortable, 1: yes - self.idx = idx #sort by value + self.html = html # html, for web page + self.text = text # Column text value + self.sort = sort # 0: not sortable, 1: yes + self.idx = idx # sort by value def __str__(self): return self.text diff --git a/wqflask/utility/__init__.py b/wqflask/utility/__init__.py index df926884..25273fa0 100644 --- a/wqflask/utility/__init__.py +++ b/wqflask/utility/__init__.py @@ -2,10 +2,12 @@ from pprint import pformat as pf # Todo: Move these out of __init__ + class Bunch: """Like a dictionary but using object notation""" - def __init__(self, **kw): - self.__dict__ = kw + + def __init__(self, **kw): + self.__dict__ = kw def __repr__(self): return pf(self.__dict__) @@ -30,6 +32,4 @@ class Struct: def __repr__(self): return '{%s}' % str(', '.join('%s : %s' % (k, repr(v)) for - (k, v) in list(self.__dict__.items()))) - - + (k, v) in list(self.__dict__.items()))) diff --git a/wqflask/utility/after.py b/wqflask/utility/after.py index 06091ecb..2b560e48 100644 --- a/wqflask/utility/after.py +++ b/wqflask/utility/after.py @@ -7,6 +7,7 @@ from flask import g from wqflask import app + def after_this_request(f): if not hasattr(g, 'after_request_callbacks'): g.after_request_callbacks = [] diff --git a/wqflask/utility/authentication_tools.py b/wqflask/utility/authentication_tools.py index 672b36d5..57dbf8ba 100644 --- a/wqflask/utility/authentication_tools.py +++ b/wqflask/utility/authentication_tools.py @@ -11,6 +11,7 @@ from utility.redis_tools import (get_redis_conn, add_resource) Redis = get_redis_conn() + def check_resource_availability(dataset, trait_id=None): # At least for now assume temporary entered traits are accessible if type(dataset) == str or dataset.type == "Temp": diff --git a/wqflask/utility/benchmark.py b/wqflask/utility/benchmark.py index 91ea91e8..6ece2f21 100644 --- a/wqflask/utility/benchmark.py +++ b/wqflask/utility/benchmark.py @@ -4,7 +4,8 @@ import time from utility.tools import LOG_BENCH from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) + class Bench: entries = collections.OrderedDict() @@ -18,7 +19,8 @@ class Bench: if self.name: logger.debug("Starting benchmark: %s" % (self.name)) else: - logger.debug("Starting benchmark at: %s [%i]" % (inspect.stack()[1][3], inspect.stack()[1][2])) + logger.debug("Starting benchmark at: %s [%i]" % ( + inspect.stack()[1][3], inspect.stack()[1][2])) self.start_time = time.time() def __exit__(self, type, value, traceback): @@ -32,14 +34,16 @@ class Bench: logger.info(" %s took: %f seconds" % (name, (time_taken))) if self.name: - Bench.entries[self.name] = Bench.entries.get(self.name, 0) + time_taken + Bench.entries[self.name] = Bench.entries.get( + self.name, 0) + time_taken @classmethod def report(cls): - total_time = sum((time_taken for time_taken in list(cls.entries.values()))) + total_time = sum( + (time_taken for time_taken in list(cls.entries.values()))) print("\nTiming report\n") for name, time_taken in list(cls.entries.items()): - percent = int(round((time_taken/total_time) * 100)) + percent = int(round((time_taken / total_time) * 100)) print("[{}%] {}: {}".format(percent, name, time_taken)) print() diff --git a/wqflask/utility/chunks.py b/wqflask/utility/chunks.py index 9a7db102..484b5de6 100644 --- a/wqflask/utility/chunks.py +++ b/wqflask/utility/chunks.py @@ -26,6 +26,6 @@ def divide_into_chunks(the_list, number_chunks): chunks = [] for counter in range(0, length, chunksize): - chunks.append(the_list[counter:counter+chunksize]) + chunks.append(the_list[counter:counter + chunksize]) return chunks diff --git a/wqflask/utility/corestats.py b/wqflask/utility/corestats.py index 67ca3ad3..da0a21db 100644 --- a/wqflask/utility/corestats.py +++ b/wqflask/utility/corestats.py @@ -15,7 +15,9 @@ import sys -#ZS: Should switch to using some third party library for this; maybe scipy has an equivalent +# ZS: Should switch to using some third party library for this; maybe scipy has an equivalent + + class Stats: def __init__(self, sequence): @@ -63,7 +65,8 @@ class Stats: if len(self.sequence) < 1: value = None elif (percentile >= 100): - sys.stderr.write('ERROR: percentile must be < 100. you supplied: %s\n'% percentile) + sys.stderr.write( + 'ERROR: percentile must be < 100. you supplied: %s\n' % percentile) value = None else: element_idx = int(len(self.sequence) * (percentile / 100.0)) @@ -80,4 +83,4 @@ class Stats: # stats = corestats.Stats(sequence) # print stats.avg() # print stats.percentile(90) -# -------------------------------------------
\ No newline at end of file +# ------------------------------------------- diff --git a/wqflask/utility/elasticsearch_tools.py b/wqflask/utility/elasticsearch_tools.py index a5580811..eae3ba03 100644 --- a/wqflask/utility/elasticsearch_tools.py +++ b/wqflask/utility/elasticsearch_tools.py @@ -47,11 +47,14 @@ logger = getLogger(__name__) from utility.tools import ELASTICSEARCH_HOST, ELASTICSEARCH_PORT + def test_elasticsearch_connection(): - es = Elasticsearch(['http://'+ELASTICSEARCH_HOST+":"+str(ELASTICSEARCH_PORT)+'/'], verify_certs=True) + es = Elasticsearch(['http://' + ELASTICSEARCH_HOST + \ + ":" + str(ELASTICSEARCH_PORT) + '/'], verify_certs=True) if not es.ping(): logger.warning("Elasticsearch is DOWN") + def get_elasticsearch_connection(for_user=True): """Return a connection to ES. Returns None on failure""" logger.info("get_elasticsearch_connection") @@ -77,6 +80,7 @@ def get_elasticsearch_connection(for_user=True): return es + def setup_users_index(es_connection): if es_connection: index_settings = { @@ -85,20 +89,24 @@ def setup_users_index(es_connection): "type": "keyword"}}} es_connection.indices.create(index='users', ignore=400) - es_connection.indices.put_mapping(body=index_settings, index="users", doc_type="local") + es_connection.indices.put_mapping( + body=index_settings, index="users", doc_type="local") + def get_user_by_unique_column(es, column_name, column_value, index="users", doc_type="local"): return get_item_by_unique_column(es, column_name, column_value, index=index, doc_type=doc_type) + def save_user(es, user, user_id): es_save_data(es, "users", "local", user, user_id) + def get_item_by_unique_column(es, column_name, column_value, index, doc_type): item_details = None try: response = es.search( - index = index, doc_type = doc_type, body = { - "query": { "match": { column_name: column_value } } + index=index, doc_type=doc_type, body={ + "query": {"match": {column_name: column_value}} }) if len(response["hits"]["hits"]) > 0: item_details = response["hits"]["hits"][0]["_source"] @@ -106,7 +114,8 @@ def get_item_by_unique_column(es, column_name, column_value, index, doc_type): pass return item_details + def es_save_data(es, index, doc_type, data_item, data_id,): from time import sleep es.create(index, doc_type, body=data_item, id=data_id) - sleep(1) # Delay 1 second to allow indexing + sleep(1) # Delay 1 second to allow indexing diff --git a/wqflask/utility/external.py b/wqflask/utility/external.py index 50afea08..805d2ffe 100644 --- a/wqflask/utility/external.py +++ b/wqflask/utility/external.py @@ -4,6 +4,7 @@ import os import sys import subprocess + def shell(command): if subprocess.call(command, shell=True) != 0: - raise Exception("ERROR: failed on "+command) + raise Exception("ERROR: failed on " + command) diff --git a/wqflask/utility/gen_geno_ob.py b/wqflask/utility/gen_geno_ob.py index 0a381c9b..e619b7b6 100644 --- a/wqflask/utility/gen_geno_ob.py +++ b/wqflask/utility/gen_geno_ob.py @@ -1,5 +1,6 @@ import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + class genotype: """ @@ -18,7 +19,7 @@ class genotype: self.filler = False self.mb_exists = False - #ZS: This is because I'm not sure if some files switch the column that contains Mb/cM positions; might be unnecessary + # ZS: This is because I'm not sure if some files switch the column that contains Mb/cM positions; might be unnecessary self.cm_column = 2 self.mb_column = 3 @@ -36,14 +37,16 @@ class genotype: return len(self.chromosomes) def read_rdata_output(self, qtl_results): - #ZS: This is necessary because R/qtl requires centimorgan marker positions, which it normally gets from the .geno file, but that doesn't exist for HET3-ITP (which only has RData), so it needs to read in the marker cM positions from the results - self.chromosomes = [] #ZS: Overwriting since the .geno file's contents are just placeholders + # ZS: This is necessary because R/qtl requires centimorgan marker positions, which it normally gets from the .geno file, but that doesn't exist for HET3-ITP (which only has RData), so it needs to read in the marker cM positions from the results + # ZS: Overwriting since the .geno file's contents are just placeholders + self.chromosomes = [] - this_chr = "" #ZS: This is so it can track when the chromosome changes as it iterates through markers + this_chr = "" # ZS: This is so it can track when the chromosome changes as it iterates through markers chr_ob = None for marker in qtl_results: locus = Locus(self) - if (str(marker['chr']) != this_chr) and this_chr != "X": #ZS: This is really awkward but works as a temporary fix + # ZS: This is really awkward but works as a temporary fix + if (str(marker['chr']) != this_chr) and this_chr != "X": if this_chr != "": self.chromosomes.append(chr_ob) this_chr = str(marker['chr']) @@ -68,7 +71,7 @@ class genotype: with open(filename, 'r') as geno_file: lines = geno_file.readlines() - this_chr = "" #ZS: This is so it can track when the chromosome changes as it iterates through markers + this_chr = "" # ZS: This is so it can track when the chromosome changes as it iterates through markers chr_ob = None for line in lines: if line[0] == "#": @@ -119,6 +122,7 @@ class genotype: self.chromosomes.append(chr_ob) + class Chr: def __init__(self, name, geno_ob): self.name = name @@ -140,8 +144,9 @@ class Chr: def add_marker(self, marker_row): self.loci.append(Locus(self.geno_ob, marker_row)) + class Locus: - def __init__(self, geno_ob, marker_row = None): + def __init__(self, geno_ob, marker_row=None): self.chr = None self.name = None self.cM = None @@ -153,9 +158,11 @@ class Locus: try: self.cM = float(marker_row[geno_ob.cm_column]) except: - self.cM = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else 0 + self.cM = float( + marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else 0 try: - self.Mb = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else None + self.Mb = float( + marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else None except: self.Mb = self.cM @@ -175,5 +182,5 @@ class Locus: for allele in marker_row[start_pos:]: if allele in list(geno_table.keys()): self.genotype.append(geno_table[allele]) - else: #ZS: Some genotype appears that isn't specified in the metadata, make it unknown + else: # ZS: Some genotype appears that isn't specified in the metadata, make it unknown self.genotype.append("U") diff --git a/wqflask/utility/genofile_parser.py b/wqflask/utility/genofile_parser.py index f8e96d19..86d9823e 100644 --- a/wqflask/utility/genofile_parser.py +++ b/wqflask/utility/genofile_parser.py @@ -12,88 +12,89 @@ import simplejson as json from pprint import pformat as pf + class Marker: - def __init__(self): - self.name = None - self.chr = None - self.cM = None - self.Mb = None - self.genotypes = [] + def __init__(self): + self.name = None + self.chr = None + self.cM = None + self.Mb = None + self.genotypes = [] class ConvertGenoFile: - def __init__(self, input_file): - self.mb_exists = False - self.cm_exists = False - self.markers = [] - - self.latest_row_pos = None - self.latest_col_pos = None + def __init__(self, input_file): + self.mb_exists = False + self.cm_exists = False + self.markers = [] - self.latest_row_value = None - self.latest_col_value = None - self.input_fh = open(input_file) - print("!!!!!!!!!!!!!!!!PARSER!!!!!!!!!!!!!!!!!!") - self.haplotype_notation = { - '@mat': "1", - '@pat': "2", - '@het': "-999", - '@unk': "-999" - } - self.configurations = {} + self.latest_row_pos = None + self.latest_col_pos = None - def process_rows(self): - for self.latest_row_pos, row in enumerate(self.input_fh): - self.latest_row_value = row - # Take care of headers - if not row.strip(): - continue - if row.startswith('#'): - continue - if row.startswith('Chr'): - if 'Mb' in row.split(): - self.mb_exists = True - if 'cM' in row.split(): - self.cm_exists = True - skip = 2 + self.cm_exists + self.mb_exists - self.individuals = row.split()[skip:] - continue - if row.startswith('@'): - key, _separater, value = row.partition(':') - key = key.strip() - value = value.strip() - if key in self.haplotype_notation: - self.configurations[value] = self.haplotype_notation[key] - continue - if not len(self.configurations): - raise EmptyConfigurations - yield row + self.latest_row_value = None + self.latest_col_value = None + self.input_fh = open(input_file) + print("!!!!!!!!!!!!!!!!PARSER!!!!!!!!!!!!!!!!!!") + self.haplotype_notation = { + '@mat': "1", + '@pat': "2", + '@het': "-999", + '@unk': "-999" + } + self.configurations = {} - def process_csv(self): - for row in self.process_rows(): - row_items = row.split("\t") + def process_rows(self): + for self.latest_row_pos, row in enumerate(self.input_fh): + self.latest_row_value = row + # Take care of headers + if not row.strip(): + continue + if row.startswith('#'): + continue + if row.startswith('Chr'): + if 'Mb' in row.split(): + self.mb_exists = True + if 'cM' in row.split(): + self.cm_exists = True + skip = 2 + self.cm_exists + self.mb_exists + self.individuals = row.split()[skip:] + continue + if row.startswith('@'): + key, _separater, value = row.partition(':') + key = key.strip() + value = value.strip() + if key in self.haplotype_notation: + self.configurations[value] = self.haplotype_notation[key] + continue + if not len(self.configurations): + raise EmptyConfigurations + yield row - this_marker = Marker() - this_marker.name = row_items[1] - this_marker.chr = row_items[0] - if self.cm_exists and self.mb_exists: - this_marker.cM = row_items[2] - this_marker.Mb = row_items[3] - genotypes = row_items[4:] - elif self.cm_exists: - this_marker.cM = row_items[2] - genotypes = row_items[3:] - elif self.mb_exists: - this_marker.Mb = row_items[2] - genotypes = row_items[3:] - else: - genotypes = row_items[2:] - for item_count, genotype in enumerate(genotypes): - if genotype.upper().strip() in self.configurations: - this_marker.genotypes.append(self.configurations[genotype.upper().strip()]) - else: - print("WARNING:", genotype.upper()) - this_marker.genotypes.append("NA") - self.markers.append(this_marker.__dict__) + def process_csv(self): + for row in self.process_rows(): + row_items = row.split("\t") + this_marker = Marker() + this_marker.name = row_items[1] + this_marker.chr = row_items[0] + if self.cm_exists and self.mb_exists: + this_marker.cM = row_items[2] + this_marker.Mb = row_items[3] + genotypes = row_items[4:] + elif self.cm_exists: + this_marker.cM = row_items[2] + genotypes = row_items[3:] + elif self.mb_exists: + this_marker.Mb = row_items[2] + genotypes = row_items[3:] + else: + genotypes = row_items[2:] + for item_count, genotype in enumerate(genotypes): + if genotype.upper().strip() in self.configurations: + this_marker.genotypes.append( + self.configurations[genotype.upper().strip()]) + else: + print("WARNING:", genotype.upper()) + this_marker.genotypes.append("NA") + self.markers.append(this_marker.__dict__) diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py index 12fd6be5..ecc075ae 100644 --- a/wqflask/utility/helper_functions.py +++ b/wqflask/utility/helper_functions.py @@ -39,7 +39,7 @@ def get_trait_db_obs(self, trait_db_list): for trait in trait_db_list: data, _separator, hmac_string = trait.rpartition(':') data = data.strip() - assert hmac_string==hmac.hmac_creation(data), "Data tampering?" + assert hmac_string == hmac.hmac_creation(data), "Data tampering?" trait_name, dataset_name = data.split(":")[:2] if dataset_name == "Temp": dataset_ob = data_set.create_dataset( diff --git a/wqflask/utility/logger.py b/wqflask/utility/logger.py index e904eb94..d706e32a 100644 --- a/wqflask/utility/logger.py +++ b/wqflask/utility/logger.py @@ -35,6 +35,7 @@ import datetime from utility.tools import LOG_LEVEL, LOG_LEVEL_DEBUG, LOG_SQL + class GNLogger: """A logger class with some additional functionality, such as multiple parameter logging, SQL logging, timing, colors, and lazy @@ -49,14 +50,14 @@ class GNLogger: """Set the undelying log level""" self.logger.setLevel(value) - def debug(self,*args): + def debug(self, *args): """Call logging.debug for multiple args. Use (lazy) debugf and level=num to filter on LOG_LEVEL_DEBUG. """ self.collect(self.logger.debug, *args) - def debug20(self,*args): + def debug20(self, *args): """Call logging.debug for multiple args. Use level=num to filter on LOG_LEVEL_DEBUG (NYI). @@ -65,29 +66,29 @@ LOG_LEVEL_DEBUG (NYI). if self.logger.getEffectiveLevel() < 20: self.collect(self.logger.debug, *args) - def info(self,*args): + def info(self, *args): """Call logging.info for multiple args""" self.collect(self.logger.info, *args) - def warning(self,*args): + def warning(self, *args): """Call logging.warning for multiple args""" self.collect(self.logger.warning, *args) # self.logger.warning(self.collect(*args)) - def error(self,*args): + def error(self, *args): """Call logging.error for multiple args""" now = datetime.datetime.utcnow() time_str = now.strftime('%H:%M:%S UTC %Y%m%d') - l = [time_str]+list(args) + l = [time_str] + list(args) self.collect(self.logger.error, *l) - def infof(self,*args): + def infof(self, *args): """Call logging.info for multiple args lazily""" # only evaluate function when logging if self.logger.getEffectiveLevel() < 30: self.collectf(self.logger.debug, *args) - def debugf(self,level=0,*args): + def debugf(self, level=0, *args): """Call logging.debug for multiple args lazily and handle LOG_LEVEL_DEBUG correctly @@ -97,7 +98,7 @@ LOG_LEVEL_DEBUG (NYI). if self.logger.getEffectiveLevel() < 20: self.collectf(self.logger.debug, *args) - def sql(self, sqlcommand, fun = None): + def sql(self, sqlcommand, fun=None): """Log SQL command, optionally invoking a timed fun""" if LOG_SQL: caller = stack()[1][3] @@ -110,11 +111,11 @@ LOG_LEVEL_DEBUG (NYI). self.info(result) return result - def collect(self,fun,*args): + def collect(self, fun, *args): """Collect arguments and use fun to output""" - out = "."+stack()[2][3] + out = "." + stack()[2][3] for a in args: - if len(out)>1: + if len(out) > 1: out += ": " if isinstance(a, str): out = out + a @@ -122,11 +123,11 @@ LOG_LEVEL_DEBUG (NYI). out = out + pf(a, width=160) fun(out) - def collectf(self,fun,*args): + def collectf(self, fun, *args): """Collect arguments and use fun to output one by one""" - out = "."+stack()[2][3] + out = "." + stack()[2][3] for a in args: - if len(out)>1: + if len(out) > 1: out += ": " if isfunction(a): out += a() @@ -139,7 +140,9 @@ LOG_LEVEL_DEBUG (NYI). # Get the module logger. You can override log levels at the # module level -def getLogger(name, level = None): + + +def getLogger(name, level=None): gnlogger = GNLogger(name) logger = gnlogger.logger @@ -148,5 +151,6 @@ def getLogger(name, level = None): else: logger.setLevel(LOG_LEVEL) - logger.info("Log level of "+name+" set to "+logging.getLevelName(logger.getEffectiveLevel())) + logger.info("Log level of " + name + " set to " + \ + logging.getLevelName(logger.getEffectiveLevel())) return gnlogger diff --git a/wqflask/utility/pillow_utils.py b/wqflask/utility/pillow_utils.py index c486abba..5713e155 100644 --- a/wqflask/utility/pillow_utils.py +++ b/wqflask/utility/pillow_utils.py @@ -3,12 +3,14 @@ from PIL import Image, ImageColor, ImageDraw, ImageFont from utility.tools import TEMPDIR import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) BLACK = ImageColor.getrgb("black") WHITE = ImageColor.getrgb("white") # def draw_rotated_text(canvas: Image, text: str, font: ImageFont, xy: tuple, fill: ImageColor=BLACK, angle: int=-90): + + def draw_rotated_text(canvas, text, font, xy, fill=BLACK, angle=-90): # type: (Image, str, ImageFont, tuple, ImageColor, int) """Utility function draw rotated text""" @@ -20,6 +22,8 @@ def draw_rotated_text(canvas, text, font, xy, fill=BLACK, angle=-90): canvas.paste(im=tmp_img2, box=tuple([int(i) for i in xy])) # def draw_open_polygon(canvas: Image, xy: tuple, fill: ImageColor=WHITE, outline: ImageColor=BLACK): + + def draw_open_polygon(canvas, xy, fill=None, outline=BLACK, width=0): # type: (Image, tuple, ImageColor, ImageColor) draw_ctx = ImageDraw.Draw(canvas) diff --git a/wqflask/utility/redis_tools.py b/wqflask/utility/redis_tools.py index 8052035f..96a4be12 100644 --- a/wqflask/utility/redis_tools.py +++ b/wqflask/utility/redis_tools.py @@ -133,8 +133,10 @@ def get_user_groups(user_id): for key in groups_list: try: group_ob = json.loads(groups_list[key]) - group_admins = set([this_admin.encode('utf-8') if this_admin else None for this_admin in group_ob['admins']]) - group_members = set([this_member.encode('utf-8') if this_member else None for this_member in group_ob['members']]) + group_admins = set([this_admin.encode( + 'utf-8') if this_admin else None for this_admin in group_ob['admins']]) + group_members = set([this_member.encode( + 'utf-8') if this_member else None for this_member in group_ob['members']]) if user_id in group_admins: admin_group_ids.append(group_ob['id']) elif user_id in group_members: @@ -203,7 +205,8 @@ def get_groups_like_unique_column(column_name, column_value): if column_value in group_info[column_name]: matched_groups.append(group_info) else: - matched_groups.append(load_json_from_redis(group_list, column_value)) + matched_groups.append( + load_json_from_redis(group_list, column_value)) return matched_groups diff --git a/wqflask/utility/startup_config.py b/wqflask/utility/startup_config.py index f1aaebb6..6ef759e0 100644 --- a/wqflask/utility/startup_config.py +++ b/wqflask/utility/startup_config.py @@ -3,12 +3,13 @@ from wqflask import app from utility.tools import WEBSERVER_MODE, show_settings, get_setting_int, get_setting, get_setting_bool import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) -BLUE = '\033[94m' +BLUE = '\033[94m' GREEN = '\033[92m' -BOLD = '\033[1m' -ENDC = '\033[0m' +BOLD = '\033[1m' +ENDC = '\033[0m' + def app_config(): app.config['SESSION_TYPE'] = 'filesystem' @@ -27,7 +28,8 @@ def app_config(): port = get_setting_int("SERVER_PORT") if get_setting_bool("USE_GN_SERVER"): - print(("GN2 API server URL is ["+BLUE+get_setting("GN_SERVER_URL")+ENDC+"]")) + print( + ("GN2 API server URL is [" + BLUE + get_setting("GN_SERVER_URL") + ENDC + "]")) import requests page = requests.get(get_setting("GN_SERVER_URL")) if page.status_code != 200: @@ -36,4 +38,5 @@ def app_config(): # import utility.elasticsearch_tools as es # es.test_elasticsearch_connection() - print(("GN2 is running. Visit %s[http://localhost:%s/%s](%s)" % (BLUE, str(port), ENDC, get_setting("WEBSERVER_URL")))) + print(("GN2 is running. Visit %s[http://localhost:%s/%s](%s)" % + (BLUE, str(port), ENDC, get_setting("WEBSERVER_URL")))) diff --git a/wqflask/utility/svg.py b/wqflask/utility/svg.py index b92cc2d1..eddb97da 100644 --- a/wqflask/utility/svg.py +++ b/wqflask/utility/svg.py @@ -172,7 +172,7 @@ def _viewboxlist(a): """formats a tuple""" s = '' for e in a: - s += str(e)+' ' + s += str(e) + ' ' return s @@ -189,7 +189,7 @@ class pathdata: def __init__(self, x=None, y=None): self.path = [] if x is not None and y is not None: - self.path.append('M '+str(x)+' '+str(y)) + self.path.append('M ' + str(x) + ' ' + str(y)) def closepath(self): """ends the path""" @@ -197,79 +197,83 @@ class pathdata: def move(self, x, y): """move to absolute""" - self.path.append('M '+str(x)+' '+str(y)) + self.path.append('M ' + str(x) + ' ' + str(y)) def relmove(self, x, y): """move to relative""" - self.path.append('m '+str(x)+' '+str(y)) + self.path.append('m ' + str(x) + ' ' + str(y)) def line(self, x, y): """line to absolute""" - self.path.append('L '+str(x)+' '+str(y)) + self.path.append('L ' + str(x) + ' ' + str(y)) def relline(self, x, y): """line to relative""" - self.path.append('l '+str(x)+' '+str(y)) + self.path.append('l ' + str(x) + ' ' + str(y)) def hline(self, x): """horizontal line to absolute""" - self.path.append('H'+str(x)) + self.path.append('H' + str(x)) def relhline(self, x): """horizontal line to relative""" - self.path.append('h'+str(x)) + self.path.append('h' + str(x)) def vline(self, y): """verical line to absolute""" - self.path.append('V'+str(y)) + self.path.append('V' + str(y)) def relvline(self, y): """vertical line to relative""" - self.path.append('v'+str(y)) + self.path.append('v' + str(y)) def bezier(self, x1, y1, x2, y2, x, y): """bezier with xy1 and xy2 to xy absolut""" - self.path.append('C'+str(x1)+','+str(y1)+' '+str(x2) + - ','+str(y2)+' '+str(x)+','+str(y)) + self.path.append('C' + str(x1) + ',' + str(y1) + ' ' + str(x2) + + ',' + str(y2) + ' ' + str(x) + ',' + str(y)) def relbezier(self, x1, y1, x2, y2, x, y): """bezier with xy1 and xy2 to xy relative""" - self.path.append('c'+str(x1)+','+str(y1)+' '+str(x2) + - ','+str(y2)+' '+str(x)+','+str(y)) + self.path.append('c' + str(x1) + ',' + str(y1) + ' ' + str(x2) + + ',' + str(y2) + ' ' + str(x) + ',' + str(y)) def smbezier(self, x2, y2, x, y): """smooth bezier with xy2 to xy absolut""" - self.path.append('S'+str(x2)+','+str(y2)+' '+str(x)+','+str(y)) + self.path.append('S' + str(x2) + ',' + str(y2) + \ + ' ' + str(x) + ',' + str(y)) def relsmbezier(self, x2, y2, x, y): """smooth bezier with xy2 to xy relative""" - self.path.append('s'+str(x2)+','+str(y2)+' '+str(x)+','+str(y)) + self.path.append('s' + str(x2) + ',' + str(y2) + \ + ' ' + str(x) + ',' + str(y)) def qbezier(self, x1, y1, x, y): """quadratic bezier with xy1 to xy absolut""" - self.path.append('Q'+str(x1)+','+str(y1)+' '+str(x)+','+str(y)) + self.path.append('Q' + str(x1) + ',' + str(y1) + \ + ' ' + str(x) + ',' + str(y)) def relqbezier(self, x1, y1, x, y): """quadratic bezier with xy1 to xy relative""" - self.path.append('q'+str(x1)+','+str(y1)+' '+str(x)+','+str(y)) + self.path.append('q' + str(x1) + ',' + str(y1) + \ + ' ' + str(x) + ',' + str(y)) def smqbezier(self, x, y): """smooth quadratic bezier to xy absolut""" - self.path.append('T'+str(x)+','+str(y)) + self.path.append('T' + str(x) + ',' + str(y)) def relsmqbezier(self, x, y): """smooth quadratic bezier to xy relative""" - self.path.append('t'+str(x)+','+str(y)) + self.path.append('t' + str(x) + ',' + str(y)) def ellarc(self, rx, ry, xrot, laf, sf, x, y): """elliptival arc with rx and ry rotating with xrot using large-arc-flag and sweep-flag to xy absolut""" - self.path.append('A'+str(rx)+','+str(ry)+' '+str(xrot) + - ' '+str(laf)+' '+str(sf)+' '+str(x)+' '+str(y)) + self.path.append('A' + str(rx) + ',' + str(ry) + ' ' + str(xrot) + + ' ' + str(laf) + ' ' + str(sf) + ' ' + str(x) + ' ' + str(y)) def relellarc(self, rx, ry, xrot, laf, sf, x, y): """elliptival arc with rx and ry rotating with xrot using large-arc-flag and sweep-flag to xy relative""" - self.path.append('a'+str(rx)+','+str(ry)+' '+str(xrot) + - ' '+str(laf)+' '+str(sf)+' '+str(x)+' '+str(y)) + self.path.append('a' + str(rx) + ',' + str(ry) + ' ' + str(xrot) + + ' ' + str(laf) + ' ' + str(sf) + ' ' + str(x) + ' ' + str(y)) def __repr__(self): return ' '.join(self.path) @@ -312,36 +316,36 @@ class SVGelement: self.elements.append(SVGelement) def toXml(self, level, f): - f.write('\t'*level) - f.write('<'+self.type) + f.write('\t' * level) + f.write('<' + self.type) for attkey in list(self.attributes.keys()): - f.write(' '+_escape(str(attkey))+'=' + - _quoteattr(str(self.attributes[attkey]))) + f.write(' ' + _escape(str(attkey)) + '=' + + _quoteattr(str(self.attributes[attkey]))) if self.namespace: - f.write(' xmlns="' + _escape(str(self.namespace)) + - '" xmlns:xlink="http://www.w3.org/1999/xlink"') + f.write(' xmlns="' + _escape(str(self.namespace)) + + '" xmlns:xlink="http://www.w3.org/1999/xlink"') if self.elements or self.text or self.cdata: f.write('>') if self.elements: f.write('\n') for element in self.elements: - element.toXml(level+1, f) + element.toXml(level + 1, f) if self.cdata: - f.write('\n'+'\t'*(level+1)+'<![CDATA[') + f.write('\n' + '\t' * (level + 1) + '<![CDATA[') for line in self.cdata.splitlines(): - f.write('\n'+'\t'*(level+2)+line) - f.write('\n'+'\t'*(level+1)+']]>\n') + f.write('\n' + '\t' * (level + 2) + line) + f.write('\n' + '\t' * (level + 1) + ']]>\n') if self.text: if isinstance(self.text, type('')): # If the text is only text f.write(_escape(str(self.text))) else: # If the text is a spannedtext class f.write(str(self.text)) if self.elements: - f.write('\t'*level+'</'+self.type+'>\n') + f.write('\t' * level + '</' + self.type + '>\n') elif self.text: - f.write('</'+self.type+'>\n') + f.write('</' + self.type + '>\n') elif self.cdata: - f.write('\t'*level+'</'+self.type+'>\n') + f.write('\t' * level + '</' + self.type + '>\n') else: f.write('/>\n') @@ -447,38 +451,41 @@ class rect(SVGelement): if width == None or height == None: raise ValueError('both height and width are required') - SVGelement.__init__(self, 'rect', {'width':width,'height':height}, **args) - if x!=None: - self.attributes['x']=x - if y!=None: - self.attributes['y']=y - if fill!=None: - self.attributes['fill']=fill - if stroke!=None: - self.attributes['stroke']=stroke - if stroke_width!=None: - self.attributes['stroke-width']=stroke_width + SVGelement.__init__( + self, 'rect', {'width': width, 'height': height}, **args) + if x != None: + self.attributes['x'] = x + if y != None: + self.attributes['y'] = y + if fill != None: + self.attributes['fill'] = fill + if stroke != None: + self.attributes['stroke'] = stroke + if stroke_width != None: + self.attributes['stroke-width'] = stroke_width + class ellipse(SVGelement): """e=ellipse(rx,ry,x,y,fill,stroke,stroke_width,**args) an ellipse is defined as a center and a x and y radius. """ - def __init__(self,cx=None,cy=None,rx=None,ry=None,fill=None,stroke=None,stroke_width=None,**args): - if rx==None or ry== None: + + def __init__(self, cx=None, cy=None, rx=None, ry=None, fill=None, stroke=None, stroke_width=None, **args): + if rx == None or ry == None: raise ValueError('both rx and ry are required') - SVGelement.__init__(self, 'ellipse', {'rx':rx,'ry':ry}, **args) - if cx!=None: - self.attributes['cx']=cx - if cy!=None: - self.attributes['cy']=cy - if fill!=None: - self.attributes['fill']=fill - if stroke!=None: - self.attributes['stroke']=stroke - if stroke_width!=None: - self.attributes['stroke-width']=stroke_width + SVGelement.__init__(self, 'ellipse', {'rx': rx, 'ry': ry}, **args) + if cx != None: + self.attributes['cx'] = cx + if cy != None: + self.attributes['cy'] = cy + if fill != None: + self.attributes['fill'] = fill + if stroke != None: + self.attributes['stroke'] = stroke + if stroke_width != None: + self.attributes['stroke-width'] = stroke_width class circle(SVGelement): @@ -486,20 +493,22 @@ class circle(SVGelement): The circle creates an element using a x, y and radius values eg """ - def __init__(self,cx=None,cy=None,r=None,fill=None,stroke=None,stroke_width=None,**args): - if r==None: + + def __init__(self, cx=None, cy=None, r=None, fill=None, stroke=None, stroke_width=None, **args): + if r == None: raise ValueError('r is required') - SVGelement.__init__(self, 'circle', {'r':r}, **args) - if cx!=None: - self.attributes['cx']=cx - if cy!=None: - self.attributes['cy']=cy - if fill!=None: - self.attributes['fill']=fill - if stroke!=None: - self.attributes['stroke']=stroke - if stroke_width!=None: - self.attributes['stroke-width']=stroke_width + SVGelement.__init__(self, 'circle', {'r': r}, **args) + if cx != None: + self.attributes['cx'] = cx + if cy != None: + self.attributes['cy'] = cy + if fill != None: + self.attributes['fill'] = fill + if stroke != None: + self.attributes['stroke'] = stroke + if stroke_width != None: + self.attributes['stroke-width'] = stroke_width + class point(circle): """p=point(x,y,color) @@ -507,72 +516,83 @@ class point(circle): A point is defined as a circle with a size 1 radius. It may be more efficient to use a very small rectangle if you use many points because a circle is difficult to render. """ - def __init__(self,x,y,fill='black',**args): + + def __init__(self, x, y, fill='black', **args): circle.__init__(self, x, y, 1, fill, **args) + class line(SVGelement): """l=line(x1,y1,x2,y2,stroke,stroke_width,**args) A line is defined by a begin x,y pair and an end x,y pair """ - def __init__(self,x1=None,y1=None,x2=None,y2=None,stroke=None,stroke_width=None,**args): + + def __init__(self, x1=None, y1=None, x2=None, y2=None, stroke=None, stroke_width=None, **args): SVGelement.__init__(self, 'line', **args) - if x1!=None: - self.attributes['x1']=x1 - if y1!=None: - self.attributes['y1']=y1 - if x2!=None: - self.attributes['x2']=x2 - if y2!=None: - self.attributes['y2']=y2 - if stroke_width!=None: - self.attributes['stroke-width']=stroke_width - if stroke!=None: - self.attributes['stroke']=stroke + if x1 != None: + self.attributes['x1'] = x1 + if y1 != None: + self.attributes['y1'] = y1 + if x2 != None: + self.attributes['x2'] = x2 + if y2 != None: + self.attributes['y2'] = y2 + if stroke_width != None: + self.attributes['stroke-width'] = stroke_width + if stroke != None: + self.attributes['stroke'] = stroke + class polyline(SVGelement): """pl=polyline([[x1,y1],[x2,y2],...],fill,stroke,stroke_width,**args) a polyline is defined by a list of xy pairs """ - def __init__(self,points,fill=None,stroke=None,stroke_width=None,**args): - SVGelement.__init__(self, 'polyline', {'points':_xypointlist(points)}, **args) - if fill!=None: - self.attributes['fill']=fill - if stroke_width!=None: - self.attributes['stroke-width']=stroke_width - if stroke!=None: - self.attributes['stroke']=stroke + + def __init__(self, points, fill=None, stroke=None, stroke_width=None, **args): + SVGelement.__init__(self, 'polyline', { + 'points': _xypointlist(points)}, **args) + if fill != None: + self.attributes['fill'] = fill + if stroke_width != None: + self.attributes['stroke-width'] = stroke_width + if stroke != None: + self.attributes['stroke'] = stroke + class polygon(SVGelement): """pl=polyline([[x1,y1],[x2,y2],...],fill,stroke,stroke_width,**args) a polygon is defined by a list of xy pairs """ - def __init__(self,points,fill=None,stroke=None,stroke_width=None,**args): - SVGelement.__init__(self, 'polygon', {'points':_xypointlist(points)}, **args) - if fill!=None: - self.attributes['fill']=fill - if stroke_width!=None: - self.attributes['stroke-width']=stroke_width - if stroke!=None: - self.attributes['stroke']=stroke + + def __init__(self, points, fill=None, stroke=None, stroke_width=None, **args): + SVGelement.__init__( + self, 'polygon', {'points': _xypointlist(points)}, **args) + if fill != None: + self.attributes['fill'] = fill + if stroke_width != None: + self.attributes['stroke-width'] = stroke_width + if stroke != None: + self.attributes['stroke'] = stroke + class path(SVGelement): """p=path(path,fill,stroke,stroke_width,**args) a path is defined by a path object and optional width, stroke and fillcolor """ - def __init__(self,pathdata,fill=None,stroke=None,stroke_width=None,id=None,**args): - SVGelement.__init__(self, 'path', {'d':str(pathdata)}, **args) - if stroke!=None: - self.attributes['stroke']=stroke - if fill!=None: - self.attributes['fill']=fill - if stroke_width!=None: - self.attributes['stroke-width']=stroke_width - if id!=None: - self.attributes['id']=id + + def __init__(self, pathdata, fill=None, stroke=None, stroke_width=None, id=None, **args): + SVGelement.__init__(self, 'path', {'d': str(pathdata)}, **args) + if stroke != None: + self.attributes['stroke'] = stroke + if fill != None: + self.attributes['fill'] = fill + if stroke_width != None: + self.attributes['stroke-width'] = stroke_width + if id != None: + self.attributes['id'] = id class text(SVGelement): @@ -580,20 +600,21 @@ class text(SVGelement): a text element can bge used for displaying text on the screen """ - def __init__(self,x=None,y=None,text=None,font_size=None,font_family=None,text_anchor=None,**args): + + def __init__(self, x=None, y=None, text=None, font_size=None, font_family=None, text_anchor=None, **args): SVGelement.__init__(self, 'text', **args) - if x!=None: - self.attributes['x']=x - if y!=None: - self.attributes['y']=y - if font_size!=None: - self.attributes['font-size']=font_size - if font_family!=None: - self.attributes['font-family']=font_family - if text!=None: - self.text=text - if text_anchor!=None: - self.attributes['text-anchor']=text_anchor + if x != None: + self.attributes['x'] = x + if y != None: + self.attributes['y'] = y + if font_size != None: + self.attributes['font-size'] = font_size + if font_family != None: + self.attributes['font-family'] = font_family + if text != None: + self.text = text + if text_anchor != None: + self.attributes['text-anchor'] = text_anchor class textpath(SVGelement): @@ -601,10 +622,12 @@ class textpath(SVGelement): a textpath places a text on a path which is referenced by a link. """ - def __init__(self,link,text=None,**args): - SVGelement.__init__(self, 'textPath', {'xlink:href':link}, **args) - if text!=None: - self.text=text + + def __init__(self, link, text=None, **args): + SVGelement.__init__(self, 'textPath', {'xlink:href': link}, **args) + if text != None: + self.text = text + class pattern(SVGelement): """p=pattern(x,y,width,height,patternUnits,**args) @@ -613,18 +636,20 @@ class pattern(SVGelement): graphic object which can be replicated ("tiled") at fixed intervals in x and y to cover the areas to be painted. """ - def __init__(self,x=None,y=None,width=None,height=None,patternUnits=None,**args): + + def __init__(self, x=None, y=None, width=None, height=None, patternUnits=None, **args): SVGelement.__init__(self, 'pattern', **args) - if x!=None: - self.attributes['x']=x - if y!=None: - self.attributes['y']=y - if width!=None: - self.attributes['width']=width - if height!=None: - self.attributes['height']=height - if patternUnits!=None: - self.attributes['patternUnits']=patternUnits + if x != None: + self.attributes['x'] = x + if y != None: + self.attributes['y'] = y + if width != None: + self.attributes['width'] = width + if height != None: + self.attributes['height'] = height + if patternUnits != None: + self.attributes['patternUnits'] = patternUnits + class title(SVGelement): """t=title(text,**args) @@ -632,10 +657,12 @@ class title(SVGelement): a title is a text element. The text is displayed in the title bar add at least one to the root svg element """ - def __init__(self,text=None,**args): + + def __init__(self, text=None, **args): SVGelement.__init__(self, 'title', **args) - if text!=None: - self.text=text + if text != None: + self.text = text + class description(SVGelement): """d=description(text,**args) @@ -643,10 +670,12 @@ class description(SVGelement): a description can be added to any element and is used for a tooltip Add this element before adding other elements. """ - def __init__(self,text=None,**args): + + def __init__(self, text=None, **args): SVGelement.__init__(self, 'desc', **args) - if text!=None: - self.text=text + if text != None: + self.text = text + class lineargradient(SVGelement): """lg=lineargradient(x1,y1,x2,y2,id,**args) @@ -654,18 +683,20 @@ class lineargradient(SVGelement): defines a lineargradient using two xy pairs. stop elements van be added to define the gradient colors. """ - def __init__(self,x1=None,y1=None,x2=None,y2=None,id=None,**args): + + def __init__(self, x1=None, y1=None, x2=None, y2=None, id=None, **args): SVGelement.__init__(self, 'linearGradient', **args) - if x1!=None: - self.attributes['x1']=x1 - if y1!=None: - self.attributes['y1']=y1 - if x2!=None: - self.attributes['x2']=x2 - if y2!=None: - self.attributes['y2']=y2 - if id!=None: - self.attributes['id']=id + if x1 != None: + self.attributes['x1'] = x1 + if y1 != None: + self.attributes['y1'] = y1 + if x2 != None: + self.attributes['x2'] = x2 + if y2 != None: + self.attributes['y2'] = y2 + if id != None: + self.attributes['id'] = id + class radialgradient(SVGelement): """rg=radialgradient(cx,cy,r,fx,fy,id,**args) @@ -673,38 +704,43 @@ class radialgradient(SVGelement): defines a radial gradient using a outer circle which are defined by a cx,cy and r and by using a focalpoint. stop elements van be added to define the gradient colors. """ - def __init__(self,cx=None,cy=None,r=None,fx=None,fy=None,id=None,**args): + + def __init__(self, cx=None, cy=None, r=None, fx=None, fy=None, id=None, **args): SVGelement.__init__(self, 'radialGradient', **args) - if cx!=None: - self.attributes['cx']=cx - if cy!=None: - self.attributes['cy']=cy - if r!=None: - self.attributes['r']=r - if fx!=None: - self.attributes['fx']=fx - if fy!=None: - self.attributes['fy']=fy - if id!=None: - self.attributes['id']=id + if cx != None: + self.attributes['cx'] = cx + if cy != None: + self.attributes['cy'] = cy + if r != None: + self.attributes['r'] = r + if fx != None: + self.attributes['fx'] = fx + if fy != None: + self.attributes['fy'] = fy + if id != None: + self.attributes['id'] = id + class stop(SVGelement): """st=stop(offset,stop_color,**args) Puts a stop color at the specified radius """ - def __init__(self,offset,stop_color=None,**args): - SVGelement.__init__(self, 'stop', {'offset':offset}, **args) - if stop_color!=None: - self.attributes['stop-color']=stop_color + + def __init__(self, offset, stop_color=None, **args): + SVGelement.__init__(self, 'stop', {'offset': offset}, **args) + if stop_color != None: + self.attributes['stop-color'] = stop_color + class style(SVGelement): """st=style(type,cdata=None,**args) Add a CDATA element to this element for defing in line stylesheets etc.. """ - def __init__(self,type,cdata=None,**args): - SVGelement.__init__(self, 'style', {'type':type}, cdata=cdata, **args) + + def __init__(self, type, cdata=None, **args): + SVGelement.__init__(self, 'style', {'type': type}, cdata=cdata, **args) class image(SVGelement): @@ -712,22 +748,26 @@ class image(SVGelement): adds an image to the drawing. Supported formats are .png, .jpg and .svg. """ - def __init__(self,url,x=None,y=None,width=None,height=None,**args): - if width==None or height==None: + + def __init__(self, url, x=None, y=None, width=None, height=None, **args): + if width == None or height == None: raise ValueError('both height and width are required') - SVGelement.__init__(self, 'image', {'xlink:href':url,'width':width,'height':height}, **args) - if x!=None: - self.attributes['x']=x - if y!=None: - self.attributes['y']=y + SVGelement.__init__( + self, 'image', {'xlink:href': url, 'width': width, 'height': height}, **args) + if x != None: + self.attributes['x'] = x + if y != None: + self.attributes['y'] = y + class cursor(SVGelement): """c=cursor(url,**args) defines a custom cursor for a element or a drawing """ - def __init__(self,url,**args): - SVGelement.__init__(self, 'cursor', {'xlink:href':url}, **args) + + def __init__(self, url, **args): + SVGelement.__init__(self, 'cursor', {'xlink:href': url}, **args) class marker(SVGelement): @@ -736,20 +776,22 @@ class marker(SVGelement): defines a marker which can be used as an endpoint for a line or other pathtypes add an element to it which should be used as a marker. """ - def __init__(self,id=None,viewBox=None,refx=None,refy=None,markerWidth=None,markerHeight=None,**args): + + def __init__(self, id=None, viewBox=None, refx=None, refy=None, markerWidth=None, markerHeight=None, **args): SVGelement.__init__(self, 'marker', **args) - if id!=None: - self.attributes['id']=id - if viewBox!=None: - self.attributes['viewBox']=_viewboxlist(viewBox) - if refx!=None: - self.attributes['refX']=refx - if refy!=None: - self.attributes['refY']=refy - if markerWidth!=None: - self.attributes['markerWidth']=markerWidth - if markerHeight!=None: - self.attributes['markerHeight']=markerHeight + if id != None: + self.attributes['id'] = id + if viewBox != None: + self.attributes['viewBox'] = _viewboxlist(viewBox) + if refx != None: + self.attributes['refX'] = refx + if refy != None: + self.attributes['refY'] = refy + if markerWidth != None: + self.attributes['markerWidth'] = markerWidth + if markerHeight != None: + self.attributes['markerHeight'] = markerHeight + class group(SVGelement): """g=group(id,**args) @@ -757,10 +799,12 @@ class group(SVGelement): a group is defined by an id and is used to contain elements g.addElement(SVGelement) """ - def __init__(self,id=None,**args): + + def __init__(self, id=None, **args): SVGelement.__init__(self, 'g', **args) - if id!=None: - self.attributes['id']=id + if id != None: + self.attributes['id'] = id + class symbol(SVGelement): """sy=symbol(id,viewbox,**args) @@ -771,21 +815,24 @@ class symbol(SVGelement): sy.addElement(SVGelement) """ - def __init__(self,id=None,viewBox=None,**args): + def __init__(self, id=None, viewBox=None, **args): SVGelement.__init__(self, 'symbol', **args) - if id!=None: - self.attributes['id']=id - if viewBox!=None: - self.attributes['viewBox']=_viewboxlist(viewBox) + if id != None: + self.attributes['id'] = id + if viewBox != None: + self.attributes['viewBox'] = _viewboxlist(viewBox) + class defs(SVGelement): """d=defs(**args) container for defining elements """ - def __init__(self,**args): + + def __init__(self, **args): SVGelement.__init__(self, 'defs', **args) + class switch(SVGelement): """sw=switch(**args) @@ -793,7 +840,8 @@ class switch(SVGelement): requiredFeatures, requiredExtensions and systemLanguage. Refer to the SVG specification for details. """ - def __init__(self,**args): + + def __init__(self, **args): SVGelement.__init__(self, 'switch', **args) @@ -802,17 +850,18 @@ class use(SVGelement): references a symbol by linking to its id and its position, height and width """ - def __init__(self,link,x=None,y=None,width=None,height=None,**args): - SVGelement.__init__(self, 'use', {'xlink:href':link}, **args) - if x!=None: - self.attributes['x']=x - if y!=None: - self.attributes['y']=y - if width!=None: - self.attributes['width']=width - if height!=None: - self.attributes['height']=height + def __init__(self, link, x=None, y=None, width=None, height=None, **args): + SVGelement.__init__(self, 'use', {'xlink:href': link}, **args) + if x != None: + self.attributes['x'] = x + if y != None: + self.attributes['y'] = y + + if width != None: + self.attributes['width'] = width + if height != None: + self.attributes['height'] = height class link(SVGelement): @@ -821,17 +870,21 @@ class link(SVGelement): a link is defined by a hyperlink. add elements which have to be linked a.addElement(SVGelement) """ - def __init__(self,link='',**args): - SVGelement.__init__(self, 'a', {'xlink:href':link}, **args) + + def __init__(self, link='', **args): + SVGelement.__init__(self, 'a', {'xlink:href': link}, **args) + class view(SVGelement): """v=view(id,**args) a view can be used to create a view with different attributes""" - def __init__(self,id=None,**args): + + def __init__(self, id=None, **args): SVGelement.__init__(self, 'view', **args) - if id!=None: - self.attributes['id']=id + if id != None: + self.attributes['id'] = id + class script(SVGelement): """sc=script(type,type,cdata,**args) @@ -839,78 +892,94 @@ class script(SVGelement): adds a script element which contains CDATA to the SVG drawing """ - def __init__(self,type,cdata=None,**args): - SVGelement.__init__(self, 'script', {'type':type}, cdata=cdata, **args) + + def __init__(self, type, cdata=None, **args): + SVGelement.__init__( + self, 'script', {'type': type}, cdata=cdata, **args) + class animate(SVGelement): """an=animate(attribute,from,to,during,**args) animates an attribute. """ - def __init__(self,attribute,fr=None,to=None,dur=None,**args): - SVGelement.__init__(self, 'animate', {'attributeName':attribute}, **args) - if fr!=None: - self.attributes['from']=fr - if to!=None: - self.attributes['to']=to - if dur!=None: - self.attributes['dur']=dur + + def __init__(self, attribute, fr=None, to=None, dur=None, **args): + SVGelement.__init__( + self, 'animate', {'attributeName': attribute}, **args) + if fr != None: + self.attributes['from'] = fr + if to != None: + self.attributes['to'] = to + if dur != None: + self.attributes['dur'] = dur + class animateMotion(SVGelement): """an=animateMotion(pathdata,dur,**args) animates a SVGelement over the given path in dur seconds """ - def __init__(self,pathdata,dur,**args): + + def __init__(self, pathdata, dur, **args): SVGelement.__init__(self, 'animateMotion', **args) - if pathdata!=None: - self.attributes['path']=str(pathdata) - if dur!=None: - self.attributes['dur']=dur + if pathdata != None: + self.attributes['path'] = str(pathdata) + if dur != None: + self.attributes['dur'] = dur + class animateTransform(SVGelement): """antr=animateTransform(type,from,to,dur,**args) transform an element from and to a value. """ - def __init__(self,type=None,fr=None,to=None,dur=None,**args): - SVGelement.__init__(self, 'animateTransform', {'attributeName':'transform'}, **args) + + def __init__(self, type=None, fr=None, to=None, dur=None, **args): + SVGelement.__init__(self, 'animateTransform', { + 'attributeName': 'transform'}, **args) # As far as I know the attributeName is always transform - if type!=None: - self.attributes['type']=type - if fr!=None: - self.attributes['from']=fr - if to!=None: - self.attributes['to']=to - if dur!=None: - self.attributes['dur']=dur + if type != None: + self.attributes['type'] = type + if fr != None: + self.attributes['from'] = fr + if to != None: + self.attributes['to'] = to + if dur != None: + self.attributes['dur'] = dur + + class animateColor(SVGelement): """ac=animateColor(attribute,type,from,to,dur,**args) Animates the color of a element """ - def __init__(self,attribute,type=None,fr=None,to=None,dur=None,**args): - SVGelement.__init__(self, 'animateColor', {'attributeName':attribute}, **args) - if type!=None: - self.attributes['type']=type - if fr!=None: - self.attributes['from']=fr - if to!=None: - self.attributes['to']=to - if dur!=None: - self.attributes['dur']=dur + + def __init__(self, attribute, type=None, fr=None, to=None, dur=None, **args): + SVGelement.__init__(self, 'animateColor', { + 'attributeName': attribute}, **args) + if type != None: + self.attributes['type'] = type + if fr != None: + self.attributes['from'] = fr + if to != None: + self.attributes['to'] = to + if dur != None: + self.attributes['dur'] = dur + + class set(SVGelement): """st=set(attribute,to,during,**args) sets an attribute to a value for a """ - def __init__(self,attribute,to=None,dur=None,**args): - SVGelement.__init__(self, 'set', {'attributeName':attribute}, **args) - if to!=None: - self.attributes['to']=to - if dur!=None: - self.attributes['dur']=dur + def __init__(self, attribute, to=None, dur=None, **args): + SVGelement.__init__(self, 'set', {'attributeName': attribute}, **args) + if to != None: + self.attributes['to'] = to + if dur != None: + self.attributes['dur'] = dur class svg(SVGelement): @@ -928,15 +997,17 @@ class svg(SVGelement): d.setSVG(s) d.toXml() """ - def __init__(self,viewBox=None, width=None, height=None,**args): + + def __init__(self, viewBox=None, width=None, height=None, **args): SVGelement.__init__(self, 'svg', **args) - if viewBox!=None: - self.attributes['viewBox']=_viewboxlist(viewBox) - if width!=None: - self.attributes['width']=width - if height!=None: - self.attributes['height']=height - self.namespace="http://www.w3.org/2000/svg" + if viewBox != None: + self.attributes['viewBox'] = _viewboxlist(viewBox) + if width != None: + self.attributes['width'] = width + if height != None: + self.attributes['height'] = height + self.namespace = "http://www.w3.org/2000/svg" + class drawing: """d=drawing() @@ -950,29 +1021,32 @@ class drawing: """ def __init__(self, entity={}): - self.svg=None + self.svg = None self.entity = entity + def setSVG(self, svg): - self.svg=svg + self.svg = svg # Voeg een element toe aan de grafiek toe. - if use_dom_implementation==0: - def toXml(self, filename='',compress=False): + if use_dom_implementation == 0: + def toXml(self, filename='', compress=False): import io - xml=io.StringIO() + xml = io.StringIO() xml.write("<?xml version='1.0' encoding='UTF-8'?>\n") - xml.write("<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.0//EN\" \"http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd\"") + xml.write( + "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.0//EN\" \"http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd\"") if self.entity: xml.write(" [\n") for item in list(self.entity.keys()): - xml.write("<!ENTITY %s \"%s\">\n" % (item, self.entity[item])) + xml.write("<!ENTITY %s \"%s\">\n" % + (item, self.entity[item])) xml.write("]") xml.write(">\n") self.svg.toXml(0, xml) if not filename: if compress: import gzip - f=io.StringIO() - zf=gzip.GzipFile(fileobj=f, mode='wb') + f = io.StringIO() + zf = gzip.GzipFile(fileobj=f, mode='wb') zf.write(xml.getvalue()) zf.close() f.seek(0) @@ -980,57 +1054,62 @@ class drawing: else: return xml.getvalue() else: - if filename[-4:]=='svgz': + if filename[-4:] == 'svgz': import gzip - f=gzip.GzipFile(filename=filename, mode="wb", compresslevel=9) + f = gzip.GzipFile(filename=filename, + mode="wb", compresslevel=9) f.write(xml.getvalue()) f.close() else: - f=file(filename, 'w') + f = file(filename, 'w') f.write(xml.getvalue()) f.close() else: - def toXml(self,filename='',compress=False): + def toXml(self, filename='', compress=False): """drawing.toXml() ---->to the screen drawing.toXml(filename)---->to the file writes a svg drawing to the screen or to a file compresses if filename ends with svgz or if compress is true """ - doctype = implementation.createDocumentType('svg', "-//W3C//DTD SVG 1.0//EN""", 'http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd ') + doctype = implementation.createDocumentType( + 'svg', "-//W3C//DTD SVG 1.0//EN""", 'http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd ') global root # root is defined global so it can be used by the appender. Its also possible to use it as an arugument but # that is a bit messy. - root=implementation.createDocument(None, None, doctype) + root = implementation.createDocument(None, None, doctype) # Create the xml document. global appender + def appender(element, elementroot): """This recursive function appends elements to an element and sets the attributes and type. It stops when alle elements have been appended""" if element.namespace: - e=root.createElementNS(element.namespace, element.type) + e = root.createElementNS(element.namespace, element.type) else: - e=root.createElement(element.type) + e = root.createElement(element.type) if element.text: - textnode=root.createTextNode(element.text) + textnode = root.createTextNode(element.text) e.appendChild(textnode) - for attribute in list(element.attributes.keys()): #in element.attributes is supported from python 2.2 - e.setAttribute(attribute, str(element.attributes[attribute])) + # in element.attributes is supported from python 2.2 + for attribute in list(element.attributes.keys()): + e.setAttribute(attribute, str( + element.attributes[attribute])) if element.elements: for el in element.elements: - e=appender(el, e) + e = appender(el, e) elementroot.appendChild(e) return elementroot - root=appender(self.svg, root) + root = appender(self.svg, root) if not filename: import io - xml=io.StringIO() + xml = io.StringIO() PrettyPrint(root, xml) if compress: import gzip - f=io.StringIO() - zf=gzip.GzipFile(fileobj=f, mode='wb') + f = io.StringIO() + zf = gzip.GzipFile(fileobj=f, mode='wb') zf.write(xml.getvalue()) zf.close() f.seek(0) @@ -1039,63 +1118,67 @@ class drawing: return xml.getvalue() else: try: - if filename[-4:]=='svgz': + if filename[-4:] == 'svgz': import gzip import io - xml=io.StringIO() + xml = io.StringIO() PrettyPrint(root, xml) - f=gzip.GzipFile(filename=filename, mode='wb', compresslevel=9) + f = gzip.GzipFile(filename=filename, + mode='wb', compresslevel=9) f.write(xml.getvalue()) f.close() else: - f=open(filename, 'w') + f = open(filename, 'w') PrettyPrint(root, f) f.close() except: print(("Cannot write SVG file: " + filename)) + def validate(self): try: import xml.parsers.xmlproc.xmlval except: - raise exceptions.ImportError('PyXml is required for validating SVG') - svg=self.toXml() - xv=xml.parsers.xmlproc.xmlval.XMLValidator() + raise exceptions.ImportError( + 'PyXml is required for validating SVG') + svg = self.toXml() + xv = xml.parsers.xmlproc.xmlval.XMLValidator() try: xv.feed(svg) except: raise Exception("SVG is not well formed, see messages above") else: print("SVG well formed") -if __name__=='__main__': - d=drawing() - s=svg((0, 0, 100, 100)) - r=rect(-100, -100, 300, 300, 'cyan') +if __name__ == '__main__': + + d = drawing() + s = svg((0, 0, 100, 100)) + r = rect(-100, -100, 300, 300, 'cyan') s.addElement(r) - t=title('SVGdraw Demo') + t = title('SVGdraw Demo') s.addElement(t) - g=group('animations') - e=ellipse(0, 0, 5, 2) + g = group('animations') + e = ellipse(0, 0, 5, 2) g.addElement(e) - c=circle(0, 0, 1, 'red') + c = circle(0, 0, 1, 'red') g.addElement(c) - pd=pathdata(0, -10) + pd = pathdata(0, -10) for i in range(6): pd.relsmbezier(10, 5, 0, 10) pd.relsmbezier(-10, 5, 0, 10) - an=animateMotion(pd, 10) - an.attributes['rotate']='auto-reverse' - an.attributes['repeatCount']="indefinite" + an = animateMotion(pd, 10) + an.attributes['rotate'] = 'auto-reverse' + an.attributes['repeatCount'] = "indefinite" g.addElement(an) s.addElement(g) for i in range(20, 120, 20): - u=use('#animations', i, 0) + u = use('#animations', i, 0) s.addElement(u) for i in range(0, 120, 20): for j in range(5, 105, 10): - c=circle(i, j, 1, 'red', 'black', .5) + c = circle(i, j, 1, 'red', 'black', .5) s.addElement(c) d.setSVG(s) diff --git a/wqflask/utility/temp_data.py b/wqflask/utility/temp_data.py index b2cbd458..07c5a318 100644 --- a/wqflask/utility/temp_data.py +++ b/wqflask/utility/temp_data.py @@ -2,6 +2,7 @@ from redis import Redis import simplejson as json + class TempData: def __init__(self, temp_uuid): @@ -11,7 +12,7 @@ class TempData: def store(self, field, value): self.redis.hset(self.key, field, value) - self.redis.expire(self.key, 60*15) # Expire in 15 minutes + self.redis.expire(self.key, 60 * 15) # Expire in 15 minutes def get_all(self): return self.redis.hgetall(self.key) diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py index 65df59c3..e28abb48 100644 --- a/wqflask/utility/tools.py +++ b/wqflask/utility/tools.py @@ -9,16 +9,18 @@ from wqflask import app # Use the standard logger here to avoid a circular dependency import logging -logger = logging.getLogger(__name__ ) +logger = logging.getLogger(__name__) OVERRIDES = {} + def app_set(command_id, value): """Set application wide value""" app.config.setdefault(command_id, value) return value -def get_setting(command_id,guess=None): + +def get_setting(command_id, guess=None): """Resolve a setting from the environment or the global settings in app.config, with valid_path is a function checking whether the path points to an expected directory and returns the full path to @@ -54,7 +56,7 @@ def get_setting(command_id,guess=None): # print("Looking for "+command_id+"\n") command = value(os.environ.get(command_id)) if command is None or command == "": - command = OVERRIDES.get(command_id) # currently not in use + command = OVERRIDES.get(command_id) # currently not in use if command is None: # ---- Check whether setting exists in app command = value(app.config.get(command_id)) @@ -62,16 +64,19 @@ def get_setting(command_id,guess=None): command = value(guess) if command is None or command == "": # print command - raise Exception(command_id+' setting unknown or faulty (update default_settings.py?).') + raise Exception( + command_id + ' setting unknown or faulty (update default_settings.py?).') # print("Set "+command_id+"="+str(command)) return command + def get_setting_bool(id): v = get_setting(id) if v not in [0, False, 'False', 'FALSE', None]: - return True + return True return False + def get_setting_int(id): v = get_setting(id) if isinstance(v, str): @@ -80,69 +85,83 @@ def get_setting_int(id): return 0 return v + def valid_bin(bin): if os.path.islink(bin) or valid_file(bin): return bin return None + def valid_file(fn): if os.path.isfile(fn): return fn return None + def valid_path(dir): if os.path.isdir(dir): return dir return None + def js_path(module=None): """ Find the JS module in the two paths """ - try_gn = get_setting("JS_GN_PATH")+"/"+module + try_gn = get_setting("JS_GN_PATH") + "/" + module if valid_path(try_gn): return try_gn - try_guix = get_setting("JS_GUIX_PATH")+"/"+module + try_guix = get_setting("JS_GUIX_PATH") + "/" + module if valid_path(try_guix): return try_guix - raise "No JS path found for "+module+" (if not in Guix check JS_GN_PATH)" + raise "No JS path found for " + module + \ + " (if not in Guix check JS_GN_PATH)" + def reaper_command(guess=None): return get_setting("REAPER_COMMAND", guess) + def gemma_command(guess=None): return assert_bin(get_setting("GEMMA_COMMAND", guess)) + def gemma_wrapper_command(guess=None): return assert_bin(get_setting("GEMMA_WRAPPER_COMMAND", guess)) + def plink_command(guess=None): return assert_bin(get_setting("PLINK_COMMAND", guess)) + def flat_file_exists(subdir): base = get_setting("GENENETWORK_FILES") - return valid_path(base+"/"+subdir) + return valid_path(base + "/" + subdir) + def flat_files(subdir=None): base = get_setting("GENENETWORK_FILES") if subdir: - return assert_dir(base+"/"+subdir) + return assert_dir(base + "/" + subdir) return assert_dir(base) + def assert_bin(fn): if not valid_bin(fn): - raise Exception("ERROR: can not find binary "+fn) + raise Exception("ERROR: can not find binary " + fn) return fn + def assert_dir(dir): if not valid_path(dir): - raise Exception("ERROR: can not find directory "+dir) + raise Exception("ERROR: can not find directory " + dir) return dir + def assert_writable_dir(dir): try: fn = dir + "/test.txt" - fh = open( fn, 'w' ) + fh = open(fn, 'w') fh.write("I am writing this text to the file\n") fh.close() os.remove(fn) @@ -150,16 +169,19 @@ def assert_writable_dir(dir): raise Exception('Unable to write test.txt to directory ' + dir) return dir + def assert_file(fn): if not valid_file(fn): - raise Exception('Unable to find file '+fn) + raise Exception('Unable to find file ' + fn) return fn + def mk_dir(dir): if not valid_path(dir): os.makedirs(dir) return assert_dir(dir) + def locate(name, subdir=None): """ Locate a static flat file in the GENENETWORK_FILES environment. @@ -168,19 +190,22 @@ def locate(name, subdir=None): """ base = get_setting("GENENETWORK_FILES") if subdir: - base = base+"/"+subdir + base = base + "/" + subdir if valid_path(base): lookfor = base + "/" + name if valid_file(lookfor): - logger.info("Found: file "+lookfor+"\n") + logger.info("Found: file " + lookfor + "\n") return lookfor else: - raise Exception("Can not locate "+lookfor) - if subdir: sys.stderr.write(subdir) - raise Exception("Can not locate "+name+" in "+base) + raise Exception("Can not locate " + lookfor) + if subdir: + sys.stderr.write(subdir) + raise Exception("Can not locate " + name + " in " + base) + def locate_phewas(name, subdir=None): - return locate(name, '/phewas/'+subdir) + return locate(name, '/phewas/' + subdir) + def locate_ignore_error(name, subdir=None): """ @@ -191,35 +216,38 @@ def locate_ignore_error(name, subdir=None): """ base = get_setting("GENENETWORK_FILES") if subdir: - base = base+"/"+subdir + base = base + "/" + subdir if valid_path(base): lookfor = base + "/" + name if valid_file(lookfor): - logger.debug("Found: file "+name+"\n") + logger.debug("Found: file " + name + "\n") return lookfor - logger.info("WARNING: file "+name+" not found\n") + logger.info("WARNING: file " + name + " not found\n") return None + def tempdir(): """ Get UNIX TMPDIR by default """ return valid_path(get_setting("TMPDIR", "/tmp")) -BLUE = '\033[94m' + +BLUE = '\033[94m' GREEN = '\033[92m' -BOLD = '\033[1m' -ENDC = '\033[0m' +BOLD = '\033[1m' +ENDC = '\033[0m' + def show_settings(): from utility.tools import LOG_LEVEL - print(("Set global log level to "+BLUE+LOG_LEVEL+ENDC)) + print(("Set global log level to " + BLUE + LOG_LEVEL + ENDC)) log_level = getattr(logging, LOG_LEVEL.upper()) logging.basicConfig(level=log_level) logger.info(OVERRIDES) - logger.info(BLUE+"Mr. Mojo Risin 2"+ENDC) + logger.info(BLUE + "Mr. Mojo Risin 2" + ENDC) keylist = list(app.config.keys()) print("runserver.py: ****** Webserver configuration - k,v pairs from app.config ******") keylist.sort() @@ -231,35 +259,35 @@ def show_settings(): # Cached values -GN_VERSION = get_setting('GN_VERSION') -HOME = get_setting('HOME') -SERVER_PORT = get_setting('SERVER_PORT') -WEBSERVER_MODE = get_setting('WEBSERVER_MODE') -GN2_BASE_URL = get_setting('GN2_BASE_URL') -GN2_BRANCH_URL = get_setting('GN2_BRANCH_URL') -GN_SERVER_URL = get_setting('GN_SERVER_URL') -SERVER_PORT = get_setting_int('SERVER_PORT') -SQL_URI = get_setting('SQL_URI') -LOG_LEVEL = get_setting('LOG_LEVEL') -LOG_LEVEL_DEBUG = get_setting_int('LOG_LEVEL_DEBUG') -LOG_SQL = get_setting_bool('LOG_SQL') -LOG_SQL_ALCHEMY = get_setting_bool('LOG_SQL_ALCHEMY') -LOG_BENCH = get_setting_bool('LOG_BENCH') -LOG_FORMAT = "%(message)s" # not yet in use -USE_REDIS = get_setting_bool('USE_REDIS') -USE_GN_SERVER = get_setting_bool('USE_GN_SERVER') - -GENENETWORK_FILES = get_setting('GENENETWORK_FILES') -JS_GUIX_PATH = get_setting('JS_GUIX_PATH') +GN_VERSION = get_setting('GN_VERSION') +HOME = get_setting('HOME') +SERVER_PORT = get_setting('SERVER_PORT') +WEBSERVER_MODE = get_setting('WEBSERVER_MODE') +GN2_BASE_URL = get_setting('GN2_BASE_URL') +GN2_BRANCH_URL = get_setting('GN2_BRANCH_URL') +GN_SERVER_URL = get_setting('GN_SERVER_URL') +SERVER_PORT = get_setting_int('SERVER_PORT') +SQL_URI = get_setting('SQL_URI') +LOG_LEVEL = get_setting('LOG_LEVEL') +LOG_LEVEL_DEBUG = get_setting_int('LOG_LEVEL_DEBUG') +LOG_SQL = get_setting_bool('LOG_SQL') +LOG_SQL_ALCHEMY = get_setting_bool('LOG_SQL_ALCHEMY') +LOG_BENCH = get_setting_bool('LOG_BENCH') +LOG_FORMAT = "%(message)s" # not yet in use +USE_REDIS = get_setting_bool('USE_REDIS') +USE_GN_SERVER = get_setting_bool('USE_GN_SERVER') + +GENENETWORK_FILES = get_setting('GENENETWORK_FILES') +JS_GUIX_PATH = get_setting('JS_GUIX_PATH') assert_dir(JS_GUIX_PATH) -JS_GN_PATH = get_setting('JS_GN_PATH') +JS_GN_PATH = get_setting('JS_GN_PATH') # assert_dir(JS_GN_PATH) GITHUB_CLIENT_ID = get_setting('GITHUB_CLIENT_ID') GITHUB_CLIENT_SECRET = get_setting('GITHUB_CLIENT_SECRET') if GITHUB_CLIENT_ID != 'UNKNOWN' and GITHUB_CLIENT_SECRET: GITHUB_AUTH_URL = "https://github.com/login/oauth/authorize?client_id=" + \ - GITHUB_CLIENT_ID+"&client_secret="+GITHUB_CLIENT_SECRET + GITHUB_CLIENT_ID + "&client_secret=" + GITHUB_CLIENT_SECRET GITHUB_API_URL = get_setting('GITHUB_API_URL') ORCID_CLIENT_ID = get_setting('ORCID_CLIENT_ID') @@ -267,7 +295,8 @@ ORCID_CLIENT_SECRET = get_setting('ORCID_CLIENT_SECRET') ORCID_AUTH_URL = None if ORCID_CLIENT_ID != 'UNKNOWN' and ORCID_CLIENT_SECRET: ORCID_AUTH_URL = "https://orcid.org/oauth/authorize?response_type=code&scope=/authenticate&show_login=true&client_id=" + \ - ORCID_CLIENT_ID+"&client_secret="+ORCID_CLIENT_SECRET + "&redirect_uri=" + GN2_BRANCH_URL + "n/login/orcid_oauth2" + ORCID_CLIENT_ID + "&client_secret=" + ORCID_CLIENT_SECRET + \ + "&redirect_uri=" + GN2_BRANCH_URL + "n/login/orcid_oauth2" ORCID_TOKEN_URL = get_setting('ORCID_TOKEN_URL') ELASTICSEARCH_HOST = get_setting('ELASTICSEARCH_HOST') @@ -279,28 +308,29 @@ SMTP_CONNECT = get_setting('SMTP_CONNECT') SMTP_USERNAME = get_setting('SMTP_USERNAME') SMTP_PASSWORD = get_setting('SMTP_PASSWORD') -REAPER_COMMAND = app_set("REAPER_COMMAND", reaper_command()) -GEMMA_COMMAND = app_set("GEMMA_COMMAND", gemma_command()) +REAPER_COMMAND = app_set("REAPER_COMMAND", reaper_command()) +GEMMA_COMMAND = app_set("GEMMA_COMMAND", gemma_command()) assert(GEMMA_COMMAND is not None) -PLINK_COMMAND = app_set("PLINK_COMMAND", plink_command()) +PLINK_COMMAND = app_set("PLINK_COMMAND", plink_command()) GEMMA_WRAPPER_COMMAND = gemma_wrapper_command() -TEMPDIR = tempdir() # defaults to UNIX TMPDIR +TEMPDIR = tempdir() # defaults to UNIX TMPDIR assert_dir(TEMPDIR) # ---- Handle specific JS modules JS_GUIX_PATH = get_setting("JS_GUIX_PATH") assert_dir(JS_GUIX_PATH) -assert_dir(JS_GUIX_PATH+'/cytoscape-panzoom') +assert_dir(JS_GUIX_PATH + '/cytoscape-panzoom') CSS_PATH = JS_GUIX_PATH # The CSS is bundled together with the JS # assert_dir(JS_PATH) -JS_TWITTER_POST_FETCHER_PATH = get_setting("JS_TWITTER_POST_FETCHER_PATH", js_path("javascript-twitter-post-fetcher")) +JS_TWITTER_POST_FETCHER_PATH = get_setting( + "JS_TWITTER_POST_FETCHER_PATH", js_path("javascript-twitter-post-fetcher")) assert_dir(JS_TWITTER_POST_FETCHER_PATH) -assert_file(JS_TWITTER_POST_FETCHER_PATH+"/js/twitterFetcher_min.js") +assert_file(JS_TWITTER_POST_FETCHER_PATH + "/js/twitterFetcher_min.js") JS_CYTOSCAPE_PATH = get_setting("JS_CYTOSCAPE_PATH", js_path("cytoscape")) assert_dir(JS_CYTOSCAPE_PATH) -assert_file(JS_CYTOSCAPE_PATH+'/cytoscape.min.js') +assert_file(JS_CYTOSCAPE_PATH + '/cytoscape.min.js') # assert_file(PHEWAS_FILES+"/auwerx/PheWAS_pval_EMMA_norm.RData") diff --git a/wqflask/utility/type_checking.py b/wqflask/utility/type_checking.py index 6b029317..00f14ba9 100644 --- a/wqflask/utility/type_checking.py +++ b/wqflask/utility/type_checking.py @@ -7,6 +7,7 @@ def is_float(value): except: return False + def is_int(value): try: int(value) @@ -14,6 +15,7 @@ def is_int(value): except: return False + def is_str(value): if value is None: return False @@ -23,19 +25,22 @@ def is_str(value): except: return False -def get_float(vars_obj,name,default=None): + +def get_float(vars_obj, name, default=None): if name in vars_obj: if is_float(vars_obj[name]): return float(vars_obj[name]) return default -def get_int(vars_obj,name,default=None): + +def get_int(vars_obj, name, default=None): if name in vars_obj: if is_int(vars_obj[name]): return float(vars_obj[name]) return default -def get_string(vars_obj,name,default=None): + +def get_string(vars_obj, name, default=None): if name in vars_obj: if not vars_obj[name] is None: return str(vars_obj[name]) diff --git a/wqflask/utility/webqtlUtil.py b/wqflask/utility/webqtlUtil.py index 5681fadf..0cb71567 100644 --- a/wqflask/utility/webqtlUtil.py +++ b/wqflask/utility/webqtlUtil.py @@ -33,44 +33,46 @@ from math import * from base import webqtlConfig # NL, 07/27/2010. moved from webqtlForm.py -#Dict of Parents and F1 information, In the order of [F1, Mat, Pat] -ParInfo ={ -'BXH':['BHF1', 'HBF1', 'C57BL/6J', 'C3H/HeJ'], -'AKXD':['AKF1', 'KAF1', 'AKR/J', 'DBA/2J'], -'BXD':['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'], -'C57BL-6JxC57BL-6NJF2':['', '', 'C57BL/6J', 'C57BL/6NJ'], -'BXD300':['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'], -'B6BTBRF2':['B6BTBRF1', 'BTBRB6F1', 'C57BL/6J', 'BTBRT<+>tf/J'], -'BHHBF2':['B6HF2', 'HB6F2', 'C57BL/6J', 'C3H/HeJ'], -'BHF2':['B6HF2', 'HB6F2', 'C57BL/6J', 'C3H/HeJ'], -'B6D2F2':['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'], -'BDF2-1999':['B6D2F2', 'D2B6F2', 'C57BL/6J', 'DBA/2J'], -'BDF2-2005':['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'], -'CTB6F2':['CTB6F2', 'B6CTF2', 'C57BL/6J', 'Castaneous'], -'CXB':['CBF1', 'BCF1', 'C57BL/6ByJ', 'BALB/cByJ'], -'AXBXA':['ABF1', 'BAF1', 'C57BL/6J', 'A/J'], -'AXB':['ABF1', 'BAF1', 'C57BL/6J', 'A/J'], -'BXA':['BAF1', 'ABF1', 'C57BL/6J', 'A/J'], -'LXS':['LSF1', 'SLF1', 'ISS', 'ILS'], -'HXBBXH':['SHR_BNF1', 'BN_SHRF1', 'BN-Lx/Cub', 'SHR/OlaIpcv'], -'BayXSha':['BayXShaF1', 'ShaXBayF1', 'Bay-0', 'Shahdara'], -'ColXBur':['ColXBurF1', 'BurXColF1', 'Col-0', 'Bur-0'], -'ColXCvi':['ColXCviF1', 'CviXColF1', 'Col-0', 'Cvi'], -'SXM':['SMF1', 'MSF1', 'Steptoe', 'Morex'], -'HRDP':['SHR_BNF1', 'BN_SHRF1', 'BN-Lx/Cub', 'SHR/OlaIpcv'] +# Dict of Parents and F1 information, In the order of [F1, Mat, Pat] +ParInfo = { + 'BXH': ['BHF1', 'HBF1', 'C57BL/6J', 'C3H/HeJ'], + 'AKXD': ['AKF1', 'KAF1', 'AKR/J', 'DBA/2J'], + 'BXD': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'], + 'C57BL-6JxC57BL-6NJF2': ['', '', 'C57BL/6J', 'C57BL/6NJ'], + 'BXD300': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'], + 'B6BTBRF2': ['B6BTBRF1', 'BTBRB6F1', 'C57BL/6J', 'BTBRT<+>tf/J'], + 'BHHBF2': ['B6HF2', 'HB6F2', 'C57BL/6J', 'C3H/HeJ'], + 'BHF2': ['B6HF2', 'HB6F2', 'C57BL/6J', 'C3H/HeJ'], + 'B6D2F2': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'], + 'BDF2-1999': ['B6D2F2', 'D2B6F2', 'C57BL/6J', 'DBA/2J'], + 'BDF2-2005': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'], + 'CTB6F2': ['CTB6F2', 'B6CTF2', 'C57BL/6J', 'Castaneous'], + 'CXB': ['CBF1', 'BCF1', 'C57BL/6ByJ', 'BALB/cByJ'], + 'AXBXA': ['ABF1', 'BAF1', 'C57BL/6J', 'A/J'], + 'AXB': ['ABF1', 'BAF1', 'C57BL/6J', 'A/J'], + 'BXA': ['BAF1', 'ABF1', 'C57BL/6J', 'A/J'], + 'LXS': ['LSF1', 'SLF1', 'ISS', 'ILS'], + 'HXBBXH': ['SHR_BNF1', 'BN_SHRF1', 'BN-Lx/Cub', 'SHR/OlaIpcv'], + 'BayXSha': ['BayXShaF1', 'ShaXBayF1', 'Bay-0', 'Shahdara'], + 'ColXBur': ['ColXBurF1', 'BurXColF1', 'Col-0', 'Bur-0'], + 'ColXCvi': ['ColXCviF1', 'CviXColF1', 'Col-0', 'Cvi'], + 'SXM': ['SMF1', 'MSF1', 'Steptoe', 'Morex'], + 'HRDP': ['SHR_BNF1', 'BN_SHRF1', 'BN-Lx/Cub', 'SHR/OlaIpcv'] } ######################################### # Accessory Functions ######################################### -def genRandStr(prefix = "", length=8, chars=string.ascii_letters+string.digits): + +def genRandStr(prefix="", length=8, chars=string.ascii_letters + string.digits): from random import choice _str = prefix[:] for i in range(length): _str += choice(chars) return _str + def ListNotNull(lst): '''Obsolete - Use built in function any (or all or whatever) @@ -83,14 +85,16 @@ def ListNotNull(lst): return 1 return None -def readLineCSV(line): ### dcrowell July 2008 + +def readLineCSV(line): # dcrowell July 2008 """Parses a CSV string of text and returns a list containing each element as a string. Used by correlationPage""" returnList = line.split('","') - returnList[-1]=returnList[-1][:-2] - returnList[0]=returnList[0][1:] + returnList[-1] = returnList[-1][:-2] + returnList[0] = returnList[0][1:] return returnList + def cmpEigenValue(A, B): try: if A[0] > B[0]: @@ -102,12 +106,13 @@ def cmpEigenValue(A, B): except: return 0 + def hasAccessToConfidentialPhenotypeTrait(privilege, userName, authorized_users): access_to_confidential_phenotype_trait = 0 if webqtlConfig.USERDICT[privilege] > webqtlConfig.USERDICT['user']: access_to_confidential_phenotype_trait = 1 else: - AuthorizedUsersList=[x.strip() for x in authorized_users.split(',')] + AuthorizedUsersList = [x.strip() for x in authorized_users.split(',')] if userName in AuthorizedUsersList: access_to_confidential_phenotype_trait = 1 return access_to_confidential_phenotype_trait diff --git a/wqflask/wqflask/__init__.py b/wqflask/wqflask/__init__.py index 0564cfa7..a2bf3085 100644 --- a/wqflask/wqflask/__init__.py +++ b/wqflask/wqflask/__init__.py @@ -7,11 +7,11 @@ from flask import g from flask import Flask from utility import formatting from wqflask.markdown_routes import glossary_blueprint -from wqflask.markdown_routes import references_blueprint -from wqflask.markdown_routes import links_blueprint +from wqflask.markdown_routes import references_blueprint +from wqflask.markdown_routes import links_blueprint from wqflask.markdown_routes import policies_blueprint -from wqflask.markdown_routes import environments_blueprint -from wqflask.markdown_routes import facilities_blueprint +from wqflask.markdown_routes import environments_blueprint +from wqflask.markdown_routes import facilities_blueprint app = Flask(__name__) @@ -30,6 +30,7 @@ app.register_blueprint(policies_blueprint, url_prefix="/policies") app.register_blueprint(environments_blueprint, url_prefix="/environments") app.register_blueprint(facilities_blueprint, url_prefix="/facilities") + @app.before_request def before_request(): g.request_start_time = time.time() @@ -49,4 +50,4 @@ from wqflask import db_info from wqflask import user_login from wqflask import user_session -import wqflask.views +import wqflask.views diff --git a/wqflask/wqflask/api/correlation.py b/wqflask/wqflask/api/correlation.py index f5b50dcd..870f3275 100644 --- a/wqflask/wqflask/api/correlation.py +++ b/wqflask/wqflask/api/correlation.py @@ -16,21 +16,25 @@ from utility import webqtlUtil, helper_functions, corr_result_helpers from utility.benchmark import Bench import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + def do_correlation(start_vars): assert('db' in start_vars) assert('target_db' in start_vars) assert('trait_id' in start_vars) - this_dataset = data_set.create_dataset(dataset_name = start_vars['db']) - target_dataset = data_set.create_dataset(dataset_name = start_vars['target_db']) - this_trait = create_trait(dataset = this_dataset, name = start_vars['trait_id']) + this_dataset = data_set.create_dataset(dataset_name=start_vars['db']) + target_dataset = data_set.create_dataset( + dataset_name=start_vars['target_db']) + this_trait = create_trait(dataset=this_dataset, + name=start_vars['trait_id']) this_trait = retrieve_sample_data(this_trait, this_dataset) corr_params = init_corr_params(start_vars) - corr_results = calculate_results(this_trait, this_dataset, target_dataset, corr_params) + corr_results = calculate_results( + this_trait, this_dataset, target_dataset, corr_params) #corr_results = collections.OrderedDict(sorted(corr_results.items(), key=lambda t: -abs(t[1][0]))) final_results = [] @@ -38,26 +42,26 @@ def do_correlation(start_vars): if corr_params['type'] == "tissue": [sample_r, num_overlap, sample_p, symbol] = corr_results[trait] result_dict = { - "trait" : trait, - "sample_r" : sample_r, - "#_strains" : num_overlap, - "p_value" : sample_p, - "symbol" : symbol + "trait": trait, + "sample_r": sample_r, + "#_strains": num_overlap, + "p_value": sample_p, + "symbol": symbol } elif corr_params['type'] == "literature" or corr_params['type'] == "lit": [gene_id, sample_r] = corr_results[trait] result_dict = { - "trait" : trait, - "sample_r" : sample_r, - "gene_id" : gene_id + "trait": trait, + "sample_r": sample_r, + "gene_id": gene_id } else: [sample_r, sample_p, num_overlap] = corr_results[trait] result_dict = { - "trait" : trait, - "sample_r" : sample_r, - "#_strains" : num_overlap, - "p_value" : sample_p + "trait": trait, + "sample_r": sample_r, + "#_strains": num_overlap, + "p_value": sample_p } final_results.append(result_dict) @@ -66,6 +70,7 @@ def do_correlation(start_vars): return final_results + def calculate_results(this_trait, this_dataset, target_dataset, corr_params): corr_results = {} @@ -73,52 +78,66 @@ def calculate_results(this_trait, this_dataset, target_dataset, corr_params): if corr_params['type'] == "tissue": trait_symbol_dict = this_dataset.retrieve_genes("Symbol") - corr_results = do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params) + corr_results = do_tissue_correlation_for_all_traits( + this_trait, trait_symbol_dict, corr_params) sorted_results = collections.OrderedDict(sorted(list(corr_results.items()), key=lambda t: -abs(t[1][1]))) - elif corr_params['type'] == "literature" or corr_params['type'] == "lit": #ZS: Just so a user can use either "lit" or "literature" + # ZS: Just so a user can use either "lit" or "literature" + elif corr_params['type'] == "literature" or corr_params['type'] == "lit": trait_geneid_dict = this_dataset.retrieve_genes("GeneId") - corr_results = do_literature_correlation_for_all_traits(this_trait, this_dataset, trait_geneid_dict, corr_params) + corr_results = do_literature_correlation_for_all_traits( + this_trait, this_dataset, trait_geneid_dict, corr_params) sorted_results = collections.OrderedDict(sorted(list(corr_results.items()), - key=lambda t: -abs(t[1][1]))) + key=lambda t: -abs(t[1][1]))) else: for target_trait, target_vals in list(target_dataset.trait_data.items()): - result = get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, corr_params['type']) + result = get_sample_r_and_p_values( + this_trait, this_dataset, target_vals, target_dataset, corr_params['type']) if result is not None: corr_results[target_trait] = result - sorted_results = collections.OrderedDict(sorted(list(corr_results.items()), key=lambda t: -abs(t[1][0]))) + sorted_results = collections.OrderedDict( + sorted(list(corr_results.items()), key=lambda t: -abs(t[1][0]))) return sorted_results + def do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params, tissue_dataset_id=1): - #Gets tissue expression values for the primary trait - primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list = [this_trait.symbol]) + # Gets tissue expression values for the primary trait + primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=[this_trait.symbol]) if this_trait.symbol.lower() in primary_trait_tissue_vals_dict: - primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower()] + primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower( + )] - corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list=list(trait_symbol_dict.values())) + corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=list(trait_symbol_dict.values())) tissue_corr_data = {} for trait, symbol in list(trait_symbol_dict.items()): if symbol and symbol.lower() in corr_result_tissue_vals_dict: - this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()] + this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower( + )] result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, this_trait_tissue_values, corr_params['method']) - tissue_corr_data[trait] = [result[0], result[1], result[2], symbol] + tissue_corr_data[trait] = [ + result[0], result[1], result[2], symbol] return tissue_corr_data + def do_literature_correlation_for_all_traits(this_trait, target_dataset, trait_geneid_dict, corr_params): - input_trait_mouse_gene_id = convert_to_mouse_gene_id(target_dataset.group.species.lower(), this_trait.geneid) + input_trait_mouse_gene_id = convert_to_mouse_gene_id( + target_dataset.group.species.lower(), this_trait.geneid) lit_corr_data = {} for trait, gene_id in list(trait_geneid_dict.items()): - mouse_gene_id = convert_to_mouse_gene_id(target_dataset.group.species.lower(), gene_id) + mouse_gene_id = convert_to_mouse_gene_id( + target_dataset.group.species.lower(), gene_id) if mouse_gene_id and str(mouse_gene_id).find(";") == -1: result = g.db.execute( @@ -145,6 +164,7 @@ def do_literature_correlation_for_all_traits(this_trait, target_dataset, trait_g return lit_corr_data + def get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, type): """ Calculates the sample r (or rho) and p-value @@ -163,12 +183,15 @@ def get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_data this_trait_vals.append(this_sample_value) shared_target_vals.append(target_sample_value) - this_trait_vals, shared_target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, shared_target_vals) + this_trait_vals, shared_target_vals, num_overlap = corr_result_helpers.normalize_values( + this_trait_vals, shared_target_vals) if type == 'pearson': - sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, shared_target_vals) + sample_r, sample_p = scipy.stats.pearsonr( + this_trait_vals, shared_target_vals) else: - sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, shared_target_vals) + sample_r, sample_p = scipy.stats.spearmanr( + this_trait_vals, shared_target_vals) if num_overlap > 5: if scipy.isnan(sample_r): @@ -176,6 +199,7 @@ def get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_data else: return [sample_r, sample_p, num_overlap] + def convert_to_mouse_gene_id(species=None, gene_id=None): """If the species is rat or human, translate the gene_id to the mouse geneid @@ -212,6 +236,7 @@ def convert_to_mouse_gene_id(species=None, gene_id=None): return mouse_gene_id + def init_corr_params(start_vars): method = "pearson" if 'method' in start_vars: @@ -227,9 +252,9 @@ def init_corr_params(start_vars): return_count = int(start_vars['return_count']) corr_params = { - 'method' : method, - 'type' : type, - 'return_count' : return_count + 'method': method, + 'type': type, + 'return_count': return_count } return corr_params diff --git a/wqflask/wqflask/api/gen_menu.py b/wqflask/wqflask/api/gen_menu.py index eaddecd7..e65b36e4 100644 --- a/wqflask/wqflask/api/gen_menu.py +++ b/wqflask/wqflask/api/gen_menu.py @@ -87,14 +87,14 @@ def phenotypes_exist(group_name): results = g.db.execute( ("SELECT Name FROM PublishFreeze " "WHERE PublishFreeze.Name = " - "'{}'").format(group_name+"Publish")).fetchone() + "'{}'").format(group_name + "Publish")).fetchone() return bool(results) def genotypes_exist(group_name): results = g.db.execute( ("SELECT Name FROM GenoFreeze " + - "WHERE GenoFreeze.Name = '{}'").format(group_name+"Geno")).fetchone() + "WHERE GenoFreeze.Name = '{}'").format(group_name + "Geno")).fetchone() return bool(results) @@ -179,11 +179,11 @@ def build_datasets(species, group, type_name): elif type_name == "Genotypes": results = g.db.execute( ("SELECT InfoFiles.GN_AccesionId " + - "FROM InfoFiles, GenoFreeze, InbredSet " + - "WHERE InbredSet.Name = '{}' AND " + - "GenoFreeze.InbredSetId = InbredSet.Id AND " + - "InfoFiles.InfoPageName = GenoFreeze.ShortName " + - "ORDER BY GenoFreeze.CreateTime DESC").format(group)).fetchone() + "FROM InfoFiles, GenoFreeze, InbredSet " + + "WHERE InbredSet.Name = '{}' AND " + + "GenoFreeze.InbredSetId = InbredSet.Id AND " + + "InfoFiles.InfoPageName = GenoFreeze.ShortName " + + "ORDER BY GenoFreeze.CreateTime DESC").format(group)).fetchone() dataset_id = "None" if bool(results): diff --git a/wqflask/wqflask/api/mapping.py b/wqflask/wqflask/api/mapping.py index d59a69df..f8b0d8bd 100644 --- a/wqflask/wqflask/api/mapping.py +++ b/wqflask/wqflask/api/mapping.py @@ -8,15 +8,16 @@ from utility import helper_functions from wqflask.marker_regression import gemma_mapping, rqtl_mapping, qtlreaper_mapping, plink_mapping import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + def do_mapping_for_api(start_vars): assert('db' in start_vars) assert('trait_id' in start_vars) - dataset = data_set.create_dataset(dataset_name = start_vars['db']) + dataset = data_set.create_dataset(dataset_name=start_vars['db']) dataset.group.get_markers() - this_trait = create_trait(dataset = dataset, name = start_vars['trait_id']) + this_trait = create_trait(dataset=dataset, name=start_vars['trait_id']) this_trait = retrieve_sample_data(this_trait, dataset) samples = [] @@ -36,26 +37,32 @@ def do_mapping_for_api(start_vars): mapping_params = initialize_parameters(start_vars, dataset, this_trait) - covariates = "" #ZS: It seems to take an empty string as default. This should probably be changed. + # ZS: It seems to take an empty string as default. This should probably be changed. + covariates = "" if mapping_params['mapping_method'] == "gemma": header_row = ["name", "chr", "Mb", "lod_score", "p_value"] - if mapping_params['use_loco'] == "True": #ZS: gemma_mapping returns both results and the filename for LOCO, so need to only grab the former for api - result_markers = gemma_mapping.run_gemma(this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])[0] + # ZS: gemma_mapping returns both results and the filename for LOCO, so need to only grab the former for api + if mapping_params['use_loco'] == "True": + result_markers = gemma_mapping.run_gemma( + this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])[0] else: - result_markers = gemma_mapping.run_gemma(this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf']) + result_markers = gemma_mapping.run_gemma( + this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf']) elif mapping_params['mapping_method'] == "rqtl": header_row = ["name", "chr", "cM", "lod_score"] if mapping_params['num_perm'] > 0: _sperm_output, _suggestive, _significant, result_markers = rqtl_mapping.run_rqtl_geno(vals, dataset, mapping_params['rqtl_method'], mapping_params['rqtl_model'], - mapping_params['perm_check'], mapping_params['num_perm'], - mapping_params['do_control'], mapping_params['control_marker'], - mapping_params['manhattan_plot'], mapping_params['pair_scan']) + mapping_params['perm_check'], mapping_params[ + 'num_perm'], + mapping_params['do_control'], mapping_params[ + 'control_marker'], + mapping_params['manhattan_plot'], mapping_params['pair_scan']) else: result_markers = rqtl_mapping.run_rqtl_geno(vals, dataset, mapping_params['rqtl_method'], mapping_params['rqtl_model'], - mapping_params['perm_check'], mapping_params['num_perm'], - mapping_params['do_control'], mapping_params['control_marker'], - mapping_params['manhattan_plot'], mapping_params['pair_scan']) + mapping_params['perm_check'], mapping_params['num_perm'], + mapping_params['do_control'], mapping_params['control_marker'], + mapping_params['manhattan_plot'], mapping_params['pair_scan']) if mapping_params['limit_to']: result_markers = result_markers[:mapping_params['limit_to']] @@ -74,7 +81,6 @@ def do_mapping_for_api(start_vars): return result_markers, None - def initialize_parameters(start_vars, dataset, this_trait): mapping_params = {} @@ -118,7 +124,7 @@ def initialize_parameters(start_vars, dataset, this_trait): mapping_params['maf'] = 0.01 if 'maf' in start_vars: - mapping_params['maf'] = start_vars['maf'] # Minor allele frequency + mapping_params['maf'] = start_vars['maf'] # Minor allele frequency mapping_params['use_loco'] = True if 'use_loco' in start_vars: @@ -135,5 +141,3 @@ def initialize_parameters(start_vars, dataset, this_trait): mapping_params['perm_check'] = False return mapping_params - - diff --git a/wqflask/wqflask/api/router.py b/wqflask/wqflask/api/router.py index 60e163f2..aec74c9e 100644 --- a/wqflask/wqflask/api/router.py +++ b/wqflask/wqflask/api/router.py @@ -23,47 +23,52 @@ from wqflask.api import correlation, mapping, gen_menu from utility.tools import flat_files import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) version = "pre1" + @app.route("/api/v_{}/".format(version)) def hello_world(): - return flask.jsonify({"hello":"world"}) + return flask.jsonify({"hello": "world"}) + @app.route("/api/v_{}/species".format(version)) def get_species_list(): - results = g.db.execute("SELECT SpeciesId, Name, FullName, TaxonomyId FROM Species;") + results = g.db.execute( + "SELECT SpeciesId, Name, FullName, TaxonomyId FROM Species;") the_species = results.fetchall() species_list = [] for species in the_species: species_dict = { - "Id" : species[0], - "Name" : species[1], - "FullName" : species[2], - "TaxonomyId" : species[3] + "Id": species[0], + "Name": species[1], + "FullName": species[2], + "TaxonomyId": species[3] } species_list.append(species_dict) return flask.jsonify(species_list) + @app.route("/api/v_{}/species/<path:species_name>".format(version)) @app.route("/api/v_{}/species/<path:species_name>.<path:file_format>".format(version)) -def get_species_info(species_name, file_format = "json"): +def get_species_info(species_name, file_format="json"): results = g.db.execute("""SELECT SpeciesId, Name, FullName, TaxonomyId FROM Species WHERE (Name="{0}" OR FullName="{0}" OR SpeciesName="{0}");""".format(species_name)) the_species = results.fetchone() - species_dict = { - "Id" : the_species[0], - "Name" : the_species[1], - "FullName" : the_species[2], - "TaxonomyId" : the_species[3] + species_dict = { + "Id": the_species[0], + "Name": the_species[1], + "FullName": the_species[2], + "TaxonomyId": the_species[3] } - + return flask.jsonify(species_dict) + @app.route("/api/v_{}/groups".format(version)) @app.route("/api/v_{}/groups/<path:species_name>".format(version)) def get_groups_list(species_name=None): @@ -87,14 +92,14 @@ def get_groups_list(species_name=None): groups_list = [] for group in the_groups: group_dict = { - "Id" : group[0], - "SpeciesId" : group[1], - "DisplayName" : group[2], - "Name" : group[3], - "FullName" : group[4], - "public" : group[5], - "MappingMethodId" : group[6], - "GeneticType" : group[7] + "Id": group[0], + "SpeciesId": group[1], + "DisplayName": group[2], + "Name": group[3], + "FullName": group[4], + "public": group[5], + "MappingMethodId": group[6], + "GeneticType": group[7] } groups_list.append(group_dict) @@ -102,11 +107,12 @@ def get_groups_list(species_name=None): else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/group/<path:group_name>".format(version)) @app.route("/api/v_{}/group/<path:group_name>.<path:file_format>".format(version)) @app.route("/api/v_{}/group/<path:species_name>/<path:group_name>".format(version)) @app.route("/api/v_{}/group/<path:species_name>/<path:group_name>.<path:file_format>".format(version)) -def get_group_info(group_name, species_name = None, file_format = "json"): +def get_group_info(group_name, species_name=None, file_format="json"): if species_name: results = g.db.execute("""SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, InbredSet.InbredSetName, InbredSet.Name, InbredSet.FullName, InbredSet.public, @@ -131,20 +137,21 @@ def get_group_info(group_name, species_name = None, file_format = "json"): group = results.fetchone() if group: group_dict = { - "Id" : group[0], - "SpeciesId" : group[1], - "DisplayName" : group[2], - "Name" : group[3], - "FullName" : group[4], - "public" : group[5], - "MappingMethodId" : group[6], - "GeneticType" : group[7] + "Id": group[0], + "SpeciesId": group[1], + "DisplayName": group[2], + "Name": group[3], + "FullName": group[4], + "public": group[5], + "MappingMethodId": group[6], + "GeneticType": group[7] } return flask.jsonify(group_dict) else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/datasets/<path:group_name>".format(version)) @app.route("/api/v_{}/datasets/<path:species_name>/<path:group_name>".format(version)) def get_datasets_for_group(group_name, species_name=None): @@ -179,17 +186,17 @@ def get_datasets_for_group(group_name, species_name=None): datasets_list = [] for dataset in the_datasets: dataset_dict = { - "Id" : dataset[0], - "ProbeFreezeId" : dataset[1], - "AvgID" : dataset[2], - "Short_Abbreviation" : dataset[3], - "Long_Abbreviation" : dataset[4], - "FullName" : dataset[5], - "ShortName" : dataset[6], - "CreateTime" : dataset[7], - "public" : dataset[8], - "confidentiality" : dataset[9], - "DataScale" : dataset[10] + "Id": dataset[0], + "ProbeFreezeId": dataset[1], + "AvgID": dataset[2], + "Short_Abbreviation": dataset[3], + "Long_Abbreviation": dataset[4], + "FullName": dataset[5], + "ShortName": dataset[6], + "CreateTime": dataset[7], + "public": dataset[8], + "confidentiality": dataset[9], + "DataScale": dataset[10] } datasets_list.append(dataset_dict) @@ -197,14 +204,15 @@ def get_datasets_for_group(group_name, species_name=None): else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/dataset/<path:dataset_name>".format(version)) @app.route("/api/v_{}/dataset/<path:dataset_name>.<path:file_format>".format(version)) @app.route("/api/v_{}/dataset/<path:group_name>/<path:dataset_name>".format(version)) @app.route("/api/v_{}/dataset/<path:group_name>/<path:dataset_name>.<path:file_format>".format(version)) -def get_dataset_info(dataset_name, group_name = None, file_format="json"): - #ZS: First get ProbeSet (mRNA expression) datasets and then get Phenotype datasets +def get_dataset_info(dataset_name, group_name=None, file_format="json"): + # ZS: First get ProbeSet (mRNA expression) datasets and then get Phenotype datasets - datasets_list = [] #ZS: I figure I might as well return a list if there are multiple matches, though I don"t know if this will actually happen in practice + datasets_list = [] # ZS: I figure I might as well return a list if there are multiple matches, though I don"t know if this will actually happen in practice probeset_query = """ SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName, @@ -235,16 +243,16 @@ def get_dataset_info(dataset_name, group_name = None, file_format="json"): if dataset: dataset_dict = { - "dataset_type" : "mRNA expression", - "id" : dataset[0], - "name" : dataset[1], - "full_name" : dataset[2], - "short_name" : dataset[3], - "data_scale" : dataset[4], - "tissue_id" : dataset[5], - "tissue" : dataset[6], - "public" : dataset[7], - "confidential" : dataset[8] + "dataset_type": "mRNA expression", + "id": dataset[0], + "name": dataset[1], + "full_name": dataset[2], + "short_name": dataset[3], + "data_scale": dataset[4], + "tissue_id": dataset[5], + "tissue": dataset[6], + "public": dataset[7], + "confidential": dataset[8] } datasets_list.append(dataset_dict) @@ -272,25 +280,25 @@ def get_dataset_info(dataset_name, group_name = None, file_format="json"): if dataset: if dataset[5]: dataset_dict = { - "dataset_type" : "phenotype", - "id" : dataset[0], - "name" : dataset[1], - "description" : dataset[2], - "pubmed_id" : dataset[5], - "title" : dataset[6], - "year" : dataset[7] + "dataset_type": "phenotype", + "id": dataset[0], + "name": dataset[1], + "description": dataset[2], + "pubmed_id": dataset[5], + "title": dataset[6], + "year": dataset[7] } elif dataset[4]: dataset_dict = { - "dataset_type" : "phenotype", - "id" : dataset[0], - "name" : dataset[3], - "description" : dataset[4] + "dataset_type": "phenotype", + "id": dataset[0], + "name": dataset[3], + "description": dataset[4] } else: dataset_dict = { - "dataset_type" : "phenotype", - "id" : dataset[0] + "dataset_type": "phenotype", + "id": dataset[0] } datasets_list.append(dataset_dict) @@ -302,10 +310,12 @@ def get_dataset_info(dataset_name, group_name = None, file_format="json"): else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/traits/<path:dataset_name>".format(version), methods=("GET",)) @app.route("/api/v_{}/traits/<path:dataset_name>.<path:file_format>".format(version), methods=("GET",)) -def fetch_traits(dataset_name, file_format = "json"): - trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids(dataset_name, request.args) +def fetch_traits(dataset_name, file_format="json"): + trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids( + dataset_name, request.args) if ("ids_only" in request.args) and (len(trait_ids) > 0): if file_format == "json": filename = dataset_name + "_trait_ids.json" @@ -353,7 +363,8 @@ def fetch_traits(dataset_name, file_format = "json"): ProbeSet.Id """ - field_list = ["Id", "Name", "Symbol", "Description", "Chr", "Mb", "Aliases", "Mean", "SE", "Locus", "LRS", "P-Value", "Additive", "h2"] + field_list = ["Id", "Name", "Symbol", "Description", "Chr", "Mb", + "Aliases", "Mean", "SE", "Locus", "LRS", "P-Value", "Additive", "h2"] elif data_type == "Geno": query = """ SELECT @@ -370,7 +381,8 @@ def fetch_traits(dataset_name, file_format = "json"): Geno.Id """ - field_list = ["Id", "Name", "Marker_Name", "Chr", "Mb", "Sequence", "Source"] + field_list = ["Id", "Name", "Marker_Name", + "Chr", "Mb", "Sequence", "Source"] else: query = """ SELECT @@ -386,7 +398,8 @@ def fetch_traits(dataset_name, file_format = "json"): PublishXRef.Id """ - field_list = ["Id", "PhenotypeId", "PublicationId", "Locus", "LRS", "Additive", "Sequence"] + field_list = ["Id", "PhenotypeId", "PublicationId", + "Locus", "LRS", "Additive", "Sequence"] if 'limit_to' in request.args: limit_number = request.args['limit_to'] @@ -430,10 +443,12 @@ def fetch_traits(dataset_name, file_format = "json"): else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/sample_data/<path:dataset_name>".format(version)) @app.route("/api/v_{}/sample_data/<path:dataset_name>.<path:file_format>".format(version)) -def all_sample_data(dataset_name, file_format = "csv"): - trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids(dataset_name, request.args) +def all_sample_data(dataset_name, file_format="csv"): + trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids( + dataset_name, request.args) if len(trait_ids) > 0: sample_list = get_samplelist(dataset_name) @@ -536,9 +551,10 @@ def all_sample_data(dataset_name, file_format = "csv"): else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/sample_data/<path:dataset_name>/<path:trait_name>".format(version)) @app.route("/api/v_{}/sample_data/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version)) -def trait_sample_data(dataset_name, trait_name, file_format = "json"): +def trait_sample_data(dataset_name, trait_name, file_format="json"): probeset_query = """ SELECT Strain.Name, Strain.Name2, ProbeSetData.value, ProbeSetData.Id, ProbeSetSE.error @@ -563,10 +579,10 @@ def trait_sample_data(dataset_name, trait_name, file_format = "json"): sample_list = [] for sample in sample_data: sample_dict = { - "sample_name": sample[0], - "sample_name_2": sample[1], - "value": sample[2], - "data_id": sample[3], + "sample_name": sample[0], + "sample_name_2": sample[1], + "value": sample[2], + "data_id": sample[3], } if sample[4]: sample_dict["se"] = sample[4] @@ -610,10 +626,10 @@ def trait_sample_data(dataset_name, trait_name, file_format = "json"): sample_list = [] for sample in sample_data: sample_dict = { - "sample_name" : sample[0], - "sample_name_2" : sample[1], - "value" : sample[2], - "data_id" : sample[3] + "sample_name": sample[0], + "sample_name_2": sample[1], + "value": sample[2], + "data_id": sample[3] } if sample[4]: sample_dict["se"] = sample[4] @@ -623,13 +639,14 @@ def trait_sample_data(dataset_name, trait_name, file_format = "json"): return flask.jsonify(sample_list) else: - return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/trait/<path:dataset_name>/<path:trait_name>".format(version)) @app.route("/api/v_{}/trait/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version)) @app.route("/api/v_{}/trait_info/<path:dataset_name>/<path:trait_name>".format(version)) @app.route("/api/v_{}/trait_info/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version)) -def get_trait_info(dataset_name, trait_name, file_format = "json"): +def get_trait_info(dataset_name, trait_name, file_format="json"): probeset_query = """ SELECT ProbeSet.Id, ProbeSet.Name, ProbeSet.Symbol, ProbeSet.description, ProbeSet.Chr, ProbeSet.Mb, ProbeSet.alias, @@ -648,26 +665,27 @@ def get_trait_info(dataset_name, trait_name, file_format = "json"): trait_info = probeset_results.fetchone() if trait_info: trait_dict = { - "id" : trait_info[0], - "name" : trait_info[1], - "symbol" : trait_info[2], - "description" : trait_info[3], - "chr" : trait_info[4], - "mb" : trait_info[5], - "alias" :trait_info[6], - "mean" : trait_info[7], - "se" : trait_info[8], - "locus" : trait_info[9], - "lrs" : trait_info[10], - "p_value" : trait_info[11], - "additive" : trait_info[12] + "id": trait_info[0], + "name": trait_info[1], + "symbol": trait_info[2], + "description": trait_info[3], + "chr": trait_info[4], + "mb": trait_info[5], + "alias": trait_info[6], + "mean": trait_info[7], + "se": trait_info[8], + "locus": trait_info[9], + "lrs": trait_info[10], + "p_value": trait_info[11], + "additive": trait_info[12] } return flask.jsonify(trait_dict) else: - if "Publish" in dataset_name: #ZS: Check if the user input the dataset_name as BXDPublish, etc (which is always going to be the group name + "Publish" + # ZS: Check if the user input the dataset_name as BXDPublish, etc (which is always going to be the group name + "Publish" + if "Publish" in dataset_name: dataset_name = dataset_name.replace("Publish", "") - + group_id = get_group_id(dataset_name) pheno_query = """ SELECT @@ -684,25 +702,28 @@ def get_trait_info(dataset_name, trait_name, file_format = "json"): trait_info = pheno_results.fetchone() if trait_info: trait_dict = { - "id" : trait_info[0], - "locus" : trait_info[1], - "lrs" : trait_info[2], - "additive" : trait_info[3] + "id": trait_info[0], + "locus": trait_info[1], + "lrs": trait_info[2], + "additive": trait_info[3] } return flask.jsonify(trait_dict) else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/correlation".format(version), methods=("GET",)) def get_corr_results(): results = correlation.do_correlation(request.args) if len(results) > 0: - return flask.jsonify(results) #ZS: I think flask.jsonify expects a dict/list instead of JSON + # ZS: I think flask.jsonify expects a dict/list instead of JSON + return flask.jsonify(results) else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/mapping".format(version), methods=("GET",)) def get_mapping_results(): results, format = mapping.do_mapping_for_api(request.args) @@ -726,6 +747,7 @@ def get_mapping_results(): else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/genotypes/<string:file_format>/<string:group_name>/<string:dataset_name>.zip".format(version)) @app.route("/api/v_{}/genotypes/<string:file_format>/<string:group_name>/<string:dataset_name>".format(version)) @app.route("/api/v_{}/genotypes/<string:file_format>/<string:group_name>.zip".format(version)) @@ -754,7 +776,8 @@ def get_genotypes(group_name, file_format="csv", dataset_name=None): output_lines.append(line.split()) i += 1 - csv_writer = csv.writer(si, delimiter = "\t", escapechar = "\\", quoting = csv.QUOTE_NONE) + csv_writer = csv.writer( + si, delimiter="\t", escapechar="\\", quoting=csv.QUOTE_NONE) else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") elif file_format == "rqtl2": @@ -765,18 +788,23 @@ def get_genotypes(group_name, file_format="csv", dataset_name=None): filename = group_name if os.path.isfile("{0}/{1}_geno.csv".format(flat_files("genotype/rqtl2"), group_name)): - yaml_file = json.load(open("{0}/{1}.json".format(flat_files("genotype/rqtl2"), group_name))) + yaml_file = json.load( + open("{0}/{1}.json".format(flat_files("genotype/rqtl2"), group_name))) yaml_file["geno"] = filename + "_geno.csv" yaml_file["gmap"] = filename + "_gmap.csv" yaml_file["pheno"] = filename + "_pheno.csv" config_file = [filename + ".json", json.dumps(yaml_file)] #config_file = [filename + ".yaml", open("{0}/{1}.yaml".format(flat_files("genotype/rqtl2"), group_name))] - geno_file = [filename + "_geno.csv", open("{0}/{1}_geno.csv".format(flat_files("genotype/rqtl2"), group_name))] - gmap_file = [filename + "_gmap.csv", open("{0}/{1}_gmap.csv".format(flat_files("genotype/rqtl2"), group_name))] + geno_file = [filename + "_geno.csv", + open("{0}/{1}_geno.csv".format(flat_files("genotype/rqtl2"), group_name))] + gmap_file = [filename + "_gmap.csv", + open("{0}/{1}_gmap.csv".format(flat_files("genotype/rqtl2"), group_name))] if dataset_name: - phenotypes = requests.get("http://gn2.genenetwork.org/api/v_pre1/sample_data/" + dataset_name) + phenotypes = requests.get( + "http://gn2.genenetwork.org/api/v_pre1/sample_data/" + dataset_name) else: - phenotypes = requests.get("http://gn2.genenetwork.org/api/v_pre1/sample_data/" + group_name + "Publish") + phenotypes = requests.get( + "http://gn2.genenetwork.org/api/v_pre1/sample_data/" + group_name + "Publish") with ZipFile(memory_file, 'w', compression=ZIP_DEFLATED) as zf: zf.writestr(config_file[0], config_file[1]) @@ -799,10 +827,11 @@ def get_genotypes(group_name, file_format="csv", dataset_name=None): for line in genofile: if limit_num and i >= limit_num: break - output_lines.append([line.strip() for line in line.split(",")]) + output_lines.append([line.strip() + for line in line.split(",")]) i += 1 - csv_writer = csv.writer(si, delimiter = ",") + csv_writer = csv.writer(si, delimiter=",") else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") @@ -813,6 +842,7 @@ def get_genotypes(group_name, file_format="csv", dataset_name=None): return output + @app.route("/api/v_{}/gen_dropdown".format(version), methods=("GET",)) def gen_dropdown_menu(): results = gen_menu.gen_dropdown_json() @@ -822,18 +852,20 @@ def gen_dropdown_menu(): else: return return_error(code=500, source=request.url_rule.rule, title="Some error occurred", details="") + def return_error(code, source, title, details): json_ob = {"errors": [ { "status": code, - "source": { "pointer": source }, - "title" : title, + "source": {"pointer": source}, + "title": title, "detail": details } ]} return flask.jsonify(json_ob) + def get_dataset_trait_ids(dataset_name, start_vars): if 'limit_to' in start_vars: @@ -842,8 +874,8 @@ def get_dataset_trait_ids(dataset_name, start_vars): limit_string = "" if "Geno" in dataset_name: - data_type = "Geno" #ZS: Need to pass back the dataset type - query = """ + data_type = "Geno" # ZS: Need to pass back the dataset type + query = """ SELECT GenoXRef.GenoId, Geno.Name, GenoXRef.GenoFreezeId FROM @@ -866,7 +898,7 @@ def get_dataset_trait_ids(dataset_name, start_vars): data_type = "Publish" dataset_name = dataset_name.replace("Publish", "") dataset_id = get_group_id(dataset_name) - + query = """ SELECT PublishXRef.PhenotypeId, PublishXRef.Id, InbredSet.InbredSetCode @@ -881,7 +913,8 @@ def get_dataset_trait_ids(dataset_name, start_vars): results = g.db.execute(query).fetchall() trait_ids = [result[0] for result in results] - trait_names = [str(result[2]) + "_" + str(result[1]) for result in results] + trait_names = [str(result[2]) + "_" + str(result[1]) + for result in results] return trait_ids, trait_names, data_type, dataset_id @@ -906,6 +939,7 @@ def get_dataset_trait_ids(dataset_name, start_vars): dataset_id = results[0][2] return trait_ids, trait_names, data_type, dataset_id + def get_samplelist(dataset_name): group_id = get_group_id_from_dataset(dataset_name) @@ -915,13 +949,14 @@ def get_samplelist(dataset_name): WHERE StrainXRef.StrainId = Strain.Id AND StrainXRef.InbredSetId = {} """.format(group_id) - + results = g.db.execute(query).fetchall() - + samplelist = [result[0] for result in results] return samplelist + def get_group_id_from_dataset(dataset_name): if "Publish" in dataset_name: query = """ @@ -962,6 +997,7 @@ def get_group_id_from_dataset(dataset_name): else: return None + def get_group_id(group_name): query = """ SELECT InbredSet.Id diff --git a/wqflask/wqflask/collect.py b/wqflask/wqflask/collect.py index 0291f2b8..01274ba9 100644 --- a/wqflask/wqflask/collect.py +++ b/wqflask/wqflask/collect.py @@ -35,11 +35,12 @@ def process_traits(unprocessed_traits): data, _separator, the_hmac = trait.rpartition(':') data = data.strip() if g.user_session.logged_in: - assert the_hmac == hmac.hmac_creation(data), "Data tampering?" + assert the_hmac == hmac.hmac_creation(data), "Data tampering?" traits.add(str(data)) return traits + def report_change(len_before, len_now): new_length = len_now - len_before if new_length: @@ -48,16 +49,18 @@ def report_change(len_before, len_now): else: logger.debug("No new traits were added.") + @app.route("/collections/store_trait_list", methods=('POST',)) def store_traits_list(): - params = request.form + params = request.form - traits = params['traits'] - hash = params['hash'] + traits = params['traits'] + hash = params['hash'] - Redis.set(hash, traits) + Redis.set(hash, traits) + + return hash - return hash @app.route("/collections/add") def collections_add(): @@ -68,19 +71,20 @@ def collections_add(): uc_id = g.user_session.add_collection(collection_name, set()) collections = g.user_session.user_collections - #ZS: One of these might be unnecessary + # ZS: One of these might be unnecessary if 'traits' in request.args: - traits=request.args['traits'] + traits = request.args['traits'] return render_template("collections/add.html", - traits = traits, - collections = collections, - ) + traits=traits, + collections=collections, + ) else: hash = request.args['hash'] return render_template("collections/add.html", - hash = hash, - collections = collections, - ) + hash=hash, + collections=collections, + ) + @app.route("/collections/new") def collections_new(): @@ -118,6 +122,7 @@ def collections_new(): # CauseAnError pass + def create_new(collection_name): params = request.args @@ -133,15 +138,17 @@ def create_new(collection_name): return redirect(url_for('view_collection', uc_id=uc_id)) + @app.route("/collections/list") def list_collections(): params = request.args user_collections = list(g.user_session.user_collections) return render_template("collections/list.html", - params = params, - collections = user_collections, - ) + params=params, + collections=user_collections, + ) + @app.route("/collections/remove", methods=('POST',)) def remove_traits(): @@ -151,7 +158,8 @@ def remove_traits(): traits_to_remove = params['trait_list'] traits_to_remove = process_traits(traits_to_remove) - members_now = g.user_session.remove_traits_from_collection(uc_id, traits_to_remove) + members_now = g.user_session.remove_traits_from_collection( + uc_id, traits_to_remove) return redirect(url_for("view_collection", uc_id=uc_id)) @@ -174,7 +182,8 @@ def delete_collection(): else: flash("We've deleted the selected collection.", "alert-info") else: - flash("We've deleted the collection: {}.".format(collection_name), "alert-info") + flash("We've deleted the collection: {}.".format( + collection_name), "alert-info") return redirect(url_for('list_collections')) @@ -184,7 +193,8 @@ def view_collection(): params = request.args uc_id = params['uc_id'] - uc = next((collection for collection in g.user_session.user_collections if collection["id"] == uc_id)) + uc = next( + (collection for collection in g.user_session.user_collections if collection["id"] == uc_id)) traits = uc["members"] trait_obs = [] @@ -196,25 +206,28 @@ def view_collection(): name, dataset_name = atrait.split(':') if dataset_name == "Temp": group = name.split("_")[2] - dataset = create_dataset(dataset_name, dataset_type = "Temp", group_name = group) + dataset = create_dataset( + dataset_name, dataset_type="Temp", group_name=group) trait_ob = create_trait(name=name, dataset=dataset) else: dataset = create_dataset(dataset_name) trait_ob = create_trait(name=name, dataset=dataset) - trait_ob = retrieve_trait_info(trait_ob, dataset, get_qtl_info=True) + trait_ob = retrieve_trait_info( + trait_ob, dataset, get_qtl_info=True) trait_obs.append(trait_ob) json_version.append(jsonable(trait_ob)) collection_info = dict(trait_obs=trait_obs, - uc = uc) + uc=uc) if "json" in params: return json.dumps(json_version) else: return render_template("collections/view.html", - **collection_info - ) + **collection_info + ) + @app.route("/collections/change_name", methods=('POST',)) def change_collection_name(): @@ -226,4 +239,3 @@ def change_collection_name(): g.user_session.change_collection_name(collection_id, new_name) return new_name - diff --git a/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py b/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py index 5855ccf0..cb88eb53 100644 --- a/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py +++ b/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py @@ -1,4 +1,4 @@ -## Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License @@ -23,7 +23,7 @@ from pprint import pformat as pf from base.trait import create_trait from base import data_set from utility import webqtlUtil, helper_functions, corr_result_helpers -import utility.webqtlUtil #this is for parallel computing only. +import utility.webqtlUtil # this is for parallel computing only. from wqflask.correlation import correlation_functions from MySQLdb import escape_string as escape @@ -34,16 +34,18 @@ from flask import Flask, g class ComparisonBarChart: def __init__(self, start_vars): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) self.all_sample_list = [] self.traits = [] self.insufficient_shared_samples = False - this_group = self.trait_list[0][1].group.name #ZS: Getting initial group name before verifying all traits are in the same group in the following loop + # ZS: Getting initial group name before verifying all traits are in the same group in the following loop + this_group = self.trait_list[0][1].group.name for trait_db in self.trait_list: - + if trait_db[1].group.name != this_group: self.insufficient_shared_samples = True break @@ -51,7 +53,7 @@ class ComparisonBarChart: this_group = trait_db[1].group.name this_trait = trait_db[0] self.traits.append(this_trait) - + this_sample_data = this_trait.data for sample in this_sample_data: @@ -74,10 +76,10 @@ class ComparisonBarChart: this_trait_vals.append('') self.sample_data.append(this_trait_vals) - self.js_data = dict(traits = [trait.name for trait in self.traits], - samples = self.all_sample_list, - sample_data = self.sample_data,) - + self.js_data = dict(traits=[trait.name for trait in self.traits], + samples=self.all_sample_list, + sample_data=self.sample_data,) + def get_trait_db_obs(self, trait_db_list): self.trait_list = [] @@ -88,9 +90,8 @@ class ComparisonBarChart: #print("dataset_name:", dataset_name) dataset_ob = data_set.create_dataset(dataset_name) trait_ob = create_trait(dataset=dataset_ob, - name=trait_name, - cellid=None) + name=trait_name, + cellid=None) self.trait_list.append((trait_ob, dataset_ob)) #print("trait_list:", self.trait_list) - diff --git a/wqflask/wqflask/correlation/corr_scatter_plot.py b/wqflask/wqflask/correlation/corr_scatter_plot.py index d5dc26f5..cafb9265 100644 --- a/wqflask/wqflask/correlation/corr_scatter_plot.py +++ b/wqflask/wqflask/correlation/corr_scatter_plot.py @@ -9,24 +9,29 @@ from scipy import stats import numpy as np import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + class CorrScatterPlot: """Page that displays a correlation scatterplot with a line fitted to it""" def __init__(self, params): if "Temp" in params['dataset_1']: - self.dataset_1 = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = params['dataset_1'].split("_")[1]) + self.dataset_1 = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=params['dataset_1'].split("_")[1]) else: self.dataset_1 = data_set.create_dataset(params['dataset_1']) if "Temp" in params['dataset_2']: - self.dataset_2 = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = params['dataset_2'].split("_")[1]) + self.dataset_2 = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=params['dataset_2'].split("_")[1]) else: self.dataset_2 = data_set.create_dataset(params['dataset_2']) #self.dataset_3 = data_set.create_dataset(params['dataset_3']) - self.trait_1 = create_trait(name=params['trait_1'], dataset=self.dataset_1) - self.trait_2 = create_trait(name=params['trait_2'], dataset=self.dataset_2) + self.trait_1 = create_trait( + name=params['trait_1'], dataset=self.dataset_1) + self.trait_2 = create_trait( + name=params['trait_2'], dataset=self.dataset_2) #self.trait_3 = create_trait(name=params['trait_3'], dataset=self.dataset_3) self.method = params['method'] @@ -37,10 +42,13 @@ class CorrScatterPlot: if self.dataset_1.group.f1list != None: primary_samples += self.dataset_1.group.f1list - self.trait_1 = retrieve_sample_data(self.trait_1, self.dataset_1, primary_samples) - self.trait_2 = retrieve_sample_data(self.trait_2, self.dataset_2, primary_samples) + self.trait_1 = retrieve_sample_data( + self.trait_1, self.dataset_1, primary_samples) + self.trait_2 = retrieve_sample_data( + self.trait_2, self.dataset_2, primary_samples) - samples_1, samples_2, num_overlap = corr_result_helpers.normalize_values_with_samples(self.trait_1.data, self.trait_2.data) + samples_1, samples_2, num_overlap = corr_result_helpers.normalize_values_with_samples( + self.trait_1.data, self.trait_2.data) self.data = [] self.indIDs = list(samples_1.keys()) @@ -53,72 +61,76 @@ class CorrScatterPlot: vals_2.append(samples_2[sample].value) self.data.append(vals_2) - slope, intercept, r_value, p_value, std_err = stats.linregress(vals_1, vals_2) + slope, intercept, r_value, p_value, std_err = stats.linregress( + vals_1, vals_2) if slope < 0.001: slope_string = '%.3E' % slope else: slope_string = '%.3f' % slope - - x_buffer = (max(vals_1) - min(vals_1))*0.1 - y_buffer = (max(vals_2) - min(vals_2))*0.1 + + x_buffer = (max(vals_1) - min(vals_1)) * 0.1 + y_buffer = (max(vals_2) - min(vals_2)) * 0.1 x_range = [min(vals_1) - x_buffer, max(vals_1) + x_buffer] y_range = [min(vals_2) - y_buffer, max(vals_2) + y_buffer] - intercept_coords = get_intercept_coords(slope, intercept, x_range, y_range) + intercept_coords = get_intercept_coords( + slope, intercept, x_range, y_range) rx = stats.rankdata(vals_1) ry = stats.rankdata(vals_2) self.rdata = [] self.rdata.append(rx.tolist()) - self.rdata.append(ry.tolist()) - srslope, srintercept, srr_value, srp_value, srstd_err = stats.linregress(rx, ry) + self.rdata.append(ry.tolist()) + srslope, srintercept, srr_value, srp_value, srstd_err = stats.linregress( + rx, ry) if srslope < 0.001: srslope_string = '%.3E' % srslope else: srslope_string = '%.3f' % srslope - x_buffer = (max(rx) - min(rx))*0.1 - y_buffer = (max(ry) - min(ry))*0.1 + x_buffer = (max(rx) - min(rx)) * 0.1 + y_buffer = (max(ry) - min(ry)) * 0.1 sr_range = [min(rx) - x_buffer, max(rx) + x_buffer] - sr_intercept_coords = get_intercept_coords(srslope, srintercept, sr_range, sr_range) + sr_intercept_coords = get_intercept_coords( + srslope, srintercept, sr_range, sr_range) self.collections_exist = "False" if g.user_session.num_collections > 0: self.collections_exist = "True" self.js_data = dict( - data = self.data, - rdata = self.rdata, - indIDs = self.indIDs, - trait_1 = self.trait_1.dataset.name + ": " + str(self.trait_1.name), - trait_2 = self.trait_2.dataset.name + ": " + str(self.trait_2.name), - samples_1 = samples_1, - samples_2 = samples_2, - num_overlap = num_overlap, - vals_1 = vals_1, - vals_2 = vals_2, - x_range = x_range, - y_range = y_range, - sr_range = sr_range, - intercept_coords = intercept_coords, - sr_intercept_coords = sr_intercept_coords, - - slope = slope, - slope_string = slope_string, - intercept = intercept, - r_value = r_value, - p_value = p_value, - - srslope = srslope, - srslope_string = srslope_string, - srintercept = srintercept, - srr_value = srr_value, - srp_value = srp_value + data=self.data, + rdata=self.rdata, + indIDs=self.indIDs, + trait_1=self.trait_1.dataset.name + ": " + str(self.trait_1.name), + trait_2=self.trait_2.dataset.name + ": " + str(self.trait_2.name), + samples_1=samples_1, + samples_2=samples_2, + num_overlap=num_overlap, + vals_1=vals_1, + vals_2=vals_2, + x_range=x_range, + y_range=y_range, + sr_range=sr_range, + intercept_coords=intercept_coords, + sr_intercept_coords=sr_intercept_coords, + + slope=slope, + slope_string=slope_string, + intercept=intercept, + r_value=r_value, + p_value=p_value, + + srslope=srslope, + srslope_string=srslope_string, + srintercept=srintercept, + srr_value=srr_value, + srp_value=srp_value #trait3 = self.trait_3.data, #vals_3 = vals_3 @@ -129,10 +141,10 @@ class CorrScatterPlot: def get_intercept_coords(slope, intercept, x_range, y_range): intercept_coords = [] - y1 = slope*x_range[0] + intercept - y2 = slope*x_range[1] + intercept - x1 = (y1-intercept)/slope - x2 = (y2-intercept)/slope + y1 = slope * x_range[0] + intercept + y2 = slope * x_range[1] + intercept + x1 = (y1 - intercept) / slope + x2 = (y2 - intercept) / slope intercept_coords.append([x1, y1]) intercept_coords.append([x2, y2]) diff --git a/wqflask/wqflask/correlation/correlation_functions.py b/wqflask/wqflask/correlation/correlation_functions.py index fd7691d4..0f24241a 100644 --- a/wqflask/wqflask/correlation/correlation_functions.py +++ b/wqflask/wqflask/correlation/correlation_functions.py @@ -34,19 +34,19 @@ from flask import Flask, g ##################################################################################### -#Input: primaryValue(list): one list of expression values of one probeSet, +# Input: primaryValue(list): one list of expression values of one probeSet, # targetValue(list): one list of expression values of one probeSet, # method(string): indicate correlation method ('pearson' or 'spearman') -#Output: corr_result(list): first item is Correlation Value, second item is tissue number, +# Output: corr_result(list): first item is Correlation Value, second item is tissue number, # third item is PValue -#Function: get correlation value,Tissue quantity ,p value result by using R; -#Note : This function is special case since both primaryValue and targetValue are from -#the same dataset. So the length of these two parameters is the same. They are pairs. -#Also, in the datatable TissueProbeSetData, all Tissue values are loaded based on -#the same tissue order +# Function: get correlation value,Tissue quantity ,p value result by using R; +# Note : This function is special case since both primaryValue and targetValue are from +# the same dataset. So the length of these two parameters is the same. They are pairs. +# Also, in the datatable TissueProbeSetData, all Tissue values are loaded based on +# the same tissue order ##################################################################################### -def cal_zero_order_corr_for_tiss (primaryValue=[], targetValue=[], method='pearson'): +def cal_zero_order_corr_for_tiss(primaryValue=[], targetValue=[], method='pearson'): R_primary = rpy2.robjects.FloatVector(list(range(len(primaryValue)))) N = len(primaryValue) @@ -55,27 +55,27 @@ def cal_zero_order_corr_for_tiss (primaryValue=[], targetValue=[], method='pears R_target = rpy2.robjects.FloatVector(list(range(len(targetValue)))) for i in range(len(targetValue)): - R_target[i]=targetValue[i] + R_target[i] = targetValue[i] R_corr_test = rpy2.robjects.r['cor.test'] - if method =='spearman': + if method == 'spearman': R_result = R_corr_test(R_primary, R_target, method='spearman') else: R_result = R_corr_test(R_primary, R_target) - corr_result =[] - corr_result.append( R_result[3][0]) - corr_result.append( N ) - corr_result.append( R_result[2][0]) + corr_result = [] + corr_result.append(R_result[3][0]) + corr_result.append(N) + corr_result.append(R_result[2][0]) return corr_result ######################################################################################################## -#input: cursor, symbolList (list), dataIdDict(Dict): key is symbol -#output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair. +# input: cursor, symbolList (list), dataIdDict(Dict): key is symbol +# output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair. # key is symbol, value is one list of expression values of one probeSet. -#function: wrapper function for getSymbolValuePairDict function +# function: wrapper function for getSymbolValuePairDict function # build gene symbol list if necessary, cut it into small lists if necessary, # then call getSymbolValuePairDict function and merge the results. ######################################################################################################## diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index cb341e79..9b0b6118 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -1,4 +1,4 @@ -## Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License @@ -78,11 +78,12 @@ class CorrelationResults: with Bench("Doing correlations"): if start_vars['dataset'] == "Temp": - self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = start_vars['group']) + self.dataset = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=start_vars['group']) self.trait_id = start_vars['trait_id'] self.this_trait = create_trait(dataset=self.dataset, - name=self.trait_id, - cellid=None) + name=self.trait_id, + cellid=None) else: helper_functions.get_species_dataset_trait(self, start_vars) @@ -95,9 +96,9 @@ class CorrelationResults: self.p_range_lower = get_float(start_vars, 'p_range_lower', -1.0) self.p_range_upper = get_float(start_vars, 'p_range_upper', 1.0) - if ('loc_chr' in start_vars and - 'min_loc_mb' in start_vars and - 'max_loc_mb' in start_vars): + if ('loc_chr' in start_vars + and 'min_loc_mb' in start_vars + and 'max_loc_mb' in start_vars): self.location_type = get_string(start_vars, 'location_type') self.location_chr = get_string(start_vars, 'loc_chr') @@ -109,8 +110,8 @@ class CorrelationResults: self.get_formatted_corr_type() self.return_number = int(start_vars['corr_return_results']) - #The two if statements below append samples to the sample list based upon whether the user - #rselected Primary Samples Only, Other Samples Only, or All Samples + # The two if statements below append samples to the sample list based upon whether the user + # rselected Primary Samples Only, Other Samples Only, or All Samples primary_samples = self.dataset.group.samplelist if self.dataset.group.parlist != None: @@ -118,23 +119,26 @@ class CorrelationResults: if self.dataset.group.f1list != None: primary_samples += self.dataset.group.f1list - #If either BXD/whatever Only or All Samples, append all of that group's samplelist + # If either BXD/whatever Only or All Samples, append all of that group's samplelist if corr_samples_group != 'samples_other': self.process_samples(start_vars, primary_samples) - #If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and - #exclude the primary samples (because they would have been added in the previous - #if statement if the user selected All Samples) + # If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and + # exclude the primary samples (because they would have been added in the previous + # if statement if the user selected All Samples) if corr_samples_group != 'samples_primary': if corr_samples_group == 'samples_other': primary_samples = [x for x in primary_samples if x not in ( - self.dataset.group.parlist + self.dataset.group.f1list)] - self.process_samples(start_vars, list(self.this_trait.data.keys()), primary_samples) + self.dataset.group.parlist + self.dataset.group.f1list)] + self.process_samples(start_vars, list( + self.this_trait.data.keys()), primary_samples) - self.target_dataset = data_set.create_dataset(start_vars['corr_dataset']) + self.target_dataset = data_set.create_dataset( + start_vars['corr_dataset']) self.target_dataset.get_trait_data(list(self.sample_data.keys())) - self.header_fields = get_header_fields(self.target_dataset.type, self.corr_method) + self.header_fields = get_header_fields( + self.target_dataset.type, self.corr_method) if self.target_dataset.type == "ProbeSet": self.filter_cols = [7, 6] @@ -153,7 +157,8 @@ class CorrelationResults: tissue_corr_data = self.do_tissue_correlation_for_all_traits() if tissue_corr_data != None: for trait in list(tissue_corr_data.keys())[:self.return_number]: - self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait]) + self.get_sample_r_and_p_values( + trait, self.target_dataset.trait_data[trait]) else: for trait, values in list(self.target_dataset.trait_data.items()): self.get_sample_r_and_p_values(trait, values) @@ -163,7 +168,8 @@ class CorrelationResults: lit_corr_data = self.do_lit_correlation_for_all_traits() for trait in list(lit_corr_data.keys())[:self.return_number]: - self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait]) + self.get_sample_r_and_p_values( + trait, self.target_dataset.trait_data[trait]) elif self.corr_type == "sample": for trait, values in list(self.target_dataset.trait_data.items()): @@ -172,8 +178,7 @@ class CorrelationResults: self.correlation_data = collections.OrderedDict(sorted(list(self.correlation_data.items()), key=lambda t: -abs(t[1][0]))) - - #ZS: Convert min/max chromosome to an int for the location range option + # ZS: Convert min/max chromosome to an int for the location range option range_chr_as_int = None for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()): if 'loc_chr' in start_vars: @@ -181,7 +186,8 @@ class CorrelationResults: range_chr_as_int = order_id for _trait_counter, trait in enumerate(list(self.correlation_data.keys())[:self.return_number]): - trait_object = create_trait(dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False) + trait_object = create_trait( + dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False) if not trait_object: continue @@ -194,8 +200,8 @@ class CorrelationResults: if chr_info.name == trait_object.chr: chr_as_int = order_id - if (float(self.correlation_data[trait][0]) >= self.p_range_lower and - float(self.correlation_data[trait][0]) <= self.p_range_upper): + if (float(self.correlation_data[trait][0]) >= self.p_range_lower + and float(self.correlation_data[trait][0]) <= self.p_range_upper): if (self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Publish") and bool(trait_object.mean): if (self.min_expr != None) and (float(trait_object.mean) < self.min_expr): @@ -215,8 +221,8 @@ class CorrelationResults: continue (trait_object.sample_r, - trait_object.sample_p, - trait_object.num_overlap) = self.correlation_data[trait] + trait_object.sample_p, + trait_object.num_overlap) = self.correlation_data[trait] # Set some sane defaults trait_object.tissue_corr = 0 @@ -236,7 +242,8 @@ class CorrelationResults: if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet": self.do_tissue_correlation_for_trait_list() - self.json_results = generate_corr_json(self.correlation_results, self.this_trait, self.dataset, self.target_dataset) + self.json_results = generate_corr_json( + self.correlation_results, self.this_trait, self.dataset, self.target_dataset) ############################################################################################################################################ @@ -259,39 +266,43 @@ class CorrelationResults: def do_tissue_correlation_for_trait_list(self, tissue_dataset_id=1): """Given a list of correlation results (self.correlation_results), gets the tissue correlation value for each""" - #Gets tissue expression values for the primary trait + # Gets tissue expression values for the primary trait primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list = [self.this_trait.symbol]) + symbol_list=[self.this_trait.symbol]) if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict: - primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()] - gene_symbol_list = [trait.symbol for trait in self.correlation_results if trait.symbol] + primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower( + )] + gene_symbol_list = [ + trait.symbol for trait in self.correlation_results if trait.symbol] - corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list=gene_symbol_list) + corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=gene_symbol_list) for trait in self.correlation_results: if trait.symbol and trait.symbol.lower() in corr_result_tissue_vals_dict: - this_trait_tissue_values = corr_result_tissue_vals_dict[trait.symbol.lower()] + this_trait_tissue_values = corr_result_tissue_vals_dict[trait.symbol.lower( + )] result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, - this_trait_tissue_values, - self.corr_method) + this_trait_tissue_values, + self.corr_method) trait.tissue_corr = result[0] trait.tissue_pvalue = result[2] def do_tissue_correlation_for_all_traits(self, tissue_dataset_id=1): - #Gets tissue expression values for the primary trait + # Gets tissue expression values for the primary trait primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list = [self.this_trait.symbol]) + symbol_list=[self.this_trait.symbol]) if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict: - primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()] + primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower( + )] #print("trait_gene_symbols: ", pf(trait_gene_symbols.values())) - corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list=list(self.trait_symbol_dict.values())) + corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=list(self.trait_symbol_dict.values())) #print("corr_result_tissue_vals: ", pf(corr_result_tissue_vals_dict)) @@ -300,27 +311,30 @@ class CorrelationResults: tissue_corr_data = {} for trait, symbol in list(self.trait_symbol_dict.items()): if symbol and symbol.lower() in corr_result_tissue_vals_dict: - this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()] + this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower( + )] result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, - this_trait_tissue_values, - self.corr_method) + this_trait_tissue_values, + self.corr_method) tissue_corr_data[trait] = [symbol, result[0], result[2]] tissue_corr_data = collections.OrderedDict(sorted(list(tissue_corr_data.items()), - key=lambda t: -abs(t[1][1]))) + key=lambda t: -abs(t[1][1]))) return tissue_corr_data def do_lit_correlation_for_trait_list(self): - input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid) + input_trait_mouse_gene_id = self.convert_to_mouse_gene_id( + self.dataset.group.species.lower(), self.this_trait.geneid) for trait in self.correlation_results: if trait.geneid: - trait.mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), trait.geneid) + trait.mouse_gene_id = self.convert_to_mouse_gene_id( + self.dataset.group.species.lower(), trait.geneid) else: trait.mouse_gene_id = None @@ -348,13 +362,14 @@ class CorrelationResults: else: trait.lit_corr = 0 - def do_lit_correlation_for_all_traits(self): - input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid) + input_trait_mouse_gene_id = self.convert_to_mouse_gene_id( + self.dataset.group.species.lower(), self.this_trait.geneid) lit_corr_data = {} for trait, gene_id in list(self.trait_geneid_dict.items()): - mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), gene_id) + mouse_gene_id = self.convert_to_mouse_gene_id( + self.dataset.group.species.lower(), gene_id) if mouse_gene_id and str(mouse_gene_id).find(";") == -1: #print("gene_symbols:", input_trait_mouse_gene_id + " / " + mouse_gene_id) @@ -382,7 +397,7 @@ class CorrelationResults: lit_corr_data[trait] = [gene_id, 0] lit_corr_data = collections.OrderedDict(sorted(list(lit_corr_data.items()), - key=lambda t: -abs(t[1][1]))) + key=lambda t: -abs(t[1][1]))) return lit_corr_data @@ -440,21 +455,26 @@ class CorrelationResults: self.this_trait_vals.append(sample_value) target_vals.append(target_sample_value) - self.this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(self.this_trait_vals, target_vals) + self.this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values( + self.this_trait_vals, target_vals) if num_overlap > 5: - #ZS: 2015 could add biweight correlation, see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/ + # ZS: 2015 could add biweight correlation, see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/ if self.corr_method == 'bicor': - sample_r, sample_p = do_bicor(self.this_trait_vals, target_vals) + sample_r, sample_p = do_bicor( + self.this_trait_vals, target_vals) elif self.corr_method == 'pearson': - sample_r, sample_p = scipy.stats.pearsonr(self.this_trait_vals, target_vals) + sample_r, sample_p = scipy.stats.pearsonr( + self.this_trait_vals, target_vals) else: - sample_r, sample_p = scipy.stats.spearmanr(self.this_trait_vals, target_vals) + sample_r, sample_p = scipy.stats.spearmanr( + self.this_trait_vals, target_vals) if numpy.isnan(sample_r): pass else: - self.correlation_data[trait] = [sample_r, sample_p, num_overlap] + self.correlation_data[trait] = [ + sample_r, sample_p, num_overlap] def process_samples(self, start_vars, sample_names, excluded_samples=None): if not excluded_samples: @@ -475,16 +495,18 @@ def do_bicor(this_trait_vals, target_trait_vals): r_library("WGCNA") r_bicor = ro.r["bicorAndPvalue"] # Map the bicorAndPvalue function - r_options(stringsAsFactors = False) + r_options(stringsAsFactors=False) this_vals = ro.Vector(this_trait_vals) target_vals = ro.Vector(target_trait_vals) - the_r, the_p, _fisher_transform, _the_t, _n_obs = [numpy.asarray(x) for x in r_bicor(x = this_vals, y = target_vals)] + the_r, the_p, _fisher_transform, _the_t, _n_obs = [ + numpy.asarray(x) for x in r_bicor(x=this_vals, y=target_vals)] return the_r, the_p -def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_api = False): + +def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_api=False): results_list = [] for i, trait in enumerate(corr_results): if trait.view == False: @@ -493,7 +515,8 @@ def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_ap results_dict['index'] = i + 1 results_dict['trait_id'] = trait.name results_dict['dataset'] = trait.dataset.name - results_dict['hmac'] = hmac.data_hmac('{}:{}'.format(trait.name, trait.dataset.name)) + results_dict['hmac'] = hmac.data_hmac( + '{}:{}'.format(trait.name, trait.dataset.name)) if target_dataset.type == "ProbeSet": results_dict['symbol'] = trait.symbol results_dict['description'] = "N/A" @@ -544,7 +567,8 @@ def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_ap if bool(trait.authors): authors_list = trait.authors.split(',') if len(authors_list) > 6: - results_dict['authors_display'] = ", ".join(authors_list[:6]) + ", et al." + results_dict['authors_display'] = ", ".join( + authors_list[:6]) + ", et al." else: results_dict['authors_display'] = trait.authors if bool(trait.pubmed_id): @@ -574,85 +598,85 @@ def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_ap return json.dumps(results_list) + def get_header_fields(data_type, corr_method): if data_type == "ProbeSet": if corr_method == "spearman": header_fields = ['Index', - 'Record', - 'Symbol', - 'Description', - 'Location', - 'Mean', - 'Sample rho', - 'N', - 'Sample p(rho)', - 'Lit rho', - 'Tissue rho', - 'Tissue p(rho)', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] + 'Record', + 'Symbol', + 'Description', + 'Location', + 'Mean', + 'Sample rho', + 'N', + 'Sample p(rho)', + 'Lit rho', + 'Tissue rho', + 'Tissue p(rho)', + 'Max LRS', + 'Max LRS Location', + 'Additive Effect'] else: header_fields = ['Index', - 'Record', - 'Symbol', - 'Description', - 'Location', - 'Mean', - 'Sample r', - 'N', - 'Sample p(r)', - 'Lit r', - 'Tissue r', - 'Tissue p(r)', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] + 'Record', + 'Symbol', + 'Description', + 'Location', + 'Mean', + 'Sample r', + 'N', + 'Sample p(r)', + 'Lit r', + 'Tissue r', + 'Tissue p(r)', + 'Max LRS', + 'Max LRS Location', + 'Additive Effect'] elif data_type == "Publish": if corr_method == "spearman": header_fields = ['Index', - 'Record', - 'Abbreviation', - 'Description', - 'Mean', - 'Authors', - 'Year', - 'Sample rho', - 'N', - 'Sample p(rho)', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] + 'Record', + 'Abbreviation', + 'Description', + 'Mean', + 'Authors', + 'Year', + 'Sample rho', + 'N', + 'Sample p(rho)', + 'Max LRS', + 'Max LRS Location', + 'Additive Effect'] else: header_fields = ['Index', - 'Record', - 'Abbreviation', - 'Description', - 'Mean', - 'Authors', - 'Year', - 'Sample r', - 'N', - 'Sample p(r)', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] + 'Record', + 'Abbreviation', + 'Description', + 'Mean', + 'Authors', + 'Year', + 'Sample r', + 'N', + 'Sample p(r)', + 'Max LRS', + 'Max LRS Location', + 'Additive Effect'] else: if corr_method == "spearman": header_fields = ['Index', - 'ID', - 'Location', - 'Sample rho', - 'N', - 'Sample p(rho)'] + 'ID', + 'Location', + 'Sample rho', + 'N', + 'Sample p(rho)'] else: header_fields = ['Index', - 'ID', - 'Location', - 'Sample r', - 'N', - 'Sample p(r)'] + 'ID', + 'Location', + 'Sample r', + 'N', + 'Sample p(r)'] return header_fields - diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index d0b4a156..3a54a218 100644 --- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -1,4 +1,4 @@ -## Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License @@ -44,7 +44,8 @@ THIRTY_DAYS = 60 * 60 * 24 * 30 class CorrelationMatrix: def __init__(self, start_vars): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) @@ -52,7 +53,8 @@ class CorrelationMatrix: self.traits = [] self.insufficient_shared_samples = False self.do_PCA = True - this_group = self.trait_list[0][1].group.name #ZS: Getting initial group name before verifying all traits are in the same group in the following loop + # ZS: Getting initial group name before verifying all traits are in the same group in the following loop + this_group = self.trait_list[0][1].group.name for trait_db in self.trait_list: this_group = trait_db[1].group.name this_trait = trait_db[0] @@ -76,10 +78,12 @@ class CorrelationMatrix: this_trait_vals.append('') self.sample_data.append(this_trait_vals) - if len(this_trait_vals) < len(self.trait_list): #Shouldn't do PCA if there are more traits than observations/samples + # Shouldn't do PCA if there are more traits than observations/samples + if len(this_trait_vals) < len(self.trait_list): self.do_PCA = False - self.lowest_overlap = 8 #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning) + # ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning) + self.lowest_overlap = 8 self.corr_results = [] self.pca_corr_results = [] @@ -93,7 +97,7 @@ class CorrelationMatrix: corr_result_row = [] pca_corr_result_row = [] - is_spearman = False #ZS: To determine if it's above or below the diagonal + is_spearman = False # ZS: To determine if it's above or below the diagonal for target in self.trait_list: target_trait = target[0] target_db = target[1] @@ -112,7 +116,8 @@ class CorrelationMatrix: if sample in self.shared_samples_list: self.shared_samples_list.remove(sample) - this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals) + this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values( + this_trait_vals, target_vals) if num_overlap < self.lowest_overlap: self.lowest_overlap = num_overlap @@ -120,21 +125,25 @@ class CorrelationMatrix: corr_result_row.append([target_trait, 0, num_overlap]) pca_corr_result_row.append(0) else: - pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals) + pearson_r, pearson_p = scipy.stats.pearsonr( + this_trait_vals, target_vals) if is_spearman == False: sample_r, sample_p = pearson_r, pearson_p if sample_r == 1: is_spearman = True else: - sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) + sample_r, sample_p = scipy.stats.spearmanr( + this_trait_vals, target_vals) - corr_result_row.append([target_trait, sample_r, num_overlap]) + corr_result_row.append( + [target_trait, sample_r, num_overlap]) pca_corr_result_row.append(pearson_r) self.corr_results.append(corr_result_row) self.pca_corr_results.append(pca_corr_result_row) - self.export_filename, self.export_filepath = export_corr_matrix(self.corr_results) + self.export_filename, self.export_filepath = export_corr_matrix( + self.corr_results) self.trait_data_array = [] for trait_db in self.trait_list: @@ -156,34 +165,37 @@ class CorrelationMatrix: try: corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results)) - corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen) + corr_eigen_value, corr_eigen_vectors = sortEigenVectors( + corr_result_eigen) if self.do_PCA == True: self.pca_works = "True" self.pca_trait_ids = [] - pca = self.calculate_pca(list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) + pca = self.calculate_pca( + list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) self.loadings_array = self.process_loadings() else: self.pca_works = "False" except: self.pca_works = "False" - self.js_data = dict(traits = [trait.name for trait in self.traits], - groups = groups, - cols = list(range(len(self.traits))), - rows = list(range(len(self.traits))), - samples = self.all_sample_list, - sample_data = self.sample_data,) + self.js_data = dict(traits=[trait.name for trait in self.traits], + groups=groups, + cols=list(range(len(self.traits))), + rows=list(range(len(self.traits))), + samples=self.all_sample_list, + sample_data=self.sample_data,) def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): base = importr('base') stats = importr('stats') - corr_results_to_list = robjects.FloatVector([item for sublist in self.pca_corr_results for item in sublist]) + corr_results_to_list = robjects.FloatVector( + [item for sublist in self.pca_corr_results for item in sublist]) m = robjects.r.matrix(corr_results_to_list, nrow=len(cols)) eigen = base.eigen(m) - pca = stats.princomp(m, cor = "TRUE") + pca = stats.princomp(m, cor="TRUE") self.loadings = pca.rx('loadings') self.scores = pca.rx('scores') self.scale = pca.rx('scale') @@ -193,15 +205,17 @@ class CorrelationMatrix: pca_traits = [] for i, vector in enumerate(trait_array_vectors): - #ZS: Check if below check is necessary - #if corr_eigen_value[i-1] > 100.0/len(self.trait_list): - pca_traits.append((vector*-1.0).tolist()) + # ZS: Check if below check is necessary + # if corr_eigen_value[i-1] > 100.0/len(self.trait_list): + pca_traits.append((vector * -1.0).tolist()) this_group_name = self.trait_list[0][1].group.name - temp_dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = this_group_name) + temp_dataset = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=this_group_name) temp_dataset.group.get_samplelist() for i, pca_trait in enumerate(pca_traits): - trait_id = "PCA" + str(i+1) + "_" + temp_dataset.group.species + "_" + this_group_name + "_" + datetime.datetime.now().strftime("%m%d%H%M%S") + trait_id = "PCA" + str(i + 1) + "_" + temp_dataset.group.species + "_" + \ + this_group_name + "_" + datetime.datetime.now().strftime("%m%d%H%M%S") this_vals_string = "" position = 0 for sample in temp_dataset.group.all_samples_ordered(): @@ -224,27 +238,34 @@ class CorrelationMatrix: for i in range(len(self.trait_list)): loadings_row = [] if len(self.trait_list) > 2: - the_range = 3 + the_range = 3 else: - the_range = 2 + the_range = 2 for j in range(the_range): - position = i + len(self.trait_list)*j + position = i + len(self.trait_list) * j loadings_row.append(self.loadings[0][position]) loadings_array.append(loadings_row) return loadings_array + def export_corr_matrix(corr_results): - corr_matrix_filename = "corr_matrix_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - matrix_export_path = "{}{}.csv".format(GENERATED_TEXT_DIR, corr_matrix_filename) + corr_matrix_filename = "corr_matrix_" + \ + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + matrix_export_path = "{}{}.csv".format( + GENERATED_TEXT_DIR, corr_matrix_filename) with open(matrix_export_path, "w+") as output_file: - output_file.write("Time/Date: " + datetime.datetime.now().strftime("%x / %X") + "\n") + output_file.write( + "Time/Date: " + datetime.datetime.now().strftime("%x / %X") + "\n") output_file.write("\n") output_file.write("Correlation ") for i, item in enumerate(corr_results[0]): - output_file.write("Trait" + str(i + 1) + ": " + str(item[0].dataset.name) + "::" + str(item[0].name) + "\t") + output_file.write("Trait" + str(i + 1) + ": " + \ + str(item[0].dataset.name) + "::" + str(item[0].name) + "\t") output_file.write("\n") for i, row in enumerate(corr_results): - output_file.write("Trait" + str(i + 1) + ": " + str(row[0][0].dataset.name) + "::" + str(row[0][0].name) + "\t") + output_file.write("Trait" + str(i + 1) + ": " + \ + str(row[0][0].dataset.name) + "::" + str(row[0][0].name) + "\t") for item in row: output_file.write(str(item[1]) + "\t") output_file.write("\n") @@ -253,16 +274,19 @@ def export_corr_matrix(corr_results): output_file.write("\n") output_file.write("N ") for i, item in enumerate(corr_results[0]): - output_file.write("Trait" + str(i) + ": " + str(item[0].dataset.name) + "::" + str(item[0].name) + "\t") + output_file.write("Trait" + str(i) + ": " + \ + str(item[0].dataset.name) + "::" + str(item[0].name) + "\t") output_file.write("\n") for i, row in enumerate(corr_results): - output_file.write("Trait" + str(i) + ": " + str(row[0][0].dataset.name) + "::" + str(row[0][0].name) + "\t") + output_file.write("Trait" + str(i) + ": " + \ + str(row[0][0].dataset.name) + "::" + str(row[0][0].name) + "\t") for item in row: output_file.write(str(item[2]) + "\t") output_file.write("\n") return corr_matrix_filename, matrix_export_path + def zScore(trait_data_array): NN = len(trait_data_array[0]) if NN < 10: @@ -271,18 +295,19 @@ def zScore(trait_data_array): i = 0 for data in trait_data_array: N = len(data) - S = reduce(lambda x, y: x+y, data, 0.) - SS = reduce(lambda x, y: x+y*y, data, 0.) - mean = S/N - var = SS - S*S/N - stdev = math.sqrt(var/(N-1)) + S = reduce(lambda x, y: x + y, data, 0.) + SS = reduce(lambda x, y: x + y * y, data, 0.) + mean = S / N + var = SS - S * S / N + stdev = math.sqrt(var / (N - 1)) if stdev == 0: stdev = 1e-100 - data2 = [(x-mean)/stdev for x in data] + data2 = [(x - mean) / stdev for x in data] trait_data_array[i] = data2 i += 1 return trait_data_array + def sortEigenVectors(vector): try: eigenValues = vector[0].tolist() @@ -298,8 +323,8 @@ def sortEigenVectors(vector): for item in combines: A.append(item[0]) B.append(item[1]) - sum = reduce(lambda x, y: x+y, A, 0.0) - A = [x*100.0/sum for x in A] + sum = reduce(lambda x, y: x + y, A, 0.0) + A = [x * 100.0 / sum for x in A] return [A, B] except: return [] diff --git a/wqflask/wqflask/ctl/ctl_analysis.py b/wqflask/wqflask/ctl/ctl_analysis.py index 1556e370..bb928ec5 100644 --- a/wqflask/wqflask/ctl/ctl_analysis.py +++ b/wqflask/wqflask/ctl/ctl_analysis.py @@ -25,33 +25,39 @@ from utility.tools import locate, GN2_BRANCH_URL from rpy2.robjects.packages import importr import utility.logger -logger = utility.logger.getLogger(__name__ ) - -## Get pointers to some common R functions -r_library = ro.r["library"] # Map the library function -r_options = ro.r["options"] # Map the options function -r_t = ro.r["t"] # Map the t function -r_unlist = ro.r["unlist"] # Map the unlist function -r_list = ro.r.list # Map the list function -r_png = ro.r["png"] # Map the png function for plotting -r_dev_off = ro.r["dev.off"] # Map the dev.off function -r_write_table = ro.r["write.table"] # Map the write.table function -r_data_frame = ro.r["data.frame"] # Map the write.table function -r_as_numeric = ro.r["as.numeric"] # Map the write.table function +logger = utility.logger.getLogger(__name__) + +# Get pointers to some common R functions +r_library = ro.r["library"] # Map the library function +r_options = ro.r["options"] # Map the options function +r_t = ro.r["t"] # Map the t function +r_unlist = ro.r["unlist"] # Map the unlist function +r_list = ro.r.list # Map the list function +r_png = ro.r["png"] # Map the png function for plotting +r_dev_off = ro.r["dev.off"] # Map the dev.off function +r_write_table = ro.r["write.table"] # Map the write.table function +r_data_frame = ro.r["data.frame"] # Map the write.table function +r_as_numeric = ro.r["as.numeric"] # Map the write.table function + class CTL: def __init__(self): logger.info("Initialization of CTL") #log = r_file("/tmp/genenetwork_ctl.log", open = "wt") - #r_sink(log) # Uncomment the r_sink() commands to log output from stdout/stderr to a file + # r_sink(log) # Uncomment the r_sink() commands to log output from stdout/stderr to a file #r_sink(log, type = "message") - r_library("ctl") # Load CTL - Should only be done once, since it is quite expensive - r_options(stringsAsFactors = False) + # Load CTL - Should only be done once, since it is quite expensive + r_library("ctl") + r_options(stringsAsFactors=False) logger.info("Initialization of CTL done, package loaded in R session") - self.r_CTLscan = ro.r["CTLscan"] # Map the CTLscan function - self.r_CTLsignificant = ro.r["CTLsignificant"] # Map the CTLsignificant function - self.r_lineplot = ro.r["ctl.lineplot"] # Map the ctl.lineplot function - self.r_plotCTLobject = ro.r["plot.CTLobject"] # Map the CTLsignificant function + # Map the CTLscan function + self.r_CTLscan = ro.r["CTLscan"] + # Map the CTLsignificant function + self.r_CTLsignificant = ro.r["CTLsignificant"] + # Map the ctl.lineplot function + self.r_lineplot = ro.r["ctl.lineplot"] + # Map the CTLsignificant function + self.r_plotCTLobject = ro.r["plot.CTLobject"] self.nodes_list = [] self.edges_list = [] logger.info("Obtained pointers to CTL functions") @@ -59,28 +65,29 @@ class CTL: self.gn2_url = GN2_BRANCH_URL def addNode(self, gt): - node_dict = { 'data' : {'id' : str(gt.name) + ":" + str(gt.dataset.name), - 'sid' : str(gt.name), - 'dataset' : str(gt.dataset.name), - 'label' : gt.name, - 'symbol' : gt.symbol, - 'geneid' : gt.geneid, - 'omim' : gt.omim } } + node_dict = {'data': {'id': str(gt.name) + ":" + str(gt.dataset.name), + 'sid': str(gt.name), + 'dataset': str(gt.dataset.name), + 'label': gt.name, + 'symbol': gt.symbol, + 'geneid': gt.geneid, + 'omim': gt.omim}} self.nodes_list.append(node_dict) def addEdge(self, gtS, gtT, significant, x): - edge_data = {'id' : str(gtS.symbol) + '_' + significant[1][x] + '_' + str(gtT.symbol), - 'source' : str(gtS.name) + ":" + str(gtS.dataset.name), - 'target' : str(gtT.name) + ":" + str(gtT.dataset.name), - 'lod' : significant[3][x], - 'color' : "#ff0000", - 'width' : significant[3][x] } - edge_dict = { 'data' : edge_data } + edge_data = {'id': str(gtS.symbol) + '_' + significant[1][x] + '_' + str(gtT.symbol), + 'source': str(gtS.name) + ":" + str(gtS.dataset.name), + 'target': str(gtT.name) + ":" + str(gtT.dataset.name), + 'lod': significant[3][x], + 'color': "#ff0000", + 'width': significant[3][x]} + edge_dict = {'data': edge_data} self.edges_list.append(edge_dict) def run_analysis(self, requestform): logger.info("Starting CTL analysis on dataset") - self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')] + self.trait_db_list = [trait.strip() + for trait in requestform['trait_list'].split(',')] self.trait_db_list = [x for x in self.trait_db_list if x] logger.debug("strategy:", requestform.get("strategy")) @@ -108,45 +115,49 @@ class CTL: markers = [] markernames = [] for marker in parser.markers: - markernames.append(marker["name"]) - markers.append(marker["genotypes"]) + markernames.append(marker["name"]) + markers.append(marker["genotypes"]) genotypes = list(itertools.chain(*markers)) - logger.debug(len(genotypes) / len(individuals), "==", len(parser.markers)) + logger.debug(len(genotypes) / len(individuals), + "==", len(parser.markers)) - rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len(individuals), dimnames = r_list(markernames, individuals), byrow=True)) + rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len( + individuals), dimnames=r_list(markernames, individuals), byrow=True)) # Create a phenotype matrix traits = [] for trait in self.trait_db_list: - logger.debug("retrieving data for", trait) - if trait != "": - ts = trait.split(':') - gt = create_trait(name = ts[0], dataset_name = ts[1]) - gt = retrieve_sample_data(gt, dataset, individuals) - for ind in individuals: - if ind in list(gt.data.keys()): - traits.append(gt.data[ind].value) - else: - traits.append("-999") - - rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len(individuals), dimnames = r_list(self.trait_db_list, individuals), byrow=True)) + logger.debug("retrieving data for", trait) + if trait != "": + ts = trait.split(':') + gt = create_trait(name=ts[0], dataset_name=ts[1]) + gt = retrieve_sample_data(gt, dataset, individuals) + for ind in individuals: + if ind in list(gt.data.keys()): + traits.append(gt.data[ind].value) + else: + traits.append("-999") + + rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len( + individuals), dimnames=r_list(self.trait_db_list, individuals), byrow=True)) logger.debug(rPheno) # Use a data frame to store the objects - rPheno = r_data_frame(rPheno, check_names = False) - rGeno = r_data_frame(rGeno, check_names = False) + rPheno = r_data_frame(rPheno, check_names=False) + rGeno = r_data_frame(rGeno, check_names=False) # Debug: Print the genotype and phenotype files to disk #r_write_table(rGeno, "~/outputGN/geno.csv") #r_write_table(rPheno, "~/outputGN/pheno.csv") # Perform the CTL scan - res = self.r_CTLscan(rGeno, rPheno, strategy = strategy, nperm = nperm, parametric = parametric, nthreads=6) + res = self.r_CTLscan(rGeno, rPheno, strategy=strategy, + nperm=nperm, parametric=parametric, nthreads=6) # Get significant interactions - significant = self.r_CTLsignificant(res, significance = significance) + significant = self.r_CTLsignificant(res, significance=significance) # Create an image for output self.results = {} @@ -154,40 +165,54 @@ class CTL: self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1'] self.results['ctlresult'] = significant - self.results['requestform'] = requestform # Store the user specified parameters for the output page + # Store the user specified parameters for the output page + self.results['requestform'] = requestform # Create the lineplot - r_png(self.results['imgloc1'], width=1000, height=600, type='cairo-png') - self.r_lineplot(res, significance = significance) + r_png(self.results['imgloc1'], width=1000, + height=600, type='cairo-png') + self.r_lineplot(res, significance=significance) r_dev_off() - n = 2 # We start from 2, since R starts from 1 :) + # We start from 2, since R starts from 1 :) + n = 2 for trait in self.trait_db_list: - # Create the QTL like CTL plots - self.results['imgurl' + str(n)] = webqtlUtil.genRandStr("CTL_") + ".png" - self.results['imgloc' + str(n)] = GENERATED_IMAGE_DIR + self.results['imgurl' + str(n)] - r_png(self.results['imgloc' + str(n)], width=1000, height=600, type='cairo-png') - self.r_plotCTLobject(res, (n-1), significance = significance, main='Phenotype ' + trait) - r_dev_off() - n = n + 1 + # Create the QTL like CTL plots + self.results['imgurl' + \ + str(n)] = webqtlUtil.genRandStr("CTL_") + ".png" + self.results['imgloc' + str(n)] = GENERATED_IMAGE_DIR + \ + self.results['imgurl' + str(n)] + r_png(self.results['imgloc' + str(n)], + width=1000, height=600, type='cairo-png') + self.r_plotCTLobject( + res, (n - 1), significance=significance, main='Phenotype ' + trait) + r_dev_off() + n = n + 1 # Flush any output from R sys.stdout.flush() # Create the interactive graph for cytoscape visualization (Nodes and Edges) if not isinstance(significant, ri.RNULLType): - for x in range(len(significant[0])): - logger.debug(significant[0][x], significant[1][x], significant[2][x]) # Debug to console - tsS = significant[0][x].split(':') # Source - tsT = significant[2][x].split(':') # Target - gtS = create_trait(name = tsS[0], dataset_name = tsS[1]) # Retrieve Source info from the DB - gtT = create_trait(name = tsT[0], dataset_name = tsT[1]) # Retrieve Target info from the DB - self.addNode(gtS) - self.addNode(gtT) - self.addEdge(gtS, gtT, significant, x) - - significant[0][x] = "{} ({})".format(gtS.symbol, gtS.name) # Update the trait name for the displayed table - significant[2][x] = "{} ({})".format(gtT.symbol, gtT.name) # Update the trait name for the displayed table + for x in range(len(significant[0])): + logger.debug(significant[0][x], significant[1] + [x], significant[2][x]) # Debug to console + # Source + tsS = significant[0][x].split(':') + # Target + tsT = significant[2][x].split(':') + # Retrieve Source info from the DB + gtS = create_trait(name=tsS[0], dataset_name=tsS[1]) + # Retrieve Target info from the DB + gtT = create_trait(name=tsT[0], dataset_name=tsT[1]) + self.addNode(gtS) + self.addNode(gtT) + self.addEdge(gtS, gtT, significant, x) + + # Update the trait name for the displayed table + significant[0][x] = "{} ({})".format(gtS.symbol, gtS.name) + # Update the trait name for the displayed table + significant[2][x] = "{} ({})".format(gtT.symbol, gtT.name) self.elements = json.dumps(self.nodes_list + self.edges_list) @@ -202,8 +227,8 @@ class CTL: self.loadImage("imgloc1", "imgdata1") n = 2 for trait in self.trait_db_list: - self.loadImage("imgloc" + str(n), "imgdata" + str(n)) - n = n + 1 + self.loadImage("imgloc" + str(n), "imgdata" + str(n)) + n = n + 1 def process_results(self, results): logger.info("Processing CTL output") @@ -213,4 +238,3 @@ class CTL: self.render_image(self.results) sys.stdout.flush() return(dict(template_vars)) - diff --git a/wqflask/wqflask/database.py b/wqflask/wqflask/database.py index adeed6ad..e743c4b3 100644 --- a/wqflask/wqflask/database.py +++ b/wqflask/wqflask/database.py @@ -6,7 +6,7 @@ from sqlalchemy.ext.declarative import declarative_base from utility.tools import SQL_URI import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) engine = create_engine(SQL_URI, encoding="latin1") @@ -17,6 +17,7 @@ db_session = scoped_session(sessionmaker(autocommit=False, Base = declarative_base() Base.query = db_session.query_property() + def init_db(): # import all modules here that might define models so that # they will be registered properly on the metadata. Otherwise @@ -27,4 +28,5 @@ def init_db(): Base.metadata.create_all(bind=engine) logger.info("Done creating all model metadata") + init_db() diff --git a/wqflask/wqflask/db_info.py b/wqflask/wqflask/db_info.py index 25e624ef..938c453e 100644 --- a/wqflask/wqflask/db_info.py +++ b/wqflask/wqflask/db_info.py @@ -23,23 +23,23 @@ class InfoPage: def get_info(self, create=False): query_base = ("SELECT InfoPageName, GN_AccesionId, Species.MenuName, Species.TaxonomyId, Tissue.Name, InbredSet.Name, " + - "GeneChip.GeneChipName, GeneChip.GeoPlatform, AvgMethod.Name, Datasets.DatasetName, Datasets.GeoSeries, " + - "Datasets.PublicationTitle, DatasetStatus.DatasetStatusName, Datasets.Summary, Datasets.AboutCases, " + - "Datasets.AboutTissue, Datasets.AboutDataProcessing, Datasets.Acknowledgment, Datasets.ExperimentDesign, " + - "Datasets.Contributors, Datasets.Citation, Datasets.Notes, Investigators.FirstName, Investigators.LastName, " + - "Investigators.Address, Investigators.City, Investigators.State, Investigators.ZipCode, Investigators.Country, " + - "Investigators.Phone, Investigators.Email, Investigators.Url, Organizations.OrganizationName, " + - "InvestigatorId, DatasetId, DatasetStatusId, Datasets.AboutPlatform, InfoFileTitle, Specifics " + - "FROM InfoFiles " + - "LEFT JOIN Species USING (SpeciesId) " + - "LEFT JOIN Tissue USING (TissueId) " + - "LEFT JOIN InbredSet USING (InbredSetId) " + - "LEFT JOIN GeneChip USING (GeneChipId) " + - "LEFT JOIN AvgMethod USING (AvgMethodId) " + - "LEFT JOIN Datasets USING (DatasetId) " + - "LEFT JOIN Investigators USING (InvestigatorId) " + - "LEFT JOIN Organizations USING (OrganizationId) " + - "LEFT JOIN DatasetStatus USING (DatasetStatusId) WHERE ") + "GeneChip.GeneChipName, GeneChip.GeoPlatform, AvgMethod.Name, Datasets.DatasetName, Datasets.GeoSeries, " + + "Datasets.PublicationTitle, DatasetStatus.DatasetStatusName, Datasets.Summary, Datasets.AboutCases, " + + "Datasets.AboutTissue, Datasets.AboutDataProcessing, Datasets.Acknowledgment, Datasets.ExperimentDesign, " + + "Datasets.Contributors, Datasets.Citation, Datasets.Notes, Investigators.FirstName, Investigators.LastName, " + + "Investigators.Address, Investigators.City, Investigators.State, Investigators.ZipCode, Investigators.Country, " + + "Investigators.Phone, Investigators.Email, Investigators.Url, Organizations.OrganizationName, " + + "InvestigatorId, DatasetId, DatasetStatusId, Datasets.AboutPlatform, InfoFileTitle, Specifics " + + "FROM InfoFiles " + + "LEFT JOIN Species USING (SpeciesId) " + + "LEFT JOIN Tissue USING (TissueId) " + + "LEFT JOIN InbredSet USING (InbredSetId) " + + "LEFT JOIN GeneChip USING (GeneChipId) " + + "LEFT JOIN AvgMethod USING (AvgMethodId) " + + "LEFT JOIN Datasets USING (DatasetId) " + + "LEFT JOIN Investigators USING (InvestigatorId) " + + "LEFT JOIN Organizations USING (OrganizationId) " + + "LEFT JOIN DatasetStatus USING (DatasetStatusId) WHERE ") if self.gn_accession_id: final_query = query_base + \ @@ -90,6 +90,7 @@ class InfoPage: except Exception as e: pass + def process_query_results(results): info_ob = { 'info_page_name': results[0], @@ -134,5 +135,3 @@ def process_query_results(results): } return info_ob - - diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index 364a3eed..761ae326 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -26,14 +26,16 @@ class DoSearch: def __init__(self, search_term, search_operator=None, dataset=None, search_type=None): self.search_term = search_term # Make sure search_operator is something we expect - assert search_operator in (None, "=", "<", ">", "<=", ">="), "Bad search operator" + assert search_operator in ( + None, "=", "<", ">", "<=", ">="), "Bad search operator" self.search_operator = search_operator self.dataset = dataset self.search_type = search_type if self.dataset: - #Get group information for dataset and the species id - self.species_id = webqtlDatabaseFunction.retrieve_species_id(self.dataset.group.name) + # Get group information for dataset and the species id + self.species_id = webqtlDatabaseFunction.retrieve_species_id( + self.dataset.group.name) def execute(self, query): """Executes query and returns results""" @@ -73,6 +75,7 @@ class DoSearch: else: return None + class MrnaAssaySearch(DoSearch): """A search within an expression dataset, including mRNA, protein, SNP, but not phenotype or metabolites""" @@ -103,12 +106,13 @@ class MrnaAssaySearch(DoSearch): search_string = escape(self.search_term[0]) if self.search_term[0] != "*": - match_clause = """((MATCH (ProbeSet.symbol) AGAINST ('%s' IN BOOLEAN MODE))) and """ % (search_string) + match_clause = """((MATCH (ProbeSet.symbol) AGAINST ('%s' IN BOOLEAN MODE))) and """ % ( + search_string) else: match_clause = "" - where_clause = (match_clause + - """ProbeSet.Id = ProbeSetXRef.ProbeSetId + where_clause = (match_clause + + """ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s """ % (escape(str(self.dataset.id)))) @@ -130,30 +134,30 @@ class MrnaAssaySearch(DoSearch): else: match_clause = "" - where_clause = (match_clause + - """ProbeSet.Id = ProbeSetXRef.ProbeSetId + where_clause = (match_clause + + """ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s """ % (escape(str(self.dataset.id)))) return where_clause - def compile_final_query(self, from_clause = '', where_clause = ''): + def compile_final_query(self, from_clause='', where_clause=''): """Generates the final query string""" from_clause = self.normalize_spaces(from_clause) query = (self.base_query + - """%s + """%s WHERE %s and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s ORDER BY ProbeSet.symbol ASC """ % (escape(from_clause), - where_clause, - escape(str(self.dataset.id)))) + where_clause, + escape(str(self.dataset.id)))) return query - def run_combined(self, from_clause = '', where_clause = ''): + def run_combined(self, from_clause='', where_clause=''): """Generates and runs a combined search of an mRNA expression dataset""" logger.debug("Running ProbeSetSearch") @@ -162,14 +166,14 @@ class MrnaAssaySearch(DoSearch): from_clause = self.normalize_spaces(from_clause) query = (self.base_query + - """%s + """%s WHERE %s and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s ORDER BY ProbeSet.symbol ASC """ % (escape(from_clause), - where_clause, - escape(str(self.dataset.id)))) + where_clause, + escape(str(self.dataset.id)))) return self.execute(query) @@ -195,15 +199,15 @@ class PhenotypeSearch(DoSearch): FROM Phenotype, PublishFreeze, Publication, PublishXRef """ search_fields = ('Phenotype.Post_publication_description', - 'Phenotype.Pre_publication_description', - 'Phenotype.Pre_publication_abbreviation', - 'Phenotype.Post_publication_abbreviation', - 'Phenotype.Lab_code', - 'Publication.PubMed_ID', - 'Publication.Abstract', - 'Publication.Title', - 'Publication.Authors', - 'PublishXRef.Id') + 'Phenotype.Pre_publication_description', + 'Phenotype.Pre_publication_abbreviation', + 'Phenotype.Post_publication_abbreviation', + 'Phenotype.Lab_code', + 'Publication.PubMed_ID', + 'Publication.Abstract', + 'Publication.Title', + 'Publication.Authors', + 'PublishXRef.Id') header_fields = ['Index', 'Record', @@ -218,53 +222,56 @@ class PhenotypeSearch(DoSearch): def get_where_clause(self): """Generate clause for WHERE portion of query""" - #Todo: Zach will figure out exactly what both these lines mean - #and comment here + # Todo: Zach will figure out exactly what both these lines mean + # and comment here - #if "'" not in self.search_term[0]: - search_term = "[[:<:]]" + self.handle_wildcard(self.search_term[0]) + "[[:>:]]" + # if "'" not in self.search_term[0]: + search_term = "[[:<:]]" + \ + self.handle_wildcard(self.search_term[0]) + "[[:>:]]" if "_" in self.search_term[0]: if len(self.search_term[0].split("_")[0]) == 3: - search_term = "[[:<:]]" + self.handle_wildcard(self.search_term[0].split("_")[1]) + "[[:>:]]" + search_term = "[[:<:]]" + self.handle_wildcard( + self.search_term[0].split("_")[1]) + "[[:>:]]" # This adds a clause to the query that matches the search term # against each field in the search_fields tuple where_clause_list = [] for field in self.search_fields: - where_clause_list.append('''%s REGEXP "%s"''' % (field, search_term)) + where_clause_list.append('''%s REGEXP "%s"''' % + (field, search_term)) where_clause = "(%s) " % ' OR '.join(where_clause_list) return where_clause - def compile_final_query(self, from_clause = '', where_clause = ''): + def compile_final_query(self, from_clause='', where_clause=''): """Generates the final query string""" from_clause = self.normalize_spaces(from_clause) if self.search_term[0] == "*": query = (self.base_query + - """%s + """%s WHERE PublishXRef.InbredSetId = %s and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id and PublishFreeze.Id = %s ORDER BY PublishXRef.Id""" % ( - from_clause, - escape(str(self.dataset.group.id)), - escape(str(self.dataset.id)))) + from_clause, + escape(str(self.dataset.group.id)), + escape(str(self.dataset.id)))) else: query = (self.base_query + - """%s + """%s WHERE %s and PublishXRef.InbredSetId = %s and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id and PublishFreeze.Id = %s ORDER BY PublishXRef.Id""" % ( - from_clause, - where_clause, - escape(str(self.dataset.group.id)), - escape(str(self.dataset.id)))) + from_clause, + where_clause, + escape(str(self.dataset.group.id)), + escape(str(self.dataset.id)))) return query @@ -276,26 +283,27 @@ class PhenotypeSearch(DoSearch): from_clause = self.normalize_spaces(from_clause) query = (self.base_query + - """%s + """%s WHERE %s PublishXRef.InbredSetId = %s and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id and PublishFreeze.Id = %s""" % ( - from_clause, - where_clause, - escape(str(self.dataset.group.id)), - escape(str(self.dataset.id)))) + from_clause, + where_clause, + escape(str(self.dataset.group.id)), + escape(str(self.dataset.id)))) return self.execute(query) def run(self): """Generates and runs a simple search of a phenotype dataset""" - query = self.compile_final_query(where_clause = self.get_where_clause()) + query = self.compile_final_query(where_clause=self.get_where_clause()) return self.execute(query) + class GenotypeSearch(DoSearch): """A search within a genotype dataset""" @@ -328,45 +336,46 @@ class GenotypeSearch(DoSearch): for field in self.search_fields: where_clause.append('''%s REGEXP "%s"''' % ("%s.%s" % self.mescape(self.dataset.type, field), - self.search_term)) + self.search_term)) logger.debug("hello ;where_clause is:", pf(where_clause)) where_clause = "(%s) " % ' OR '.join(where_clause) return where_clause - def compile_final_query(self, from_clause = '', where_clause = ''): + def compile_final_query(self, from_clause='', where_clause=''): """Generates the final query string""" from_clause = self.normalize_spaces(from_clause) - if self.search_term[0] == "*": - query = (self.base_query + - """WHERE Geno.Id = GenoXRef.GenoId + query = (self.base_query + + """WHERE Geno.Id = GenoXRef.GenoId and GenoXRef.GenoFreezeId = GenoFreeze.Id - and GenoFreeze.Id = %s"""% (escape(str(self.dataset.id)))) + and GenoFreeze.Id = %s""" % (escape(str(self.dataset.id)))) else: query = (self.base_query + - """WHERE %s + """WHERE %s and Geno.Id = GenoXRef.GenoId and GenoXRef.GenoFreezeId = GenoFreeze.Id - and GenoFreeze.Id = %s"""% (where_clause, - escape(str(self.dataset.id)))) + and GenoFreeze.Id = %s""" % (where_clause, + escape(str(self.dataset.id)))) return query def run(self): """Generates and runs a simple search of a genotype dataset""" - #Todo: Zach will figure out exactly what both these lines mean - #and comment here + # Todo: Zach will figure out exactly what both these lines mean + # and comment here if self.search_term[0] == "*": self.query = self.compile_final_query() else: - self.query = self.compile_final_query(where_clause = self.get_where_clause()) + self.query = self.compile_final_query( + where_clause=self.get_where_clause()) return self.execute(self.query) + class RifSearch(MrnaAssaySearch): """Searches for traits with a Gene RIF entry including the search term.""" @@ -390,10 +399,11 @@ class RifSearch(MrnaAssaySearch): return self.execute(query) + class WikiSearch(MrnaAssaySearch): """Searches GeneWiki for traits other people have annotated""" - DoSearch.search_types['ProbeSet_WIKI'] = "WikiSearch" + DoSearch.search_types['ProbeSet_WIKI'] = "WikiSearch" def get_from_clause(self): return ", GeneRIF " @@ -403,7 +413,7 @@ class WikiSearch(MrnaAssaySearch): and GeneRIF.versionId=0 and GeneRIF.display>0 and (GeneRIF.comment REGEXP '%s' or GeneRIF.initial = '%s') """ % (self.dataset.type, - "[[:<:]]"+str(self.search_term[0])+"[[:>:]]", + "[[:<:]]" + str(self.search_term[0]) + "[[:>:]]", str(self.search_term[0])) return where_clause @@ -415,10 +425,11 @@ class WikiSearch(MrnaAssaySearch): return self.execute(query) + class GoSearch(MrnaAssaySearch): """Searches for synapse-associated genes listed in the Gene Ontology.""" - DoSearch.search_types['ProbeSet_GO'] = "GoSearch" + DoSearch.search_types['ProbeSet_GO'] = "GoSearch" def get_from_clause(self): from_clause = """, db_GeneOntology.term as GOterm, @@ -429,7 +440,7 @@ class GoSearch(MrnaAssaySearch): def get_where_clause(self): field = 'GOterm.acc' - go_id = 'GO:' + ('0000000'+self.search_term[0])[-7:] + go_id = 'GO:' + ('0000000' + self.search_term[0])[-7:] statements = ("""%s.symbol=GOgene_product.symbol and GOassociation.gene_product_id=GOgene_product.id and @@ -448,7 +459,9 @@ class GoSearch(MrnaAssaySearch): return self.execute(query) -#ZS: Not sure what the best way to deal with LRS searches is +# ZS: Not sure what the best way to deal with LRS searches is + + class LrsSearch(DoSearch): """Searches for genes with a QTL within the given LRS values @@ -486,17 +499,18 @@ class LrsSearch(DoSearch): assert isinstance(self.search_term, (list, tuple)) lrs_min, lrs_max = self.search_term[:2] if self.search_type == "LOD": - lrs_min = lrs_min*4.61 - lrs_max = lrs_max*4.61 + lrs_min = lrs_min * 4.61 + lrs_max = lrs_max * 4.61 where_clause = """ %sXRef.LRS > %s and %sXRef.LRS < %s """ % self.mescape(self.dataset.type, - min(lrs_min, lrs_max), + min(lrs_min, + lrs_max), self.dataset.type, max(lrs_min, lrs_max)) if len(self.search_term) > 2: - #If the user typed, for example "Chr4", the "Chr" substring needs to be removed so that all search elements can be converted to floats + # If the user typed, for example "Chr4", the "Chr" substring needs to be removed so that all search elements can be converted to floats chr_num = self.search_term[2] if "chr" in self.search_term[2].lower(): chr_num = self.search_term[2].lower().replace("chr", "") @@ -512,27 +526,27 @@ class LrsSearch(DoSearch): where_clause += """ and %sXRef.Locus = Geno.name and Geno.SpeciesId = %s """ % self.mescape(self.dataset.type, - self.species_id) + self.species_id) else: # Deal with >, <, >=, and <= logger.debug("self.search_term is:", self.search_term) lrs_val = self.search_term[0] if self.search_type == "LOD": - lrs_val = lrs_val*4.61 + lrs_val = lrs_val * 4.61 where_clause = """ %sXRef.LRS %s %s """ % self.mescape(self.dataset.type, - self.search_operator, - self.search_term[0]) + self.search_operator, + self.search_term[0]) return where_clause - def run(self): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(self.from_clause, self.where_clause) + self.query = self.compile_final_query( + self.from_clause, self.where_clause) return self.execute(self.query) @@ -546,10 +560,12 @@ class MrnaLrsSearch(LrsSearch, MrnaAssaySearch): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(from_clause = self.from_clause, where_clause = self.where_clause) + self.query = self.compile_final_query( + from_clause=self.from_clause, where_clause=self.where_clause) return self.execute(self.query) + class PhenotypeLrsSearch(LrsSearch, PhenotypeSearch): for search_key in ('LRS', 'LOD'): @@ -560,7 +576,8 @@ class PhenotypeLrsSearch(LrsSearch, PhenotypeSearch): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(from_clause = self.from_clause, where_clause = self.where_clause) + self.query = self.compile_final_query( + from_clause=self.from_clause, where_clause=self.where_clause) return self.execute(self.query) @@ -587,7 +604,8 @@ class CisTransLrsSearch(DoSearch): elif len(self.search_term) == 3: lrs_min, lrs_max, self.mb_buffer = self.search_term elif len(self.search_term) == 4: - lrs_min, lrs_max, self.mb_buffer = [float(value) for value in self.search_term[:3]] + lrs_min, lrs_max, self.mb_buffer = [ + float(value) for value in self.search_term[:3]] chromosome = self.search_term[3] if "Chr" in chromosome or "chr" in chromosome: chromosome = int(chromosome[3:]) @@ -599,19 +617,19 @@ class CisTransLrsSearch(DoSearch): lrs_max = lrs_max * 4.61 sub_clause = """ %sXRef.LRS > %s and - %sXRef.LRS < %s and """ % ( - escape(self.dataset.type), - escape(str(min(lrs_min, lrs_max))), - escape(self.dataset.type), - escape(str(max(lrs_min, lrs_max))) - ) + %sXRef.LRS < %s and """ % ( + escape(self.dataset.type), + escape(str(min(lrs_min, lrs_max))), + escape(self.dataset.type), + escape(str(max(lrs_min, lrs_max))) + ) else: # Deal with >, <, >=, and <= - sub_clause = """ %sXRef.LRS %s %s and """ % ( - escape(self.dataset.type), - escape(self.search_operator), - escape(self.search_term[0]) - ) + sub_clause = """ %sXRef.LRS %s %s and """ % ( + escape(self.dataset.type), + escape(self.search_operator), + escape(self.search_term[0]) + ) if cis_trans == "cis": where_clause = sub_clause + """ @@ -619,36 +637,42 @@ class CisTransLrsSearch(DoSearch): %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and %s.Chr = Geno.Chr""" % ( - escape(self.dataset.type), - the_operator, - escape(str(self.mb_buffer)), - escape(self.dataset.type), - escape(str(self.species_id)), - escape(self.dataset.type) - ) + escape(self.dataset.type), + the_operator, + escape(str(self.mb_buffer)), + escape(self.dataset.type), + escape(str(self.species_id)), + escape(self.dataset.type) + ) else: if chromosome: location_clause = "(%s.Chr = '%s' and %s.Chr = Geno.Chr and ABS(%s.Mb-Geno.Mb) %s %s) or (%s.Chr != Geno.Chr and Geno.Chr = '%s')" % (escape(self.dataset.type), - chromosome, - escape(self.dataset.type), - escape(self.dataset.type), - the_operator, - escape(str(self.mb_buffer)), - escape(self.dataset.type), - chromosome) + chromosome, + escape( + self.dataset.type), + escape( + self.dataset.type), + the_operator, + escape( + str(self.mb_buffer)), + escape( + self.dataset.type), + chromosome) else: - location_clause = "(ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or (%s.Chr != Geno.Chr)" % (escape(self.dataset.type), the_operator, escape(str(self.mb_buffer)), escape(self.dataset.type), escape(self.dataset.type)) + location_clause = "(ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or (%s.Chr != Geno.Chr)" % (escape( + self.dataset.type), the_operator, escape(str(self.mb_buffer)), escape(self.dataset.type), escape(self.dataset.type)) where_clause = sub_clause + """ %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and (%s)""" % ( - escape(self.dataset.type), - escape(str(self.species_id)), - location_clause - ) + escape(self.dataset.type), + escape(str(self.species_id)), + location_clause + ) return where_clause + class CisLrsSearch(CisTransLrsSearch, MrnaAssaySearch): """ Searches for genes on a particular chromosome with a cis-eQTL within the given LRS values @@ -667,7 +691,7 @@ class CisLrsSearch(CisTransLrsSearch, MrnaAssaySearch): """ for search_key in ('LRS', 'LOD'): - DoSearch.search_types['ProbeSet_CIS'+search_key] = "CisLrsSearch" + DoSearch.search_types['ProbeSet_CIS' + search_key] = "CisLrsSearch" def get_where_clause(self): return CisTransLrsSearch.get_where_clause(self, "cis") @@ -676,10 +700,12 @@ class CisLrsSearch(CisTransLrsSearch, MrnaAssaySearch): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(self.from_clause, self.where_clause) + self.query = self.compile_final_query( + self.from_clause, self.where_clause) return self.execute(self.query) + class TransLrsSearch(CisTransLrsSearch, MrnaAssaySearch): """Searches for genes on a particular chromosome with a cis-eQTL within the given LRS values @@ -697,7 +723,7 @@ class TransLrsSearch(CisTransLrsSearch, MrnaAssaySearch): """ for search_key in ('LRS', 'LOD'): - DoSearch.search_types['ProbeSet_TRANS'+search_key] = "TransLrsSearch" + DoSearch.search_types['ProbeSet_TRANS' + search_key] = "TransLrsSearch" def get_where_clause(self): return CisTransLrsSearch.get_where_clause(self, "trans") @@ -706,7 +732,8 @@ class TransLrsSearch(CisTransLrsSearch, MrnaAssaySearch): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(self.from_clause, self.where_clause) + self.query = self.compile_final_query( + self.from_clause, self.where_clause) return self.execute(self.query) @@ -725,14 +752,15 @@ class MeanSearch(MrnaAssaySearch): where_clause = """ %sXRef.mean > %s and %sXRef.mean < %s """ % self.mescape(self.dataset.type, - min(self.mean_min, self.mean_max), - self.dataset.type, - max(self.mean_min, self.mean_max)) + min(self.mean_min, + self.mean_max), + self.dataset.type, + max(self.mean_min, self.mean_max)) else: # Deal with >, <, >=, and <= where_clause = """ %sXRef.mean %s %s """ % self.mescape(self.dataset.type, - self.search_operator, - self.search_term[0]) + self.search_operator, + self.search_term[0]) return where_clause @@ -740,10 +768,11 @@ class MeanSearch(MrnaAssaySearch): self.where_clause = self.get_where_clause() logger.debug("where_clause is:", pf(self.where_clause)) - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) + class RangeSearch(MrnaAssaySearch): """Searches for genes with a range of expression varying between two values""" @@ -775,10 +804,11 @@ class RangeSearch(MrnaAssaySearch): def run(self): self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) + class PositionSearch(DoSearch): """Searches for genes/markers located within a specified range on a specified chromosome""" @@ -786,7 +816,8 @@ class PositionSearch(DoSearch): DoSearch.search_types[search_key] = "PositionSearch" def get_where_clause(self): - self.search_term = [float(value) if is_number(value) else value for value in self.search_term] + self.search_term = [float(value) if is_number( + value) else value for value in self.search_term] chr, self.mb_min, self.mb_max = self.search_term[:3] self.chr = str(chr).lower() self.get_chr() @@ -796,11 +827,11 @@ class PositionSearch(DoSearch): %s.Mb < %s """ % self.mescape(self.dataset.type, self.chr, self.dataset.type, - min(self.mb_min, self.mb_max), + min(self.mb_min, + self.mb_max), self.dataset.type, max(self.mb_min, self.mb_max)) - return where_clause def get_chr(self): @@ -815,36 +846,39 @@ class PositionSearch(DoSearch): def run(self): self.get_where_clause() - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) + class MrnaPositionSearch(PositionSearch, MrnaAssaySearch): """Searches for genes located within a specified range on a specified chromosome""" for search_key in ('POSITION', 'POS', 'MB'): - DoSearch.search_types['ProbeSet_'+search_key] = "MrnaPositionSearch" + DoSearch.search_types['ProbeSet_' + search_key] = "MrnaPositionSearch" def run(self): self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) + class GenotypePositionSearch(PositionSearch, GenotypeSearch): """Searches for genes located within a specified range on a specified chromosome""" for search_key in ('POSITION', 'POS', 'MB'): - DoSearch.search_types['Geno_'+search_key] = "GenotypePositionSearch" + DoSearch.search_types['Geno_' + search_key] = "GenotypePositionSearch" def run(self): self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) + class PvalueSearch(MrnaAssaySearch): """Searches for traits with a permutationed p-value between low and high""" @@ -859,25 +893,26 @@ class PvalueSearch(MrnaAssaySearch): self.pvalue_min, self.pvalue_max = self.search_term[:2] self.where_clause = """ %sXRef.pValue > %s and %sXRef.pValue < %s """ % self.mescape( - self.dataset.type, - min(self.pvalue_min, self.pvalue_max), - self.dataset.type, - max(self.pvalue_min, self.pvalue_max)) + self.dataset.type, + min(self.pvalue_min, self.pvalue_max), + self.dataset.type, + max(self.pvalue_min, self.pvalue_max)) else: # Deal with >, <, >=, and <= self.where_clause = """ %sXRef.pValue %s %s """ % self.mescape( - self.dataset.type, - self.search_operator, - self.search_term[0]) + self.dataset.type, + self.search_operator, + self.search_term[0]) logger.debug("where_clause is:", pf(self.where_clause)) - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) logger.sql(self.query) return self.execute(self.query) + class AuthorSearch(PhenotypeSearch): """Searches for phenotype traits with specified author(s)""" @@ -888,7 +923,7 @@ class AuthorSearch(PhenotypeSearch): self.where_clause = """ Publication.Authors REGEXP "[[:<:]]%s[[:>:]]" and """ % (self.search_term[0]) - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) @@ -900,6 +935,7 @@ def is_number(s): except ValueError: return False + def get_aliases(symbol, species): if species == "mouse": symbol_string = symbol.capitalize() @@ -909,7 +945,8 @@ def get_aliases(symbol, species): return [] filtered_aliases = [] - response = requests.get(GN2_BASE_URL + "/gn3/gene/aliases/" + symbol_string) + response = requests.get( + GN2_BASE_URL + "/gn3/gene/aliases/" + symbol_string) if response: alias_list = json.loads(response.content) @@ -923,9 +960,10 @@ def get_aliases(symbol, species): return filtered_aliases + if __name__ == "__main__": - ### Usually this will be used as a library, but call it from the command line for testing - ### And it runs the code below + # Usually this will be used as a library, but call it from the command line for testing + # And it runs the code below import MySQLdb import sys diff --git a/wqflask/wqflask/docs.py b/wqflask/wqflask/docs.py index 207767c4..0a1a597d 100644 --- a/wqflask/wqflask/docs.py +++ b/wqflask/wqflask/docs.py @@ -5,6 +5,7 @@ from flask import g from utility.logger import getLogger logger = getLogger(__name__) + class Docs: def __init__(self, entry, start_vars={}): @@ -19,11 +20,10 @@ class Docs: self.title = self.entry.capitalize() self.content = "" else: - + self.title = result[0] self.content = result[1].decode("utf-8") - self.editable = "false" # ZS: Removing option to edit to see if text still gets vandalized try: @@ -35,11 +35,13 @@ class Docs: def update_text(start_vars): content = start_vars['ckcontent'] - content = content.replace('%', '%%').replace('"', '\\"').replace("'", "\\'") + content = content.replace('%', '%%').replace( + '"', '\\"').replace("'", "\\'") try: if g.user_session.record['user_email_address'] == "zachary.a.sloan@gmail.com" or g.user_session.record['user_email_address'] == "labwilliams@gmail.com": - sql = "UPDATE Docs SET content='{0}' WHERE entry='{1}';".format(content, start_vars['entry_type']) + sql = "UPDATE Docs SET content='{0}' WHERE entry='{1}';".format( + content, start_vars['entry_type']) g.db.execute(sql) except: pass diff --git a/wqflask/wqflask/export_traits.py b/wqflask/wqflask/export_traits.py index 6fb760e0..a22d6acc 100644 --- a/wqflask/wqflask/export_traits.py +++ b/wqflask/wqflask/export_traits.py @@ -1,6 +1,6 @@ import csv import xlsxwriter -import io +import io import datetime import itertools @@ -13,13 +13,14 @@ from base.trait import create_trait, retrieve_trait_info from pprint import pformat as pf from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) + def export_search_results_csv(targs): table_data = json.loads(targs['export_data']) table_rows = table_data['rows'] - + now = datetime.datetime.now() time_str = now.strftime('%H:%M_%d%B%Y') if 'file_name' in targs: @@ -34,9 +35,12 @@ def export_search_results_csv(targs): metadata.append(["Data Set: " + targs['database_name']]) if 'accession_id' in targs: if targs['accession_id'] != "None": - metadata.append(["Metadata Link: http://genenetwork.org/webqtl/main.py?FormID=sharinginfo&GN_AccessionId=" + targs['accession_id']]) - metadata.append(["Export Date: " + datetime.datetime.now().strftime("%B %d, %Y")]) - metadata.append(["Export Time: " + datetime.datetime.now().strftime("%H:%M GMT")]) + metadata.append( + ["Metadata Link: http://genenetwork.org/webqtl/main.py?FormID=sharinginfo&GN_AccessionId=" + targs['accession_id']]) + metadata.append( + ["Export Date: " + datetime.datetime.now().strftime("%B %d, %Y")]) + metadata.append( + ["Export Time: " + datetime.datetime.now().strftime("%H:%M GMT")]) if 'search_string' in targs: if targs['search_string'] != "None": metadata.append(["Search Query: " + targs['search_string']]) @@ -51,10 +55,12 @@ def export_search_results_csv(targs): for trait in table_rows: trait_name, dataset_name, _hash = trait.split(":") trait_ob = create_trait(name=trait_name, dataset_name=dataset_name) - trait_ob = retrieve_trait_info(trait_ob, trait_ob.dataset, get_qtl_info=True) + trait_ob = retrieve_trait_info( + trait_ob, trait_ob.dataset, get_qtl_info=True) trait_list.append(trait_ob) - table_headers = ['Index', 'URL', 'Species', 'Group', 'Dataset', 'Record ID', 'Symbol', 'Description', 'ProbeTarget', 'PubMed_ID', 'Chr', 'Mb', 'Alias', 'Gene_ID', 'Homologene_ID', 'UniGene_ID', 'Strand_Probe', 'Probe_set_specificity', 'Probe_set_BLAT_score', 'Probe_set_BLAT_Mb_start', 'Probe_set_BLAT_Mb_end', 'QTL_Chr', 'QTL_Mb', 'Locus_at_Peak', 'Max_LRS', 'P_value_of_MAX', 'Mean_Expression'] + table_headers = ['Index', 'URL', 'Species', 'Group', 'Dataset', 'Record ID', 'Symbol', 'Description', 'ProbeTarget', 'PubMed_ID', 'Chr', 'Mb', 'Alias', 'Gene_ID', 'Homologene_ID', 'UniGene_ID', + 'Strand_Probe', 'Probe_set_specificity', 'Probe_set_BLAT_score', 'Probe_set_BLAT_Mb_start', 'Probe_set_BLAT_Mb_end', 'QTL_Chr', 'QTL_Mb', 'Locus_at_Peak', 'Max_LRS', 'P_value_of_MAX', 'Mean_Expression'] traits_by_group = sort_traits_by_group(trait_list) @@ -86,7 +92,8 @@ def export_search_results_csv(targs): trait_symbol = "N/A" row_contents = [ i + 1, - "https://genenetwork.org/show_trait?trait_id=" + str(trait.name) + "&dataset=" + str(trait.dataset.name), + "https://genenetwork.org/show_trait?trait_id=" + \ + str(trait.name) + "&dataset=" + str(trait.dataset.name), trait.dataset.group.species, trait.dataset.group.name, trait.dataset.name, @@ -116,13 +123,15 @@ def export_search_results_csv(targs): for sample in trait.dataset.group.samplelist: if sample in trait.data: - row_contents += [trait.data[sample].value, trait.data[sample].variance] + row_contents += [trait.data[sample].value, + trait.data[sample].variance] else: row_contents += ["x", "x"] csv_rows.append(row_contents) - csv_rows = list(map(list, itertools.zip_longest(*[row for row in csv_rows]))) + csv_rows = list( + map(list, itertools.zip_longest(*[row for row in csv_rows]))) writer.writerows(csv_rows) csv_data = buff.getvalue() buff.close() @@ -132,6 +141,7 @@ def export_search_results_csv(targs): return file_list + def sort_traits_by_group(trait_list=[]): traits_by_group = {} for trait in trait_list: diff --git a/wqflask/wqflask/external_tools/send_to_bnw.py b/wqflask/wqflask/external_tools/send_to_bnw.py index c5c79e98..c1b14ede 100644 --- a/wqflask/wqflask/external_tools/send_to_bnw.py +++ b/wqflask/wqflask/external_tools/send_to_bnw.py @@ -1,4 +1,4 @@ -## Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License @@ -22,11 +22,13 @@ from base.trait import GeneralTrait from utility import helper_functions, corr_result_helpers import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + class SendToBNW: def __init__(self, start_vars): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) trait_samples_list = [] @@ -38,9 +40,10 @@ class SendToBNW: trait1_samples = list(this_sample_data.keys()) trait_samples_list.append(trait1_samples) - shared_samples = list(set(trait_samples_list[0]).intersection(*trait_samples_list)) + shared_samples = list( + set(trait_samples_list[0]).intersection(*trait_samples_list)) - self.form_value = "" #ZS: string that is passed to BNW through form + self.form_value = "" # ZS: string that is passed to BNW through form values_list = [] for trait_db in self.trait_list: this_trait = trait_db[0] diff --git a/wqflask/wqflask/external_tools/send_to_geneweaver.py b/wqflask/wqflask/external_tools/send_to_geneweaver.py index 47e4c53a..9a4f7150 100644 --- a/wqflask/wqflask/external_tools/send_to_geneweaver.py +++ b/wqflask/wqflask/external_tools/send_to_geneweaver.py @@ -1,4 +1,4 @@ -## Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License @@ -27,11 +27,13 @@ from base.species import TheSpecies from utility import helper_functions, corr_result_helpers import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + class SendToGeneWeaver: def __init__(self, start_vars): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) self.chip_name = test_chip(self.trait_list) @@ -52,11 +54,12 @@ class SendToGeneWeaver: trait_name_list = get_trait_name_list(self.trait_list) self.hidden_vars = { - 'client': "genenetwork", - 'species': species_name, - 'idtype': self.chip_name, - 'list': ",".join(trait_name_list), - } + 'client': "genenetwork", + 'species': species_name, + 'idtype': self.chip_name, + 'list': ",".join(trait_name_list), + } + def get_trait_name_list(trait_list): name_list = [] @@ -65,6 +68,7 @@ def get_trait_name_list(trait_list): return name_list + def test_chip(trait_list): final_chip_name = "" @@ -74,7 +78,7 @@ def test_chip(trait_list): FROM GeneChip, ProbeFreeze, ProbeSetFreeze WHERE GeneChip.Id = ProbeFreeze.ChipId and ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and - ProbeSetFreeze.Name = '%s'""" % dataset.name).fetchone() + ProbeSetFreeze.Name = '%s'""" % dataset.name).fetchone() if result: chip_name = result[0] diff --git a/wqflask/wqflask/external_tools/send_to_webgestalt.py b/wqflask/wqflask/external_tools/send_to_webgestalt.py index e1e5e655..6e74f4fe 100644 --- a/wqflask/wqflask/external_tools/send_to_webgestalt.py +++ b/wqflask/wqflask/external_tools/send_to_webgestalt.py @@ -1,4 +1,4 @@ -## Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License @@ -27,11 +27,13 @@ from base.species import TheSpecies from utility import helper_functions, corr_result_helpers import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + class SendToWebGestalt: def __init__(self, start_vars): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) self.chip_name = test_chip(self.trait_list) @@ -46,18 +48,18 @@ class SendToWebGestalt: id_type = "entrezgene" - self.hidden_vars = { - 'gene_list' : "\n".join(gene_id_list), - 'id_type' : "entrezgene", - 'ref_set' : "genome", - 'enriched_database_category' : "geneontology", - 'enriched_database_name' : "Biological_Process", - 'sig_method' : "fdr", - 'sig_value' : "0.05", - 'enrich_method' : "ORA", - 'fdr_method' : "BH", - 'min_num' : "2" - } + self.hidden_vars = { + 'gene_list': "\n".join(gene_id_list), + 'id_type': "entrezgene", + 'ref_set': "genome", + 'enriched_database_category': "geneontology", + 'enriched_database_name': "Biological_Process", + 'sig_method': "fdr", + 'sig_value': "0.05", + 'enrich_method': "ORA", + 'fdr_method': "BH", + 'min_num': "2" + } species = self.trait_list[0][1].group.species if species == "rat": @@ -69,6 +71,7 @@ class SendToWebGestalt: else: self.hidden_vars['organism'] = "others" + def test_chip(trait_list): final_chip_name = "" @@ -78,7 +81,7 @@ def test_chip(trait_list): FROM GeneChip, ProbeFreeze, ProbeSetFreeze WHERE GeneChip.Id = ProbeFreeze.ChipId and ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and - ProbeSetFreeze.Name = '%s'""" % dataset.name).fetchone() + ProbeSetFreeze.Name = '%s'""" % dataset.name).fetchone() if result: chip_name = result[0] @@ -113,6 +116,7 @@ def test_chip(trait_list): return chip_name + def gen_gene_id_list(trait_list): trait_name_list = [] gene_id_list = [] diff --git a/wqflask/wqflask/group_manager.py b/wqflask/wqflask/group_manager.py index 69ee9623..04a100ba 100644 --- a/wqflask/wqflask/group_manager.py +++ b/wqflask/wqflask/group_manager.py @@ -1,4 +1,5 @@ -import random, string +import random +import string from flask import (Flask, g, render_template, url_for, request, make_response, redirect, flash) @@ -7,149 +8,168 @@ from wqflask import app from wqflask.user_login import send_verification_email, send_invitation_email, basic_info, set_password from utility.redis_tools import get_user_groups, get_group_info, save_user, create_group, delete_group, add_users_to_group, remove_users_from_group, \ - change_group_name, save_verification_code, check_verification_code, get_user_by_unique_column, get_resources, get_resource_info + change_group_name, save_verification_code, check_verification_code, get_user_by_unique_column, get_resources, get_resource_info from utility.logger import getLogger logger = getLogger(__name__) + @app.route("/groups/manage", methods=('GET', 'POST')) def manage_groups(): - params = request.form if request.form else request.args - if "add_new_group" in params: - return redirect(url_for('add_group')) - else: - admin_groups, member_groups = get_user_groups(g.user_session.user_id) - return render_template("admin/group_manager.html", admin_groups=admin_groups, member_groups=member_groups) + params = request.form if request.form else request.args + if "add_new_group" in params: + return redirect(url_for('add_group')) + else: + admin_groups, member_groups = get_user_groups(g.user_session.user_id) + return render_template("admin/group_manager.html", admin_groups=admin_groups, member_groups=member_groups) + @app.route("/groups/view", methods=('GET', 'POST')) def view_group(): - params = request.form if request.form else request.args - group_id = params['id'] - group_info = get_group_info(group_id) - admins_info = [] - user_is_admin = False - if g.user_session.user_id in group_info['admins']: - user_is_admin = True - for user_id in group_info['admins']: - if user_id: - user_info = get_user_by_unique_column("user_id", user_id) - admins_info.append(user_info) - members_info = [] - for user_id in group_info['members']: - if user_id: - user_info = get_user_by_unique_column("user_id", user_id) - members_info.append(user_info) - - #ZS: This whole part might not scale well with many resources - resources_info = [] - all_resources = get_resources() - for resource_id in all_resources: - resource_info = get_resource_info(resource_id) - group_masks = resource_info['group_masks'] - if group_id in group_masks: - this_resource = {} - privileges = group_masks[group_id] - this_resource['id'] = resource_id - this_resource['name'] = resource_info['name'] - this_resource['data'] = privileges['data'] - this_resource['metadata'] = privileges['metadata'] - this_resource['admin'] = privileges['admin'] - resources_info.append(this_resource) - - return render_template("admin/view_group.html", group_info=group_info, admins=admins_info, members=members_info, user_is_admin=user_is_admin, resources=resources_info) + params = request.form if request.form else request.args + group_id = params['id'] + group_info = get_group_info(group_id) + admins_info = [] + user_is_admin = False + if g.user_session.user_id in group_info['admins']: + user_is_admin = True + for user_id in group_info['admins']: + if user_id: + user_info = get_user_by_unique_column("user_id", user_id) + admins_info.append(user_info) + members_info = [] + for user_id in group_info['members']: + if user_id: + user_info = get_user_by_unique_column("user_id", user_id) + members_info.append(user_info) + + # ZS: This whole part might not scale well with many resources + resources_info = [] + all_resources = get_resources() + for resource_id in all_resources: + resource_info = get_resource_info(resource_id) + group_masks = resource_info['group_masks'] + if group_id in group_masks: + this_resource = {} + privileges = group_masks[group_id] + this_resource['id'] = resource_id + this_resource['name'] = resource_info['name'] + this_resource['data'] = privileges['data'] + this_resource['metadata'] = privileges['metadata'] + this_resource['admin'] = privileges['admin'] + resources_info.append(this_resource) + + return render_template("admin/view_group.html", group_info=group_info, admins=admins_info, members=members_info, user_is_admin=user_is_admin, resources=resources_info) + @app.route("/groups/remove", methods=('POST',)) def remove_groups(): - group_ids_to_remove = request.form['selected_group_ids'] - for group_id in group_ids_to_remove.split(":"): - delete_group(g.user_session.user_id, group_id) + group_ids_to_remove = request.form['selected_group_ids'] + for group_id in group_ids_to_remove.split(":"): + delete_group(g.user_session.user_id, group_id) + + return redirect(url_for('manage_groups')) - return redirect(url_for('manage_groups')) @app.route("/groups/remove_users", methods=('POST',)) def remove_users(): - group_id = request.form['group_id'] - admin_ids_to_remove = request.form['selected_admin_ids'] - member_ids_to_remove = request.form['selected_member_ids'] + group_id = request.form['group_id'] + admin_ids_to_remove = request.form['selected_admin_ids'] + member_ids_to_remove = request.form['selected_member_ids'] - remove_users_from_group(g.user_session.user_id, admin_ids_to_remove.split(":"), group_id, user_type="admins") - remove_users_from_group(g.user_session.user_id, member_ids_to_remove.split(":"), group_id, user_type="members") + remove_users_from_group(g.user_session.user_id, admin_ids_to_remove.split( + ":"), group_id, user_type="admins") + remove_users_from_group(g.user_session.user_id, member_ids_to_remove.split( + ":"), group_id, user_type="members") + + return redirect(url_for('view_group', id=group_id)) - return redirect(url_for('view_group', id=group_id)) @app.route("/groups/add_<path:user_type>", methods=('POST',)) def add_users(user_type='members'): - group_id = request.form['group_id'] - if user_type == "admins": - user_emails = request.form['admin_emails_to_add'].split(",") - add_users_to_group(g.user_session.user_id, group_id, user_emails, admins = True) - elif user_type == "members": - user_emails = request.form['member_emails_to_add'].split(",") - add_users_to_group(g.user_session.user_id, group_id, user_emails, admins = False) + group_id = request.form['group_id'] + if user_type == "admins": + user_emails = request.form['admin_emails_to_add'].split(",") + add_users_to_group(g.user_session.user_id, group_id, + user_emails, admins=True) + elif user_type == "members": + user_emails = request.form['member_emails_to_add'].split(",") + add_users_to_group(g.user_session.user_id, group_id, + user_emails, admins=False) + + return redirect(url_for('view_group', id=group_id)) - return redirect(url_for('view_group', id=group_id)) @app.route("/groups/change_name", methods=('POST',)) def change_name(): - group_id = request.form['group_id'] - new_name = request.form['new_name'] - group_info = change_group_name(g.user_session.user_id, group_id, new_name) + group_id = request.form['group_id'] + new_name = request.form['new_name'] + group_info = change_group_name(g.user_session.user_id, group_id, new_name) + + return new_name - return new_name @app.route("/groups/create", methods=('GET', 'POST')) def add_or_edit_group(): - params = request.form if request.form else request.args - if "group_name" in params: - member_user_ids = set() - admin_user_ids = set() - admin_user_ids.add(g.user_session.user_id) #ZS: Always add the user creating the group as an admin - if "admin_emails_to_add" in params: - admin_emails = params['admin_emails_to_add'].split(",") - for email in admin_emails: - user_details = get_user_by_unique_column("email_address", email) - if user_details: - admin_user_ids.add(user_details['user_id']) - #send_group_invites(params['group_id'], user_email_list = admin_emails, user_type="admins") - if "member_emails_to_add" in params: - member_emails = params['member_emails_to_add'].split(",") - for email in member_emails: - user_details = get_user_by_unique_column("email_address", email) - if user_details: - member_user_ids.add(user_details['user_id']) - #send_group_invites(params['group_id'], user_email_list = user_emails, user_type="members") - - create_group(list(admin_user_ids), list(member_user_ids), params['group_name']) - return redirect(url_for('manage_groups')) - else: - return render_template("admin/create_group.html") - -#ZS: Will integrate this later, for now just letting users be added directly -def send_group_invites(group_id, user_email_list = [], user_type="members"): - for user_email in user_email_list: - user_details = get_user_by_unique_column("email_address", user_email) - if user_details: - group_info = get_group_info(group_id) - #ZS: Probably not necessary since the group should normally always exist if group_id is being passed here, - # but it's technically possible to hit it if Redis is cleared out before submitting the new users or something - if group_info: - #ZS: Don't add user if they're already an admin or if they're being added a regular user and are already a regular user, - # but do add them if they're a regular user and are added as an admin - if (user_details['user_id'] in group_info['admins']) or \ - ((user_type == "members") and (user_details['user_id'] in group_info['members'])): - continue - else: - send_verification_email(user_details, template_name = "email/group_verification.txt", key_prefix = "verification_code", subject = "You've been invited to join a GeneNetwork user group") - else: - temp_password = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - user_details = { - 'user_id': str(uuid.uuid4()), - 'email_address': user_email, - 'registration_info': basic_info(), - 'password': set_password(temp_password), - 'confirmed': 0 - } - save_user(user_details, user_details['user_id']) - send_invitation_email(user_email, temp_password) - -#@app.route() + params = request.form if request.form else request.args + if "group_name" in params: + member_user_ids = set() + admin_user_ids = set() + # ZS: Always add the user creating the group as an admin + admin_user_ids.add(g.user_session.user_id) + if "admin_emails_to_add" in params: + admin_emails = params['admin_emails_to_add'].split(",") + for email in admin_emails: + user_details = get_user_by_unique_column( + "email_address", email) + if user_details: + admin_user_ids.add(user_details['user_id']) + #send_group_invites(params['group_id'], user_email_list = admin_emails, user_type="admins") + if "member_emails_to_add" in params: + member_emails = params['member_emails_to_add'].split(",") + for email in member_emails: + user_details = get_user_by_unique_column( + "email_address", email) + if user_details: + member_user_ids.add(user_details['user_id']) + #send_group_invites(params['group_id'], user_email_list = user_emails, user_type="members") + + create_group(list(admin_user_ids), list( + member_user_ids), params['group_name']) + return redirect(url_for('manage_groups')) + else: + return render_template("admin/create_group.html") + +# ZS: Will integrate this later, for now just letting users be added directly + + +def send_group_invites(group_id, user_email_list=[], user_type="members"): + for user_email in user_email_list: + user_details = get_user_by_unique_column("email_address", user_email) + if user_details: + group_info = get_group_info(group_id) + # ZS: Probably not necessary since the group should normally always exist if group_id is being passed here, + # but it's technically possible to hit it if Redis is cleared out before submitting the new users or something + if group_info: + # ZS: Don't add user if they're already an admin or if they're being added a regular user and are already a regular user, + # but do add them if they're a regular user and are added as an admin + if (user_details['user_id'] in group_info['admins']) or \ + ((user_type == "members") and (user_details['user_id'] in group_info['members'])): + continue + else: + send_verification_email(user_details, template_name="email/group_verification.txt", + key_prefix="verification_code", subject="You've been invited to join a GeneNetwork user group") + else: + temp_password = ''.join(random.choice( + string.ascii_uppercase + string.digits) for _ in range(6)) + user_details = { + 'user_id': str(uuid.uuid4()), + 'email_address': user_email, + 'registration_info': basic_info(), + 'password': set_password(temp_password), + 'confirmed': 0 + } + save_user(user_details, user_details['user_id']) + send_invitation_email(user_email, temp_password) + +# @app.route() diff --git a/wqflask/wqflask/gsearch.py b/wqflask/wqflask/gsearch.py index 9bf23d57..fb8bdc55 100644 --- a/wqflask/wqflask/gsearch.py +++ b/wqflask/wqflask/gsearch.py @@ -18,6 +18,7 @@ from utility.type_checking import is_float, is_int, is_str, get_float, get_int, from utility.logger import getLogger logger = getLogger(__name__) + class GSearch: def __init__(self, kw): @@ -76,18 +77,21 @@ class GSearch: this_trait['name'] = line[5] this_trait['dataset'] = line[3] this_trait['dataset_fullname'] = line[4] - this_trait['hmac'] = hmac.data_hmac('{}:{}'.format(line[5], line[3])) + this_trait['hmac'] = hmac.data_hmac( + '{}:{}'.format(line[5], line[3])) this_trait['species'] = line[0] this_trait['group'] = line[1] this_trait['tissue'] = line[2] this_trait['symbol'] = line[6] if line[7]: - this_trait['description'] = line[7].decode('utf-8', 'replace') + this_trait['description'] = line[7].decode( + 'utf-8', 'replace') else: this_trait['description'] = "N/A" this_trait['location_repr'] = 'N/A' if (line[8] != "NULL" and line[8] != "") and (line[9] != 0): - this_trait['location_repr'] = 'Chr%s: %.6f' % (line[8], float(line[9])) + this_trait['location_repr'] = 'Chr%s: %.6f' % ( + line[8], float(line[9])) try: this_trait['mean'] = '%.3f' % line[10] except: @@ -102,7 +106,8 @@ class GSearch: this_trait['locus_chr'] = line[16] this_trait['locus_mb'] = line[17] - dataset_ob = SimpleNamespace(id=this_trait["dataset_id"], type="ProbeSet",species=this_trait["species"]) + dataset_ob = SimpleNamespace( + id=this_trait["dataset_id"], type="ProbeSet", species=this_trait["species"]) if dataset_ob.id not in dataset_to_permissions: permissions = check_resource_availability(dataset_ob) dataset_to_permissions[dataset_ob.id] = permissions @@ -117,7 +122,9 @@ class GSearch: max_lrs_text = "N/A" if this_trait['locus_chr'] != None and this_trait['locus_mb'] != None: - max_lrs_text = "Chr" + str(this_trait['locus_chr']) + ": " + str(this_trait['locus_mb']) + max_lrs_text = "Chr" + \ + str(this_trait['locus_chr']) + \ + ": " + str(this_trait['locus_mb']) this_trait['max_lrs_text'] = max_lrs_text trait_list.append(this_trait) @@ -126,18 +133,18 @@ class GSearch: self.trait_list = json.dumps(trait_list) self.header_fields = ['Index', - 'Record', - 'Species', - 'Group', - 'Tissue', - 'Dataset', - 'Symbol', - 'Description', - 'Location', - 'Mean', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] + 'Record', + 'Species', + 'Group', + 'Tissue', + 'Dataset', + 'Symbol', + 'Description', + 'Location', + 'Mean', + 'Max LRS', + 'Max LRS Location', + 'Additive Effect'] elif self.type == "phenotype": search_term = self.terms @@ -145,7 +152,8 @@ class GSearch: if "_" in self.terms: if len(self.terms.split("_")[0]) == 3: search_term = self.terms.split("_")[1] - group_clause = "AND InbredSet.`InbredSetCode` = '{}'".format(self.terms.split("_")[0]) + group_clause = "AND InbredSet.`InbredSetCode` = '{}'".format( + self.terms.split("_")[0]) sql = """ SELECT Species.`Name`, @@ -191,18 +199,22 @@ class GSearch: this_trait['index'] = i + 1 this_trait['name'] = str(line[4]) if len(str(line[12])) == 3: - this_trait['display_name'] = str(line[12]) + "_" + this_trait['name'] + this_trait['display_name'] = str( + line[12]) + "_" + this_trait['name'] else: this_trait['display_name'] = this_trait['name'] this_trait['dataset'] = line[2] this_trait['dataset_fullname'] = line[3] - this_trait['hmac'] = hmac.data_hmac('{}:{}'.format(line[4], line[2])) + this_trait['hmac'] = hmac.data_hmac( + '{}:{}'.format(line[4], line[2])) this_trait['species'] = line[0] this_trait['group'] = line[1] if line[9] != None and line[6] != None: - this_trait['description'] = line[6].decode('utf-8', 'replace') + this_trait['description'] = line[6].decode( + 'utf-8', 'replace') elif line[5] != None: - this_trait['description'] = line[5].decode('utf-8', 'replace') + this_trait['description'] = line[5].decode( + 'utf-8', 'replace') else: this_trait['description'] = "N/A" if line[13] != None and line[13] != "": @@ -220,7 +232,8 @@ class GSearch: else: this_trait['pubmed_link'] = "N/A" if line[12]: - this_trait['display_name'] = line[12] + "_" + str(this_trait['name']) + this_trait['display_name'] = line[12] + \ + "_" + str(this_trait['name']) this_trait['LRS_score_repr'] = "N/A" if line[10] != "" and line[10] != None: this_trait['LRS_score_repr'] = '%3.1f' % line[10] @@ -229,15 +242,18 @@ class GSearch: this_trait['additive'] = '%.3f' % line[11] this_trait['max_lrs_text'] = "N/A" - trait_ob = create_trait(dataset_name=this_trait['dataset'], name=this_trait['name'], get_qtl_info=True, get_sample_info=False) + trait_ob = create_trait( + dataset_name=this_trait['dataset'], name=this_trait['name'], get_qtl_info=True, get_sample_info=False) if not trait_ob: continue if this_trait['dataset'] == this_trait['group'] + "Publish": - try: - if trait_ob.locus_chr != "" and trait_ob.locus_mb != "": - this_trait['max_lrs_text'] = "Chr" + str(trait_ob.locus_chr) + ": " + str(trait_ob.locus_mb) - except: - this_trait['max_lrs_text'] = "N/A" + try: + if trait_ob.locus_chr != "" and trait_ob.locus_mb != "": + this_trait['max_lrs_text'] = "Chr" + \ + str(trait_ob.locus_chr) + \ + ": " + str(trait_ob.locus_mb) + except: + this_trait['max_lrs_text'] = "N/A" trait_list.append(this_trait) @@ -245,12 +261,12 @@ class GSearch: self.trait_list = json.dumps(trait_list) self.header_fields = ['Index', - 'Species', - 'Group', - 'Record', - 'Description', - 'Authors', - 'Year', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] + 'Species', + 'Group', + 'Record', + 'Description', + 'Authors', + 'Year', + 'Max LRS', + 'Max LRS Location', + 'Additive Effect'] diff --git a/wqflask/wqflask/heatmap/heatmap.py b/wqflask/wqflask/heatmap/heatmap.py index 20e3559a..001bab3b 100644 --- a/wqflask/wqflask/heatmap/heatmap.py +++ b/wqflask/wqflask/heatmap/heatmap.py @@ -12,19 +12,21 @@ from utility.logger import getLogger Redis = Redis() -logger = getLogger(__name__ ) +logger = getLogger(__name__) + class Heatmap: def __init__(self, start_vars, temp_uuid): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) self.temp_uuid = temp_uuid self.num_permutations = 5000 self.dataset = self.trait_list[0][1] - self.json_data = {} #The dictionary that will be used to create the json object that contains all the data needed to create the figure + self.json_data = {} # The dictionary that will be used to create the json object that contains all the data needed to create the figure self.all_sample_list = [] self.traits = [] @@ -32,7 +34,8 @@ class Heatmap: chrnames = [] self.species = species.TheSpecies(dataset=self.trait_list[0][1]) for key in list(self.species.chromosomes.chromosomes.keys()): - chrnames.append([self.species.chromosomes.chromosomes[key].name, self.species.chromosomes.chromosomes[key].mb_length]) + chrnames.append([self.species.chromosomes.chromosomes[key].name, + self.species.chromosomes.chromosomes[key].mb_length]) for trait_db in self.trait_list: @@ -83,7 +86,7 @@ class Heatmap: self.json_data[trait] = self.trait_results[trait] self.js_data = dict( - json_data = self.json_data + json_data=self.json_data ) def gen_reaper_results(self): @@ -107,19 +110,22 @@ class Heatmap: trimmed_samples.append(str(samples[i])) trimmed_values.append(values[i]) - trait_filename = str(this_trait.name) + "_" + str(self.dataset.name) + "_pheno" + trait_filename = str(this_trait.name) + "_" + \ + str(self.dataset.name) + "_pheno" gen_pheno_txt_file(trimmed_samples, trimmed_values, trait_filename) - output_filename = self.dataset.group.name + "_GWA_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) + output_filename = self.dataset.group.name + "_GWA_" + \ + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) reaper_command = REAPER_COMMAND + ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt -n 1000 -o {4}{5}.txt'.format(flat_files('genotype'), - genofile_name, - TEMPDIR, - trait_filename, - webqtlConfig.GENERATED_IMAGE_DIR, - output_filename) + genofile_name, + TEMPDIR, + trait_filename, + webqtlConfig.GENERATED_IMAGE_DIR, + output_filename) - os.system(reaper_command) + os.system(reaper_command) reaper_results = parse_reaper_output(output_filename) @@ -128,9 +134,12 @@ class Heatmap: self.trait_results[this_trait.name] = [] for qtl in reaper_results: if qtl['additive'] > 0: - self.trait_results[this_trait.name].append(-float(qtl['lrs_value'])) + self.trait_results[this_trait.name].append( + -float(qtl['lrs_value'])) else: - self.trait_results[this_trait.name].append(float(qtl['lrs_value'])) + self.trait_results[this_trait.name].append( + float(qtl['lrs_value'])) + def gen_pheno_txt_file(samples, vals, filename): """Generates phenotype file for GEMMA""" @@ -151,6 +160,7 @@ def gen_pheno_txt_file(samples, vals, filename): values_string = "\t".join(filtered_vals_list) outfile.write(values_string) + def parse_reaper_output(gwa_filename): included_markers = [] p_values = [] diff --git a/wqflask/wqflask/interval_analyst/GeneUtil.py b/wqflask/wqflask/interval_analyst/GeneUtil.py index d0dd7aea..5e86ae31 100644 --- a/wqflask/wqflask/interval_analyst/GeneUtil.py +++ b/wqflask/wqflask/interval_analyst/GeneUtil.py @@ -2,30 +2,32 @@ import string from flask import Flask, g -#Just return a list of dictionaries -#each dictionary contains sub-dictionary +# Just return a list of dictionaries +# each dictionary contains sub-dictionary + + def loadGenes(chrName, diffCol, startMb, endMb, species='mouse'): - fetchFields = ['SpeciesId', 'Id', 'GeneSymbol', 'GeneDescription', 'Chromosome', 'TxStart', 'TxEnd', - 'Strand', 'GeneID', 'NM_ID', 'kgID', 'GenBankID', 'UnigenID', 'ProteinID', 'AlignID', - 'exonCount', 'exonStarts', 'exonEnds', 'cdsStart', 'cdsEnd'] - - ##List All Species in the Gene Table - speciesDict = {} - results = g.db.execute(""" + fetchFields = ['SpeciesId', 'Id', 'GeneSymbol', 'GeneDescription', 'Chromosome', 'TxStart', 'TxEnd', + 'Strand', 'GeneID', 'NM_ID', 'kgID', 'GenBankID', 'UnigenID', 'ProteinID', 'AlignID', + 'exonCount', 'exonStarts', 'exonEnds', 'cdsStart', 'cdsEnd'] + + # List All Species in the Gene Table + speciesDict = {} + results = g.db.execute(""" SELECT Species.Name, GeneList.SpeciesId FROM Species, GeneList WHERE GeneList.SpeciesId = Species.Id GROUP BY GeneList.SpeciesId""").fetchall() - for item in results: - speciesDict[item[0]] = item[1] - - ##List current Species and other Species - speciesId = speciesDict[species] - otherSpecies = [[X, speciesDict[X]] for X in list(speciesDict.keys())] - otherSpecies.remove([species, speciesId]) + for item in results: + speciesDict[item[0]] = item[1] - results = g.db.execute(""" + # List current Species and other Species + speciesId = speciesDict[species] + otherSpecies = [[X, speciesDict[X]] for X in list(speciesDict.keys())] + otherSpecies.remove([species, speciesId]) + + results = g.db.execute(""" SELECT %s FROM GeneList WHERE SpeciesId = %d AND Chromosome = '%s' AND @@ -36,47 +38,49 @@ def loadGenes(chrName, diffCol, startMb, endMb, species='mouse'): startMb, endMb, startMb, endMb)).fetchall() - GeneList = [] + GeneList = [] - if results: - for result in results: - newdict = {} - for j, item in enumerate(fetchFields): - newdict[item] = result[j] - #count SNPs if possible - if diffCol and species=='mouse': - newdict["snpCount"] = g.db.execute(""" + if results: + for result in results: + newdict = {} + for j, item in enumerate(fetchFields): + newdict[item] = result[j] + # count SNPs if possible + if diffCol and species == 'mouse': + newdict["snpCount"] = g.db.execute(""" SELECT count(*) FROM BXDSnpPosition WHERE Chr = '%s' AND Mb >= %2.6f AND Mb < %2.6f AND StrainId1 = %d AND StrainId2 = %d """ % (chrName, newdict["TxStart"], newdict["TxEnd"], diffCol[0], diffCol[1])).fetchone()[0] - newdict["snpDensity"] = newdict["snpCount"]/(newdict["TxEnd"]-newdict["TxStart"])/1000.0 - else: - newdict["snpDensity"] = newdict["snpCount"] = 0 - - try: - newdict['GeneLength'] = 1000.0*(newdict['TxEnd'] - newdict['TxStart']) - except: - pass - - #load gene from other Species by the same name - for item in otherSpecies: - othSpec, othSpecId = item - newdict2 = {} - - resultsOther = g.db.execute("SELECT %s FROM GeneList WHERE SpeciesId = %d AND geneSymbol= '%s' LIMIT 1" % (", ".join(fetchFields), + newdict["snpDensity"] = newdict["snpCount"] / \ + (newdict["TxEnd"] - newdict["TxStart"]) / 1000.0 + else: + newdict["snpDensity"] = newdict["snpCount"] = 0 + + try: + newdict['GeneLength'] = 1000.0 * \ + (newdict['TxEnd'] - newdict['TxStart']) + except: + pass + + # load gene from other Species by the same name + for item in otherSpecies: + othSpec, othSpecId = item + newdict2 = {} + + resultsOther = g.db.execute("SELECT %s FROM GeneList WHERE SpeciesId = %d AND geneSymbol= '%s' LIMIT 1" % (", ".join(fetchFields), othSpecId, newdict["GeneSymbol"])).fetchone() - if resultsOther: - for j, item in enumerate(fetchFields): - newdict2[item] = resultsOther[j] - - #count SNPs if possible, could be a separate function - if diffCol and othSpec == 'mouse': - newdict2["snpCount"] = g.db.execute(""" + if resultsOther: + for j, item in enumerate(fetchFields): + newdict2[item] = resultsOther[j] + + # count SNPs if possible, could be a separate function + if diffCol and othSpec == 'mouse': + newdict2["snpCount"] = g.db.execute(""" SELECT count(*) FROM BXDSnpPosition WHERE Chr = '%s' AND @@ -84,19 +88,19 @@ def loadGenes(chrName, diffCol, startMb, endMb, species='mouse'): StrainId1 = %d AND StrainId2 = %d """ % (chrName, newdict["TxStart"], newdict["TxEnd"], diffCol[0], diffCol[1])).fetchone()[0] - newdict2["snpDensity"] = newdict2["snpCount"]/(newdict2["TxEnd"]-newdict2["TxStart"])/1000.0 - else: - newdict2["snpDensity"] = newdict2["snpCount"] = 0 - - try: - newdict2['GeneLength'] = 1000.0*(newdict2['TxEnd'] - newdict2['TxStart']) - except: - pass - - newdict['%sGene' % othSpec] = newdict2 - - GeneList.append(newdict) + newdict2["snpDensity"] = newdict2["snpCount"] / \ + (newdict2["TxEnd"] - newdict2["TxStart"]) / 1000.0 + else: + newdict2["snpDensity"] = newdict2["snpCount"] = 0 + + try: + newdict2['GeneLength'] = 1000.0 * \ + (newdict2['TxEnd'] - newdict2['TxStart']) + except: + pass - return GeneList + newdict['%sGene' % othSpec] = newdict2 + GeneList.append(newdict) + return GeneList diff --git a/wqflask/wqflask/markdown_routes.py b/wqflask/wqflask/markdown_routes.py index ebf75807..c27ff143 100644 --- a/wqflask/wqflask/markdown_routes.py +++ b/wqflask/wqflask/markdown_routes.py @@ -103,7 +103,7 @@ def environments(): @environments_blueprint.route('/svg-dependency-graph') def svg_graph(): directory, file_name, _ = get_file_from_python_search_path( - "wqflask/dependency-graph.svg").partition("dependency-graph.svg") + "wqflask/dependency-graph.svg").partition("dependency-graph.svg") return send_from_directory(directory, file_name) diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py index 4074f098..5bf8822a 100644 --- a/wqflask/wqflask/marker_regression/display_mapping_results.py +++ b/wqflask/wqflask/marker_regression/display_mapping_results.py @@ -51,7 +51,7 @@ try: # Only import this for Python3 from functools import reduce except: pass -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) RED = ImageColor.getrgb("red") BLUE = ImageColor.getrgb("blue") @@ -307,7 +307,8 @@ class DisplayMappingResults: if 'color_scheme' in start_vars: self.color_scheme = start_vars['color_scheme'] if self.color_scheme == "single": - self.manhattan_single_color = ImageColor.getrgb("#" + start_vars['manhattan_single_color']) + self.manhattan_single_color = ImageColor.getrgb( + "#" + start_vars['manhattan_single_color']) if 'permCheck' in list(start_vars.keys()): self.permChecked = start_vars['permCheck'] @@ -357,7 +358,8 @@ class DisplayMappingResults: if 'reaper_version' in list(start_vars.keys()) and self.mapping_method == "reaper": self.reaper_version = start_vars['reaper_version'] if 'output_files' in start_vars: - self.output_files = ",".join([(the_file if the_file is not None else "") for the_file in start_vars['output_files']]) + self.output_files = ",".join( + [(the_file if the_file is not None else "") for the_file in start_vars['output_files']]) self.categorical_vars = "" self.perm_strata = "" @@ -386,28 +388,31 @@ class DisplayMappingResults: self.dataset.group.genofile = self.genofile_string.split(":")[0] if self.mapping_method == "reaper" and self.manhattan_plot != True: - self.genotype = self.dataset.group.read_genotype_file(use_reaper=True) + self.genotype = self.dataset.group.read_genotype_file( + use_reaper=True) else: self.genotype = self.dataset.group.read_genotype_file() - #Darwing Options + # Darwing Options try: - if self.selectedChr > -1: - self.graphWidth = min(self.GRAPH_MAX_WIDTH, max(self.GRAPH_MIN_WIDTH, int(start_vars['graphWidth']))) - else: - self.graphWidth = min(self.GRAPH_MAX_WIDTH, max(self.MULT_GRAPH_MIN_WIDTH, int(start_vars['graphWidth']))) + if self.selectedChr > -1: + self.graphWidth = min(self.GRAPH_MAX_WIDTH, max( + self.GRAPH_MIN_WIDTH, int(start_vars['graphWidth']))) + else: + self.graphWidth = min(self.GRAPH_MAX_WIDTH, max( + self.MULT_GRAPH_MIN_WIDTH, int(start_vars['graphWidth']))) except: - if self.selectedChr > -1: - self.graphWidth = self.GRAPH_DEFAULT_WIDTH - else: - self.graphWidth = self.MULT_GRAPH_DEFAULT_WIDTH + if self.selectedChr > -1: + self.graphWidth = self.GRAPH_DEFAULT_WIDTH + else: + self.graphWidth = self.MULT_GRAPH_DEFAULT_WIDTH -## BEGIN HaplotypeAnalyst +# BEGIN HaplotypeAnalyst if 'haplotypeAnalystCheck' in list(start_vars.keys()): self.haplotypeAnalystChecked = start_vars['haplotypeAnalystCheck'] else: self.haplotypeAnalystChecked = False -## END HaplotypeAnalyst +# END HaplotypeAnalyst self.graphHeight = self.GRAPH_DEFAULT_HEIGHT self.dominanceChecked = False @@ -446,7 +451,7 @@ class DisplayMappingResults: except: self.lrsMax = 0 - #Trait Infos + # Trait Infos self.identification = "" ################################################################ @@ -471,10 +476,12 @@ class DisplayMappingResults: Chr_Length.OrderId """ % (self.dataset.group.name, ", ".join(["'%s'" % X[0] for X in self.ChrList[1:]]))) - self.ChrLengthMbList = [x[0]/1000000.0 for x in self.ChrLengthMbList] - self.ChrLengthMbSum = reduce(lambda x, y:x+y, self.ChrLengthMbList, 0.0) + self.ChrLengthMbList = [x[0] / 1000000.0 for x in self.ChrLengthMbList] + self.ChrLengthMbSum = reduce( + lambda x, y: x + y, self.ChrLengthMbList, 0.0) if self.ChrLengthMbList: - self.MbGraphInterval = self.ChrLengthMbSum/(len(self.ChrLengthMbList)*12) #Empirical Mb interval + self.MbGraphInterval = self.ChrLengthMbSum / \ + (len(self.ChrLengthMbList) * 12) # Empirical Mb interval else: self.MbGraphInterval = 1 @@ -482,38 +489,38 @@ class DisplayMappingResults: for i, _chr in enumerate(self.genotype): self.ChrLengthCMList.append(_chr[-1].cM - _chr[0].cM) - self.ChrLengthCMSum = reduce(lambda x, y:x+y, self.ChrLengthCMList, 0.0) + self.ChrLengthCMSum = reduce( + lambda x, y: x + y, self.ChrLengthCMList, 0.0) if self.plotScale == 'physic': - self.GraphInterval = self.MbGraphInterval #Mb + self.GraphInterval = self.MbGraphInterval # Mb else: - self.GraphInterval = self.cMGraphInterval #cM + self.GraphInterval = self.cMGraphInterval # cM -## BEGIN HaplotypeAnalyst -## count the amount of individuals to be plotted, and increase self.graphHeight +# BEGIN HaplotypeAnalyst +# count the amount of individuals to be plotted, and increase self.graphHeight if self.haplotypeAnalystChecked and self.selectedChr > -1: thisTrait = self.this_trait - smd=[] + smd = [] for sample in self.sample_vals_dict.keys(): if self.sample_vals_dict[sample] != "x": - temp = GeneralObject(name=sample, value=float(self.sample_vals_dict[sample])) + temp = GeneralObject(name=sample, value=float( + self.sample_vals_dict[sample])) smd.append(temp) else: continue samplelist = list(self.genotype.prgy) - for j, _geno in enumerate (self.genotype[0][1].genotype): + for j, _geno in enumerate(self.genotype[0][1].genotype): for item in smd: if item.name == samplelist[j]: self.NR_INDIVIDUALS = self.NR_INDIVIDUALS + 1 # default: - self.graphHeight = self.graphHeight + 2 * (self.NR_INDIVIDUALS+10) * self.EACH_GENE_HEIGHT -## END HaplotypeAnalyst - - - + self.graphHeight = self.graphHeight + 2 * \ + (self.NR_INDIVIDUALS + 10) * self.EACH_GENE_HEIGHT +# END HaplotypeAnalyst ######################### - ## Get the sorting column + # Get the sorting column ######################### RISet = self.dataset.group.name if RISet in ('AXB', 'BXA', 'AXBXA'): @@ -529,10 +536,11 @@ class DisplayMappingResults: elif RISet in ('LXS'): self.diffCol = ['ILS', 'ISS'] else: - self.diffCol= [] + self.diffCol = [] for i, strain in enumerate(self.diffCol): - self.diffCol[i] = g.db.execute("select Id from Strain where Symbol = %s", strain).fetchone()[0] + self.diffCol[i] = g.db.execute( + "select Id from Strain where Symbol = %s", strain).fetchone()[0] ################################################################ # GeneCollection goes here @@ -546,7 +554,7 @@ class DisplayMappingResults: geneTable = "" self.geneCol = None - if self.plotScale == 'physic' and self.selectedChr > -1 and (self.intervalAnalystChecked or self.geneChecked): + if self.plotScale == 'physic' and self.selectedChr > -1 and (self.intervalAnalystChecked or self.geneChecked): # Draw the genes for this chromosome / region of this chromosome webqtldatabase = self.dataset.name @@ -555,24 +563,26 @@ class DisplayMappingResults: chrName = "X" else: chrName = self.selectedChr - self.geneCol = GeneUtil.loadGenes(chrName, self.diffCol, self.startMb, self.endMb, "mouse") + self.geneCol = GeneUtil.loadGenes( + chrName, self.diffCol, self.startMb, self.endMb, "mouse") elif self.dataset.group.species == "rat": if self.selectedChr == 21: chrName = "X" else: chrName = self.selectedChr - self.geneCol = GeneUtil.loadGenes(chrName, self.diffCol, self.startMb, self.endMb, "rat") + self.geneCol = GeneUtil.loadGenes( + chrName, self.diffCol, self.startMb, self.endMb, "rat") if self.geneCol and self.intervalAnalystChecked: - ####################################################################### - #Nick use GENEID as RefGene to get Literature Correlation Informations# - #For Interval Mapping, Literature Correlation isn't useful, so skip it# - #through set GENEID is None # - ####################################################################### + ####################################################################### + #Nick use GENEID as RefGene to get Literature Correlation Informations# + #For Interval Mapping, Literature Correlation isn't useful, so skip it# + #through set GENEID is None # + ####################################################################### - GENEID = None + GENEID = None - self.geneTable(self.geneCol, GENEID) + self.geneTable(self.geneCol, GENEID) ################################################################ # Plots goes here @@ -580,11 +590,12 @@ class DisplayMappingResults: showLocusForm = "" intCanvas = Image.new("RGBA", size=(self.graphWidth, self.graphHeight)) with Bench("Drawing Plot"): - gifmap = self.plotIntMapping(intCanvas, startMb = self.startMb, endMb = self.endMb, showLocusForm= showLocusForm) + gifmap = self.plotIntMapping( + intCanvas, startMb=self.startMb, endMb=self.endMb, showLocusForm=showLocusForm) self.gifmap = gifmap.__str__() - self.filename= webqtlUtil.genRandStr("Itvl_") + self.filename = webqtlUtil.genRandStr("Itvl_") intCanvas.save( "{}.png".format( os.path.join(webqtlConfig.GENERATED_IMAGE_DIR, self.filename)), @@ -594,20 +605,22 @@ class DisplayMappingResults: border="0", usemap='#WebQTLImageMap' ) - #Scales plot differently for high resolution + # Scales plot differently for high resolution if self.draw2X: - intCanvasX2 = Image.new("RGBA", size=(self.graphWidth*2, self.graphHeight*2)) - gifmapX2 = self.plotIntMapping(intCanvasX2, startMb = self.startMb, endMb = self.endMb, showLocusForm= showLocusForm, zoom=2) + intCanvasX2 = Image.new("RGBA", size=( + self.graphWidth * 2, self.graphHeight * 2)) + gifmapX2 = self.plotIntMapping( + intCanvasX2, startMb=self.startMb, endMb=self.endMb, showLocusForm=showLocusForm, zoom=2) intCanvasX2.save( "{}.png".format( os.path.join(webqtlConfig.GENERATED_IMAGE_DIR, - self.filename+"X2")), + self.filename + "X2")), format='png') ################################################################ # Outputs goes here ################################################################ - #this form is used for opening Locus page or trait page, only available for genetic mapping + # this form is used for opening Locus page or trait page, only available for genetic mapping if showLocusForm: showLocusForm = HtmlGenWrapper.create_form_tag( cgi=os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), @@ -615,7 +628,8 @@ class DisplayMappingResults: name=showLocusForm, submit=HtmlGenWrapper.create_input_tag(type_='hidden')) - hddn = {'FormID':'showDatabase', 'ProbeSetID':'_','database':fd.RISet+"Geno",'CellID':'_', 'RISet':fd.RISet, 'incparentsf1':'ON'} + hddn = {'FormID': 'showDatabase', 'ProbeSetID': '_', 'database': fd.RISet + \ + "Geno", 'CellID': '_', 'RISet': fd.RISet, 'incparentsf1': 'ON'} for key in hddn.keys(): showLocusForm.append(HtmlGenWrapper.create_input_tag( name=key, value=hddn[key], type_='hidden')) @@ -634,11 +648,12 @@ class DisplayMappingResults: if self.traitList and self.traitList[0].dataset and self.traitList[0].dataset.type == 'Geno': btminfo.append(HtmlGenWrapper.create_br_tag()) - btminfo.append('Mapping using genotype data as a trait will result in infinity LRS at one locus. In order to display the result properly, all LRSs higher than 100 are capped at 100.') + btminfo.append( + 'Mapping using genotype data as a trait will result in infinity LRS at one locus. In order to display the result properly, all LRSs higher than 100 are capped at 100.') - def plotIntMapping(self, canvas, offset= (80, 120, 90, 100), zoom = 1, startMb = None, endMb = None, showLocusForm = ""): + def plotIntMapping(self, canvas, offset=(80, 120, 90, 100), zoom=1, startMb=None, endMb=None, showLocusForm=""): im_drawer = ImageDraw.Draw(canvas) - #calculating margins + # calculating margins xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset if self.multipleInterval: yTopOffset = max(90, yTopOffset) @@ -659,34 +674,36 @@ class DisplayMappingResults: xLeftOffset += 20 fontZoom = 1.5 - xLeftOffset = int(xLeftOffset*fontZoom) - xRightOffset = int(xRightOffset*fontZoom) - yBottomOffset = int(yBottomOffset*fontZoom) + xLeftOffset = int(xLeftOffset * fontZoom) + xRightOffset = int(xRightOffset * fontZoom) + yBottomOffset = int(yBottomOffset * fontZoom) cWidth = canvas.size[0] cHeight = canvas.size[1] plotWidth = cWidth - xLeftOffset - xRightOffset plotHeight = cHeight - yTopOffset - yBottomOffset - #Drawing Area Height + # Drawing Area Height drawAreaHeight = plotHeight if self.plotScale == 'physic' and self.selectedChr > -1: if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": - drawAreaHeight -= 4*self.BAND_HEIGHT + 4*self.BAND_SPACING+ 10*zoom + drawAreaHeight -= 4 * self.BAND_HEIGHT + 4 * self.BAND_SPACING + 10 * zoom else: - drawAreaHeight -= 3*self.BAND_HEIGHT + 3*self.BAND_SPACING+ 10*zoom + drawAreaHeight -= 3 * self.BAND_HEIGHT + 3 * self.BAND_SPACING + 10 * zoom if self.geneChecked: - drawAreaHeight -= self.NUM_GENE_ROWS*self.EACH_GENE_HEIGHT + 3*self.BAND_SPACING + 10*zoom + drawAreaHeight -= self.NUM_GENE_ROWS * \ + self.EACH_GENE_HEIGHT + 3 * self.BAND_SPACING + 10 * zoom else: if self.selectedChr > -1: drawAreaHeight -= 20 else: drawAreaHeight -= 30 -## BEGIN HaplotypeAnalyst +# BEGIN HaplotypeAnalyst if self.haplotypeAnalystChecked and self.selectedChr > -1: - drawAreaHeight -= self.EACH_GENE_HEIGHT * (self.NR_INDIVIDUALS+10) * 2 * zoom -## END HaplotypeAnalyst + drawAreaHeight -= self.EACH_GENE_HEIGHT * \ + (self.NR_INDIVIDUALS + 10) * 2 * zoom +# END HaplotypeAnalyst if zoom == 2: drawAreaHeight -= 60 @@ -696,42 +713,52 @@ class DisplayMappingResults: newoffset = (xLeftOffset, xRightOffset, yTopOffset, yBottomOffset) # Draw the alternating-color background first and get plotXScale - plotXScale = self.drawGraphBackground(canvas, gifmap, offset=newoffset, zoom= zoom, startMb=startMb, endMb = endMb) + plotXScale = self.drawGraphBackground( + canvas, gifmap, offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) - #draw bootstap + # draw bootstap if self.bootChecked and not self.multipleInterval: - self.drawBootStrapResult(canvas, self.nboot, drawAreaHeight, plotXScale, offset=newoffset, zoom= zoom, startMb=startMb, endMb = endMb) + self.drawBootStrapResult(canvas, self.nboot, drawAreaHeight, plotXScale, + offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) # Draw clickable region and gene band if selected if self.plotScale == 'physic' and self.selectedChr > -1: - self.drawClickBand(canvas, gifmap, plotXScale, offset=newoffset, zoom = zoom, startMb=startMb, endMb = endMb) + self.drawClickBand(canvas, gifmap, plotXScale, offset=newoffset, + zoom=zoom, startMb=startMb, endMb=endMb) if self.geneChecked and self.geneCol: - self.drawGeneBand(canvas, gifmap, plotXScale, offset=newoffset, zoom = zoom, startMb=startMb, endMb = endMb) + self.drawGeneBand(canvas, gifmap, plotXScale, offset=newoffset, + zoom=zoom, startMb=startMb, endMb=endMb) if self.SNPChecked: - self.drawSNPTrackNew(canvas, offset=newoffset, zoom = 2*zoom, startMb=startMb, endMb = endMb) -## BEGIN HaplotypeAnalyst + self.drawSNPTrackNew( + canvas, offset=newoffset, zoom=2 * zoom, startMb=startMb, endMb=endMb) +# BEGIN HaplotypeAnalyst if self.haplotypeAnalystChecked: - self.drawHaplotypeBand(canvas, gifmap, plotXScale, offset=newoffset, zoom = zoom, startMb=startMb, endMb = endMb) -## END HaplotypeAnalyst + self.drawHaplotypeBand( + canvas, gifmap, plotXScale, offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) +# END HaplotypeAnalyst # Draw X axis - self.drawXAxis(canvas, drawAreaHeight, gifmap, plotXScale, showLocusForm, offset=newoffset, zoom = zoom, startMb=startMb, endMb = endMb) + self.drawXAxis(canvas, drawAreaHeight, gifmap, plotXScale, showLocusForm, + offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) # Draw QTL curve - self.drawQTL(canvas, drawAreaHeight, gifmap, plotXScale, offset=newoffset, zoom= zoom, startMb=startMb, endMb = endMb) + self.drawQTL(canvas, drawAreaHeight, gifmap, plotXScale, + offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) - #draw legend + # draw legend if self.multipleInterval: - self.drawMultiTraitName(fd, canvas, gifmap, showLocusForm, offset=newoffset) + self.drawMultiTraitName( + fd, canvas, gifmap, showLocusForm, offset=newoffset) elif self.legendChecked: - self.drawLegendPanel(canvas, offset=newoffset, zoom = zoom) + self.drawLegendPanel(canvas, offset=newoffset, zoom=zoom) else: pass - #draw position, no need to use a separate function - self.drawProbeSetPosition(canvas, plotXScale, offset=newoffset, zoom = zoom) + # draw position, no need to use a separate function + self.drawProbeSetPosition( + canvas, plotXScale, offset=newoffset, zoom=zoom) return gifmap - def drawBootStrapResult(self, canvas, nboot, drawAreaHeight, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): + def drawBootStrapResult(self, canvas, nboot, drawAreaHeight, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): im_drawer = ImageDraw.Draw(canvas) xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset plotWidth = canvas.size[0] - xLeftOffset - xRightOffset @@ -741,9 +768,9 @@ class DisplayMappingResults: if zoom == 2: fontZoom = 1.5 - bootHeightThresh = drawAreaHeight*3/4 + bootHeightThresh = drawAreaHeight * 3 / 4 - #break bootstrap result into groups + # break bootstrap result into groups BootCoord = [] i = 0 previous_chr = None @@ -751,7 +778,7 @@ class DisplayMappingResults: startX = xLeftOffset BootChrCoord = [] - if self.selectedChr == -1: #ZS: If viewing full genome/all chromosomes + if self.selectedChr == -1: # ZS: If viewing full genome/all chromosomes for i, result in enumerate(self.qtlresults): if result['chr'] != previous_chr: previous_chr = result['chr'] @@ -759,28 +786,33 @@ class DisplayMappingResults: if previous_chr_as_int != 1: BootCoord.append(BootChrCoord) BootChrCoord = [] - startX += (self.ChrLengthDistList[previous_chr_as_int - 2] + self.GraphInterval)*plotXScale + startX += ( + self.ChrLengthDistList[previous_chr_as_int - 2] + self.GraphInterval) * plotXScale if self.plotScale == 'physic': - Xc = startX + (result['Mb']-self.startMb)*plotXScale + Xc = startX + (result['Mb'] - self.startMb) * plotXScale else: - Xc = startX + (result['cM']-self.qtlresults[0]['cM'])*plotXScale + Xc = startX + \ + (result['cM'] - self.qtlresults[0]['cM']) * plotXScale BootChrCoord.append([Xc, self.bootResult[i]]) else: for i, result in enumerate(self.qtlresults): if str(result['chr']) == str(self.ChrList[self.selectedChr][0]): if self.plotScale == 'physic': - Xc = startX + (result['Mb']-self.startMb)*plotXScale + Xc = startX + (result['Mb'] - \ + self.startMb) * plotXScale else: - Xc = startX + (result['cM']-self.qtlresults[0]['cM'])*plotXScale + Xc = startX + \ + (result['cM'] - self.qtlresults[0] + ['cM']) * plotXScale BootChrCoord.append([Xc, self.bootResult[i]]) BootCoord = [BootChrCoord] - #reduce bootResult + # reduce bootResult if self.selectedChr > -1: maxBootBar = 80.0 else: maxBootBar = 200.0 - stepBootStrap = plotWidth/maxBootBar + stepBootStrap = plotWidth / maxBootBar reducedBootCoord = [] maxBootCount = 0 @@ -796,14 +828,16 @@ class DisplayMappingResults: if maxBootCount < bootCount: maxBootCount = bootCount # end if - reducedBootCoord.append([bootStartPixX, BootChrCoord[i][0], bootCount]) + reducedBootCoord.append( + [bootStartPixX, BootChrCoord[i][0], bootCount]) bootStartPixX = BootChrCoord[i][0] bootCount = BootChrCoord[i][1] # end else # end for - #add last piece - if BootChrCoord[-1][0] - bootStartPixX > stepBootStrap/2.0: - reducedBootCoord.append([bootStartPixX, BootChrCoord[-1][0], bootCount]) + # add last piece + if BootChrCoord[-1][0] - bootStartPixX > stepBootStrap / 2.0: + reducedBootCoord.append( + [bootStartPixX, BootChrCoord[-1][0], bootCount]) else: reducedBootCoord[-1][2] += bootCount reducedBootCoord[-1][1] = BootChrCoord[-1][0] @@ -815,64 +849,68 @@ class DisplayMappingResults: if item[2] > 0: if item[0] < xLeftOffset: item[0] = xLeftOffset - if item[0] > xLeftOffset+plotWidth: - item[0] = xLeftOffset+plotWidth + if item[0] > xLeftOffset + plotWidth: + item[0] = xLeftOffset + plotWidth if item[1] < xLeftOffset: item[1] = xLeftOffset - if item[1] > xLeftOffset+plotWidth: - item[1] = xLeftOffset+plotWidth + if item[1] > xLeftOffset + plotWidth: + item[1] = xLeftOffset + plotWidth if item[0] != item[1]: im_drawer.rectangle( xy=((item[0], yZero), - (item[1], yZero - item[2]*bootHeightThresh/maxBootCount)), + (item[1], yZero - item[2] * bootHeightThresh / maxBootCount)), fill=self.BOOTSTRAP_BOX_COLOR, outline=BLACK) - ###draw boot scale - highestPercent = (maxBootCount*100.0)/nboot + # draw boot scale + highestPercent = (maxBootCount * 100.0) / nboot bootScale = Plot.detScale(0, highestPercent) - bootScale = Plot.frange(bootScale[0], bootScale[1], bootScale[1]/bootScale[2]) + bootScale = Plot.frange( + bootScale[0], bootScale[1], bootScale[1] / bootScale[2]) bootScale = bootScale[:-1] + [highestPercent] - bootOffset = 50*fontZoom - bootScaleFont=ImageFont.truetype(font=VERDANA_FILE, size=13*fontZoom) + bootOffset = 50 * fontZoom + bootScaleFont = ImageFont.truetype( + font=VERDANA_FILE, size=13 * fontZoom) im_drawer.rectangle( - xy=((canvas.size[0]-bootOffset, yZero-bootHeightThresh), - (canvas.size[0]-bootOffset-15*zoom, yZero)), - fill = YELLOW, outline=BLACK) + xy=((canvas.size[0] - bootOffset, yZero - bootHeightThresh), + (canvas.size[0] - bootOffset - 15 * zoom, yZero)), + fill=YELLOW, outline=BLACK) im_drawer.line( - xy=((canvas.size[0]-bootOffset+4, yZero), - (canvas.size[0]-bootOffset, yZero)), + xy=((canvas.size[0] - bootOffset + 4, yZero), + (canvas.size[0] - bootOffset, yZero)), fill=BLACK) TEXT_Y_DISPLACEMENT = -8 - im_drawer.text(xy=(canvas.size[0]-bootOffset+10, yZero+TEXT_Y_DISPLACEMENT), text='0%', + im_drawer.text(xy=(canvas.size[0] - bootOffset + 10, yZero + TEXT_Y_DISPLACEMENT), text='0%', font=bootScaleFont, fill=BLACK) for item in bootScale: if item == 0: continue - bootY = yZero-bootHeightThresh*item/highestPercent + bootY = yZero - bootHeightThresh * item / highestPercent im_drawer.line( - xy=((canvas.size[0]-bootOffset+4, bootY), - (canvas.size[0]-bootOffset, bootY)), + xy=((canvas.size[0] - bootOffset + 4, bootY), + (canvas.size[0] - bootOffset, bootY)), fill=BLACK) - im_drawer.text(xy=(canvas.size[0]-bootOffset+10, bootY+TEXT_Y_DISPLACEMENT), - text='%2.1f'%item, font=bootScaleFont, fill=BLACK) + im_drawer.text(xy=(canvas.size[0] - bootOffset + 10, bootY + TEXT_Y_DISPLACEMENT), + text='%2.1f' % item, font=bootScaleFont, fill=BLACK) if self.legendChecked: if hasattr(self.traitList[0], 'chr') and hasattr(self.traitList[0], 'mb'): startPosY = 30 else: startPosY = 15 - smallLabelFont = ImageFont.truetype(font=TREBUC_FILE, size=12*fontZoom) + smallLabelFont = ImageFont.truetype( + font=TREBUC_FILE, size=12 * fontZoom) leftOffset = canvas.size[0] - xRightOffset - 190 im_drawer.rectangle( - xy=((leftOffset, startPosY-6), (leftOffset+12, startPosY+6)), + xy=((leftOffset, startPosY - 6), + (leftOffset + 12, startPosY + 6)), fill=YELLOW, outline=BLACK) - im_drawer.text(xy=(canvas.size[0] - xRightOffset - 170, startPosY+TEXT_Y_DISPLACEMENT), + im_drawer.text(xy=(canvas.size[0] - xRightOffset - 170, startPosY + TEXT_Y_DISPLACEMENT), text='Frequency of the Peak LRS', font=smallLabelFont, fill=BLACK) - def drawProbeSetPosition(self, canvas, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): + def drawProbeSetPosition(self, canvas, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): im_drawer = ImageDraw.Draw(canvas) if len(self.traitList) != 1: return @@ -896,21 +934,22 @@ class DisplayMappingResults: if self.plotScale == "physic": this_chr = str(self.ChrList[self.selectedChr][0]) else: - this_chr = str(self.ChrList[self.selectedChr][1]+1) + this_chr = str(self.ChrList[self.selectedChr][1] + 1) if self.plotScale == 'physic': if self.selectedChr > -1: if this_chr != Chr or Mb < self.startMb or Mb > self.endMb: return else: - locPixel = xLeftOffset + (Mb-self.startMb)*plotXScale + locPixel = xLeftOffset + (Mb - self.startMb) * plotXScale else: locPixel = xLeftOffset for i, _chr in enumerate(self.ChrList[1:]): if _chr[0] != Chr: - locPixel += (self.ChrLengthDistList[i] + self.GraphInterval)*plotXScale + locPixel += (self.ChrLengthDistList[i] + \ + self.GraphInterval) * plotXScale else: - locPixel += Mb*plotXScale + locPixel += Mb * plotXScale break else: if self.selectedChr > -1: @@ -918,33 +957,37 @@ class DisplayMappingResults: if qtlresult['chr'] != self.selectedChr: continue - if i==0 and qtlresult['Mb'] >= Mb: - locPixel=-1 + if i == 0 and qtlresult['Mb'] >= Mb: + locPixel = -1 break - #the trait's position is between two traits - if i > 0 and self.qtlresults[i-1]['Mb'] < Mb and qtlresult['Mb'] >= Mb: - locPixel = xLeftOffset + plotXScale*(self.qtlresults[i-1]['Mb']+(qtlresult['Mb']-self.qtlresults[i-1]['Mb'])*(Mb - self.qtlresults[i-1]['Mb'])/(qtlresult['Mb']-self.qtlresults[i-1]['Mb'])) + # the trait's position is between two traits + if i > 0 and self.qtlresults[i - 1]['Mb'] < Mb and qtlresult['Mb'] >= Mb: + locPixel = xLeftOffset + plotXScale * (self.qtlresults[i - 1]['Mb'] + (qtlresult['Mb'] - self.qtlresults[i - 1]['Mb']) * ( + Mb - self.qtlresults[i - 1]['Mb']) / (qtlresult['Mb'] - self.qtlresults[i - 1]['Mb'])) break - #the trait's position is on the right of the last genotype - if i==len(self.qtlresults) and Mb>=qtlresult['Mb']: + # the trait's position is on the right of the last genotype + if i == len(self.qtlresults) and Mb >= qtlresult['Mb']: locPixel = -1 else: locPixel = xLeftOffset for i, _chr in enumerate(self.ChrList): - if i < (len(self.ChrList)-1): + if i < (len(self.ChrList) - 1): if _chr != Chr: - locPixel += (self.ChrLengthDistList[i] + self.GraphInterval)*plotXScale + locPixel += (self.ChrLengthDistList[i] + \ + self.GraphInterval) * plotXScale else: - locPixel += (Mb*(_chr[-1].cM-_chr[0].cM)/self.ChrLengthCMList[i])*plotXScale + locPixel += (Mb * (_chr[-1].cM - _chr[0].cM) / \ + self.ChrLengthCMList[i]) * plotXScale break if locPixel >= 0 and self.plotScale == 'physic': - traitPixel = ((locPixel, yZero), (locPixel-7, yZero+14), (locPixel+7, yZero+14)) + traitPixel = ((locPixel, yZero), (locPixel - 7, + yZero + 14), (locPixel + 7, yZero + 14)) draw_open_polygon(canvas, xy=traitPixel, outline=BLACK, fill=self.TRANSCRIPT_LOCATION_COLOR) - def drawSNPTrackNew(self, canvas, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): + def drawSNPTrackNew(self, canvas, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): im_drawer = ImageDraw.Draw(canvas) if self.plotScale != 'physic' or self.selectedChr == -1 or not self.diffCol: return @@ -963,36 +1006,37 @@ class DisplayMappingResults: #chrName = self.genotype[0].name chrName = self.ChrList[self.selectedChr][0] - stepMb = (endMb-startMb)/plotWidth + stepMb = (endMb - startMb) / plotWidth strainId1, strainId2 = self.diffCol SNPCounts = [] - while startMb<endMb: + while startMb < endMb: snp_count = g.db.execute(""" select count(*) from BXDSnpPosition where Chr = '%s' AND Mb >= %2.6f AND Mb < %2.6f AND StrainId1 = %d AND StrainId2 = %d - """ % (chrName, startMb, startMb+stepMb, strainId1, strainId2)).fetchone()[0] + """ % (chrName, startMb, startMb + stepMb, strainId1, strainId2)).fetchone()[0] SNPCounts.append(snp_count) startMb += stepMb if (len(SNPCounts) > 0): maxCount = max(SNPCounts) - if maxCount>0: + if maxCount > 0: for i in range(xLeftOffset, xLeftOffset + plotWidth): - snpDensity = float(SNPCounts[i-xLeftOffset]*SNP_HEIGHT_MODIFIER/maxCount) + snpDensity = float( + SNPCounts[i - xLeftOffset] * SNP_HEIGHT_MODIFIER / maxCount) im_drawer.line( - xy=((i, drawSNPLocationY+(snpDensity)*zoom), - (i, drawSNPLocationY-(snpDensity)*zoom)), + xy=((i, drawSNPLocationY + (snpDensity) * zoom), + (i, drawSNPLocationY - (snpDensity) * zoom)), fill=self.SNP_COLOR, width=1) - def drawMultiTraitName(self, fd, canvas, gifmap, showLocusForm, offset= (40, 120, 80, 10), zoom = 1): + def drawMultiTraitName(self, fd, canvas, gifmap, showLocusForm, offset=(40, 120, 80, 10), zoom=1): nameWidths = [] yPaddingTop = 10 - colorFont=ImageFont.truetype(font=TREBUC_FILE, size=12) - if len(self.qtlresults) >20 and self.selectedChr > -1: + colorFont = ImageFont.truetype(font=TREBUC_FILE, size=12) + if len(self.qtlresults) > 20 and self.selectedChr > -1: rightShift = 20 rightShiftStep = 60 rectWidth = 10 @@ -1004,7 +1048,7 @@ class DisplayMappingResults: for k, thisTrait in enumerate(self.traitList): thisLRSColor = self.colorCollection[k] kstep = k % 4 - if k!=0 and kstep==0: + if k != 0 and kstep == 0: if nameWidths: rightShiftStep = max(nameWidths[-4:]) + rectWidth + 20 rightShift += rightShiftStep @@ -1014,19 +1058,23 @@ class DisplayMappingResults: nameWidths.append(nameWidth) im_drawer.rectangle( - xy=((rightShift, yPaddingTop+kstep*15), - (rectWidth+rightShift, yPaddingTop+10+kstep*15)), + xy=((rightShift, yPaddingTop + kstep * 15), + (rectWidth + rightShift, yPaddingTop + 10 + kstep * 15)), fill=thisLRSColor, outline=BLACK) im_drawer.text( - text=name, xy=(rectWidth+2+rightShift, yPaddingTop+10+kstep*15), + text=name, xy=(rectWidth + 2 + rightShift, + yPaddingTop + 10 + kstep * 15), font=colorFont, fill=BLACK) if thisTrait.db: - COORDS = "%d,%d,%d,%d" %(rectWidth+2+rightShift, yPaddingTop+kstep*15, rectWidth+2+rightShift+nameWidth, yPaddingTop+10+kstep*15,) - HREF= "javascript:showDatabase3('%s','%s','%s','');" % (showLocusForm, thisTrait.db.name, thisTrait.name) - Areas = HtmlGenWrapper.create_area_tag(shape='rect', coords=COORDS, href=HREF) - gifmap.append(Areas) ### TODO + COORDS = "%d,%d,%d,%d" % (rectWidth + 2 + rightShift, yPaddingTop + kstep * \ + 15, rectWidth + 2 + rightShift + nameWidth, yPaddingTop + 10 + kstep * 15,) + HREF = "javascript:showDatabase3('%s','%s','%s','');" % ( + showLocusForm, thisTrait.db.name, thisTrait.name) + Areas = HtmlGenWrapper.create_area_tag( + shape='rect', coords=COORDS, href=HREF) + gifmap.append(Areas) # TODO - def drawLegendPanel(self, canvas, offset= (40, 120, 80, 10), zoom = 1): + def drawLegendPanel(self, canvas, offset=(40, 120, 80, 10), zoom=1): im_drawer = ImageDraw.Draw(canvas) xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset plotWidth = canvas.size[0] - xLeftOffset - xRightOffset @@ -1037,80 +1085,82 @@ class DisplayMappingResults: if zoom == 2: fontZoom = 1.5 - labelFont=ImageFont.truetype(font=TREBUC_FILE, size=12*fontZoom) + labelFont = ImageFont.truetype(font=TREBUC_FILE, size=12 * fontZoom) startPosY = 15 - stepPosY = 12*fontZoom + stepPosY = 12 * fontZoom startPosX = canvas.size[0] - xRightOffset - 415 if hasattr(self.traitList[0], 'chr') and hasattr(self.traitList[0], 'mb'): startPosY = 15 nCol = 2 - smallLabelFont = ImageFont.truetype(font=TREBUC_FILE, size=12*fontZoom) + smallLabelFont = ImageFont.truetype( + font=TREBUC_FILE, size=12 * fontZoom) leftOffset = canvas.size[0] - xRightOffset - 190 draw_open_polygon( canvas, xy=( - (leftOffset + 6, startPosY-7), - (leftOffset - 1, startPosY+7), - (leftOffset + 13, startPosY+7)), + (leftOffset + 6, startPosY - 7), + (leftOffset - 1, startPosY + 7), + (leftOffset + 13, startPosY + 7)), outline=BLACK, fill=self.TRANSCRIPT_LOCATION_COLOR ) TEXT_Y_DISPLACEMENT = -8 im_drawer.text( text="Sequence Site", - xy=(leftOffset + 20, startPosY+TEXT_Y_DISPLACEMENT), font=smallLabelFont, + xy=(leftOffset + 20, startPosY + TEXT_Y_DISPLACEMENT), font=smallLabelFont, fill=self.TOP_RIGHT_INFO_COLOR) if self.manhattan_plot != True: im_drawer.line( - xy=((startPosX, startPosY), (startPosX+32, startPosY)), + xy=((startPosX, startPosY), (startPosX + 32, startPosY)), fill=self.LRS_COLOR, width=2) im_drawer.text( - text=self.LRS_LOD, xy=(startPosX+40, startPosY+TEXT_Y_DISPLACEMENT), + text=self.LRS_LOD, xy=( + startPosX + 40, startPosY + TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) startPosY += stepPosY if self.additiveChecked: im_drawer.line( - xy=((startPosX, startPosY), (startPosX+17, startPosY)), + xy=((startPosX, startPosY), (startPosX + 17, startPosY)), fill=self.ADDITIVE_COLOR_POSITIVE, width=2) im_drawer.line( - xy=((startPosX+18, startPosY), (startPosX+32, startPosY)), + xy=((startPosX + 18, startPosY), (startPosX + 32, startPosY)), fill=self.ADDITIVE_COLOR_NEGATIVE, width=2) im_drawer.text( - text='Additive Effect', xy=(startPosX+40, startPosY+TEXT_Y_DISPLACEMENT), + text='Additive Effect', xy=(startPosX + 40, startPosY + TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) startPosY += stepPosY if self.genotype.type == 'intercross' and self.dominanceChecked: im_drawer.line( - xy=((startPosX, startPosY), (startPosX+17, startPosY)), + xy=((startPosX, startPosY), (startPosX + 17, startPosY)), fill=self.DOMINANCE_COLOR_POSITIVE, width=4) im_drawer.line( - xy=((startPosX+18, startPosY), (startPosX+35, startPosY)), + xy=((startPosX + 18, startPosY), (startPosX + 35, startPosY)), fill=self.DOMINANCE_COLOR_NEGATIVE, width=4) im_drawer.text( - text='Dominance Effect', xy=(startPosX+42, startPosY+5), + text='Dominance Effect', xy=(startPosX + 42, startPosY + 5), font=labelFont, fill=BLACK) startPosY += stepPosY if self.haplotypeAnalystChecked: im_drawer.line( - xy=((startPosX-34, startPosY), (startPosX-17, startPosY)), + xy=((startPosX - 34, startPosY), (startPosX - 17, startPosY)), fill=self.HAPLOTYPE_POSITIVE, width=4) im_drawer.line( - xy=((startPosX-17, startPosY), (startPosX, startPosY)), + xy=((startPosX - 17, startPosY), (startPosX, startPosY)), fill=self.HAPLOTYPE_NEGATIVE, width=4) im_drawer.line( - xy=((startPosX, startPosY), (startPosX+17, startPosY)), + xy=((startPosX, startPosY), (startPosX + 17, startPosY)), fill=self.HAPLOTYPE_HETEROZYGOUS, width=4) im_drawer.line( - xy=((startPosX+17, startPosY), (startPosX+34, startPosY)), + xy=((startPosX + 17, startPosY), (startPosX + 34, startPosY)), fill=self.HAPLOTYPE_RECOMBINATION, width=4) im_drawer.text( text='Haplotypes (Pat, Mat, Het, Unk)', - xy=(startPosX+41, startPosY+TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) + xy=(startPosX + 41, startPosY + TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) startPosY += stepPosY if self.permChecked and self.nperm > 0: @@ -1118,26 +1168,29 @@ class DisplayMappingResults: if self.multipleInterval and not self.bootChecked: thisStartX = canvas.size[0] - xRightOffset - 205 im_drawer.line( - xy=((thisStartX, startPosY), ( startPosX + 32, startPosY)), + xy=((thisStartX, startPosY), (startPosX + 32, startPosY)), fill=self.SIGNIFICANT_COLOR, width=self.SIGNIFICANT_WIDTH) im_drawer.line( - xy=((thisStartX, startPosY + stepPosY), ( startPosX + 32, startPosY + stepPosY)), + xy=((thisStartX, startPosY + stepPosY), + (startPosX + 32, startPosY + stepPosY)), fill=self.SUGGESTIVE_COLOR, width=self.SUGGESTIVE_WIDTH) im_drawer.text( - text='Significant %s = %2.2f' % (self.LRS_LOD, self.significant), - xy=(thisStartX+40, startPosY+TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) + text='Significant %s = %2.2f' % ( + self.LRS_LOD, self.significant), + xy=(thisStartX + 40, startPosY + TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) im_drawer.text( text='Suggestive %s = %2.2f' % (self.LRS_LOD, self.suggestive), - xy=(thisStartX+40, startPosY + TEXT_Y_DISPLACEMENT +stepPosY), font=labelFont, + xy=(thisStartX + 40, startPosY + TEXT_Y_DISPLACEMENT + stepPosY), font=labelFont, fill=BLACK) - labelFont = ImageFont.truetype(font=VERDANA_FILE, size=12*fontZoom) + labelFont = ImageFont.truetype(font=VERDANA_FILE, size=12 * fontZoom) labelColor = BLACK if self.dataset.type == "Publish" or self.dataset.type == "Geno": dataset_label = self.dataset.fullname else: - dataset_label = "%s - %s" % (self.dataset.group.name, self.dataset.fullname) + dataset_label = "%s - %s" % (self.dataset.group.name, + self.dataset.fullname) string1 = 'Dataset: %s' % (dataset_label) @@ -1154,7 +1207,8 @@ class DisplayMappingResults: string3 = 'Using GEMMA mapping method with ' if self.covariates != "": string3 += 'the cofactors below:' - cofactor_names = ", ".join([covar.split(":")[0] for covar in self.covariates.split(",")]) + cofactor_names = ", ".join( + [covar.split(":")[0] for covar in self.covariates.split(",")]) string4 = cofactor_names else: string3 += 'no cofactors' @@ -1162,7 +1216,8 @@ class DisplayMappingResults: string3 = 'Using R/qtl mapping method with ' if self.covariates != "": string3 += 'the cofactors below:' - cofactor_names = ", ".join([covar.split(":")[0] for covar in self.covariates.split(",")]) + cofactor_names = ", ".join( + [covar.split(":")[0] for covar in self.covariates.split(",")]) string4 = cofactor_names elif self.controlLocus and self.doControl != "false": string3 += '%s as control' % self.controlLocus @@ -1180,32 +1235,36 @@ class DisplayMappingResults: if self.selectedChr == -1: identification = "Mapping on All Chromosomes for " else: - identification = "Mapping on Chromosome %s for " % (self.ChrList[self.selectedChr][0]) + identification = "Mapping on Chromosome %s for " % ( + self.ChrList[self.selectedChr][0]) if self.this_trait.symbol: - identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.symbol) + identification += "Trait: %s - %s" % ( + self.this_trait.name, self.this_trait.symbol) elif self.dataset.type == "Publish": if self.this_trait.post_publication_abbreviation: - identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.post_publication_abbreviation) + identification += "Trait: %s - %s" % ( + self.this_trait.name, self.this_trait.post_publication_abbreviation) elif self.this_trait.pre_publication_abbreviation: - identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.pre_publication_abbreviation) + identification += "Trait: %s - %s" % ( + self.this_trait.name, self.this_trait.pre_publication_abbreviation) else: identification += "Trait: %s" % (self.this_trait.name) else: identification += "Trait: %s" % (self.this_trait.name) identification += " with %s samples" % (self.n_samples) - d = 4+ max( + d = 4 + max( im_drawer.textsize(identification, font=labelFont)[0], im_drawer.textsize(string1, font=labelFont)[0], im_drawer.textsize(string2, font=labelFont)[0]) im_drawer.text( text=identification, - xy=(xLeftOffset, y_constant*fontZoom), font=labelFont, + xy=(xLeftOffset, y_constant * fontZoom), font=labelFont, fill=labelColor) y_constant += 15 else: - d = 4+ max( + d = 4 + max( im_drawer.textsize(string1, font=labelFont)[0], im_drawer.textsize(string2, font=labelFont)[0]) @@ -1223,28 +1282,28 @@ class DisplayMappingResults: transform_text += "Invert +/-" im_drawer.text( - text=transform_text, xy=(xLeftOffset, y_constant*fontZoom), + text=transform_text, xy=(xLeftOffset, y_constant * fontZoom), font=labelFont, fill=labelColor) y_constant += 15 im_drawer.text( - text=string1, xy=(xLeftOffset, y_constant*fontZoom), + text=string1, xy=(xLeftOffset, y_constant * fontZoom), font=labelFont, fill=labelColor) y_constant += 15 im_drawer.text( - text=string2, xy=(xLeftOffset, y_constant*fontZoom), + text=string2, xy=(xLeftOffset, y_constant * fontZoom), font=labelFont, fill=labelColor) y_constant += 15 if string3 != '': im_drawer.text( - text=string3, xy=(xLeftOffset, y_constant*fontZoom), + text=string3, xy=(xLeftOffset, y_constant * fontZoom), font=labelFont, fill=labelColor) y_constant += 15 if string4 != '': im_drawer.text( - text=string4, xy=(xLeftOffset, y_constant*fontZoom), + text=string4, xy=(xLeftOffset, y_constant * fontZoom), font=labelFont, fill=labelColor) - def drawGeneBand(self, canvas, gifmap, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): + def drawGeneBand(self, canvas, gifmap, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): im_drawer = ImageDraw.Draw(canvas) if self.plotScale != 'physic' or self.selectedChr == -1 or not self.geneCol: return @@ -1264,11 +1323,12 @@ class DisplayMappingResults: if self.dataset.group.species == "mouse": txStart = theGO["TxStart"] txEnd = theGO["TxEnd"] - geneLength = (txEnd - txStart)*1000.0 - tenPercentLength = geneLength*0.0001 - SNPdensity = theGO["snpCount"]/geneLength + geneLength = (txEnd - txStart) * 1000.0 + tenPercentLength = geneLength * 0.0001 + SNPdensity = theGO["snpCount"] / geneLength - exonStarts = list(map(float, theGO['exonStarts'].split(",")[:-1])) + exonStarts = list( + map(float, theGO['exonStarts'].split(",")[:-1])) exonEnds = list(map(float, theGO['exonEnds'].split(",")[:-1])) cdsStart = theGO['cdsStart'] cdsEnd = theGO['cdsEnd'] @@ -1277,23 +1337,26 @@ class DisplayMappingResults: strand = theGO["Strand"] exonCount = theGO["exonCount"] - geneStartPix = xLeftOffset + plotXScale*(float(txStart) - startMb) - geneEndPix = xLeftOffset + plotXScale*(float(txEnd) - startMb) #at least one pixel + geneStartPix = xLeftOffset + \ + plotXScale * (float(txStart) - startMb) + geneEndPix = xLeftOffset + plotXScale * \ + (float(txEnd) - startMb) # at least one pixel if (geneEndPix < xLeftOffset): - return; # this gene is not on the screen + return # this gene is not on the screen elif (geneEndPix > xLeftOffset + plotWidth): - geneEndPix = xLeftOffset + plotWidth; # clip the last in-range gene + geneEndPix = xLeftOffset + plotWidth # clip the last in-range gene if (geneStartPix > xLeftOffset + plotWidth): - return; # we are outside the valid on-screen range, so stop drawing genes + return # we are outside the valid on-screen range, so stop drawing genes elif (geneStartPix < xLeftOffset): - geneStartPix = xLeftOffset; # clip the first in-range gene + geneStartPix = xLeftOffset # clip the first in-range gene - #color the gene based on SNP density - #found earlier, needs to be recomputed as snps are added - #always apply colors now, even if SNP Track not checked - Zach 11/24/2010 + # color the gene based on SNP density + # found earlier, needs to be recomputed as snps are added + # always apply colors now, even if SNP Track not checked - Zach 11/24/2010 - densities=[1.0000000000000001e-05, 0.094094033555233408, 0.3306166377816987, 0.88246026851027781, 2.6690084029581951, 4.1, 61.0] + densities = [1.0000000000000001e-05, 0.094094033555233408, + 0.3306166377816987, 0.88246026851027781, 2.6690084029581951, 4.1, 61.0] if SNPdensity < densities[0]: myColor = BLACK elif SNPdensity < densities[1]: @@ -1310,11 +1373,12 @@ class DisplayMappingResults: myColor = DARKRED outlineColor = myColor - fillColor = myColor + fillColor = myColor - TITLE = "Gene: %s (%s)\nFrom %2.3f to %2.3f Mb (%s)\nNum. exons: %d." % (geneSymbol, accession, float(txStart), float(txEnd), strand, exonCount) + TITLE = "Gene: %s (%s)\nFrom %2.3f to %2.3f Mb (%s)\nNum. exons: %d." % ( + geneSymbol, accession, float(txStart), float(txEnd), strand, exonCount) # NL: 06-02-2011 Rob required to change this link for gene related - HREF=geneNCBILink %geneSymbol + HREF = geneNCBILink % geneSymbol elif self.dataset.group.species == "rat": exonStarts = [] @@ -1327,85 +1391,92 @@ class DisplayMappingResults: strand = theGO["Strand"] exonCount = 0 - geneStartPix = xLeftOffset + plotXScale*(float(txStart) - startMb) - geneEndPix = xLeftOffset + plotXScale*(float(txEnd) - startMb) #at least one pixel + geneStartPix = xLeftOffset + \ + plotXScale * (float(txStart) - startMb) + geneEndPix = xLeftOffset + plotXScale * \ + (float(txEnd) - startMb) # at least one pixel if (geneEndPix < xLeftOffset): - return; # this gene is not on the screen + return # this gene is not on the screen elif (geneEndPix > xLeftOffset + plotWidth): - geneEndPix = xLeftOffset + plotWidth; # clip the last in-range gene + geneEndPix = xLeftOffset + plotWidth # clip the last in-range gene if (geneStartPix > xLeftOffset + plotWidth): - return; # we are outside the valid on-screen range, so stop drawing genes + return # we are outside the valid on-screen range, so stop drawing genes elif (geneStartPix < xLeftOffset): - geneStartPix = xLeftOffset; # clip the first in-range gene + geneStartPix = xLeftOffset # clip the first in-range gene outlineColor = DARKBLUE fillColor = DARKBLUE - TITLE = "Gene: %s\nFrom %2.3f to %2.3f Mb (%s)" % (geneSymbol, float(txStart), float(txEnd), strand) + TITLE = "Gene: %s\nFrom %2.3f to %2.3f Mb (%s)" % ( + geneSymbol, float(txStart), float(txEnd), strand) # NL: 06-02-2011 Rob required to change this link for gene related - HREF=geneNCBILink %geneSymbol + HREF = geneNCBILink % geneSymbol else: outlineColor = ORANGE fillColor = ORANGE TITLE = "Gene: %s" % geneSymbol - #Draw Genes - geneYLocation = yPaddingTop + (gIndex % self.NUM_GENE_ROWS) * self.EACH_GENE_HEIGHT*zoom + # Draw Genes + geneYLocation = yPaddingTop + \ + (gIndex % self.NUM_GENE_ROWS) * self.EACH_GENE_HEIGHT * zoom if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": - geneYLocation += 4*self.BAND_HEIGHT + 4*self.BAND_SPACING + geneYLocation += 4 * self.BAND_HEIGHT + 4 * self.BAND_SPACING else: - geneYLocation += 3*self.BAND_HEIGHT + 3*self.BAND_SPACING + geneYLocation += 3 * self.BAND_HEIGHT + 3 * self.BAND_SPACING - #draw the detail view + # draw the detail view if self.endMb - self.startMb <= self.DRAW_DETAIL_MB and geneEndPix - geneStartPix > self.EACH_GENE_ARROW_SPACING * 3: utrColor = ImageColor.getrgb("rgb(66%, 66%, 66%)") arrowColor = ImageColor.getrgb("rgb(70%, 70%, 70%)") - #draw the line that runs the entire length of the gene + # draw the line that runs the entire length of the gene im_drawer.line( xy=( - (geneStartPix, geneYLocation + self.EACH_GENE_HEIGHT/2*zoom), - ( geneEndPix, geneYLocation + self.EACH_GENE_HEIGHT/2*zoom)), + (geneStartPix, geneYLocation + \ + self.EACH_GENE_HEIGHT / 2 * zoom), + (geneEndPix, geneYLocation + self.EACH_GENE_HEIGHT / 2 * zoom)), fill=outlineColor, width=1) - #draw the arrows + # draw the arrows if geneEndPix - geneStartPix < 1: genePixRange = 1 else: genePixRange = int(geneEndPix - geneStartPix) for xCoord in range(0, genePixRange): - if (xCoord % self.EACH_GENE_ARROW_SPACING == 0 and xCoord + self.EACH_GENE_ARROW_SPACING < geneEndPix-geneStartPix) or xCoord == 0: + if (xCoord % self.EACH_GENE_ARROW_SPACING == 0 and xCoord + self.EACH_GENE_ARROW_SPACING < geneEndPix - geneStartPix) or xCoord == 0: if strand == "+": im_drawer.line( xy=((geneStartPix + xCoord, geneYLocation), (geneStartPix + xCoord + self.EACH_GENE_ARROW_WIDTH, - geneYLocation +(self.EACH_GENE_HEIGHT / 2)*zoom)), + geneYLocation + (self.EACH_GENE_HEIGHT / 2) * zoom)), fill=arrowColor, width=1) im_drawer.line( xy=((geneStartPix + xCoord, - geneYLocation + self.EACH_GENE_HEIGHT*zoom), - (geneStartPix + xCoord+self.EACH_GENE_ARROW_WIDTH, + geneYLocation + self.EACH_GENE_HEIGHT * zoom), + (geneStartPix + xCoord + self.EACH_GENE_ARROW_WIDTH, geneYLocation + (self.EACH_GENE_HEIGHT / 2) * zoom)), fill=arrowColor, width=1) else: im_drawer.line( xy=((geneStartPix + xCoord + self.EACH_GENE_ARROW_WIDTH, geneYLocation), - ( geneStartPix + xCoord, - geneYLocation +(self.EACH_GENE_HEIGHT / 2)*zoom)), + (geneStartPix + xCoord, + geneYLocation + (self.EACH_GENE_HEIGHT / 2) * zoom)), fill=arrowColor, width=1) im_drawer.line( xy=((geneStartPix + xCoord + self.EACH_GENE_ARROW_WIDTH, - geneYLocation + self.EACH_GENE_HEIGHT*zoom), - ( geneStartPix + xCoord, - geneYLocation + (self.EACH_GENE_HEIGHT / 2)*zoom)), + geneYLocation + self.EACH_GENE_HEIGHT * zoom), + (geneStartPix + xCoord, + geneYLocation + (self.EACH_GENE_HEIGHT / 2) * zoom)), fill=arrowColor, width=1) - #draw the blocks for the exon regions + # draw the blocks for the exon regions for i in range(0, len(exonStarts)): - exonStartPix = (exonStarts[i]-startMb)*plotXScale + xLeftOffset - exonEndPix = (exonEnds[i]-startMb)*plotXScale + xLeftOffset + exonStartPix = ( + exonStarts[i] - startMb) * plotXScale + xLeftOffset + exonEndPix = (exonEnds[i] - startMb) * \ + plotXScale + xLeftOffset if (exonStartPix < xLeftOffset): exonStartPix = xLeftOffset if (exonEndPix < xLeftOffset): @@ -1416,13 +1487,14 @@ class DisplayMappingResults: exonStartPix = xLeftOffset + plotWidth im_drawer.rectangle( xy=((exonStartPix, geneYLocation), - (exonEndPix, (geneYLocation + self.EACH_GENE_HEIGHT*zoom))), - outline = outlineColor, fill = fillColor) + (exonEndPix, (geneYLocation + self.EACH_GENE_HEIGHT * zoom))), + outline=outlineColor, fill=fillColor) - #draw gray blocks for 3' and 5' UTR blocks + # draw gray blocks for 3' and 5' UTR blocks if cdsStart and cdsEnd: - utrStartPix = (txStart-startMb)*plotXScale + xLeftOffset - utrEndPix = (cdsStart-startMb)*plotXScale + xLeftOffset + utrStartPix = (txStart - startMb) * \ + plotXScale + xLeftOffset + utrEndPix = (cdsStart - startMb) * plotXScale + xLeftOffset if (utrStartPix < xLeftOffset): utrStartPix = xLeftOffset if (utrEndPix < xLeftOffset): @@ -1439,13 +1511,14 @@ class DisplayMappingResults: labelText = "5'" im_drawer.text( text=labelText, - xy=(utrStartPix-9, geneYLocation+self.EACH_GENE_HEIGHT), + xy=(utrStartPix - 9, geneYLocation + \ + self.EACH_GENE_HEIGHT), font=ImageFont.truetype(font=ARIAL_FILE, size=2)) - #the second UTR region + # the second UTR region - utrStartPix = (cdsEnd-startMb)*plotXScale + xLeftOffset - utrEndPix = (txEnd-startMb)*plotXScale + xLeftOffset + utrStartPix = (cdsEnd - startMb) * plotXScale + xLeftOffset + utrEndPix = (txEnd - startMb) * plotXScale + xLeftOffset if (utrStartPix < xLeftOffset): utrStartPix = xLeftOffset if (utrEndPix < xLeftOffset): @@ -1462,17 +1535,19 @@ class DisplayMappingResults: labelText = "3'" im_drawer.text( text=labelText, - xy=(utrEndPix+2, geneYLocation+self.EACH_GENE_HEIGHT), + xy=(utrEndPix + 2, geneYLocation + \ + self.EACH_GENE_HEIGHT), font=ImageFont.truetype(font=ARIAL_FILE, size=2)) - #draw the genes as rectangles + # draw the genes as rectangles else: im_drawer.rectangle( xy=((geneStartPix, geneYLocation), - (geneEndPix, (geneYLocation + self.EACH_GENE_HEIGHT*zoom))), - outline= outlineColor, fill = fillColor) + (geneEndPix, (geneYLocation + self.EACH_GENE_HEIGHT * zoom))), + outline=outlineColor, fill=fillColor) - COORDS = "%d, %d, %d, %d" %(geneStartPix, geneYLocation, geneEndPix, (geneYLocation + self.EACH_GENE_HEIGHT)) + COORDS = "%d, %d, %d, %d" % ( + geneStartPix, geneYLocation, geneEndPix, (geneYLocation + self.EACH_GENE_HEIGHT)) # NL: 06-02-2011 Rob required to display NCBI info in a new window gifmap.append( HtmlGenWrapper.create_area_tag( @@ -1482,8 +1557,8 @@ class DisplayMappingResults: title=TITLE, target="_blank")) -## BEGIN HaplotypeAnalyst - def drawHaplotypeBand(self, canvas, gifmap, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): +# BEGIN HaplotypeAnalyst + def drawHaplotypeBand(self, canvas, gifmap, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): if self.plotScale != 'physic' or self.selectedChr == -1 or not self.geneCol: return @@ -1496,61 +1571,66 @@ class DisplayMappingResults: samplelist = list(self.genotype.prgy) - smd=[] + smd = [] for sample in self.sample_vals_dict.keys(): if self.sample_vals_dict[sample] != "x" and sample in samplelist: - temp = GeneralObject(name=sample, value=float(self.sample_vals_dict[sample])) + temp = GeneralObject(name=sample, value=float( + self.sample_vals_dict[sample])) smd.append(temp) else: continue - smd.sort(key = lambda A: A.value) + smd.sort(key=lambda A: A.value) smd.reverse() oldgeneEndPix = -1 - #Initializing plotRight, error before + # Initializing plotRight, error before plotRight = xRightOffset im_drawer = ImageDraw.Draw(canvas) -#### find out PlotRight +# find out PlotRight for _chr in self.genotype: if _chr.name == self.ChrList[self.selectedChr][0]: for i, _locus in enumerate(_chr): txStart = _chr[i].Mb - txEnd = _chr[i].Mb + txEnd = _chr[i].Mb - geneStartPix = xLeftOffset + plotXScale*(float(txStart) - startMb) - 0 - geneEndPix = xLeftOffset + plotXScale*(float(txEnd) - startMb) - 0 + geneStartPix = xLeftOffset + plotXScale * \ + (float(txStart) - startMb) - 0 + geneEndPix = xLeftOffset + plotXScale * \ + (float(txEnd) - startMb) - 0 drawit = 1 if (geneStartPix < xLeftOffset): - drawit = 0; + drawit = 0 if (geneStartPix > xLeftOffset + plotWidth): - drawit = 0; + drawit = 0 if drawit == 1: - if _chr[i].name != " - " : + if _chr[i].name != " - ": plotRight = geneEndPix + 4 -#### end find out PlotRight +# end find out PlotRight firstGene = 1 lastGene = 0 - #Sets the length to the length of the strain list. Beforehand, "oldgeno = self.genotype[0][i].genotype" - #was the only place it was initialized, which worked as long as the very start (startMb = None/0) wasn't being mapped. - #Now there should always be some value set for "oldgeno" - Zach 12/14/2010 - oldgeno = [None]*len(self.strainlist) + # Sets the length to the length of the strain list. Beforehand, "oldgeno = self.genotype[0][i].genotype" + # was the only place it was initialized, which worked as long as the very start (startMb = None/0) wasn't being mapped. + # Now there should always be some value set for "oldgeno" - Zach 12/14/2010 + oldgeno = [None] * len(self.strainlist) for i, _chr in enumerate(self.genotype): if _chr.name == self.ChrList[self.selectedChr][0]: for j, _locus in enumerate(_chr): txStart = _chr[j].Mb - txEnd = _chr[j].Mb + txEnd = _chr[j].Mb - geneStartPix = xLeftOffset + plotXScale*(float(txStart) - startMb) - 0 - geneEndPix = xLeftOffset + plotXScale*(float(txEnd) - startMb) + 0 + geneStartPix = xLeftOffset + plotXScale * \ + (float(txStart) - startMb) - 0 + geneEndPix = xLeftOffset + plotXScale * \ + (float(txEnd) - startMb) + 0 if oldgeneEndPix >= xLeftOffset: drawStart = oldgeneEndPix + 4 @@ -1582,36 +1662,38 @@ class DisplayMappingResults: if drawit == 1: myColor = DARKBLUE outlineColor = myColor - fillColor = myColor + fillColor = myColor - maxind=0 + maxind = 0 - #Draw Genes + # Draw Genes - geneYLocation = yPaddingTop + self.NUM_GENE_ROWS * (self.EACH_GENE_HEIGHT)*zoom + geneYLocation = yPaddingTop + self.NUM_GENE_ROWS * \ + (self.EACH_GENE_HEIGHT) * zoom if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": - geneYLocation += 4*self.BAND_HEIGHT + 4*self.BAND_SPACING + geneYLocation += 4 * self.BAND_HEIGHT + 4 * self.BAND_SPACING else: - geneYLocation += 3*self.BAND_HEIGHT + 3*self.BAND_SPACING + geneYLocation += 3 * self.BAND_HEIGHT + 3 * self.BAND_SPACING - if _chr[j].name != " - " : + if _chr[j].name != " - ": if (firstGene == 1) and (lastGene != 1): oldgeneEndPix = drawStart = xLeftOffset oldgeno = _chr[j].genotype continue - for k, _geno in enumerate (_chr[j].genotype): - plotbxd=0 + for k, _geno in enumerate(_chr[j].genotype): + plotbxd = 0 if samplelist[k] in [item.name for item in smd]: - plotbxd=1 + plotbxd = 1 if (plotbxd == 1): ind = 0 if samplelist[k] in [item.name for item in smd]: - ind = [item.name for item in smd].index(samplelist[k]) + ind = [item.name for item in smd].index( + samplelist[k]) - maxind=max(ind, maxind) + maxind = max(ind, maxind) # lines if (oldgeno[k] == -1 and _geno == -1): @@ -1621,28 +1703,29 @@ class DisplayMappingResults: elif (oldgeno[k] == 0 and _geno == 0): mylineColor = self.HAPLOTYPE_HETEROZYGOUS else: - mylineColor = self.HAPLOTYPE_RECOMBINATION # XZ: Unknown + mylineColor = self.HAPLOTYPE_RECOMBINATION # XZ: Unknown im_drawer.line( xy=((drawStart, - geneYLocation+7+2*ind*self.EACH_GENE_HEIGHT*zoom), + geneYLocation + 7 + 2 * ind * self.EACH_GENE_HEIGHT * zoom), (drawEnd, - geneYLocation+7+2*ind*self.EACH_GENE_HEIGHT*zoom)), - fill= mylineColor, width=zoom*(self.EACH_GENE_HEIGHT+2)) + geneYLocation + 7 + 2 * ind * self.EACH_GENE_HEIGHT * zoom)), + fill=mylineColor, width=zoom * (self.EACH_GENE_HEIGHT + 2)) - fillColor=BLACK - outlineColor=BLACK + fillColor = BLACK + outlineColor = BLACK if lastGene == 0: im_drawer.rectangle( xy=((geneStartPix, - geneYLocation+2*ind*self.EACH_GENE_HEIGHT*zoom), + geneYLocation + 2 * ind * self.EACH_GENE_HEIGHT * zoom), (geneEndPix, - geneYLocation+2*ind*self.EACH_GENE_HEIGHT+ 2*self.EACH_GENE_HEIGHT*zoom)), + geneYLocation + 2 * ind * self.EACH_GENE_HEIGHT + 2 * self.EACH_GENE_HEIGHT * zoom)), outline=outlineColor, fill=fillColor) - - COORDS = "%d, %d, %d, %d" %(geneStartPix, geneYLocation+ind*self.EACH_GENE_HEIGHT, geneEndPix+1, (geneYLocation + ind*self.EACH_GENE_HEIGHT)) - TITLE = "Strain: %s, marker (%s) \n Position %2.3f Mb." % (samplelist[k], _chr[j].name, float(txStart)) + COORDS = "%d, %d, %d, %d" % ( + geneStartPix, geneYLocation + ind * self.EACH_GENE_HEIGHT, geneEndPix + 1, (geneYLocation + ind * self.EACH_GENE_HEIGHT)) + TITLE = "Strain: %s, marker (%s) \n Position %2.3f Mb." % ( + samplelist[k], _chr[j].name, float(txStart)) HREF = '' gifmap.append( HtmlGenWrapper.create_area_tag( @@ -1658,11 +1741,10 @@ class DisplayMappingResults: mylineColor = self.HAPLOTYPE_RECOMBINATION im_drawer.line( xy=((plotRight, - geneYLocation+7+2*ind*self.EACH_GENE_HEIGHT*zoom), + geneYLocation + 7 + 2 * ind * self.EACH_GENE_HEIGHT * zoom), (drawEnd, - geneYLocation+7+2*ind*self.EACH_GENE_HEIGHT*zoom)), - fill= mylineColor, width=zoom*(self.EACH_GENE_HEIGHT+2)) - + geneYLocation + 7 + 2 * ind * self.EACH_GENE_HEIGHT * zoom)), + fill=mylineColor, width=zoom * (self.EACH_GENE_HEIGHT + 2)) if lastGene == 0: draw_rotated_text( @@ -1670,10 +1752,10 @@ class DisplayMappingResults: font=ImageFont.truetype(font=VERDANA_FILE, size=12), xy=(geneStartPix, - geneYLocation+17+2*maxind*self.EACH_GENE_HEIGHT*zoom), + geneYLocation + 17 + 2 * maxind * self.EACH_GENE_HEIGHT * zoom), fill=BLACK, angle=-90) - oldgeneEndPix = geneEndPix; + oldgeneEndPix = geneEndPix oldgeno = _chr[j].genotype firstGene = 0 else: @@ -1683,31 +1765,34 @@ class DisplayMappingResults: if _chr.name == self.ChrList[self.selectedChr][0]: for j, _geno in enumerate(_chr[1].genotype): - plotbxd=0 + plotbxd = 0 if samplelist[j] in [item.name for item in smd]: - plotbxd=1 + plotbxd = 1 if (plotbxd == 1): - ind = [item.name for item in smd].index(samplelist[j]) - 1 + ind = [item.name for item in smd].index( + samplelist[j]) - 1 expr = smd[ind].value # Place where font is hardcoded im_drawer.text( text="%s" % (samplelist[j]), xy=((xLeftOffset + plotWidth + 10), - geneYLocation+11+2*ind*self.EACH_GENE_HEIGHT*zoom), - font=ImageFont.truetype(font=VERDANA_FILE, size=12), + geneYLocation + 11 + 2 * ind * self.EACH_GENE_HEIGHT * zoom), + font=ImageFont.truetype( + font=VERDANA_FILE, size=12), fill=BLACK) im_drawer.text( text="%2.2f" % (expr), xy=((xLeftOffset + plotWidth + 60), - geneYLocation+11+2*ind*self.EACH_GENE_HEIGHT*zoom), - font=ImageFont.truetype(font=VERDANA_FILE, size=12), + geneYLocation + 11 + 2 * ind * self.EACH_GENE_HEIGHT * zoom), + font=ImageFont.truetype( + font=VERDANA_FILE, size=12), fill=BLACK) -## END HaplotypeAnalyst +# END HaplotypeAnalyst - def drawClickBand(self, canvas, gifmap, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): + def drawClickBand(self, canvas, gifmap, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): im_drawer = ImageDraw.Draw(canvas) if self.plotScale != 'physic' or self.selectedChr == -1: return @@ -1724,12 +1809,16 @@ class DisplayMappingResults: # but it makes the HTML huge, and takes forever to render the page in the first place) # Draw the bands that you can click on to go to UCSC / Ensembl MAX_CLICKABLE_REGION_DIVISIONS = 100 - clickableRegionLabelFont=ImageFont.truetype(font=VERDANA_FILE, size=9) - pixelStep = max(5, int(float(plotWidth)/MAX_CLICKABLE_REGION_DIVISIONS)) + clickableRegionLabelFont = ImageFont.truetype( + font=VERDANA_FILE, size=9) + pixelStep = max( + 5, int(float(plotWidth) / MAX_CLICKABLE_REGION_DIVISIONS)) # pixelStep: every N pixels, we make a new clickable area for the user to go to that area of the genome. - numBasesCurrentlyOnScreen = self.kONE_MILLION*abs(startMb - endMb) # Number of bases on screen now - flankingWidthInBases = int ( min( (float(numBasesCurrentlyOnScreen) / 2.0), (5*self.kONE_MILLION) ) ) + numBasesCurrentlyOnScreen = self.kONE_MILLION * \ + abs(startMb - endMb) # Number of bases on screen now + flankingWidthInBases = int( + min((float(numBasesCurrentlyOnScreen) / 2.0), (5 * self.kONE_MILLION))) webqtlZoomWidth = numBasesCurrentlyOnScreen / 16.0 # Flanking width should be such that we either zoom in to a 10 million base region, or we show the clicked region at the same scale as we are currently seeing. @@ -1738,23 +1827,33 @@ class DisplayMappingResults: paddingTop = yTopOffset if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": - phenogenPaddingTop = paddingTop + (self.BAND_HEIGHT + self.BAND_SPACING) - ucscPaddingTop = paddingTop + 2*(self.BAND_HEIGHT + self.BAND_SPACING) - ensemblPaddingTop = paddingTop + 3*(self.BAND_HEIGHT + self.BAND_SPACING) + phenogenPaddingTop = paddingTop + \ + (self.BAND_HEIGHT + self.BAND_SPACING) + ucscPaddingTop = paddingTop + 2 * \ + (self.BAND_HEIGHT + self.BAND_SPACING) + ensemblPaddingTop = paddingTop + 3 * \ + (self.BAND_HEIGHT + self.BAND_SPACING) else: - ucscPaddingTop = paddingTop + (self.BAND_HEIGHT + self.BAND_SPACING) - ensemblPaddingTop = paddingTop + 2*(self.BAND_HEIGHT + self.BAND_SPACING) + ucscPaddingTop = paddingTop + \ + (self.BAND_HEIGHT + self.BAND_SPACING) + ensemblPaddingTop = paddingTop + 2 * \ + (self.BAND_HEIGHT + self.BAND_SPACING) if zoom == 1: for pixel in range(xLeftOffset, xLeftOffset + plotWidth, pixelStep): - calBase = self.kONE_MILLION*(startMb + (endMb-startMb)*(pixel-xLeftOffset-0.0)/plotWidth) + calBase = self.kONE_MILLION * \ + (startMb + (endMb - startMb) * \ + (pixel - xLeftOffset - 0.0) / plotWidth) xBrowse1 = pixel - xBrowse2 = min(xLeftOffset + plotWidth, (pixel + pixelStep - 1)) + xBrowse2 = min(xLeftOffset + plotWidth, + (pixel + pixelStep - 1)) - WEBQTL_COORDS = "%d, %d, %d, %d" % (xBrowse1, paddingTop, xBrowse2, (paddingTop+self.BAND_HEIGHT)) - WEBQTL_HREF = "javascript:rangeView('%s', %f, %f)" % (self.selectedChr - 1, max(0, (calBase-webqtlZoomWidth))/1000000.0, (calBase+webqtlZoomWidth)/1000000.0) + WEBQTL_COORDS = "%d, %d, %d, %d" % ( + xBrowse1, paddingTop, xBrowse2, (paddingTop + self.BAND_HEIGHT)) + WEBQTL_HREF = "javascript:rangeView('%s', %f, %f)" % (self.selectedChr - 1, max( + 0, (calBase - webqtlZoomWidth)) / 1000000.0, (calBase + webqtlZoomWidth) / 1000000.0) WEBQTL_TITLE = "Click to view this section of the genome in WebQTL" gifmap.append( @@ -1769,15 +1868,19 @@ class DisplayMappingResults: outline=self.CLICKABLE_WEBQTL_REGION_COLOR, fill=self.CLICKABLE_WEBQTL_REGION_COLOR) im_drawer.line( - xy=((xBrowse1, paddingTop), ( xBrowse1, (paddingTop + self.BAND_HEIGHT))), + xy=((xBrowse1, paddingTop), (xBrowse1, + (paddingTop + self.BAND_HEIGHT))), fill=self.CLICKABLE_WEBQTL_REGION_OUTLINE_COLOR) if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": - PHENOGEN_COORDS = "%d, %d, %d, %d" % (xBrowse1, phenogenPaddingTop, xBrowse2, (phenogenPaddingTop+self.BAND_HEIGHT)) + PHENOGEN_COORDS = "%d, %d, %d, %d" % ( + xBrowse1, phenogenPaddingTop, xBrowse2, (phenogenPaddingTop + self.BAND_HEIGHT)) if self.dataset.group.species == "mouse": - PHENOGEN_HREF = "https://phenogen.org/gene.jsp?speciesCB=Mm&auto=Y&geneTxt=chr%s:%d-%d&genomeVer=mm10" % (self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases) + PHENOGEN_HREF = "https://phenogen.org/gene.jsp?speciesCB=Mm&auto=Y&geneTxt=chr%s:%d-%d&genomeVer=mm10" % ( + self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) else: - PHENOGEN_HREF = "https://phenogen.org/gene.jsp?speciesCB=Mm&auto=Y&geneTxt=chr%s:%d-%d&genomeVer=mm10" % (self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases) + PHENOGEN_HREF = "https://phenogen.org/gene.jsp?speciesCB=Mm&auto=Y&geneTxt=chr%s:%d-%d&genomeVer=mm10" % ( + self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) PHENOGEN_TITLE = "Click to view this section of the genome in PhenoGen" gifmap.append( HtmlGenWrapper.create_area_tag( @@ -1787,18 +1890,22 @@ class DisplayMappingResults: title=PHENOGEN_TITLE)) im_drawer.rectangle( xy=((xBrowse1, phenogenPaddingTop), - (xBrowse2, (phenogenPaddingTop+self.BAND_HEIGHT))), + (xBrowse2, (phenogenPaddingTop + self.BAND_HEIGHT))), outline=self.CLICKABLE_PHENOGEN_REGION_COLOR, fill=self.CLICKABLE_PHENOGEN_REGION_COLOR) im_drawer.line( - xy=((xBrowse1, phenogenPaddingTop), ( xBrowse1, (phenogenPaddingTop+self.BAND_HEIGHT))), + xy=((xBrowse1, phenogenPaddingTop), (xBrowse1, + (phenogenPaddingTop + self.BAND_HEIGHT))), fill=self.CLICKABLE_PHENOGEN_REGION_OUTLINE_COLOR) - UCSC_COORDS = "%d, %d, %d, %d" %(xBrowse1, ucscPaddingTop, xBrowse2, (ucscPaddingTop+self.BAND_HEIGHT)) + UCSC_COORDS = "%d, %d, %d, %d" % ( + xBrowse1, ucscPaddingTop, xBrowse2, (ucscPaddingTop + self.BAND_HEIGHT)) if self.dataset.group.species == "mouse": - UCSC_HREF = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d&hgt.customText=%s/snp/chr%s" % (self._ucscDb, self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases, webqtlConfig.PORTADDR, self.selectedChr) + UCSC_HREF = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d&hgt.customText=%s/snp/chr%s" % ( + self._ucscDb, self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases, webqtlConfig.PORTADDR, self.selectedChr) else: - UCSC_HREF = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d" % (self._ucscDb, self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases) + UCSC_HREF = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d" % ( + self._ucscDb, self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) UCSC_TITLE = "Click to view this section of the genome in the UCSC Genome Browser" gifmap.append( HtmlGenWrapper.create_area_tag( @@ -1808,19 +1915,22 @@ class DisplayMappingResults: title=UCSC_TITLE)) im_drawer.rectangle( xy=((xBrowse1, ucscPaddingTop), - (xBrowse2, (ucscPaddingTop+self.BAND_HEIGHT))), + (xBrowse2, (ucscPaddingTop + self.BAND_HEIGHT))), outline=self.CLICKABLE_UCSC_REGION_COLOR, fill=self.CLICKABLE_UCSC_REGION_COLOR) im_drawer.line( xy=((xBrowse1, ucscPaddingTop), - (xBrowse1, (ucscPaddingTop+self.BAND_HEIGHT))), + (xBrowse1, (ucscPaddingTop + self.BAND_HEIGHT))), fill=self.CLICKABLE_UCSC_REGION_OUTLINE_COLOR) - ENSEMBL_COORDS = "%d, %d, %d, %d" %(xBrowse1, ensemblPaddingTop, xBrowse2, (ensemblPaddingTop+self.BAND_HEIGHT)) + ENSEMBL_COORDS = "%d, %d, %d, %d" % ( + xBrowse1, ensemblPaddingTop, xBrowse2, (ensemblPaddingTop + self.BAND_HEIGHT)) if self.dataset.group.species == "mouse": - ENSEMBL_HREF = "http://www.ensembl.org/Mus_musculus/contigview?highlight=&chr=%s&vc_start=%d&vc_end=%d&x=35&y=12" % (self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases) + ENSEMBL_HREF = "http://www.ensembl.org/Mus_musculus/contigview?highlight=&chr=%s&vc_start=%d&vc_end=%d&x=35&y=12" % ( + self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) else: - ENSEMBL_HREF = "http://www.ensembl.org/Rattus_norvegicus/contigview?chr=%s&start=%d&end=%d" % (self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases) + ENSEMBL_HREF = "http://www.ensembl.org/Rattus_norvegicus/contigview?chr=%s&start=%d&end=%d" % ( + self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) ENSEMBL_TITLE = "Click to view this section of the genome in the Ensembl Genome Browser" gifmap.append(HtmlGenWrapper.create_area_tag( shape='rect', @@ -1829,36 +1939,40 @@ class DisplayMappingResults: title=ENSEMBL_TITLE)) im_drawer.rectangle( xy=((xBrowse1, ensemblPaddingTop), - (xBrowse2, (ensemblPaddingTop+self.BAND_HEIGHT))), + (xBrowse2, (ensemblPaddingTop + self.BAND_HEIGHT))), outline=self.CLICKABLE_ENSEMBL_REGION_COLOR, fill=self.CLICKABLE_ENSEMBL_REGION_COLOR) im_drawer.line( xy=((xBrowse1, ensemblPaddingTop), - (xBrowse1, (ensemblPaddingTop+self.BAND_HEIGHT))), + (xBrowse1, (ensemblPaddingTop + self.BAND_HEIGHT))), fill=self.CLICKABLE_ENSEMBL_REGION_OUTLINE_COLOR) # end for im_drawer.text( text="Click to view the corresponding section of the genome in an 8x expanded WebQTL map", - xy=((xLeftOffset + 10), paddingTop),# + self.BAND_HEIGHT/2), + xy=((xLeftOffset + 10), paddingTop), # + self.BAND_HEIGHT/2), font=clickableRegionLabelFont, fill=self.CLICKABLE_WEBQTL_TEXT_COLOR) if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": im_drawer.text( text="Click to view the corresponding section of the genome in PhenoGen", - xy=((xLeftOffset + 10), phenogenPaddingTop),# + self.BAND_HEIGHT/2), + # + self.BAND_HEIGHT/2), + xy=((xLeftOffset + 10), phenogenPaddingTop), font=clickableRegionLabelFont, fill=self.CLICKABLE_PHENOGEN_TEXT_COLOR) im_drawer.text( text="Click to view the corresponding section of the genome in the UCSC Genome Browser", - xy=((xLeftOffset + 10), ucscPaddingTop),# + self.BAND_HEIGHT/2), + # + self.BAND_HEIGHT/2), + xy=((xLeftOffset + 10), ucscPaddingTop), font=clickableRegionLabelFont, fill=self.CLICKABLE_UCSC_TEXT_COLOR) im_drawer.text( text="Click to view the corresponding section of the genome in the Ensembl Genome Browser", - xy=((xLeftOffset+10), ensemblPaddingTop),# + self.BAND_HEIGHT/2), + # + self.BAND_HEIGHT/2), + xy=((xLeftOffset + 10), ensemblPaddingTop), font=clickableRegionLabelFont, fill=self.CLICKABLE_ENSEMBL_TEXT_COLOR) - #draw the gray text - chrFont = ImageFont.truetype(font=VERDANA_BOLD_FILE, size=26*zoom) + # draw the gray text + chrFont = ImageFont.truetype( + font=VERDANA_BOLD_FILE, size=26 * zoom) chrX = xLeftOffset + plotWidth - 2 - im_drawer.textsize( "Chr %s" % self.ChrList[self.selectedChr][0], font=chrFont)[0] im_drawer.text( @@ -1866,17 +1980,17 @@ class DisplayMappingResults: xy=(chrX, phenogenPaddingTop), font=chrFont, fill=GRAY) # end of drawBrowserClickableRegions else: - #draw the gray text - chrFont = ImageFont.truetype(font=VERDANA_FILE, size=26*zoom) + # draw the gray text + chrFont = ImageFont.truetype(font=VERDANA_FILE, size=26 * zoom) chrX = xLeftOffset + (plotWidth - im_drawer.textsize( - "Chr %s" % currentChromosome, font=chrFont)[0])/2 + "Chr %s" % currentChromosome, font=chrFont)[0]) / 2 im_drawer.text( text="Chr %s" % currentChromosome, xy=(chrX, 32), font=chrFont, fill=GRAY) # end of drawBrowserClickableRegions pass - def drawXAxis(self, canvas, drawAreaHeight, gifmap, plotXScale, showLocusForm, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): + def drawXAxis(self, canvas, drawAreaHeight, gifmap, plotXScale, showLocusForm, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): im_drawer = ImageDraw.Draw(canvas) xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset plotWidth = canvas.size[0] - xLeftOffset - xRightOffset @@ -1886,33 +2000,33 @@ class DisplayMappingResults: if zoom == 2: fontZoom = 1.5 - #Parameters - NUM_MINOR_TICKS = 5 # Number of minor ticks between major ticks + # Parameters + NUM_MINOR_TICKS = 5 # Number of minor ticks between major ticks X_MAJOR_TICK_THICKNESS = 3 X_MINOR_TICK_THICKNESS = 1 - X_AXIS_THICKNESS = 1*zoom + X_AXIS_THICKNESS = 1 * zoom # ======= Alex: Draw the X-axis labels (megabase location) - MBLabelFont = ImageFont.truetype(font=VERDANA_FILE, size=15*zoom) - xMajorTickHeight = 10 * zoom # How high the tick extends below the axis - xMinorTickHeight = 5*zoom + MBLabelFont = ImageFont.truetype(font=VERDANA_FILE, size=15 * zoom) + xMajorTickHeight = 10 * zoom # How high the tick extends below the axis + xMinorTickHeight = 5 * zoom xAxisTickMarkColor = BLACK xAxisLabelColor = BLACK - fontHeight = 12*fontZoom # How tall the font that we're using is + fontHeight = 12 * fontZoom # How tall the font that we're using is spacingFromLabelToAxis = 10 if self.plotScale == 'physic': - strYLoc = yZero + MBLabelFont.font.height/2 - ###Physical single chromosome view + strYLoc = yZero + MBLabelFont.font.height / 2 + # Physical single chromosome view if self.selectedChr > -1: XScale = Plot.detScale(startMb, endMb) XStart, XEnd, XStep = XScale if XStep < 8: XStep *= 2 - spacingAmtX = spacingAmt = (XEnd-XStart)/XStep + spacingAmtX = spacingAmt = (XEnd - XStart) / XStep j = 0 - while abs(spacingAmtX -int(spacingAmtX)) >= spacingAmtX/100.0 and j < 6: + while abs(spacingAmtX - int(spacingAmtX)) >= spacingAmtX / 100.0 and j < 6: j += 1 spacingAmtX *= 10 @@ -1921,30 +2035,32 @@ class DisplayMappingResults: for counter, _Mb in enumerate(Plot.frange(XStart, XEnd, spacingAmt / NUM_MINOR_TICKS)): if _Mb < startMb or _Mb > endMb: continue - Xc = xLeftOffset + plotXScale*(_Mb - startMb) - if counter % NUM_MINOR_TICKS == 0: # Draw a MAJOR mark, not just a minor tick mark + Xc = xLeftOffset + plotXScale * (_Mb - startMb) + if counter % NUM_MINOR_TICKS == 0: # Draw a MAJOR mark, not just a minor tick mark im_drawer.line(xy=((Xc, yZero), - (Xc, yZero+xMajorTickHeight)), + (Xc, yZero + xMajorTickHeight)), fill=xAxisTickMarkColor, - width=X_MAJOR_TICK_THICKNESS) # Draw the MAJOR tick mark - labelStr = str(formatStr % _Mb) # What Mbase location to put on the label - strWidth, strHeight = im_drawer.textsize(labelStr, font=MBLabelFont) + width=X_MAJOR_TICK_THICKNESS) # Draw the MAJOR tick mark + # What Mbase location to put on the label + labelStr = str(formatStr % _Mb) + strWidth, strHeight = im_drawer.textsize( + labelStr, font=MBLabelFont) drawStringXc = (Xc - (strWidth / 2.0)) im_drawer.text(xy=(drawStringXc, strYLoc), text=labelStr, font=MBLabelFont, fill=xAxisLabelColor) else: im_drawer.line(xy=((Xc, yZero), - (Xc, yZero+xMinorTickHeight)), + (Xc, yZero + xMinorTickHeight)), fill=xAxisTickMarkColor, - width=X_MINOR_TICK_THICKNESS) # Draw the MINOR tick mark + width=X_MINOR_TICK_THICKNESS) # Draw the MINOR tick mark - ###Physical genome wide view + # Physical genome wide view else: distScale = 0 startPosX = xLeftOffset for i, distLen in enumerate(self.ChrLengthDistList): - if distScale == 0: #universal scale in whole genome mapping + if distScale == 0: # universal scale in whole genome mapping if distLen > 75: distScale = 25 elif distLen > 30: @@ -1953,51 +2069,55 @@ class DisplayMappingResults: distScale = 5 for j, tickdists in enumerate(range(distScale, int(ceil(distLen)), distScale)): im_drawer.line( - xy=((startPosX+tickdists*plotXScale, yZero), - (startPosX+tickdists*plotXScale, yZero + 7)), - fill=BLACK, width=1*zoom) + xy=((startPosX + tickdists * plotXScale, yZero), + (startPosX + tickdists * plotXScale, yZero + 7)), + fill=BLACK, width=1 * zoom) if j % 2 == 0: draw_rotated_text( canvas, text=str(tickdists), font=MBLabelFont, - xy=(startPosX+tickdists*plotXScale, - yZero+10*zoom), fill=BLACK, angle=270) - startPosX += (self.ChrLengthDistList[i]+self.GraphInterval)*plotXScale + xy=(startPosX + tickdists * plotXScale, + yZero + 10 * zoom), fill=BLACK, angle=270) + startPosX += (self.ChrLengthDistList[i] + \ + self.GraphInterval) * plotXScale - megabaseLabelFont = ImageFont.truetype(font=VERDANA_FILE, size=int(18*zoom*1.5)) + megabaseLabelFont = ImageFont.truetype( + font=VERDANA_FILE, size=int(18 * zoom * 1.5)) im_drawer.text( text="Megabases", xy=( - xLeftOffset+(plotWidth-im_drawer.textsize( - "Megabases", font=megabaseLabelFont)[0])/2, - strYLoc+MBLabelFont.font.height+10*(zoom%2)), + xLeftOffset + (plotWidth - im_drawer.textsize( + "Megabases", font=megabaseLabelFont)[0]) / 2, + strYLoc + MBLabelFont.font.height + 10 * (zoom % 2)), font=megabaseLabelFont, fill=BLACK) pass else: - strYLoc = yZero + spacingFromLabelToAxis + MBLabelFont.font.height/2 + strYLoc = yZero + spacingFromLabelToAxis + MBLabelFont.font.height / 2 ChrAInfo = [] preLpos = -1 distinctCount = 0.0 - if self.selectedChr == -1: #ZS: If viewing full genome/all chromosomes + if self.selectedChr == -1: # ZS: If viewing full genome/all chromosomes for i, _chr in enumerate(self.genotype): thisChr = [] Locus0CM = _chr[0].cM nLoci = len(_chr) - if nLoci <= 8: + if nLoci <= 8: for _locus in _chr: if _locus.name != ' - ': if _locus.cM != preLpos: distinctCount += 1 preLpos = _locus.cM - thisChr.append([_locus.name, _locus.cM-Locus0CM]) + thisChr.append( + [_locus.name, _locus.cM - Locus0CM]) else: - for j in (0, nLoci/4, nLoci/2, nLoci*3/4, -1): + for j in (0, nLoci / 4, nLoci / 2, nLoci * 3 / 4, -1): while _chr[j].name == ' - ': j += 1 if _chr[j].cM != preLpos: distinctCount += 1 preLpos = _chr[j].cM - thisChr.append([_chr[j].name, _chr[j].cM-Locus0CM]) + thisChr.append( + [_chr[j].name, _chr[j].cM - Locus0CM]) ChrAInfo.append(thisChr) else: for i, _chr in enumerate(self.genotype): @@ -2009,10 +2129,11 @@ class DisplayMappingResults: if _locus.cM != preLpos: distinctCount += 1 preLpos = _locus.cM - thisChr.append([_locus.name, _locus.cM-Locus0CM]) + thisChr.append( + [_locus.name, _locus.cM - Locus0CM]) ChrAInfo.append(thisChr) - stepA = (plotWidth+0.0)/distinctCount + stepA = (plotWidth + 0.0) / distinctCount LRectWidth = 10 LRectHeight = 3 @@ -2037,28 +2158,29 @@ class DisplayMappingResults: Zorder = 0 if differ: im_drawer.line( - xy=((startPosX+Lpos, yZero), (xLeftOffset+offsetA,\ - yZero+25)), + xy=((startPosX + Lpos, yZero), (xLeftOffset + offsetA,\ + yZero + 25)), fill=lineColor) im_drawer.line( - xy=((xLeftOffset+offsetA, yZero+25), (xLeftOffset+offsetA,\ - yZero+40+Zorder*(LRectWidth+3))), + xy=((xLeftOffset + offsetA, yZero + 25), (xLeftOffset + offsetA,\ + yZero + 40 + Zorder * (LRectWidth + 3))), fill=lineColor) rectColor = ORANGE else: im_drawer.line( - xy=((xLeftOffset+offsetA, yZero+40+Zorder*(LRectWidth+3)-3), (\ - xLeftOffset+offsetA, yZero+40+Zorder*(LRectWidth+3))), + xy=((xLeftOffset + offsetA, yZero + 40 + Zorder * (LRectWidth + 3) - 3), (\ + xLeftOffset + offsetA, yZero + 40 + Zorder * (LRectWidth + 3))), fill=lineColor) rectColor = DEEPPINK im_drawer.rectangle( - xy=((xLeftOffset+offsetA, yZero+40+Zorder*(LRectWidth+3)), - (xLeftOffset+offsetA-LRectHeight, - yZero+40+Zorder*(LRectWidth+3)+LRectWidth)), - outline=rectColor, fill=rectColor, width = 0) - COORDS="%d,%d,%d,%d"%(xLeftOffset+offsetA-LRectHeight, yZero+40+Zorder*(LRectWidth+3),\ - xLeftOffset+offsetA,yZero+40+Zorder*(LRectWidth+3)+LRectWidth) - HREF = "/show_trait?trait_id=%s&dataset=%s" % (Lname, self.dataset.group.name+"Geno") + xy=((xLeftOffset + offsetA, yZero + 40 + Zorder * (LRectWidth + 3)), + (xLeftOffset + offsetA - LRectHeight, + yZero + 40 + Zorder * (LRectWidth + 3) + LRectWidth)), + outline=rectColor, fill=rectColor, width=0) + COORDS = "%d,%d,%d,%d" % (xLeftOffset + offsetA - LRectHeight, yZero + 40 + Zorder * (LRectWidth + 3),\ + xLeftOffset + offsetA, yZero + 40 + Zorder * (LRectWidth + 3) + LRectWidth) + HREF = "/show_trait?trait_id=%s&dataset=%s" % ( + Lname, self.dataset.group.name + "Geno") #HREF="javascript:showDatabase3('%s','%s','%s','');" % (showLocusForm,fd.RISet+"Geno", Lname) Areas = HtmlGenWrapper.create_area_tag( shape='rect', @@ -2067,26 +2189,27 @@ class DisplayMappingResults: target="_blank", title="Locus : {}".format(Lname)) gifmap.append(Areas) - ##piddle bug + # piddle bug if j == 0: im_drawer.line( - xy=((startPosX, yZero), (startPosX, yZero+40)), + xy=((startPosX, yZero), (startPosX, yZero + 40)), fill=lineColor) - startPosX += (self.ChrLengthDistList[j]+self.GraphInterval)*plotXScale + startPosX += (self.ChrLengthDistList[j] + \ + self.GraphInterval) * plotXScale - centimorganLabelFont = ImageFont.truetype(font=VERDANA_FILE, size=int(18*zoom*1.5)) + centimorganLabelFont = ImageFont.truetype( + font=VERDANA_FILE, size=int(18 * zoom * 1.5)) im_drawer.text( text="Centimorgans", - xy=(xLeftOffset+(plotWidth-im_drawer.textsize( - "Centimorgans", font=centimorganLabelFont)[0])/2, - strYLoc + MBLabelFont.font.height+ 10*(zoom%2)), + xy=(xLeftOffset + (plotWidth - im_drawer.textsize( + "Centimorgans", font=centimorganLabelFont)[0]) / 2, + strYLoc + MBLabelFont.font.height + 10 * (zoom % 2)), font=centimorganLabelFont, fill=BLACK) - im_drawer.line(xy=((xLeftOffset, yZero), (xLeftOffset+plotWidth, yZero)), - fill=BLACK, width=X_AXIS_THICKNESS) # Draw the X axis itself - + im_drawer.line(xy=((xLeftOffset, yZero), (xLeftOffset + plotWidth, yZero)), + fill=BLACK, width=X_AXIS_THICKNESS) # Draw the X axis itself - def drawQTL(self, canvas, drawAreaHeight, gifmap, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): + def drawQTL(self, canvas, drawAreaHeight, gifmap, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): im_drawer = ImageDraw.Draw(canvas) xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset plotWidth = canvas.size[0] - xLeftOffset - xRightOffset @@ -2095,74 +2218,85 @@ class DisplayMappingResults: if zoom == 2: fontZoom = 1.5 - INTERCROSS = (self.genotype.type=="intercross") + INTERCROSS = (self.genotype.type == "intercross") - #draw the LRS scale - #We first determine whether or not we are using a sliding scale. - #If so, we need to compute the maximum LRS value to determine where the max y-value should be, and call this LRS_LOD_Max. - #LRSTop is then defined to be above the LRS_LOD_Max by enough to add one additional LRSScale increment. - #if we are using a set-scale, then we set LRSTop to be the user's value, and LRS_LOD_Max doesn't matter. + # draw the LRS scale + # We first determine whether or not we are using a sliding scale. + # If so, we need to compute the maximum LRS value to determine where the max y-value should be, and call this LRS_LOD_Max. + # LRSTop is then defined to be above the LRS_LOD_Max by enough to add one additional LRSScale increment. + # if we are using a set-scale, then we set LRSTop to be the user's value, and LRS_LOD_Max doesn't matter. - #ZS: This is a mess, but I don't know a better way to account for different mapping methods returning results in different formats + the option to change between LRS and LOD - if self.lrsMax <= 0: #sliding scale + # ZS: This is a mess, but I don't know a better way to account for different mapping methods returning results in different formats + the option to change between LRS and LOD + if self.lrsMax <= 0: # sliding scale if "lrs_value" in self.qtlresults[0]: - LRS_LOD_Max = max([result['lrs_value'] for result in self.qtlresults]) + LRS_LOD_Max = max([result['lrs_value'] + for result in self.qtlresults]) if self.LRS_LOD == "LOD" or self.LRS_LOD == "-logP": LRS_LOD_Max = LRS_LOD_Max / self.LODFACTOR if self.permChecked and self.nperm > 0 and not self.multipleInterval: - self.significant = min(self.significant / self.LODFACTOR, webqtlConfig.MAXLRS) - self.suggestive = min(self.suggestive / self.LODFACTOR, webqtlConfig.MAXLRS) + self.significant = min( + self.significant / self.LODFACTOR, webqtlConfig.MAXLRS) + self.suggestive = min( + self.suggestive / self.LODFACTOR, webqtlConfig.MAXLRS) else: if self.permChecked and self.nperm > 0 and not self.multipleInterval: - self.significant = min(self.significant, webqtlConfig.MAXLRS) - self.suggestive = min(self.suggestive, webqtlConfig.MAXLRS) + self.significant = min( + self.significant, webqtlConfig.MAXLRS) + self.suggestive = min( + self.suggestive, webqtlConfig.MAXLRS) else: pass else: - LRS_LOD_Max = max([result['lod_score'] for result in self.qtlresults]) + LRS_LOD_Max = max([result['lod_score'] + for result in self.qtlresults]) if self.LRS_LOD == "LRS": LRS_LOD_Max = LRS_LOD_Max * self.LODFACTOR if self.permChecked and self.nperm > 0 and not self.multipleInterval: - self.significant = min(self.significant * self.LODFACTOR, webqtlConfig.MAXLRS) - self.suggestive = min(self.suggestive * self.LODFACTOR, webqtlConfig.MAXLRS) + self.significant = min( + self.significant * self.LODFACTOR, webqtlConfig.MAXLRS) + self.suggestive = min( + self.suggestive * self.LODFACTOR, webqtlConfig.MAXLRS) else: if self.permChecked and self.nperm > 0 and not self.multipleInterval: - self.significant = min(self.significant, webqtlConfig.MAXLRS) - self.suggestive = min(self.suggestive, webqtlConfig.MAXLRS) + self.significant = min( + self.significant, webqtlConfig.MAXLRS) + self.suggestive = min( + self.suggestive, webqtlConfig.MAXLRS) else: pass if self.permChecked and self.nperm > 0 and not self.multipleInterval: LRS_LOD_Max = max(self.significant, LRS_LOD_Max) - #genotype trait will give infinite LRS + # genotype trait will give infinite LRS LRS_LOD_Max = min(LRS_LOD_Max, webqtlConfig.MAXLRS) else: LRS_LOD_Max = self.lrsMax - #ZS: Needed to pass to genome browser + # ZS: Needed to pass to genome browser js_data = json.loads(self.js_data) if self.LRS_LOD == "LRS": - js_data['max_score'] = LRS_LOD_Max/4.61 + js_data['max_score'] = LRS_LOD_Max / 4.61 else: js_data['max_score'] = LRS_LOD_Max self.js_data = json.dumps(js_data) - LRSScaleFont=ImageFont.truetype(font=VERDANA_FILE, size=16*zoom) - LRSLODFont=ImageFont.truetype(font=VERDANA_FILE, size=int(18*zoom*1.5)) + LRSScaleFont = ImageFont.truetype(font=VERDANA_FILE, size=16 * zoom) + LRSLODFont = ImageFont.truetype( + font=VERDANA_FILE, size=int(18 * zoom * 1.5)) yZero = yTopOffset + plotHeight # LRSHeightThresh = drawAreaHeight # AdditiveHeightThresh = drawAreaHeight/2 # DominanceHeightThresh = drawAreaHeight/2 if self.selectedChr == 1: - LRSHeightThresh = drawAreaHeight - yTopOffset + 30*(zoom - 1) - AdditiveHeightThresh = LRSHeightThresh/2 - DominanceHeightThresh = LRSHeightThresh/2 + LRSHeightThresh = drawAreaHeight - yTopOffset + 30 * (zoom - 1) + AdditiveHeightThresh = LRSHeightThresh / 2 + DominanceHeightThresh = LRSHeightThresh / 2 else: LRSHeightThresh = drawAreaHeight - AdditiveHeightThresh = drawAreaHeight/2 - DominanceHeightThresh = drawAreaHeight/2 + AdditiveHeightThresh = drawAreaHeight / 2 + DominanceHeightThresh = drawAreaHeight / 2 # LRSHeightThresh = (yZero - yTopOffset + 30*(zoom - 1)) # AdditiveHeightThresh = LRSHeightThresh/2 # DominanceHeightThresh = LRSHeightThresh/2 @@ -2178,7 +2312,7 @@ class DisplayMappingResults: LRSAxisList = Plot.frange(LRSScale, LRS_LOD_Max, LRSScale) - #ZS: Convert to int if all axis values are whole numbers + # ZS: Convert to int if all axis values are whole numbers all_int = True for item in LRSAxisList: if isinstance(item, int): @@ -2192,9 +2326,10 @@ class DisplayMappingResults: # else: # max_lrs_width = canvas.stringWidth("%2.1f" % LRS_LOD_Max, font=LRSScaleFont) + 30 - #draw the "LRS" or "LOD" string to the left of the axis - LRSScaleFont=ImageFont.truetype(font=VERDANA_FILE, size=16*zoom) - LRSLODFont=ImageFont.truetype(font=VERDANA_FILE, size=int(18*zoom*1.5)) + # draw the "LRS" or "LOD" string to the left of the axis + LRSScaleFont = ImageFont.truetype(font=VERDANA_FILE, size=16 * zoom) + LRSLODFont = ImageFont.truetype( + font=VERDANA_FILE, size=int(18 * zoom * 1.5)) yZero = yTopOffset + plotHeight # TEXT_X_DISPLACEMENT = -20 @@ -2210,64 +2345,69 @@ class DisplayMappingResults: draw_rotated_text( canvas, text=self.LRS_LOD, font=LRSLODFont, xy=(xLeftOffset - im_drawer.textsize( - "999.99", font=LRSScaleFont)[0] - 15*(zoom-1) + TEXT_X_DISPLACEMENT, - yZero + TEXT_Y_DISPLACEMENT - 300*(zoom - 1)), + "999.99", font=LRSScaleFont)[0] - 15 * (zoom - 1) + TEXT_X_DISPLACEMENT, + yZero + TEXT_Y_DISPLACEMENT - 300 * (zoom - 1)), fill=BLACK, angle=90) for item in LRSAxisList: if LRS_LOD_Max == 0.0: LRS_LOD_Max = 0.000001 - yTopOffset + 30*(zoom - 1) - yLRS = yZero - (item/LRS_LOD_Max) * LRSHeightThresh - im_drawer.line(xy=((xLeftOffset, yLRS), (xLeftOffset-4, yLRS)), - fill=self.LRS_COLOR, width=1*zoom) + yTopOffset + 30 * (zoom - 1) + yLRS = yZero - (item / LRS_LOD_Max) * LRSHeightThresh + im_drawer.line(xy=((xLeftOffset, yLRS), (xLeftOffset - 4, yLRS)), + fill=self.LRS_COLOR, width=1 * zoom) if all_int: scaleStr = "%d" % item else: scaleStr = "%2.1f" % item - #Draw the LRS/LOD Y axis label + # Draw the LRS/LOD Y axis label TEXT_Y_DISPLACEMENT = -10 im_drawer.text( text=scaleStr, - xy=(xLeftOffset-4-im_drawer.textsize(scaleStr, font=LRSScaleFont)[0]-5, - yLRS+TEXT_Y_DISPLACEMENT), + xy=(xLeftOffset - 4 - im_drawer.textsize(scaleStr, font=LRSScaleFont)[0] - 5, + yLRS + TEXT_Y_DISPLACEMENT), font=LRSScaleFont, fill=self.LRS_COLOR) if self.permChecked and self.nperm > 0 and not self.multipleInterval: - significantY = yZero - self.significant*LRSHeightThresh/LRS_LOD_Max - suggestiveY = yZero - self.suggestive*LRSHeightThresh/LRS_LOD_Max + significantY = yZero - self.significant * LRSHeightThresh / LRS_LOD_Max + suggestiveY = yZero - self.suggestive * LRSHeightThresh / LRS_LOD_Max # significantY = yZero - self.significant*LRSHeightThresh/LRSAxisList[-1] # suggestiveY = yZero - self.suggestive*LRSHeightThresh/LRSAxisList[-1] startPosX = xLeftOffset - #"Significant" and "Suggestive" Drawing Routine + # "Significant" and "Suggestive" Drawing Routine # ======= Draw the thick lines for "Significant" and "Suggestive" ===== (crowell: I tried to make the SNPs draw over these lines, but piddle wouldn't have it...) - #ZS: I don't know if what I did here with this inner function is clever or overly complicated, but it's the only way I could think of to avoid duplicating the code inside this function + # ZS: I don't know if what I did here with this inner function is clever or overly complicated, but it's the only way I could think of to avoid duplicating the code inside this function def add_suggestive_significant_lines_and_legend(start_pos_x, chr_length_dist): - rightEdge = int(start_pos_x + chr_length_dist*plotXScale - self.SUGGESTIVE_WIDTH/1.5) + rightEdge = int(start_pos_x + chr_length_dist * \ + plotXScale - self.SUGGESTIVE_WIDTH / 1.5) im_drawer.line( - xy=((start_pos_x+self.SUGGESTIVE_WIDTH/1.5, suggestiveY), + xy=((start_pos_x + self.SUGGESTIVE_WIDTH / 1.5, suggestiveY), (rightEdge, suggestiveY)), - fill=self.SUGGESTIVE_COLOR, width=self.SUGGESTIVE_WIDTH*zoom - #,clipX=(xLeftOffset, xLeftOffset + plotWidth-2) + fill=self.SUGGESTIVE_COLOR, width=self.SUGGESTIVE_WIDTH * zoom + # ,clipX=(xLeftOffset, xLeftOffset + plotWidth-2) ) im_drawer.line( - xy=((start_pos_x+self.SUGGESTIVE_WIDTH/1.5, significantY), - (rightEdge, significantY)), + xy=((start_pos_x + self.SUGGESTIVE_WIDTH / 1.5, significantY), + (rightEdge, significantY)), fill=self.SIGNIFICANT_COLOR, - width=self.SIGNIFICANT_WIDTH*zoom - #, clipX=(xLeftOffset, xLeftOffset + plotWidth-2) + width=self.SIGNIFICANT_WIDTH * zoom + # , clipX=(xLeftOffset, xLeftOffset + plotWidth-2) ) - sugg_coords = "%d, %d, %d, %d" % (start_pos_x, suggestiveY-2, rightEdge + 2*zoom, suggestiveY+2) - sig_coords = "%d, %d, %d, %d" % (start_pos_x, significantY-2, rightEdge + 2*zoom, significantY+2) + sugg_coords = "%d, %d, %d, %d" % ( + start_pos_x, suggestiveY - 2, rightEdge + 2 * zoom, suggestiveY + 2) + sig_coords = "%d, %d, %d, %d" % ( + start_pos_x, significantY - 2, rightEdge + 2 * zoom, significantY + 2) if self.LRS_LOD == 'LRS': sugg_title = "Suggestive LRS = %0.2f" % self.suggestive sig_title = "Significant LRS = %0.2f" % self.significant else: - sugg_title = "Suggestive LOD = %0.2f" % (self.suggestive/4.61) - sig_title = "Significant LOD = %0.2f" % (self.significant/4.61) + sugg_title = "Suggestive LOD = %0.2f" % ( + self.suggestive / 4.61) + sig_title = "Significant LOD = %0.2f" % ( + self.significant / 4.61) Areas1 = HtmlGenWrapper.create_area_tag( shape='rect', coords=sugg_coords, @@ -2279,24 +2419,28 @@ class DisplayMappingResults: gifmap.append(Areas1) gifmap.append(Areas2) - start_pos_x += (chr_length_dist+self.GraphInterval)*plotXScale + start_pos_x += (chr_length_dist + \ + self.GraphInterval) * plotXScale return start_pos_x for i, _chr in enumerate(self.genotype): if self.selectedChr != -1: if _chr.name == self.ChrList[self.selectedChr][0]: - startPosX = add_suggestive_significant_lines_and_legend(startPosX, self.ChrLengthDistList[0]) + startPosX = add_suggestive_significant_lines_and_legend( + startPosX, self.ChrLengthDistList[0]) break else: continue else: - startPosX = add_suggestive_significant_lines_and_legend(startPosX, self.ChrLengthDistList[i]) + startPosX = add_suggestive_significant_lines_and_legend( + startPosX, self.ChrLengthDistList[i]) if self.multipleInterval: lrsEdgeWidth = 1 else: if self.additiveChecked: - additiveMax = max([abs(X['additive']) for X in self.qtlresults]) + additiveMax = max([abs(X['additive']) + for X in self.qtlresults]) lrsEdgeWidth = 3 if zoom == 2: @@ -2306,7 +2450,8 @@ class DisplayMappingResults: AdditiveCoordXY = [] DominanceCoordXY = [] - symbolFont = ImageFont.truetype(font=FNT_BS_FILE, size=5) #ZS: For Manhattan Plot + symbolFont = ImageFont.truetype( + font=FNT_BS_FILE, size=5) # ZS: For Manhattan Plot previous_chr = 1 previous_chr_as_int = 0 @@ -2332,128 +2477,142 @@ class DisplayMappingResults: minusColor = self.ADDITIVE_COLOR_NEGATIVE for k, aPoint in enumerate(AdditiveCoordXY): if k > 0: - Xc0, Yc0 = AdditiveCoordXY[k-1] + Xc0, Yc0 = AdditiveCoordXY[k - 1] Xc, Yc = aPoint - if (Yc0-yZero)*(Yc-yZero) < 0: - if Xc == Xc0: #genotype , locus distance is 0 + if (Yc0 - yZero) * (Yc - yZero) < 0: + if Xc == Xc0: # genotype , locus distance is 0 Xcm = Xc else: - Xcm = (yZero-Yc0)/((Yc-Yc0)/(Xc-Xc0)) +Xc0 + Xcm = (yZero - Yc0) / \ + ((Yc - Yc0) / (Xc - Xc0)) + Xc0 if Yc0 < yZero: im_drawer.line( xy=((Xc0, Yc0), (Xcm, yZero)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) im_drawer.line( - xy=((Xcm, yZero), (Xc, yZero-(Yc-yZero))), + xy=((Xcm, yZero), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero-(Yc0-yZero)), + xy=((Xc0, yZero - (Yc0 - yZero)), (Xcm, yZero)), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) im_drawer.line( xy=((Xcm, yZero), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) - elif (Yc0-yZero)*(Yc-yZero) > 0: + elif (Yc0 - yZero) * (Yc - yZero) > 0: if Yc < yZero: im_drawer.line( xy=((Xc0, Yc0), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero - (Yc0-yZero)), - (Xc, yZero - (Yc-yZero))), + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: - minYc = min(Yc-yZero, Yc0-yZero) + minYc = min(Yc - yZero, Yc0 - yZero) if minYc < 0: im_drawer.line( xy=((Xc0, Yc0), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero - (Yc0-yZero)), - (Xc, yZero - (Yc-yZero))), + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) LRSCoordXY = [] AdditiveCoordXY = [] previous_chr = qtlresult['chr'] previous_chr_as_int += 1 - newStartPosX = (self.ChrLengthDistList[previous_chr_as_int - 1]+self.GraphInterval)*plotXScale + newStartPosX = ( + self.ChrLengthDistList[previous_chr_as_int - 1] + self.GraphInterval) * plotXScale if newStartPosX != oldStartPosX: startPosX += newStartPosX oldStartPosX = newStartPosX - #ZS: This is because the chromosome value stored in qtlresult['chr'] can be (for example) either X or 20 depending upon the mapping method/scale used + # ZS: This is because the chromosome value stored in qtlresult['chr'] can be (for example) either X or 20 depending upon the mapping method/scale used this_chr = str(self.ChrList[self.selectedChr][0]) if self.plotScale != "physic": - this_chr = str(self.ChrList[self.selectedChr][1]+1) + this_chr = str(self.ChrList[self.selectedChr][1] + 1) if self.selectedChr == -1 or str(qtlresult['chr']) == this_chr: if self.plotScale != "physic" and self.mapping_method == "reaper" and not self.manhattan_plot: - Xc = startPosX + (qtlresult['cM']-startMb)*plotXScale + Xc = startPosX + (qtlresult['cM'] - startMb) * plotXScale if hasattr(self.genotype, "filler"): if self.genotype.filler: if self.selectedChr != -1: start_cm = self.genotype[self.selectedChr - 1][0].cM - Xc = startPosX + (qtlresult['Mb'] - start_cm)*plotXScale + Xc = startPosX + \ + (qtlresult['Mb'] - start_cm) * plotXScale else: start_cm = self.genotype[previous_chr_as_int][0].cM - Xc = startPosX + ((qtlresult['Mb']-start_cm-startMb)*plotXScale)*(((qtlresult['Mb']-start_cm-startMb)*plotXScale)/((qtlresult['Mb']-start_cm-startMb+self.GraphInterval)*plotXScale)) + Xc = startPosX + ((qtlresult['Mb'] - start_cm - startMb) * plotXScale) * ( + ((qtlresult['Mb'] - start_cm - startMb) * plotXScale) / ((qtlresult['Mb'] - start_cm - startMb + self.GraphInterval) * plotXScale)) else: - Xc = startPosX + (qtlresult['Mb']-startMb)*plotXScale + Xc = startPosX + (qtlresult['Mb'] - startMb) * plotXScale # updated by NL 06-18-2011: # fix the over limit LRS graph issue since genotype trait may give infinite LRS; # for any lrs is over than 460(LRS max in this system), it will be reset to 460 - yLRS = yZero - (item/LRS_LOD_Max) * LRSHeightThresh - + yLRS = yZero - (item / LRS_LOD_Max) * LRSHeightThresh if 'lrs_value' in qtlresult: if self.LRS_LOD == "LOD" or self.LRS_LOD == "-logP": - if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value']=='inf': + if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value'] == 'inf': #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/(LRSAxisList[-1]*self.LODFACTOR) - Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/(LRS_LOD_Max*self.LODFACTOR) + Yc = yZero - webqtlConfig.MAXLRS * \ + LRSHeightThresh / \ + (LRS_LOD_Max * self.LODFACTOR) else: #Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/(LRSAxisList[-1]*self.LODFACTOR) - Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/(LRS_LOD_Max*self.LODFACTOR) + Yc = yZero - \ + qtlresult['lrs_value'] * LRSHeightThresh / \ + (LRS_LOD_Max * self.LODFACTOR) else: - if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value']=='inf': + if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value'] == 'inf': #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRSAxisList[-1] - Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRS_LOD_Max + Yc = yZero - webqtlConfig.MAXLRS * LRSHeightThresh / LRS_LOD_Max else: #Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/LRSAxisList[-1] - Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/LRS_LOD_Max + Yc = yZero - \ + qtlresult['lrs_value'] * \ + LRSHeightThresh / LRS_LOD_Max else: - if qtlresult['lod_score'] > 100 or qtlresult['lod_score']=='inf': + if qtlresult['lod_score'] > 100 or qtlresult['lod_score'] == 'inf': #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRSAxisList[-1] - Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRS_LOD_Max + Yc = yZero - webqtlConfig.MAXLRS * LRSHeightThresh / LRS_LOD_Max else: if self.LRS_LOD == "LRS": #Yc = yZero - qtlresult['lod_score']*self.LODFACTOR*LRSHeightThresh/LRSAxisList[-1] - Yc = yZero - qtlresult['lod_score']*self.LODFACTOR*LRSHeightThresh/LRS_LOD_Max + Yc = yZero - \ + qtlresult['lod_score'] * self.LODFACTOR * \ + LRSHeightThresh / LRS_LOD_Max else: #Yc = yZero - qtlresult['lod_score']*LRSHeightThresh/LRSAxisList[-1] - Yc = yZero - qtlresult['lod_score']*LRSHeightThresh/LRS_LOD_Max + Yc = yZero - \ + qtlresult['lod_score'] * \ + LRSHeightThresh / LRS_LOD_Max if self.manhattan_plot == True: if self.color_scheme == "single": @@ -2469,17 +2628,19 @@ class DisplayMappingResults: im_drawer.text( text="5", xy=( - Xc-im_drawer.textsize("5", font=symbolFont)[0]/2+1, - Yc-4), + Xc - im_drawer.textsize("5", + font=symbolFont)[0] / 2 + 1, + Yc - 4), fill=point_color, font=symbolFont) else: LRSCoordXY.append((Xc, Yc)) if not self.multipleInterval and self.additiveChecked: - if additiveMax == 0.0: - additiveMax = 0.000001 - Yc = yZero - qtlresult['additive']*AdditiveHeightThresh/additiveMax - AdditiveCoordXY.append((Xc, Yc)) + if additiveMax == 0.0: + additiveMax = 0.000001 + Yc = yZero - qtlresult['additive'] * \ + AdditiveHeightThresh / additiveMax + AdditiveCoordXY.append((Xc, Yc)) m += 1 @@ -2497,64 +2658,65 @@ class DisplayMappingResults: minusColor = self.ADDITIVE_COLOR_NEGATIVE for k, aPoint in enumerate(AdditiveCoordXY): if k > 0: - Xc0, Yc0 = AdditiveCoordXY[k-1] + Xc0, Yc0 = AdditiveCoordXY[k - 1] Xc, Yc = aPoint - if (Yc0-yZero)*(Yc-yZero) < 0: - if Xc == Xc0: #genotype , locus distance is 0 + if (Yc0 - yZero) * (Yc - yZero) < 0: + if Xc == Xc0: # genotype , locus distance is 0 Xcm = Xc else: - Xcm = (yZero-Yc0)/((Yc-Yc0)/(Xc-Xc0)) +Xc0 + Xcm = (yZero - Yc0) / \ + ((Yc - Yc0) / (Xc - Xc0)) + Xc0 if Yc0 < yZero: im_drawer.line( xy=((Xc0, Yc0), (Xcm, yZero)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) im_drawer.line( - xy=((Xcm, yZero), (Xc, yZero-(Yc-yZero))), + xy=((Xcm, yZero), (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero - (Yc0-yZero)), + xy=((Xc0, yZero - (Yc0 - yZero)), (Xcm, yZero)), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) im_drawer.line( xy=((Xcm, yZero), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) - elif (Yc0-yZero)*(Yc-yZero) > 0: + elif (Yc0 - yZero) * (Yc - yZero) > 0: if Yc < yZero: im_drawer.line( xy=((Xc0, Yc0), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero-(Yc0-yZero)), - (Xc, yZero-(Yc-yZero))), + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: - minYc = min(Yc-yZero, Yc0-yZero) + minYc = min(Yc - yZero, Yc0 - yZero) if minYc < 0: im_drawer.line( xy=((Xc0, Yc0), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero - (Yc0-yZero)), - (Xc, yZero - (Yc-yZero))), + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) if not self.multipleInterval and INTERCROSS and self.dominanceChecked: @@ -2562,114 +2724,116 @@ class DisplayMappingResults: minusColor = self.DOMINANCE_COLOR_NEGATIVE for k, aPoint in enumerate(DominanceCoordXY): if k > 0: - Xc0, Yc0 = DominanceCoordXY[k-1] + Xc0, Yc0 = DominanceCoordXY[k - 1] Xc, Yc = aPoint - if (Yc0-yZero)*(Yc-yZero) < 0: - if Xc == Xc0: #genotype , locus distance is 0 + if (Yc0 - yZero) * (Yc - yZero) < 0: + if Xc == Xc0: # genotype , locus distance is 0 Xcm = Xc else: - Xcm = (yZero-Yc0)/((Yc-Yc0)/(Xc-Xc0)) +Xc0 + Xcm = (yZero - Yc0) / \ + ((Yc - Yc0) / (Xc - Xc0)) + Xc0 if Yc0 < yZero: im_drawer.line( xy=((Xc0, Yc0), (Xcm, yZero)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) im_drawer.line( - xy=((Xcm, yZero), (Xc, yZero-(Yc-yZero))), + xy=((Xcm, yZero), (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero - (Yc0-yZero)), (Xcm, yZero)), + xy=((Xc0, yZero - (Yc0 - yZero)), (Xcm, yZero)), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) im_drawer.line( xy=((Xcm, yZero), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) - elif (Yc0-yZero)*(Yc-yZero) > 0: + elif (Yc0 - yZero) * (Yc - yZero) > 0: if Yc < yZero: im_drawer.line( xy=((Xc0, Yc0), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero - (Yc0-yZero)), - (Xc, yZero - (Yc-yZero))), + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: - minYc = min(Yc-yZero, Yc0-yZero) + minYc = min(Yc - yZero, Yc0 - yZero) if minYc < 0: im_drawer.line( xy=((Xc0, Yc0), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero - (Yc0-yZero)), - (Xc, yZero - (Yc-yZero))), fill=minusColor, + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) - - ###draw additive scale + # draw additive scale if not self.multipleInterval and self.additiveChecked: - additiveScaleFont=ImageFont.truetype(font=VERDANA_FILE, size=16*zoom) + additiveScaleFont = ImageFont.truetype( + font=VERDANA_FILE, size=16 * zoom) additiveScale = Plot.detScaleOld(0, additiveMax) - additiveStep = (additiveScale[1]-additiveScale[0])/additiveScale[2] + additiveStep = (additiveScale[1] - \ + additiveScale[0]) / additiveScale[2] additiveAxisList = Plot.frange(0, additiveScale[1], additiveStep) - addPlotScale = AdditiveHeightThresh/additiveMax + addPlotScale = AdditiveHeightThresh / additiveMax TEXT_Y_DISPLACEMENT = -8 additiveAxisList.append(additiveScale[1]) for item in additiveAxisList: - additiveY = yZero - item*addPlotScale + additiveY = yZero - item * addPlotScale im_drawer.line( xy=((xLeftOffset + plotWidth, additiveY), - (xLeftOffset+4+ plotWidth, additiveY)), - fill=self.ADDITIVE_COLOR_POSITIVE, width=1*zoom) + (xLeftOffset + 4 + plotWidth, additiveY)), + fill=self.ADDITIVE_COLOR_POSITIVE, width=1 * zoom) scaleStr = "%2.3f" % item im_drawer.text( text=scaleStr, - xy=(xLeftOffset + plotWidth +6, additiveY+TEXT_Y_DISPLACEMENT), + xy=(xLeftOffset + plotWidth + 6, + additiveY + TEXT_Y_DISPLACEMENT), font=additiveScaleFont, fill=self.ADDITIVE_COLOR_POSITIVE) im_drawer.line( - xy=((xLeftOffset+plotWidth, additiveY), - (xLeftOffset+plotWidth, yZero)), - fill=self.ADDITIVE_COLOR_POSITIVE, width=1*zoom) + xy=((xLeftOffset + plotWidth, additiveY), + (xLeftOffset + plotWidth, yZero)), + fill=self.ADDITIVE_COLOR_POSITIVE, width=1 * zoom) im_drawer.line( - xy=((xLeftOffset, yZero), (xLeftOffset, yTopOffset + 30*(zoom - 1))), - fill=self.LRS_COLOR, width=1*zoom) #the blue line running up the y axis - - - def drawGraphBackground(self, canvas, gifmap, offset= (80, 120, 80, 50), zoom = 1, startMb = None, endMb = None): - ##conditions - ##multiple Chromosome view - ##single Chromosome Physical - ##single Chromosome Genetic + xy=((xLeftOffset, yZero), (xLeftOffset, yTopOffset + 30 * (zoom - 1))), + fill=self.LRS_COLOR, width=1 * zoom) # the blue line running up the y axis + + def drawGraphBackground(self, canvas, gifmap, offset=(80, 120, 80, 50), zoom=1, startMb=None, endMb=None): + # conditions + # multiple Chromosome view + # single Chromosome Physical + # single Chromosome Genetic im_drawer = ImageDraw.Draw(canvas) xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset plotWidth = canvas.size[0] - xLeftOffset - xRightOffset plotHeight = canvas.size[1] - yTopOffset - yBottomOffset - yBottom = yTopOffset+plotHeight + yBottom = yTopOffset + plotHeight fontZoom = zoom if zoom == 2: fontZoom = 1.5 yTopOffset += 30 - #calculate plot scale + # calculate plot scale if self.plotScale != 'physic': self.ChrLengthDistList = self.ChrLengthCMList drawRegionDistance = self.ChrLengthCMSum @@ -2677,10 +2841,10 @@ class DisplayMappingResults: self.ChrLengthDistList = self.ChrLengthMbList drawRegionDistance = self.ChrLengthMbSum - if self.selectedChr > -1: #single chromosome view - spacingAmt = plotWidth/13.5 + if self.selectedChr > -1: # single chromosome view + spacingAmt = plotWidth / 13.5 i = 0 - for startPix in Plot.frange(xLeftOffset, xLeftOffset+plotWidth, spacingAmt): + for startPix in Plot.frange(xLeftOffset, xLeftOffset + plotWidth, spacingAmt): if (i % 2 == 0): theBackColor = self.GRAPH_BACK_DARK_COLOR else: @@ -2688,25 +2852,27 @@ class DisplayMappingResults: i += 1 im_drawer.rectangle( [(startPix, yTopOffset), - (min(startPix+spacingAmt, xLeftOffset+plotWidth), yBottom)], + (min(startPix + spacingAmt, xLeftOffset + plotWidth), yBottom)], outline=theBackColor, fill=theBackColor) drawRegionDistance = self.ChrLengthDistList[self.ChrList[self.selectedChr][1]] self.ChrLengthDistList = [drawRegionDistance] if self.plotScale == 'physic': - plotXScale = plotWidth / (endMb-startMb) + plotXScale = plotWidth / (endMb - startMb) else: plotXScale = plotWidth / drawRegionDistance - else: #multiple chromosome view - plotXScale = plotWidth / ((len(self.genotype)-1)*self.GraphInterval + drawRegionDistance) + else: # multiple chromosome view + plotXScale = plotWidth / \ + ((len(self.genotype) - 1) * self.GraphInterval + drawRegionDistance) startPosX = xLeftOffset if fontZoom == 1.5: chrFontZoom = 2 else: chrFontZoom = 1 - chrLabelFont=ImageFont.truetype(font=VERDANA_FILE, size=24*chrFontZoom) + chrLabelFont = ImageFont.truetype( + font=VERDANA_FILE, size=24 * chrFontZoom) for i, _chr in enumerate(self.genotype): if (i % 2 == 0): @@ -2714,23 +2880,27 @@ class DisplayMappingResults: else: theBackColor = self.GRAPH_BACK_LIGHT_COLOR - #draw the shaded boxes and the sig/sug thick lines + # draw the shaded boxes and the sig/sug thick lines im_drawer.rectangle( ((startPosX, yTopOffset), - (startPosX + self.ChrLengthDistList[i]*plotXScale, yBottom)), + (startPosX + self.ChrLengthDistList[i] * plotXScale, yBottom)), outline=GAINSBORO, fill=theBackColor) - chrNameWidth, chrNameHeight = im_drawer.textsize(_chr.name, font=chrLabelFont) - chrStartPix = startPosX + (self.ChrLengthDistList[i]*plotXScale -chrNameWidth)/2 - chrEndPix = startPosX + (self.ChrLengthDistList[i]*plotXScale +chrNameWidth)/2 + chrNameWidth, chrNameHeight = im_drawer.textsize( + _chr.name, font=chrLabelFont) + chrStartPix = startPosX + \ + (self.ChrLengthDistList[i] * plotXScale - chrNameWidth) / 2 + chrEndPix = startPosX + \ + (self.ChrLengthDistList[i] * plotXScale + chrNameWidth) / 2 TEXT_Y_DISPLACEMENT = 0 im_drawer.text(xy=(chrStartPix, yTopOffset + TEXT_Y_DISPLACEMENT), text=_chr.name, font=chrLabelFont, fill=BLACK) - COORDS = "%d,%d,%d,%d" %(chrStartPix, yTopOffset, chrEndPix, yTopOffset +20) + COORDS = "%d,%d,%d,%d" % ( + chrStartPix, yTopOffset, chrEndPix, yTopOffset + 20) - #add by NL 09-03-2010 + # add by NL 09-03-2010 HREF = "javascript:chrView(%d,%s);" % (i, self.ChrLengthMbList) #HREF = "javascript:changeView(%d,%s);" % (i,self.ChrLengthMbList) Areas = HtmlGenWrapper.create_area_tag( @@ -2738,7 +2908,8 @@ class DisplayMappingResults: coords=COORDS, href=HREF) gifmap.append(Areas) - startPosX += (self.ChrLengthDistList[i]+self.GraphInterval)*plotXScale + startPosX += (self.ChrLengthDistList[i] + \ + self.GraphInterval) * plotXScale return plotXScale @@ -2748,15 +2919,16 @@ class DisplayMappingResults: ######################################### myCanvas = Image.new("RGBA", size=(500, 300)) if 'lod_score' in self.qtlresults[0] and self.LRS_LOD == "LRS": - perm_output = [value*4.61 for value in self.perm_output] + perm_output = [value * 4.61 for value in self.perm_output] elif 'lod_score' not in self.qtlresults[0] and self.LRS_LOD == "LOD": - perm_output = [value/4.61 for value in self.perm_output] + perm_output = [value / 4.61 for value in self.perm_output] else: perm_output = self.perm_output - filename= webqtlUtil.genRandStr("Reg_") - Plot.plotBar(myCanvas, perm_output, XLabel=self.LRS_LOD, YLabel='Frequency', title=' Histogram of Permutation Test') - myCanvas.save("{}.gif".format(GENERATED_IMAGE_DIR+filename), + filename = webqtlUtil.genRandStr("Reg_") + Plot.plotBar(myCanvas, perm_output, XLabel=self.LRS_LOD, + YLabel='Frequency', title=' Histogram of Permutation Test') + myCanvas.save("{}.gif".format(GENERATED_IMAGE_DIR + filename), format='gif') return filename @@ -2775,16 +2947,16 @@ class DisplayMappingResults: if self.dataset.group.species == "mouse": if refGene: gene_table_header_list = ["Index", - "Symbol", - "Mb Start", - "Length (Kb)", - "SNP Count", - "SNP Density", - "Avg Expr", - "Human Chr", - "Mb Start (hg19)", - "Literature Correlation", - "Gene Description"] + "Symbol", + "Mb Start", + "Length (Kb)", + "SNP Count", + "SNP Density", + "Avg Expr", + "Human Chr", + "Mb Start (hg19)", + "Literature Correlation", + "Gene Description"] else: gene_table_header_list = ["", "Index", @@ -2821,20 +2993,21 @@ class DisplayMappingResults: tableIterationsCnt = tableIterationsCnt + 1 - this_row = [] #container for the cells of each row + this_row = [] # container for the cells of each row selectCheck = HtmlGenWrapper.create_input_tag( type_="checkbox", name="selectCheck", value=theGO["GeneSymbol"], Class="checkbox trait_checkbox") # checkbox for each row - geneLength = (theGO["TxEnd"] - theGO["TxStart"])*1000.0 - tenPercentLength = geneLength*0.0001 + geneLength = (theGO["TxEnd"] - theGO["TxStart"]) * 1000.0 + tenPercentLength = geneLength * 0.0001 txStart = theGO["TxStart"] txEnd = theGO["TxEnd"] - theGO["snpDensity"] = theGO["snpCount"]/geneLength + theGO["snpDensity"] = theGO["snpCount"] / geneLength if self.ALEX_DEBUG_BOOL_PRINT_GENE_LIST: - geneIdString = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s' % theGO["GeneID"] + geneIdString = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s' % theGO[ + "GeneID"] if theGO["snpCount"]: snpString = HT.Link( @@ -2844,16 +3017,18 @@ class DisplayMappingResults: f"end={theGO['TxEnd']}&" f"geneName={theGO['GeneSymbol']}&" f"s1={self.diffCol[0]}&s2=%d"), - str(theGO["snpCount"]) # The text to display + str(theGO["snpCount"]) # The text to display ) snpString.set_blank_target() snpString.set_attribute("class", "normalsize") else: snpString = 0 - mouseStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Mouse&db=mm9&position=chr" + theGO["Chromosome"] + "%3A" + str(int(theGO["TxStart"] * 1000000.0)) + "-" + str(int(theGO["TxEnd"]*1000000.0)) +"&pix=620&Submit=submit" + mouseStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Mouse&db=mm9&position=chr" + \ + theGO["Chromosome"] + "%3A" + str(int(theGO["TxStart"] * 1000000.0)) + "-" + str( + int(theGO["TxEnd"] * 1000000.0)) + "&pix=620&Submit=submit" - #the chromosomes for human 1 are 1qXX.XX + # the chromosomes for human 1 are 1qXX.XX if theGO['humanGene']: if theGO['humanGene']["TxStart"] == '': humanStartDisplay = "" @@ -2863,20 +3038,21 @@ class DisplayMappingResults: humanChr = theGO['humanGene']["Chromosome"] humanTxStart = theGO['humanGene']["TxStart"] - humanStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Human&db=hg17&position=chr%s:%d-%d" % (humanChr, int(1000000*theGO['humanGene']["TxStart"]), int(1000000*theGO['humanGene']["TxEnd"])) + humanStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Human&db=hg17&position=chr%s:%d-%d" % ( + humanChr, int(1000000 * theGO['humanGene']["TxStart"]), int(1000000 * theGO['humanGene']["TxEnd"])) else: humanStartString = humanChr = humanStartDisplay = "--" geneDescription = theGO["GeneDescription"] if len(geneDescription) > 70: - geneDescription = geneDescription[:70]+"..." + geneDescription = geneDescription[:70] + "..." if theGO["snpDensity"] < 0.000001: snpDensityStr = "0" else: snpDensityStr = "%0.6f" % theGO["snpDensity"] - avgExpr = [] #theGO["avgExprVal"] + avgExpr = [] # theGO["avgExprVal"] if avgExpr in ([], None): avgExpr = "--" else: @@ -2888,7 +3064,8 @@ class DisplayMappingResults: else: chr_as_int = int(theGO["Chromosome"]) - 1 if refGene: - literatureCorrelationString = str(self.getLiteratureCorrelation(self.cursor, refGene, theGO['GeneID']) or "N/A") + literatureCorrelationString = str(self.getLiteratureCorrelation( + self.cursor, refGene, theGO['GeneID']) or "N/A") this_row = [selectCheck.__str__(), str(tableIterationsCnt), @@ -2896,17 +3073,17 @@ class DisplayMappingResults: geneIdString, theGO["GeneSymbol"], target="_blank") - ), + ), str(HtmlGenWrapper.create_link_tag( mouseStartString, "{:.6f}".format(txStart), target="_blank") - ), + ), str(HtmlGenWrapper.create_link_tag( "javascript:rangeView('{}', {:f}, {:f})".format( str(chr_as_int), - txStart-tenPercentLength, - txEnd+tenPercentLength), + txStart - tenPercentLength, + txEnd + tenPercentLength), "{:.3f}".format(geneLength))), snpString, snpDensityStr, @@ -2931,8 +3108,8 @@ class DisplayMappingResults: str(HtmlGenWrapper.create_link_tag( "javascript:rangeView('{}', {:f}, {:f})".format( str(chr_as_int), - txStart-tenPercentLength, - txEnd+tenPercentLength), + txStart - tenPercentLength, + txEnd + tenPercentLength), "{:.3f}".format(geneLength))), snpString, snpDensityStr, @@ -2956,7 +3133,8 @@ class DisplayMappingResults: if theGO["GeneID"] != "": geneSymbolNCBI = str(HtmlGenWrapper.create_link_tag( - "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids={}".format(theGO["GeneID"]), + "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids={}".format( + theGO["GeneID"]), theGO["GeneSymbol"], Class="normalsize", target="_blank")) @@ -2969,7 +3147,8 @@ class DisplayMappingResults: chr_as_int = int(theGO["Chromosome"]) - 1 geneLength = (float(theGO["TxEnd"]) - float(theGO["TxStart"])) - geneLengthURL = "javascript:rangeView('%s', %f, %f)" % (theGO["Chromosome"], float(theGO["TxStart"])-(geneLength*0.1), float(theGO["TxEnd"])+(geneLength*0.1)) + geneLengthURL = "javascript:rangeView('%s', %f, %f)" % (theGO["Chromosome"], float( + theGO["TxStart"]) - (geneLength * 0.1), float(theGO["TxEnd"]) + (geneLength * 0.1)) avgExprVal = [] if avgExprVal != "" and avgExprVal: @@ -2977,14 +3156,14 @@ class DisplayMappingResults: else: avgExprVal = "" - #Mouse Gene + # Mouse Gene if theGO['mouseGene']: mouseChr = theGO['mouseGene']["Chromosome"] mouseTxStart = "%0.6f" % theGO['mouseGene']["TxStart"] else: mouseChr = mouseTxStart = "" - #the chromosomes for human 1 are 1qXX.XX + # the chromosomes for human 1 are 1qXX.XX if theGO['humanGene']: humanChr = theGO['humanGene']["Chromosome"] humanTxStart = "%0.6f" % theGO['humanGene']["TxStart"] @@ -2996,12 +3175,12 @@ class DisplayMappingResults: geneDesc = "" this_row = [selectCheck.__str__(), - str(gIndex+1), + str(gIndex + 1), geneSymbolNCBI, "%0.6f" % theGO["TxStart"], str(HtmlGenWrapper.create_link_tag( geneLengthURL, - "{:.3f}".format(geneLength*1000.0))), + "{:.3f}".format(geneLength * 1000.0))), avgExprVal, mouseChr, mouseTxStart, @@ -3013,7 +3192,7 @@ class DisplayMappingResults: return gene_table_body - def getLiteratureCorrelation(cursor,geneId1=None,geneId2=None): + def getLiteratureCorrelation(cursor, geneId1=None, geneId2=None): if not geneId1 or not geneId2: return None if geneId1 == geneId2: @@ -3025,9 +3204,10 @@ class DisplayMappingResults: query = 'SELECT Value FROM LCorrRamin3 WHERE GeneId1 = %s and GeneId2 = %s' for x, y in [(geneId1, geneId2), (geneId2, geneId1)]: cursor.execute(query, (x, y)) - lCorr = cursor.fetchone() + lCorr = cursor.fetchone() if lCorr: lCorr = lCorr[0] break - except: raise #lCorr = None + except: + raise # lCorr = None return lCorr diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py index 06c9300a..f88c5ac8 100644 --- a/wqflask/wqflask/marker_regression/gemma_mapping.py +++ b/wqflask/wqflask/marker_regression/gemma_mapping.py @@ -149,7 +149,8 @@ def gen_covariates_file(this_dataset, covariates, samples): dataset_name = covariate.split(":")[1] if dataset_name == "Temp": temp_group = trait_name.split("_")[2] - dataset_ob = create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = temp_group) + dataset_ob = create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=temp_group) else: dataset_ob = create_dataset(covariate.split(":")[1]) trait_ob = create_trait(dataset=dataset_ob, diff --git a/wqflask/wqflask/marker_regression/plink_mapping.py b/wqflask/wqflask/marker_regression/plink_mapping.py index 5d675c38..2fa80841 100644 --- a/wqflask/wqflask/marker_regression/plink_mapping.py +++ b/wqflask/wqflask/marker_regression/plink_mapping.py @@ -6,13 +6,14 @@ from utility import webqtlUtil from utility.tools import flat_files, PLINK_COMMAND import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + def run_plink(this_trait, dataset, species, vals, maf): - plink_output_filename = webqtlUtil.genRandStr(f"{dataset.group.name}_{this_trait.name}_") + plink_output_filename = webqtlUtil.genRandStr( + f"{dataset.group.name}_{this_trait.name}_") gen_pheno_txt_file(dataset, vals) - plink_command = f"{PLINK_COMMAND} --noweb --bfile {flat_files('mapping')}/{dataset.group.name} --no-pheno --no-fid --no-parents --no-sex --maf {maf} --out { TMPDIR}{plink_output_filename} --assoc " logger.debug("plink_command:", plink_command) @@ -25,6 +26,7 @@ def run_plink(this_trait, dataset, species, vals, maf): return dataset.group.markers.markers + def gen_pheno_txt_file(this_dataset, vals): """Generates phenotype file for GEMMA/PLINK""" @@ -34,15 +36,17 @@ def gen_pheno_txt_file(this_dataset, vals): split_line = line.split() current_file_data.append(split_line) - with open(f"{flat_files('mapping')}/{this_dataset.group.name}.fam","w") as outfile: + with open(f"{flat_files('mapping')}/{this_dataset.group.name}.fam", "w") as outfile: for i, line in enumerate(current_file_data): if vals[i] == "x": this_val = -9 else: this_val = vals[i] - outfile.write("0 " + line[1] + " " + line[2] + " " + line[3] + " " + line[4] + " " + str(this_val) + "\n") + outfile.write("0 " + line[1] + " " + line[2] + " " + \ + line[3] + " " + line[4] + " " + str(this_val) + "\n") + -def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = ''): +def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename=''): ped_sample_list = get_samples_from_ped_file(dataset) output_file = open(f"{TMPDIR}{pheno_filename}.txt", "wb") header = f"FID\tIID\t{this_trait.name}\n" @@ -50,7 +54,7 @@ def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = ''): new_value_list = [] - #if valueDict does not include some strain, value will be set to -9999 as missing value + # if valueDict does not include some strain, value will be set to -9999 as missing value for i, sample in enumerate(ped_sample_list): try: value = vals[i] @@ -63,11 +67,11 @@ def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = ''): new_line = '' for i, sample in enumerate(ped_sample_list): - j = i+1 + j = i + 1 value = new_value_list[i] new_line += f"{sample}\t{sample}\t{value}\n" - if j%1000 == 0: + if j % 1000 == 0: output_file.write(newLine) new_line = '' @@ -77,10 +81,12 @@ def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = ''): output_file.close() # get strain name from ped file in order + + def get_samples_from_ped_file(dataset): - ped_file= open(f"{flat_files('mapping')}{dataset.group.name}.ped","r") + ped_file = open(f"{flat_files('mapping')}{dataset.group.name}.ped", "r") line = ped_file.readline() - sample_list=[] + sample_list = [] while line: lineList = line.strip().split('\t') @@ -93,25 +99,26 @@ def get_samples_from_ped_file(dataset): return sample_list + def parse_plink_output(output_filename, species): - plink_results={} + plink_results = {} threshold_p_value = 1 - result_fp = open(f"{TMPDIR}{output_filename}.qassoc","rb") + result_fp = open(f"{TMPDIR}{output_filename}.qassoc", "rb") line = result_fp.readline() - value_list = [] # initialize value list, this list will include snp, bp and pvalue info + value_list = [] # initialize value list, this list will include snp, bp and pvalue info p_value_dict = {} count = 0 while line: - #convert line from str to list + # convert line from str to list line_list = build_line_list(line=line) # only keep the records whose chromosome name is in db - if int(line_list[0]) in species.chromosomes.chromosomes and line_list[-1] and line_list[-1].strip()!='NA': + if int(line_list[0]) in species.chromosomes.chromosomes and line_list[-1] and line_list[-1].strip() != 'NA': chr_name = species.chromosomes.chromosomes[int(line_list[0])] snp = line_list[1] @@ -125,7 +132,7 @@ def parse_plink_output(output_filename, species): value_list = plink_results[chr_name] # pvalue range is [0,1] - if threshold_p_value >=0 and threshold_p_value <= 1: + if threshold_p_value >= 0 and threshold_p_value <= 1: if p_value < threshold_p_value: value_list.append((snp, BP, p_value)) count += 1 @@ -141,7 +148,7 @@ def parse_plink_output(output_filename, species): if value_list: plink_results[chr_name] = value_list - value_list=[] + value_list = [] line = result_fp.readline() else: @@ -154,9 +161,12 @@ def parse_plink_output(output_filename, species): # function: convert line from str to list; # output: lineList list ####################################################### + + def build_line_list(line=""): - line_list = line.strip().split(' ')# irregular number of whitespaces between columns - line_list = [item for item in line_list if item !=''] + # irregular number of whitespaces between columns + line_list = line.strip().split(' ') + line_list = [item for item in line_list if item != ''] line_list = [item.strip() for item in line_list] return line_list diff --git a/wqflask/wqflask/marker_regression/qtlreaper_mapping.py b/wqflask/wqflask/marker_regression/qtlreaper_mapping.py index 8341ee55..4d6715ba 100644 --- a/wqflask/wqflask/marker_regression/qtlreaper_mapping.py +++ b/wqflask/wqflask/marker_regression/qtlreaper_mapping.py @@ -1,4 +1,9 @@ -import os, math, string, random, json, re +import os +import math +import string +import random +import json +import re from base import webqtlConfig from base.trait import GeneralTrait @@ -6,7 +11,8 @@ from base.data_set import create_dataset from utility.tools import flat_files, REAPER_COMMAND, TEMPDIR import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + def run_reaper(this_trait, this_dataset, samples, vals, json_data, num_perm, boot_check, num_bootstrap, do_control, control_marker, manhattan_plot, first_run=True, output_files=None): """Generates p-values for each marker using qtlreaper""" @@ -17,66 +23,73 @@ def run_reaper(this_trait, this_dataset, samples, vals, json_data, num_perm, boo else: genofile_name = this_dataset.group.name - trait_filename =f"{str(this_trait.name)}_{str(this_dataset.name)}_pheno" + trait_filename = f"{str(this_trait.name)}_{str(this_dataset.name)}_pheno" gen_pheno_txt_file(samples, vals, trait_filename) - output_filename = (f"{this_dataset.group.name}_GWA_"+ - ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - ) + output_filename = (f"{this_dataset.group.name}_GWA_" + + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + ) bootstrap_filename = None permu_filename = None opt_list = [] if boot_check and num_bootstrap > 0: - bootstrap_filename = (f"{this_dataset.group.name}_BOOTSTRAP_" + - ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - ) + bootstrap_filename = (f"{this_dataset.group.name}_BOOTSTRAP_" + + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + ) opt_list.append("-b") opt_list.append(f"--n_bootstrap {str(num_bootstrap)}") - opt_list.append(f"--bootstrap_output {webqtlConfig.GENERATED_IMAGE_DIR}{bootstrap_filename}.txt") + opt_list.append( + f"--bootstrap_output {webqtlConfig.GENERATED_IMAGE_DIR}{bootstrap_filename}.txt") if num_perm > 0: - permu_filename =("{this_dataset.group.name}_PERM_" + - ''.join(random.choice(string.ascii_uppercase + - string.digits) for _ in range(6)) - ) + permu_filename = ("{this_dataset.group.name}_PERM_" + + ''.join(random.choice(string.ascii_uppercase + + string.digits) for _ in range(6)) + ) opt_list.append("-n " + str(num_perm)) - opt_list.append("--permu_output " + webqtlConfig.GENERATED_IMAGE_DIR + permu_filename + ".txt") + opt_list.append( + "--permu_output " + webqtlConfig.GENERATED_IMAGE_DIR + permu_filename + ".txt") if control_marker != "" and do_control == "true": opt_list.append("-c " + control_marker) if manhattan_plot != True: opt_list.append("--interval 1") - reaper_command = (REAPER_COMMAND + - ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt {4} -o {5}{6}.txt'.format(flat_files('genotype'), + reaper_command = (REAPER_COMMAND + + ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt {4} -o {5}{6}.txt'.format(flat_files('genotype'), - genofile_name, - TEMPDIR, - trait_filename, - " ".join(opt_list), - webqtlConfig.GENERATED_IMAGE_DIR, - output_filename)) + genofile_name, + TEMPDIR, + trait_filename, + " ".join( + opt_list), + webqtlConfig.GENERATED_IMAGE_DIR, + output_filename)) logger.debug("reaper_command:" + reaper_command) os.system(reaper_command) else: output_filename, permu_filename, bootstrap_filename = output_files - marker_obs, permu_vals, bootstrap_vals = parse_reaper_output(output_filename, permu_filename, bootstrap_filename) + marker_obs, permu_vals, bootstrap_vals = parse_reaper_output( + output_filename, permu_filename, bootstrap_filename) suggestive = 0 significant = 0 if len(permu_vals) > 0: - suggestive = permu_vals[int(num_perm*0.37-1)] - significant = permu_vals[int(num_perm*0.95-1)] + suggestive = permu_vals[int(num_perm * 0.37 - 1)] + significant = permu_vals[int(num_perm * 0.95 - 1)] + + return (marker_obs, permu_vals, suggestive, significant, bootstrap_vals, + [output_filename, permu_filename, bootstrap_filename]) - return (marker_obs, permu_vals, suggestive, significant, bootstrap_vals, - [output_filename, permu_filename, bootstrap_filename]) def gen_pheno_txt_file(samples, vals, trait_filename): """Generates phenotype file for GEMMA""" - with open(f"{TEMPDIR}/gn2/{trait_filename}.txt","w") as outfile: + with open(f"{TEMPDIR}/gn2/{trait_filename}.txt", "w") as outfile: outfile.write("Trait\t") filtered_sample_list = [] @@ -92,6 +105,7 @@ def gen_pheno_txt_file(samples, vals, trait_filename): values_string = "\t".join(filtered_vals_list) outfile.write(values_string) + def parse_reaper_output(gwa_filename, permu_filename, bootstrap_filename): included_markers = [] p_values = [] @@ -121,7 +135,7 @@ def parse_reaper_output(gwa_filename, permu_filename, bootstrap_filename): marker['cM'] = float(line.split("\t")[3]) else: if float(line.split("\t")[3]) > 1000: - marker['Mb'] = float(line.split("\t")[3])/1000000 + marker['Mb'] = float(line.split("\t")[3]) / 1000000 else: marker['Mb'] = float(line.split("\t")[3]) if float(line.split("\t")[6]) != 1: @@ -132,7 +146,7 @@ def parse_reaper_output(gwa_filename, permu_filename, bootstrap_filename): else: marker['cM'] = float(line.split("\t")[3]) if float(line.split("\t")[4]) > 1000: - marker['Mb'] = float(line.split("\t")[4])/1000000 + marker['Mb'] = float(line.split("\t")[4]) / 1000000 else: marker['Mb'] = float(line.split("\t")[4]) if float(line.split("\t")[7]) != 1: @@ -142,7 +156,7 @@ def parse_reaper_output(gwa_filename, permu_filename, bootstrap_filename): marker['additive'] = float(line.split("\t")[6]) marker_obs.append(marker) - #ZS: Results have to be reordered because the new reaper returns results sorted alphabetically by chr for some reason, resulting in chr 1 being followed by 10, etc + # ZS: Results have to be reordered because the new reaper returns results sorted alphabetically by chr for some reason, resulting in chr 1 being followed by 10, etc sorted_indices = natural_sort(marker_obs) permu_vals = [] @@ -163,6 +177,7 @@ def parse_reaper_output(gwa_filename, permu_filename, bootstrap_filename): return marker_obs, permu_vals, bootstrap_vals + def run_original_reaper(this_trait, dataset, samples_before, trait_vals, json_data, num_perm, bootCheck, num_bootstrap, do_control, control_marker, manhattan_plot): genotype = dataset.group.read_genotype_file(use_reaper=True) @@ -185,18 +200,19 @@ def run_original_reaper(this_trait, dataset, samples_before, trait_vals, json_da suggestive = 0 significant = 0 else: - perm_output = genotype.permutation(strains = trimmed_samples, trait = trimmed_values, nperm=num_perm) - suggestive = perm_output[int(num_perm*0.37-1)] - significant = perm_output[int(num_perm*0.95-1)] - #highly_significant = perm_output[int(num_perm*0.99-1)] #ZS: Currently not used, but leaving it here just in case + perm_output = genotype.permutation( + strains=trimmed_samples, trait=trimmed_values, nperm=num_perm) + suggestive = perm_output[int(num_perm * 0.37 - 1)] + significant = perm_output[int(num_perm * 0.95 - 1)] + # highly_significant = perm_output[int(num_perm*0.99-1)] #ZS: Currently not used, but leaving it here just in case json_data['suggestive'] = suggestive json_data['significant'] = significant if control_marker != "" and do_control == "true": - reaper_results = genotype.regression(strains = trimmed_samples, - trait = trimmed_values, - control = str(control_marker)) + reaper_results = genotype.regression(strains=trimmed_samples, + trait=trimmed_values, + control=str(control_marker)) if bootCheck: control_geno = [] control_geno2 = [] @@ -215,31 +231,31 @@ def run_original_reaper(this_trait, dataset, samples_before, trait_vals, json_da _idx = _prgy.index(_strain) control_geno.append(control_geno2[_idx]) - bootstrap_results = genotype.bootstrap(strains = trimmed_samples, - trait = trimmed_values, - control = control_geno, - nboot = num_bootstrap) + bootstrap_results = genotype.bootstrap(strains=trimmed_samples, + trait=trimmed_values, + control=control_geno, + nboot=num_bootstrap) else: - reaper_results = genotype.regression(strains = trimmed_samples, - trait = trimmed_values) + reaper_results = genotype.regression(strains=trimmed_samples, + trait=trimmed_values) if bootCheck: - bootstrap_results = genotype.bootstrap(strains = trimmed_samples, - trait = trimmed_values, - nboot = num_bootstrap) + bootstrap_results = genotype.bootstrap(strains=trimmed_samples, + trait=trimmed_values, + nboot=num_bootstrap) json_data['chr'] = [] json_data['pos'] = [] json_data['lod.hk'] = [] json_data['markernames'] = [] - #if self.additive: + # if self.additive: # self.json_data['additive'] = [] - #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary + # Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary qtl_results = [] for qtl in reaper_results: reaper_locus = qtl.locus - #ZS: Convert chr to int + # ZS: Convert chr to int converted_chr = reaper_locus.chr if reaper_locus.chr != "X" and reaper_locus.chr != "X/Y": converted_chr = int(reaper_locus.chr) @@ -247,19 +263,22 @@ def run_original_reaper(this_trait, dataset, samples_before, trait_vals, json_da json_data['pos'].append(reaper_locus.Mb) json_data['lod.hk'].append(qtl.lrs) json_data['markernames'].append(reaper_locus.name) - #if self.additive: + # if self.additive: # self.json_data['additive'].append(qtl.additive) - locus = {"name":reaper_locus.name, "chr":reaper_locus.chr, "cM":reaper_locus.cM, "Mb":reaper_locus.Mb} - qtl = {"lrs_value": qtl.lrs, "chr":converted_chr, "Mb":reaper_locus.Mb, - "cM":reaper_locus.cM, "name":reaper_locus.name, "additive":qtl.additive, "dominance":qtl.dominance} + locus = {"name": reaper_locus.name, "chr": reaper_locus.chr, + "cM": reaper_locus.cM, "Mb": reaper_locus.Mb} + qtl = {"lrs_value": qtl.lrs, "chr": converted_chr, "Mb": reaper_locus.Mb, + "cM": reaper_locus.cM, "name": reaper_locus.name, "additive": qtl.additive, "dominance": qtl.dominance} qtl_results.append(qtl) return qtl_results, json_data, perm_output, suggestive, significant, bootstrap_results + def natural_sort(marker_list): """ Function to naturally sort numbers + strings, adopted from user Mark Byers here: https://stackoverflow.com/questions/4836710/does-python-have-a-built-in-function-for-string-natural-sort Changed to return indices instead of values, though, since the same reordering needs to be applied to bootstrap results """ convert = lambda text: int(text) if text.isdigit() else text.lower() - alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', str(marker_list[key]['chr'])) ] - return sorted(list(range(len(marker_list))), key = alphanum_key)
\ No newline at end of file + alphanum_key = lambda key: [convert(c) for c in re.split( + '([0-9]+)', str(marker_list[key]['chr']))] + return sorted(list(range(len(marker_list))), key=alphanum_key) diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py index 4117a0e5..1fa3dffe 100644 --- a/wqflask/wqflask/marker_regression/rqtl_mapping.py +++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py @@ -13,51 +13,54 @@ from utility.tools import locate, TEMPDIR from flask import g import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) # Get a trait's type (numeric, categorical, etc) from the DB + + def get_trait_data_type(trait_db_string): - logger.info("get_trait_data_type"); + logger.info("get_trait_data_type") the_query = "SELECT value FROM TraitMetadata WHERE type='trait_data_type'" - logger.info("the_query done"); + logger.info("the_query done") results_json = g.db.execute(the_query).fetchone() - logger.info("the_query executed"); + logger.info("the_query executed") results_ob = json.loads(results_json[0]) - logger.info("json results loaded"); + logger.info("json results loaded") if trait_db_string in results_ob: - logger.info("found"); + logger.info("found") return results_ob[trait_db_string] else: - logger.info("not found"); + logger.info("not found") return "numeric" # Run qtl mapping using R/qtl def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permCheck, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, pair_scan, cofactors): - logger.info("Start run_rqtl_geno"); - ## Get pointers to some common R functions - r_library = ro.r["library"] # Map the library function - r_c = ro.r["c"] # Map the c function - plot = ro.r["plot"] # Map the plot function - png = ro.r["png"] # Map the png function - dev_off = ro.r["dev.off"] # Map the device off function + logger.info("Start run_rqtl_geno") + # Get pointers to some common R functions + r_library = ro.r["library"] # Map the library function + r_c = ro.r["c"] # Map the c function + plot = ro.r["plot"] # Map the plot function + png = ro.r["png"] # Map the png function + dev_off = ro.r["dev.off"] # Map the device off function print((r_library("qtl"))) # Load R/qtl - logger.info("QTL library loaded"); + logger.info("QTL library loaded") - ## Get pointers to some R/qtl functions - scanone = ro.r["scanone"] # Map the scanone function - scantwo = ro.r["scantwo"] # Map the scantwo function - calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function + # Get pointers to some R/qtl functions + scanone = ro.r["scanone"] # Map the scanone function + scantwo = ro.r["scantwo"] # Map the scantwo function + # Map the calc.genoprob function + calc_genoprob = ro.r["calc.genoprob"] crossname = dataset.group.name - #try: + # try: # generate_cross_from_rdata(dataset) # read_cross_from_rdata = ro.r["generate_cross_from_rdata"] # Map the local read_cross_from_rdata function # genofilelocation = locate(crossname + ".RData", "genotype/rdata") # cross_object = read_cross_from_rdata(genofilelocation) # Map the local GENOtoCSVR function - #except: + # except: if mapping_scale == "morgan": scale_units = "cM" @@ -65,76 +68,101 @@ def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permChec scale_units = "Mb" generate_cross_from_geno(dataset, scale_units) - GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function + # Map the local GENOtoCSVR function + GENOtoCSVR = ro.r["GENOtoCSVR"] crossfilelocation = TMPDIR + crossname + ".cross" if dataset.group.genofile: - genofilelocation = locate(dataset.group.genofile, "genotype") + genofilelocation = locate(dataset.group.genofile, "genotype") else: genofilelocation = locate(dataset.group.name + ".geno", "genotype") - logger.info("Going to create a cross from geno"); - cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) # TODO: Add the SEX if that is available - logger.info("before calc_genoprob"); + logger.info("Going to create a cross from geno") + # TODO: Add the SEX if that is available + cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) + logger.info("before calc_genoprob") if manhattan_plot: cross_object = calc_genoprob(cross_object) else: cross_object = calc_genoprob(cross_object, step=5, stepwidth="max") - logger.info("after calc_genoprob"); + logger.info("after calc_genoprob") pheno_string = sanitize_rqtl_phenotype(vals) - logger.info("phenostring done"); + logger.info("phenostring done") names_string = sanitize_rqtl_names(samples) - logger.info("sanitized pheno and names"); - cross_object = add_phenotype(cross_object, pheno_string, "the_pheno") # Add the phenotype - cross_object = add_names(cross_object, names_string, "the_names") # Add the phenotype - logger.info("Added pheno and names"); - marker_covars = create_marker_covariates(control_marker, cross_object) # Create the additive covariate markers - logger.info("Marker covars done"); + logger.info("sanitized pheno and names") + # Add the phenotype + cross_object = add_phenotype(cross_object, pheno_string, "the_pheno") + # Add the phenotype + cross_object = add_names(cross_object, names_string, "the_names") + logger.info("Added pheno and names") + # Create the additive covariate markers + marker_covars = create_marker_covariates(control_marker, cross_object) + logger.info("Marker covars done") if cofactors != "": - logger.info("Cofactors: " + cofactors); - cross_object, trait_covars = add_cofactors(cross_object, dataset, cofactors, samples) # Create the covariates from selected traits + logger.info("Cofactors: " + cofactors) + # Create the covariates from selected traits + cross_object, trait_covars = add_cofactors( + cross_object, dataset, cofactors, samples) ro.r('all_covars <- cbind(marker_covars, trait_covars)') else: ro.r('all_covars <- marker_covars') covars = ro.r['all_covars'] - #DEBUG to save the session object to file + # DEBUG to save the session object to file if pair_scan: if do_control == "true": - logger.info("Using covariate"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", addcovar = covars, model=model, method=method, n_cluster = 16) + logger.info("Using covariate") + result_data_frame = scantwo( + cross_object, pheno="the_pheno", addcovar=covars, model=model, method=method, n_cluster=16) else: - logger.info("No covariates"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", model=model, method=method, n_cluster = 16) + logger.info("No covariates") + result_data_frame = scantwo( + cross_object, pheno="the_pheno", model=model, method=method, n_cluster=16) pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png" - png(file=TEMPDIR+pair_scan_filename) + png(file=TEMPDIR + pair_scan_filename) plot(result_data_frame) dev_off() return process_pair_scan_results(result_data_frame) else: if do_control == "true" or cofactors != "": - logger.info("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covars, model=model, method=method) + logger.info("Using covariate") + result_data_frame = scanone( + cross_object, pheno="the_pheno", addcovar=covars, model=model, method=method) ro.r('save.image(file = "/home/zas1024/gn2-zach/itp_cofactor_test.RData")') else: - logger.info("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno", model=model, method=method) - - if num_perm > 0 and permCheck == "ON": # Do permutation (if requested by user) - if len(perm_strata_list) > 0: #ZS: The strata list would only be populated if "Stratified" was checked on before mapping - cross_object, strata_ob = add_perm_strata(cross_object, perm_strata_list) + logger.info("No covariates") + result_data_frame = scanone( + cross_object, pheno="the_pheno", model=model, method=method) + + # Do permutation (if requested by user) + if num_perm > 0 and permCheck == "ON": + # ZS: The strata list would only be populated if "Stratified" was checked on before mapping + if len(perm_strata_list) > 0: + cross_object, strata_ob = add_perm_strata( + cross_object, perm_strata_list) if do_control == "true" or cofactors != "": - perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covars, n_perm = int(num_perm), perm_strata = strata_ob, model=model, method=method) + perm_data_frame = scanone(cross_object, pheno_col="the_pheno", addcovar=covars, n_perm=int( + num_perm), perm_strata=strata_ob, model=model, method=method) else: - perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, perm_strata = strata_ob, model=model, method=method) + perm_data_frame = scanone( + cross_object, pheno_col="the_pheno", n_perm=num_perm, perm_strata=strata_ob, model=model, method=method) else: if do_control == "true" or cofactors != "": - perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covars, n_perm = int(num_perm), model=model, method=method) + perm_data_frame = scanone(cross_object, pheno_col="the_pheno", addcovar=covars, n_perm=int( + num_perm), model=model, method=method) else: - perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, model=model, method=method) + perm_data_frame = scanone( + cross_object, pheno_col="the_pheno", n_perm=num_perm, model=model, method=method) - perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame) # Functions that sets the thresholds for the webinterface + # Functions that sets the thresholds for the webinterface + perm_output, suggestive, significant = process_rqtl_perm_results( + num_perm, perm_data_frame) return perm_output, suggestive, significant, process_rqtl_results(result_data_frame, dataset.group.species) else: return process_rqtl_results(result_data_frame, dataset.group.species) + def generate_cross_from_rdata(dataset): - rdata_location = locate(dataset.group.name + ".RData", "genotype/rdata") + rdata_location = locate(dataset.group.name + ".RData", "genotype/rdata") ro.r(""" generate_cross_from_rdata <- function(filename = '%s') { load(file=filename) @@ -143,7 +171,9 @@ def generate_cross_from_rdata(dataset): } """ % (rdata_location)) -def generate_cross_from_geno(dataset, scale_units): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly + +# TODO: Need to figure out why some genofiles have the wrong format and don't convert properly +def generate_cross_from_geno(dataset, scale_units): ro.r(""" trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) } @@ -187,6 +217,7 @@ def generate_cross_from_geno(dataset, scale_units): # TODO: Need to figur } """ % (dataset.group.genofile, scale_units)) + def add_perm_strata(cross, perm_strata): col_string = 'c("the_strata")' perm_strata_string = "c(" @@ -201,12 +232,13 @@ def add_perm_strata(cross, perm_strata): return cross, strata_ob + def sanitize_rqtl_phenotype(vals): pheno_as_string = "c(" for i, val in enumerate(vals): if val == "x": if i < (len(vals) - 1): - pheno_as_string += "NA," + pheno_as_string += "NA," else: pheno_as_string += "NA" else: @@ -218,12 +250,13 @@ def sanitize_rqtl_phenotype(vals): return pheno_as_string + def sanitize_rqtl_names(vals): pheno_as_string = "c(" for i, val in enumerate(vals): if val == "x": if i < (len(vals) - 1): - pheno_as_string += "NA," + pheno_as_string += "NA," else: pheno_as_string += "NA" else: @@ -235,59 +268,66 @@ def sanitize_rqtl_names(vals): return pheno_as_string + def add_phenotype(cross, pheno_as_string, col_name): ro.globalenv["the_cross"] = cross ro.r('pheno <- data.frame(pull.pheno(the_cross))') - ro.r('the_cross$pheno <- cbind(pheno, ' + col_name + ' = as.numeric('+ pheno_as_string +'))') + ro.r('the_cross$pheno <- cbind(pheno, ' + col_name + \ + ' = as.numeric(' + pheno_as_string + '))') return ro.r["the_cross"] + def add_categorical_covar(cross, covar_as_string, i): ro.globalenv["the_cross"] = cross - logger.info("cross set"); + logger.info("cross set") ro.r('covar <- as.factor(' + covar_as_string + ')') - logger.info("covar set"); + logger.info("covar set") ro.r('newcovar <- model.matrix(~covar-1)') - logger.info("model.matrix finished"); + logger.info("model.matrix finished") ro.r('cat("new covar columns", ncol(newcovar), "\n")') nCol = ro.r('ncol(newcovar)') - logger.info("ncol covar done: " + str(nCol[0])); + logger.info("ncol covar done: " + str(nCol[0])) ro.r('pheno <- data.frame(pull.pheno(the_cross))') - logger.info("pheno pulled from cross"); + logger.info("pheno pulled from cross") nCol = int(nCol[0]) - logger.info("nCol python int:" + str(nCol)); + logger.info("nCol python int:" + str(nCol)) col_names = [] - #logger.info("loop") - for x in range(1, (nCol+1)): - #logger.info("loop" + str(x)); - col_name = "covar_" + str(i) + "_" + str(x) - #logger.info("col_name" + col_name); - ro.r('the_cross$pheno <- cbind(pheno, ' + col_name + ' = newcovar[,' + str(x) + '])') - col_names.append(col_name) - #logger.info("loop" + str(x) + "done"); - - logger.info("returning from add_categorical_covar"); + # logger.info("loop") + for x in range(1, (nCol + 1)): + #logger.info("loop" + str(x)); + col_name = "covar_" + str(i) + "_" + str(x) + #logger.info("col_name" + col_name); + ro.r('the_cross$pheno <- cbind(pheno, ' + \ + col_name + ' = newcovar[,' + str(x) + '])') + col_names.append(col_name) + #logger.info("loop" + str(x) + "done"); + + logger.info("returning from add_categorical_covar") return ro.r["the_cross"], col_names def add_names(cross, names_as_string, col_name): ro.globalenv["the_cross"] = cross ro.r('pheno <- data.frame(pull.pheno(the_cross))') - ro.r('the_cross$pheno <- cbind(pheno, ' + col_name + ' = '+ names_as_string +')') + ro.r('the_cross$pheno <- cbind(pheno, ' + \ + col_name + ' = ' + names_as_string + ')') return ro.r["the_cross"] + def pull_var(var_name, cross, var_string): ro.globalenv["the_cross"] = cross - ro.r(var_name +' <- pull.pheno(the_cross, ' + var_string + ')') + ro.r(var_name + ' <- pull.pheno(the_cross, ' + var_string + ')') return ro.r[var_name] + def add_cofactors(cross, this_dataset, covariates, samples): ro.numpy2ri.activate() covariate_list = covariates.split(",") covar_name_string = "c(" for i, covariate in enumerate(covariate_list): - logger.info("Covariate: " + covariate); + logger.info("Covariate: " + covariate) this_covar_data = [] covar_as_string = "c(" trait_name = covariate.split(":")[0] @@ -316,16 +356,18 @@ def add_cofactors(cross, this_dataset, covariates, samples): covar_as_string += ")" datatype = get_trait_data_type(covariate) - logger.info("Covariate: " + covariate + " is of type: " + datatype); - if(datatype == "categorical"): # Cat variable - logger.info("call of add_categorical_covar"); - cross, col_names = add_categorical_covar(cross, covar_as_string, i) # Expand and add it to the cross - logger.info("add_categorical_covar returned"); - for z, col_name in enumerate(col_names): # Go through the additional covar names + logger.info("Covariate: " + covariate + " is of type: " + datatype) + if(datatype == "categorical"): # Cat variable + logger.info("call of add_categorical_covar") + cross, col_names = add_categorical_covar( + cross, covar_as_string, i) # Expand and add it to the cross + logger.info("add_categorical_covar returned") + # Go through the additional covar names + for z, col_name in enumerate(col_names): if i < (len(covariate_list) - 1): covar_name_string += '"' + col_name + '", ' else: - if(z < (len(col_names) -1)): + if(z < (len(col_names) - 1)): covar_name_string += '"' + col_name + '", ' else: covar_name_string += '"' + col_name + '"' @@ -341,28 +383,35 @@ def add_cofactors(cross, this_dataset, covariates, samples): covars_ob = pull_var("trait_covars", cross, covar_name_string) return cross, covars_ob + def create_marker_covariates(control_marker, cross): ro.globalenv["the_cross"] = cross - ro.r('genotypes <- pull.geno(the_cross)') # Get the genotype matrix - userinput_sanitized = control_marker.replace(" ", "").split(",") # TODO: sanitize user input, Never Ever trust a user + # Get the genotype matrix + ro.r('genotypes <- pull.geno(the_cross)') + # TODO: sanitize user input, Never Ever trust a user + userinput_sanitized = control_marker.replace(" ", "").split(",") logger.debug(userinput_sanitized) if len(userinput_sanitized) > 0: - covariate_names = ', '.join('"{0}"'.format(w) for w in userinput_sanitized) + covariate_names = ', '.join('"{0}"'.format(w) + for w in userinput_sanitized) ro.r('covnames <- c(' + covariate_names + ')') else: ro.r('covnames <- c()') ro.r('covInGeno <- which(covnames %in% colnames(genotypes))') ro.r('covnames <- covnames[covInGeno]') ro.r("cat('covnames (purged): ', covnames,'\n')") - ro.r('marker_covars <- genotypes[,covnames]') # Get the covariate matrix by using the marker name as index to the genotype file + # Get the covariate matrix by using the marker name as index to the genotype file + ro.r('marker_covars <- genotypes[,covnames]') # TODO: Create a design matrix from the marker covars for the markers in case of an F2, 4way, etc return ro.r["marker_covars"] + def process_pair_scan_results(result): pair_scan_results = [] result = result[1] - output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)] + output = [tuple([result[j][i] for j in range(result.ncol)]) + for i in range(result.nrow)] for i, line in enumerate(result.iter_row()): marker = {} @@ -374,9 +423,10 @@ def process_pair_scan_results(result): return pair_scan_results + def process_rqtl_perm_results(num_perm, results): perm_vals = [] - for line in str(results).split("\n")[1:(num_perm+1)]: + for line in str(results).split("\n")[1:(num_perm + 1)]: #print("R/qtl permutation line:", line.split()) perm_vals.append(float(line.split()[1])) @@ -386,14 +436,18 @@ def process_rqtl_perm_results(num_perm, results): return perm_output, suggestive, significant -def process_rqtl_results(result, species_name): # TODO: how to make this a one liner and not copy the stuff in a loop + +# TODO: how to make this a one liner and not copy the stuff in a loop +def process_rqtl_results(result, species_name): qtl_results = [] - output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)] + output = [tuple([result[j][i] for j in range(result.ncol)]) + for i in range(result.nrow)] for i, line in enumerate(result.iter_row()): marker = {} marker['name'] = result.rownames[i] - if species_name == "mouse" and output[i][0] == 20: #ZS: This is awkward, but I'm not sure how to change the 20s to Xs in the RData file + # ZS: This is awkward, but I'm not sure how to change the 20s to Xs in the RData file + if species_name == "mouse" and output[i][0] == 20: marker['chr'] = "X" else: marker['chr'] = output[i][0] @@ -402,4 +456,4 @@ def process_rqtl_results(result, species_name): # TODO: how to make this marker['lod_score'] = output[i][2] qtl_results.append(marker) - return qtl_results
\ No newline at end of file + return qtl_results diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py index 7dd0bcb6..32ccec48 100644 --- a/wqflask/wqflask/marker_regression/run_mapping.py +++ b/wqflask/wqflask/marker_regression/run_mapping.py @@ -1,5 +1,5 @@ from base.trait import GeneralTrait -from base import data_set #import create_dataset +from base import data_set # import create_dataset from pprint import pformat as pf @@ -43,16 +43,18 @@ from utility.external import shell from base.webqtlConfig import TMPDIR, GENERATED_TEXT_DIR import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + class RunMapping: def __init__(self, start_vars, temp_uuid): helper_functions.get_species_dataset_trait(self, start_vars) - self.temp_uuid = temp_uuid #needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py) + # needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py) + self.temp_uuid = temp_uuid - #ZS: Needed to zoom in or remap temp traits like PCA traits + # ZS: Needed to zoom in or remap temp traits like PCA traits if "temp_trait" in start_vars and start_vars['temp_trait'] != "False": self.temp_trait = "True" self.group = self.dataset.group.name @@ -60,13 +62,14 @@ class RunMapping: self.json_data = {} self.json_data['lodnames'] = ['lod.hk'] - #ZS: Sometimes a group may have a genofile that only includes a subset of samples + # ZS: Sometimes a group may have a genofile that only includes a subset of samples genofile_samplelist = [] if 'genofile' in start_vars: - if start_vars['genofile'] != "": - self.genofile_string = start_vars['genofile'] - self.dataset.group.genofile = self.genofile_string.split(":")[0] - genofile_samplelist = get_genofile_samplelist(self.dataset) + if start_vars['genofile'] != "": + self.genofile_string = start_vars['genofile'] + self.dataset.group.genofile = self.genofile_string.split(":")[ + 0] + genofile_samplelist = get_genofile_samplelist(self.dataset) all_samples_ordered = self.dataset.group.all_samples_ordered() @@ -93,7 +96,7 @@ class RunMapping: else: self.n_samples = len([val for val in self.vals if val != "x"]) - #ZS: Check if genotypes exist in the DB in order to create links for markers + # ZS: Check if genotypes exist in the DB in order to create links for markers self.geno_db_exists = geno_db_exists(self.dataset) @@ -101,8 +104,11 @@ class RunMapping: if "results_path" in start_vars: self.mapping_results_path = start_vars['results_path'] else: - mapping_results_filename = self.dataset.group.name + "_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - self.mapping_results_path = "{}{}.csv".format(webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename) + mapping_results_filename = self.dataset.group.name + "_" + \ + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + self.mapping_results_path = "{}{}.csv".format( + webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename) self.manhattan_plot = False if 'manhattan_plot' in start_vars: @@ -114,19 +120,20 @@ class RunMapping: self.manhattan_single_color = start_vars['manhattan_single_color'] self.manhattan_plot = True - self.maf = start_vars['maf'] # Minor allele frequency + self.maf = start_vars['maf'] # Minor allele frequency if "use_loco" in start_vars: self.use_loco = start_vars['use_loco'] else: self.use_loco = None self.suggestive = "" self.significant = "" - self.pair_scan = False # Initializing this since it is checked in views to determine which template to use + # Initializing this since it is checked in views to determine which template to use + self.pair_scan = False if 'transform' in start_vars: self.transform = start_vars['transform'] else: self.transform = "" - self.score_type = "LRS" #ZS: LRS or LOD + self.score_type = "LRS" # ZS: LRS or LOD self.mapping_scale = "physic" if "mapping_scale" in start_vars: self.mapping_scale = start_vars['mapping_scale'] @@ -136,10 +143,11 @@ class RunMapping: self.covariates = start_vars['covariates'] if "covariates" in start_vars else "" self.categorical_vars = [] - #ZS: This is passed to GN1 code for single chr mapping + # ZS: This is passed to GN1 code for single chr mapping self.selected_chr = -1 if "selected_chr" in start_vars: - if int(start_vars['selected_chr']) != -1: #ZS: Needs to be -1 if showing full map; there's probably a better way to fix this + # ZS: Needs to be -1 if showing full map; there's probably a better way to fix this + if int(start_vars['selected_chr']) != -1: self.selected_chr = int(start_vars['selected_chr']) + 1 else: self.selected_chr = int(start_vars['selected_chr']) @@ -153,7 +161,7 @@ class RunMapping: self.lrsMax = start_vars['lrsMax'] if "haplotypeAnalystCheck" in start_vars: self.haplotypeAnalystCheck = start_vars['haplotypeAnalystCheck'] - if "startMb" in start_vars: #ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load + if "startMb" in start_vars: # ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load if "permCheck" in start_vars: self.permCheck = "ON" else: @@ -191,34 +199,39 @@ class RunMapping: self.showGenes = "ON" self.viewLegend = "ON" - #self.dataset.group.get_markers() + # self.dataset.group.get_markers() if self.mapping_method == "gemma": self.first_run = True self.output_files = None if 'output_files' in start_vars: self.output_files = start_vars['output_files'] - if 'first_run' in start_vars: #ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc) + # ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc) + if 'first_run' in start_vars: self.first_run = False self.score_type = "-logP" self.manhattan_plot = True with Bench("Running GEMMA"): if self.use_loco == "True": - marker_obs, self.output_files = gemma_mapping.run_gemma(self.this_trait, self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf, self.first_run, self.output_files) + marker_obs, self.output_files = gemma_mapping.run_gemma( + self.this_trait, self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf, self.first_run, self.output_files) else: - marker_obs, self.output_files = gemma_mapping.run_gemma(self.this_trait, self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf, self.first_run, self.output_files) + marker_obs, self.output_files = gemma_mapping.run_gemma( + self.this_trait, self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf, self.first_run, self.output_files) results = marker_obs elif self.mapping_method == "rqtl_plink": results = self.run_rqtl_plink() elif self.mapping_method == "rqtl_geno": perm_strata = [] if "perm_strata" in start_vars and "categorical_vars" in start_vars: - self.categorical_vars = start_vars["categorical_vars"].split(",") + self.categorical_vars = start_vars["categorical_vars"].split( + ",") if len(self.categorical_vars) and start_vars["perm_strata"] == "True": - primary_samples = SampleList(dataset = self.dataset, - sample_names = self.samples, - this_trait = self.this_trait) + primary_samples = SampleList(dataset=self.dataset, + sample_names=self.samples, + this_trait=self.this_trait) - perm_strata = get_perm_strata(self.this_trait, primary_samples, self.categorical_vars, self.samples) + perm_strata = get_perm_strata( + self.this_trait, primary_samples, self.categorical_vars, self.samples) self.score_type = "LOD" self.control_marker = start_vars['control_marker'] self.do_control = start_vars['do_control'] @@ -227,14 +240,16 @@ class RunMapping: else: self.method = "em" self.model = start_vars['mapmodel_rqtl_geno'] - #if start_vars['pair_scan'] == "true": + # if start_vars['pair_scan'] == "true": # self.pair_scan = True if self.permCheck and self.num_perm > 0: - self.perm_output, self.suggestive, self.significant, results= rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) + self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno( + self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) else: - results = rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) + results = rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, + self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) elif self.mapping_method == "reaper": - if "startMb" in start_vars: #ZS: Check if first time page loaded, so it can default to ON + if "startMb" in start_vars: # ZS: Check if first time page loaded, so it can default to ON if "additiveCheck" in start_vars: self.additiveCheck = start_vars['additiveCheck'] else: @@ -267,24 +282,26 @@ class RunMapping: if self.reaper_version == "new": self.first_run = True self.output_files = None - if 'first_run' in start_vars: #ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc) + # ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc) + if 'first_run' in start_vars: self.first_run = False if 'output_files' in start_vars: - self.output_files = start_vars['output_files'].split(",") + self.output_files = start_vars['output_files'].split( + ",") results, self.perm_output, self.suggestive, self.significant, self.bootstrap_results, self.output_files = qtlreaper_mapping.run_reaper(self.this_trait, - self.dataset, - self.samples, - self.vals, - self.json_data, - self.num_perm, - self.bootCheck, - self.num_bootstrap, - self.do_control, - self.control_marker, - self.manhattan_plot, - self.first_run, - self.output_files) + self.dataset, + self.samples, + self.vals, + self.json_data, + self.num_perm, + self.bootCheck, + self.num_bootstrap, + self.do_control, + self.control_marker, + self.manhattan_plot, + self.first_run, + self.output_files) else: results, self.json_data, self.perm_output, self.suggestive, self.significant, self.bootstrap_results = qtlreaper_mapping.run_original_reaper(self.this_trait, self.dataset, @@ -300,182 +317,201 @@ class RunMapping: elif self.mapping_method == "plink": self.score_type = "-logP" self.manhattan_plot = True - results = plink_mapping.run_plink(self.this_trait, self.dataset, self.species, self.vals, self.maf) + results = plink_mapping.run_plink( + self.this_trait, self.dataset, self.species, self.vals, self.maf) #results = self.run_plink() else: logger.debug("RUNNING NOTHING") self.no_results = False if len(results) == 0: - self.no_results = True + self.no_results = True else: - if self.pair_scan == True: - self.qtl_results = [] - highest_chr = 1 #This is needed in order to convert the highest chr to X/Y - for marker in results: - if marker['chr1'] > 0 or marker['chr1'] == "X" or marker['chr1'] == "X/Y": - if marker['chr1'] > highest_chr or marker['chr1'] == "X" or marker['chr1'] == "X/Y": - highest_chr = marker['chr1'] - if 'lod_score' in list(marker.keys()): - self.qtl_results.append(marker) - - self.trimmed_markers = results - - for qtl in enumerate(self.qtl_results): - self.json_data['chr1'].append(str(qtl['chr1'])) - self.json_data['chr2'].append(str(qtl['chr2'])) - self.json_data['Mb'].append(qtl['Mb']) - self.json_data['markernames'].append(qtl['name']) - - self.js_data = dict( - json_data = self.json_data, - this_trait = self.this_trait.name, - data_set = self.dataset.name, - maf = self.maf, - manhattan_plot = self.manhattan_plot, - mapping_scale = self.mapping_scale, - qtl_results = self.qtl_results - ) - - else: - self.qtl_results = [] - self.results_for_browser = [] - self.annotations_for_browser = [] - highest_chr = 1 #This is needed in order to convert the highest chr to X/Y - for marker in results: - if 'Mb' in marker: - this_ps = marker['Mb']*1000000 - else: - this_ps = marker['cM']*1000000 - - browser_marker = dict( - chr = str(marker['chr']), - rs = marker['name'], - ps = this_ps, - url = "/show_trait?trait_id=" + marker['name'] + "&dataset=" + self.dataset.group.name + "Geno" - ) - - if self.geno_db_exists == "True": - annot_marker = dict( - name = str(marker['name']), - chr = str(marker['chr']), - rs = marker['name'], - pos = this_ps, - url = "/show_trait?trait_id=" + marker['name'] + "&dataset=" + self.dataset.group.name + "Geno" - ) - else: - annot_marker = dict( - name = str(marker['name']), - chr = str(marker['chr']), - rs = marker['name'], - pos = this_ps - ) - - if 'lrs_value' in marker and marker['lrs_value'] > 0: - browser_marker['p_wald'] = 10**-(marker['lrs_value']/4.61) - elif 'lod_score' in marker and marker['lod_score'] > 0: - browser_marker['p_wald'] = 10**-(marker['lod_score']) - else: - browser_marker['p_wald'] = 0 - - self.results_for_browser.append(browser_marker) - self.annotations_for_browser.append(annot_marker) - if str(marker['chr']) > '0' or str(marker['chr']) == "X" or str(marker['chr']) == "X/Y": - if str(marker['chr']) > str(highest_chr) or str(marker['chr']) == "X" or str(marker['chr']) == "X/Y": - highest_chr = marker['chr'] - if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()): - if 'Mb' in marker.keys(): - marker['display_pos'] = "Chr" + str(marker['chr']) + ": " + "{:.6f}".format(marker['Mb']) - elif 'cM' in marker.keys(): - marker['display_pos'] = "Chr" + str(marker['chr']) + ": " + "{:.3f}".format(marker['cM']) - else: - marker['display_pos'] = "N/A" - self.qtl_results.append(marker) - - total_markers = len(self.qtl_results) - - with Bench("Exporting Results"): - export_mapping_results(self.dataset, self.this_trait, self.qtl_results, self.mapping_results_path, self.mapping_scale, self.score_type, self.transform, self.covariates, self.n_samples) - - with Bench("Trimming Markers for Figure"): - if len(self.qtl_results) > 30000: - self.qtl_results = trim_markers_for_figure(self.qtl_results) - self.results_for_browser = trim_markers_for_figure(self.results_for_browser) - filtered_annotations = [] - for marker in self.results_for_browser: - for annot_marker in self.annotations_for_browser: - if annot_marker['rs'] == marker['rs']: - filtered_annotations.append(annot_marker) - break - self.annotations_for_browser = filtered_annotations - browser_files = write_input_for_browser(self.dataset, self.results_for_browser, self.annotations_for_browser) - else: - browser_files = write_input_for_browser(self.dataset, self.results_for_browser, self.annotations_for_browser) - - with Bench("Trimming Markers for Table"): - self.trimmed_markers = trim_markers_for_table(results) - - chr_lengths = get_chr_lengths(self.mapping_scale, self.mapping_method, self.dataset, self.qtl_results) - - #ZS: For zooming into genome browser, need to pass chromosome name instead of number - if self.dataset.group.species == "mouse": - if self.selected_chr == 20: - this_chr = "X" - else: - this_chr = str(self.selected_chr) - elif self.dataset.group.species == "rat": - if self.selected_chr == 21: - this_chr = "X" - else: - this_chr = str(self.selected_chr) - else: - if self.selected_chr == 22: - this_chr = "X" - elif self.selected_chr == 23: - this_chr = "Y" - else: - this_chr = str(self.selected_chr) - - if self.mapping_method != "gemma": - if self.score_type == "LRS": - significant_for_browser = self.significant / 4.61 - else: - significant_for_browser = self.significant - - self.js_data = dict( - #result_score_type = self.score_type, - #this_trait = self.this_trait.name, - #data_set = self.dataset.name, - #maf = self.maf, - #manhattan_plot = self.manhattan_plot, - #mapping_scale = self.mapping_scale, - #chromosomes = chromosome_mb_lengths, - #qtl_results = self.qtl_results, - categorical_vars = self.categorical_vars, - chr_lengths = chr_lengths, - num_perm = self.num_perm, - perm_results = self.perm_output, - significant = significant_for_browser, - browser_files = browser_files, - selected_chr = this_chr, - total_markers = total_markers - ) - else: + if self.pair_scan == True: + self.qtl_results = [] + highest_chr = 1 # This is needed in order to convert the highest chr to X/Y + for marker in results: + if marker['chr1'] > 0 or marker['chr1'] == "X" or marker['chr1'] == "X/Y": + if marker['chr1'] > highest_chr or marker['chr1'] == "X" or marker['chr1'] == "X/Y": + highest_chr = marker['chr1'] + if 'lod_score' in list(marker.keys()): + self.qtl_results.append(marker) + + self.trimmed_markers = results + + for qtl in enumerate(self.qtl_results): + self.json_data['chr1'].append(str(qtl['chr1'])) + self.json_data['chr2'].append(str(qtl['chr2'])) + self.json_data['Mb'].append(qtl['Mb']) + self.json_data['markernames'].append(qtl['name']) + self.js_data = dict( - chr_lengths = chr_lengths, - browser_files = browser_files, - selected_chr = this_chr, - total_markers = total_markers + json_data=self.json_data, + this_trait=self.this_trait.name, + data_set=self.dataset.name, + maf=self.maf, + manhattan_plot=self.manhattan_plot, + mapping_scale=self.mapping_scale, + qtl_results=self.qtl_results ) + else: + self.qtl_results = [] + self.results_for_browser = [] + self.annotations_for_browser = [] + highest_chr = 1 # This is needed in order to convert the highest chr to X/Y + for marker in results: + if 'Mb' in marker: + this_ps = marker['Mb'] * 1000000 + else: + this_ps = marker['cM'] * 1000000 + + browser_marker = dict( + chr=str(marker['chr']), + rs=marker['name'], + ps=this_ps, + url="/show_trait?trait_id=" + \ + marker['name'] + "&dataset=" + \ + self.dataset.group.name + "Geno" + ) + + if self.geno_db_exists == "True": + annot_marker = dict( + name=str(marker['name']), + chr=str(marker['chr']), + rs=marker['name'], + pos=this_ps, + url="/show_trait?trait_id=" + \ + marker['name'] + "&dataset=" + \ + self.dataset.group.name + "Geno" + ) + else: + annot_marker = dict( + name=str(marker['name']), + chr=str(marker['chr']), + rs=marker['name'], + pos=this_ps + ) + + if 'lrs_value' in marker and marker['lrs_value'] > 0: + browser_marker['p_wald'] = 10**- \ + (marker['lrs_value'] / 4.61) + elif 'lod_score' in marker and marker['lod_score'] > 0: + browser_marker['p_wald'] = 10**-(marker['lod_score']) + else: + browser_marker['p_wald'] = 0 + + self.results_for_browser.append(browser_marker) + self.annotations_for_browser.append(annot_marker) + if str(marker['chr']) > '0' or str(marker['chr']) == "X" or str(marker['chr']) == "X/Y": + if str(marker['chr']) > str(highest_chr) or str(marker['chr']) == "X" or str(marker['chr']) == "X/Y": + highest_chr = marker['chr'] + if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()): + if 'Mb' in marker.keys(): + marker['display_pos'] = "Chr" + \ + str(marker['chr']) + ": " + \ + "{:.6f}".format(marker['Mb']) + elif 'cM' in marker.keys(): + marker['display_pos'] = "Chr" + \ + str(marker['chr']) + ": " + \ + "{:.3f}".format(marker['cM']) + else: + marker['display_pos'] = "N/A" + self.qtl_results.append(marker) + + total_markers = len(self.qtl_results) + + with Bench("Exporting Results"): + export_mapping_results(self.dataset, self.this_trait, self.qtl_results, self.mapping_results_path, + self.mapping_scale, self.score_type, self.transform, self.covariates, self.n_samples) + + with Bench("Trimming Markers for Figure"): + if len(self.qtl_results) > 30000: + self.qtl_results = trim_markers_for_figure( + self.qtl_results) + self.results_for_browser = trim_markers_for_figure( + self.results_for_browser) + filtered_annotations = [] + for marker in self.results_for_browser: + for annot_marker in self.annotations_for_browser: + if annot_marker['rs'] == marker['rs']: + filtered_annotations.append(annot_marker) + break + self.annotations_for_browser = filtered_annotations + browser_files = write_input_for_browser( + self.dataset, self.results_for_browser, self.annotations_for_browser) + else: + browser_files = write_input_for_browser( + self.dataset, self.results_for_browser, self.annotations_for_browser) + + with Bench("Trimming Markers for Table"): + self.trimmed_markers = trim_markers_for_table(results) + + chr_lengths = get_chr_lengths( + self.mapping_scale, self.mapping_method, self.dataset, self.qtl_results) + + # ZS: For zooming into genome browser, need to pass chromosome name instead of number + if self.dataset.group.species == "mouse": + if self.selected_chr == 20: + this_chr = "X" + else: + this_chr = str(self.selected_chr) + elif self.dataset.group.species == "rat": + if self.selected_chr == 21: + this_chr = "X" + else: + this_chr = str(self.selected_chr) + else: + if self.selected_chr == 22: + this_chr = "X" + elif self.selected_chr == 23: + this_chr = "Y" + else: + this_chr = str(self.selected_chr) + + if self.mapping_method != "gemma": + if self.score_type == "LRS": + significant_for_browser = self.significant / 4.61 + else: + significant_for_browser = self.significant + + self.js_data = dict( + #result_score_type = self.score_type, + #this_trait = self.this_trait.name, + #data_set = self.dataset.name, + #maf = self.maf, + #manhattan_plot = self.manhattan_plot, + #mapping_scale = self.mapping_scale, + #chromosomes = chromosome_mb_lengths, + #qtl_results = self.qtl_results, + categorical_vars=self.categorical_vars, + chr_lengths=chr_lengths, + num_perm=self.num_perm, + perm_results=self.perm_output, + significant=significant_for_browser, + browser_files=browser_files, + selected_chr=this_chr, + total_markers=total_markers + ) + else: + self.js_data = dict( + chr_lengths=chr_lengths, + browser_files=browser_files, + selected_chr=this_chr, + total_markers=total_markers + ) + def run_rqtl_plink(self): # os.chdir("") never do this inside a webserver!! - output_filename = webqtlUtil.genRandStr("%s_%s_"%(self.dataset.group.name, self.this_trait.name)) + output_filename = webqtlUtil.genRandStr("%s_%s_" % ( + self.dataset.group.name, self.this_trait.name)) - plink_mapping.gen_pheno_txt_file_plink(self.this_trait, self.dataset, self.vals, pheno_filename = output_filename) + plink_mapping.gen_pheno_txt_file_plink( + self.this_trait, self.dataset, self.vals, pheno_filename=output_filename) - rqtl_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (self.dataset.group.name, self.dataset.group.name, TMPDIR, plink_output_filename, self.this_trait.name, self.maf, TMPDIR, plink_output_filename) + rqtl_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % ( + self.dataset.group.name, self.dataset.group.name, TMPDIR, plink_output_filename, self.this_trait.name, self.maf, TMPDIR, plink_output_filename) os.system(rqtl_command) @@ -504,10 +540,13 @@ class RunMapping: trimmed_genotype_data.append(new_genotypes) return trimmed_genotype_data + def export_mapping_results(dataset, trait, markers, results_path, mapping_scale, score_type, transform, covariates, n_samples): with open(results_path, "w+") as output_file: - output_file.write("Time/Date: " + datetime.datetime.now().strftime("%x / %X") + "\n") - output_file.write("Population: " + dataset.group.species.title() + " " + dataset.group.name + "\n") + output_file.write( + "Time/Date: " + datetime.datetime.now().strftime("%x / %X") + "\n") + output_file.write( + "Population: " + dataset.group.species.title() + " " + dataset.group.name + "\n") output_file.write("Data Set: " + dataset.fullname + "\n") output_file.write("N Samples: " + str(n_samples) + "\n") if len(transform) > 0: @@ -527,7 +566,8 @@ def export_mapping_results(dataset, trait, markers, results_path, mapping_scale, output_file.write(transform_text + "\n") if dataset.type == "ProbeSet": output_file.write("Gene Symbol: " + trait.symbol + "\n") - output_file.write("Location: " + str(trait.chr) + " @ " + str(trait.mb) + " Mb\n") + output_file.write("Location: " + str(trait.chr) + \ + " @ " + str(trait.mb) + " Mb\n") if len(covariates) > 0: output_file.write("Cofactors (dataset - trait):\n") for covariate in covariates.split(","): @@ -564,6 +604,7 @@ def export_mapping_results(dataset, trait, markers, results_path, mapping_scale, if i < (len(markers) - 1): output_file.write("\n") + def trim_markers_for_figure(markers): if 'p_wald' in list(markers[0].keys()): score_type = 'p_wald' @@ -612,11 +653,11 @@ def trim_markers_for_figure(markers): if low_counter % 20 == 0: filtered_markers.append(marker) low_counter += 1 - elif 4.61 <= marker[score_type] < (2*4.61): + elif 4.61 <= marker[score_type] < (2 * 4.61): if med_counter % 10 == 0: filtered_markers.append(marker) med_counter += 1 - elif (2*4.61) <= marker[score_type] <= (3*4.61): + elif (2 * 4.61) <= marker[score_type] <= (3 * 4.61): if high_counter % 2 == 0: filtered_markers.append(marker) high_counter += 1 @@ -624,21 +665,27 @@ def trim_markers_for_figure(markers): filtered_markers.append(marker) return filtered_markers + def trim_markers_for_table(markers): if 'lod_score' in list(markers[0].keys()): - sorted_markers = sorted(markers, key=lambda k: k['lod_score'], reverse=True) + sorted_markers = sorted( + markers, key=lambda k: k['lod_score'], reverse=True) else: - sorted_markers = sorted(markers, key=lambda k: k['lrs_value'], reverse=True) + sorted_markers = sorted( + markers, key=lambda k: k['lrs_value'], reverse=True) - #ZS: So we end up with a list of just 2000 markers + # ZS: So we end up with a list of just 2000 markers if len(sorted_markers) >= 2000: trimmed_sorted_markers = sorted_markers[:2000] return trimmed_sorted_markers else: return sorted_markers + def write_input_for_browser(this_dataset, gwas_results, annotations): - file_base = this_dataset.group.name + "_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) + file_base = this_dataset.group.name + "_" + \ + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) gwas_filename = file_base + "_GWAS" annot_filename = file_base + "_ANNOT" gwas_path = "{}/gn2/".format(TEMPDIR) + gwas_filename @@ -650,14 +697,17 @@ def write_input_for_browser(this_dataset, gwas_results, annotations): return [gwas_filename, annot_filename] + def geno_db_exists(this_dataset): geno_db_name = this_dataset.group.name + "Geno" try: - geno_db = data_set.create_dataset(dataset_name=geno_db_name, get_samplelist=False) + geno_db = data_set.create_dataset( + dataset_name=geno_db_name, get_samplelist=False) return "True" except: return "False" + def get_chr_lengths(mapping_scale, mapping_method, dataset, qtl_results): chr_lengths = [] if mapping_scale == "physic": @@ -682,9 +732,11 @@ def get_chr_lengths(mapping_scale, mapping_method, dataset, qtl_results): highest_pos = float(result['cM']) * 1000000 else: highest_pos = float(result['Mb']) * 1000000 - chr_lengths.append({ "chr": str(this_chr), "size": str(highest_pos)}) + chr_lengths.append( + {"chr": str(this_chr), "size": str(highest_pos)}) else: - chr_lengths.append({ "chr": str(this_chr), "size": str(highest_pos)}) + chr_lengths.append( + {"chr": str(this_chr), "size": str(highest_pos)}) this_chr = chr_as_num else: if mapping_method == "reaper": @@ -696,6 +748,7 @@ def get_chr_lengths(mapping_scale, mapping_method, dataset, qtl_results): return chr_lengths + def get_genofile_samplelist(dataset): genofile_samplelist = [] @@ -706,6 +759,7 @@ def get_genofile_samplelist(dataset): return genofile_samplelist + def get_perm_strata(this_trait, sample_list, categorical_vars, used_samples): perm_strata_strings = [] for sample in used_samples: @@ -713,7 +767,8 @@ def get_perm_strata(this_trait, sample_list, categorical_vars, used_samples): combined_string = "" for var in categorical_vars: if var in list(sample_list.sample_attribute_values[sample].keys()): - combined_string += str(sample_list.sample_attribute_values[sample][var]) + combined_string += str( + sample_list.sample_attribute_values[sample][var]) else: combined_string += "NA" else: @@ -721,7 +776,8 @@ def get_perm_strata(this_trait, sample_list, categorical_vars, used_samples): perm_strata_strings.append(combined_string) - d = dict([(y, x+1) for x, y in enumerate(sorted(set(perm_strata_strings)))]) + d = dict([(y, x + 1) + for x, y in enumerate(sorted(set(perm_strata_strings)))]) list_to_numbers = [d[x] for x in perm_strata_strings] perm_strata = list_to_numbers diff --git a/wqflask/wqflask/model.py b/wqflask/wqflask/model.py index 772f74e4..a222b87c 100644 --- a/wqflask/wqflask/model.py +++ b/wqflask/wqflask/model.py @@ -14,9 +14,11 @@ from sqlalchemy.orm import relationship from wqflask.database import Base, init_db + class User(Base): __tablename__ = "user" - id = Column(Unicode(36), primary_key=True, default=lambda: str(uuid.uuid4())) + id = Column(Unicode(36), primary_key=True, + default=lambda: str(uuid.uuid4())) email_address = Column(Unicode(50), unique=True, nullable=False) # Todo: Turn on strict mode for Mysql @@ -27,23 +29,25 @@ class User(Base): active = Column(Boolean(), nullable=False, default=True) - registration_info = Column(Text) # json detailing when they were registered, etc. + # json detailing when they were registered, etc. + registration_info = Column(Text) - confirmed = Column(Text) # json detailing when they confirmed, etc. + confirmed = Column(Text) # json detailing when they confirmed, etc. - superuser = Column(Text) # json detailing when they became a superuser, otherwise empty - # if not superuser + # json detailing when they became a superuser, otherwise empty + superuser = Column(Text) + # if not superuser logins = relationship("Login", order_by="desc(Login.timestamp)", - lazy='dynamic', # Necessary for filter in login_count + lazy='dynamic', # Necessary for filter in login_count foreign_keys="Login.user", ) user_collections = relationship("UserCollection", - order_by="asc(UserCollection.name)", - lazy='dynamic', - ) + order_by="asc(UserCollection.name)", + lazy='dynamic', + ) def display_num_collections(self): """ @@ -63,11 +67,11 @@ class User(Base): print("Couldn't display_num_collections:", why) return "" - def get_collection_by_name(self, collection_name): try: - collect = self.user_collections.filter_by(name=collection_name).first() - except sqlalchemy.orm.exc.NoResultFound: + collect = self.user_collections.filter_by( + name=collection_name).first() + except sqlalchemy.orm.exc.NoResultFound: collect = None return collect @@ -83,7 +87,6 @@ class User(Base): def login_count(self): return self.logins.filter_by(successful=True).count() - @property def confirmed_at(self): if self.confirmed: @@ -116,14 +119,18 @@ class User(Base): except IndexError: return None + class Login(Base): __tablename__ = "login" - id = Column(Unicode(36), primary_key=True, default=lambda: str(uuid.uuid4())) + id = Column(Unicode(36), primary_key=True, + default=lambda: str(uuid.uuid4())) user = Column(Unicode(36), ForeignKey('user.id')) timestamp = Column(DateTime(), default=lambda: datetime.datetime.utcnow()) ip_address = Column(Unicode(39)) - successful = Column(Boolean(), nullable=False) # False if wrong password was entered - session_id = Column(Text) # Set only if successfully logged in, otherwise should be blank + # False if wrong password was entered + successful = Column(Boolean(), nullable=False) + # Set only if successfully logged in, otherwise should be blank + session_id = Column(Text) # Set to user who assumes identity if this was a login for debugging purposes by a superuser assumed_by = Column(Unicode(36), ForeignKey('user.id')) @@ -134,15 +141,19 @@ class Login(Base): ################################################################################################## + class UserCollection(Base): __tablename__ = "user_collection" - id = Column(Unicode(36), primary_key=True, default=lambda: str(uuid.uuid4())) + id = Column(Unicode(36), primary_key=True, + default=lambda: str(uuid.uuid4())) user = Column(Unicode(36), ForeignKey('user.id')) # I'd prefer this to not have a length, but for the index below it needs one name = Column(Unicode(50)) - created_timestamp = Column(DateTime(), default=lambda: datetime.datetime.utcnow()) - changed_timestamp = Column(DateTime(), default=lambda: datetime.datetime.utcnow()) + created_timestamp = Column( + DateTime(), default=lambda: datetime.datetime.utcnow()) + changed_timestamp = Column( + DateTime(), default=lambda: datetime.datetime.utcnow()) members = Column(Text) # We're going to store them as a json list # This index ensures a user doesn't have more than one collection with the same name @@ -158,12 +169,14 @@ class UserCollection(Base): def members_as_set(self): return set(json.loads(self.members)) + def display_collapsible(number): if number: return number else: return "" + def user_uuid(): """Unique cookie for a user""" user_uuid = request.cookies.get('user_uuid') diff --git a/wqflask/wqflask/network_graph/network_graph.py b/wqflask/wqflask/network_graph/network_graph.py index a41df1ed..9b70f03d 100644 --- a/wqflask/wqflask/network_graph/network_graph.py +++ b/wqflask/wqflask/network_graph/network_graph.py @@ -27,6 +27,7 @@ from utility import helper_functions from utility import corr_result_helpers from utility.tools import GN2_BRANCH_URL + class NetworkGraph: def __init__(self, start_vars): diff --git a/wqflask/wqflask/news.py b/wqflask/wqflask/news.py index 861a93f2..e262dd51 100644 --- a/wqflask/wqflask/news.py +++ b/wqflask/wqflask/news.py @@ -1,5 +1,6 @@ from flask import g + class News: def __init__(self): diff --git a/wqflask/wqflask/parser.py b/wqflask/wqflask/parser.py index dcd328c9..bd1c4407 100644 --- a/wqflask/wqflask/parser.py +++ b/wqflask/wqflask/parser.py @@ -22,7 +22,8 @@ import re from pprint import pformat as pf from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) + def parse(pstring): """ @@ -33,7 +34,7 @@ def parse(pstring): (\w+\s*[=:\>\<][\w\*]+) | # wiki=bar, GO:foobar, etc (".*?") | ('.*?') | # terms in quotes, i.e. "brain weight" ([\w\*\?]+)) # shh, brain, etc """, pstring, - flags=re.VERBOSE) + flags=re.VERBOSE) pstring = [item.strip() for item in pstring if item and item.strip()] @@ -52,7 +53,7 @@ def parse(pstring): if '(' in value or '[' in value: assert value.startswith(("(", "[")), "Invalid token" assert value.endswith((")", "]")), "Invalid token" - value = value[1:-1] # Get rid of the parenthesis + value = value[1:-1] # Get rid of the parenthesis values = re.split(r"""\s+|,""", value) value = [value.strip() for value in values if value.strip()] else: diff --git a/wqflask/wqflask/pbkdf2.py b/wqflask/wqflask/pbkdf2.py index 6346df03..1a965fc5 100644 --- a/wqflask/wqflask/pbkdf2.py +++ b/wqflask/wqflask/pbkdf2.py @@ -4,6 +4,8 @@ from werkzeug.security import safe_str_cmp as ssc # Replace this because it just wraps around Python3's internal # functions. Added this during migration. + + def pbkdf2_hex(data, salt, iterations=1000, keylen=24, hashfunc="sha1"): """Wrapper function of python's hashlib.pbkdf2_hmac. """ diff --git a/wqflask/wqflask/resource_manager.py b/wqflask/wqflask/resource_manager.py index 7d51a83d..b28c1b04 100644 --- a/wqflask/wqflask/resource_manager.py +++ b/wqflask/wqflask/resource_manager.py @@ -11,6 +11,7 @@ from utility.redis_tools import get_resource_info, get_group_info, get_groups_li from utility.logger import getLogger logger = getLogger(__name__) + @app.route("/resources/manage", methods=('GET', 'POST')) def manage_resource(): params = request.form if request.form else request.args @@ -26,7 +27,7 @@ def manage_resource(): owner_display_name = None if owner_id != "none": - try: #ZS: User IDs are sometimes stored in Redis as bytes and sometimes as strings, so this is just to avoid any errors for the time being + try: # ZS: User IDs are sometimes stored in Redis as bytes and sometimes as strings, so this is just to avoid any errors for the time being owner_id = str.encode(owner_id) except: pass @@ -38,17 +39,20 @@ def manage_resource(): elif 'email_address' in owner_info: owner_display_name = owner_info['email_address'] - return render_template("admin/manage_resource.html", owner_name = owner_display_name, resource_id = resource_id, resource_info=resource_info, default_mask=default_mask, group_masks=group_masks_with_names, admin_status=admin_status) + return render_template("admin/manage_resource.html", owner_name=owner_display_name, resource_id=resource_id, resource_info=resource_info, default_mask=default_mask, group_masks=group_masks_with_names, admin_status=admin_status) + @app.route("/search_for_users", methods=('POST',)) def search_for_user(): params = request.form user_list = [] user_list += get_users_like_unique_column("full_name", params['user_name']) - user_list += get_users_like_unique_column("email_address", params['user_email']) + user_list += get_users_like_unique_column( + "email_address", params['user_email']) return json.dumps(user_list) + @app.route("/search_for_groups", methods=('POST',)) def search_for_groups(): params = request.form @@ -58,13 +62,15 @@ def search_for_groups(): user_list = [] user_list += get_users_like_unique_column("full_name", params['user_name']) - user_list += get_users_like_unique_column("email_address", params['user_email']) + user_list += get_users_like_unique_column( + "email_address", params['user_email']) for user in user_list: group_list += get_groups_like_unique_column("admins", user['user_id']) group_list += get_groups_like_unique_column("members", user['user_id']) return json.dumps(group_list) + @app.route("/resources/change_owner", methods=('POST',)) def change_owner(): resource_id = request.form['resource_id'] @@ -79,7 +85,8 @@ def change_owner(): flash("You lack the permissions to make this change.", "error") return redirect(url_for("manage_resource", resource_id=resource_id)) else: - return render_template("admin/change_resource_owner.html", resource_id = resource_id) + return render_template("admin/change_resource_owner.html", resource_id=resource_id) + @app.route("/resources/change_default_privileges", methods=('POST',)) def change_default_privileges(): @@ -99,6 +106,7 @@ def change_default_privileges(): else: return redirect(url_for("no_access_page")) + @app.route("/resources/add_group", methods=('POST',)) def add_group_to_resource(): resource_id = request.form['resource_id'] @@ -108,7 +116,7 @@ def add_group_to_resource(): group_id = request.form['selected_group'] resource_info = get_resource_info(resource_id) default_privileges = resource_info['default_mask'] - return render_template("admin/set_group_privileges.html", resource_id = resource_id, group_id = group_id, default_privileges = default_privileges) + return render_template("admin/set_group_privileges.html", resource_id=resource_id, group_id=group_id, default_privileges=default_privileges) elif all(key in request.form for key in ('data_privilege', 'metadata_privilege', 'admin_privilege')): group_id = request.form['group_id'] group_name = get_group_info(group_id)['name'] @@ -118,13 +126,15 @@ def add_group_to_resource(): 'admin': request.form['admin_privilege'] } add_access_mask(resource_id, group_id, access_mask) - flash("Privileges have been added for group {}.".format(group_name), "alert-info") + flash("Privileges have been added for group {}.".format( + group_name), "alert-info") return redirect(url_for("manage_resource", resource_id=resource_id)) else: - return render_template("admin/search_for_groups.html", resource_id = resource_id) + return render_template("admin/search_for_groups.html", resource_id=resource_id) else: return redirect(url_for("no_access_page")) + def get_group_names(group_masks): group_masks_with_names = {} for group_id, group_mask in list(group_masks.items()): @@ -132,5 +142,5 @@ def get_group_names(group_masks): group_name = get_group_info(group_id)['name'] this_mask['name'] = group_name group_masks_with_names[group_id] = this_mask - + return group_masks_with_names diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index fd7c132b..3cbda3dd 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -22,7 +22,8 @@ from utility.tools import GN2_BASE_URL from utility.type_checking import is_str from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) + class SearchResultPage: #maxReturn = 3000 @@ -39,7 +40,7 @@ class SearchResultPage: self.uc_id = uuid.uuid4() self.go_term = None - logger.debug("uc_id:", self.uc_id) # contains a unique id + logger.debug("uc_id:", self.uc_id) # contains a unique id logger.debug("kw is:", kw) # dict containing search terms if kw['search_terms_or']: @@ -51,7 +52,8 @@ class SearchResultPage: search = self.search_terms self.original_search_string = self.search_terms # check for dodgy search terms - rx = re.compile(r'.*\W(href|http|sql|select|update)\W.*', re.IGNORECASE) + rx = re.compile( + r'.*\W(href|http|sql|select|update)\W.*', re.IGNORECASE) if rx.match(search): logger.info("Regex failed search") self.search_term_exists = False @@ -72,11 +74,11 @@ class SearchResultPage: self.dataset = create_dataset(kw['dataset'], dataset_type) logger.debug("search_terms:", self.search_terms) - #ZS: I don't like using try/except, but it seems like the easiest way to account for all possible bad searches here + # ZS: I don't like using try/except, but it seems like the easiest way to account for all possible bad searches here try: self.search() except: - self.search_term_exists = False + self.search_term_exists = False self.too_many_results = False if self.search_term_exists: @@ -95,7 +97,8 @@ class SearchResultPage: trait_list = [] json_trait_list = [] - species = webqtlDatabaseFunction.retrieve_species(self.dataset.group.name) + species = webqtlDatabaseFunction.retrieve_species( + self.dataset.group.name) # result_set represents the results for each search term; a search of # "shh grin2b" would have two sets of results, one for each term logger.debug("self.results is:", pf(self.results)) @@ -108,7 +111,8 @@ class SearchResultPage: trait_dict = {} trait_id = result[0] - this_trait = create_trait(dataset=self.dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) + this_trait = create_trait( + dataset=self.dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) if this_trait: trait_dict['index'] = index + 1 trait_dict['name'] = this_trait.name @@ -117,7 +121,8 @@ class SearchResultPage: else: trait_dict['display_name'] = this_trait.name trait_dict['dataset'] = this_trait.dataset.name - trait_dict['hmac'] = hmac.data_hmac('{}:{}'.format(this_trait.name, this_trait.dataset.name)) + trait_dict['hmac'] = hmac.data_hmac( + '{}:{}'.format(this_trait.name, this_trait.dataset.name)) if this_trait.dataset.type == "ProbeSet": trait_dict['symbol'] = this_trait.symbol if this_trait.symbol else "N/A" trait_dict['description'] = "N/A" @@ -167,9 +172,11 @@ class SearchResultPage: self.trait_list = trait_list if self.dataset.type == "ProbeSet": - self.header_data_names = ['index', 'display_name', 'symbol', 'description', 'location', 'mean', 'lrs_score', 'lrs_location', 'additive'] + self.header_data_names = ['index', 'display_name', 'symbol', 'description', + 'location', 'mean', 'lrs_score', 'lrs_location', 'additive'] elif self.dataset.type == "Publish": - self.header_data_names = ['index', 'display_name', 'description', 'mean', 'authors', 'pubmed_text', 'lrs_score', 'lrs_location', 'additive'] + self.header_data_names = ['index', 'display_name', 'description', 'mean', + 'authors', 'pubmed_text', 'lrs_score', 'lrs_location', 'additive'] elif self.dataset.type == "Geno": self.header_data_names = ['index', 'display_name', 'location'] @@ -183,7 +190,8 @@ class SearchResultPage: combined_from_clause = "" combined_where_clause = "" - previous_from_clauses = [] #The same table can't be referenced twice in the from clause + # The same table can't be referenced twice in the from clause + previous_from_clauses = [] logger.debug("len(search_terms)>1") symbol_list = [] @@ -197,7 +205,8 @@ class SearchResultPage: for i, a_search in enumerate(alias_terms): the_search = self.get_search_ob(a_search) if the_search != None: - get_from_clause = getattr(the_search, "get_from_clause", None) + get_from_clause = getattr( + the_search, "get_from_clause", None) if callable(get_from_clause): from_clause = the_search.get_from_clause() if from_clause in previous_from_clauses: @@ -221,7 +230,8 @@ class SearchResultPage: else: the_search = self.get_search_ob(a_search) if the_search != None: - get_from_clause = getattr(the_search, "get_from_clause", None) + get_from_clause = getattr( + the_search, "get_from_clause", None) if callable(get_from_clause): from_clause = the_search.get_from_clause() if from_clause in previous_from_clauses: @@ -231,7 +241,7 @@ class SearchResultPage: combined_from_clause += from_clause where_clause = the_search.get_where_clause() combined_where_clause += "(" + where_clause + ")" - if (i+1) < len(self.search_terms): + if (i + 1) < len(self.search_terms): if self.and_or == "and": combined_where_clause += "AND" else: @@ -240,7 +250,8 @@ class SearchResultPage: self.search_term_exists = False if self.search_term_exists: combined_where_clause = "(" + combined_where_clause + ")" - final_query = the_search.compile_final_query(combined_from_clause, combined_where_clause) + final_query = the_search.compile_final_query( + combined_from_clause, combined_where_clause) results = the_search.execute(final_query) self.results.extend(results) @@ -262,14 +273,15 @@ class SearchResultPage: if search_ob: search_class = getattr(do_search, search_ob) the_search = search_class(search_term, - search_operator, - self.dataset, - search_type['key'] - ) + search_operator, + self.dataset, + search_type['key'] + ) return the_search else: return None + def get_GO_symbols(a_search): query = """SELECT genes FROM GORef @@ -287,13 +299,15 @@ def get_GO_symbols(a_search): return new_terms + def insert_newlines(string, every=64): """ This is because it is seemingly impossible to change the width of the description column, so I'm just manually adding line breaks """ lines = [] for i in range(0, len(string), every): - lines.append(string[i:i+every]) + lines.append(string[i:i + every]) return '\n'.join(lines) + def get_aliases(symbol_list, species): updated_symbols = [] @@ -308,7 +322,8 @@ def get_aliases(symbol_list, species): symbols_string = ",".join(updated_symbols) filtered_aliases = [] - response = requests.get(GN2_BASE_URL + "/gn3/gene/aliases2/" + symbols_string) + response = requests.get( + GN2_BASE_URL + "/gn3/gene/aliases2/" + symbols_string) if response: alias_lists = json.loads(response.content) seen = set() @@ -322,10 +337,9 @@ def get_aliases(symbol_list, species): search_terms = [] for alias in filtered_aliases: - the_search_term = {'key': None, + the_search_term = {'key': None, 'search_term': [alias], - 'separator' : None} + 'separator': None} search_terms.append(the_search_term) return search_terms - diff --git a/wqflask/wqflask/send_mail.py b/wqflask/wqflask/send_mail.py index 86e8a558..299c866a 100644 --- a/wqflask/wqflask/send_mail.py +++ b/wqflask/wqflask/send_mail.py @@ -8,10 +8,12 @@ Redis = StrictRedis() import mailer + def timestamp(): ts = datetime.datetime.utcnow() return ts.isoformat() + def main(): while True: print("I'm alive!") @@ -31,7 +33,6 @@ def main(): process_message(msg) - def process_message(msg): msg = json.loads(msg) diff --git a/wqflask/wqflask/server_side.py b/wqflask/wqflask/server_side.py index 48761fa0..7f68efad 100644 --- a/wqflask/wqflask/server_side.py +++ b/wqflask/wqflask/server_side.py @@ -1,7 +1,6 @@ # handles server side table processing - class ServerSideTable: """ This class is used to do server-side processing @@ -31,7 +30,7 @@ class ServerSideTable: self.rows_count = rows_count self.table_rows = table_rows self.header_data_names = header_data_names - + self.sort_rows() self.paginate_rows() @@ -50,8 +49,8 @@ class ServerSideTable: column_name = self.header_data_names[column_number - 1] sort_direction = self.request_values['sSortDir_' + str(i)] self.table_rows = sorted(self.table_rows, - key=lambda x: x[column_name], - reverse=is_reverse(sort_direction)) + key=lambda x: x[column_name], + reverse=is_reverse(sort_direction)) def paginate_rows(self): """ diff --git a/wqflask/wqflask/show_trait/SampleList.py b/wqflask/wqflask/show_trait/SampleList.py index f955f632..f9d30dba 100644 --- a/wqflask/wqflask/show_trait/SampleList.py +++ b/wqflask/wqflask/show_trait/SampleList.py @@ -8,6 +8,7 @@ from pprint import pformat as pf from utility import Plot from utility import Bunch + class SampleList: def __init__(self, dataset, @@ -36,16 +37,16 @@ class SampleList: if isinstance(self.this_trait, list): sample = webqtlCaseData.webqtlCaseData(name=sample_name) if counter <= len(self.this_trait): - if isinstance(self.this_trait[counter-1], (bytes, bytearray)): - if (self.this_trait[counter-1].decode("utf-8").lower() != 'x'): + if isinstance(self.this_trait[counter - 1], (bytes, bytearray)): + if (self.this_trait[counter - 1].decode("utf-8").lower() != 'x'): sample = webqtlCaseData.webqtlCaseData( name=sample_name, - value=float(self.this_trait[counter-1])) + value=float(self.this_trait[counter - 1])) else: - if (self.this_trait[counter-1].lower() != 'x'): + if (self.this_trait[counter - 1].lower() != 'x'): sample = webqtlCaseData.webqtlCaseData( name=sample_name, - value=float(self.this_trait[counter-1])) + value=float(self.this_trait[counter - 1])) else: # ZS - If there's no value for the sample/strain, # create the sample object (so samples with no value @@ -56,8 +57,8 @@ class SampleList: sample = webqtlCaseData.webqtlCaseData(name=sample_name) sample.extra_info = {} - if (self.dataset.group.name == 'AXBXA' and - sample_name in ('AXB18/19/20', 'AXB13/14', 'BXA8/17')): + if (self.dataset.group.name == 'AXBXA' + and sample_name in ('AXB18/19/20', 'AXB13/14', 'BXA8/17')): sample.extra_info['url'] = "/mouseCross.html#AXB/BXA" sample.extra_info['css_class'] = "fs12" @@ -69,18 +70,24 @@ class SampleList: sample.extra_attributes = self.sample_attribute_values.get( sample_name, {}) - #ZS: Add a url so RRID case attributes can be displayed as links + # ZS: Add a url so RRID case attributes can be displayed as links if 'rrid' in sample.extra_attributes: if self.dataset.group.species == "mouse": if len(sample.extra_attributes['rrid'].split(":")) > 1: - the_rrid = sample.extra_attributes['rrid'].split(":")[1] - sample.extra_attributes['rrid'] = [sample.extra_attributes['rrid']] - sample.extra_attributes['rrid'].append(webqtlConfig.RRID_MOUSE_URL % the_rrid) + the_rrid = sample.extra_attributes['rrid'].split(":")[ + 1] + sample.extra_attributes['rrid'] = [ + sample.extra_attributes['rrid']] + sample.extra_attributes['rrid'].append( + webqtlConfig.RRID_MOUSE_URL % the_rrid) elif self.dataset.group.species == "rat": if len(str(sample.extra_attributes['rrid'])): - the_rrid = sample.extra_attributes['rrid'].split("_")[1] - sample.extra_attributes['rrid'] = [sample.extra_attributes['rrid']] - sample.extra_attributes['rrid'].append(webqtlConfig.RRID_RAT_URL % the_rrid) + the_rrid = sample.extra_attributes['rrid'].split("_")[ + 1] + sample.extra_attributes['rrid'] = [ + sample.extra_attributes['rrid']] + sample.extra_attributes['rrid'].append( + webqtlConfig.RRID_RAT_URL % the_rrid) self.sample_list.append(sample) @@ -129,7 +136,8 @@ class SampleList: self.attributes[key].name = name self.attributes[key].distinct_values = [ item.Value for item in values] - self.attributes[key].distinct_values=natural_sort(self.attributes[key].distinct_values) + self.attributes[key].distinct_values = natural_sort( + self.attributes[key].distinct_values) all_numbers = True for value in self.attributes[key].distinct_values: try: @@ -169,7 +177,8 @@ class SampleList: except ValueError: pass - attribute_values[self.attributes[item.Id].name.lower()] = attribute_value + attribute_values[self.attributes[item.Id].name.lower( + )] = attribute_value self.sample_attribute_values[sample_name] = attribute_values def get_first_attr_col(self): diff --git a/wqflask/wqflask/show_trait/export_trait_data.py b/wqflask/wqflask/show_trait/export_trait_data.py index 379b746c..7fabc3f6 100644 --- a/wqflask/wqflask/show_trait/export_trait_data.py +++ b/wqflask/wqflask/show_trait/export_trait_data.py @@ -5,6 +5,7 @@ from functools import cmp_to_key from base.trait import create_trait from base import data_set + def export_sample_table(targs): sample_data = json.loads(targs['export_data']) @@ -28,6 +29,7 @@ def export_sample_table(targs): return trait_name, final_sample_data + def get_export_metadata(trait_id, dataset_name): dataset = data_set.create_dataset(dataset_name) this_trait = create_trait(dataset=dataset, @@ -38,16 +40,23 @@ def get_export_metadata(trait_id, dataset_name): metadata = [] if dataset.type == "Publish": metadata.append(["Phenotype ID: " + trait_id]) - metadata.append(["Phenotype URL: " + "http://genenetwork.org/show_trait?trait_id=" + trait_id + "&dataset=" + dataset_name]) + metadata.append(["Phenotype URL: " + "http://genenetwork.org/show_trait?trait_id=" + \ + trait_id + "&dataset=" + dataset_name]) metadata.append(["Group: " + dataset.group.name]) - metadata.append(["Phenotype: " + this_trait.description_display.replace(",", "\",\"")]) - metadata.append(["Authors: " + (this_trait.authors if this_trait.authors else "N/A")]) - metadata.append(["Title: " + (this_trait.title if this_trait.title else "N/A")]) - metadata.append(["Journal: " + (this_trait.journal if this_trait.journal else "N/A")]) - metadata.append(["Dataset Link: http://gn1.genenetwork.org/webqtl/main.py?FormID=sharinginfo&InfoPageName=" + dataset.name]) + metadata.append( + ["Phenotype: " + this_trait.description_display.replace(",", "\",\"")]) + metadata.append( + ["Authors: " + (this_trait.authors if this_trait.authors else "N/A")]) + metadata.append( + ["Title: " + (this_trait.title if this_trait.title else "N/A")]) + metadata.append( + ["Journal: " + (this_trait.journal if this_trait.journal else "N/A")]) + metadata.append( + ["Dataset Link: http://gn1.genenetwork.org/webqtl/main.py?FormID=sharinginfo&InfoPageName=" + dataset.name]) else: metadata.append(["Record ID: " + trait_id]) - metadata.append(["Trait URL: " + "http://genenetwork.org/show_trait?trait_id=" + trait_id + "&dataset=" + dataset_name]) + metadata.append(["Trait URL: " + "http://genenetwork.org/show_trait?trait_id=" + \ + trait_id + "&dataset=" + dataset_name]) if this_trait.symbol: metadata.append(["Symbol: " + this_trait.symbol]) metadata.append(["Dataset: " + dataset.name]) @@ -64,6 +73,7 @@ def dict_to_sorted_list(dictionary): sorted_values = [item[1] for item in sorted_list] return sorted_values + def cmp_samples(a, b): if b[0] == 'name': return 1 diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index f9c5fbe6..fcebbc4d 100644 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -46,7 +46,7 @@ class ShowTrait: resource_id=self.resource_id) elif 'group' in kw: self.temp_trait = True - self.trait_id = "Temp_"+kw['species'] + "_" + kw['group'] + \ + self.trait_id = "Temp_" + kw['species'] + "_" + kw['group'] + \ "_" + datetime.datetime.now().strftime("%m%d%H%M%S") self.temp_species = kw['species'] self.temp_group = kw['group'] @@ -368,8 +368,8 @@ class ShowTrait: chr = transcript_start = transcript_end = None if chr and transcript_start and transcript_end and self.this_trait.refseq_transcriptid: - transcript_start = int(transcript_start*1000000) - transcript_end = int(transcript_end*1000000) + transcript_start = int(transcript_start * 1000000) + transcript_end = int(transcript_end * 1000000) self.ucsc_blat_link = webqtlConfig.UCSC_REFSEQ % ( 'mm10', self.this_trait.refseq_transcriptid, chr, transcript_start, transcript_end) @@ -393,8 +393,8 @@ class ShowTrait: if chr and transcript_start and transcript_end and kgId: # Convert to bases from megabases - transcript_start = int(transcript_start*1000000) - transcript_end = int(transcript_end*1000000) + transcript_start = int(transcript_start * 1000000) + transcript_end = int(transcript_end * 1000000) self.ucsc_blat_link = webqtlConfig.UCSC_REFSEQ % ( 'rn6', kgId, chr, transcript_start, transcript_end) @@ -452,8 +452,8 @@ class ShowTrait: for sample in list(self.this_trait.data.keys()): if (self.this_trait.data[sample].name2 != self.this_trait.data[sample].name): - if ((self.this_trait.data[sample].name2 in primary_sample_names) and - (self.this_trait.data[sample].name not in primary_sample_names)): + if ((self.this_trait.data[sample].name2 in primary_sample_names) + and (self.this_trait.data[sample].name not in primary_sample_names)): primary_sample_names.append( self.this_trait.data[sample].name) primary_sample_names.remove( @@ -515,13 +515,13 @@ def quantile_normalize_vals(sample_groups): ranked_vals = ss.rankdata(trait_vals) p_list = [] for i, val in enumerate(trait_vals): - p_list.append(((i+1) - 0.5)/len(trait_vals)) + p_list.append(((i + 1) - 0.5) / len(trait_vals)) z = ss.norm.ppf(p_list) normed_vals = [] for rank in ranked_vals: - normed_vals.append("%0.3f" % z[int(rank)-1]) + normed_vals.append("%0.3f" % z[int(rank) - 1]) return normed_vals @@ -585,7 +585,7 @@ def get_nearest_marker(this_trait, this_db): GenoXRef.GenoId = Geno.Id AND GenoFreeze.Id = GenoXRef.GenoFreezeId AND GenoFreeze.Name = '{}' - ORDER BY ABS( Geno.Mb - {}) LIMIT 1""".format(this_chr, this_db.group.name+"Geno", this_mb) + ORDER BY ABS( Geno.Mb - {}) LIMIT 1""".format(this_chr, this_db.group.name + "Geno", this_mb) logger.sql(query) result = g.db.execute(query).fetchall() @@ -605,7 +605,7 @@ def get_table_widths(sample_groups, sample_column_width, has_num_cases=False): trait_table_width += 80 if has_num_cases: trait_table_width += 80 - trait_table_width += len(sample_groups[0].attributes)*88 + trait_table_width += len(sample_groups[0].attributes) * 88 trait_table_width = str(trait_table_width) + "px" diff --git a/wqflask/wqflask/snp_browser/snp_browser.py b/wqflask/wqflask/snp_browser/snp_browser.py index 8658abf8..c4d0e135 100644 --- a/wqflask/wqflask/snp_browser/snp_browser.py +++ b/wqflask/wqflask/snp_browser/snp_browser.py @@ -9,6 +9,7 @@ logger = getLogger(__name__) from base import species from base import webqtlConfig + class SnpBrowser: def __init__(self, start_vars): @@ -26,9 +27,11 @@ class SnpBrowser: self.table_rows = [] if self.limit_strains == "true": - self.header_fields, self.empty_field_count, self.header_data_names = get_header_list(variant_type = self.variant_type, strains = self.chosen_strains, empty_columns = self.empty_columns) + self.header_fields, self.empty_field_count, self.header_data_names = get_header_list( + variant_type=self.variant_type, strains=self.chosen_strains, empty_columns=self.empty_columns) else: - self.header_fields, self.empty_field_count, self.header_data_names = get_header_list(variant_type = self.variant_type, strains = self.strain_lists, species = self.species_name, empty_columns = self.empty_columns) + self.header_fields, self.empty_field_count, self.header_data_names = get_header_list( + variant_type=self.variant_type, strains=self.strain_lists, species=self.species_name, empty_columns=self.empty_columns) def initialize_parameters(self, start_vars): if 'first_run' in start_vars: @@ -52,10 +55,12 @@ class SnpBrowser: self.rat_chr_list = [] mouse_species_ob = species.TheSpecies(species_name="Mouse") for key in mouse_species_ob.chromosomes.chromosomes: - self.mouse_chr_list.append(mouse_species_ob.chromosomes.chromosomes[key].name) + self.mouse_chr_list.append( + mouse_species_ob.chromosomes.chromosomes[key].name) rat_species_ob = species.TheSpecies(species_name="Rat") for key in rat_species_ob.chromosomes.chromosomes: - self.rat_chr_list.append(rat_species_ob.chromosomes.chromosomes[key].name) + self.rat_chr_list.append( + rat_species_ob.chromosomes.chromosomes[key].name) if self.species_id == 1: self.this_chr_list = self.mouse_chr_list @@ -108,9 +113,11 @@ class SnpBrowser: "CAST/EiJ"] self.chosen_strains_rat = ["BN", "F344", "WLI", "WMI"] if 'chosen_strains_mouse' in start_vars: - self.chosen_strains_mouse = start_vars['chosen_strains_mouse'].split(",") + self.chosen_strains_mouse = start_vars['chosen_strains_mouse'].split( + ",") if 'chosen_strains_rat' in start_vars: - self.chosen_strains_rat = start_vars['chosen_strains_rat'].split(",") + self.chosen_strains_rat = start_vars['chosen_strains_rat'].split( + ",") if self.species_id == 1: self.chosen_strains = self.chosen_strains_mouse @@ -149,9 +156,11 @@ class SnpBrowser: if self.gene_name != "": if self.species_id != 0: - query = "SELECT geneSymbol, chromosome, txStart, txEnd FROM GeneList WHERE SpeciesId = %s AND geneSymbol = '%s'" % (self.species_id, self.gene_name) + query = "SELECT geneSymbol, chromosome, txStart, txEnd FROM GeneList WHERE SpeciesId = %s AND geneSymbol = '%s'" % ( + self.species_id, self.gene_name) else: - query = "SELECT geneSymbol, chromosome, txStart, txEnd FROM GeneList WHERE geneSymbol = '%s'" % (self.gene_name) + query = "SELECT geneSymbol, chromosome, txStart, txEnd FROM GeneList WHERE geneSymbol = '%s'" % ( + self.gene_name) result = g.db.execute(query).fetchone() if result: self.gene_name, self.chr, self.start_mb, self.end_mb = result @@ -162,9 +171,11 @@ class SnpBrowser: query = "SELECT Id, Chromosome, Position, Position+0.000001 FROM SnpAll WHERE Rs = '%s'" % self.gene_name else: if self.species_id != 0: - query = "SELECT Id, Chromosome, Position, Position+0.000001 FROM SnpAll where SpeciesId = %s AND SnpName = '%s'" % (self.species_id, self.gene_name) + query = "SELECT Id, Chromosome, Position, Position+0.000001 FROM SnpAll where SpeciesId = %s AND SnpName = '%s'" % ( + self.species_id, self.gene_name) else: - query = "SELECT Id, Chromosome, Position, Position+0.000001 FROM SnpAll where SnpName = '%s'" % (self.gene_name) + query = "SELECT Id, Chromosome, Position, Position+0.000001 FROM SnpAll where SnpName = '%s'" % ( + self.gene_name) result_snp = g.db.execute(query).fetchall() if result_snp: self.snp_list = [item[0] for item in result_snp] @@ -176,9 +187,11 @@ class SnpBrowser: elif self.variant_type == "InDel": if self.gene_name[0] == "I": if self.species_id != 0: - query = "SELECT Id, Chromosome, Mb_start, Mb_end FROM IndelAll WHERE SpeciesId = %s AND Name = '%s'" % (self.species_id, self.gene_name) + query = "SELECT Id, Chromosome, Mb_start, Mb_end FROM IndelAll WHERE SpeciesId = %s AND Name = '%s'" % ( + self.species_id, self.gene_name) else: - query = "SELECT Id, Chromosome, Mb_start, Mb_end FROM IndelAll WHERE Name = '%s'" % (self.gene_name) + query = "SELECT Id, Chromosome, Mb_start, Mb_end FROM IndelAll WHERE Name = '%s'" % ( + self.gene_name) result_snp = g.db.execute(query).fetchall() if result_snp: self.snp_list = [item[0] for item in result_snp] @@ -249,12 +262,13 @@ class SnpBrowser: def filter_results(self, results): filtered_results = [] - strain_index_list = [] #ZS: List of positions of selected strains in strain list + strain_index_list = [] # ZS: List of positions of selected strains in strain list last_mb = -1 if self.limit_strains == "true" and len(self.chosen_strains) > 0: for item in self.chosen_strains: - index = self.strain_lists[self.species_name.lower()].index(item) + index = self.strain_lists[self.species_name.lower()].index( + item) strain_index_list.append(index) for seq, result in enumerate(results): @@ -262,7 +276,8 @@ class SnpBrowser: if self.variant_type == "SNP": display_strains = [] - snp_id, species_id, snp_name, rs, chr, mb, mb_2016, alleles, snp_source, conservation_score = result[:10] + snp_id, species_id, snp_name, rs, chr, mb, mb_2016, alleles, snp_source, conservation_score = result[ + :10] effect_list = result[10:28] if self.species_id == 1: self.allele_list = result[30:] @@ -272,13 +287,14 @@ class SnpBrowser: if self.limit_strains == "true" and len(self.chosen_strains) > 0: for index in strain_index_list: if self.species_id == 1: - display_strains.append(result[29+index]) + display_strains.append(result[29 + index]) elif self.species_id == 2: - display_strains.append(result[31+index]) + display_strains.append(result[31 + index]) self.allele_list = display_strains effect_info_dict = get_effect_info(effect_list) - coding_domain_list = ['Start Gained', 'Start Lost', 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] + coding_domain_list = ['Start Gained', 'Start Lost', + 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] intron_domain_list = ['Splice Site', 'Nonsplice Site'] for key in effect_info_dict: @@ -295,19 +311,22 @@ class SnpBrowser: if 'Intergenic' in domain: if self.gene_name != "": - gene_id = get_gene_id(self.species_id, self.gene_name) + gene_id = get_gene_id( + self.species_id, self.gene_name) gene = [gene_id, self.gene_name] else: gene = check_if_in_gene(species_id, chr, mb) transcript = exon = function = function_details = '' - if self.redundant == "false" or last_mb != mb: # filter redundant + if self.redundant == "false" or last_mb != mb: # filter redundant if self.include_record(domain, function, snp_source, conservation_score): - info_list = [snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, function, function_details, snp_source, conservation_score, snp_id] + info_list = [snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, + function, function_details, snp_source, conservation_score, snp_id] info_list.extend(self.allele_list) filtered_results.append(info_list) last_mb = mb else: - gene_list, transcript_list, exon_list, function_list, function_details_list = effect_info_dict[key] + gene_list, transcript_list, exon_list, function_list, function_details_list = effect_info_dict[ + key] for index, item in enumerate(gene_list): gene = item transcript = transcript_list[index] @@ -324,13 +343,15 @@ class SnpBrowser: function = "" if function_details_list: - function_details = "Biotype: " + function_details_list[index] + function_details = "Biotype: " + \ + function_details_list[index] else: function_details = "" if self.redundant == "false" or last_mb != mb: if self.include_record(domain, function, snp_source, conservation_score): - info_list = [snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, function, function_details, snp_source, conservation_score, snp_id] + info_list = [snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, + function, function_details, snp_source, conservation_score, snp_id] info_list.extend(self.allele_list) filtered_results.append(info_list) last_mb = mb @@ -344,7 +365,8 @@ class SnpBrowser: gene = "No Gene" domain = conservation_score = snp_id = snp_name = rs = flank_3 = flank_5 = ncbi = function = "" if self.include_record(domain, function, source_name, conservation_score): - filtered_results.append([indel_name, indel_chr, indel_mb_start, indel_mb_end, indel_strand, indel_type, indel_size, indel_sequence, source_name]) + filtered_results.append([indel_name, indel_chr, indel_mb_start, indel_mb_end, + indel_strand, indel_type, indel_size, indel_sequence, source_name]) last_mb = indel_mb_start else: @@ -364,9 +386,10 @@ class SnpBrowser: if gene_name and (gene_name not in gene_name_list): gene_name_list.append(gene_name) if len(gene_name_list) > 0: - gene_id_name_dict = get_gene_id_name_dict(self.species_id, gene_name_list) + gene_id_name_dict = get_gene_id_name_dict( + self.species_id, gene_name_list) - #ZS: list of booleans representing which columns are entirely empty, so they aren't displayed on the page; only including ones that are sometimes empty (since there's always a location, etc) + # ZS: list of booleans representing which columns are entirely empty, so they aren't displayed on the page; only including ones that are sometimes empty (since there's always a location, etc) self.empty_columns = { "snp_source": "false", "conservation_score": "false", @@ -382,20 +405,23 @@ class SnpBrowser: for i, result in enumerate(self.filtered_results): this_row = {} if self.variant_type == "SNP": - snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, function, function_details, snp_source, conservation_score, snp_id = result[:14] + snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, function, function_details, snp_source, conservation_score, snp_id = result[ + :14] allele_value_list = result[14:] if rs: snp_url = webqtlConfig.DBSNP % (rs) snp_name = rs else: rs = "" - start_bp = int(mb*1000000 - 100) - end_bp = int(mb*1000000 + 100) + start_bp = int(mb * 1000000 - 100) + end_bp = int(mb * 1000000 + 100) position_info = "chr%s:%d-%d" % (chr, start_bp, end_bp) if self.species_id == 2: - snp_url = webqtlConfig.GENOMEBROWSER_URL % ("rn6", position_info) + snp_url = webqtlConfig.GENOMEBROWSER_URL % ( + "rn6", position_info) else: - snp_url = webqtlConfig.GENOMEBROWSER_URL % ("mm10", position_info) + snp_url = webqtlConfig.GENOMEBROWSER_URL % ( + "mm10", position_info) mb = float(mb) mb_formatted = "%2.6f" % mb @@ -428,13 +454,14 @@ class SnpBrowser: gene_link = "" if transcript: - transcript_link = webqtlConfig.ENSEMBLETRANSCRIPT_URL % (transcript) + transcript_link = webqtlConfig.ENSEMBLETRANSCRIPT_URL % ( + transcript) self.empty_columns['transcript'] = "true" else: transcript_link = "" if exon: - exon = exon[1] # exon[0] is exon_id, exon[1] is exon_rank + exon = exon[1] # exon[0] is exon_id, exon[1] is exon_rank self.empty_columns['exon'] = "true" else: exon = "" @@ -459,20 +486,20 @@ class SnpBrowser: function_list = function_details.strip().split(",") function_list = [item.strip() for item in function_list] function_list[0] = function_list[0].title() - function_details = ", ".join(item for item in function_list) + function_details = ", ".join( + item for item in function_list) function_details = function_details.replace("_", " ") function_details = function_details.replace("/", " -> ") if function_details == "Biotype: Protein Coding": function_details = function_details + ", Coding Region Unknown" self.empty_columns['function_details'] = "true" - + #[snp_href, chr, mb_formatted, alleles, snp_source_cell, conservation_score, gene_name_cell, transcript_href, exon, domain_1, domain_2, function, function_details] - base_color_dict = {"A": "#C33232", "C": "#1569C7", "T": "#CFCF32", "G": "#32C332", + base_color_dict = {"A": "#C33232", "C": "#1569C7", "T": "#CFCF32", "G": "#32C332", "t": "#FF6", "c": "#5CB3FF", "a": "#F66", "g": "#CF9", ":": "#FFFFFF", "-": "#FFFFFF", "?": "#FFFFFF"} - the_bases = [] for j, item in enumerate(allele_value_list): if item and isinstance(item, str): @@ -575,7 +602,7 @@ class SnpBrowser: if conservation_score: score_as_float = float(conservation_score) try: - input_score_float = float(self.score) # the user-input score + input_score_float = float(self.score) # the user-input score except: input_score_float = 0.0 @@ -628,30 +655,31 @@ class SnpBrowser: left_offset, right_offset, top_offset, bottom_offset = (30, 30, 40, 50) plot_width = canvas_width - left_offset - right_offset plot_height = canvas_height - top_offset - bottom_offset - y_zero = top_offset + plot_height/2 + y_zero = top_offset + plot_height / 2 - x_scale = plot_width/(self.end_mb - self.start_mb) + x_scale = plot_width / (self.end_mb - self.start_mb) - #draw clickable image map at some point + # draw clickable image map at some point n_click = 80.0 - click_step = plot_width/n_click - click_mb_step = (self.end_mb - self.start_mb)/n_click + click_step = plot_width / n_click + click_mb_step = (self.end_mb - self.start_mb) / n_click - #for i in range(n_click): + # for i in range(n_click): # href = url_for('snp_browser', first_run="false", chosen_strains_mouse=self.chosen_strains_mouse, chosen_strains_rat=self.chosen_strains_rat, variant=self.variant_type, species=self.species_name, gene_name=self.gene_name, chr=self.chr, start_mb=self.start_mb, end_mb=self.end_mb, limit_strains=self.limit_strains, domain=self.domain, function=self.function, criteria=self.criteria, score=self.score, diff_alleles=self.diff_alleles) + def get_browser_sample_lists(species_id=1): strain_lists = {} mouse_strain_list = [] query = "SHOW COLUMNS FROM SnpPattern;" - results = g.db.execute(query).fetchall(); + results = g.db.execute(query).fetchall() for result in results[1:]: mouse_strain_list.append(result[0]) rat_strain_list = [] query = "SHOW COLUMNS FROM RatSnpPattern;" - results = g.db.execute(query).fetchall(); + results = g.db.execute(query).fetchall() for result in results[2:]: rat_strain_list.append(result[0]) @@ -660,7 +688,8 @@ def get_browser_sample_lists(species_id=1): return strain_lists -def get_header_list(variant_type, strains, species = None, empty_columns = None): + +def get_header_list(variant_type, strains, species=None, empty_columns=None): if species == "Mouse": strain_list = strains['mouse'] elif species == "Rat": @@ -668,13 +697,15 @@ def get_header_list(variant_type, strains, species = None, empty_columns = None) else: strain_list = strains - empty_field_count = 0 #ZS: This is an awkward way of letting the javascript know the index where the allele value columns start; there's probably a better way of doing this + empty_field_count = 0 # ZS: This is an awkward way of letting the javascript know the index where the allele value columns start; there's probably a better way of doing this header_fields = [] header_data_names = [] if variant_type == "SNP": - header_fields.append(['Index', 'SNP ID', 'Chr', 'Mb', 'Alleles', 'Source', 'ConScore', 'Gene', 'Transcript', 'Exon', 'Domain 1', 'Domain 2', 'Function', 'Details']) - header_data_names = ['index', 'snp_name', 'chr', 'mb_formatted', 'alleles', 'snp_source', 'conservation_score', 'gene_name', 'transcript', 'exon', 'domain_1', 'domain_2', 'function', 'function_details'] + header_fields.append(['Index', 'SNP ID', 'Chr', 'Mb', 'Alleles', 'Source', 'ConScore', + 'Gene', 'Transcript', 'Exon', 'Domain 1', 'Domain 2', 'Function', 'Details']) + header_data_names = ['index', 'snp_name', 'chr', 'mb_formatted', 'alleles', 'snp_source', 'conservation_score', + 'gene_name', 'transcript', 'exon', 'domain_1', 'domain_2', 'function', 'function_details'] header_fields.append(strain_list) header_data_names += strain_list @@ -704,18 +735,21 @@ def get_header_list(variant_type, strains, species = None, empty_columns = None) if empty_columns['function_details'] == "false": empty_field_count += 1 header_fields[0].remove('Details') - + for col in empty_columns.keys(): if empty_columns[col] == "false": header_data_names.remove(col) elif variant_type == "InDel": - header_fields = ['Index', 'ID', 'Type', 'InDel Chr', 'Mb Start', 'Mb End', 'Strand', 'Size', 'Sequence', 'Source'] - header_data_names = ['index', 'indel_name', 'indel_type', 'indel_chr', 'indel_mb_s', 'indel_mb_e', 'indel_strand', 'indel_size', 'indel_sequence', 'source_name'] + header_fields = ['Index', 'ID', 'Type', 'InDel Chr', + 'Mb Start', 'Mb End', 'Strand', 'Size', 'Sequence', 'Source'] + header_data_names = ['index', 'indel_name', 'indel_type', 'indel_chr', 'indel_mb_s', + 'indel_mb_e', 'indel_strand', 'indel_size', 'indel_sequence', 'source_name'] return header_fields, empty_field_count, header_data_names -def get_effect_details_by_category(effect_name = None, effect_value = None): + +def get_effect_details_by_category(effect_name=None, effect_value=None): gene_list = [] transcript_list = [] exon_list = [] @@ -723,10 +757,13 @@ def get_effect_details_by_category(effect_name = None, effect_value = None): function_detail_list = [] tmp_list = [] - gene_group_list = ['Upstream', 'Downstream', 'Splice Site', 'Nonsplice Site', '3\' UTR'] - biotype_group_list = ['Unknown Effect In Exon', 'Start Gained', 'Start Lost', 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] + gene_group_list = ['Upstream', 'Downstream', + 'Splice Site', 'Nonsplice Site', '3\' UTR'] + biotype_group_list = ['Unknown Effect In Exon', 'Start Gained', + 'Start Lost', 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] new_codon_group_list = ['Start Gained'] - codon_effect_group_list = ['Start Lost', 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] + codon_effect_group_list = [ + 'Start Lost', 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] effect_detail_list = effect_value.strip().split('|') effect_detail_list = [item.strip() for item in effect_detail_list] @@ -764,13 +801,16 @@ def get_effect_details_by_category(effect_name = None, effect_value = None): return [gene_list, transcript_list, exon_list, function_list, function_detail_list] + def get_effect_info(effect_list): domain = "" effect_detail_list = [] effect_info_dict = {} - prime3_utr, prime5_utr, upstream, downstream, intron, nonsplice_site, splice_site, intergenic = effect_list[:8] - exon, non_synonymous_coding, synonymous_coding, start_gained, start_lost, stop_gained, stop_lost, unknown_effect_in_exon = effect_list[8:16] + prime3_utr, prime5_utr, upstream, downstream, intron, nonsplice_site, splice_site, intergenic = effect_list[ + :8] + exon, non_synonymous_coding, synonymous_coding, start_gained, start_lost, stop_gained, stop_lost, unknown_effect_in_exon = effect_list[ + 8:16] if intergenic: domain = "Intergenic" @@ -779,63 +819,77 @@ def get_effect_info(effect_list): # if not exon, get gene list/transcript list info if upstream: domain = "Upstream" - effect_detail_list = get_effect_details_by_category(effect_name='Upstream', effect_value=upstream) + effect_detail_list = get_effect_details_by_category( + effect_name='Upstream', effect_value=upstream) effect_info_dict[domain] = effect_detail_list if downstream: domain = "Downstream" - effect_detail_list = get_effect_details_by_category(effect_name='Downstream', effect_value=downstream) + effect_detail_list = get_effect_details_by_category( + effect_name='Downstream', effect_value=downstream) effect_info_dict[domain] = effect_detail_list if intron: if splice_site: domain = "Splice Site" - effect_detail_list = get_effect_details_by_category(effect_name='Splice Site', effect_value=splice_site) + effect_detail_list = get_effect_details_by_category( + effect_name='Splice Site', effect_value=splice_site) effect_info_dict[domain] = effect_detail_list if nonsplice_site: domain = "Nonsplice Site" - effect_detail_list = get_effect_details_by_category(effect_name='Nonsplice Site', effect_value=nonsplice_site) + effect_detail_list = get_effect_details_by_category( + effect_name='Nonsplice Site', effect_value=nonsplice_site) effect_info_dict[domain] = effect_detail_list # get gene, transcript_list, and exon info if prime3_utr: domain = "3\' UTR" - effect_detail_list = get_effect_details_by_category(effect_name='3\' UTR', effect_value=prime3_utr) + effect_detail_list = get_effect_details_by_category( + effect_name='3\' UTR', effect_value=prime3_utr) effect_info_dict[domain] = effect_detail_list if prime5_utr: domain = "5\' UTR" - effect_detail_list = get_effect_details_by_category(effect_name='5\' UTR', effect_value=prime5_utr) + effect_detail_list = get_effect_details_by_category( + effect_name='5\' UTR', effect_value=prime5_utr) effect_info_dict[domain] = effect_detail_list if start_gained: domain = "Start Gained" - effect_detail_list = get_effect_details_by_category(effect_name='Start Gained', effect_value=start_gained) + effect_detail_list = get_effect_details_by_category( + effect_name='Start Gained', effect_value=start_gained) effect_info_dict[domain] = effect_detail_list if unknown_effect_in_exon: domain = "Unknown Effect In Exon" - effect_detail_list = get_effect_details_by_category(effect_name='Unknown Effect In Exon', effect_value=unknown_effect_in_exon) + effect_detail_list = get_effect_details_by_category( + effect_name='Unknown Effect In Exon', effect_value=unknown_effect_in_exon) effect_info_dict[domain] = effect_detail_list if start_lost: domain = "Start Lost" - effect_detail_list = get_effect_details_by_category(effect_name='Start Lost', effect_value=start_lost) + effect_detail_list = get_effect_details_by_category( + effect_name='Start Lost', effect_value=start_lost) effect_info_dict[domain] = effect_detail_list if stop_gained: domain = "Stop Gained" - effect_detail_list = get_effect_details_by_category(effect_name='Stop Gained', effect_value=stop_gained) + effect_detail_list = get_effect_details_by_category( + effect_name='Stop Gained', effect_value=stop_gained) effect_info_dict[domain] = effect_detail_list if stop_lost: domain = "Stop Lost" - effect_detail_list = get_effect_details_by_category(effect_name='Stop Lost', effect_value=stop_lost) + effect_detail_list = get_effect_details_by_category( + effect_name='Stop Lost', effect_value=stop_lost) effect_info_dict[domain] = effect_detail_list if non_synonymous_coding: domain = "Nonsynonymous" - effect_detail_list = get_effect_details_by_category(effect_name='Nonsynonymous', effect_value=non_synonymous_coding) + effect_detail_list = get_effect_details_by_category( + effect_name='Nonsynonymous', effect_value=non_synonymous_coding) effect_info_dict[domain] = effect_detail_list if synonymous_coding: domain = "Synonymous" - effect_detail_list = get_effect_details_by_category(effect_name='Synonymous', effect_value=synonymous_coding) + effect_detail_list = get_effect_details_by_category( + effect_name='Synonymous', effect_value=synonymous_coding) effect_info_dict[domain] = effect_detail_list return effect_info_dict + def get_gene_id(species_id, gene_name): query = """ SELECT @@ -853,11 +907,13 @@ def get_gene_id(species_id, gene_name): else: return "" + def get_gene_id_name_dict(species_id, gene_name_list): gene_id_name_dict = {} if len(gene_name_list) == 0: return "" - gene_name_str_list = ["'" + gene_name + "'" for gene_name in gene_name_list] + gene_name_str_list = ["'" + gene_name + \ + "'" for gene_name in gene_name_list] gene_name_str = ",".join(gene_name_str_list) query = """ @@ -877,8 +933,9 @@ def get_gene_id_name_dict(species_id, gene_name_list): return gene_id_name_dict + def check_if_in_gene(species_id, chr, mb): - if species_id != 0: #ZS: Check if this is necessary + if species_id != 0: # ZS: Check if this is necessary query = """SELECT geneId, geneSymbol FROM GeneList WHERE SpeciesId = {0} AND chromosome = '{1}' AND @@ -895,4 +952,3 @@ def check_if_in_gene(species_id, chr, mb): return [result[0], result[1]] else: return "" - diff --git a/wqflask/wqflask/submit_bnw.py b/wqflask/wqflask/submit_bnw.py index a0e84c8c..b21a88cc 100644 --- a/wqflask/wqflask/submit_bnw.py +++ b/wqflask/wqflask/submit_bnw.py @@ -3,7 +3,8 @@ from base import data_set from utility import helper_functions import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + def get_bnw_input(start_vars): logger.debug("BNW VARS:", start_vars) diff --git a/wqflask/wqflask/update_search_results.py b/wqflask/wqflask/update_search_results.py index 22a46ef2..2e467dc8 100644 --- a/wqflask/wqflask/update_search_results.py +++ b/wqflask/wqflask/update_search_results.py @@ -10,6 +10,7 @@ from utility.benchmark import Bench from utility.logger import getLogger logger = getLogger(__name__) + class GSearch: def __init__(self, kw): @@ -51,10 +52,12 @@ class GSearch: self.trait_list = [] with Bench("Creating trait objects"): for line in re: - dataset = create_dataset(line[3], "ProbeSet", get_samplelist=False) + dataset = create_dataset( + line[3], "ProbeSet", get_samplelist=False) trait_id = line[4] - #with Bench("Building trait object"): - this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) + # with Bench("Building trait object"): + this_trait = GeneralTrait( + dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) self.trait_list.append(this_trait) elif self.type == "phenotype": @@ -96,7 +99,8 @@ class GSearch: for line in re: dataset = create_dataset(line[2], "Publish") trait_id = line[3] - this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) + this_trait = GeneralTrait( + dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) self.trait_list.append(this_trait) self.results = self.convert_to_json() @@ -108,8 +112,8 @@ class GSearch: json_dict['data'] = [] for i, trait in enumerate(self.trait_list): - trait_row = { "checkbox": "<INPUT TYPE=\"checkbox\" NAME=\"searchResult\" class=\"checkbox trait_checkbox\" style=\"transform: scale(1.5);\" VALUE=\"{}:{}\">".format(trait.name, trait.dataset.name), - "index": i+1, + trait_row = {"checkbox": "<INPUT TYPE=\"checkbox\" NAME=\"searchResult\" class=\"checkbox trait_checkbox\" style=\"transform: scale(1.5);\" VALUE=\"{}:{}\">".format(trait.name, trait.dataset.name), + "index": i + 1, "species": trait.dataset.group.species, "group": trait.dataset.group.name, "tissue": trait.dataset.tissue, diff --git a/wqflask/wqflask/user_login.py b/wqflask/wqflask/user_login.py index b6e7973f..ff77982f 100644 --- a/wqflask/wqflask/user_login.py +++ b/wqflask/wqflask/user_login.py @@ -29,13 +29,15 @@ from utility.tools import SMTP_CONNECT, SMTP_USERNAME, SMTP_PASSWORD, LOG_SQL_AL THREE_DAYS = 60 * 60 * 24 * 3 + def timestamp(): return datetime.datetime.utcnow().isoformat() + def basic_info(): - return dict(timestamp = timestamp(), - ip_address = request.remote_addr, - user_agent = request.headers.get('User-Agent')) + return dict(timestamp=timestamp(), + ip_address=request.remote_addr, + user_agent=request.headers.get('User-Agent')) def encode_password(pass_gen_fields, unencrypted_password): @@ -43,10 +45,10 @@ def encode_password(pass_gen_fields, unencrypted_password): salt = pass_gen_fields['salt'] else: salt = bytes(pass_gen_fields['salt'], "utf-8") - encrypted_password = pbkdf2.pbkdf2_hex(str(unencrypted_password), + encrypted_password = pbkdf2.pbkdf2_hex(str(unencrypted_password), salt, - pass_gen_fields['iterations'], - pass_gen_fields['keylength'], + pass_gen_fields['iterations'], + pass_gen_fields['keylength'], pass_gen_fields['hashfunc']) pass_gen_fields.pop("unencrypted_password", None) @@ -54,61 +56,65 @@ def encode_password(pass_gen_fields, unencrypted_password): return pass_gen_fields + def set_password(password): pass_gen_fields = { - "unencrypted_password": password, - "algorithm": "pbkdf2", - "hashfunc": "sha256", - "salt": base64.b64encode(os.urandom(32)), - "iterations": 100000, - "keylength": 32, - "created_timestamp": timestamp() + "unencrypted_password": password, + "algorithm": "pbkdf2", + "hashfunc": "sha256", + "salt": base64.b64encode(os.urandom(32)), + "iterations": 100000, + "keylength": 32, + "created_timestamp": timestamp() } assert len(password) >= 6, "Password shouldn't be shorter than 6 characters" - encoded_password = encode_password(pass_gen_fields, pass_gen_fields['unencrypted_password']) + encoded_password = encode_password( + pass_gen_fields, pass_gen_fields['unencrypted_password']) return encoded_password + def get_signed_session_id(user): session_id = str(uuid.uuid4()) session_id_signature = hmac.hmac_creation(session_id) session_id_signed = session_id + ":" + session_id_signature - #ZS: Need to check if this is ever actually used or exists + # ZS: Need to check if this is ever actually used or exists if 'user_id' not in user: user['user_id'] = str(uuid.uuid4()) save_user(user, user['user_id']) if 'github_id' in user: - session = dict(login_time = time.time(), - user_type = "github", - user_id = user['user_id'], - github_id = user['github_id'], - user_name = user['name'], - user_url = user['user_url']) + session = dict(login_time=time.time(), + user_type="github", + user_id=user['user_id'], + github_id=user['github_id'], + user_name=user['name'], + user_url=user['user_url']) elif 'orcid' in user: - session = dict(login_time = time.time(), - user_type = "orcid", - user_id = user['user_id'], - github_id = user['orcid'], - user_name = user['name'], - user_url = user['user_url']) + session = dict(login_time=time.time(), + user_type="orcid", + user_id=user['user_id'], + github_id=user['orcid'], + user_name=user['name'], + user_url=user['user_url']) else: - session = dict(login_time = time.time(), - user_type = "gn2", - user_id = user['user_id'], - user_name = user['full_name'], - user_email_address = user['email_address']) + session = dict(login_time=time.time(), + user_type="gn2", + user_id=user['user_id'], + user_name=user['full_name'], + user_email_address=user['email_address']) key = UserSession.user_cookie_name + ":" + session_id Redis.hmset(key, session) Redis.expire(key, THREE_DAYS) - + return session_id_signed + def send_email(toaddr, msg, fromaddr="no-reply@genenetwork.org"): """Send an E-mail through SMTP_CONNECT host. If SMTP_USERNAME is not 'UNKNOWN' TLS is used @@ -123,28 +129,31 @@ def send_email(toaddr, msg, fromaddr="no-reply@genenetwork.org"): server.login(SMTP_USERNAME, SMTP_PASSWORD) server.sendmail(fromaddr, toaddr, msg) server.quit() - logger.info("Successfully sent email to "+toaddr) + logger.info("Successfully sent email to " + toaddr) + -def send_verification_email(user_details, template_name = "email/user_verification.txt", key_prefix = "verification_code", subject = "GeneNetwork e-mail verification"): +def send_verification_email(user_details, template_name="email/user_verification.txt", key_prefix="verification_code", subject="GeneNetwork e-mail verification"): verification_code = str(uuid.uuid4()) key = key_prefix + ":" + verification_code - data = json.dumps(dict(id=user_details['user_id'], timestamp = timestamp())) + data = json.dumps(dict(id=user_details['user_id'], timestamp=timestamp())) Redis.set(key, data) Redis.expire(key, THREE_DAYS) recipient = user_details['email_address'] - body = render_template(template_name, verification_code = verification_code) + body = render_template(template_name, verification_code=verification_code) send_email(recipient, subject, body) return {"recipient": recipient, "subject": subject, "body": body} -def send_invitation_email(user_email, temp_password, template_name = "email/user_invitation.txt", subject = "You've been added to a GeneNetwork user group"): + +def send_invitation_email(user_email, temp_password, template_name="email/user_invitation.txt", subject="You've been added to a GeneNetwork user group"): recipient = user_email body = render_template(template_name, temp_password) send_email(recipient, subject, body) return {"recipient": recipient, "subject": subject, "body": body} + @app.route("/manage/verify_email") def verify_email(): if 'code' in request.args: @@ -153,27 +162,32 @@ def verify_email(): # As long as they have access to the email account # We might as well log them in session_id_signed = get_signed_session_id(user_details) - flash("Thank you for logging in {}.".format(user_details['full_name']), "alert-success") - response = make_response(redirect(url_for('index_page', import_collections = import_col, anon_id = anon_id))) - response.set_cookie(UserSession.user_cookie_name, session_id_signed, max_age=None) + flash("Thank you for logging in {}.".format( + user_details['full_name']), "alert-success") + response = make_response(redirect( + url_for('index_page', import_collections=import_col, anon_id=anon_id))) + response.set_cookie(UserSession.user_cookie_name, + session_id_signed, max_age=None) return response else: - flash("Invalid code: Password reset code does not exist or might have expired!", "error") + flash( + "Invalid code: Password reset code does not exist or might have expired!", "error") + @app.route("/n/login", methods=('GET', 'POST')) def login(): params = request.form if request.form else request.args logger.debug("in login params are:", params) - if not params: #ZS: If coming to page for first time + if not params: # ZS: If coming to page for first time from utility.tools import GITHUB_AUTH_URL, GITHUB_CLIENT_ID, ORCID_AUTH_URL, ORCID_CLIENT_ID external_login = {} if GITHUB_AUTH_URL and GITHUB_CLIENT_ID != 'UNKNOWN': external_login["github"] = GITHUB_AUTH_URL if ORCID_AUTH_URL and ORCID_CLIENT_ID != 'UNKNOWN': external_login["orcid"] = ORCID_AUTH_URL - return render_template("new_security/login_user.html", external_login = external_login, redis_is_available=is_redis_available()) - else: #ZS: After clicking sign-in + return render_template("new_security/login_user.html", external_login=external_login, redis_is_available=is_redis_available()) + else: # ZS: After clicking sign-in if 'type' in params and 'uid' in params: user_details = get_user_by_unique_column("user_id", params['uid']) if user_details: @@ -186,31 +200,36 @@ def login(): display_id = user_details['orcid'] else: display_id = "" - flash("Thank you for logging in {}.".format(display_id), "alert-success") + flash("Thank you for logging in {}.".format( + display_id), "alert-success") response = make_response(redirect(url_for('index_page'))) - response.set_cookie(UserSession.user_cookie_name, session_id_signed, max_age=None) + response.set_cookie( + UserSession.user_cookie_name, session_id_signed, max_age=None) else: flash("Something went unexpectedly wrong.", "alert-danger") - response = make_response(redirect(url_for('index_page'))) + response = make_response(redirect(url_for('index_page'))) return response else: - user_details = get_user_by_unique_column("email_address", params['email_address']) + user_details = get_user_by_unique_column( + "email_address", params['email_address']) password_match = False if user_details: submitted_password = params['password'] pwfields = user_details['password'] if isinstance(pwfields, str): pwfields = json.loads(pwfields) - encrypted_pass_fields = encode_password(pwfields, submitted_password) - password_match = pbkdf2.safe_str_cmp(encrypted_pass_fields['password'], pwfields['password']) + encrypted_pass_fields = encode_password( + pwfields, submitted_password) + password_match = pbkdf2.safe_str_cmp( + encrypted_pass_fields['password'], pwfields['password']) - else: # Invalid e-mail + else: # Invalid e-mail flash("Invalid e-mail address. Please try again.", "alert-danger") response = make_response(redirect(url_for('login'))) return response - if password_match: # If password correct - if user_details['confirmed']: # If account confirmed + if password_match: # If password correct + if user_details['confirmed']: # If account confirmed import_col = "false" anon_id = "" if 'import_collections' in params: @@ -218,20 +237,25 @@ def login(): anon_id = params['anon_id'] session_id_signed = get_signed_session_id(user_details) - flash("Thank you for logging in {}.".format(user_details['full_name']), "alert-success") - response = make_response(redirect(url_for('index_page', import_collections = import_col, anon_id = anon_id))) - response.set_cookie(UserSession.user_cookie_name, session_id_signed, max_age=None) + flash("Thank you for logging in {}.".format( + user_details['full_name']), "alert-success") + response = make_response(redirect( + url_for('index_page', import_collections=import_col, anon_id=anon_id))) + response.set_cookie( + UserSession.user_cookie_name, session_id_signed, max_age=None) return response else: - email_ob = send_verification_email(user_details, template_name = "email/user_verification.txt") + email_ob = send_verification_email( + user_details, template_name="email/user_verification.txt") return render_template("newsecurity/verification_still_needed.html", subject=email_ob['subject']) - else: # Incorrect password - #ZS: It previously seemed to store that there was an incorrect log-in attempt here, but it did so in the MySQL DB so this might need to be reproduced with Redis + else: # Incorrect password + # ZS: It previously seemed to store that there was an incorrect log-in attempt here, but it did so in the MySQL DB so this might need to be reproduced with Redis flash("Invalid password. Please try again.", "alert-danger") response = make_response(redirect(url_for('login'))) return response + @app.route("/n/login/github_oauth2", methods=('GET', 'POST')) def github_oauth2(): from utility.tools import GITHUB_CLIENT_ID, GITHUB_CLIENT_SECRET, GITHUB_AUTH_URL @@ -242,34 +266,39 @@ def github_oauth2(): "code": code } - result = requests.post("https://github.com/login/oauth/access_token", json=data) - result_dict = {arr[0]:arr[1] for arr in [tok.split("=") for tok in result.text.split("&")]} + result = requests.post( + "https://github.com/login/oauth/access_token", json=data) + result_dict = {arr[0]: arr[1] + for arr in [tok.split("=") for tok in result.text.split("&")]} github_user = get_github_user_details(result_dict["access_token"]) user_details = get_user_by_unique_column("github_id", github_user["id"]) if user_details == None: user_details = { - "user_id": str(uuid.uuid4()), - "name": github_user["name"].encode("utf-8") if github_user["name"] else "None", + "user_id": str(uuid.uuid4()), + "name": github_user["name"].encode("utf-8") if github_user["name"] else "None", "github_id": github_user["id"], - "user_url": github_user["html_url"].encode("utf-8"), - "login_type": "github", - "organization": "", - "active": 1, + "user_url": github_user["html_url"].encode("utf-8"), + "login_type": "github", + "organization": "", + "active": 1, "confirmed": 1 } save_user(user_details, user_details["user_id"]) - url = "/n/login?type=github&uid="+user_details["user_id"] + url = "/n/login?type=github&uid=" + user_details["user_id"] return redirect(url) + def get_github_user_details(access_token): from utility.tools import GITHUB_API_URL - result = requests.get(GITHUB_API_URL, headers = {'Authorization':'token ' + access_token }).content + result = requests.get(GITHUB_API_URL, headers={ + 'Authorization': 'token ' + access_token}).content return json.loads(result) + @app.route("/n/login/orcid_oauth2", methods=('GET', 'POST')) def orcid_oauth2(): from uuid import uuid4 @@ -279,8 +308,8 @@ def orcid_oauth2(): url = "/n/login" if code: data = { - "client_id": ORCID_CLIENT_ID, - "client_secret": ORCID_CLIENT_SECRET, + "client_id": ORCID_CLIENT_ID, + "client_secret": ORCID_CLIENT_SECRET, "grant_type": "authorization_code", "redirect_uri": GN2_BRANCH_URL + "n/login/orcid_oauth2", "code": code @@ -292,25 +321,27 @@ def orcid_oauth2(): user_details = get_user_by_unique_column("orcid", result_dict["orcid"]) if user_details == None: user_details = { - "user_id": str(uuid4()), - "name": result_dict["name"], - "orcid": result_dict["orcid"], - "user_url": "%s/%s" % ("/".join(ORCID_AUTH_URL.split("/")[:-2]), result_dict["orcid"]), - "login_type": "orcid", - "organization": "", - "active": 1, + "user_id": str(uuid4()), + "name": result_dict["name"], + "orcid": result_dict["orcid"], + "user_url": "%s/%s" % ("/".join(ORCID_AUTH_URL.split("/")[:-2]), result_dict["orcid"]), + "login_type": "orcid", + "organization": "", + "active": 1, "confirmed": 1 } save_user(user_details, user_details["user_id"]) - url = "/n/login?type=orcid&uid="+user_details["user_id"] + url = "/n/login?type=orcid&uid=" + user_details["user_id"] else: flash("There was an error getting code from ORCID") return redirect(url) + def get_github_user_details(access_token): from utility.tools import GITHUB_API_URL - result = requests.get(GITHUB_API_URL, headers = {'Authorization':'token ' + access_token }).content + result = requests.get(GITHUB_API_URL, headers={ + 'Authorization': 'token ' + access_token}).content return json.loads(result) @@ -325,6 +356,7 @@ def logout(): response.set_cookie(UserSession.user_cookie_name, '', expires=0) return response + @app.route("/n/forgot_password", methods=['GET']) def forgot_password(): """Entry point for forgotten password""" @@ -333,15 +365,16 @@ def forgot_password(): print("ERRORS: ", errors) return render_template("new_security/forgot_password.html", errors=errors) + def send_forgot_password_email(verification_email): from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText - template_name = "email/forgot_password.txt" + template_name = "email/forgot_password.txt" key_prefix = "forgot_password_code" subject = "GeneNetwork password reset" fromaddr = "no-reply@genenetwork.org" - + verification_code = str(uuid.uuid4()) key = key_prefix + ":" + verification_code @@ -353,7 +386,7 @@ def send_forgot_password_email(verification_email): save_verification_code(verification_email, verification_code) - body = render_template(template_name, verification_code = verification_code) + body = render_template(template_name, verification_code=verification_code) msg = MIMEMultipart() msg["To"] = verification_email @@ -365,6 +398,7 @@ def send_forgot_password_email(verification_email): return subject + @app.route("/n/forgot_password_submit", methods=('POST',)) def forgot_password_submit(): """When a forgotten password form is submitted we get here""" @@ -373,19 +407,23 @@ def forgot_password_submit(): next_page = None if email_address != "": logger.debug("Wants to send password E-mail to ", email_address) - user_details = get_user_by_unique_column("email_address", email_address) + user_details = get_user_by_unique_column( + "email_address", email_address) if user_details: - email_subject = send_forgot_password_email(user_details["email_address"]) + email_subject = send_forgot_password_email( + user_details["email_address"]) return render_template("new_security/forgot_password_step2.html", subject=email_subject) else: - flash("The e-mail entered is not associated with an account.", "alert-danger") + flash("The e-mail entered is not associated with an account.", + "alert-danger") return redirect(url_for("forgot_password")) else: flash("You MUST provide an email", "alert-danger") return redirect(url_for("forgot_password")) + @app.route("/n/password_reset", methods=['GET']) def password_reset(): """Entry point after user clicks link in E-mail""" @@ -400,11 +438,13 @@ def password_reset(): return render_template( "new_security/password_reset.html", user_encode=user_details["email_address"]) else: - flash("Invalid code: Password reset code does not exist or might have expired!", "error") + flash( + "Invalid code: Password reset code does not exist or might have expired!", "error") return redirect(url_for("login")) else: return redirect(url_for("login")) + @app.route("/n/password_reset_step2", methods=('POST',)) def password_reset_step2(): """Handle confirmation E-mail for password reset""" @@ -422,44 +462,52 @@ def password_reset_step2(): flash("Password changed successfully. You can now sign in.", "alert-info") return redirect(url_for('login')) + def register_user(params): - thank_you_mode = False - errors = [] - user_details = {} + thank_you_mode = False + errors = [] + user_details = {} - user_details['email_address'] = params.get('email_address', '').encode("utf-8").strip() - if not (5 <= len(user_details['email_address']) <= 50): - errors.append('Email Address needs to be between 5 and 50 characters.') - else: - email_exists = get_user_by_unique_column("email_address", user_details['email_address']) - if email_exists: - errors.append('User already exists with that email') + user_details['email_address'] = params.get( + 'email_address', '').encode("utf-8").strip() + if not (5 <= len(user_details['email_address']) <= 50): + errors.append( + 'Email Address needs to be between 5 and 50 characters.') + else: + email_exists = get_user_by_unique_column( + "email_address", user_details['email_address']) + if email_exists: + errors.append('User already exists with that email') - user_details['full_name'] = params.get('full_name', '').encode("utf-8").strip() - if not (5 <= len(user_details['full_name']) <= 50): - errors.append('Full Name needs to be between 5 and 50 characters.') + user_details['full_name'] = params.get( + 'full_name', '').encode("utf-8").strip() + if not (5 <= len(user_details['full_name']) <= 50): + errors.append('Full Name needs to be between 5 and 50 characters.') - user_details['organization'] = params.get('organization', '').encode("utf-8").strip() - if user_details['organization'] and not (5 <= len(user_details['organization']) <= 50): - errors.append('Organization needs to be empty or between 5 and 50 characters.') + user_details['organization'] = params.get( + 'organization', '').encode("utf-8").strip() + if user_details['organization'] and not (5 <= len(user_details['organization']) <= 50): + errors.append( + 'Organization needs to be empty or between 5 and 50 characters.') - password = str(params.get('password', '')) - if not (6 <= len(password)): - errors.append('Password needs to be at least 6 characters.') + password = str(params.get('password', '')) + if not (6 <= len(password)): + errors.append('Password needs to be at least 6 characters.') - if params.get('password_confirm') != password: - errors.append("Passwords don't match.") + if params.get('password_confirm') != password: + errors.append("Passwords don't match.") - user_details['password'] = set_password(password) - user_details['user_id'] = str(uuid.uuid4()) - user_details['confirmed'] = 1 + user_details['password'] = set_password(password) + user_details['user_id'] = str(uuid.uuid4()) + user_details['confirmed'] = 1 - user_details['registration_info'] = basic_info() + user_details['registration_info'] = basic_info() - if len(errors) == 0: - save_user(user_details, user_details['user_id']) + if len(errors) == 0: + save_user(user_details, user_details['user_id']) + + return errors - return errors @app.route("/n/register", methods=('GET', 'POST')) def register(): @@ -473,11 +521,13 @@ def register(): errors = register_user(params) if len(errors) == 0: - flash("Registration successful. You may login with your new account", "alert-info") + flash( + "Registration successful. You may login with your new account", "alert-info") return redirect(url_for("login")) return render_template("new_security/register_user.html", values=params, errors=errors) + @app.errorhandler(401) def unauthorized(error): return redirect(url_for('login')) diff --git a/wqflask/wqflask/user_manager.py b/wqflask/wqflask/user_manager.py index fcec3b67..cf84ea73 100644 --- a/wqflask/wqflask/user_manager.py +++ b/wqflask/wqflask/user_manager.py @@ -74,11 +74,11 @@ class AnonUser: self.key = "anon_collection:v1:{}".format(self.anon_id) def add_collection(self, new_collection): - collection_dict = dict(name = new_collection.name, - created_timestamp = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), - changed_timestamp = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), - num_members = new_collection.num_members, - members = new_collection.get_members()) + collection_dict = dict(name=new_collection.name, + created_timestamp=datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), + changed_timestamp=datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), + num_members=new_collection.num_members, + members=new_collection.get_members()) Redis.set(self.key, json.dumps(collection_dict)) Redis.expire(self.key, 60 * 60 * 24 * 365) @@ -93,8 +93,10 @@ class AnonUser: this_collection = {} this_collection['id'] = collection['id'] this_collection['name'] = collection['name'] - this_collection['created_timestamp'] = collection['created_timestamp'].strftime('%b %d %Y %I:%M%p') - this_collection['changed_timestamp'] = collection['changed_timestamp'].strftime('%b %d %Y %I:%M%p') + this_collection['created_timestamp'] = collection['created_timestamp'].strftime( + '%b %d %Y %I:%M%p') + this_collection['changed_timestamp'] = collection['changed_timestamp'].strftime( + '%b %d %Y %I:%M%p') this_collection['num_members'] = collection['num_members'] this_collection['members'] = collection['members'] updated_collections.append(this_collection) @@ -108,21 +110,26 @@ class AnonUser: else: collections = json.loads(json_collections) for collection in collections: - collection['created_timestamp'] = datetime.datetime.strptime(collection['created_timestamp'], '%b %d %Y %I:%M%p') - collection['changed_timestamp'] = datetime.datetime.strptime(collection['changed_timestamp'], '%b %d %Y %I:%M%p') + collection['created_timestamp'] = datetime.datetime.strptime( + collection['created_timestamp'], '%b %d %Y %I:%M%p') + collection['changed_timestamp'] = datetime.datetime.strptime( + collection['changed_timestamp'], '%b %d %Y %I:%M%p') - collections = sorted(collections, key = lambda i: i['changed_timestamp'], reverse = True) + collections = sorted( + collections, key=lambda i: i['changed_timestamp'], reverse=True) return collections def import_traits_to_user(self): result = Redis.get(self.key) collections_list = json.loads(result if result else "[]") for collection in collections_list: - collection_exists = g.user_session.get_collection_by_name(collection['name']) + collection_exists = g.user_session.get_collection_by_name( + collection['name']) if collection_exists: continue else: - g.user_session.add_collection(collection['name'], collection['members']) + g.user_session.add_collection( + collection['name'], collection['members']) def display_num_collections(self): """ @@ -148,9 +155,11 @@ def verify_cookie(cookie): the_uuid, separator, the_signature = cookie.partition(':') assert len(the_uuid) == 36, "Is session_id a uuid?" assert separator == ":", "Expected a : here" - assert the_signature == actual_hmac_creation(the_uuid), "Uh-oh, someone tampering with the cookie?" + assert the_signature == actual_hmac_creation( + the_uuid), "Uh-oh, someone tampering with the cookie?" return the_uuid + def create_signed_cookie(): the_uuid = str(uuid.uuid4()) signature = actual_hmac_creation(the_uuid) @@ -158,6 +167,7 @@ def create_signed_cookie(): logger.debug("uuid_signed:", uuid_signed) return the_uuid, uuid_signed + class UserSession: """Logged in user handling""" @@ -182,13 +192,13 @@ class UserSession: # weekend and the site hasn't been visited by the user self.logged_in = False - ########### Grrr...this won't work because of the way flask handles cookies + # Grrr...this won't work because of the way flask handles cookies # Delete the cookie #response = make_response(redirect(url_for('login'))) #response.set_cookie(self.cookie_name, '', expires=0) - #flash( + # flash( # "Due to inactivity your session has expired. If you'd like please login again.") - #return response + # return response return if Redis.ttl(self.redis_key) < THREE_DAYS: @@ -213,7 +223,7 @@ class UserSession: user_email = self.record['user_email_address'] - #ZS: Get user's collections if they exist + # ZS: Get user's collections if they exist user_id = None user_id = get_user_id("email_address", user_email) return user_id @@ -230,7 +240,7 @@ class UserSession: def user_collections(self): """List of user's collections""" - #ZS: Get user's collections if they exist + # ZS: Get user's collections if they exist collections = get_user_collections(self.redis_user_id) return collections @@ -248,7 +258,7 @@ class UserSession: 'created_timestamp': datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), 'changed_timestamp': datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), 'num_members': len(traits), - 'members': list(traits) } + 'members': list(traits)} current_collections = self.user_collections current_collections.append(collection_dict) @@ -280,7 +290,8 @@ class UserSession: updated_collection['members'] = updated_traits updated_collection['num_members'] = len(updated_traits) - updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') + updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime( + '%b %d %Y %I:%M%p') updated_collections = [] for collection in self.user_collections: @@ -306,7 +317,8 @@ class UserSession: updated_collection['members'] = updated_traits updated_collection['num_members'] = len(updated_traits) - updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') + updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime( + '%b %d %Y %I:%M%p') updated_collections = [] for collection in self.user_collections: @@ -341,23 +353,29 @@ class UserSession: Redis.delete(self.cookie_name) logger.debug("At end of delete_session") + @app.before_request def get_cookie(): logger.info("@app.before_request get cookie") g.user_session = UserSession() g.cookie_session = AnonUser() -#@app.after_request +# @app.after_request + + def set_cookie(response): if not request.cookies.get(g.cookie_session.cookie_name): - response.set_cookie(g.cookie_session.cookie_name, g.cookie_session.cookie) + response.set_cookie(g.cookie_session.cookie_name, + g.cookie_session.cookie) return response + class UsersManager: def __init__(self): self.users = model.User.query.all() logger.debug("Users are:", self.users) + class UserManager: def __init__(self, kw): self.user_id = kw['user_id'] @@ -383,22 +401,28 @@ class RegisterUser: self.errors = [] self.user = Bunch() - self.user.email_address = kw.get('email_address', '').encode("utf-8").strip() + self.user.email_address = kw.get( + 'email_address', '').encode("utf-8").strip() if not (5 <= len(self.user.email_address) <= 50): - self.errors.append('Email Address needs to be between 5 and 50 characters.') + self.errors.append( + 'Email Address needs to be between 5 and 50 characters.') else: - email_exists = get_user_by_unique_column("email_address", self.user.email_address) + email_exists = get_user_by_unique_column( + "email_address", self.user.email_address) #email_exists = get_user_by_unique_column(es, "email_address", self.user.email_address) if email_exists: self.errors.append('User already exists with that email') self.user.full_name = kw.get('full_name', '').encode("utf-8").strip() if not (5 <= len(self.user.full_name) <= 50): - self.errors.append('Full Name needs to be between 5 and 50 characters.') + self.errors.append( + 'Full Name needs to be between 5 and 50 characters.') - self.user.organization = kw.get('organization', '').encode("utf-8").strip() + self.user.organization = kw.get( + 'organization', '').encode("utf-8").strip() if self.user.organization and not (5 <= len(self.user.organization) <= 50): - self.errors.append('Organization needs to be empty or between 5 and 50 characters.') + self.errors.append( + 'Organization needs to be empty or between 5 and 50 characters.') password = str(kw.get('password', '')) if not (6 <= len(password)): @@ -419,6 +443,7 @@ class RegisterUser: self.user.registration_info = json.dumps(basic_info(), sort_keys=True) save_user(self.user.__dict__, self.user.user_id) + def set_password(password, user): pwfields = Bunch() @@ -450,12 +475,12 @@ def set_password(password, user): pwfields.encrypt_time = enc_password.encrypt_time user.password = json.dumps(pwfields.__dict__, - sort_keys=True, - ) + sort_keys=True, + ) class VerificationEmail: - template_name = "email/verification.txt" + template_name = "email/verification.txt" key_prefix = "verification_code" subject = "GeneNetwork email verification" @@ -473,9 +498,10 @@ class VerificationEmail: to = user.email_address subject = self.subject body = render_template(self.template_name, - verification_code = verification_code) + verification_code=verification_code) send_email(to, subject, body) + class ForgotPasswordEmail(VerificationEmail): template_name = "email/forgot_password.txt" key_prefix = "forgot_password_code" @@ -496,11 +522,10 @@ class ForgotPasswordEmail(VerificationEmail): save_verification_code(toaddr, verification_code) - subject = self.subject body = render_template( self.template_name, - verification_code = verification_code) + verification_code=verification_code) msg = MIMEMultipart() msg["To"] = toaddr @@ -525,11 +550,13 @@ class Password: def basic_info(): - return dict(timestamp = timestamp(), - ip_address = request.remote_addr, - user_agent = request.headers.get('User-Agent')) + return dict(timestamp=timestamp(), + ip_address=request.remote_addr, + user_agent=request.headers.get('User-Agent')) + +# @app.route("/manage/verify_email") + -#@app.route("/manage/verify_email") def verify_email(): user = DecodeUser(VerificationEmail.key_prefix).user user.confirmed = json.dumps(basic_info(), sort_keys=True) @@ -543,7 +570,9 @@ def verify_email(): response.set_cookie(UserSession.cookie_name, session_id_signed) return response -#@app.route("/n/password_reset", methods=['GET']) +# @app.route("/n/password_reset", methods=['GET']) + + def password_reset(): """Entry point after user clicks link in E-mail""" logger.debug("in password_reset request.url is:", request.url) @@ -556,18 +585,22 @@ def password_reset(): if verification_code: user_email = check_verification_code(verification_code) if user_email: - user_details = get_user_by_unique_column('email_address', user_email) + user_details = get_user_by_unique_column( + 'email_address', user_email) if user_details: return render_template( "new_security/password_reset.html", user_encode=user_details["user_id"]) else: flash("Invalid code: User no longer exists!", "error") else: - flash("Invalid code: Password reset code does not exist or might have expired!", "error") + flash( + "Invalid code: Password reset code does not exist or might have expired!", "error") else: return redirect(url_for("login")) -#@app.route("/n/password_reset_step2", methods=('POST',)) +# @app.route("/n/password_reset_step2", methods=('POST',)) + + def password_reset_step2(): """Handle confirmation E-mail for password reset""" logger.debug("in password_reset request.url is:", request.url) @@ -577,7 +610,6 @@ def password_reset_step2(): logger.debug("locals are:", locals()) - user = Bunch() password = request.form['password'] set_password(password, user) @@ -589,6 +621,7 @@ def password_reset_step2(): return response + class DecodeUser: def __init__(self, code_prefix): @@ -611,7 +644,9 @@ class DecodeUser: logger.debug("data is:", data) return model.User.query.get(data['id']) -#@app.route("/n/login", methods=('GET', 'POST')) +# @app.route("/n/login", methods=('GET', 'POST')) + + def login(): lu = LoginUser() login_type = request.args.get("type") @@ -621,7 +656,9 @@ def login(): else: return lu.standard_login() -#@app.route("/n/login/github_oauth2", methods=('GET', 'POST')) +# @app.route("/n/login/github_oauth2", methods=('GET', 'POST')) + + def github_oauth2(): from utility.tools import GITHUB_CLIENT_ID, GITHUB_CLIENT_SECRET code = request.args.get("code") @@ -630,29 +667,26 @@ def github_oauth2(): "client_secret": GITHUB_CLIENT_SECRET, "code": code } - result = requests.post("https://github.com/login/oauth/access_token", json=data) - result_dict = {arr[0]:arr[1] for arr in [tok.split("=") for tok in [token.encode("utf-8") for token in result.text.split("&")]]} + result = requests.post( + "https://github.com/login/oauth/access_token", json=data) + result_dict = {arr[0]: arr[1] for arr in [tok.split( + "=") for tok in [token.encode("utf-8") for token in result.text.split("&")]]} github_user = get_github_user_details(result_dict["access_token"]) user_details = get_user_by_unique_column("github_id", github_user["id"]) if user_details == None: user_details = { - "user_id": str(uuid.uuid4()) - , "name": github_user["name"].encode("utf-8") - , "github_id": github_user["id"] - , "user_url": github_user["html_url"].encode("utf-8") - , "login_type": "github" - , "organization": "" - , "active": 1 - , "confirmed": 1 + "user_id": str(uuid.uuid4()), "name": github_user["name"].encode("utf-8"), "github_id": github_user["id"], "user_url": github_user["html_url"].encode("utf-8"), "login_type": "github", "organization": "", "active": 1, "confirmed": 1 } save_user(user_details, user_details["user_id"]) - url = "/n/login?type=github&uid="+user_details["user_id"] + url = "/n/login?type=github&uid=" + user_details["user_id"] return redirect(url) -#@app.route("/n/login/orcid_oauth2", methods=('GET', 'POST')) +# @app.route("/n/login/orcid_oauth2", methods=('GET', 'POST')) + + def orcid_oauth2(): from uuid import uuid4 from utility.tools import ORCID_CLIENT_ID, ORCID_CLIENT_SECRET, ORCID_TOKEN_URL, ORCID_AUTH_URL @@ -661,10 +695,7 @@ def orcid_oauth2(): url = "/n/login" if code: data = { - "client_id": ORCID_CLIENT_ID - , "client_secret": ORCID_CLIENT_SECRET - , "grant_type": "authorization_code" - , "code": code + "client_id": ORCID_CLIENT_ID, "client_secret": ORCID_CLIENT_SECRET, "grant_type": "authorization_code", "code": code } result = requests.post(ORCID_TOKEN_URL, data=data) result_dict = json.loads(result.text.encode("utf-8")) @@ -672,31 +703,27 @@ def orcid_oauth2(): user_details = get_user_by_unique_column("orcid", result_dict["orcid"]) if user_details == None: user_details = { - "user_id": str(uuid4()) - , "name": result_dict["name"] - , "orcid": result_dict["orcid"] - , "user_url": "%s/%s" % ( + "user_id": str(uuid4()), "name": result_dict["name"], "orcid": result_dict["orcid"], "user_url": "%s/%s" % ( "/".join(ORCID_AUTH_URL.split("/")[:-2]), - result_dict["orcid"]) - , "login_type": "orcid" - , "organization": "" - , "active": 1 - , "confirmed": 1 + result_dict["orcid"]), "login_type": "orcid", "organization": "", "active": 1, "confirmed": 1 } save_user(user_details, user_details["user_id"]) - url = "/n/login?type=orcid&uid="+user_details["user_id"] + url = "/n/login?type=orcid&uid=" + user_details["user_id"] else: flash("There was an error getting code from ORCID") return redirect(url) + def get_github_user_details(access_token): from utility.tools import GITHUB_API_URL - result = requests.get(GITHUB_API_URL, params={"access_token":access_token}) + result = requests.get(GITHUB_API_URL, params={ + "access_token": access_token}) return result.json() + class LoginUser: - remember_time = 60 * 60 * 24 * 30 # One month in seconds + remember_time = 60 * 60 * 24 * 30 # One month in seconds def __init__(self): self.remember_me = False @@ -730,19 +757,18 @@ class LoginUser: external_login["orcid"] = ORCID_AUTH_URL return render_template( - "new_security/login_user.html" - , external_login=external_login - , redis_is_available = is_redis_available()) + "new_security/login_user.html", external_login=external_login, redis_is_available=is_redis_available()) else: - user_details = get_user_by_unique_column("email_address", params["email_address"]) + user_details = get_user_by_unique_column( + "email_address", params["email_address"]) #user_details = get_user_by_unique_column(es, "email_address", params["email_address"]) user = None valid = None if user_details: - user = model.User(); + user = model.User() for key in user_details: user.__dict__[key] = user_details[key] - valid = False; + valid = False submitted_password = params['password'] pwfields = Struct(json.loads(user.password)) @@ -752,8 +778,10 @@ class LoginUser: pwfields.iterations, pwfields.keylength, pwfields.hashfunc) - logger.debug("\n\nComparing:\n{}\n{}\n".format(encrypted.password, pwfields.password)) - valid = pbkdf2.safe_str_cmp(encrypted.password, pwfields.password) + logger.debug("\n\nComparing:\n{}\n{}\n".format( + encrypted.password, pwfields.password)) + valid = pbkdf2.safe_str_cmp( + encrypted.password, pwfields.password) logger.debug("valid is:", valid) if valid and not user.confirmed: @@ -770,7 +798,7 @@ class LoginUser: else: import_col = "false" - #g.cookie_session.import_traits_to_user() + # g.cookie_session.import_traits_to_user() self.logged_in = True @@ -779,7 +807,8 @@ class LoginUser: else: if user: self.unsuccessful_login(user) - flash("Invalid email-address or password. Please try again.", "alert-danger") + flash("Invalid email-address or password. Please try again.", + "alert-danger") response = make_response(redirect(url_for('login'))) return response @@ -787,14 +816,17 @@ class LoginUser: def actual_login(self, user, assumed_by=None, import_collections=None): """The meat of the logging in process""" session_id_signed = self.successful_login(user, assumed_by) - flash("Thank you for logging in {}.".format(user.full_name), "alert-success") - response = make_response(redirect(url_for('index_page', import_collections=import_collections))) + flash("Thank you for logging in {}.".format( + user.full_name), "alert-success") + response = make_response( + redirect(url_for('index_page', import_collections=import_collections))) if self.remember_me: max_age = self.remember_time else: max_age = None - response.set_cookie(UserSession.cookie_name, session_id_signed, max_age=max_age) + response.set_cookie(UserSession.cookie_name, + session_id_signed, max_age=max_age) return response def successful_login(self, user, assumed_by=None): @@ -810,10 +842,10 @@ class LoginUser: if not user.id: user.id = '' - session = dict(login_time = time.time(), - user_id = user.id, - user_name = user.full_name, - user_email_address = user.email_address) + session = dict(login_time=time.time(), + user_id=user.id, + user_name=user.full_name, + user_email_address=user.email_address) key = UserSession.cookie_name + ":" + login_rec.session_id logger.debug("Key when signing:", key) @@ -832,7 +864,9 @@ class LoginUser: db_session.add(login_rec) db_session.commit() -#@app.route("/n/logout") +# @app.route("/n/logout") + + def logout(): logger.debug("Logging out...") UserSession().delete_session() @@ -843,7 +877,7 @@ def logout(): return response -#@app.route("/n/forgot_password", methods=['GET']) +# @app.route("/n/forgot_password", methods=['GET']) def forgot_password(): """Entry point for forgotten password""" print("ARGS: ", request.args) @@ -851,7 +885,9 @@ def forgot_password(): print("ERRORS: ", errors) return render_template("new_security/forgot_password.html", errors=errors) -#@app.route("/n/forgot_password_submit", methods=('POST',)) +# @app.route("/n/forgot_password_submit", methods=('POST',)) + + def forgot_password_submit(): """When a forgotten password form is submitted we get here""" params = request.form @@ -859,23 +895,27 @@ def forgot_password_submit(): next_page = None if email_address != "": logger.debug("Wants to send password E-mail to ", email_address) - user_details = get_user_by_unique_column("email_address", email_address) + user_details = get_user_by_unique_column( + "email_address", email_address) if user_details: ForgotPasswordEmail(user_details["email_address"]) return render_template("new_security/forgot_password_step2.html", subject=ForgotPasswordEmail.subject) else: - flash("The e-mail entered is not associated with an account.", "alert-danger") + flash("The e-mail entered is not associated with an account.", + "alert-danger") return redirect(url_for("forgot_password")) else: flash("You MUST provide an email", "alert-danger") return redirect(url_for("forgot_password")) + @app.errorhandler(401) def unauthorized(error): return redirect(url_for('login')) + def is_redis_available(): try: Redis.ping() @@ -886,7 +926,7 @@ def is_redis_available(): ### # ZS: The following 6 functions require the old MySQL User accounts; I'm leaving them commented out just in case we decide to reimplement them using ElasticSearch ### -#def super_only(): +# def super_only(): # try: # superuser = g.user_session.user_ob.superuser # except AttributeError: @@ -895,26 +935,26 @@ def is_redis_available(): # flash("You must be a superuser to access that page.", "alert-error") # abort(401) -#@app.route("/manage/users") -#def manage_users(): +# @app.route("/manage/users") +# def manage_users(): # super_only() # template_vars = UsersManager() # return render_template("admin/user_manager.html", **template_vars.__dict__) -#@app.route("/manage/user") -#def manage_user(): +# @app.route("/manage/user") +# def manage_user(): # super_only() # template_vars = UserManager(request.args) # return render_template("admin/ind_user_manager.html", **template_vars.__dict__) -#@app.route("/manage/groups") -#def manage_groups(): +# @app.route("/manage/groups") +# def manage_groups(): # super_only() # template_vars = GroupsManager(request.args) # return render_template("admin/group_manager.html", **template_vars.__dict__) -#@app.route("/manage/make_superuser") -#def make_superuser(): +# @app.route("/manage/make_superuser") +# def make_superuser(): # super_only() # params = request.args # user_id = params['user_id'] @@ -926,8 +966,8 @@ def is_redis_available(): # flash("We've made {} a superuser!".format(user.name_and_org)) # return redirect(url_for("manage_users")) -#@app.route("/manage/assume_identity") -#def assume_identity(): +# @app.route("/manage/assume_identity") +# def assume_identity(): # super_only() # params = request.args # user_id = params['user_id'] @@ -936,12 +976,11 @@ def is_redis_available(): # return LoginUser().actual_login(user, assumed_by=assumed_by) -#@app.route("/n/register", methods=('GET', 'POST')) +# @app.route("/n/register", methods=('GET', 'POST')) def register(): params = None errors = None - params = request.form if request.form else request.args params = params.to_dict(flat=True) @@ -951,7 +990,8 @@ def register(): errors = result.errors if len(errors) == 0: - flash("Registration successful. You may login with your new account", "alert-info") + flash( + "Registration successful. You may login with your new account", "alert-info") return redirect(url_for("login")) return render_template("new_security/register_user.html", values=params, errors=errors) @@ -971,6 +1011,7 @@ def url_for_hmac(endpoint, **values): combiner = "?" return url + combiner + "hm=" + hm + def data_hmac(stringy): """Takes arbitray data string and appends :hmac so we know data hasn't been tampered with""" return stringy + ":" + actual_hmac_creation(stringy) @@ -993,6 +1034,7 @@ def verify_url_hmac(url): assert hm == hmac, "Unexpected url (stage 3)" + def actual_hmac_creation(stringy): """Helper function to create the actual hmac""" @@ -1005,6 +1047,7 @@ def actual_hmac_creation(stringy): hm = hm[:20] return hm + app.jinja_env.globals.update(url_for_hmac=url_for_hmac, data_hmac=data_hmac) @@ -1017,27 +1060,29 @@ app.jinja_env.globals.update(url_for_hmac=url_for_hmac, # Body=body)) # Redis.rpush("mail_queue", msg) + def send_email(toaddr, msg, fromaddr="no-reply@genenetwork.org"): """Send an E-mail through SMTP_CONNECT host. If SMTP_USERNAME is not 'UNKNOWN' TLS is used """ if SMTP_USERNAME == 'UNKNOWN': - logger.debug("SMTP: connecting with host "+SMTP_CONNECT) + logger.debug("SMTP: connecting with host " + SMTP_CONNECT) server = SMTP(SMTP_CONNECT) server.sendmail(fromaddr, toaddr, msg) else: - logger.debug("SMTP: connecting TLS with host "+SMTP_CONNECT) + logger.debug("SMTP: connecting TLS with host " + SMTP_CONNECT) server = SMTP(SMTP_CONNECT) server.starttls() - logger.debug("SMTP: login with user "+SMTP_USERNAME) + logger.debug("SMTP: login with user " + SMTP_USERNAME) server.login(SMTP_USERNAME, SMTP_PASSWORD) - logger.debug("SMTP: "+fromaddr) - logger.debug("SMTP: "+toaddr) - logger.debug("SMTP: "+msg) + logger.debug("SMTP: " + fromaddr) + logger.debug("SMTP: " + toaddr) + logger.debug("SMTP: " + msg) server.sendmail(fromaddr, toaddr, msg) server.quit() - logger.info("Successfully sent email to "+toaddr) + logger.info("Successfully sent email to " + toaddr) + class GroupsManager: def __init__(self, kw): diff --git a/wqflask/wqflask/user_session.py b/wqflask/wqflask/user_session.py index cc0ac744..67e2e158 100644 --- a/wqflask/wqflask/user_session.py +++ b/wqflask/wqflask/user_session.py @@ -20,30 +20,36 @@ logger = getLogger(__name__) THREE_DAYS = 60 * 60 * 24 * 3 THIRTY_DAYS = 60 * 60 * 24 * 30 + @app.before_request def get_user_session(): logger.info("@app.before_request get_session") g.user_session = UserSession() - #ZS: I think this should solve the issue of deleting the cookie and redirecting to the home page when a user's session has expired + # ZS: I think this should solve the issue of deleting the cookie and redirecting to the home page when a user's session has expired if not g.user_session: response = make_response(redirect(url_for('login'))) response.set_cookie('session_id_v2', '', expires=0) return response + @app.after_request def set_user_session(response): if hasattr(g, 'user_session'): if not request.cookies.get(g.user_session.cookie_name): - response.set_cookie(g.user_session.cookie_name, g.user_session.cookie) + response.set_cookie(g.user_session.cookie_name, + g.user_session.cookie) return response + def verify_cookie(cookie): the_uuid, separator, the_signature = cookie.partition(':') assert len(the_uuid) == 36, "Is session_id a uuid?" assert separator == ":", "Expected a : here" - assert the_signature == hmac.hmac_creation(the_uuid), "Uh-oh, someone tampering with the cookie?" + assert the_signature == hmac.hmac_creation( + the_uuid), "Uh-oh, someone tampering with the cookie?" return the_uuid + def create_signed_cookie(): the_uuid = str(uuid.uuid4()) signature = hmac.hmac_creation(the_uuid) @@ -51,17 +57,21 @@ def create_signed_cookie(): logger.debug("uuid_signed:", uuid_signed) return the_uuid, uuid_signed -@app.route("/user/manage", methods=('GET','POST')) + +@app.route("/user/manage", methods=('GET', 'POST')) def manage_user(): params = request.form if request.form else request.args if 'new_full_name' in params: - set_user_attribute(g.user_session.user_id, 'full_name', params['new_full_name']) + set_user_attribute(g.user_session.user_id, + 'full_name', params['new_full_name']) if 'new_organization' in params: - set_user_attribute(g.user_session.user_id, 'organization', params['new_organization']) + set_user_attribute(g.user_session.user_id, + 'organization', params['new_organization']) user_details = get_user_by_unique_column("user_id", g.user_session.user_id) - return render_template("admin/manage_user.html", user_details = user_details) + return render_template("admin/manage_user.html", user_details=user_details) + class UserSession: """Logged in user handling""" @@ -89,25 +99,26 @@ class UserSession: self.session_id = session_id self.record = Redis.hgetall(self.redis_key) - #ZS: If user correctled logged in but their session expired - #ZS: Need to test this by setting the time-out to be really short or something + # ZS: If user correctled logged in but their session expired + # ZS: Need to test this by setting the time-out to be really short or something if not self.record or self.record == []: if user_cookie: self.logged_in = False - self.record = dict(login_time = time.time(), - user_type = "anon", - user_id = str(uuid.uuid4())) + self.record = dict(login_time=time.time(), + user_type="anon", + user_id=str(uuid.uuid4())) Redis.hmset(self.redis_key, self.record) Redis.expire(self.redis_key, THIRTY_DAYS) - ########### Grrr...this won't work because of the way flask handles cookies + # Grrr...this won't work because of the way flask handles cookies # Delete the cookie - flash("Due to inactivity your session has expired. If you'd like please login again.") + flash( + "Due to inactivity your session has expired. If you'd like please login again.") return None else: - self.record = dict(login_time = time.time(), - user_type = "anon", - user_id = str(uuid.uuid4())) + self.record = dict(login_time=time.time(), + user_type="anon", + user_id=str(uuid.uuid4())) Redis.hmset(self.redis_key, self.record) Redis.expire(self.redis_key, THIRTY_DAYS) else: @@ -138,13 +149,13 @@ class UserSession: def redis_user_id(self): """User id from Redis (need to check if this is the same as the id stored in self.records)""" - #ZS: This part is a bit weird. Some accounts used to not have saved user ids, and in the process of testing I think I created some duplicate accounts for myself. - #ZS: Accounts should automatically generate user_ids if they don't already have one now, so this might not be necessary for anything other than my account's collections + # ZS: This part is a bit weird. Some accounts used to not have saved user ids, and in the process of testing I think I created some duplicate accounts for myself. + # ZS: Accounts should automatically generate user_ids if they don't already have one now, so this might not be necessary for anything other than my account's collections if 'user_email_address' in self.record: user_email = self.record['user_email_address'] - #ZS: Get user's collections if they exist + # ZS: Get user's collections if they exist user_id = None user_id = get_user_id("email_address", user_email) elif 'user_id' in self.record: @@ -153,7 +164,7 @@ class UserSession: user_github_id = self.record['github_id'] user_id = None user_id = get_user_id("github_id", user_github_id) - else: #ZS: Anonymous user + else: # ZS: Anonymous user return None return user_id @@ -170,9 +181,11 @@ class UserSession: def user_collections(self): """List of user's collections""" - #ZS: Get user's collections if they exist + # ZS: Get user's collections if they exist collections = get_user_collections(self.user_id) - collections = [item for item in collections if item['name'] != "Your Default Collection"] + [item for item in collections if item['name'] == "Your Default Collection"] #ZS: Ensure Default Collection is last in list + collections = [item for item in collections if item['name'] != "Your Default Collection"] + \ + [item for item in collections if item['name'] + == "Your Default Collection"] # ZS: Ensure Default Collection is last in list return collections @property @@ -189,7 +202,7 @@ class UserSession: 'created_timestamp': datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), 'changed_timestamp': datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), 'num_members': len(traits), - 'members': list(traits) } + 'members': list(traits)} current_collections = self.user_collections current_collections.append(collection_dict) @@ -228,12 +241,14 @@ class UserSession: this_collection = self.get_collection_by_id(collection_id) updated_collection = this_collection - current_members_minus_new = [member for member in this_collection['members'] if member not in traits_to_add] + current_members_minus_new = [ + member for member in this_collection['members'] if member not in traits_to_add] updated_traits = traits_to_add + current_members_minus_new updated_collection['members'] = updated_traits updated_collection['num_members'] = len(updated_traits) - updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') + updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime( + '%b %d %Y %I:%M%p') updated_collections = [] for collection in self.user_collections: @@ -259,7 +274,8 @@ class UserSession: updated_collection['members'] = updated_traits updated_collection['num_members'] = len(updated_traits) - updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') + updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime( + '%b %d %Y %I:%M%p') updated_collections = [] for collection in self.user_collections: @@ -302,5 +318,3 @@ class UserSession: # And more importantly delete the redis record Redis.delete(self.redis_key) self.logged_in = False - - diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 37f2094f..f9b8f310 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -140,8 +140,8 @@ def handle_bad_request(e): logger.error(traceback.format_exc()) now = datetime.datetime.utcnow() time_str = now.strftime('%l:%M%p UTC %b %d, %Y') - formatted_lines = [request.url + - " ("+time_str+")"]+traceback.format_exc().splitlines() + formatted_lines = [request.url + + " (" + time_str + ")"] + traceback.format_exc().splitlines() # Handle random animations # Use a cookie to have one animation on refresh @@ -240,7 +240,7 @@ def search_page(): if USE_REDIS and valid_search: Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL)) - Redis.expire(key, 60*60) + Redis.expire(key, 60 * 60) if valid_search: return render_template("search_result_page.html", **result) @@ -518,10 +518,10 @@ def export_perm_data(): ["#N_genotypes: " + str(perm_info['n_genotypes'])], ["#Genotype_file: " + perm_info['genofile']], ["#Units_linkage: " + perm_info['units_linkage']], - ["#Permutation_stratified_by: " + - ", ".join([str(cofactor) for cofactor in perm_info['strat_cofactors']])], - ["#RESULTS_1: Suggestive LRS(p=0.63) = " + - str(np.percentile(np.array(perm_info['perm_data']), 67))], + ["#Permutation_stratified_by: " + + ", ".join([str(cofactor) for cofactor in perm_info['strat_cofactors']])], + ["#RESULTS_1: Suggestive LRS(p=0.63) = " + + str(np.percentile(np.array(perm_info['perm_data']), 67))], ["#RESULTS_2: Significant LRS(p=0.05) = " + str( np.percentile(np.array(perm_info['perm_data']), 95))], ["#RESULTS_3: Highly Significant LRS(p=0.01) = " + str( @@ -601,7 +601,7 @@ def heatmap_page(): pickled_result = pickle.dumps(result, pickle.HIGHEST_PROTOCOL) logger.info("pickled result length:", len(pickled_result)) Redis.set(key, pickled_result) - Redis.expire(key, 60*60) + Redis.expire(key, 60 * 60) with Bench("Rendering template"): rendered_template = render_template("heatmap.html", **result) diff --git a/wqflask/wqflask/wgcna/wgcna_analysis.py b/wqflask/wqflask/wgcna/wgcna_analysis.py index 21516b30..f96892a0 100644 --- a/wqflask/wqflask/wgcna/wgcna_analysis.py +++ b/wqflask/wqflask/wgcna/wgcna_analysis.py @@ -70,7 +70,7 @@ class WGCNA: self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')] print(("Retrieved phenotype data from database", - requestform['trait_list'])) + requestform['trait_list'])) helper_functions.get_trait_db_obs(self, self.trait_db_list) # self.input contains the phenotype values we need to send to R diff --git a/wqflask/wsgi.py b/wqflask/wsgi.py index be9c7b37..755da333 100644 --- a/wqflask/wsgi.py +++ b/wqflask/wsgi.py @@ -1,4 +1,4 @@ -from run_gunicorn import app as application # expect application as a name +from run_gunicorn import app as application # expect application as a name if __name__ == "__main__": application.run() |