From a7f2e8dea0a84a70aec29159a9ae35bd55ba047d Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 17 Jun 2021 08:52:17 +0300 Subject: doc: Add results of investing rpy2 blocking issue in gn2 --- doc/rpy2-performance.org | 170 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 doc/rpy2-performance.org diff --git a/doc/rpy2-performance.org b/doc/rpy2-performance.org new file mode 100644 index 00000000..29b99ba0 --- /dev/null +++ b/doc/rpy2-performance.org @@ -0,0 +1,170 @@ +* Python-Rpy2 performance issues with genenetwork2 + +At one point, genenetwork2 was down. A possible cause was that it +wrote into the log file in an infinite loop due to rpy2, so a solution +was to empty it. Currently, as a workaround, rpy2 is disabled by +removing its imports. This affects the WGCNA/CTL imports and involves +commenting out the Biweight Midcorrelation option in the trait page. See: + +- [[https://github.com/genenetwork/genenetwork2/commit/1baf5f7611909c651483208184c5fbf7d4a7a088][1baf5f7]] +- [[https://github.com/genenetwork/genenetwork2/commit/afee4d625248565857df98d3510f680ae6204864][afee4d6]] +- [[https://github.com/genenetwork/genenetwork2/commit/c458bf0ad731e5e5fd9cbd0686936b3a441bae63][c458bf0]] +- [[https://github.com/genenetwork/genenetwork2/commit/d31f3f763471b19559ca74e73b52b3cb5e7153ce][d31f3f7]] + +** Reproducing the problem + +I went back to commit #b8408cea. With regard to logs, I never +experienced any log issue. 
Perhaps it's because of how I start my +server: + +: env SERVER_PORT=5004 TMPDIR=/home/bonface/tmp WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG GENENETWORK_FILES=/home/bonface/data/genotype_files/ GN2_PROFILE=/home/bonface/opt/python3-genenetwork2 ./scripts/run_debug.sh + +However, when loading the homepage, I occasionally ran into this trace: + +#+begin_export ascii +DEBUG:wqflask.views:.check_access_permissions: @app.before_request check_access_permissions +DEBUG:wqflask.views:.shutdown_session: remove db_session +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Error: ignoring SIGPIPE signal + +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: In addition: +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Warning messages: + +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: 1: +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: In (function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE, : +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: + +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: library '/home/bonface/R/x86_64-unknown-linux-gnu-library/4.0' contains no packages + +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: 2: +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: In (function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE, : +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: + +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: library '/home/bonface/R/x86_64-unknown-linux-gnu-library/4.0' contains no packages + +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Fatal error: unable to initialize the JIT + + +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: + *** caught segfault *** + +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: address (nil), cause 'memory not mapped' + +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: +Possible actions: +1: abort (with core dump, if enabled) +2: 
normal R exit +3: exit R without saving workspace +4: exit R saving workspace + +Selection: + +#+end_export + +This blocks the flask service. Seems to be related to: [[https://github.com/rpy2/rpy2/issues/769][rpy2-issue#769]] +and [[https://github.com/rpy2/rpy2/issues/809][rpy2-issue#809]]. I tried to reproduce this problem using some endpoint: + +#+begin_src python +@app.route("/test") + def test(): + from rpy2 import robjects as ro + from rpy2 import rinterface + from threading import Thread + + def rpy2_init_simple(): + rinterface.initr_simple() + + thread = Thread(target=rpy2_init_simple) + thread.start() + return "This is a test after importing rpy2" +#+end_src + +which generates this trace: + +#+begin_export ascii +/home/bonface/opt/python3-genenetwork2/lib/python3.8/site-packages/rpy2/rinterface.py:955: UserWarning: R is not initialized by the main thread. + Its taking over SIGINT cannot be reversed here, and as a + consequence the embedded R cannot be interrupted with Ctrl-C. + Consider (re)setting the signal handler of your choice from + the main thread. 
+warnings.warn( +DEBUG:wqflask.views:.shutdown_session: remove db_session + +#+end_export + +Modifying the endpoint to: + +#+begin_src python +@app.route("/test") + def test(): + import wqflask.correlation.show_corr_results + import wqflask.ctl.ctl_analysis + import time + from wqflask.correlation.correlation_functions import cal_zero_order_corr_for_tiss + + print("Sleeping for 3 seconds") + time.sleep(3) + return "This is a test after importing rpy2" +#+end_src + +and refreshing the page a couple of times, I get: + +#+begin_export ascii +DEBUG:wqflask.views:.check_access_permissions: @app.before_request check_access_ +permissions +Sleeping for 3 seconds +DEBUG:wqflask.views:.shutdown_session: remove db_session +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Error: ignoring SIGPI +PE signal + +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: In addition: +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Warning messages: + +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: 1: +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: In (function (package +, help, pos = 2, lib.loc = NULL, character.only = FALSE, : +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: + +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: library '/home/bonfa +ce/R/x86_64-unknown-linux-gnu-library/4.0' contains no packages + +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: 2: +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: In (function (package +, help, pos = 2, lib.loc = NULL, character.only = FALSE, : +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: + +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: library '/home/bonfa +ce/R/x86_64-unknown-linux-gnu-library/4.0' contains no packages + +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: +*** caught segfault *** + +WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: address (nil), cause +'memory not mapped' + 
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: +Possible actions: +1: abort (with core dump, if enabled) +2: normal R exit +3: exit R without saving workspace +4: exit R saving workspace + +Selection: [2021-06-16 13:11:00 +0300] [18657] [INFO] Handling signal: winch +[2021-06-16 13:11:00 +0300] [18657] [INFO] Handling signal: winch +[2021-06-16 13:13:02 +0300] [18657] [INFO] Handling signal: winch +#+end_export + +However, this seems to be non-deterministic, in the sense that I can't really pin what causes the above. I've tried to write a Locust Test that simulates users hitting that endpoint: + +#+begin_src python +"""Load test a single trait page""" +from locust import HttpUser, task, between + + + class LoadTest(HttpUser): + wait_time = between(1, 2.5) + + @task + def fetch_trait(self): + """Fetch a single trait""" + self.client.get("/test") +#+end_src -- cgit v1.2.3 From 7cbf9f75ce3e7655f93dd5c3b975ae4430d567fb Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 24 May 2021 16:35:11 +0300 Subject: add query for fetching probeset data --- wqflask/base/data_set.py | 258 +++++++++++++-------- wqflask/wqflask/correlation/correlation_gn3_api.py | 144 +++++++++++- 2 files changed, 303 insertions(+), 99 deletions(-) diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 75ddf278..10f0e110 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -166,7 +166,6 @@ class DatasetType: if t in ['pheno', 'other_pheno']: group_name = name.replace("Publish", "") - results = g.db.execute(sql_query_mapping[t] % group_name).fetchone() if results: self.datasets[name] = dataset_name_mapping[t] @@ -278,7 +277,7 @@ class Markers: filtered_markers = [] for marker in self.markers: if marker['name'] in p_values: - #logger.debug("marker {} IS in p_values".format(i)) + # logger.debug("marker {} IS in p_values".format(i)) marker['p_value'] = p_values[marker['name']] if math.isnan(marker['p_value']) or (marker['p_value'] <= 0): 
marker['lod_score'] = 0 @@ -299,7 +298,7 @@ class HumanMarkers(Markers): self.markers = [] for line in marker_data_fh: splat = line.strip().split() - #logger.debug("splat:", splat) + # logger.debug("splat:", splat) if len(specified_markers) > 0: if splat[1] in specified_markers: marker = {} @@ -441,7 +440,7 @@ class DatasetGroup: # genotype_1 is Dataset Object without parents and f1 # genotype_2 is Dataset Object with parents and f1 (not for intercross) - #genotype_1 = reaper.Dataset() + # genotype_1 = reaper.Dataset() # reaper barfs on unicode filenames, so here we ensure it's a string if self.genofile: @@ -650,9 +649,39 @@ class DataSet: - def get_trait_data(self, sample_list=None): + + def chunk_dataset(self, dataset, n): + + + results = {} + + query = """ + SELECT ProbeSetXRef.DataId,ProbeSet.Name + FROM ProbeSet, ProbeSetXRef, ProbeSetFreeze + WHERE ProbeSetFreeze.Name = '{}' AND + ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND + ProbeSetXRef.ProbeSetId = ProbeSet.Id + """.format(self.name) + + # should cache this + + traits_name_dict= dict(g.db.execute(query).fetchall()) + + + + + for i in range(0, len(dataset), n): + matrix = list(dataset[i:i + n]) + trait_name = traits_name_dict[matrix[0][0]] + + my_values = [value for (trait_name, strain, value) in matrix] + results[trait_name] = my_values + return results + + def get_probeset_data(self, sample_list=None, trait_ids=None): if sample_list: self.samplelist = sample_list + else: self.samplelist = self.group.samplelist @@ -666,27 +695,59 @@ class DataSet: and Strain.SpeciesId=Species.Id and Species.name = '{}' """.format(create_in_clause(self.samplelist), *mescape(self.group.species)) - logger.sql(query) results = dict(g.db.execute(query).fetchall()) sample_ids = [results[item] for item in self.samplelist] + query = """SELECT * from ProbeSetData + where StrainID in {} + and id in (SELECT ProbeSetXRef.DataId + FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) + WHERE ProbeSetXRef.ProbeSetFreezeId = 
ProbeSetFreeze.Id + and ProbeSetFreeze.Name = '{}' + and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(create_in_clause(sample_ids),self.name) + + query_results=list(g.db.execute(query).fetchall()) + + data_results=self.chunk_dataset(query_results, len(sample_ids)) + self.trait_data=data_results + + def get_trait_data(self, sample_list=None): + if sample_list: + self.samplelist=sample_list + else: + self.samplelist=self.group.samplelist + + if self.group.parlist != None and self.group.f1list != None: + if (self.group.parlist + self.group.f1list) in self.samplelist: + self.samplelist += self.group.parlist + self.group.f1list + + query=""" + SELECT Strain.Name, Strain.Id FROM Strain, Species + WHERE Strain.Name IN {} + and Strain.SpeciesId=Species.Id + and Species.name = '{}' + """.format(create_in_clause(self.samplelist), *mescape(self.group.species)) + logger.sql(query) + results=dict(g.db.execute(query).fetchall()) + sample_ids=[results[item] for item in self.samplelist] + # MySQL limits the number of tables that can be used in a join to 61, # so we break the sample ids into smaller chunks # Postgres doesn't have that limit, so we can get rid of this after we transition - chunk_size = 50 - number_chunks = int(math.ceil(len(sample_ids) / chunk_size)) - trait_sample_data = [] + chunk_size=50 + number_chunks=int(math.ceil(len(sample_ids) / chunk_size)) + trait_sample_data=[] for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks): if self.type == "Publish": - dataset_type = "Phenotype" + dataset_type="Phenotype" else: - dataset_type = self.type - temp = ['T%s.value' % item for item in sample_ids_step] + dataset_type=self.type + temp=['T%s.value' % item for item in sample_ids_step] if self.type == "Publish": - query = "SELECT {}XRef.Id,".format(escape(self.type)) + query="SELECT {}XRef.Id,".format(escape(self.type)) else: - query = "SELECT {}.Name,".format(escape(dataset_type)) - data_start_pos = 1 + query="SELECT 
{}.Name,".format(escape(dataset_type)) + data_start_pos=1 query += ', '.join(temp) query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(dataset_type, self.type, @@ -715,27 +776,27 @@ class DataSet: """.format(*mescape(self.type, self.type, self.type, self.type, self.name, dataset_type, self.type, self.type, dataset_type)) - results = g.db.execute(query).fetchall() + results=g.db.execute(query).fetchall() trait_sample_data.append(results) - trait_count = len(trait_sample_data[0]) - self.trait_data = collections.defaultdict(list) + trait_count=len(trait_sample_data[0]) + self.trait_data=collections.defaultdict(list) # put all of the separate data together into a dictionary where the keys are # trait names and values are lists of sample values for trait_counter in range(trait_count): - trait_name = trait_sample_data[0][trait_counter][0] + trait_name=trait_sample_data[0][trait_counter][0] for chunk_counter in range(int(number_chunks)): self.trait_data[trait_name] += ( trait_sample_data[chunk_counter][trait_counter][data_start_pos:]) class PhenotypeDataSet(DataSet): - DS_NAME_MAP['Publish'] = 'PhenotypeDataSet' + DS_NAME_MAP['Publish']='PhenotypeDataSet' def setup(self): # Fields in the database table - self.search_fields = ['Phenotype.Post_publication_description', + self.search_fields=['Phenotype.Post_publication_description', 'Phenotype.Pre_publication_description', 'Phenotype.Pre_publication_abbreviation', 'Phenotype.Post_publication_abbreviation', @@ -748,7 +809,7 @@ class PhenotypeDataSet(DataSet): 'PublishXRef.Id'] # Figure out what display_fields is - self.display_fields = ['name', 'group_code', + self.display_fields=['name', 'group_code', 'pubmed_id', 'pre_publication_description', 'post_publication_description', @@ -766,7 +827,7 @@ class PhenotypeDataSet(DataSet): 'sequence', 'units', 'comments'] # Fields displayed in the search results table header - self.header_fields = ['Index', + self.header_fields=['Index', 'Record', 'Description', 'Authors', @@ 
-775,9 +836,9 @@ class PhenotypeDataSet(DataSet): 'Max LRS Location', 'Additive Effect'] - self.type = 'Publish' + self.type='Publish' - self.query_for_group = ''' + self.query_for_group=''' SELECT InbredSet.Name, InbredSet.Id, InbredSet.GeneticType FROM @@ -797,13 +858,13 @@ class PhenotypeDataSet(DataSet): if not this_trait.haveinfo: this_trait.retrieve_info(get_qtl_info=True) - description = this_trait.post_publication_description + description=this_trait.post_publication_description # If the dataset is confidential and the user has access to confidential # phenotype traits, then display the pre-publication description instead # of the post-publication description if this_trait.confidential: - this_trait.description_display = "" + this_trait.description_display="" continue # for now, because no authorization features if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait( @@ -811,46 +872,46 @@ class PhenotypeDataSet(DataSet): userName=self.userName, authorized_users=this_trait.authorized_users): - description = this_trait.pre_publication_description + description=this_trait.pre_publication_description if len(description) > 0: - this_trait.description_display = description.strip() + this_trait.description_display=description.strip() else: - this_trait.description_display = "" + this_trait.description_display="" if not this_trait.year.isdigit(): - this_trait.pubmed_text = "N/A" + this_trait.pubmed_text="N/A" else: - this_trait.pubmed_text = this_trait.year + this_trait.pubmed_text=this_trait.year if this_trait.pubmed_id: - this_trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % this_trait.pubmed_id + this_trait.pubmed_link=webqtlConfig.PUBMEDLINK_URL % this_trait.pubmed_id # LRS and its location - this_trait.LRS_score_repr = "N/A" - this_trait.LRS_location_repr = "N/A" + this_trait.LRS_score_repr="N/A" + this_trait.LRS_location_repr="N/A" if this_trait.lrs: - query = """ + query=""" select Geno.Chr, Geno.Mb from Geno, Species where Species.Name = '%s' and 
Geno.Name = '%s' and Geno.SpeciesId = Species.Id """ % (species, this_trait.locus) logger.sql(query) - result = g.db.execute(query).fetchone() + result=g.db.execute(query).fetchone() if result: if result[0] and result[1]: - LRS_Chr = result[0] - LRS_Mb = result[1] + LRS_Chr=result[0] + LRS_Mb=result[1] - this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs - this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % ( + this_trait.LRS_score_repr=LRS_score_repr='%3.1f' % this_trait.lrs + this_trait.LRS_location_repr=LRS_location_repr='Chr%s: %.6f' % ( LRS_Chr, float(LRS_Mb)) def retrieve_sample_data(self, trait): - query = """ + query=""" SELECT Strain.Name, PublishData.value, PublishSE.error, NStrain.count, Strain.Name2 FROM @@ -868,34 +929,34 @@ class PhenotypeDataSet(DataSet): Strain.Name """ logger.sql(query) - results = g.db.execute(query, (trait, self.id)).fetchall() + results=g.db.execute(query, (trait, self.id)).fetchall() return results class GenotypeDataSet(DataSet): - DS_NAME_MAP['Geno'] = 'GenotypeDataSet' + DS_NAME_MAP['Geno']='GenotypeDataSet' def setup(self): # Fields in the database table - self.search_fields = ['Name', + self.search_fields=['Name', 'Chr'] # Find out what display_fields is - self.display_fields = ['name', + self.display_fields=['name', 'chr', 'mb', 'source2', 'sequence'] # Fields displayed in the search results table header - self.header_fields = ['Index', + self.header_fields=['Index', 'ID', 'Location'] # Todo: Obsolete or rename this field - self.type = 'Geno' + self.type='Geno' - self.query_for_group = ''' + self.query_for_group=''' SELECT InbredSet.Name, InbredSet.Id, InbredSet.GeneticType FROM @@ -914,11 +975,11 @@ class GenotypeDataSet(DataSet): this_trait.retrieveInfo() if this_trait.chr and this_trait.mb: - this_trait.location_repr = 'Chr%s: %.6f' % ( + this_trait.location_repr='Chr%s: %.6f' % ( this_trait.chr, float(this_trait.mb)) def retrieve_sample_data(self, trait): - query = """ + query=""" 
SELECT Strain.Name, GenoData.value, GenoSE.error, "N/A", Strain.Name2 FROM @@ -935,7 +996,7 @@ class GenotypeDataSet(DataSet): Strain.Name """ logger.sql(query) - results = g.db.execute(query, + results=g.db.execute(query, (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait, self.name)).fetchall() return results @@ -949,11 +1010,11 @@ class MrnaAssayDataSet(DataSet): platform and is far too specific. ''' - DS_NAME_MAP['ProbeSet'] = 'MrnaAssayDataSet' + DS_NAME_MAP['ProbeSet']='MrnaAssayDataSet' def setup(self): # Fields in the database table - self.search_fields = ['Name', + self.search_fields=['Name', 'Description', 'Probe_Target_Description', 'Symbol', @@ -963,7 +1024,7 @@ class MrnaAssayDataSet(DataSet): 'RefSeq_TranscriptId'] # Find out what display_fields is - self.display_fields = ['name', 'symbol', + self.display_fields=['name', 'symbol', 'description', 'probe_target_description', 'chr', 'mb', 'alias', 'geneid', @@ -983,7 +1044,7 @@ class MrnaAssayDataSet(DataSet): 'flag'] # Fields displayed in the search results table header - self.header_fields = ['Index', + self.header_fields=['Index', 'Record', 'Symbol', 'Description', @@ -994,9 +1055,9 @@ class MrnaAssayDataSet(DataSet): 'Additive Effect'] # Todo: Obsolete or rename this field - self.type = 'ProbeSet' + self.type='ProbeSet' - self.query_for_group = ''' + self.query_for_group=''' SELECT InbredSet.Name, InbredSet.Id, InbredSet.GeneticType FROM @@ -1014,7 +1075,7 @@ class MrnaAssayDataSet(DataSet): # Note: setting trait_list to [] is probably not a great idea. 
if not trait_list: - trait_list = [] + trait_list=[] for this_trait in trait_list: @@ -1022,33 +1083,33 @@ class MrnaAssayDataSet(DataSet): this_trait.retrieveInfo(QTL=1) if not this_trait.symbol: - this_trait.symbol = "N/A" + this_trait.symbol="N/A" # XZ, 12/08/2008: description # XZ, 06/05/2009: Rob asked to add probe target description - description_string = str( + description_string=str( str(this_trait.description).strip(codecs.BOM_UTF8), 'utf-8') - target_string = str( + target_string=str( str(this_trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8') if len(description_string) > 1 and description_string != 'None': - description_display = description_string + description_display=description_string else: - description_display = this_trait.symbol + description_display=this_trait.symbol if (len(description_display) > 1 and description_display != 'N/A' and len(target_string) > 1 and target_string != 'None'): - description_display = description_display + '; ' + target_string.strip() + description_display=description_display + '; ' + target_string.strip() # Save it for the jinja2 template - this_trait.description_display = description_display + this_trait.description_display=description_display if this_trait.chr and this_trait.mb: - this_trait.location_repr = 'Chr%s: %.6f' % ( + this_trait.location_repr='Chr%s: %.6f' % ( this_trait.chr, float(this_trait.mb)) # Get mean expression value - query = ( + query=( """select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet where ProbeSetXRef.ProbeSetFreezeId = %s and ProbeSet.Id = ProbeSetXRef.ProbeSetId and @@ -1056,44 +1117,45 @@ class MrnaAssayDataSet(DataSet): """ % (escape(str(this_trait.dataset.id)), escape(this_trait.name))) - #logger.debug("query is:", pf(query)) + # logger.debug("query is:", pf(query)) logger.sql(query) - result = g.db.execute(query).fetchone() + result=g.db.execute(query).fetchone() - mean = result[0] if result else 0 + mean=result[0] if result else 0 if mean: - this_trait.mean = "%2.3f" % 
mean + this_trait.mean="%2.3f" % mean # LRS and its location - this_trait.LRS_score_repr = 'N/A' - this_trait.LRS_location_repr = 'N/A' + this_trait.LRS_score_repr='N/A' + this_trait.LRS_location_repr='N/A' # Max LRS and its Locus location if this_trait.lrs and this_trait.locus: - query = """ + query=""" select Geno.Chr, Geno.Mb from Geno, Species where Species.Name = '{}' and Geno.Name = '{}' and Geno.SpeciesId = Species.Id """.format(species, this_trait.locus) logger.sql(query) - result = g.db.execute(query).fetchone() + result=g.db.execute(query).fetchone() if result: - lrs_chr, lrs_mb = result - this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs - this_trait.LRS_location_repr = 'Chr%s: %.6f' % ( + lrs_chr, lrs_mb=result + this_trait.LRS_score_repr='%3.1f' % this_trait.lrs + this_trait.LRS_location_repr='Chr%s: %.6f' % ( lrs_chr, float(lrs_mb)) return trait_list def retrieve_sample_data(self, trait): - query = """ + query=""" SELECT Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2 FROM - (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef) + (ProbeSetData, ProbeSetFreeze, + Strain, ProbeSet, ProbeSetXRef) left join ProbeSetSE on (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId) left join NStrain on @@ -1109,19 +1171,19 @@ class MrnaAssayDataSet(DataSet): Strain.Name """ % (escape(trait), escape(self.name)) logger.sql(query) - results = g.db.execute(query).fetchall() - #logger.debug("RETRIEVED RESULTS HERE:", results) + results=g.db.execute(query).fetchall() + # logger.debug("RETRIEVED RESULTS HERE:", results) return results def retrieve_genes(self, column_name): - query = """ + query=""" select ProbeSet.Name, ProbeSet.%s from ProbeSet,ProbeSetXRef where ProbeSetXRef.ProbeSetFreezeId = %s and ProbeSetXRef.ProbeSetId=ProbeSet.Id; """ % (column_name, escape(str(self.id))) logger.sql(query) - results = g.db.execute(query).fetchall() + results=g.db.execute(query).fetchall() return 
dict(results) @@ -1129,40 +1191,40 @@ class MrnaAssayDataSet(DataSet): class TempDataSet(DataSet): '''Temporary user-generated data set''' - DS_NAME_MAP['Temp'] = 'TempDataSet' + DS_NAME_MAP['Temp']='TempDataSet' def setup(self): - self.search_fields = ['name', + self.search_fields=['name', 'description'] - self.display_fields = ['name', + self.display_fields=['name', 'description'] - self.header_fields = ['Name', + self.header_fields=['Name', 'Description'] - self.type = 'Temp' + self.type='Temp' # Need to double check later how these are used - self.id = 1 - self.fullname = 'Temporary Storage' - self.shortname = 'Temp' + self.id=1 + self.fullname='Temporary Storage' + self.shortname='Temp' def geno_mrna_confidentiality(ob): - dataset_table = ob.type + "Freeze" - #logger.debug("dataset_table [%s]: %s" % (type(dataset_table), dataset_table)) + dataset_table=ob.type + "Freeze" + # logger.debug("dataset_table [%s]: %s" % (type(dataset_table), dataset_table)) - query = '''SELECT Id, Name, FullName, confidentiality, + query='''SELECT Id, Name, FullName, confidentiality, AuthorisedUsers FROM %s WHERE Name = "%s"''' % (dataset_table, ob.name) logger.sql(query) - result = g.db.execute(query) + result=g.db.execute(query) (dataset_id, name, full_name, confidential, - authorized_users) = result.fetchall()[0] + authorized_users)=result.fetchall()[0] if confidential: return True diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index 6974dbd5..3e1ce1dc 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -27,6 +27,34 @@ def create_target_this_trait(start_vars): return (this_dataset, this_trait, target_dataset, sample_data) + +def test_process_data(this_trait,dataset,start_vars): + """test function for bxd,all and other sample data""" + + corr_samples_group = start_vars["corr_samples_group"] + + + primary_samples = dataset.group.samplelist + 
if dataset.group.parlist != None: + primary_samples += dataset.group.parlist + if dataset.group.f1list != None: + primary_samples += dataset.group.f1list + + # If either BXD/whatever Only or All Samples, append all of that group's samplelist + if corr_samples_group != 'samples_other': + sample_data = process_samples(start_vars, primary_samples) + + # If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and + # exclude the primary samples (because they would have been added in the previous + # if statement if the user selected All Samples) + if corr_samples_group != 'samples_primary': + if corr_samples_group == 'samples_other': + primary_samples = [x for x in primary_samples if x not in ( + dataset.group.parlist + dataset.group.f1list)] + sample_data = process_samples(start_vars, list(this_trait.data.keys()), primary_samples) + + return sample_data + def process_samples(start_vars, sample_names, excluded_samples=None): """process samples""" sample_data = {} @@ -118,13 +146,22 @@ def fetch_sample_data(start_vars, this_trait, this_dataset, target_dataset): sample_data = process_samples( start_vars, this_dataset.group.samplelist) - target_dataset.get_trait_data(list(sample_data.keys())) + + # sample_data = test_process_data(this_trait,this_dataset,start_vars) + + if target_dataset.type =="ProbeSet": + # pass + target_dataset.get_probeset_data(list(sample_data.keys())) + else: + target_dataset.get_trait_data(list(sample_data.keys())) this_trait = retrieve_sample_data(this_trait, this_dataset) this_trait_data = { "trait_sample_data": sample_data, "trait_id": start_vars["trait_id"] } + # should remove this len(samplelist) == len(strain_values) + results = map_shared_keys_to_values( target_dataset.samplelist, target_dataset.trait_data) @@ -201,6 +238,7 @@ def compute_correlation(start_vars, method="pearson"): "target_dataset": start_vars['corr_dataset'], "return_results": corr_return_results} + return correlation_data @@ -261,3 +299,107 @@ def 
get_tissue_correlation_input(this_trait, trait_symbol_dict): } return (primary_tissue_data, target_tissue_data) return None + + +def generate_corr_data(corr_results, target_dataset): + counter = 0 + results_list = [] + for (index, trait_corr) in enumerate(corr_results): + trait_name = list(trait_corr.keys())[0] + trait = create_trait(dataset=target_dataset, + name=trait_name) + + trait_corr_data = trait_corr[trait_name] + + if trait.view == False: + continue + results_dict = {} + results_dict['index'] = index + 1 + results_dict['trait_id'] = trait.name + results_dict['dataset'] = trait.dataset.name + # results_dict['hmac'] = hmac.data_hmac( + # '{}:{}'.format(trait.name, trait.dataset.name)) + if target_dataset.type == "ProbeSet": + results_dict['symbol'] = trait.symbol + results_dict['description'] = "N/A" + results_dict['location'] = trait.location_repr + results_dict['mean'] = "N/A" + results_dict['additive'] = "N/A" + if bool(trait.description_display): + results_dict['description'] = trait.description_display + if bool(trait.mean): + results_dict['mean'] = f"{float(trait.mean):.3f}" + try: + results_dict['lod_score'] = f"{float(trait.LRS_score_repr) / 4.61:.1f}" + except: + results_dict['lod_score'] = "N/A" + results_dict['lrs_location'] = trait.LRS_location_repr + if bool(trait.additive): + results_dict['additive'] = f"{float(trait.additive):.3f}" + results_dict['sample_r'] = f"{float(trait_corr_data.get('sample_r',0)):.3f}" + results_dict['num_overlap'] = trait.num_overlap + results_dict['sample_p'] = f"{float(trait_corr_data.get('sample_p',0)):.3e}" + results_dict['lit_corr'] = "--" + results_dict['tissue_corr'] = "--" + results_dict['tissue_pvalue'] = "--" + tissue_corr = trait_corr_data.get('tissue_corr',0) + lit_corr = trait_corr_data.get('lit_corr',0) + if bool(lit_corr): + results_dict['lit_corr'] = f"{float(trait_corr_data.get('lit_corr',0)):.3f}" + if bool(tissue_corr): + results_dict['tissue_corr'] = 
f"{float(trait_corr_data.get('tissue_corr',0)):.3f}" + results_dict['tissue_pvalue'] = f"{float(trait_corr_data.get('tissue_pvalue',0)):.3e}" + elif target_dataset.type == "Publish": + results_dict['abbreviation_display'] = "N/A" + results_dict['description'] = "N/A" + results_dict['mean'] = "N/A" + results_dict['authors_display'] = "N/A" + results_dict['additive'] = "N/A" + if for_api: + results_dict['pubmed_id'] = "N/A" + results_dict['year'] = "N/A" + else: + results_dict['pubmed_link'] = "N/A" + results_dict['pubmed_text'] = "N/A" + + if bool(trait.abbreviation): + results_dict['abbreviation_display'] = trait.abbreviation + if bool(trait.description_display): + results_dict['description'] = trait.description_display + if bool(trait.mean): + results_dict['mean'] = f"{float(trait.mean):.3f}" + if bool(trait.authors): + authors_list = trait.authors.split(',') + if len(authors_list) > 6: + results_dict['authors_display'] = ", ".join( + authors_list[:6]) + ", et al." + else: + results_dict['authors_display'] = trait.authors + if bool(trait.pubmed_id): + if for_api: + results_dict['pubmed_id'] = trait.pubmed_id + results_dict['year'] = trait.pubmed_text + else: + results_dict['pubmed_link'] = trait.pubmed_link + results_dict['pubmed_text'] = trait.pubmed_text + try: + results_dict['lod_score'] = f"{float(trait.LRS_score_repr) / 4.61:.1f}" + except: + results_dict['lod_score'] = "N/A" + results_dict['lrs_location'] = trait.LRS_location_repr + if bool(trait.additive): + results_dict['additive'] = f"{float(trait.additive):.3f}" + results_dict['sample_r'] = f"{float(trait_corr_data.get('sample_r',0)):.3f}" + results_dict['num_overlap'] = trait.num_overlap + results_dict['sample_p'] = f"{float(trait_corr_data.get('sample_p',0)):.3e}" + else: + results_dict['location'] = trait.location_repr + results_dict['sample_r'] = f"{float(trait_corr_data.get('sample_r',0)):.3f}" + results_dict['num_overlap'] = trait.num_overlap + results_dict['sample_p'] = 
f"{float(trait_corr_data.get('sample_p',0)):.3e}" + + results_list.append(results_dict) + + return results_list + + -- cgit v1.2.3 From 96eeaeec98de74607108127f3c347542e6a3e991 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 24 May 2021 16:37:36 +0300 Subject: pep8 formatting --- wqflask/base/data_set.py | 214 ++++++++++++++++++++++++----------------------- 1 file changed, 109 insertions(+), 105 deletions(-) diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 10f0e110..7080b7b7 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -647,12 +647,8 @@ class DataSet: "Dataset {} is not yet available in GeneNetwork.".format(self.name)) pass - - - def chunk_dataset(self, dataset, n): - results = {} query = """ @@ -665,10 +661,7 @@ class DataSet: # should cache this - traits_name_dict= dict(g.db.execute(query).fetchall()) - - - + traits_name_dict = dict(g.db.execute(query).fetchall()) for i in range(0, len(dataset), n): matrix = list(dataset[i:i + n]) @@ -704,50 +697,50 @@ class DataSet: FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and ProbeSetFreeze.Name = '{}' - and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(create_in_clause(sample_ids),self.name) + and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(create_in_clause(sample_ids), self.name) - query_results=list(g.db.execute(query).fetchall()) + query_results = list(g.db.execute(query).fetchall()) - data_results=self.chunk_dataset(query_results, len(sample_ids)) - self.trait_data=data_results + data_results = self.chunk_dataset(query_results, len(sample_ids)) + self.trait_data = data_results def get_trait_data(self, sample_list=None): if sample_list: - self.samplelist=sample_list + self.samplelist = sample_list else: - self.samplelist=self.group.samplelist + self.samplelist = self.group.samplelist if self.group.parlist != None and self.group.f1list != None: if (self.group.parlist + self.group.f1list) in 
self.samplelist: self.samplelist += self.group.parlist + self.group.f1list - query=""" + query = """ SELECT Strain.Name, Strain.Id FROM Strain, Species WHERE Strain.Name IN {} and Strain.SpeciesId=Species.Id and Species.name = '{}' """.format(create_in_clause(self.samplelist), *mescape(self.group.species)) logger.sql(query) - results=dict(g.db.execute(query).fetchall()) - sample_ids=[results[item] for item in self.samplelist] + results = dict(g.db.execute(query).fetchall()) + sample_ids = [results[item] for item in self.samplelist] # MySQL limits the number of tables that can be used in a join to 61, # so we break the sample ids into smaller chunks # Postgres doesn't have that limit, so we can get rid of this after we transition - chunk_size=50 - number_chunks=int(math.ceil(len(sample_ids) / chunk_size)) - trait_sample_data=[] + chunk_size = 50 + number_chunks = int(math.ceil(len(sample_ids) / chunk_size)) + trait_sample_data = [] for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks): if self.type == "Publish": - dataset_type="Phenotype" + dataset_type = "Phenotype" else: - dataset_type=self.type - temp=['T%s.value' % item for item in sample_ids_step] + dataset_type = self.type + temp = ['T%s.value' % item for item in sample_ids_step] if self.type == "Publish": - query="SELECT {}XRef.Id,".format(escape(self.type)) + query = "SELECT {}XRef.Id,".format(escape(self.type)) else: - query="SELECT {}.Name,".format(escape(dataset_type)) - data_start_pos=1 + query = "SELECT {}.Name,".format(escape(dataset_type)) + data_start_pos = 1 query += ', '.join(temp) query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(dataset_type, self.type, @@ -776,27 +769,27 @@ class DataSet: """.format(*mescape(self.type, self.type, self.type, self.type, self.name, dataset_type, self.type, self.type, dataset_type)) - results=g.db.execute(query).fetchall() + results = g.db.execute(query).fetchall() trait_sample_data.append(results) - trait_count=len(trait_sample_data[0]) 
- self.trait_data=collections.defaultdict(list) + trait_count = len(trait_sample_data[0]) + self.trait_data = collections.defaultdict(list) # put all of the separate data together into a dictionary where the keys are # trait names and values are lists of sample values for trait_counter in range(trait_count): - trait_name=trait_sample_data[0][trait_counter][0] + trait_name = trait_sample_data[0][trait_counter][0] for chunk_counter in range(int(number_chunks)): self.trait_data[trait_name] += ( trait_sample_data[chunk_counter][trait_counter][data_start_pos:]) class PhenotypeDataSet(DataSet): - DS_NAME_MAP['Publish']='PhenotypeDataSet' + DS_NAME_MAP['Publish'] = 'PhenotypeDataSet' def setup(self): # Fields in the database table - self.search_fields=['Phenotype.Post_publication_description', + self.search_fields = ['Phenotype.Post_publication_description', 'Phenotype.Pre_publication_description', 'Phenotype.Pre_publication_abbreviation', 'Phenotype.Post_publication_abbreviation', @@ -809,7 +802,7 @@ class PhenotypeDataSet(DataSet): 'PublishXRef.Id'] # Figure out what display_fields is - self.display_fields=['name', 'group_code', + self.display_fields = ['name', 'group_code', 'pubmed_id', 'pre_publication_description', 'post_publication_description', @@ -827,7 +820,7 @@ class PhenotypeDataSet(DataSet): 'sequence', 'units', 'comments'] # Fields displayed in the search results table header - self.header_fields=['Index', + self.header_fields = ['Index', 'Record', 'Description', 'Authors', @@ -836,9 +829,9 @@ class PhenotypeDataSet(DataSet): 'Max LRS Location', 'Additive Effect'] - self.type='Publish' + self.type = 'Publish' - self.query_for_group=''' + self.query_for_group = ''' SELECT InbredSet.Name, InbredSet.Id, InbredSet.GeneticType FROM @@ -858,13 +851,13 @@ class PhenotypeDataSet(DataSet): if not this_trait.haveinfo: this_trait.retrieve_info(get_qtl_info=True) - description=this_trait.post_publication_description + description = this_trait.post_publication_description 
# If the dataset is confidential and the user has access to confidential # phenotype traits, then display the pre-publication description instead # of the post-publication description if this_trait.confidential: - this_trait.description_display="" + this_trait.description_display = "" continue # for now, because no authorization features if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait( @@ -872,46 +865,46 @@ class PhenotypeDataSet(DataSet): userName=self.userName, authorized_users=this_trait.authorized_users): - description=this_trait.pre_publication_description + description = this_trait.pre_publication_description if len(description) > 0: - this_trait.description_display=description.strip() + this_trait.description_display = description.strip() else: - this_trait.description_display="" + this_trait.description_display = "" if not this_trait.year.isdigit(): - this_trait.pubmed_text="N/A" + this_trait.pubmed_text = "N/A" else: - this_trait.pubmed_text=this_trait.year + this_trait.pubmed_text = this_trait.year if this_trait.pubmed_id: - this_trait.pubmed_link=webqtlConfig.PUBMEDLINK_URL % this_trait.pubmed_id + this_trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % this_trait.pubmed_id # LRS and its location - this_trait.LRS_score_repr="N/A" - this_trait.LRS_location_repr="N/A" + this_trait.LRS_score_repr = "N/A" + this_trait.LRS_location_repr = "N/A" if this_trait.lrs: - query=""" + query = """ select Geno.Chr, Geno.Mb from Geno, Species where Species.Name = '%s' and Geno.Name = '%s' and Geno.SpeciesId = Species.Id """ % (species, this_trait.locus) logger.sql(query) - result=g.db.execute(query).fetchone() + result = g.db.execute(query).fetchone() if result: if result[0] and result[1]: - LRS_Chr=result[0] - LRS_Mb=result[1] + LRS_Chr = result[0] + LRS_Mb = result[1] - this_trait.LRS_score_repr=LRS_score_repr='%3.1f' % this_trait.lrs - this_trait.LRS_location_repr=LRS_location_repr='Chr%s: %.6f' % ( + this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % 
this_trait.lrs + this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % ( LRS_Chr, float(LRS_Mb)) def retrieve_sample_data(self, trait): - query=""" + query = """ SELECT Strain.Name, PublishData.value, PublishSE.error, NStrain.count, Strain.Name2 FROM @@ -929,34 +922,34 @@ class PhenotypeDataSet(DataSet): Strain.Name """ logger.sql(query) - results=g.db.execute(query, (trait, self.id)).fetchall() + results = g.db.execute(query, (trait, self.id)).fetchall() return results class GenotypeDataSet(DataSet): - DS_NAME_MAP['Geno']='GenotypeDataSet' + DS_NAME_MAP['Geno'] = 'GenotypeDataSet' def setup(self): # Fields in the database table - self.search_fields=['Name', + self.search_fields = ['Name', 'Chr'] # Find out what display_fields is - self.display_fields=['name', + self.display_fields = ['name', 'chr', 'mb', 'source2', 'sequence'] # Fields displayed in the search results table header - self.header_fields=['Index', + self.header_fields = ['Index', 'ID', 'Location'] # Todo: Obsolete or rename this field - self.type='Geno' + self.type = 'Geno' - self.query_for_group=''' + self.query_for_group = ''' SELECT InbredSet.Name, InbredSet.Id, InbredSet.GeneticType FROM @@ -975,11 +968,11 @@ class GenotypeDataSet(DataSet): this_trait.retrieveInfo() if this_trait.chr and this_trait.mb: - this_trait.location_repr='Chr%s: %.6f' % ( + this_trait.location_repr = 'Chr%s: %.6f' % ( this_trait.chr, float(this_trait.mb)) def retrieve_sample_data(self, trait): - query=""" + query = """ SELECT Strain.Name, GenoData.value, GenoSE.error, "N/A", Strain.Name2 FROM @@ -996,7 +989,7 @@ class GenotypeDataSet(DataSet): Strain.Name """ logger.sql(query) - results=g.db.execute(query, + results = g.db.execute(query, (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait, self.name)).fetchall() return results @@ -1010,11 +1003,11 @@ class MrnaAssayDataSet(DataSet): platform and is far too specific. 
''' - DS_NAME_MAP['ProbeSet']='MrnaAssayDataSet' + DS_NAME_MAP['ProbeSet'] = 'MrnaAssayDataSet' def setup(self): # Fields in the database table - self.search_fields=['Name', + self.search_fields = ['Name', 'Description', 'Probe_Target_Description', 'Symbol', @@ -1024,7 +1017,7 @@ class MrnaAssayDataSet(DataSet): 'RefSeq_TranscriptId'] # Find out what display_fields is - self.display_fields=['name', 'symbol', + self.display_fields = ['name', 'symbol', 'description', 'probe_target_description', 'chr', 'mb', 'alias', 'geneid', @@ -1044,7 +1037,7 @@ class MrnaAssayDataSet(DataSet): 'flag'] # Fields displayed in the search results table header - self.header_fields=['Index', + self.header_fields = ['Index', 'Record', 'Symbol', 'Description', @@ -1055,9 +1048,9 @@ class MrnaAssayDataSet(DataSet): 'Additive Effect'] # Todo: Obsolete or rename this field - self.type='ProbeSet' + self.type = 'ProbeSet' - self.query_for_group=''' + self.query_for_group = ''' SELECT InbredSet.Name, InbredSet.Id, InbredSet.GeneticType FROM @@ -1075,7 +1068,7 @@ class MrnaAssayDataSet(DataSet): # Note: setting trait_list to [] is probably not a great idea. 
if not trait_list: - trait_list=[] + trait_list = [] for this_trait in trait_list: @@ -1083,33 +1076,33 @@ class MrnaAssayDataSet(DataSet): this_trait.retrieveInfo(QTL=1) if not this_trait.symbol: - this_trait.symbol="N/A" + this_trait.symbol = "N/A" # XZ, 12/08/2008: description # XZ, 06/05/2009: Rob asked to add probe target description - description_string=str( + description_string = str( str(this_trait.description).strip(codecs.BOM_UTF8), 'utf-8') - target_string=str( + target_string = str( str(this_trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8') if len(description_string) > 1 and description_string != 'None': - description_display=description_string + description_display = description_string else: - description_display=this_trait.symbol + description_display = this_trait.symbol if (len(description_display) > 1 and description_display != 'N/A' and len(target_string) > 1 and target_string != 'None'): - description_display=description_display + '; ' + target_string.strip() + description_display = description_display + '; ' + target_string.strip() # Save it for the jinja2 template - this_trait.description_display=description_display + this_trait.description_display = description_display if this_trait.chr and this_trait.mb: - this_trait.location_repr='Chr%s: %.6f' % ( + this_trait.location_repr = 'Chr%s: %.6f' % ( this_trait.chr, float(this_trait.mb)) # Get mean expression value - query=( + query = ( """select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet where ProbeSetXRef.ProbeSetFreezeId = %s and ProbeSet.Id = ProbeSetXRef.ProbeSetId and @@ -1119,38 +1112,38 @@ class MrnaAssayDataSet(DataSet): # logger.debug("query is:", pf(query)) logger.sql(query) - result=g.db.execute(query).fetchone() + result = g.db.execute(query).fetchone() - mean=result[0] if result else 0 + mean = result[0] if result else 0 if mean: - this_trait.mean="%2.3f" % mean + this_trait.mean = "%2.3f" % mean # LRS and its location - this_trait.LRS_score_repr='N/A' - 
this_trait.LRS_location_repr='N/A' + this_trait.LRS_score_repr = 'N/A' + this_trait.LRS_location_repr = 'N/A' # Max LRS and its Locus location if this_trait.lrs and this_trait.locus: - query=""" + query = """ select Geno.Chr, Geno.Mb from Geno, Species where Species.Name = '{}' and Geno.Name = '{}' and Geno.SpeciesId = Species.Id """.format(species, this_trait.locus) logger.sql(query) - result=g.db.execute(query).fetchone() + result = g.db.execute(query).fetchone() if result: - lrs_chr, lrs_mb=result - this_trait.LRS_score_repr='%3.1f' % this_trait.lrs - this_trait.LRS_location_repr='Chr%s: %.6f' % ( + lrs_chr, lrs_mb = result + this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs + this_trait.LRS_location_repr = 'Chr%s: %.6f' % ( lrs_chr, float(lrs_mb)) return trait_list def retrieve_sample_data(self, trait): - query=""" + query = """ SELECT Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2 FROM @@ -1171,19 +1164,19 @@ class MrnaAssayDataSet(DataSet): Strain.Name """ % (escape(trait), escape(self.name)) logger.sql(query) - results=g.db.execute(query).fetchall() + results = g.db.execute(query).fetchall() # logger.debug("RETRIEVED RESULTS HERE:", results) return results def retrieve_genes(self, column_name): - query=""" + query = """ select ProbeSet.Name, ProbeSet.%s from ProbeSet,ProbeSetXRef where ProbeSetXRef.ProbeSetFreezeId = %s and ProbeSetXRef.ProbeSetId=ProbeSet.Id; """ % (column_name, escape(str(self.id))) logger.sql(query) - results=g.db.execute(query).fetchall() + results = g.db.execute(query).fetchall() return dict(results) @@ -1191,40 +1184,51 @@ class MrnaAssayDataSet(DataSet): class TempDataSet(DataSet): '''Temporary user-generated data set''' - DS_NAME_MAP['Temp']='TempDataSet' + DS_NAME_MAP['Temp'] = 'TempDataSet' def setup(self): - self.search_fields=['name', + self.search_fields = ['name', 'description'] - self.display_fields=['name', + self.display_fields = ['name', 'description'] - self.header_fields=['Name', + 
self.header_fields = ['Name', 'Description'] - self.type='Temp' + self.type = 'Temp' # Need to double check later how these are used - self.id=1 - self.fullname='Temporary Storage' - self.shortname='Temp' + self.id = 1 + self.fullname = 'Temporary Storage' + self.shortname = 'Temp' def geno_mrna_confidentiality(ob): - dataset_table=ob.type + "Freeze" + dataset_table = ob.type + "Freeze" # logger.debug("dataset_table [%s]: %s" % (type(dataset_table), dataset_table)) - query='''SELECT Id, Name, FullName, confidentiality, + query = '''SELECT Id, Name, FullName, confidentiality, AuthorisedUsers FROM %s WHERE Name = "%s"''' % (dataset_table, ob.name) logger.sql(query) - result=g.db.execute(query) + result = g.db.execute(query) + + (dataset_id, + name, + full_name, + confidential, + authorized_users) = result.fetchall()[0] + + if confidential: + return True +uery) + result = g.db.execute(query) (dataset_id, name, full_name, confidential, - authorized_users)=result.fetchall()[0] + authorized_users) = result.fetchall()[0] if confidential: return True -- cgit v1.2.3 From f80c11f8d68b6a01215e8260234931dbf211fddf Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 24 May 2021 16:43:45 +0300 Subject: minor fix --- wqflask/base/data_set.py | 1 - 1 file changed, 1 deletion(-) diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 7080b7b7..62afdb63 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -1221,7 +1221,6 @@ def geno_mrna_confidentiality(ob): if confidential: return True -uery) result = g.db.execute(query) (dataset_id, -- cgit v1.2.3 From d5cb6d1a7e14230c30df6681b071165951c2cb69 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 9 Jun 2021 07:25:03 +0300 Subject: remove unused functions + minor fixes --- wqflask/base/data_set.py | 2 + wqflask/wqflask/correlation/correlation_gn3_api.py | 115 +-------------------- 2 files changed, 7 insertions(+), 110 deletions(-) diff --git a/wqflask/base/data_set.py 
b/wqflask/base/data_set.py index 62afdb63..d31161ec 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -672,6 +672,8 @@ class DataSet: return results def get_probeset_data(self, sample_list=None, trait_ids=None): + + # improvement of get trait data--->>> if sample_list: self.samplelist = sample_list diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index 3e1ce1dc..eb986655 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -27,13 +27,11 @@ def create_target_this_trait(start_vars): return (this_dataset, this_trait, target_dataset, sample_data) - -def test_process_data(this_trait,dataset,start_vars): +def test_process_data(this_trait, dataset, start_vars): """test function for bxd,all and other sample data""" corr_samples_group = start_vars["corr_samples_group"] - primary_samples = dataset.group.samplelist if dataset.group.parlist != None: primary_samples += dataset.group.parlist @@ -51,10 +49,12 @@ def test_process_data(this_trait,dataset,start_vars): if corr_samples_group == 'samples_other': primary_samples = [x for x in primary_samples if x not in ( dataset.group.parlist + dataset.group.f1list)] - sample_data = process_samples(start_vars, list(this_trait.data.keys()), primary_samples) + sample_data = process_samples(start_vars, list( + this_trait.data.keys()), primary_samples) return sample_data + def process_samples(start_vars, sample_names, excluded_samples=None): """process samples""" sample_data = {} @@ -149,7 +149,7 @@ def fetch_sample_data(start_vars, this_trait, this_dataset, target_dataset): # sample_data = test_process_data(this_trait,this_dataset,start_vars) - if target_dataset.type =="ProbeSet": + if target_dataset.type == "ProbeSet": # pass target_dataset.get_probeset_data(list(sample_data.keys())) else: @@ -238,7 +238,6 @@ def compute_correlation(start_vars, method="pearson"): 
"target_dataset": start_vars['corr_dataset'], "return_results": corr_return_results} - return correlation_data @@ -299,107 +298,3 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict): } return (primary_tissue_data, target_tissue_data) return None - - -def generate_corr_data(corr_results, target_dataset): - counter = 0 - results_list = [] - for (index, trait_corr) in enumerate(corr_results): - trait_name = list(trait_corr.keys())[0] - trait = create_trait(dataset=target_dataset, - name=trait_name) - - trait_corr_data = trait_corr[trait_name] - - if trait.view == False: - continue - results_dict = {} - results_dict['index'] = index + 1 - results_dict['trait_id'] = trait.name - results_dict['dataset'] = trait.dataset.name - # results_dict['hmac'] = hmac.data_hmac( - # '{}:{}'.format(trait.name, trait.dataset.name)) - if target_dataset.type == "ProbeSet": - results_dict['symbol'] = trait.symbol - results_dict['description'] = "N/A" - results_dict['location'] = trait.location_repr - results_dict['mean'] = "N/A" - results_dict['additive'] = "N/A" - if bool(trait.description_display): - results_dict['description'] = trait.description_display - if bool(trait.mean): - results_dict['mean'] = f"{float(trait.mean):.3f}" - try: - results_dict['lod_score'] = f"{float(trait.LRS_score_repr) / 4.61:.1f}" - except: - results_dict['lod_score'] = "N/A" - results_dict['lrs_location'] = trait.LRS_location_repr - if bool(trait.additive): - results_dict['additive'] = f"{float(trait.additive):.3f}" - results_dict['sample_r'] = f"{float(trait_corr_data.get('sample_r',0)):.3f}" - results_dict['num_overlap'] = trait.num_overlap - results_dict['sample_p'] = f"{float(trait_corr_data.get('sample_p',0)):.3e}" - results_dict['lit_corr'] = "--" - results_dict['tissue_corr'] = "--" - results_dict['tissue_pvalue'] = "--" - tissue_corr = trait_corr_data.get('tissue_corr',0) - lit_corr = trait_corr_data.get('lit_corr',0) - if bool(lit_corr): - results_dict['lit_corr'] = 
f"{float(trait_corr_data.get('lit_corr',0)):.3f}" - if bool(tissue_corr): - results_dict['tissue_corr'] = f"{float(trait_corr_data.get('tissue_corr',0)):.3f}" - results_dict['tissue_pvalue'] = f"{float(trait_corr_data.get('tissue_pvalue',0)):.3e}" - elif target_dataset.type == "Publish": - results_dict['abbreviation_display'] = "N/A" - results_dict['description'] = "N/A" - results_dict['mean'] = "N/A" - results_dict['authors_display'] = "N/A" - results_dict['additive'] = "N/A" - if for_api: - results_dict['pubmed_id'] = "N/A" - results_dict['year'] = "N/A" - else: - results_dict['pubmed_link'] = "N/A" - results_dict['pubmed_text'] = "N/A" - - if bool(trait.abbreviation): - results_dict['abbreviation_display'] = trait.abbreviation - if bool(trait.description_display): - results_dict['description'] = trait.description_display - if bool(trait.mean): - results_dict['mean'] = f"{float(trait.mean):.3f}" - if bool(trait.authors): - authors_list = trait.authors.split(',') - if len(authors_list) > 6: - results_dict['authors_display'] = ", ".join( - authors_list[:6]) + ", et al." 
- else: - results_dict['authors_display'] = trait.authors - if bool(trait.pubmed_id): - if for_api: - results_dict['pubmed_id'] = trait.pubmed_id - results_dict['year'] = trait.pubmed_text - else: - results_dict['pubmed_link'] = trait.pubmed_link - results_dict['pubmed_text'] = trait.pubmed_text - try: - results_dict['lod_score'] = f"{float(trait.LRS_score_repr) / 4.61:.1f}" - except: - results_dict['lod_score'] = "N/A" - results_dict['lrs_location'] = trait.LRS_location_repr - if bool(trait.additive): - results_dict['additive'] = f"{float(trait.additive):.3f}" - results_dict['sample_r'] = f"{float(trait_corr_data.get('sample_r',0)):.3f}" - results_dict['num_overlap'] = trait.num_overlap - results_dict['sample_p'] = f"{float(trait_corr_data.get('sample_p',0)):.3e}" - else: - results_dict['location'] = trait.location_repr - results_dict['sample_r'] = f"{float(trait_corr_data.get('sample_r',0)):.3f}" - results_dict['num_overlap'] = trait.num_overlap - results_dict['sample_p'] = f"{float(trait_corr_data.get('sample_p',0)):.3e}" - - results_list.append(results_dict) - - return results_list - - -- cgit v1.2.3 From cfc738303e7ddd213919a0a15885d1e846277848 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 14 Jun 2021 00:52:10 +0300 Subject: remove print statements --- wqflask/utility/helper_functions.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py index 50e00421..27dd0729 100644 --- a/wqflask/utility/helper_functions.py +++ b/wqflask/utility/helper_functions.py @@ -21,15 +21,11 @@ def get_species_dataset_trait(self, start_vars): self.dataset = data_set.create_dataset(start_vars['dataset']) else: self.dataset = data_set.create_dataset(start_vars['dataset']) - logger.debug("After creating dataset") self.species = TheSpecies(dataset=self.dataset) - logger.debug("After creating species") self.this_trait = create_trait(dataset=self.dataset, name=start_vars['trait_id'], cellid=None, 
get_qtl_info=True) - logger.debug("After creating trait") - def get_trait_db_obs(self, trait_db_list): if isinstance(trait_db_list, str): -- cgit v1.2.3 From 8418cf1554c664130a16b0b2030d1ca7680bf81c Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 14 Jun 2021 00:54:02 +0300 Subject: sort sample name by sample_ids --- wqflask/base/data_set.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index d31161ec..181e83be 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -693,6 +693,9 @@ class DataSet: results = dict(g.db.execute(query).fetchall()) sample_ids = [results[item] for item in self.samplelist] + sorted_samplelist = [strain_name for strain_name, strain_id in sorted( + results.items(), key=lambda item: item[1])] + query = """SELECT * from ProbeSetData where StrainID in {} and id in (SELECT ProbeSetXRef.DataId @@ -702,9 +705,10 @@ class DataSet: and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(create_in_clause(sample_ids), self.name) query_results = list(g.db.execute(query).fetchall()) - data_results = self.chunk_dataset(query_results, len(sample_ids)) + self.samplelist = sorted_samplelist self.trait_data = data_results + def get_trait_data(self, sample_list=None): if sample_list: -- cgit v1.2.3 From d703e0ae5f3ef92efb026bfdcc9d1bfe2d296a34 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 14 Jun 2021 00:54:55 +0300 Subject: minor refactoring --- wqflask/wqflask/correlation/correlation_gn3_api.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index eb986655..fedc3146 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -146,11 +146,7 @@ def fetch_sample_data(start_vars, this_trait, this_dataset, target_dataset): sample_data = process_samples( start_vars, 
this_dataset.group.samplelist) - - # sample_data = test_process_data(this_trait,this_dataset,start_vars) - if target_dataset.type == "ProbeSet": - # pass target_dataset.get_probeset_data(list(sample_data.keys())) else: target_dataset.get_trait_data(list(sample_data.keys())) @@ -159,9 +155,6 @@ def fetch_sample_data(start_vars, this_trait, this_dataset, target_dataset): "trait_sample_data": sample_data, "trait_id": start_vars["trait_id"] } - - # should remove this len(samplelist) == len(strain_values) - results = map_shared_keys_to_values( target_dataset.samplelist, target_dataset.trait_data) -- cgit v1.2.3 From e6a08e0fe5a2382cef9c7b9a3b71e17dda6f784e Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 14 Jun 2021 01:15:11 +0300 Subject: add f1list and parlist to primary_samples --- wqflask/wqflask/correlation/correlation_gn3_api.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index fedc3146..30c05f03 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -146,6 +146,10 @@ def fetch_sample_data(start_vars, this_trait, this_dataset, target_dataset): sample_data = process_samples( start_vars, this_dataset.group.samplelist) + + sample_data = test_process_data(this_trait, this_dataset, start_vars) + + if target_dataset.type == "ProbeSet": target_dataset.get_probeset_data(list(sample_data.keys())) else: -- cgit v1.2.3 From b133635fadba19ab1017dc1739cf5ddca1a6bd08 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 14 Jun 2021 09:28:46 +0300 Subject: delete loggers and comments --- wqflask/base/data_set.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 181e83be..6dc44829 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -440,7 +440,6 @@ class DatasetGroup: # genotype_1 is Dataset Object without 
parents and f1 # genotype_2 is Dataset Object with parents and f1 (not for intercross) - # genotype_1 = reaper.Dataset() # reaper barfs on unicode filenames, so here we ensure it's a string if self.genofile: @@ -1116,7 +1115,6 @@ class MrnaAssayDataSet(DataSet): """ % (escape(str(this_trait.dataset.id)), escape(this_trait.name))) - # logger.debug("query is:", pf(query)) logger.sql(query) result = g.db.execute(query).fetchone() @@ -1171,7 +1169,6 @@ class MrnaAssayDataSet(DataSet): """ % (escape(trait), escape(self.name)) logger.sql(query) results = g.db.execute(query).fetchall() - # logger.debug("RETRIEVED RESULTS HERE:", results) return results def retrieve_genes(self, column_name): @@ -1212,7 +1209,6 @@ class TempDataSet(DataSet): def geno_mrna_confidentiality(ob): dataset_table = ob.type + "Freeze" - # logger.debug("dataset_table [%s]: %s" % (type(dataset_table), dataset_table)) query = '''SELECT Id, Name, FullName, confidentiality, AuthorisedUsers FROM %s WHERE Name = "%s"''' % (dataset_table, ob.name) -- cgit v1.2.3 From f7027d4c87786da0d505e0e8e1da23d6e55f6d80 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 17 Jun 2021 10:09:38 +0300 Subject: doc: rpy2-performance: Replace begin_export with begin_src --- doc/rpy2-performance.org | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/doc/rpy2-performance.org b/doc/rpy2-performance.org index 29b99ba0..ddcac81a 100644 --- a/doc/rpy2-performance.org +++ b/doc/rpy2-performance.org @@ -21,7 +21,7 @@ server: However, when loading the homepage, I occasionally ran into this trace: -#+begin_export ascii +#+begin_src DEBUG:wqflask.views:.check_access_permissions: @app.before_request check_access_permissions DEBUG:wqflask.views:.shutdown_session: remove db_session WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Error: ignoring SIGPIPE signal @@ -58,7 +58,7 @@ Possible actions: Selection: -#+end_export +#+end_src This blocks the flask service. 
Seems to be related to: [[https://github.com/rpy2/rpy2/issues/769][rpy2-issue#769]] and [[https://github.com/rpy2/rpy2/issues/809][rpy2-issue#809]]. I tried to reproduce this problem using some endpoint: @@ -80,7 +80,7 @@ and [[https://github.com/rpy2/rpy2/issues/809][rpy2-issue#809]]. I tried to repr which generates this trace: -#+begin_export ascii +#+begin_src /home/bonface/opt/python3-genenetwork2/lib/python3.8/site-packages/rpy2/rinterface.py:955: UserWarning: R is not initialized by the main thread. Its taking over SIGINT cannot be reversed here, and as a consequence the embedded R cannot be interrupted with Ctrl-C. @@ -89,7 +89,7 @@ which generates this trace: warnings.warn( DEBUG:wqflask.views:.shutdown_session: remove db_session -#+end_export +#+end_src Modifying the endpoint to: @@ -108,7 +108,7 @@ Modifying the endpoint to: and refreshing the page a couple of times, I get: -#+begin_export ascii +#+begin_src DEBUG:wqflask.views:.check_access_permissions: @app.before_request check_access_ permissions Sleeping for 3 seconds @@ -136,7 +136,7 @@ WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: library '/home/bonfa ce/R/x86_64-unknown-linux-gnu-library/4.0' contains no packages WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: -*** caught segfault *** +\*** caught segfault *** WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: address (nil), cause 'memory not mapped' @@ -151,9 +151,11 @@ Possible actions: Selection: [2021-06-16 13:11:00 +0300] [18657] [INFO] Handling signal: winch [2021-06-16 13:11:00 +0300] [18657] [INFO] Handling signal: winch [2021-06-16 13:13:02 +0300] [18657] [INFO] Handling signal: winch -#+end_export +#+end_src -However, this seems to be non-deterministic, in the sense that I can't really pin what causes the above. I've tried to write a Locust Test that simulates users hitting that endpoint: +However, this seems to be non-deterministic, in the sense that I can't +really pin what causes the above. 
I've tried to write a Locust Test +that simulates users hitting that endpoint: #+begin_src python """Load test a single trait page""" @@ -168,3 +170,4 @@ from locust import HttpUser, task, between """Fetch a single trait""" self.client.get("/test") #+end_src + -- cgit v1.2.3 From a992e2038424c2acdf11078356204847d128c8c6 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 17 Jun 2021 10:11:53 +0300 Subject: docs: rpy2-performance: Indicate version of python-rpy2 being used --- doc/rpy2-performance.org | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/rpy2-performance.org b/doc/rpy2-performance.org index ddcac81a..177c427e 100644 --- a/doc/rpy2-performance.org +++ b/doc/rpy2-performance.org @@ -1,10 +1,10 @@ * Python-Rpy2 performance issues with genenetwork2 At one point, genenetwork2 was down. A possible cause was that it -wrote into the log file in an infinite loop due to rpy2, so a solution -was to empty it. Currently, as a work around, rpy2 is disabled by -removing it's imports. This affects WGCNA/ CTL imports and commenting -out Biweight Midcorrelation option in the trait page. See: +wrote into the log file in an infinite loop due to rpy2 (v3.4.4), so a +solution was to empty it. Currently, as a workaround, rpy2 is +disabled by removing its imports. This affects WGCNA/ CTL imports and +commenting out Biweight Midcorrelation option in the trait page. 
See: - [[https://github.com/genenetwork/genenetwork2/commit/1baf5f7611909c651483208184c5fbf7d4a7a088][1baf5f7]] - [[https://github.com/genenetwork/genenetwork2/commit/afee4d625248565857df98d3510f680ae6204864][afee4d6]] -- cgit v1.2.3 From a5981b8e71380b171b210d55da58e5037b455a6c Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 17 Jun 2021 10:20:39 +0300 Subject: doc: rpy2-performance: Add a section exploring possible solutions --- doc/rpy2-performance.org | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/doc/rpy2-performance.org b/doc/rpy2-performance.org index 177c427e..8f917ca0 100644 --- a/doc/rpy2-performance.org +++ b/doc/rpy2-performance.org @@ -171,3 +171,12 @@ from locust import HttpUser, task, between self.client.get("/test") #+end_src + +** A possible solution + +From this [[https://github.com/rpy2/rpy2/issues/809#issuecomment-845923975][comment]], a possible reason for the above traces is that +from Flask's end, a [[https://tldp.org/LDP/lpg/node20.html][SIGPIPE]] is somehow generated by our Python +code. However, at this particular point, the R thread just happens to +be running, and R can't handle this correctly. This seems to have been +fixed in this [[https://github.com/rpy2/rpy2/pull/810][PR]] with this [[https://github.com/rpy2/rpy2/issues/809#issuecomment-851618215][explanation]]. On our end, to have these +changes, we have to update our python-rpy2 version. 
-- cgit v1.2.3 From 90a427fcc855910a812f3cee710ede335071768a Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 17 Jun 2021 19:43:35 +0000 Subject: Fixed issue that caused filtering by attribute values to not work if attribute values didn't exist for some samples --- wqflask/wqflask/static/new/javascript/show_trait.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/wqflask/wqflask/static/new/javascript/show_trait.js b/wqflask/wqflask/static/new/javascript/show_trait.js index 569046d3..77ef1720 100644 --- a/wqflask/wqflask/static/new/javascript/show_trait.js +++ b/wqflask/wqflask/static/new/javascript/show_trait.js @@ -747,7 +747,11 @@ filter_by_value = function() { if (filter_column == "value" || filter_column == "stderr"){ var this_col_value = filter_val_nodes[i].childNodes[0].value; } else { - var this_col_value = filter_val_nodes[i].childNodes[0].data; + if (filter_val_nodes[i].childNodes[0] !== undefined){ + var this_col_value = filter_val_nodes[i].childNodes[0].data; + } else { + continue + } } let this_val_node = val_nodes[i].childNodes[0]; @@ -1700,4 +1704,4 @@ $('#normalize').click(edit_data_change); Number.prototype.countDecimals = function () { if(Math.floor(this.valueOf()) === this.valueOf()) return 0; return this.toString().split(".")[1].length || 0; -} \ No newline at end of file +} -- cgit v1.2.3 From e5a8fad251bff8160e49c6c9fea42b815488e6c3 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 17 Jun 2021 19:45:40 +0000 Subject: Added function for getting list of attributes with all numerical values (that can be used with the 'Filter by value' feature) --- wqflask/wqflask/show_trait/show_trait.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 9ee6a16d..c07430dd 100644 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -177,10 +177,13 @@ class ShowTrait: sample_lists = 
[group.sample_list for group in self.sample_groups] categorical_var_list = [] + self.numerical_var_list = [] if not self.temp_trait: # ZS: Only using first samplelist, since I think mapping only uses those samples categorical_var_list = get_categorical_variables( self.this_trait, self.sample_groups[0]) + self.numerical_var_list = get_numerical_variables( + self.this_trait, self.sample_groups[0]) # ZS: Get list of chromosomes to select for mapping self.chr_list = [["All", -1]] @@ -694,6 +697,26 @@ def get_categorical_variables(this_trait, sample_list) -> list: return categorical_var_list +def get_numerical_variables(this_trait, sample_list) -> list: + numerical_var_list = [] + + if len(sample_list.attributes) > 0: + for attribute in sample_list.attributes: + all_numeric = True + all_none = True + for attr_val in sample_list.attributes[attribute].distinct_values: + if not attr_val: + continue + try: + val_as_float = float(attr_val) + all_none = False + except: + all_numeric = False + break + if all_numeric and not all_none: + numerical_var_list.append(sample_list.attributes[attribute].name) + + return numerical_var_list def get_genotype_scales(genofiles): geno_scales = {} -- cgit v1.2.3 From 2b52a4f16008a450386cf46d008452942c2d98e4 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 17 Jun 2021 19:46:12 +0000 Subject: Only show all-numerical attributes in the 'Filter samples by value' drop-down --- .../wqflask/templates/show_trait_transform_and_filter.html | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/wqflask/wqflask/templates/show_trait_transform_and_filter.html b/wqflask/wqflask/templates/show_trait_transform_and_filter.html index e3f5ef81..20f78b48 100644 --- a/wqflask/wqflask/templates/show_trait_transform_and_filter.html +++ b/wqflask/wqflask/templates/show_trait_transform_and_filter.html @@ -46,19 +46,17 @@ {% endif %}
- - {% if sample_groups[0].attributes %} + + {% if (numerical_var_list|length > 0) or js_data.se_exists %} {% endif %} @@ -116,4 +114,4 @@

Samples with no value (x) can be hidden by clicking the Hide No Value button.

- \ No newline at end of file + -- cgit v1.2.3 From 0e8b5ad4a5a7bf7eba5d5e24b38d5df26d6df395 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 17 Jun 2021 19:53:53 +0000 Subject: UCSC Genome Browser link was marked as a broken link, but it apparently works now so I added it back as a link --- wqflask/wqflask/templates/base.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wqflask/wqflask/templates/base.html b/wqflask/wqflask/templates/base.html index ddb1d272..578a0495 100644 --- a/wqflask/wqflask/templates/base.html +++ b/wqflask/wqflask/templates/base.html @@ -85,7 +85,7 @@
  • Variant Browser
  • Bayesian Network Webserver
  • Systems Genetics PheWAS
  • -
  • Genome Browser
  • +
  • Genome Browser
  • BXD Power Calculator
  • Interplanetary File System
  • -- cgit v1.2.3 From aefd88a9950592fb8cdc28cda43a2ca3c39e7f60 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 17 Jun 2021 19:56:34 +0000 Subject: The listserv link was marked as broken but is working, so I made it a link again, but the IRC channel link is broken so I marked it as broken --- wqflask/wqflask/templates/base.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wqflask/wqflask/templates/base.html b/wqflask/wqflask/templates/base.html index 578a0495..12dddf89 100644 --- a/wqflask/wqflask/templates/base.html +++ b/wqflask/wqflask/templates/base.html @@ -208,7 +208,7 @@ JOSS

    - Development and source code on github with issue tracker and documentation. Join the mailing list and find us on IRC (#genenetwork channel). + Development and source code on github with issue tracker and documentation. Join the mailing list and find us on IRC (#genenetwork channel). {% if version: %}

    GeneNetwork {{ version }}

    {% endif %} -- cgit v1.2.3 From fafce2f44087edf51756f0118054d1e3aa654273 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 18 Jun 2021 19:21:11 +0000 Subject: Re-enable bicor for correlations and fix issue where ro.Vector needed to be changed to ro.FloatVector --- wqflask/wqflask/correlation/show_corr_results.py | 30 ++++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index 2f3df67a..f1cf3733 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -22,7 +22,7 @@ import collections import json import scipy import numpy -# import rpy2.robjects as ro # R Objects +import rpy2.robjects as ro # R Objects import utility.logger import utility.webqtlUtil @@ -459,9 +459,9 @@ class CorrelationResults: if num_overlap > 5: # ZS: 2015 could add biweight correlation, see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/ - # if self.corr_method == 'bicor': - # sample_r, sample_p = do_bicor( - # self.this_trait_vals, target_vals) + if self.corr_method == 'bicor': + sample_r, sample_p = do_bicor( + self.this_trait_vals, target_vals) if self.corr_method == 'pearson': sample_r, sample_p = scipy.stats.pearsonr( self.this_trait_vals, target_vals) @@ -487,22 +487,22 @@ class CorrelationResults: self.sample_data[str(sample)] = float(value) -# def do_bicor(this_trait_vals, target_trait_vals): -# r_library = ro.r["library"] # Map the library function -# r_options = ro.r["options"] # Map the options function +def do_bicor(this_trait_vals, target_trait_vals): + r_library = ro.r["library"] # Map the library function + r_options = ro.r["options"] # Map the options function -# r_library("WGCNA") -# r_bicor = ro.r["bicorAndPvalue"] # Map the bicorAndPvalue function + r_library("WGCNA") + r_bicor = ro.r["bicorAndPvalue"] # Map the bicorAndPvalue function -# r_options(stringsAsFactors=False) + 
r_options(stringsAsFactors=False) -# this_vals = ro.Vector(this_trait_vals) -# target_vals = ro.Vector(target_trait_vals) + this_vals = ro.FloatVector(this_trait_vals) + target_vals = ro.FloatVector(target_trait_vals) -# the_r, the_p, _fisher_transform, _the_t, _n_obs = [ -# numpy.asarray(x) for x in r_bicor(x=this_vals, y=target_vals)] + the_r, the_p, _fisher_transform, _the_t, _n_obs = [ + numpy.asarray(x) for x in r_bicor(x=this_vals, y=target_vals)] -# return the_r, the_p + return the_r, the_p def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_api=False): -- cgit v1.2.3 From df8476115e580fa5dfbf0e2e9a8f6e5e39ae7b99 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 18 Jun 2021 19:21:30 +0000 Subject: Reenable PCA for correlation matrix --- .../wqflask/correlation_matrix/show_corr_matrix.py | 124 +++++++++++---------- 1 file changed, 63 insertions(+), 61 deletions(-) diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index 9ac02ac5..e7b16e77 100644 --- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -23,6 +23,9 @@ import math import random import string +import rpy2.robjects as ro +from rpy2.robjects.packages import importr + import numpy as np import scipy @@ -160,23 +163,22 @@ class CorrelationMatrix: for sample in self.all_sample_list: groups.append(1) - # Not doing PCA until rpy2 is excised self.pca_works = "False" - # try: - # corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results)) - # corr_eigen_value, corr_eigen_vectors = sortEigenVectors( - # corr_result_eigen) - - # if self.do_PCA == True: - # self.pca_works = "True" - # self.pca_trait_ids = [] - # pca = self.calculate_pca( - # list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) - # self.loadings_array = self.process_loadings() - # else: - # self.pca_works = "False" - # except: - # self.pca_works = "False" 
+ try: + corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results)) + corr_eigen_value, corr_eigen_vectors = sortEigenVectors( + corr_result_eigen) + + if self.do_PCA == True: + self.pca_works = "True" + self.pca_trait_ids = [] + pca = self.calculate_pca( + list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) + self.loadings_array = self.process_loadings() + else: + self.pca_works = "False" + except: + self.pca_works = "False" self.js_data = dict(traits=[trait.name for trait in self.traits], groups=groups, @@ -185,51 +187,51 @@ class CorrelationMatrix: samples=self.all_sample_list, sample_data=self.sample_data,) - # def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): - # base = importr('base') - # stats = importr('stats') - - # corr_results_to_list = robjects.FloatVector( - # [item for sublist in self.pca_corr_results for item in sublist]) - - # m = robjects.r.matrix(corr_results_to_list, nrow=len(cols)) - # eigen = base.eigen(m) - # pca = stats.princomp(m, cor="TRUE") - # self.loadings = pca.rx('loadings') - # self.scores = pca.rx('scores') - # self.scale = pca.rx('scale') - - # trait_array = zScore(self.trait_data_array) - # trait_array_vectors = np.dot(corr_eigen_vectors, trait_array) - - # pca_traits = [] - # for i, vector in enumerate(trait_array_vectors): - # # ZS: Check if below check is necessary - # # if corr_eigen_value[i-1] > 100.0/len(self.trait_list): - # pca_traits.append((vector * -1.0).tolist()) - - # this_group_name = self.trait_list[0][1].group.name - # temp_dataset = data_set.create_dataset( - # dataset_name="Temp", dataset_type="Temp", group_name=this_group_name) - # temp_dataset.group.get_samplelist() - # for i, pca_trait in enumerate(pca_traits): - # trait_id = "PCA" + str(i + 1) + "_" + temp_dataset.group.species + "_" + \ - # this_group_name + "_" + datetime.datetime.now().strftime("%m%d%H%M%S") - # this_vals_string = "" - # position = 0 - # for sample in temp_dataset.group.all_samples_ordered(): - # 
if sample in self.shared_samples_list: - # this_vals_string += str(pca_trait[position]) - # this_vals_string += " " - # position += 1 - # else: - # this_vals_string += "x " - # this_vals_string = this_vals_string[:-1] - - # Redis.set(trait_id, this_vals_string, ex=THIRTY_DAYS) - # self.pca_trait_ids.append(trait_id) - - # return pca + def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): + base = importr('base') + stats = importr('stats') + + corr_results_to_list = ro.FloatVector( + [item for sublist in self.pca_corr_results for item in sublist]) + + m = ro.r.matrix(corr_results_to_list, nrow=len(cols)) + eigen = base.eigen(m) + pca = stats.princomp(m, cor="TRUE") + self.loadings = pca.rx('loadings') + self.scores = pca.rx('scores') + self.scale = pca.rx('scale') + + trait_array = zScore(self.trait_data_array) + trait_array_vectors = np.dot(corr_eigen_vectors, trait_array) + + pca_traits = [] + for i, vector in enumerate(trait_array_vectors): + # ZS: Check if below check is necessary + # if corr_eigen_value[i-1] > 100.0/len(self.trait_list): + pca_traits.append((vector * -1.0).tolist()) + + this_group_name = self.trait_list[0][1].group.name + temp_dataset = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=this_group_name) + temp_dataset.group.get_samplelist() + for i, pca_trait in enumerate(pca_traits): + trait_id = "PCA" + str(i + 1) + "_" + temp_dataset.group.species + "_" + \ + this_group_name + "_" + datetime.datetime.now().strftime("%m%d%H%M%S") + this_vals_string = "" + position = 0 + for sample in temp_dataset.group.all_samples_ordered(): + if sample in self.shared_samples_list: + this_vals_string += str(pca_trait[position]) + this_vals_string += " " + position += 1 + else: + this_vals_string += "x " + this_vals_string = this_vals_string[:-1] + + Redis.set(trait_id, this_vals_string, ex=THIRTY_DAYS) + self.pca_trait_ids.append(trait_id) + + return pca def process_loadings(self): loadings_array = [] -- cgit v1.2.3 
From f314728334fde0677ea515b5910db86086fdd5ef Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 18 Jun 2021 19:21:54 +0000 Subject: Re-enable bicor as an option in the trait page templatee --- wqflask/wqflask/templates/show_trait_calculate_correlations.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wqflask/wqflask/templates/show_trait_calculate_correlations.html b/wqflask/wqflask/templates/show_trait_calculate_correlations.html index e623a968..59f9b47c 100644 --- a/wqflask/wqflask/templates/show_trait_calculate_correlations.html +++ b/wqflask/wqflask/templates/show_trait_calculate_correlations.html @@ -70,7 +70,7 @@ -- cgit v1.2.3 From 31ad8698fe69da8d13c7a67cbf7e7ddeda67a734 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 5 Jan 2021 16:10:25 -0600 Subject: Added Scroller to mapping results table --- wqflask/wqflask/templates/mapping_results.html | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/wqflask/wqflask/templates/mapping_results.html b/wqflask/wqflask/templates/mapping_results.html index d6fc6e37..35d8a157 100644 --- a/wqflask/wqflask/templates/mapping_results.html +++ b/wqflask/wqflask/templates/mapping_results.html @@ -357,7 +357,9 @@ {% endif %} - + + + @@ -409,13 +411,12 @@ "info": "Showing from _START_ to _END_ of " + js_data.total_markers + " records", }, "order": [[1, "asc" ]], - "sDom": "iRZtir", - "iDisplayLength": -1, - "autoWidth": false, - "deferRender": true, + "sDom": "itir", + "autoWidth": true, "bSortClasses": false, - "scrollCollapse": false, - "paging": false + "scrollY": "100vh", + "scroller": true, + "scrollCollapse": true } ); {% elif selectedChr != -1 and plotScale =="physic" and (dataset.group.species == 'mouse' or dataset.group.species == 'rat') %} $('#trait_table').dataTable( { -- cgit v1.2.3 From 116f911561dc81565dc0f77c12e901c0d53de4e5 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 5 Jan 2021 16:10:45 -0600 Subject: Changed limit from 2000 markers to 10000 markers for the 
number to show in mapping results table --- wqflask/wqflask/marker_regression/run_mapping.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py index c5b980a7..f601201b 100644 --- a/wqflask/wqflask/marker_regression/run_mapping.py +++ b/wqflask/wqflask/marker_regression/run_mapping.py @@ -673,9 +673,9 @@ def trim_markers_for_table(markers): sorted_markers = sorted( markers, key=lambda k: k['lrs_value'], reverse=True) - # ZS: So we end up with a list of just 2000 markers - if len(sorted_markers) >= 2000: - trimmed_sorted_markers = sorted_markers[:2000] + # ZS: So we end up with a list of at most 10000 markers + if len(sorted_markers) >= 10000: + trimmed_sorted_markers = sorted_markers[:10000] return trimmed_sorted_markers else: return sorted_markers -- cgit v1.2.3 From d6937e74b85a4fc44153530520774836eed60fe6 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 28 Jun 2021 18:31:29 +0000 Subject: Added doc for creating guix profile, written by Bonface --- doc/guix_profile_setup.org | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 doc/guix_profile_setup.org diff --git a/doc/guix_profile_setup.org b/doc/guix_profile_setup.org new file mode 100644 index 00000000..c397377c --- /dev/null +++ b/doc/guix_profile_setup.org @@ -0,0 +1,39 @@ +* Setting up GUIX profile for GN + +First create a guix profile with the latest packages: + +: ~/opt/guix/bin/guix pull + +This will create a profile with the latest packages under `~/.config/guix/current` + +Now you have the latest guix. Check: `$HOME/.config/guix/current/bin/guix --version` + +At this point, it's worth mentioning that installing +python3-genenetwork using `$HOME/.config/guix/current/bin/guix` should +work; but let's use the dev version (since that may come in handy in +time), and it's a nice thing to know.
+ +Next, we ensure that the appropriate GUILEPATHS are set: + +: export GUILE_LOAD_PATH=$HOME/.config/guix/current/share/guile/site/3.0/ +: export GUILE_LOAD_COMPILED_PATH=$HOME/.config/guix/current/lib/guile/3.0/site-ccache/ + +Get into the container: + +: $HOME/.config/guix/current/bin/guix environment -C guix --ad-hoc bash gcc-toolchain +: ./bootstrap +: ./configure --localstatedir=/var --sysconfdir=/etc + +Check that everything works: + +: make check + +Clean up and build: + +: make clean-go +: make -j 4 +: exit + +Install Python3 (substitute paths when necessary): + +: env GUIX_PACKAGE_PATH='/home/zas1024/guix-bioinformatics:/home/zas1024/guix-past/modules' $HOME/.config/guix/current/bin/guix install python3-genenetwork2 -p ~/opt/python3-genenetwork2 --substitute-urls="http://guix.genenetwork.org https://berlin.guixsd.org https://ci.guix.gnu.org https://mirror.hydra.gnu.org" -- cgit v1.2.3 From bab4e72f657eef01934dc7a1645dce6d3035c3fd Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 1 Jul 2021 18:41:45 +0000 Subject: Fixed bug where there'd be an error if the max boot count is 0 for a chromosome --- wqflask/wqflask/marker_regression/display_mapping_results.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py index ec17d3b0..2d74ea52 100644 --- a/wqflask/wqflask/marker_regression/display_mapping_results.py +++ b/wqflask/wqflask/marker_regression/display_mapping_results.py @@ -861,6 +861,9 @@ class DisplayMappingResults: (item[1], yZero - item[2] * bootHeightThresh / maxBootCount)), fill=self.BOOTSTRAP_BOX_COLOR, outline=BLACK) + if maxBootCount == 0: + return + # draw boot scale highestPercent = (maxBootCount * 100.0) / nboot bootScale = Plot.detScale(0, highestPercent) -- cgit v1.2.3 From 323c52ff746dab78a2edc343958a81e0ac1b22a5 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 1 Jul 2021 20:59:47 +0000 Subject: Fixed issue where 
zooming into chromosome 1 was causing the Y axis scaling to be bad --- .../marker_regression/display_mapping_results.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py index 2d74ea52..dfae4065 100644 --- a/wqflask/wqflask/marker_regression/display_mapping_results.py +++ b/wqflask/wqflask/marker_regression/display_mapping_results.py @@ -2289,20 +2289,9 @@ class DisplayMappingResults: font=VERDANA_FILE, size=int(18 * zoom * 1.5)) yZero = yTopOffset + plotHeight - # LRSHeightThresh = drawAreaHeight - # AdditiveHeightThresh = drawAreaHeight/2 - # DominanceHeightThresh = drawAreaHeight/2 - if self.selectedChr == 1: - LRSHeightThresh = drawAreaHeight - yTopOffset + 30 * (zoom - 1) - AdditiveHeightThresh = LRSHeightThresh / 2 - DominanceHeightThresh = LRSHeightThresh / 2 - else: - LRSHeightThresh = drawAreaHeight - AdditiveHeightThresh = drawAreaHeight / 2 - DominanceHeightThresh = drawAreaHeight / 2 - # LRSHeightThresh = (yZero - yTopOffset + 30*(zoom - 1)) - # AdditiveHeightThresh = LRSHeightThresh/2 - # DominanceHeightThresh = LRSHeightThresh/2 + LRSHeightThresh = drawAreaHeight + AdditiveHeightThresh = drawAreaHeight / 2 + DominanceHeightThresh = drawAreaHeight / 2 if LRS_LOD_Max > 100: LRSScale = 20.0 -- cgit v1.2.3 From d4c990beaf72dd885d6baa7dc1035a7044c79cdd Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 1 Jul 2021 21:28:10 +0000 Subject: Fixed issue where the interval map QTL line might extend beyond the graph edge when zoomed into a Mb range + removed some commented out code --- .../marker_regression/display_mapping_results.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py index dfae4065..1fcc2832 100644 --- 
a/wqflask/wqflask/marker_regression/display_mapping_results.py +++ b/wqflask/wqflask/marker_regression/display_mapping_results.py @@ -2561,7 +2561,10 @@ class DisplayMappingResults: Xc = startPosX + ((qtlresult['Mb'] - start_cm - startMb) * plotXScale) * ( ((qtlresult['Mb'] - start_cm - startMb) * plotXScale) / ((qtlresult['Mb'] - start_cm - startMb + self.GraphInterval) * plotXScale)) else: - Xc = startPosX + (qtlresult['Mb'] - startMb) * plotXScale + if qtlresult['Mb'] > endMb: + Xc = startPosX + endMb * plotXScale + else: + Xc = startPosX + (qtlresult['Mb'] - startMb) * plotXScale # updated by NL 06-18-2011: # fix the over limit LRS graph issue since genotype trait may give infinite LRS; @@ -2572,36 +2575,29 @@ class DisplayMappingResults: if 'lrs_value' in qtlresult: if self.LRS_LOD == "LOD" or self.LRS_LOD == "-logP": if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value'] == 'inf': - #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/(LRSAxisList[-1]*self.LODFACTOR) Yc = yZero - webqtlConfig.MAXLRS * \ LRSHeightThresh / \ (LRS_LOD_Max * self.LODFACTOR) else: - #Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/(LRSAxisList[-1]*self.LODFACTOR) Yc = yZero - \ qtlresult['lrs_value'] * LRSHeightThresh / \ (LRS_LOD_Max * self.LODFACTOR) else: if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value'] == 'inf': - #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRSAxisList[-1] Yc = yZero - webqtlConfig.MAXLRS * LRSHeightThresh / LRS_LOD_Max else: - #Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/LRSAxisList[-1] Yc = yZero - \ qtlresult['lrs_value'] * \ LRSHeightThresh / LRS_LOD_Max else: if qtlresult['lod_score'] > 100 or qtlresult['lod_score'] == 'inf': - #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRSAxisList[-1] Yc = yZero - webqtlConfig.MAXLRS * LRSHeightThresh / LRS_LOD_Max else: if self.LRS_LOD == "LRS": - #Yc = yZero - qtlresult['lod_score']*self.LODFACTOR*LRSHeightThresh/LRSAxisList[-1] Yc = yZero - \ qtlresult['lod_score'] * self.LODFACTOR * 
\ LRSHeightThresh / LRS_LOD_Max else: - #Yc = yZero - qtlresult['lod_score']*LRSHeightThresh/LRSAxisList[-1] Yc = yZero - \ qtlresult['lod_score'] * \ LRSHeightThresh / LRS_LOD_Max @@ -2634,14 +2630,12 @@ class DisplayMappingResults: AdditiveHeightThresh / additiveMax AdditiveCoordXY.append((Xc, Yc)) + if qtlresult['Mb'] > endMb: + break + m += 1 if self.manhattan_plot != True: - # im_drawer.polygon( - # xy=LRSCoordXY, - # outline=thisLRSColor - # #, closed=0, edgeWidth=lrsEdgeWidth, clipX=(xLeftOffset, xLeftOffset + plotWidth) - # ) draw_open_polygon(canvas, xy=LRSCoordXY, outline=thisLRSColor, width=lrsEdgeWidth) -- cgit v1.2.3 From 74c1d6a6ef070271adaf486fc9a494662855d96f Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 1 Jul 2021 21:40:24 +0000 Subject: Fixed issue with suggestive/significant lines extending beyond figure edge + fixed a couple issues with the last commit --- wqflask/wqflask/marker_regression/display_mapping_results.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py index 1fcc2832..f941267e 100644 --- a/wqflask/wqflask/marker_regression/display_mapping_results.py +++ b/wqflask/wqflask/marker_regression/display_mapping_results.py @@ -2372,8 +2372,7 @@ class DisplayMappingResults: # ZS: I don't know if what I did here with this inner function is clever or overly complicated, but it's the only way I could think of to avoid duplicating the code inside this function def add_suggestive_significant_lines_and_legend(start_pos_x, chr_length_dist): - rightEdge = int(start_pos_x + chr_length_dist * \ - plotXScale - self.SUGGESTIVE_WIDTH / 1.5) + rightEdge = xLeftOffset + plotWidth im_drawer.line( xy=((start_pos_x + self.SUGGESTIVE_WIDTH / 1.5, suggestiveY), (rightEdge, suggestiveY)), @@ -2561,7 +2560,7 @@ class DisplayMappingResults: Xc = startPosX + ((qtlresult['Mb'] - start_cm - startMb) * plotXScale) * ( 
((qtlresult['Mb'] - start_cm - startMb) * plotXScale) / ((qtlresult['Mb'] - start_cm - startMb + self.GraphInterval) * plotXScale)) else: - if qtlresult['Mb'] > endMb: + if self.selectedChr != -1 and qtlresult['Mb'] > endMb: Xc = startPosX + endMb * plotXScale else: Xc = startPosX + (qtlresult['Mb'] - startMb) * plotXScale @@ -2630,7 +2629,7 @@ class DisplayMappingResults: AdditiveHeightThresh / additiveMax AdditiveCoordXY.append((Xc, Yc)) - if qtlresult['Mb'] > endMb: + if self.selectedChr != -1 and qtlresult['Mb'] > endMb: break m += 1 -- cgit v1.2.3 From bc2869179f2483d9ad5995d3abb0c9dbc1024acd Mon Sep 17 00:00:00 2001 From: zsloan Date: Sat, 3 Jul 2021 20:44:41 +0000 Subject: Increased width of mapping options div and covariates window --- wqflask/wqflask/static/new/css/show_trait.css | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/wqflask/wqflask/static/new/css/show_trait.css b/wqflask/wqflask/static/new/css/show_trait.css index 27404801..782dabc2 100644 --- a/wqflask/wqflask/static/new/css/show_trait.css +++ b/wqflask/wqflask/static/new/css/show_trait.css @@ -159,10 +159,10 @@ div.normalize-div { } div.mapping-main { - min-width: 1200px; + min-width: 1400px; } div.mapping-options { - min-width: 500px; + min-width: 700px; } div.covar-options { @@ -194,7 +194,7 @@ div.select-covar-div { .selected-covariates { overflow-y: scroll; resize: none; - width: 200px; + width: 400px; } .cofactor-input { @@ -259,4 +259,4 @@ input.trait-value-input { div.inline-div { display: inline; -} \ No newline at end of file +} -- cgit v1.2.3