From a7f2e8dea0a84a70aec29159a9ae35bd55ba047d Mon Sep 17 00:00:00 2001
From: BonfaceKilz
Date: Thu, 17 Jun 2021 08:52:17 +0300
Subject: doc: Add results of investigating rpy2 blocking issue in gn2

---
 doc/rpy2-performance.org | 170 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 170 insertions(+)
 create mode 100644 doc/rpy2-performance.org

diff --git a/doc/rpy2-performance.org b/doc/rpy2-performance.org
new file mode 100644
index 00000000..29b99ba0
--- /dev/null
+++ b/doc/rpy2-performance.org
@@ -0,0 +1,170 @@
+* Python-Rpy2 performance issues with genenetwork2
+
+At one point, genenetwork2 was down. A possible cause was that it
+wrote into the log file in an infinite loop due to rpy2, so a solution
+was to empty the log. Currently, as a workaround, rpy2 is disabled by
+removing its imports. This affects the WGCNA/CTL imports and required
+commenting out the Biweight Midcorrelation option in the trait page. See:
+
+- [[https://github.com/genenetwork/genenetwork2/commit/1baf5f7611909c651483208184c5fbf7d4a7a088][1baf5f7]]
+- [[https://github.com/genenetwork/genenetwork2/commit/afee4d625248565857df98d3510f680ae6204864][afee4d6]]
+- [[https://github.com/genenetwork/genenetwork2/commit/c458bf0ad731e5e5fd9cbd0686936b3a441bae63][c458bf0]]
+- [[https://github.com/genenetwork/genenetwork2/commit/d31f3f763471b19559ca74e73b52b3cb5e7153ce][d31f3f7]]
+
+** Reproducing the problem
+
+I went back to commit #b8408cea. With regard to logs, I never
+experienced any log issue. Perhaps that is because of how I start my
+server:
+
+: env SERVER_PORT=5004 TMPDIR=/home/bonface/tmp WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG GENENETWORK_FILES=/home/bonface/data/genotype_files/ GN2_PROFILE=/home/bonface/opt/python3-genenetwork2 ./scripts/run_debug.sh
+
+However, when loading the homepage, I occasionally ran into this trace:
+
+#+begin_export ascii
+DEBUG:wqflask.views:.check_access_permissions: @app.before_request check_access_permissions
+DEBUG:wqflask.views:.shutdown_session: remove db_session
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Error: ignoring SIGPIPE signal
+
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: In addition:
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Warning messages:
+
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: 1:
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: In (function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE,  :
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]:
+
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]:  library '/home/bonface/R/x86_64-unknown-linux-gnu-library/4.0' contains no packages
+
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: 2:
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: In (function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE,  :
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]:
+
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]:  library '/home/bonface/R/x86_64-unknown-linux-gnu-library/4.0' contains no packages
+
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Fatal error: unable to initialize the JIT
+
+
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]:
+ *** caught segfault ***
+
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: address (nil), cause 'memory not mapped'
+
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]:
+Possible actions:
+1: abort (with core dump, if enabled)
+2: normal R exit
+3: exit R without saving workspace
+4: exit R saving workspace
+
+Selection:
+
+#+end_export
+
+This blocks the Flask service. It seems to be related to [[https://github.com/rpy2/rpy2/issues/769][rpy2-issue#769]]
+and [[https://github.com/rpy2/rpy2/issues/809][rpy2-issue#809]]. I tried to reproduce this problem using a test endpoint:
+
+#+begin_src python
+@app.route("/test")
+def test():  # repro endpoint: initialise R from a worker thread
+    from rpy2 import robjects as ro
+    from rpy2 import rinterface
+    from threading import Thread
+
+    def rpy2_init_simple():
+        rinterface.initr_simple()
+
+    thread = Thread(target=rpy2_init_simple)
+    thread.start()
+    return "This is a test after importing rpy2"
+#+end_src
+
+which generates this trace:
+
+#+begin_export ascii
+/home/bonface/opt/python3-genenetwork2/lib/python3.8/site-packages/rpy2/rinterface.py:955: UserWarning: R is not initialized by the main thread.
+              Its taking over SIGINT cannot be reversed here, and as a
+              consequence the embedded R cannot be interrupted with Ctrl-C.
+              Consider (re)setting the signal handler of your choice from
+              the main thread.
+warnings.warn(
+DEBUG:wqflask.views:.shutdown_session: remove db_session
+
+#+end_export
+
+Modifying the endpoint to:
+
+#+begin_src python
+@app.route("/test")
+def test():  # repro endpoint: import the wqflask modules, then sleep
+    import wqflask.correlation.show_corr_results
+    import wqflask.ctl.ctl_analysis
+    import time
+    from wqflask.correlation.correlation_functions import cal_zero_order_corr_for_tiss
+
+    print("Sleeping for 3 seconds")
+    time.sleep(3)
+    return "This is a test after importing rpy2"
+#+end_src
+
+and refreshing the page a couple of times, I get:
+
+#+begin_export ascii
+DEBUG:wqflask.views:.check_access_permissions: @app.before_request check_access_
+permissions
+Sleeping for 3 seconds
+DEBUG:wqflask.views:.shutdown_session: remove db_session
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Error: ignoring SIGPI
+PE signal
+
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: In addition:
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Warning messages:
+
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: 1:
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: In (function (package
+, help, pos = 2, lib.loc = NULL, character.only = FALSE,  :
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]:
+
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]:  library '/home/bonfa
+ce/R/x86_64-unknown-linux-gnu-library/4.0' contains no packages
+
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: 2:
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: In (function (package
+, help, pos = 2, lib.loc = NULL, character.only = FALSE,  :
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]:
+
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]:  library '/home/bonfa
+ce/R/x86_64-unknown-linux-gnu-library/4.0' contains no packages
+
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]:
+*** caught segfault ***
+
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: address (nil), cause
+'memory not mapped'
+
+WARNING:rpy2.rinterface_lib.callbacks:R[write to console]:
+Possible actions:
+1: abort (with core dump, if enabled)
+2: normal R exit
+3: exit R without saving workspace
+4: exit R saving workspace
+
+Selection: [2021-06-16 13:11:00 +0300] [18657] [INFO] Handling signal: winch
+[2021-06-16 13:11:00 +0300] [18657] [INFO] Handling signal: winch
+[2021-06-16 13:13:02 +0300] [18657] [INFO] Handling signal: winch
+#+end_export
+
+However, this seems to be non-deterministic, in the sense that I can't really pin down what causes the above. I've tried to write a Locust test that simulates users hitting that endpoint:
+
+#+begin_src python
+"""Load test a single trait page"""
+from locust import HttpUser, task, between
+
+
+class LoadTest(HttpUser):  # simulated user that repeatedly requests /test
+    wait_time = between(1, 2.5)  # pause 1 to 2.5 seconds between tasks
+
+    @task
+    def fetch_trait(self):
+        """Request the /test reproduction endpoint"""
+        self.client.get("/test")
+#+end_src
-- 
cgit v1.2.3


From 7cbf9f75ce3e7655f93dd5c3b975ae4430d567fb Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Mon, 24 May 2021 16:35:11 +0300
Subject: add query for fetching probeset data

---
 wqflask/base/data_set.py                           | 258 +++++++++++++--------
 wqflask/wqflask/correlation/correlation_gn3_api.py | 144 +++++++++++-
 2 files changed, 303 insertions(+), 99 deletions(-)

diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 75ddf278..10f0e110 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -166,7 +166,6 @@ class DatasetType:
         if t in ['pheno', 'other_pheno']:
             group_name = name.replace("Publish", "")
 
-
         results = g.db.execute(sql_query_mapping[t] % group_name).fetchone()
         if results:
             self.datasets[name] = dataset_name_mapping[t]
@@ -278,7 +277,7 @@ class Markers:
             filtered_markers = []
             for marker in self.markers:
                 if marker['name'] in p_values:
-                    #logger.debug("marker {} IS in p_values".format(i))
+                    # logger.debug("marker {} IS in p_values".format(i))
                     marker['p_value'] = p_values[marker['name']]
                     if math.isnan(marker['p_value']) or (marker['p_value'] <= 0):
                         marker['lod_score'] = 0
@@ -299,7 +298,7 @@ class HumanMarkers(Markers):
         self.markers = []
         for line in marker_data_fh:
             splat = line.strip().split()
-            #logger.debug("splat:", splat)
+            # logger.debug("splat:", splat)
             if len(specified_markers) > 0:
                 if splat[1] in specified_markers:
                     marker = {}
@@ -441,7 +440,7 @@ class DatasetGroup:
         # genotype_1 is Dataset Object without parents and f1
         # genotype_2 is Dataset Object with parents and f1 (not for intercross)
 
-        #genotype_1 = reaper.Dataset()
+        # genotype_1 = reaper.Dataset()
 
         # reaper barfs on unicode filenames, so here we ensure it's a string
         if self.genofile:
@@ -650,9 +649,39 @@ class DataSet:
 
 
-    def get_trait_data(self, sample_list=None):
+
+    def chunk_dataset(self, dataset, n):
+        """Group flat ProbeSetData rows into per-trait lists of values.
+
+        ``dataset`` is a sequence of 3-item rows whose last item is the
+        value; every ``n`` consecutive rows are treated as one trait. The
+        first item of a chunk's first row is looked up as a
+        ProbeSetXRef.DataId. Returns a dict of ProbeSet.Name -> values.
+        """
+        # Bind the dataset name as a parameter instead of formatting it
+        # into the SQL text, so quotes in the name cannot alter the query.
+        query = """
+                SELECT ProbeSetXRef.DataId,ProbeSet.Name
+                FROM ProbeSet, ProbeSetXRef, ProbeSetFreeze
+                WHERE ProbeSetFreeze.Name = %s AND
+                      ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+                      ProbeSetXRef.ProbeSetId = ProbeSet.Id
+        """
+        # should cache this
+        traits_name_dict = dict(g.db.execute(query, (self.name,)).fetchall())
+
+        results = {}
+        for i in range(0, len(dataset), n):
+            matrix = dataset[i:i + n]
+            trait_name = traits_name_dict[matrix[0][0]]
+            # keep only the value (last item) of each row in this chunk
+            results[trait_name] = [row[2] for row in matrix]
+        return results
+
+    def get_probeset_data(self, sample_list=None, trait_ids=None):
         if sample_list:
             self.samplelist = sample_list
+
         else:
             self.samplelist = self.group.samplelist
 
@@ -666,27 +695,59 @@ class DataSet:
             and Strain.SpeciesId=Species.Id
             and Species.name = '{}'
             """.format(create_in_clause(self.samplelist), *mescape(self.group.species))
-        logger.sql(query)
         results = dict(g.db.execute(query).fetchall())
         sample_ids = [results[item] for item in self.samplelist]
 
+        query = """SELECT * from ProbeSetData
+                where StrainID in {}
+                and id in (SELECT ProbeSetXRef.DataId
+                FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)
+                WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
+                and ProbeSetFreeze.Name = '{}'
+                and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(create_in_clause(sample_ids),self.name)
+
+        query_results=list(g.db.execute(query).fetchall())
+
+        data_results=self.chunk_dataset(query_results, len(sample_ids))
+        self.trait_data=data_results
+
+    def get_trait_data(self, sample_list=None):
+        if sample_list:
+            self.samplelist=sample_list
+        else:
+            self.samplelist=self.group.samplelist
+
+        if self.group.parlist != None and self.group.f1list != None:
+            if (self.group.parlist + self.group.f1list) in self.samplelist:
+                self.samplelist += self.group.parlist + self.group.f1list
+
+        query="""
+            SELECT Strain.Name, Strain.Id FROM Strain, Species
+            WHERE Strain.Name IN {}
+            and Strain.SpeciesId=Species.Id
+            and Species.name = '{}'
+            """.format(create_in_clause(self.samplelist), *mescape(self.group.species))
+        logger.sql(query)
+        results=dict(g.db.execute(query).fetchall())
+        sample_ids=[results[item] for item in self.samplelist]
+
         # MySQL limits the number of tables that can be used in a join to 61,
         # so we break the sample ids into smaller chunks
         # Postgres doesn't have that limit, so we can get rid of this after we transition
-        chunk_size = 50
-        number_chunks = int(math.ceil(len(sample_ids) / chunk_size))
-        trait_sample_data = []
+        chunk_size=50
+        number_chunks=int(math.ceil(len(sample_ids) / chunk_size))
+        trait_sample_data=[]
         for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks):
             if self.type == "Publish":
-                dataset_type = "Phenotype"
+                dataset_type="Phenotype"
             else:
-                dataset_type = self.type
-            temp = ['T%s.value' % item for item in sample_ids_step]
+                dataset_type=self.type
+            temp=['T%s.value' % item for item in sample_ids_step]
             if self.type == "Publish":
-                query = "SELECT {}XRef.Id,".format(escape(self.type))
+                query="SELECT {}XRef.Id,".format(escape(self.type))
             else:
-                query = "SELECT {}.Name,".format(escape(dataset_type))
-            data_start_pos = 1
+                query="SELECT {}.Name,".format(escape(dataset_type))
+            data_start_pos=1
             query += ', '.join(temp)
             query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(dataset_type,
                                                                      self.type,
@@ -715,27 +776,27 @@ class DataSet:
                         """.format(*mescape(self.type, self.type, self.type, self.type,
                                             self.name, dataset_type, self.type, self.type, dataset_type))
 
-            results = g.db.execute(query).fetchall()
+            results=g.db.execute(query).fetchall()
             trait_sample_data.append(results)
 
-        trait_count = len(trait_sample_data[0])
-        self.trait_data = collections.defaultdict(list)
+        trait_count=len(trait_sample_data[0])
+        self.trait_data=collections.defaultdict(list)
 
         # put all of the separate data together into a dictionary where the keys are
         # trait names and values are lists of sample values
         for trait_counter in range(trait_count):
-            trait_name = trait_sample_data[0][trait_counter][0]
+            trait_name=trait_sample_data[0][trait_counter][0]
             for chunk_counter in range(int(number_chunks)):
                 self.trait_data[trait_name] += (
                     trait_sample_data[chunk_counter][trait_counter][data_start_pos:])
 
 
 class PhenotypeDataSet(DataSet):
-    DS_NAME_MAP['Publish'] = 'PhenotypeDataSet'
+    DS_NAME_MAP['Publish']='PhenotypeDataSet'
 
     def setup(self):
         # Fields in the database table
-        self.search_fields = ['Phenotype.Post_publication_description',
+        self.search_fields=['Phenotype.Post_publication_description',
                               'Phenotype.Pre_publication_description',
                               'Phenotype.Pre_publication_abbreviation',
                               'Phenotype.Post_publication_abbreviation',
@@ -748,7 +809,7 @@ class PhenotypeDataSet(DataSet):
                               'PublishXRef.Id']
 
         # Figure out what display_fields is
-        self.display_fields = ['name', 'group_code',
+        self.display_fields=['name', 'group_code',
                                'pubmed_id',
                                'pre_publication_description',
                                'post_publication_description',
@@ -766,7 +827,7 @@ class PhenotypeDataSet(DataSet):
                                'sequence', 'units', 'comments']
 
         # Fields displayed in the search results table header
-        self.header_fields = ['Index',
+        self.header_fields=['Index',
                               'Record',
                               'Description',
                               'Authors',
@@ -775,9 +836,9 @@ class PhenotypeDataSet(DataSet):
                               'Max LRS Location',
                               'Additive Effect']
 
-        self.type = 'Publish'
+        self.type='Publish'
 
-        self.query_for_group = '''
+        self.query_for_group='''
                             SELECT
                                     InbredSet.Name, InbredSet.Id, InbredSet.GeneticType
                             FROM
@@ -797,13 +858,13 @@ class PhenotypeDataSet(DataSet):
             if not this_trait.haveinfo:
                 this_trait.retrieve_info(get_qtl_info=True)
 
-            description = this_trait.post_publication_description
+            description=this_trait.post_publication_description
 
             # If the dataset is confidential and the user has access to confidential
             # phenotype traits, then display the pre-publication description instead
             # of the post-publication description
             if this_trait.confidential:
-                this_trait.description_display = ""
+                this_trait.description_display=""
                 continue   # for now, because no authorization features
 
                 if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(
@@ -811,46 +872,46 @@ class PhenotypeDataSet(DataSet):
                         userName=self.userName,
                         authorized_users=this_trait.authorized_users):
 
-                    description = this_trait.pre_publication_description
+                    description=this_trait.pre_publication_description
 
             if len(description) > 0:
-                this_trait.description_display = description.strip()
+                this_trait.description_display=description.strip()
             else:
-                this_trait.description_display = ""
+                this_trait.description_display=""
 
             if not this_trait.year.isdigit():
-                this_trait.pubmed_text = "N/A"
+                this_trait.pubmed_text="N/A"
             else:
-                this_trait.pubmed_text = this_trait.year
+                this_trait.pubmed_text=this_trait.year
 
             if this_trait.pubmed_id:
-                this_trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % this_trait.pubmed_id
+                this_trait.pubmed_link=webqtlConfig.PUBMEDLINK_URL % this_trait.pubmed_id
 
             # LRS and its location
-            this_trait.LRS_score_repr = "N/A"
-            this_trait.LRS_location_repr = "N/A"
+            this_trait.LRS_score_repr="N/A"
+            this_trait.LRS_location_repr="N/A"
 
             if this_trait.lrs:
-                query = """
+                query="""
                     select Geno.Chr, Geno.Mb from Geno, Species
                     where Species.Name = '%s' and
                         Geno.Name = '%s' and
                         Geno.SpeciesId = Species.Id
                 """ % (species, this_trait.locus)
                 logger.sql(query)
-                result = g.db.execute(query).fetchone()
+                result=g.db.execute(query).fetchone()
 
                 if result:
                     if result[0] and result[1]:
-                        LRS_Chr = result[0]
-                        LRS_Mb = result[1]
+                        LRS_Chr=result[0]
+                        LRS_Mb=result[1]
 
-                        this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
-                        this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (
+                        this_trait.LRS_score_repr=LRS_score_repr='%3.1f' % this_trait.lrs
+                        this_trait.LRS_location_repr=LRS_location_repr='Chr%s: %.6f' % (
                             LRS_Chr, float(LRS_Mb))
 
     def retrieve_sample_data(self, trait):
-        query = """
+        query="""
                     SELECT
                             Strain.Name, PublishData.value, PublishSE.error, NStrain.count, Strain.Name2
                     FROM
@@ -868,34 +929,34 @@ class PhenotypeDataSet(DataSet):
                             Strain.Name
                     """
         logger.sql(query)
-        results = g.db.execute(query, (trait, self.id)).fetchall()
+        results=g.db.execute(query, (trait, self.id)).fetchall()
         return results
 
 
 class GenotypeDataSet(DataSet):
-    DS_NAME_MAP['Geno'] = 'GenotypeDataSet'
+    DS_NAME_MAP['Geno']='GenotypeDataSet'
 
     def setup(self):
         # Fields in the database table
-        self.search_fields = ['Name',
+        self.search_fields=['Name',
                               'Chr']
 
         # Find out what display_fields is
-        self.display_fields = ['name',
+        self.display_fields=['name',
                                'chr',
                                'mb',
                                'source2',
                                'sequence']
 
         # Fields displayed in the search results table header
-        self.header_fields = ['Index',
+        self.header_fields=['Index',
                               'ID',
                               'Location']
 
         # Todo: Obsolete or rename this field
-        self.type = 'Geno'
+        self.type='Geno'
 
-        self.query_for_group = '''
+        self.query_for_group='''
                 SELECT
                         InbredSet.Name, InbredSet.Id, InbredSet.GeneticType
                 FROM
@@ -914,11 +975,11 @@ class GenotypeDataSet(DataSet):
                 this_trait.retrieveInfo()
 
             if this_trait.chr and this_trait.mb:
-                this_trait.location_repr = 'Chr%s: %.6f' % (
+                this_trait.location_repr='Chr%s: %.6f' % (
                     this_trait.chr, float(this_trait.mb))
 
     def retrieve_sample_data(self, trait):
-        query = """
+        query="""
                     SELECT
                             Strain.Name, GenoData.value, GenoSE.error, "N/A", Strain.Name2
                     FROM
@@ -935,7 +996,7 @@ class GenotypeDataSet(DataSet):
                             Strain.Name
                     """
         logger.sql(query)
-        results = g.db.execute(query,
+        results=g.db.execute(query,
                                (webqtlDatabaseFunction.retrieve_species_id(self.group.name),
                                 trait, self.name)).fetchall()
         return results
@@ -949,11 +1010,11 @@ class MrnaAssayDataSet(DataSet):
     platform and is far too specific.
 
     '''
-    DS_NAME_MAP['ProbeSet'] = 'MrnaAssayDataSet'
+    DS_NAME_MAP['ProbeSet']='MrnaAssayDataSet'
 
     def setup(self):
         # Fields in the database table
-        self.search_fields = ['Name',
+        self.search_fields=['Name',
                               'Description',
                               'Probe_Target_Description',
                               'Symbol',
@@ -963,7 +1024,7 @@ class MrnaAssayDataSet(DataSet):
                               'RefSeq_TranscriptId']
 
         # Find out what display_fields is
-        self.display_fields = ['name', 'symbol',
+        self.display_fields=['name', 'symbol',
                                'description', 'probe_target_description',
                                'chr', 'mb',
                                'alias', 'geneid',
@@ -983,7 +1044,7 @@ class MrnaAssayDataSet(DataSet):
                                'flag']
 
         # Fields displayed in the search results table header
-        self.header_fields = ['Index',
+        self.header_fields=['Index',
                               'Record',
                               'Symbol',
                               'Description',
@@ -994,9 +1055,9 @@ class MrnaAssayDataSet(DataSet):
                               'Additive Effect']
 
         # Todo: Obsolete or rename this field
-        self.type = 'ProbeSet'
+        self.type='ProbeSet'
 
-        self.query_for_group = '''
+        self.query_for_group='''
                         SELECT
                                 InbredSet.Name, InbredSet.Id, InbredSet.GeneticType
                         FROM
@@ -1014,7 +1075,7 @@ class MrnaAssayDataSet(DataSet):
 
         #  Note: setting trait_list to [] is probably not a great idea.
         if not trait_list:
-            trait_list = []
+            trait_list=[]
 
         for this_trait in trait_list:
 
@@ -1022,33 +1083,33 @@ class MrnaAssayDataSet(DataSet):
                 this_trait.retrieveInfo(QTL=1)
 
             if not this_trait.symbol:
-                this_trait.symbol = "N/A"
+                this_trait.symbol="N/A"
 
             # XZ, 12/08/2008: description
             # XZ, 06/05/2009: Rob asked to add probe target description
-            description_string = str(
+            description_string=str(
                 str(this_trait.description).strip(codecs.BOM_UTF8), 'utf-8')
-            target_string = str(
+            target_string=str(
                 str(this_trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8')
 
             if len(description_string) > 1 and description_string != 'None':
-                description_display = description_string
+                description_display=description_string
             else:
-                description_display = this_trait.symbol
+                description_display=this_trait.symbol
 
             if (len(description_display) > 1 and description_display != 'N/A'
                     and len(target_string) > 1 and target_string != 'None'):
-                description_display = description_display + '; ' + target_string.strip()
+                description_display=description_display + '; ' + target_string.strip()
 
             # Save it for the jinja2 template
-            this_trait.description_display = description_display
+            this_trait.description_display=description_display
 
             if this_trait.chr and this_trait.mb:
-                this_trait.location_repr = 'Chr%s: %.6f' % (
+                this_trait.location_repr='Chr%s: %.6f' % (
                     this_trait.chr, float(this_trait.mb))
 
             # Get mean expression value
-            query = (
+            query=(
                 """select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet
                 where ProbeSetXRef.ProbeSetFreezeId = %s and
                 ProbeSet.Id = ProbeSetXRef.ProbeSetId and
@@ -1056,44 +1117,45 @@ class MrnaAssayDataSet(DataSet):
             """ % (escape(str(this_trait.dataset.id)),
                    escape(this_trait.name)))
 
-            #logger.debug("query is:", pf(query))
+            # logger.debug("query is:", pf(query))
             logger.sql(query)
-            result = g.db.execute(query).fetchone()
+            result=g.db.execute(query).fetchone()
 
-            mean = result[0] if result else 0
+            mean=result[0] if result else 0
 
             if mean:
-                this_trait.mean = "%2.3f" % mean
+                this_trait.mean="%2.3f" % mean
 
             # LRS and its location
-            this_trait.LRS_score_repr = 'N/A'
-            this_trait.LRS_location_repr = 'N/A'
+            this_trait.LRS_score_repr='N/A'
+            this_trait.LRS_location_repr='N/A'
 
             # Max LRS and its Locus location
             if this_trait.lrs and this_trait.locus:
-                query = """
+                query="""
                     select Geno.Chr, Geno.Mb from Geno, Species
                     where Species.Name = '{}' and
                         Geno.Name = '{}' and
                         Geno.SpeciesId = Species.Id
                 """.format(species, this_trait.locus)
                 logger.sql(query)
-                result = g.db.execute(query).fetchone()
+                result=g.db.execute(query).fetchone()
 
                 if result:
-                    lrs_chr, lrs_mb = result
-                    this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs
-                    this_trait.LRS_location_repr = 'Chr%s: %.6f' % (
+                    lrs_chr, lrs_mb=result
+                    this_trait.LRS_score_repr='%3.1f' % this_trait.lrs
+                    this_trait.LRS_location_repr='Chr%s: %.6f' % (
                         lrs_chr, float(lrs_mb))
 
         return trait_list
 
     def retrieve_sample_data(self, trait):
-        query = """
+        query="""
                     SELECT
                             Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2
                     FROM
-                            (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
+                            (ProbeSetData, ProbeSetFreeze,
+                             Strain, ProbeSet, ProbeSetXRef)
                     left join ProbeSetSE on
                             (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
                     left join NStrain on
@@ -1109,19 +1171,19 @@ class MrnaAssayDataSet(DataSet):
                             Strain.Name
                     """ % (escape(trait), escape(self.name))
         logger.sql(query)
-        results = g.db.execute(query).fetchall()
-        #logger.debug("RETRIEVED RESULTS HERE:", results)
+        results=g.db.execute(query).fetchall()
+        # logger.debug("RETRIEVED RESULTS HERE:", results)
         return results
 
     def retrieve_genes(self, column_name):
-        query = """
+        query="""
                     select ProbeSet.Name, ProbeSet.%s
                     from ProbeSet,ProbeSetXRef
                     where ProbeSetXRef.ProbeSetFreezeId = %s and
                     ProbeSetXRef.ProbeSetId=ProbeSet.Id;
                 """ % (column_name, escape(str(self.id)))
         logger.sql(query)
-        results = g.db.execute(query).fetchall()
+        results=g.db.execute(query).fetchall()
 
         return dict(results)
 
@@ -1129,40 +1191,40 @@ class MrnaAssayDataSet(DataSet):
 class TempDataSet(DataSet):
     '''Temporary user-generated data set'''
 
-    DS_NAME_MAP['Temp'] = 'TempDataSet'
+    DS_NAME_MAP['Temp']='TempDataSet'
 
     def setup(self):
-        self.search_fields = ['name',
+        self.search_fields=['name',
                               'description']
 
-        self.display_fields = ['name',
+        self.display_fields=['name',
                                'description']
 
-        self.header_fields = ['Name',
+        self.header_fields=['Name',
                               'Description']
 
-        self.type = 'Temp'
+        self.type='Temp'
 
         # Need to double check later how these are used
-        self.id = 1
-        self.fullname = 'Temporary Storage'
-        self.shortname = 'Temp'
+        self.id=1
+        self.fullname='Temporary Storage'
+        self.shortname='Temp'
 
 
 def geno_mrna_confidentiality(ob):
-    dataset_table = ob.type + "Freeze"
-    #logger.debug("dataset_table [%s]: %s" % (type(dataset_table), dataset_table))
+    dataset_table=ob.type + "Freeze"
+    # logger.debug("dataset_table [%s]: %s" % (type(dataset_table), dataset_table))
 
-    query = '''SELECT Id, Name, FullName, confidentiality,
+    query='''SELECT Id, Name, FullName, confidentiality,
                         AuthorisedUsers FROM %s WHERE Name = "%s"''' % (dataset_table, ob.name)
     logger.sql(query)
-    result = g.db.execute(query)
+    result=g.db.execute(query)
 
     (dataset_id,
      name,
      full_name,
      confidential,
-     authorized_users) = result.fetchall()[0]
+     authorized_users)=result.fetchall()[0]
 
     if confidential:
         return True
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 6974dbd5..3e1ce1dc 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -27,6 +27,34 @@ def create_target_this_trait(start_vars):
     return (this_dataset, this_trait, target_dataset, sample_data)
 
 
+
+def test_process_data(this_trait,dataset,start_vars):
+    """test function for bxd,all and other sample data"""
+
+    corr_samples_group = start_vars["corr_samples_group"]
+
+
+    primary_samples = dataset.group.samplelist
+    if dataset.group.parlist != None:
+        primary_samples += dataset.group.parlist
+    if dataset.group.f1list != None:
+        primary_samples += dataset.group.f1list
+
+    # If either BXD/whatever Only or All Samples, append all of that group's samplelist
+    if corr_samples_group != 'samples_other':
+        sample_data = process_samples(start_vars, primary_samples)
+
+    # If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and
+    # exclude the primary samples (because they would have been added in the previous
+    # if statement if the user selected All Samples)
+    if corr_samples_group != 'samples_primary':
+        if corr_samples_group == 'samples_other':
+            primary_samples = [x for x in primary_samples if x not in (
+                dataset.group.parlist + dataset.group.f1list)]
+        sample_data = process_samples(start_vars, list(this_trait.data.keys()), primary_samples)
+
+    return sample_data
+
 def process_samples(start_vars, sample_names, excluded_samples=None):
     """process samples"""
     sample_data = {}
@@ -118,13 +146,22 @@ def fetch_sample_data(start_vars, this_trait, this_dataset, target_dataset):
 
     sample_data = process_samples(
         start_vars, this_dataset.group.samplelist)
-    target_dataset.get_trait_data(list(sample_data.keys()))
+
+    # sample_data = test_process_data(this_trait,this_dataset,start_vars)
+
+    if target_dataset.type =="ProbeSet":
+        # pass
+        target_dataset.get_probeset_data(list(sample_data.keys()))
+    else:
+        target_dataset.get_trait_data(list(sample_data.keys()))
     this_trait = retrieve_sample_data(this_trait, this_dataset)
     this_trait_data = {
         "trait_sample_data": sample_data,
         "trait_id": start_vars["trait_id"]
     }
 
+    # should remove this  len(samplelist) == len(strain_values)
+
     results = map_shared_keys_to_values(
         target_dataset.samplelist, target_dataset.trait_data)
 
@@ -201,6 +238,7 @@ def compute_correlation(start_vars, method="pearson"):
                         "target_dataset": start_vars['corr_dataset'],
                         "return_results": corr_return_results}
 
+
     return correlation_data
 
 
@@ -261,3 +299,107 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict):
         }
         return (primary_tissue_data, target_tissue_data)
     return None
+
+
+def generate_corr_data(corr_results, target_dataset):
+    counter = 0
+    results_list = []
+    for (index, trait_corr) in enumerate(corr_results):
+        trait_name = list(trait_corr.keys())[0]
+        trait = create_trait(dataset=target_dataset,
+                             name=trait_name)
+
+        trait_corr_data =  trait_corr[trait_name]
+
+        if trait.view == False:
+            continue
+        results_dict = {}
+        results_dict['index'] = index + 1
+        results_dict['trait_id'] = trait.name
+        results_dict['dataset'] = trait.dataset.name
+        # results_dict['hmac'] = hmac.data_hmac(
+        #     '{}:{}'.format(trait.name, trait.dataset.name))
+        if target_dataset.type == "ProbeSet":
+            results_dict['symbol'] = trait.symbol
+            results_dict['description'] = "N/A"
+            results_dict['location'] = trait.location_repr
+            results_dict['mean'] = "N/A"
+            results_dict['additive'] = "N/A"
+            if bool(trait.description_display):
+                results_dict['description'] = trait.description_display
+            if bool(trait.mean):
+                results_dict['mean'] = f"{float(trait.mean):.3f}"
+            try:
+                results_dict['lod_score'] = f"{float(trait.LRS_score_repr) / 4.61:.1f}"
+            except:
+                results_dict['lod_score'] = "N/A"
+            results_dict['lrs_location'] = trait.LRS_location_repr
+            if bool(trait.additive):
+                results_dict['additive'] = f"{float(trait.additive):.3f}"
+            results_dict['sample_r'] = f"{float(trait_corr_data.get('sample_r',0)):.3f}"
+            results_dict['num_overlap'] = trait.num_overlap
+            results_dict['sample_p'] = f"{float(trait_corr_data.get('sample_p',0)):.3e}"
+            results_dict['lit_corr'] = "--"
+            results_dict['tissue_corr'] = "--"
+            results_dict['tissue_pvalue'] = "--"
+            tissue_corr = trait_corr_data.get('tissue_corr',0)
+            lit_corr = trait_corr_data.get('lit_corr',0)
+            if bool(lit_corr):
+                results_dict['lit_corr'] = f"{float(trait_corr_data.get('lit_corr',0)):.3f}"
+            if bool(tissue_corr):
+                results_dict['tissue_corr'] = f"{float(trait_corr_data.get('tissue_corr',0)):.3f}"
+                results_dict['tissue_pvalue'] = f"{float(trait_corr_data.get('tissue_pvalue',0)):.3e}"
+        elif target_dataset.type == "Publish":
+            results_dict['abbreviation_display'] = "N/A"
+            results_dict['description'] = "N/A"
+            results_dict['mean'] = "N/A"
+            results_dict['authors_display'] = "N/A"
+            results_dict['additive'] = "N/A"
+            if for_api:
+                results_dict['pubmed_id'] = "N/A"
+                results_dict['year'] = "N/A"
+            else:
+                results_dict['pubmed_link'] = "N/A"
+                results_dict['pubmed_text'] = "N/A"
+
+            if bool(trait.abbreviation):
+                results_dict['abbreviation_display'] = trait.abbreviation
+            if bool(trait.description_display):
+                results_dict['description'] = trait.description_display
+            if bool(trait.mean):
+                results_dict['mean'] = f"{float(trait.mean):.3f}"
+            if bool(trait.authors):
+                authors_list = trait.authors.split(',')
+                if len(authors_list) > 6:
+                    results_dict['authors_display'] = ", ".join(
+                        authors_list[:6]) + ", et al."
+                else:
+                    results_dict['authors_display'] = trait.authors
+            if bool(trait.pubmed_id):
+                if for_api:
+                    results_dict['pubmed_id'] = trait.pubmed_id
+                    results_dict['year'] = trait.pubmed_text
+                else:
+                    results_dict['pubmed_link'] = trait.pubmed_link
+                    results_dict['pubmed_text'] = trait.pubmed_text
+            try:
+                results_dict['lod_score'] = f"{float(trait.LRS_score_repr) / 4.61:.1f}"
+            except:
+                results_dict['lod_score'] = "N/A"
+            results_dict['lrs_location'] = trait.LRS_location_repr
+            if bool(trait.additive):
+                results_dict['additive'] = f"{float(trait.additive):.3f}"
+            results_dict['sample_r'] = f"{float(trait_corr_data.get('sample_r',0)):.3f}"
+            results_dict['num_overlap'] = trait.num_overlap
+            results_dict['sample_p'] = f"{float(trait_corr_data.get('sample_p',0)):.3e}"
+        else:
+            results_dict['location'] = trait.location_repr
+            results_dict['sample_r'] = f"{float(trait_corr_data.get('sample_r',0)):.3f}"
+            results_dict['num_overlap'] = trait.num_overlap
+            results_dict['sample_p'] = f"{float(trait_corr_data.get('sample_p',0)):.3e}"
+
+        results_list.append(results_dict)
+
+    return results_list
+
+
-- 
cgit v1.2.3


From 96eeaeec98de74607108127f3c347542e6a3e991 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Mon, 24 May 2021 16:37:36 +0300
Subject: pep8 formatting

---
 wqflask/base/data_set.py | 214 ++++++++++++++++++++++++-----------------------
 1 file changed, 109 insertions(+), 105 deletions(-)

diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 10f0e110..7080b7b7 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -647,12 +647,8 @@ class DataSet:
                 "Dataset {} is not yet available in GeneNetwork.".format(self.name))
             pass
 
-
-
-
     def chunk_dataset(self, dataset, n):
 
-
         results = {}
 
         query = """
@@ -665,10 +661,7 @@ class DataSet:
 
         # should cache this
 
-        traits_name_dict= dict(g.db.execute(query).fetchall())
-
-
-
+        traits_name_dict = dict(g.db.execute(query).fetchall())
 
         for i in range(0, len(dataset), n):
             matrix = list(dataset[i:i + n])
@@ -704,50 +697,50 @@ class DataSet:
                 FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)
                 WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
                 and ProbeSetFreeze.Name = '{}'
-                and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(create_in_clause(sample_ids),self.name)
+                and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(create_in_clause(sample_ids), self.name)
 
-        query_results=list(g.db.execute(query).fetchall())
+        query_results = list(g.db.execute(query).fetchall())
 
-        data_results=self.chunk_dataset(query_results, len(sample_ids))
-        self.trait_data=data_results
+        data_results = self.chunk_dataset(query_results, len(sample_ids))
+        self.trait_data = data_results
 
     def get_trait_data(self, sample_list=None):
         if sample_list:
-            self.samplelist=sample_list
+            self.samplelist = sample_list
         else:
-            self.samplelist=self.group.samplelist
+            self.samplelist = self.group.samplelist
 
         if self.group.parlist != None and self.group.f1list != None:
             if (self.group.parlist + self.group.f1list) in self.samplelist:
                 self.samplelist += self.group.parlist + self.group.f1list
 
-        query="""
+        query = """
             SELECT Strain.Name, Strain.Id FROM Strain, Species
             WHERE Strain.Name IN {}
             and Strain.SpeciesId=Species.Id
             and Species.name = '{}'
             """.format(create_in_clause(self.samplelist), *mescape(self.group.species))
         logger.sql(query)
-        results=dict(g.db.execute(query).fetchall())
-        sample_ids=[results[item] for item in self.samplelist]
+        results = dict(g.db.execute(query).fetchall())
+        sample_ids = [results[item] for item in self.samplelist]
 
         # MySQL limits the number of tables that can be used in a join to 61,
         # so we break the sample ids into smaller chunks
         # Postgres doesn't have that limit, so we can get rid of this after we transition
-        chunk_size=50
-        number_chunks=int(math.ceil(len(sample_ids) / chunk_size))
-        trait_sample_data=[]
+        chunk_size = 50
+        number_chunks = int(math.ceil(len(sample_ids) / chunk_size))
+        trait_sample_data = []
         for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks):
             if self.type == "Publish":
-                dataset_type="Phenotype"
+                dataset_type = "Phenotype"
             else:
-                dataset_type=self.type
-            temp=['T%s.value' % item for item in sample_ids_step]
+                dataset_type = self.type
+            temp = ['T%s.value' % item for item in sample_ids_step]
             if self.type == "Publish":
-                query="SELECT {}XRef.Id,".format(escape(self.type))
+                query = "SELECT {}XRef.Id,".format(escape(self.type))
             else:
-                query="SELECT {}.Name,".format(escape(dataset_type))
-            data_start_pos=1
+                query = "SELECT {}.Name,".format(escape(dataset_type))
+            data_start_pos = 1
             query += ', '.join(temp)
             query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(dataset_type,
                                                                      self.type,
@@ -776,27 +769,27 @@ class DataSet:
                         """.format(*mescape(self.type, self.type, self.type, self.type,
                                             self.name, dataset_type, self.type, self.type, dataset_type))
 
-            results=g.db.execute(query).fetchall()
+            results = g.db.execute(query).fetchall()
             trait_sample_data.append(results)
 
-        trait_count=len(trait_sample_data[0])
-        self.trait_data=collections.defaultdict(list)
+        trait_count = len(trait_sample_data[0])
+        self.trait_data = collections.defaultdict(list)
 
         # put all of the separate data together into a dictionary where the keys are
         # trait names and values are lists of sample values
         for trait_counter in range(trait_count):
-            trait_name=trait_sample_data[0][trait_counter][0]
+            trait_name = trait_sample_data[0][trait_counter][0]
             for chunk_counter in range(int(number_chunks)):
                 self.trait_data[trait_name] += (
                     trait_sample_data[chunk_counter][trait_counter][data_start_pos:])
 
 
 class PhenotypeDataSet(DataSet):
-    DS_NAME_MAP['Publish']='PhenotypeDataSet'
+    DS_NAME_MAP['Publish'] = 'PhenotypeDataSet'
 
     def setup(self):
         # Fields in the database table
-        self.search_fields=['Phenotype.Post_publication_description',
+        self.search_fields = ['Phenotype.Post_publication_description',
                               'Phenotype.Pre_publication_description',
                               'Phenotype.Pre_publication_abbreviation',
                               'Phenotype.Post_publication_abbreviation',
@@ -809,7 +802,7 @@ class PhenotypeDataSet(DataSet):
                               'PublishXRef.Id']
 
         # Figure out what display_fields is
-        self.display_fields=['name', 'group_code',
+        self.display_fields = ['name', 'group_code',
                                'pubmed_id',
                                'pre_publication_description',
                                'post_publication_description',
@@ -827,7 +820,7 @@ class PhenotypeDataSet(DataSet):
                                'sequence', 'units', 'comments']
 
         # Fields displayed in the search results table header
-        self.header_fields=['Index',
+        self.header_fields = ['Index',
                               'Record',
                               'Description',
                               'Authors',
@@ -836,9 +829,9 @@ class PhenotypeDataSet(DataSet):
                               'Max LRS Location',
                               'Additive Effect']
 
-        self.type='Publish'
+        self.type = 'Publish'
 
-        self.query_for_group='''
+        self.query_for_group = '''
                             SELECT
                                     InbredSet.Name, InbredSet.Id, InbredSet.GeneticType
                             FROM
@@ -858,13 +851,13 @@ class PhenotypeDataSet(DataSet):
             if not this_trait.haveinfo:
                 this_trait.retrieve_info(get_qtl_info=True)
 
-            description=this_trait.post_publication_description
+            description = this_trait.post_publication_description
 
             # If the dataset is confidential and the user has access to confidential
             # phenotype traits, then display the pre-publication description instead
             # of the post-publication description
             if this_trait.confidential:
-                this_trait.description_display=""
+                this_trait.description_display = ""
                 continue   # for now, because no authorization features
 
                 if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(
@@ -872,46 +865,46 @@ class PhenotypeDataSet(DataSet):
                         userName=self.userName,
                         authorized_users=this_trait.authorized_users):
 
-                    description=this_trait.pre_publication_description
+                    description = this_trait.pre_publication_description
 
             if len(description) > 0:
-                this_trait.description_display=description.strip()
+                this_trait.description_display = description.strip()
             else:
-                this_trait.description_display=""
+                this_trait.description_display = ""
 
             if not this_trait.year.isdigit():
-                this_trait.pubmed_text="N/A"
+                this_trait.pubmed_text = "N/A"
             else:
-                this_trait.pubmed_text=this_trait.year
+                this_trait.pubmed_text = this_trait.year
 
             if this_trait.pubmed_id:
-                this_trait.pubmed_link=webqtlConfig.PUBMEDLINK_URL % this_trait.pubmed_id
+                this_trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % this_trait.pubmed_id
 
             # LRS and its location
-            this_trait.LRS_score_repr="N/A"
-            this_trait.LRS_location_repr="N/A"
+            this_trait.LRS_score_repr = "N/A"
+            this_trait.LRS_location_repr = "N/A"
 
             if this_trait.lrs:
-                query="""
+                query = """
                     select Geno.Chr, Geno.Mb from Geno, Species
                     where Species.Name = '%s' and
                         Geno.Name = '%s' and
                         Geno.SpeciesId = Species.Id
                 """ % (species, this_trait.locus)
                 logger.sql(query)
-                result=g.db.execute(query).fetchone()
+                result = g.db.execute(query).fetchone()
 
                 if result:
                     if result[0] and result[1]:
-                        LRS_Chr=result[0]
-                        LRS_Mb=result[1]
+                        LRS_Chr = result[0]
+                        LRS_Mb = result[1]
 
-                        this_trait.LRS_score_repr=LRS_score_repr='%3.1f' % this_trait.lrs
-                        this_trait.LRS_location_repr=LRS_location_repr='Chr%s: %.6f' % (
+                        this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
+                        this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (
                             LRS_Chr, float(LRS_Mb))
 
     def retrieve_sample_data(self, trait):
-        query="""
+        query = """
                     SELECT
                             Strain.Name, PublishData.value, PublishSE.error, NStrain.count, Strain.Name2
                     FROM
@@ -929,34 +922,34 @@ class PhenotypeDataSet(DataSet):
                             Strain.Name
                     """
         logger.sql(query)
-        results=g.db.execute(query, (trait, self.id)).fetchall()
+        results = g.db.execute(query, (trait, self.id)).fetchall()
         return results
 
 
 class GenotypeDataSet(DataSet):
-    DS_NAME_MAP['Geno']='GenotypeDataSet'
+    DS_NAME_MAP['Geno'] = 'GenotypeDataSet'
 
     def setup(self):
         # Fields in the database table
-        self.search_fields=['Name',
+        self.search_fields = ['Name',
                               'Chr']
 
         # Find out what display_fields is
-        self.display_fields=['name',
+        self.display_fields = ['name',
                                'chr',
                                'mb',
                                'source2',
                                'sequence']
 
         # Fields displayed in the search results table header
-        self.header_fields=['Index',
+        self.header_fields = ['Index',
                               'ID',
                               'Location']
 
         # Todo: Obsolete or rename this field
-        self.type='Geno'
+        self.type = 'Geno'
 
-        self.query_for_group='''
+        self.query_for_group = '''
                 SELECT
                         InbredSet.Name, InbredSet.Id, InbredSet.GeneticType
                 FROM
@@ -975,11 +968,11 @@ class GenotypeDataSet(DataSet):
                 this_trait.retrieveInfo()
 
             if this_trait.chr and this_trait.mb:
-                this_trait.location_repr='Chr%s: %.6f' % (
+                this_trait.location_repr = 'Chr%s: %.6f' % (
                     this_trait.chr, float(this_trait.mb))
 
     def retrieve_sample_data(self, trait):
-        query="""
+        query = """
                     SELECT
                             Strain.Name, GenoData.value, GenoSE.error, "N/A", Strain.Name2
                     FROM
@@ -996,7 +989,7 @@ class GenotypeDataSet(DataSet):
                             Strain.Name
                     """
         logger.sql(query)
-        results=g.db.execute(query,
+        results = g.db.execute(query,
                                (webqtlDatabaseFunction.retrieve_species_id(self.group.name),
                                 trait, self.name)).fetchall()
         return results
@@ -1010,11 +1003,11 @@ class MrnaAssayDataSet(DataSet):
     platform and is far too specific.
 
     '''
-    DS_NAME_MAP['ProbeSet']='MrnaAssayDataSet'
+    DS_NAME_MAP['ProbeSet'] = 'MrnaAssayDataSet'
 
     def setup(self):
         # Fields in the database table
-        self.search_fields=['Name',
+        self.search_fields = ['Name',
                               'Description',
                               'Probe_Target_Description',
                               'Symbol',
@@ -1024,7 +1017,7 @@ class MrnaAssayDataSet(DataSet):
                               'RefSeq_TranscriptId']
 
         # Find out what display_fields is
-        self.display_fields=['name', 'symbol',
+        self.display_fields = ['name', 'symbol',
                                'description', 'probe_target_description',
                                'chr', 'mb',
                                'alias', 'geneid',
@@ -1044,7 +1037,7 @@ class MrnaAssayDataSet(DataSet):
                                'flag']
 
         # Fields displayed in the search results table header
-        self.header_fields=['Index',
+        self.header_fields = ['Index',
                               'Record',
                               'Symbol',
                               'Description',
@@ -1055,9 +1048,9 @@ class MrnaAssayDataSet(DataSet):
                               'Additive Effect']
 
         # Todo: Obsolete or rename this field
-        self.type='ProbeSet'
+        self.type = 'ProbeSet'
 
-        self.query_for_group='''
+        self.query_for_group = '''
                         SELECT
                                 InbredSet.Name, InbredSet.Id, InbredSet.GeneticType
                         FROM
@@ -1075,7 +1068,7 @@ class MrnaAssayDataSet(DataSet):
 
         #  Note: setting trait_list to [] is probably not a great idea.
         if not trait_list:
-            trait_list=[]
+            trait_list = []
 
         for this_trait in trait_list:
 
@@ -1083,33 +1076,33 @@ class MrnaAssayDataSet(DataSet):
                 this_trait.retrieveInfo(QTL=1)
 
             if not this_trait.symbol:
-                this_trait.symbol="N/A"
+                this_trait.symbol = "N/A"
 
             # XZ, 12/08/2008: description
             # XZ, 06/05/2009: Rob asked to add probe target description
-            description_string=str(
+            description_string = str(
                 str(this_trait.description).strip(codecs.BOM_UTF8), 'utf-8')
-            target_string=str(
+            target_string = str(
                 str(this_trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8')
 
             if len(description_string) > 1 and description_string != 'None':
-                description_display=description_string
+                description_display = description_string
             else:
-                description_display=this_trait.symbol
+                description_display = this_trait.symbol
 
             if (len(description_display) > 1 and description_display != 'N/A'
                     and len(target_string) > 1 and target_string != 'None'):
-                description_display=description_display + '; ' + target_string.strip()
+                description_display = description_display + '; ' + target_string.strip()
 
             # Save it for the jinja2 template
-            this_trait.description_display=description_display
+            this_trait.description_display = description_display
 
             if this_trait.chr and this_trait.mb:
-                this_trait.location_repr='Chr%s: %.6f' % (
+                this_trait.location_repr = 'Chr%s: %.6f' % (
                     this_trait.chr, float(this_trait.mb))
 
             # Get mean expression value
-            query=(
+            query = (
                 """select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet
                 where ProbeSetXRef.ProbeSetFreezeId = %s and
                 ProbeSet.Id = ProbeSetXRef.ProbeSetId and
@@ -1119,38 +1112,38 @@ class MrnaAssayDataSet(DataSet):
 
             # logger.debug("query is:", pf(query))
             logger.sql(query)
-            result=g.db.execute(query).fetchone()
+            result = g.db.execute(query).fetchone()
 
-            mean=result[0] if result else 0
+            mean = result[0] if result else 0
 
             if mean:
-                this_trait.mean="%2.3f" % mean
+                this_trait.mean = "%2.3f" % mean
 
             # LRS and its location
-            this_trait.LRS_score_repr='N/A'
-            this_trait.LRS_location_repr='N/A'
+            this_trait.LRS_score_repr = 'N/A'
+            this_trait.LRS_location_repr = 'N/A'
 
             # Max LRS and its Locus location
             if this_trait.lrs and this_trait.locus:
-                query="""
+                query = """
                     select Geno.Chr, Geno.Mb from Geno, Species
                     where Species.Name = '{}' and
                         Geno.Name = '{}' and
                         Geno.SpeciesId = Species.Id
                 """.format(species, this_trait.locus)
                 logger.sql(query)
-                result=g.db.execute(query).fetchone()
+                result = g.db.execute(query).fetchone()
 
                 if result:
-                    lrs_chr, lrs_mb=result
-                    this_trait.LRS_score_repr='%3.1f' % this_trait.lrs
-                    this_trait.LRS_location_repr='Chr%s: %.6f' % (
+                    lrs_chr, lrs_mb = result
+                    this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs
+                    this_trait.LRS_location_repr = 'Chr%s: %.6f' % (
                         lrs_chr, float(lrs_mb))
 
         return trait_list
 
     def retrieve_sample_data(self, trait):
-        query="""
+        query = """
                     SELECT
                             Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2
                     FROM
@@ -1171,19 +1164,19 @@ class MrnaAssayDataSet(DataSet):
                             Strain.Name
                     """ % (escape(trait), escape(self.name))
         logger.sql(query)
-        results=g.db.execute(query).fetchall()
+        results = g.db.execute(query).fetchall()
         # logger.debug("RETRIEVED RESULTS HERE:", results)
         return results
 
     def retrieve_genes(self, column_name):
-        query="""
+        query = """
                     select ProbeSet.Name, ProbeSet.%s
                     from ProbeSet,ProbeSetXRef
                     where ProbeSetXRef.ProbeSetFreezeId = %s and
                     ProbeSetXRef.ProbeSetId=ProbeSet.Id;
                 """ % (column_name, escape(str(self.id)))
         logger.sql(query)
-        results=g.db.execute(query).fetchall()
+        results = g.db.execute(query).fetchall()
 
         return dict(results)
 
@@ -1191,40 +1184,51 @@ class MrnaAssayDataSet(DataSet):
 class TempDataSet(DataSet):
     '''Temporary user-generated data set'''
 
-    DS_NAME_MAP['Temp']='TempDataSet'
+    DS_NAME_MAP['Temp'] = 'TempDataSet'
 
     def setup(self):
-        self.search_fields=['name',
+        self.search_fields = ['name',
                               'description']
 
-        self.display_fields=['name',
+        self.display_fields = ['name',
                                'description']
 
-        self.header_fields=['Name',
+        self.header_fields = ['Name',
                               'Description']
 
-        self.type='Temp'
+        self.type = 'Temp'
 
         # Need to double check later how these are used
-        self.id=1
-        self.fullname='Temporary Storage'
-        self.shortname='Temp'
+        self.id = 1
+        self.fullname = 'Temporary Storage'
+        self.shortname = 'Temp'
 
 
 def geno_mrna_confidentiality(ob):
-    dataset_table=ob.type + "Freeze"
+    dataset_table = ob.type + "Freeze"
     # logger.debug("dataset_table [%s]: %s" % (type(dataset_table), dataset_table))
 
-    query='''SELECT Id, Name, FullName, confidentiality,
+    query = '''SELECT Id, Name, FullName, confidentiality,
                         AuthorisedUsers FROM %s WHERE Name = "%s"''' % (dataset_table, ob.name)
     logger.sql(query)
-    result=g.db.execute(query)
+    result = g.db.execute(query)
+
+    (dataset_id,
+     name,
+     full_name,
+     confidential,
+     authorized_users) = result.fetchall()[0]
+
+    if confidential:
+        return True
+uery)
+    result = g.db.execute(query)
 
     (dataset_id,
      name,
      full_name,
      confidential,
-     authorized_users)=result.fetchall()[0]
+     authorized_users) = result.fetchall()[0]
 
     if confidential:
         return True
-- 
cgit v1.2.3


From f80c11f8d68b6a01215e8260234931dbf211fddf Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Mon, 24 May 2021 16:43:45 +0300
Subject: minor fix

---
 wqflask/base/data_set.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 7080b7b7..62afdb63 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -1221,7 +1221,6 @@ def geno_mrna_confidentiality(ob):
 
     if confidential:
         return True
-uery)
     result = g.db.execute(query)
 
     (dataset_id,
-- 
cgit v1.2.3


From d5cb6d1a7e14230c30df6681b071165951c2cb69 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Wed, 9 Jun 2021 07:25:03 +0300
Subject: remove unused functions + minor fixes

---
 wqflask/base/data_set.py                           |   2 +
 wqflask/wqflask/correlation/correlation_gn3_api.py | 115 +--------------------
 2 files changed, 7 insertions(+), 110 deletions(-)

diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 62afdb63..d31161ec 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -672,6 +672,8 @@ class DataSet:
         return results
 
     def get_probeset_data(self, sample_list=None, trait_ids=None):
+
+        # improvement of get trait data--->>>
         if sample_list:
             self.samplelist = sample_list
 
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 3e1ce1dc..eb986655 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -27,13 +27,11 @@ def create_target_this_trait(start_vars):
     return (this_dataset, this_trait, target_dataset, sample_data)
 
 
-
-def test_process_data(this_trait,dataset,start_vars):
+def test_process_data(this_trait, dataset, start_vars):
     """test function for bxd,all and other sample data"""
 
     corr_samples_group = start_vars["corr_samples_group"]
 
-
     primary_samples = dataset.group.samplelist
     if dataset.group.parlist != None:
         primary_samples += dataset.group.parlist
@@ -51,10 +49,12 @@ def test_process_data(this_trait,dataset,start_vars):
         if corr_samples_group == 'samples_other':
             primary_samples = [x for x in primary_samples if x not in (
                 dataset.group.parlist + dataset.group.f1list)]
-        sample_data = process_samples(start_vars, list(this_trait.data.keys()), primary_samples)
+        sample_data = process_samples(start_vars, list(
+            this_trait.data.keys()), primary_samples)
 
     return sample_data
 
+
 def process_samples(start_vars, sample_names, excluded_samples=None):
     """process samples"""
     sample_data = {}
@@ -149,7 +149,7 @@ def fetch_sample_data(start_vars, this_trait, this_dataset, target_dataset):
 
     # sample_data = test_process_data(this_trait,this_dataset,start_vars)
 
-    if target_dataset.type =="ProbeSet":
+    if target_dataset.type == "ProbeSet":
         # pass
         target_dataset.get_probeset_data(list(sample_data.keys()))
     else:
@@ -238,7 +238,6 @@ def compute_correlation(start_vars, method="pearson"):
                         "target_dataset": start_vars['corr_dataset'],
                         "return_results": corr_return_results}
 
-
     return correlation_data
 
 
@@ -299,107 +298,3 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict):
         }
         return (primary_tissue_data, target_tissue_data)
     return None
-
-
-def generate_corr_data(corr_results, target_dataset):
-    counter = 0
-    results_list = []
-    for (index, trait_corr) in enumerate(corr_results):
-        trait_name = list(trait_corr.keys())[0]
-        trait = create_trait(dataset=target_dataset,
-                             name=trait_name)
-
-        trait_corr_data =  trait_corr[trait_name]
-
-        if trait.view == False:
-            continue
-        results_dict = {}
-        results_dict['index'] = index + 1
-        results_dict['trait_id'] = trait.name
-        results_dict['dataset'] = trait.dataset.name
-        # results_dict['hmac'] = hmac.data_hmac(
-        #     '{}:{}'.format(trait.name, trait.dataset.name))
-        if target_dataset.type == "ProbeSet":
-            results_dict['symbol'] = trait.symbol
-            results_dict['description'] = "N/A"
-            results_dict['location'] = trait.location_repr
-            results_dict['mean'] = "N/A"
-            results_dict['additive'] = "N/A"
-            if bool(trait.description_display):
-                results_dict['description'] = trait.description_display
-            if bool(trait.mean):
-                results_dict['mean'] = f"{float(trait.mean):.3f}"
-            try:
-                results_dict['lod_score'] = f"{float(trait.LRS_score_repr) / 4.61:.1f}"
-            except:
-                results_dict['lod_score'] = "N/A"
-            results_dict['lrs_location'] = trait.LRS_location_repr
-            if bool(trait.additive):
-                results_dict['additive'] = f"{float(trait.additive):.3f}"
-            results_dict['sample_r'] = f"{float(trait_corr_data.get('sample_r',0)):.3f}"
-            results_dict['num_overlap'] = trait.num_overlap
-            results_dict['sample_p'] = f"{float(trait_corr_data.get('sample_p',0)):.3e}"
-            results_dict['lit_corr'] = "--"
-            results_dict['tissue_corr'] = "--"
-            results_dict['tissue_pvalue'] = "--"
-            tissue_corr = trait_corr_data.get('tissue_corr',0)
-            lit_corr = trait_corr_data.get('lit_corr',0)
-            if bool(lit_corr):
-                results_dict['lit_corr'] = f"{float(trait_corr_data.get('lit_corr',0)):.3f}"
-            if bool(tissue_corr):
-                results_dict['tissue_corr'] = f"{float(trait_corr_data.get('tissue_corr',0)):.3f}"
-                results_dict['tissue_pvalue'] = f"{float(trait_corr_data.get('tissue_pvalue',0)):.3e}"
-        elif target_dataset.type == "Publish":
-            results_dict['abbreviation_display'] = "N/A"
-            results_dict['description'] = "N/A"
-            results_dict['mean'] = "N/A"
-            results_dict['authors_display'] = "N/A"
-            results_dict['additive'] = "N/A"
-            if for_api:
-                results_dict['pubmed_id'] = "N/A"
-                results_dict['year'] = "N/A"
-            else:
-                results_dict['pubmed_link'] = "N/A"
-                results_dict['pubmed_text'] = "N/A"
-
-            if bool(trait.abbreviation):
-                results_dict['abbreviation_display'] = trait.abbreviation
-            if bool(trait.description_display):
-                results_dict['description'] = trait.description_display
-            if bool(trait.mean):
-                results_dict['mean'] = f"{float(trait.mean):.3f}"
-            if bool(trait.authors):
-                authors_list = trait.authors.split(',')
-                if len(authors_list) > 6:
-                    results_dict['authors_display'] = ", ".join(
-                        authors_list[:6]) + ", et al."
-                else:
-                    results_dict['authors_display'] = trait.authors
-            if bool(trait.pubmed_id):
-                if for_api:
-                    results_dict['pubmed_id'] = trait.pubmed_id
-                    results_dict['year'] = trait.pubmed_text
-                else:
-                    results_dict['pubmed_link'] = trait.pubmed_link
-                    results_dict['pubmed_text'] = trait.pubmed_text
-            try:
-                results_dict['lod_score'] = f"{float(trait.LRS_score_repr) / 4.61:.1f}"
-            except:
-                results_dict['lod_score'] = "N/A"
-            results_dict['lrs_location'] = trait.LRS_location_repr
-            if bool(trait.additive):
-                results_dict['additive'] = f"{float(trait.additive):.3f}"
-            results_dict['sample_r'] = f"{float(trait_corr_data.get('sample_r',0)):.3f}"
-            results_dict['num_overlap'] = trait.num_overlap
-            results_dict['sample_p'] = f"{float(trait_corr_data.get('sample_p',0)):.3e}"
-        else:
-            results_dict['location'] = trait.location_repr
-            results_dict['sample_r'] = f"{float(trait_corr_data.get('sample_r',0)):.3f}"
-            results_dict['num_overlap'] = trait.num_overlap
-            results_dict['sample_p'] = f"{float(trait_corr_data.get('sample_p',0)):.3e}"
-
-        results_list.append(results_dict)
-
-    return results_list
-
-
-- 
cgit v1.2.3


From cfc738303e7ddd213919a0a15885d1e846277848 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Mon, 14 Jun 2021 00:52:10 +0300
Subject: remove print statements

---
 wqflask/utility/helper_functions.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py
index 50e00421..27dd0729 100644
--- a/wqflask/utility/helper_functions.py
+++ b/wqflask/utility/helper_functions.py
@@ -21,15 +21,11 @@ def get_species_dataset_trait(self, start_vars):
             self.dataset = data_set.create_dataset(start_vars['dataset'])
     else:
         self.dataset = data_set.create_dataset(start_vars['dataset'])
-    logger.debug("After creating dataset")
     self.species = TheSpecies(dataset=self.dataset)
-    logger.debug("After creating species")
     self.this_trait = create_trait(dataset=self.dataset,
                                    name=start_vars['trait_id'],
                                    cellid=None,
                                    get_qtl_info=True)
-    logger.debug("After creating trait")
-
 
 def get_trait_db_obs(self, trait_db_list):
     if isinstance(trait_db_list, str):
-- 
cgit v1.2.3


From 8418cf1554c664130a16b0b2030d1ca7680bf81c Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Mon, 14 Jun 2021 00:54:02 +0300
Subject: sort sample name by sample_ids

---
 wqflask/base/data_set.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index d31161ec..181e83be 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -693,6 +693,9 @@ class DataSet:
         results = dict(g.db.execute(query).fetchall())
         sample_ids = [results[item] for item in self.samplelist]
 
+        sorted_samplelist = [strain_name for strain_name, strain_id in sorted(
+            results.items(), key=lambda item: item[1])]
+
         query = """SELECT * from ProbeSetData
                 where StrainID in {}
                 and id in (SELECT ProbeSetXRef.DataId
@@ -702,9 +705,10 @@ class DataSet:
                 and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(create_in_clause(sample_ids), self.name)
 
         query_results = list(g.db.execute(query).fetchall())
-
         data_results = self.chunk_dataset(query_results, len(sample_ids))
+        self.samplelist = sorted_samplelist
         self.trait_data = data_results
+        
 
     def get_trait_data(self, sample_list=None):
         if sample_list:
-- 
cgit v1.2.3


From d703e0ae5f3ef92efb026bfdcc9d1bfe2d296a34 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Mon, 14 Jun 2021 00:54:55 +0300
Subject: minor refactoring

---
 wqflask/wqflask/correlation/correlation_gn3_api.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index eb986655..fedc3146 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -146,11 +146,7 @@ def fetch_sample_data(start_vars, this_trait, this_dataset, target_dataset):
 
     sample_data = process_samples(
         start_vars, this_dataset.group.samplelist)
-
-    # sample_data = test_process_data(this_trait,this_dataset,start_vars)
-
     if target_dataset.type == "ProbeSet":
-        # pass
         target_dataset.get_probeset_data(list(sample_data.keys()))
     else:
         target_dataset.get_trait_data(list(sample_data.keys()))
@@ -159,9 +155,6 @@ def fetch_sample_data(start_vars, this_trait, this_dataset, target_dataset):
         "trait_sample_data": sample_data,
         "trait_id": start_vars["trait_id"]
     }
-
-    # should remove this  len(samplelist) == len(strain_values)
-
     results = map_shared_keys_to_values(
         target_dataset.samplelist, target_dataset.trait_data)
 
-- 
cgit v1.2.3


From e6a08e0fe5a2382cef9c7b9a3b71e17dda6f784e Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Mon, 14 Jun 2021 01:15:11 +0300
Subject: add f1list and parlist to primary_samples

---
 wqflask/wqflask/correlation/correlation_gn3_api.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index fedc3146..30c05f03 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -146,6 +146,10 @@ def fetch_sample_data(start_vars, this_trait, this_dataset, target_dataset):
 
     sample_data = process_samples(
         start_vars, this_dataset.group.samplelist)
+
+    sample_data = test_process_data(this_trait, this_dataset, start_vars)
+
+
     if target_dataset.type == "ProbeSet":
         target_dataset.get_probeset_data(list(sample_data.keys()))
     else:
-- 
cgit v1.2.3


From b133635fadba19ab1017dc1739cf5ddca1a6bd08 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Mon, 14 Jun 2021 09:28:46 +0300
Subject: delete loggers and comments

---
 wqflask/base/data_set.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 181e83be..6dc44829 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -440,7 +440,6 @@ class DatasetGroup:
         # genotype_1 is Dataset Object without parents and f1
         # genotype_2 is Dataset Object with parents and f1 (not for intercross)
 
-        # genotype_1 = reaper.Dataset()
 
         # reaper barfs on unicode filenames, so here we ensure it's a string
         if self.genofile:
@@ -1116,7 +1115,6 @@ class MrnaAssayDataSet(DataSet):
             """ % (escape(str(this_trait.dataset.id)),
                    escape(this_trait.name)))
 
-            # logger.debug("query is:", pf(query))
             logger.sql(query)
             result = g.db.execute(query).fetchone()
 
@@ -1171,7 +1169,6 @@ class MrnaAssayDataSet(DataSet):
                     """ % (escape(trait), escape(self.name))
         logger.sql(query)
         results = g.db.execute(query).fetchall()
-        # logger.debug("RETRIEVED RESULTS HERE:", results)
         return results
 
     def retrieve_genes(self, column_name):
@@ -1212,7 +1209,6 @@ class TempDataSet(DataSet):
 
 def geno_mrna_confidentiality(ob):
     dataset_table = ob.type + "Freeze"
-    # logger.debug("dataset_table [%s]: %s" % (type(dataset_table), dataset_table))
 
     query = '''SELECT Id, Name, FullName, confidentiality,
                         AuthorisedUsers FROM %s WHERE Name = "%s"''' % (dataset_table, ob.name)
-- 
cgit v1.2.3


From f7027d4c87786da0d505e0e8e1da23d6e55f6d80 Mon Sep 17 00:00:00 2001
From: BonfaceKilz
Date: Thu, 17 Jun 2021 10:09:38 +0300
Subject: doc: rpy2-performance: Replace begin_export with begin_src

---
 doc/rpy2-performance.org | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/doc/rpy2-performance.org b/doc/rpy2-performance.org
index 29b99ba0..ddcac81a 100644
--- a/doc/rpy2-performance.org
+++ b/doc/rpy2-performance.org
@@ -21,7 +21,7 @@ server:
 
 However, when loading the homepage, I occasionally ran into this trace:
 
-#+begin_export ascii
+#+begin_src
 DEBUG:wqflask.views:.check_access_permissions: @app.before_request check_access_permissions
 DEBUG:wqflask.views:.shutdown_session: remove db_session
 WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: Error: ignoring SIGPIPE signal
@@ -58,7 +58,7 @@ Possible actions:
 
 Selection:
 
-#+end_export
+#+end_src
 
 This blocks the flask service. Seems to be related to: [[https://github.com/rpy2/rpy2/issues/769][rpy2-issue#769]]
 and [[https://github.com/rpy2/rpy2/issues/809][rpy2-issue#809]]. I tried to reproduce this problem using some endpoint:
@@ -80,7 +80,7 @@ and [[https://github.com/rpy2/rpy2/issues/809][rpy2-issue#809]]. I tried to repr
 
 which generates this trace:
 
-#+begin_export ascii
+#+begin_src
 /home/bonface/opt/python3-genenetwork2/lib/python3.8/site-packages/rpy2/rinterface.py:955: UserWarning: R is not initialized by the main thread.
               Its taking over SIGINT cannot be reversed here, and as a
               consequence the embedded R cannot be interrupted with Ctrl-C.
@@ -89,7 +89,7 @@ which generates this trace:
 warnings.warn(
 DEBUG:wqflask.views:.shutdown_session: remove db_session
 
-#+end_export
+#+end_src
 
 Modifying the endpoint to:
 
@@ -108,7 +108,7 @@ Modifying the endpoint to:
 
 and refreshing the page a couple of times, I get:
 
-#+begin_export ascii
+#+begin_src
 DEBUG:wqflask.views:.check_access_permissions: @app.before_request check_access_
 permissions
 Sleeping for 3 seconds
@@ -136,7 +136,7 @@ WARNING:rpy2.rinterface_lib.callbacks:R[write to console]:  library '/home/bonfa
 ce/R/x86_64-unknown-linux-gnu-library/4.0' contains no packages
 
 WARNING:rpy2.rinterface_lib.callbacks:R[write to console]:
-*** caught segfault ***
+\*** caught segfault ***
 
 WARNING:rpy2.rinterface_lib.callbacks:R[write to console]: address (nil), cause
 'memory not mapped'
@@ -151,9 +151,11 @@ Possible actions:
 Selection: [2021-06-16 13:11:00 +0300] [18657] [INFO] Handling signal: winch
 [2021-06-16 13:11:00 +0300] [18657] [INFO] Handling signal: winch
 [2021-06-16 13:13:02 +0300] [18657] [INFO] Handling signal: winch
-#+end_export
+#+end_src
 
-However, this seems to be non-deterministic, in the sense that I can't really pin what causes the above. I've tried to write a Locust Test that simulates users hitting that endpoint:
+However, this seems to be non-deterministic, in the sense that I can't
+really pin what causes the above. I've tried to write a Locust Test
+that simulates users hitting that endpoint:
 
 #+begin_src python
 """Load test a single trait page"""
@@ -168,3 +170,4 @@ from locust import HttpUser, task, between
           """Fetch a single trait"""
           self.client.get("/test")
 #+end_src
+
-- 
cgit v1.2.3


From a992e2038424c2acdf11078356204847d128c8c6 Mon Sep 17 00:00:00 2001
From: BonfaceKilz
Date: Thu, 17 Jun 2021 10:11:53 +0300
Subject: docs: rpy2-performance: Indicate version of python-rpy2 being used

---
 doc/rpy2-performance.org | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/rpy2-performance.org b/doc/rpy2-performance.org
index ddcac81a..177c427e 100644
--- a/doc/rpy2-performance.org
+++ b/doc/rpy2-performance.org
@@ -1,10 +1,10 @@
 * Python-Rpy2 performance issues with genenetwork2
 
 At one point, genenetwork2 was down. A possible cause was that it
-wrote into the log file in an infinite loop due to rpy2, so a solution
-was to empty it. Currently, as a work around, rpy2 is disabled by
-removing it's imports. This affects WGCNA/ CTL imports and commenting
-out Biweight Midcorrelation option in the trait page. See:
+wrote into the log file in an infinite loop due to rpy2 (v3.4.4), so a
+solution was to empty it. Currently, as a workaround, rpy2 is
+disabled by removing its imports. This affects the WGCNA/CTL imports and
+means commenting out the Biweight Midcorrelation option in the trait page. See:
 
 - [[https://github.com/genenetwork/genenetwork2/commit/1baf5f7611909c651483208184c5fbf7d4a7a088][1baf5f7]]
 - [[https://github.com/genenetwork/genenetwork2/commit/afee4d625248565857df98d3510f680ae6204864][afee4d6]]
-- 
cgit v1.2.3


From a5981b8e71380b171b210d55da58e5037b455a6c Mon Sep 17 00:00:00 2001
From: BonfaceKilz
Date: Thu, 17 Jun 2021 10:20:39 +0300
Subject: doc: rpy2-performance: Add a section exploring possible solutions

---
 doc/rpy2-performance.org | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/doc/rpy2-performance.org b/doc/rpy2-performance.org
index 177c427e..8f917ca0 100644
--- a/doc/rpy2-performance.org
+++ b/doc/rpy2-performance.org
@@ -171,3 +171,12 @@ from locust import HttpUser, task, between
           self.client.get("/test")
 #+end_src
 
+
+** A possible solution
+
+From this [[https://github.com/rpy2/rpy2/issues/809#issuecomment-845923975][comment]], a possible reason for the above traces is that,
+on Flask's end, a [[https://tldp.org/LDP/lpg/node20.html][SIGPIPE]] is somehow generated by our Python
+code. However, at that particular point, the R thread just happens to
+be running, and R can't handle the signal correctly. This seems to have been
+fixed in this [[https://github.com/rpy2/rpy2/pull/810][PR]], with this [[https://github.com/rpy2/rpy2/issues/809#issuecomment-851618215][explanation]]. On our end, to pick up these
+changes, we have to update our python-rpy2 version.
-- 
cgit v1.2.3


From 90a427fcc855910a812f3cee710ede335071768a Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 17 Jun 2021 19:43:35 +0000
Subject: Fixed issue that caused filtering by attribute values to not work if
 attribute values didn't exist for some samples

---
 wqflask/wqflask/static/new/javascript/show_trait.js | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/wqflask/wqflask/static/new/javascript/show_trait.js b/wqflask/wqflask/static/new/javascript/show_trait.js
index 569046d3..77ef1720 100644
--- a/wqflask/wqflask/static/new/javascript/show_trait.js
+++ b/wqflask/wqflask/static/new/javascript/show_trait.js
@@ -747,7 +747,11 @@ filter_by_value = function() {
     if (filter_column == "value" || filter_column == "stderr"){
       var this_col_value = filter_val_nodes[i].childNodes[0].value;
     } else {
-      var this_col_value = filter_val_nodes[i].childNodes[0].data;
+      if (filter_val_nodes[i].childNodes[0] !== undefined){
+        var this_col_value = filter_val_nodes[i].childNodes[0].data;
+      } else {
+        continue
+      }
     }
     let this_val_node = val_nodes[i].childNodes[0];
 
@@ -1700,4 +1704,4 @@ $('#normalize').click(edit_data_change);
 Number.prototype.countDecimals = function () {
   if(Math.floor(this.valueOf()) === this.valueOf()) return 0;
     return this.toString().split(".")[1].length || 0;
-}
\ No newline at end of file
+}
-- 
cgit v1.2.3


From e5a8fad251bff8160e49c6c9fea42b815488e6c3 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 17 Jun 2021 19:45:40 +0000
Subject: Added function for getting list of attributes with all numerical
 values (that can be used with the 'Filter by value' feature)

---
 wqflask/wqflask/show_trait/show_trait.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index 9ee6a16d..c07430dd 100644
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -177,10 +177,13 @@ class ShowTrait:
         sample_lists = [group.sample_list for group in self.sample_groups]
 
         categorical_var_list = []
+        self.numerical_var_list = []
         if not self.temp_trait:
             # ZS: Only using first samplelist, since I think mapping only uses those samples
             categorical_var_list = get_categorical_variables(
                 self.this_trait, self.sample_groups[0])
+            self.numerical_var_list = get_numerical_variables(
+                self.this_trait, self.sample_groups[0])
 
         # ZS: Get list of chromosomes to select for mapping
         self.chr_list = [["All", -1]]
@@ -694,6 +697,26 @@ def get_categorical_variables(this_trait, sample_list) -> list:
 
     return categorical_var_list
 
+def get_numerical_variables(this_trait, sample_list) -> list:
+    numerical_var_list = []
+
+    if len(sample_list.attributes) > 0:
+        for attribute in sample_list.attributes:
+            all_numeric = True
+            all_none = True
+            for attr_val in sample_list.attributes[attribute].distinct_values:
+                if not attr_val:
+                    continue
+                try:
+                    val_as_float = float(attr_val)
+                    all_none = False
+                except:
+                    all_numeric = False
+                    break
+            if all_numeric and not all_none:
+                numerical_var_list.append(sample_list.attributes[attribute].name)
+
+    return numerical_var_list
 
 def get_genotype_scales(genofiles):
     geno_scales = {}
-- 
cgit v1.2.3


From 2b52a4f16008a450386cf46d008452942c2d98e4 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 17 Jun 2021 19:46:12 +0000
Subject: Only show all-numerical attributes in the 'Filter samples by value'
 drop-down

---
 .../wqflask/templates/show_trait_transform_and_filter.html   | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/wqflask/wqflask/templates/show_trait_transform_and_filter.html b/wqflask/wqflask/templates/show_trait_transform_and_filter.html
index e3f5ef81..20f78b48 100644
--- a/wqflask/wqflask/templates/show_trait_transform_and_filter.html
+++ b/wqflask/wqflask/templates/show_trait_transform_and_filter.html
@@ -46,19 +46,17 @@
     </div>
     {% endif %}
     <div id="filterMenuSpan" class="input-append block-div-2">
-      <label for="filter_samples_field">Filter samples by {% if not sample_groups[0].attributes %}value{% endif %} </label>
-      {% if sample_groups[0].attributes %}
+      <label for="filter_samples_field">Filter samples by {% if (numerical_var_list|length == 0) and (not js_data.se_exists) %}value{% endif %} </label>
+      {% if (numerical_var_list|length > 0) or js_data.se_exists %}
       <select id="filter_column">
         <option value="value">Value</option>
         {% if js_data.se_exists %}
         <option value="stderr">SE</option>
         {% endif %}
-        {% for attribute in sample_groups[0].attributes %}
-
+        {% for attribute in numerical_var_list %}
         <option value="{{ loop.index }}">
-          {{ sample_groups[0].attributes[attribute].name }}
+          {{ attribute }}
         </option>
-
         {% endfor %}
       </select>
       {% endif %}
@@ -116,4 +114,4 @@
 
     <p>Samples with no value (x) can be hidden by clicking<strong>Hide No Value</strong> button.</p>
   </div>
-</div>
\ No newline at end of file
+</div>
-- 
cgit v1.2.3


From 0e8b5ad4a5a7bf7eba5d5e24b38d5df26d6df395 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 17 Jun 2021 19:53:53 +0000
Subject: UCSC Genome Browser link was marked as a broken link, but it
 apparently works now so I added it back as a link

---
 wqflask/wqflask/templates/base.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wqflask/wqflask/templates/base.html b/wqflask/wqflask/templates/base.html
index ddb1d272..578a0495 100644
--- a/wqflask/wqflask/templates/base.html
+++ b/wqflask/wqflask/templates/base.html
@@ -85,7 +85,7 @@
                                   <li><a href="/snp_browser">Variant Browser</a></li>
                                   <li><a href="http://bnw.genenetwork.org/sourcecodes/home.php">Bayesian Network Webserver</a></li>
                                   <li><a href="https://systems-genetics.org/">Systems Genetics PheWAS</a></li>
-                                  <li><span class="broken_link" href="http://ucscbrowser.genenetwork.org/">Genome Browser</span></li>
+                                  <li><a href="http://ucscbrowser.genenetwork.org/">Genome Browser</a></li>
                                   <li><a href="http://power.genenetwork.org">BXD Power Calculator</a></li>
                                   <li><a href="http://datafiles.genenetwork.org">Interplanetary File System</a></li>
                                 </ul>
-- 
cgit v1.2.3


From aefd88a9950592fb8cdc28cda43a2ca3c39e7f60 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 17 Jun 2021 19:56:34 +0000
Subject: The listserv link was marked as broken but is working, so I made it a
 link again, but the IRC channel link is broken so I marked it as broken

---
 wqflask/wqflask/templates/base.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wqflask/wqflask/templates/base.html b/wqflask/wqflask/templates/base.html
index 578a0495..12dddf89 100644
--- a/wqflask/wqflask/templates/base.html
+++ b/wqflask/wqflask/templates/base.html
@@ -208,7 +208,7 @@
                     <a href="http://joss.theoj.org/papers/10.21105/joss.00025"><img src="https://camo.githubusercontent.com/846b750f582ae8f1d0b4f7e8fee78bed705c88ba/687474703a2f2f6a6f73732e7468656f6a2e6f72672f7061706572732f31302e32313130352f6a6f73732e30303032352f7374617475732e737667" alt="JOSS" data-canonical-src="http://joss.theoj.org/papers/10.21105/joss.00025/status.svg" style="max-width:100%;"></a>
             </p>
             <p>
-            Development and source code on <a href="https://github.com/genenetwork/">github</a> with <a href="https://github.com/genenetwork/genenetwork2/issues">issue tracker</a> and <a href="https://github.com/genenetwork/genenetwork2/blob/master/README.md">documentation</a>. Join the <span class="broken_link" href="http://listserv.uthsc.edu/mailman/listinfo/genenetwork-dev">mailing list</span> and find us on <a href="https://webchat.freenode.net/">IRC</a> (#genenetwork channel).
+            Development and source code on <a href="https://github.com/genenetwork/">github</a> with <a href="https://github.com/genenetwork/genenetwork2/issues">issue tracker</a> and <a href="https://github.com/genenetwork/genenetwork2/blob/master/README.md">documentation</a>. Join the <a href="http://listserv.uthsc.edu/mailman/listinfo/genenetwork-dev">mailing list</a> and find us on <span class="broken_link" href="https://webchat.freenode.net/">IRC</span> (#genenetwork channel).
             {% if version: %}
             <p><small>GeneNetwork {{ version }}</small></p>
             {% endif %}
-- 
cgit v1.2.3


From fafce2f44087edf51756f0118054d1e3aa654273 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Fri, 18 Jun 2021 19:21:11 +0000
Subject: Re-enable bicor for correlations and fix issue where ro.Vector needed
 to be changed to ro.FloatVector

---
 wqflask/wqflask/correlation/show_corr_results.py | 30 ++++++++++++------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index 2f3df67a..f1cf3733 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -22,7 +22,7 @@ import collections
 import json
 import scipy
 import numpy
-# import rpy2.robjects as ro                    # R Objects
+import rpy2.robjects as ro                    # R Objects
 import utility.logger
 import utility.webqtlUtil
 
@@ -459,9 +459,9 @@ class CorrelationResults:
 
         if num_overlap > 5:
             # ZS: 2015 could add biweight correlation, see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/
-            # if self.corr_method == 'bicor':
-            #     sample_r, sample_p = do_bicor(
-            #         self.this_trait_vals, target_vals)
+            if self.corr_method == 'bicor':
+                sample_r, sample_p = do_bicor(
+                    self.this_trait_vals, target_vals)
             if self.corr_method == 'pearson':
                 sample_r, sample_p = scipy.stats.pearsonr(
                     self.this_trait_vals, target_vals)
@@ -487,22 +487,22 @@ class CorrelationResults:
                     self.sample_data[str(sample)] = float(value)
 
 
-# def do_bicor(this_trait_vals, target_trait_vals):
-#     r_library = ro.r["library"]             # Map the library function
-#     r_options = ro.r["options"]             # Map the options function
+def do_bicor(this_trait_vals, target_trait_vals):
+    r_library = ro.r["library"]             # Map the library function
+    r_options = ro.r["options"]             # Map the options function
 
-#     r_library("WGCNA")
-#     r_bicor = ro.r["bicorAndPvalue"]        # Map the bicorAndPvalue function
+    r_library("WGCNA")
+    r_bicor = ro.r["bicorAndPvalue"]        # Map the bicorAndPvalue function
 
-#     r_options(stringsAsFactors=False)
+    r_options(stringsAsFactors=False)
 
-#     this_vals = ro.Vector(this_trait_vals)
-#     target_vals = ro.Vector(target_trait_vals)
+    this_vals = ro.FloatVector(this_trait_vals)
+    target_vals = ro.FloatVector(target_trait_vals)
 
-#     the_r, the_p, _fisher_transform, _the_t, _n_obs = [
-#         numpy.asarray(x) for x in r_bicor(x=this_vals, y=target_vals)]
+    the_r, the_p, _fisher_transform, _the_t, _n_obs = [
+        numpy.asarray(x) for x in r_bicor(x=this_vals, y=target_vals)]
 
-#     return the_r, the_p
+    return the_r, the_p
 
 
 def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_api=False):
-- 
cgit v1.2.3


From df8476115e580fa5dfbf0e2e9a8f6e5e39ae7b99 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Fri, 18 Jun 2021 19:21:30 +0000
Subject: Reenable PCA for correlation matrix

---
 .../wqflask/correlation_matrix/show_corr_matrix.py | 124 +++++++++++----------
 1 file changed, 63 insertions(+), 61 deletions(-)

diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
index 9ac02ac5..e7b16e77 100644
--- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
+++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
@@ -23,6 +23,9 @@ import math
 import random
 import string
 
+import rpy2.robjects as ro
+from rpy2.robjects.packages import importr
+
 import numpy as np
 import scipy
 
@@ -160,23 +163,22 @@ class CorrelationMatrix:
         for sample in self.all_sample_list:
             groups.append(1)
 
-        # Not doing PCA until rpy2 is excised
         self.pca_works = "False"
-        # try:
-        #     corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results))
-        #     corr_eigen_value, corr_eigen_vectors = sortEigenVectors(
-        #         corr_result_eigen)
-
-        #     if self.do_PCA == True:
-        #         self.pca_works = "True"
-        #         self.pca_trait_ids = []
-        #         pca = self.calculate_pca(
-        #             list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors)
-        #         self.loadings_array = self.process_loadings()
-        #     else:
-        #         self.pca_works = "False"
-        # except:
-        #     self.pca_works = "False"
+        try:
+            corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results))
+            corr_eigen_value, corr_eigen_vectors = sortEigenVectors(
+                corr_result_eigen)
+
+            if self.do_PCA == True:
+                self.pca_works = "True"
+                self.pca_trait_ids = []
+                pca = self.calculate_pca(
+                    list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors)
+                self.loadings_array = self.process_loadings()
+            else:
+                self.pca_works = "False"
+        except:
+            self.pca_works = "False"
 
         self.js_data = dict(traits=[trait.name for trait in self.traits],
                             groups=groups,
@@ -185,51 +187,51 @@ class CorrelationMatrix:
                             samples=self.all_sample_list,
                             sample_data=self.sample_data,)
 
-    # def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors):
-    #     base = importr('base')
-    #     stats = importr('stats')
-
-    #     corr_results_to_list = robjects.FloatVector(
-    #         [item for sublist in self.pca_corr_results for item in sublist])
-
-    #     m = robjects.r.matrix(corr_results_to_list, nrow=len(cols))
-    #     eigen = base.eigen(m)
-    #     pca = stats.princomp(m, cor="TRUE")
-    #     self.loadings = pca.rx('loadings')
-    #     self.scores = pca.rx('scores')
-    #     self.scale = pca.rx('scale')
-
-    #     trait_array = zScore(self.trait_data_array)
-    #     trait_array_vectors = np.dot(corr_eigen_vectors, trait_array)
-
-    #     pca_traits = []
-    #     for i, vector in enumerate(trait_array_vectors):
-    #         # ZS: Check if below check is necessary
-    #         # if corr_eigen_value[i-1] > 100.0/len(self.trait_list):
-    #         pca_traits.append((vector * -1.0).tolist())
-
-    #     this_group_name = self.trait_list[0][1].group.name
-    #     temp_dataset = data_set.create_dataset(
-    #         dataset_name="Temp", dataset_type="Temp", group_name=this_group_name)
-    #     temp_dataset.group.get_samplelist()
-    #     for i, pca_trait in enumerate(pca_traits):
-    #         trait_id = "PCA" + str(i + 1) + "_" + temp_dataset.group.species + "_" + \
-    #             this_group_name + "_" + datetime.datetime.now().strftime("%m%d%H%M%S")
-    #         this_vals_string = ""
-    #         position = 0
-    #         for sample in temp_dataset.group.all_samples_ordered():
-    #             if sample in self.shared_samples_list:
-    #                 this_vals_string += str(pca_trait[position])
-    #                 this_vals_string += " "
-    #                 position += 1
-    #             else:
-    #                 this_vals_string += "x "
-    #         this_vals_string = this_vals_string[:-1]
-
-    #         Redis.set(trait_id, this_vals_string, ex=THIRTY_DAYS)
-    #         self.pca_trait_ids.append(trait_id)
-
-    #     return pca
+    def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors):
+        base = importr('base')
+        stats = importr('stats')
+
+        corr_results_to_list = ro.FloatVector(
+            [item for sublist in self.pca_corr_results for item in sublist])
+
+        m = ro.r.matrix(corr_results_to_list, nrow=len(cols))
+        eigen = base.eigen(m)
+        pca = stats.princomp(m, cor="TRUE")
+        self.loadings = pca.rx('loadings')
+        self.scores = pca.rx('scores')
+        self.scale = pca.rx('scale')
+
+        trait_array = zScore(self.trait_data_array)
+        trait_array_vectors = np.dot(corr_eigen_vectors, trait_array)
+
+        pca_traits = []
+        for i, vector in enumerate(trait_array_vectors):
+            # ZS: Check if below check is necessary
+            # if corr_eigen_value[i-1] > 100.0/len(self.trait_list):
+            pca_traits.append((vector * -1.0).tolist())
+
+        this_group_name = self.trait_list[0][1].group.name
+        temp_dataset = data_set.create_dataset(
+            dataset_name="Temp", dataset_type="Temp", group_name=this_group_name)
+        temp_dataset.group.get_samplelist()
+        for i, pca_trait in enumerate(pca_traits):
+            trait_id = "PCA" + str(i + 1) + "_" + temp_dataset.group.species + "_" + \
+                this_group_name + "_" + datetime.datetime.now().strftime("%m%d%H%M%S")
+            this_vals_string = ""
+            position = 0
+            for sample in temp_dataset.group.all_samples_ordered():
+                if sample in self.shared_samples_list:
+                    this_vals_string += str(pca_trait[position])
+                    this_vals_string += " "
+                    position += 1
+                else:
+                    this_vals_string += "x "
+            this_vals_string = this_vals_string[:-1]
+
+            Redis.set(trait_id, this_vals_string, ex=THIRTY_DAYS)
+            self.pca_trait_ids.append(trait_id)
+
+        return pca
 
     def process_loadings(self):
         loadings_array = []
-- 
cgit v1.2.3


From f314728334fde0677ea515b5910db86086fdd5ef Mon Sep 17 00:00:00 2001
From: zsloan
Date: Fri, 18 Jun 2021 19:21:54 +0000
Subject: Re-enable bicor as an option in the trait page template

---
 wqflask/wqflask/templates/show_trait_calculate_correlations.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wqflask/wqflask/templates/show_trait_calculate_correlations.html b/wqflask/wqflask/templates/show_trait_calculate_correlations.html
index e623a968..59f9b47c 100644
--- a/wqflask/wqflask/templates/show_trait_calculate_correlations.html
+++ b/wqflask/wqflask/templates/show_trait_calculate_correlations.html
@@ -70,7 +70,7 @@
                 <select name="corr_sample_method" class="form-control">
                     <option value="pearson">Pearson</option>
                     <option value="spearman">Spearman Rank</option>
-                    <!-- <option value="bicor">Biweight Midcorrelation</option> -->
+                    <option value="bicor">Biweight Midcorrelation</option>
                 </select>
             </div>
         </div>
-- 
cgit v1.2.3


From 31ad8698fe69da8d13c7a67cbf7e7ddeda67a734 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Tue, 5 Jan 2021 16:10:25 -0600
Subject: Added Scroller to mapping results table

---
 wqflask/wqflask/templates/mapping_results.html | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/wqflask/wqflask/templates/mapping_results.html b/wqflask/wqflask/templates/mapping_results.html
index d6fc6e37..35d8a157 100644
--- a/wqflask/wqflask/templates/mapping_results.html
+++ b/wqflask/wqflask/templates/mapping_results.html
@@ -357,7 +357,9 @@
     {% endif %}
 
     <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTables/js/jquery.dataTables.min.js') }}"></script>
-   <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/buttons/js/dataTables.buttons.min.js') }}"></script>
+    <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/buttons/js/dataTables.buttons.min.js') }}"></script>
+    <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/scroller/js/dataTables.scroller.min.js') }}"></script>
+
     <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/plugins/sorting/scientific.js') }}"></script>
     <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/plugins/sorting/natural.js') }}"></script>
     <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='purescript-genome-browser/js/purescript-genetics-browser.js') }}"></script>
@@ -409,13 +411,12 @@
                   "info": "Showing from _START_ to _END_ of " + js_data.total_markers + " records",
                 },
                 "order": [[1, "asc" ]],
-                "sDom": "iRZtir",
-                "iDisplayLength": -1,
-                "autoWidth": false,
-                "deferRender": true,
+                "sDom": "itir",
+                "autoWidth": true,
                 "bSortClasses": false,
-                "scrollCollapse": false,
-                "paging": false
+                "scrollY": "100vh",
+                "scroller":  true,
+                "scrollCollapse": true
             } );
             {% elif selectedChr != -1 and plotScale =="physic" and (dataset.group.species == 'mouse' or dataset.group.species == 'rat') %}
             $('#trait_table').dataTable( {
-- 
cgit v1.2.3


From 116f911561dc81565dc0f77c12e901c0d53de4e5 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Tue, 5 Jan 2021 16:10:45 -0600
Subject: Changed limit from 2000 markers to 10000 markers for the number to
 show in mapping results table

---
 wqflask/wqflask/marker_regression/run_mapping.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py
index c5b980a7..f601201b 100644
--- a/wqflask/wqflask/marker_regression/run_mapping.py
+++ b/wqflask/wqflask/marker_regression/run_mapping.py
@@ -673,9 +673,9 @@ def trim_markers_for_table(markers):
         sorted_markers = sorted(
             markers, key=lambda k: k['lrs_value'], reverse=True)
 
-    # ZS: So we end up with a list of just 2000 markers
-    if len(sorted_markers) >= 2000:
-        trimmed_sorted_markers = sorted_markers[:2000]
+    #ZS: So we end up with a list of just 2000 markers
+    if len(sorted_markers) >= 10000:
+        trimmed_sorted_markers = sorted_markers[:10000]
         return trimmed_sorted_markers
     else:
         return sorted_markers
-- 
cgit v1.2.3


From d6937e74b85a4fc44153530520774836eed60fe6 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Mon, 28 Jun 2021 18:31:29 +0000
Subject: Added doc for creating guix profile, written by Bonface

---
 doc/guix_profile_setup.org | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 doc/guix_profile_setup.org

diff --git a/doc/guix_profile_setup.org b/doc/guix_profile_setup.org
new file mode 100644
index 00000000..c397377c
--- /dev/null
+++ b/doc/guix_profile_setup.org
@@ -0,0 +1,39 @@
+* Setting up GUIX profile for GN
+
+First create a guix profile with the latest packages:
+
+: ~/opt/guix/bin/guix pull
+
+This will create a profile with the latest packages under `~/.config/guix/current`
+
+Now you have the latest guix. Check: `$HOME/.config/guix/current/bin/guix --version`
+
+At this point, it's worth mentioning that installing
+python3-genenetwork using `$HOME/.config/guix/current/bin/guix` should
+work; but let's use the dev version (since that may come in handy in
+time), and it's a nice thing to know.
+
+Next, we ensure that the appropriate Guile load paths are set:
+
+: export GUILE_LOAD_PATH=$HOME/.config/guix/current/share/guile/site/3.0/
+: export GUILE_LOAD_COMPILED_PATH=$HOME/.config/guix/current/lib/guile/3.0/site-ccache/ 
+
+Get into the container:
+
+: $HOME/.config/guix/current/bin/guix environment -C guix --ad-hoc bash gcc-toolchain
+: ./bootstrap
+: ./configure --localstatedir=/var --sysconfdir=/etc
+
+Check that everything works:
+
+: make check
+
+Clean up and build:
+
+: make clean-go
+: make -j 4
+: exit
+
+Install python3-genenetwork2 (substitute paths where necessary):
+
+: env GUIX_PACKAGE_PATH='/home/zas1024/guix-bioinformatics:/home/zas1024/guix-past/modules' $HOME/.config/guix/current/bin/guix install python3-genenetwork2 -p ~/opt/python3-genenetwork2 --substitute-urls="http://guix.genenetwork.org https://berlin.guixsd.org https://ci.guix.gnu.org https://mirror.hydra.gnu.org"
-- 
cgit v1.2.3


From bab4e72f657eef01934dc7a1645dce6d3035c3fd Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 1 Jul 2021 18:41:45 +0000
Subject: Fixed bug where there'd be an error if the max boot count is 0 for a
 chromosome

---
 wqflask/wqflask/marker_regression/display_mapping_results.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py
index ec17d3b0..2d74ea52 100644
--- a/wqflask/wqflask/marker_regression/display_mapping_results.py
+++ b/wqflask/wqflask/marker_regression/display_mapping_results.py
@@ -861,6 +861,9 @@ class DisplayMappingResults:
                             (item[1], yZero - item[2] * bootHeightThresh / maxBootCount)),
                         fill=self.BOOTSTRAP_BOX_COLOR, outline=BLACK)
 
+        if maxBootCount == 0:
+            return
+
         # draw boot scale
         highestPercent = (maxBootCount * 100.0) / nboot
         bootScale = Plot.detScale(0, highestPercent)
-- 
cgit v1.2.3


From 323c52ff746dab78a2edc343958a81e0ac1b22a5 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 1 Jul 2021 20:59:47 +0000
Subject: Fixed issue where zooming into chromosome 1 was causing the Y axis
 scaling to be bad

---
 .../marker_regression/display_mapping_results.py        | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py
index 2d74ea52..dfae4065 100644
--- a/wqflask/wqflask/marker_regression/display_mapping_results.py
+++ b/wqflask/wqflask/marker_regression/display_mapping_results.py
@@ -2289,20 +2289,9 @@ class DisplayMappingResults:
             font=VERDANA_FILE, size=int(18 * zoom * 1.5))
 
         yZero = yTopOffset + plotHeight
-        # LRSHeightThresh = drawAreaHeight
-        # AdditiveHeightThresh = drawAreaHeight/2
-        # DominanceHeightThresh = drawAreaHeight/2
-        if self.selectedChr == 1:
-            LRSHeightThresh = drawAreaHeight - yTopOffset + 30 * (zoom - 1)
-            AdditiveHeightThresh = LRSHeightThresh / 2
-            DominanceHeightThresh = LRSHeightThresh / 2
-        else:
-            LRSHeightThresh = drawAreaHeight
-            AdditiveHeightThresh = drawAreaHeight / 2
-            DominanceHeightThresh = drawAreaHeight / 2
-        # LRSHeightThresh = (yZero - yTopOffset + 30*(zoom - 1))
-        # AdditiveHeightThresh = LRSHeightThresh/2
-        # DominanceHeightThresh = LRSHeightThresh/2
+        LRSHeightThresh = drawAreaHeight
+        AdditiveHeightThresh = drawAreaHeight / 2
+        DominanceHeightThresh = drawAreaHeight / 2
 
         if LRS_LOD_Max > 100:
             LRSScale = 20.0
-- 
cgit v1.2.3


From d4c990beaf72dd885d6baa7dc1035a7044c79cdd Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 1 Jul 2021 21:28:10 +0000
Subject: Fixed issue where the interval map QTL line might extend beyond the
 graph edge when zoomed into a Mb range + removed some commented out code

---
 .../marker_regression/display_mapping_results.py     | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py
index dfae4065..1fcc2832 100644
--- a/wqflask/wqflask/marker_regression/display_mapping_results.py
+++ b/wqflask/wqflask/marker_regression/display_mapping_results.py
@@ -2561,7 +2561,10 @@ class DisplayMappingResults:
                                 Xc = startPosX + ((qtlresult['Mb'] - start_cm - startMb) * plotXScale) * (
                                     ((qtlresult['Mb'] - start_cm - startMb) * plotXScale) / ((qtlresult['Mb'] - start_cm - startMb + self.GraphInterval) * plotXScale))
                 else:
-                    Xc = startPosX + (qtlresult['Mb'] - startMb) * plotXScale
+                    if qtlresult['Mb'] > endMb:
+                        Xc = startPosX + endMb * plotXScale
+                    else:
+                        Xc = startPosX + (qtlresult['Mb'] - startMb) * plotXScale
 
                 # updated by NL 06-18-2011:
                 # fix the over limit LRS graph issue since genotype trait may give infinite LRS;
@@ -2572,36 +2575,29 @@ class DisplayMappingResults:
                 if 'lrs_value' in qtlresult:
                     if self.LRS_LOD == "LOD" or self.LRS_LOD == "-logP":
                         if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value'] == 'inf':
-                            #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/(LRSAxisList[-1]*self.LODFACTOR)
                             Yc = yZero - webqtlConfig.MAXLRS * \
                                 LRSHeightThresh / \
                                 (LRS_LOD_Max * self.LODFACTOR)
                         else:
-                            #Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/(LRSAxisList[-1]*self.LODFACTOR)
                             Yc = yZero - \
                                 qtlresult['lrs_value'] * LRSHeightThresh / \
                                 (LRS_LOD_Max * self.LODFACTOR)
                     else:
                         if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value'] == 'inf':
-                            #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRSAxisList[-1]
                             Yc = yZero - webqtlConfig.MAXLRS * LRSHeightThresh / LRS_LOD_Max
                         else:
-                            #Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/LRSAxisList[-1]
                             Yc = yZero - \
                                 qtlresult['lrs_value'] * \
                                 LRSHeightThresh / LRS_LOD_Max
                 else:
                     if qtlresult['lod_score'] > 100 or qtlresult['lod_score'] == 'inf':
-                        #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRSAxisList[-1]
                         Yc = yZero - webqtlConfig.MAXLRS * LRSHeightThresh / LRS_LOD_Max
                     else:
                         if self.LRS_LOD == "LRS":
-                            #Yc = yZero - qtlresult['lod_score']*self.LODFACTOR*LRSHeightThresh/LRSAxisList[-1]
                             Yc = yZero - \
                                 qtlresult['lod_score'] * self.LODFACTOR * \
                                 LRSHeightThresh / LRS_LOD_Max
                         else:
-                            #Yc = yZero - qtlresult['lod_score']*LRSHeightThresh/LRSAxisList[-1]
                             Yc = yZero - \
                                 qtlresult['lod_score'] * \
                                 LRSHeightThresh / LRS_LOD_Max
@@ -2634,14 +2630,12 @@ class DisplayMappingResults:
                         AdditiveHeightThresh / additiveMax
                     AdditiveCoordXY.append((Xc, Yc))
 
+                if qtlresult['Mb'] > endMb:
+                    break
+
                 m += 1
 
         if self.manhattan_plot != True:
-            # im_drawer.polygon(
-            #     xy=LRSCoordXY,
-            #     outline=thisLRSColor
-            #     #, closed=0, edgeWidth=lrsEdgeWidth, clipX=(xLeftOffset, xLeftOffset + plotWidth)
-            # )
             draw_open_polygon(canvas, xy=LRSCoordXY, outline=thisLRSColor,
                               width=lrsEdgeWidth)
 
-- 
cgit v1.2.3


From 74c1d6a6ef070271adaf486fc9a494662855d96f Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 1 Jul 2021 21:40:24 +0000
Subject: Fixed issue with suggestive/significant lines extending beyond figure
 edge + fixed a couple issues with the last commit

---
 wqflask/wqflask/marker_regression/display_mapping_results.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py
index 1fcc2832..f941267e 100644
--- a/wqflask/wqflask/marker_regression/display_mapping_results.py
+++ b/wqflask/wqflask/marker_regression/display_mapping_results.py
@@ -2372,8 +2372,7 @@ class DisplayMappingResults:
 
             # ZS: I don't know if what I did here with this inner function is clever or overly complicated, but it's the only way I could think of to avoid duplicating the code inside this function
             def add_suggestive_significant_lines_and_legend(start_pos_x, chr_length_dist):
-                rightEdge = int(start_pos_x + chr_length_dist * \
-                                plotXScale - self.SUGGESTIVE_WIDTH / 1.5)
+                rightEdge = xLeftOffset + plotWidth
                 im_drawer.line(
                     xy=((start_pos_x + self.SUGGESTIVE_WIDTH / 1.5, suggestiveY),
                         (rightEdge, suggestiveY)),
@@ -2561,7 +2560,7 @@ class DisplayMappingResults:
                                 Xc = startPosX + ((qtlresult['Mb'] - start_cm - startMb) * plotXScale) * (
                                     ((qtlresult['Mb'] - start_cm - startMb) * plotXScale) / ((qtlresult['Mb'] - start_cm - startMb + self.GraphInterval) * plotXScale))
                 else:
-                    if qtlresult['Mb'] > endMb:
+                    if self.selectedChr != -1 and qtlresult['Mb'] > endMb:
                         Xc = startPosX + endMb * plotXScale
                     else:
                         Xc = startPosX + (qtlresult['Mb'] - startMb) * plotXScale
@@ -2630,7 +2629,7 @@ class DisplayMappingResults:
                         AdditiveHeightThresh / additiveMax
                     AdditiveCoordXY.append((Xc, Yc))
 
-                if qtlresult['Mb'] > endMb:
+                if self.selectedChr != -1 and qtlresult['Mb'] > endMb:
                     break
 
                 m += 1
-- 
cgit v1.2.3


From bc2869179f2483d9ad5995d3abb0c9dbc1024acd Mon Sep 17 00:00:00 2001
From: zsloan
Date: Sat, 3 Jul 2021 20:44:41 +0000
Subject: Increased width of mapping options div and covariates window

---
 wqflask/wqflask/static/new/css/show_trait.css | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/wqflask/wqflask/static/new/css/show_trait.css b/wqflask/wqflask/static/new/css/show_trait.css
index 27404801..782dabc2 100644
--- a/wqflask/wqflask/static/new/css/show_trait.css
+++ b/wqflask/wqflask/static/new/css/show_trait.css
@@ -159,10 +159,10 @@ div.normalize-div {
 }
 
 div.mapping-main {
-  min-width: 1200px;
+  min-width: 1400px;
 }
 div.mapping-options {
-  min-width: 500px;
+  min-width: 700px;
 }
 
 div.covar-options {
@@ -194,7 +194,7 @@ div.select-covar-div {
 .selected-covariates {
   overflow-y: scroll;
   resize: none;
-  width: 200px;
+  width: 400px;
 }
 
 .cofactor-input {
@@ -259,4 +259,4 @@ input.trait-value-input {
 
 div.inline-div {
   display: inline;
-}
\ No newline at end of file
+}
-- 
cgit v1.2.3